diff --git a/hal/halx86/pc98.cmake b/hal/halx86/pc98.cmake
index 3f28342eb2c..f75a2a40ca3 100644
--- a/hal/halx86/pc98.cmake
+++ b/hal/halx86/pc98.cmake
@@ -34,6 +34,7 @@ list(APPEND HAL_PC98_SOURCE
     pic/processor.c)
 
 list(APPEND HAL_PC98_ASM_SOURCE
+    pc98/delay.S
     generic/trap.S
     pic/pic.S)
 
diff --git a/hal/halx86/pc98/delay.S b/hal/halx86/pc98/delay.S
new file mode 100644
index 00000000000..b003373e680
--- /dev/null
+++ b/hal/halx86/pc98/delay.S
@@ -0,0 +1,140 @@
+/*
+ * PROJECT:     NEC PC-98 series HAL
+ * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
+ * PURPOSE:     Busy-wait loop implementation
+ * COPYRIGHT:   Copyright 2026 Dmitry Borisov
+ */
+
+/* INCLUDES ******************************************************************/
+
+#include <asm.inc>
+#include <ks386.inc>
+
+#include "delay.h"
+
+/* GLOBALS *******************************************************************/
+
+#define PIC1_CONTROL_PORT      HEX(00)
+#define PIC1_DATA_PORT         HEX(02)
+#define PIC2_CONTROL_PORT      HEX(08)
+#define PIC2_DATA_PORT         HEX(0A)
+
+#define PIC_EOI                HEX(20)
+
+#define MSR_RDTSC              HEX(10)
+
+/* FUNCTIONS *****************************************************************/
+
+.code
+
+#ifdef _USE_ML
+INIT_ASM SEGMENT PARA PUBLIC USE32 READ WRITE EXECUTE DISCARD
+#endif
+
+/*
+ * Timer interrupt handler used while calibrating the stall loop.
+ * The first interrupt only zeroes the TSC (through MSR_RDTSC); each of the
+ * next NUM_SAMPLES interrupts stores the 64-bit TSC reading into
+ * TscCalibrationArray. TscCalibrationPhase counts the handled interrupts
+ * and stops advancing at NUM_SAMPLES + 1. EDX, ECX and EAX are preserved.
+ */
+PUBLIC _HalpTscCalibrationISR
+_HalpTscCalibrationISR:
+    push edx
+    push ecx
+    push eax
+
+    /* The first thing we do is read the current TSC value */
+    rdtsc
+
+    mov ecx, dword ptr ds:[_TscCalibrationPhase]
+
+    /* Ignore the first interrupt since it fires randomly */
+    test ecx, ecx
+    je .FirstInterrupt
+
+    /* Check if we're already done (all NUM_SAMPLES readings are stored) */
+    cmp ecx, NUM_SAMPLES
+    ja .Done
+
+    /* Store the current TSC value (the phase number is 1-based) */
+    mov dword ptr ds:[ecx*8 + _TscCalibrationArray + 0 - 8], eax
+    mov dword ptr ds:[ecx*8 + _TscCalibrationArray + 4 - 8], edx
+    jmp .AdvancePhase
+
+.FirstInterrupt:
+    /* Reset TSC value to 0 (WRMSR writes EDX:EAX to the MSR selected by ECX) */
+    mov ecx, MSR_RDTSC
+    xor eax, eax
+    xor edx, edx
+    wrmsr
+
+.AdvancePhase:
+    inc dword ptr ds:[_TscCalibrationPhase]
+
+.Done:
+    /* Send the EOI for the IRQ to the first PIC */
+    mov al, PIC_EOI
+    out PIC1_CONTROL_PORT, al
+
+    pop eax
+    pop ecx
+    pop edx
+    iretd
+
+/* Calibration data written by the ISR above and read by delay.c */
+.align 8
+
+PUBLIC _TscCalibrationArray
+_TscCalibrationArray:
+    .space NUM_SAMPLES * 8
+
+PUBLIC _TscCalibrationPhase
+_TscCalibrationPhase:
+    .long 0
+
+#ifdef _USE_ML
+INIT_ASM ENDS
+#endif
+
+/*
+ * VOID NTAPI KeStallExecutionProcessor(ULONG MicroSeconds)
+ *
+ * Spins until the TSC has advanced by MicroSeconds * StallScaleFactor ticks;
+ * the factor is read from fs:[KPCR_STALL_SCALE_FACTOR]. The stack argument
+ * is popped on return (ret 4). EBX is preserved; EAX, ECX and EDX are not.
+ */
+PUBLIC _KeStallExecutionProcessor@4
+_KeStallExecutionProcessor@4:
+    push ebx
+
+    /*
+     * Force the in-order execution of the RDTSC instruction.
+     * HAL will overwrite this with a no-op instruction on older processors.
+     */
+    xor eax, eax
+    cpuid
+
+    /* Get the initial time */
+    rdtsc
+
+    /* Calculate the ending time: EBX:ECX = TSC + MicroSeconds * StallScaleFactor */
+    mov ecx, eax
+    mov eax, fs:[KPCR_STALL_SCALE_FACTOR]
+    mov ebx, edx
+    mul dword ptr [esp + 8]
+    add ecx, eax
+    adc ebx, edx
+
+    /* Loop until time is elapsed (64-bit compare of EDX:EAX against EBX:ECX) */
+.Loop:
+    rdtsc
+    cmp eax, ecx
+    mov eax, edx
+    sbb eax, ebx
+    jc .Loop
+
+    pop ebx
+    ret 4
+
+END
diff --git a/hal/halx86/pc98/delay.c b/hal/halx86/pc98/delay.c
index 7561a1caff7..c8a812e7d51 100644
--- a/hal/halx86/pc98/delay.c
+++ b/hal/halx86/pc98/delay.c
@@ -1,8 +1,9 @@
 /*
  * PROJECT:     NEC PC-98 series HAL
  * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
- * PURPOSE:     Delay routines
- * COPYRIGHT:   Copyright 2020 Dmitry Borisov (di.sean@protonmail.com)
+ * PURPOSE:     TSC calibration for the busy-wait loop routine
+ * COPYRIGHT:   Copyright 2011 Timo Kreuzer
+ *              Copyright 2026 Dmitry Borisov
  */
 
 /* INCLUDES ******************************************************************/
@@ -12,29 +13,137 @@
 #define NDEBUG
 #include <debug.h>
 
-/* PRIVATE FUNCTIONS *********************************************************/
+#include "delay.h"
+
+/* GLOBALS *******************************************************************/
+
+#define SAMPLE_FREQUENCY    1024 // Calibration interrupt rate in Hz (period of about 0.977 ms)
+
+VOID
+__cdecl
+HalpTscCalibrationISR(VOID); // Implemented in delay.S
+
+extern volatile ULONG TscCalibrationPhase; // Defined in delay.S
+extern ULONG64 TscCalibrationArray[NUM_SAMPLES]; // Defined in delay.S
+
+/* FUNCTIONS 
*****************************************************************/
+
+static
+CODE_SEG("INIT")
+VOID
+HalpPrepareStallExecution(VOID)
+{
+    PUCHAR Instruction = (PUCHAR)((ULONG_PTR)KeStallExecutionProcessor + 1); // Skip the 1-byte "push ebx"
+    PKPRCB Prcb = KeGetCurrentPrcb();
+
+    /* Expect "xor eax, eax; cpuid" right after the leading push */
+    ASSERT((Instruction[1] == 0xC0) && // The byte [0] has different encodings
+           (Instruction[2] == 0x0F) &&
+           (Instruction[3] == 0xA2));
+
+    /*
+     * Starting with the Pentium Pro processor it is necessary to force
+     * the in-order execution of the RDTSC instruction using a serializing instruction.
+     * For more details, please refer to Section 3.1 of
+     * Intel "Using the RDTSC Instruction for Performance Monitoring".
+     *
+     * Patch the KeStallExecutionProcessor function to remove the serializing instruction
+     * for the Pentium and Pentium MMX processors.
+     */
+    if ((Prcb->CpuType < 6) && !strcmp(Prcb->VendorString, "GenuineIntel"))
+    {
+        /* Replace "xor eax, eax; cpuid" with the 4-byte no-op "lea esi, [esi+0]" */
+        Instruction[0] = 0x8D;
+        Instruction[1] = 0x74;
+        Instruction[2] = 0x26;
+        Instruction[3] = 0x00;
+
+        KeSweepICache(Instruction, 4);
+    }
+}
+
+static
+CODE_SEG("INIT")
+ULONG64
+HalpDoLinearRegression(
+    _In_ ULONG XMax, // Highest sample index; must not be 0 (it ends up in the divisor)
+    _In_reads_(XMax + 1) const ULONG64* ArrayY) // Readings taken at equal intervals
+{
+    ULONG X, SumXX;
+    ULONG64 SumXY;
+
+    /* Calculate the sum of the squares of X for X = 1..XMax (closed form) */
+    SumXX = (XMax * (XMax + 1) * (2 * XMax + 1)) / 6;
+
+    /* Calculate the sum of the differences to the first value weighted by X */
+    for (SumXY = 0, X = 1; X <= XMax; X++)
+    {
+        SumXY += X * (ArrayY[X] - ArrayY[0]);
+    }
+
+    /* Account for sample frequency (turns "per sample" into "per second") */
+    SumXY *= SAMPLE_FREQUENCY;
+
+    /* Return the quotient of the sums, rounded to the nearest integer */
+    return (SumXY + (SumXX / 2)) / SumXX;
+}
 
 CODE_SEG("INIT")
 VOID
 NTAPI
 HalpCalibrateStallExecution(VOID)
 {
-    /* FIXME */
-    NOTHING;
-}
+    ULONG_PTR Flags;
+    PVOID PreviousHandler;
+    TIMER_CONTROL_PORT_REGISTER TimerControl;
+    ULONG TimerFrequency;
+    USHORT Period;
+    ULONG64 CpuClockFrequency;
 
-/* PUBLIC FUNCTIONS **********************************************************/
-
-#ifndef _MINIHAL_
-VOID
-NTAPI
-KeStallExecutionProcessor(
-    _In_ ULONG MicroSeconds)
-{
-    while (MicroSeconds--)
+    /* The TSC-based stall loop cannot work without RDTSC, so give up early */
+    if (!(KeGetCurrentPrcb()->FeatureBits & KF_RDTSC))
     {
-        /* FIXME: Use stall factor */
-        WRITE_PORT_UCHAR((PUCHAR)CPU_IO_o_ARTIC_DELAY, 0);
+        KeBugCheck(HAL_INITIALIZATION_FAILED);
     }
+
+    Flags = __readeflags();
+    _disable();
+
+    PreviousHandler = KeQueryInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ);
+    KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, HalpTscCalibrationISR);
+
+    /* Program PIT channel 0 for binary counting in operating mode 2 */
+    TimerControl.BcdMode = FALSE;
+    TimerControl.OperatingMode = PitOperatingMode2;
+    TimerControl.Channel = PitChannel0;
+    TimerControl.AccessMode = PitAccessModeLowHigh;
+
+    if (__inbyte(0x42) & 0x20) // NOTE(review): presumably the system clock select bit - confirm
+        TimerFrequency = TIMER_FREQUENCY_1;
+    else
+        TimerFrequency = TIMER_FREQUENCY_2;
+    Period = (TimerFrequency + (SAMPLE_FREQUENCY / 2)) / SAMPLE_FREQUENCY;
+
+    __outbyte(TIMER_CONTROL_PORT, TimerControl.Bits);
+    __outbyte(TIMER_CHANNEL0_DATA_PORT, Period & 0xFF);
+    __outbyte(TIMER_CHANNEL0_DATA_PORT, Period >> 8);
+
+    HalEnableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL, Latched);
+
+    /* Collect the sample data: spin until the ISR has stored all NUM_SAMPLES readings */
+    _enable();
+    while (TscCalibrationPhase != (NUM_SAMPLES + 1))
+        NOTHING;
+    _disable();
+
+    HalDisableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL);
+    KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, PreviousHandler);
+
+    /* Calculate an average (TSC ticks per second), using simplified linear regression */
+    CpuClockFrequency = HalpDoLinearRegression(NUM_SAMPLES - 1, TscCalibrationArray);
+    KeGetPcr()->StallScaleFactor = (ULONG)(CpuClockFrequency / 1000000); // TSC ticks per microsecond
+
+    HalpPrepareStallExecution();
+
+    __writeeflags(Flags);
 }
-#endif
diff --git a/hal/halx86/pc98/delay.h b/hal/halx86/pc98/delay.h
new file mode 100644
index 00000000000..aa1507e440c
--- /dev/null
+++ b/hal/halx86/pc98/delay.h
@@ -0,0 +1,8 @@
+/*
+ * PROJECT:     NEC PC-98 series HAL
+ * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
+ * PURPOSE:     TSC calibration definitions
+ * COPYRIGHT:   Copyright 2026 Dmitry Borisov
+ */
+
+#define NUM_SAMPLES 4 /* TSC readings stored by the calibration ISR; element count of TscCalibrationArray */