bits 16

global ap_real_entry

; CR0 bits
%define PROTECTED_MODE_ENABLE (1)
%define MONITOR_COPROCESSOR   (1 << 1)
%define PAGING                (1 << 31)

; CR4 bits
%define DE           (1 << 3)
%define PAE          (1 << 5)
%define OS_FXSR      (1 << 9)
%define OS_XMM_FPEXC (1 << 10)
%define OS_XSAVE     (1 << 18)

%define MSR_EFER (0xc0000080)

; EFER bits (SYSCALL | LONG | NX = 0b1001_0000_0001)
%define SYSCALL (1)
%define LONG    (1 << 8)
%define NX      (1 << 11)

org 0x8000

ap_real_entry:
    cli
    cld

    xor ax, ax
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    ; Only one core can execute this trampoline at a time or shit gets
    ; weird. This is a shared-memory mutex to force that.
    mov bx, 0x6000
    mov cl, 1
.wait_for_lock:
    ; Compare AL (0) with the lock byte at [0x6000]. If they differ the
    ; lock is held, so loop. If they match, atomically store CL (1) to
    ; [0x6000] and fall through owning the lock.
    pause
    xor al, al
    lock cmpxchg byte [ds:bx], cl
    jnz short .wait_for_lock

    ; Increment n_aps (the AP counter at 0x6001); CX gets this core's
    ; unique 1-based index.
    inc bx
    inc byte [ds:bx]
    movzx cx, byte [ds:bx]

    ; Enable the A20 line ("fast A20" via port 0x92) so all of memory
    ; is addressable.
    in  al, 0x92
    or  al, 2
    out 0x92, al

    ; Duplicate the BSP's CR3 so this AP shares its page tables.
    mov eax, dword [bsp_cr3]
    mov cr3, eax

    ; Set NXE (NX enable), LME (long mode enable), and SCE (syscall
    ; enable) in EFER.
    xor edx, edx
    mov eax, (SYSCALL | LONG | NX)
    mov ecx, MSR_EFER
    wrmsr

    mov eax, (OS_XSAVE | OS_XMM_FPEXC | OS_FXSR | PAE | DE)
    mov cr4, eax

    ; With LME already set, enabling paging and protection in one write
    ; takes us straight from real mode into long mode.
    mov eax, (PAGING | MONITOR_COPROCESSOR | PROTECTED_MODE_ENABLE)
    mov cr0, eax

    ; Load long mode IDT
    lidt [lmidt]

    ; Load long mode GDT
    lgdt [lmgdt]

    ; Far jump through the 64-bit code selector to complete the switch.
    jmp 0x0008:ap_long_entry

bits 64

ap_long_entry:
    ; Enable AVX, SSE and x87: ECX = 0 selects XCR0, EDX:EAX = 7 sets
    ; the x87, SSE, and AVX state bits.
    mov edx, 0
    mov eax, 7
    mov ecx, 0
    xsetbv

    ; Unlock the AP initialization mutex so the next core can boot.
    mov rax, 0x6000
    mov byte [rax], 0

.loop:
    hlt
    jmp .loop

align 8

lmgdt_base:
    dq 0x0000000000000000 ; Null descriptor
    dq 0x00209a0000000000 ; 64-bit, present, code
    dq 0x0000920000000000 ; Present, data r/w

lmgdt:
    dw (lmgdt - lmgdt_base) - 1
    dq lmgdt_base

bsp_cr3:
    dd 0 ; Patched by the BSP with its CR3 before the APs are started

lmidt:
    dw 0 ; Zero-limit IDT; fine while interrupts stay disabled
    dq 0
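; ------------------------------------------------------------------
; Usage sketch (an assumption, not part of the original blob): the BSP
; has to stage this trampoline at 0x8000, patch bsp_cr3 with its own
; CR3, and then wake each AP with the INIT-SIPI-SIPI dance. The incbin
; file name, the BSP_CR3_ADDR value, and the delay points are
; hypothetical; the ICR address assumes the default xAPIC MMIO base of
; 0xfee00000. SIPI vector 0x08 encodes start address 0x08 << 12 =
; 0x8000, matching the org above. Code like this belongs in the BSP's
; 64-bit kernel, not in the trampoline binary itself.

bits 64

%define LAPIC_ICR_LOW (0xfee00300) ; ICR low dword, default xAPIC base
%define BSP_CR3_ADDR  (0x80f0)     ; address of bsp_cr3 in the staged
                                   ; blob -- illustrative value only,
                                   ; take the real one from the build

trampoline:     incbin "ap_trampoline.bin" ; flat binary of the file
trampoline_end:                            ; above (name assumed)

wake_aps:
    ; Stage the trampoline at its linked address (org 0x8000).
    mov rsi, trampoline
    mov rdi, 0x8000
    mov rcx, trampoline_end - trampoline
    rep movsb

    ; Publish our CR3 so the APs share our page tables.
    mov rax, cr3
    mov [BSP_CR3_ADDR], eax

    ; INIT IPI, all-excluding-self:
    ; 0xc0000 (shorthand) | 0x4000 (assert) | 0x500 (INIT).
    mov dword [LAPIC_ICR_LOW], 0x000c4500
    ; ... wait ~10 ms here (PIT/TSC delay omitted) ...

    ; Startup IPI, vector 0x08:
    ; 0xc0000 | 0x4000 | 0x600 (startup) | 0x08.
    mov dword [LAPIC_ICR_LOW], 0x000c4608
    ; ... wait ~200 us, then send the second SIPI per the protocol ...
    mov dword [LAPIC_ICR_LOW], 0x000c4608
    ret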