1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
; Everything up to the next `bits` directive is assembled as 16-bit code.
bits 16
global ap_real_entry
; CR0 bits (OR-ed together and written to CR0 by ap_real_entry)
%define PROTECTED_MODE_ENABLE (1)
%define MONITOR_COPROCESSOR (1 << 1)
%define PAGING (1 << 31)
; CR4 bits (OR-ed together and written to CR4 by ap_real_entry)
%define DE (1 << 3)
%define PAE (1 << 5)
%define OS_FXSR (1 << 9)
%define OS_XMM_FPEXC (1 << 10)
%define OS_XSAVE (1 << 18)
; MSR index of EFER, passed in ECX to wrmsr
%define MSR_EFER (0xc0000080)
; EFER bits
; SYSCALL|LONG|NX written out in binary is 100100000001 (0x901)
%define SYSCALL (1)
%define LONG (1 << 8)
%define NX (1 << 11)
; Labels are resolved as if the image is loaded at address 0x8000.
org 0x8000
;-----------------------------------------------------------------------
; ap_real_entry -- 16-bit entry point of the AP start-up trampoline.
;
; Runs with interrupts disabled, takes a one-byte spin lock so that only
; one processor is inside the trampoline at a time, bumps the AP counter,
; then switches directly from 16-bit code to 64-bit long mode and jumps
; to ap_long_entry (which releases the lock).
;
; In:    nothing (presumably entered via a start-up IPI -- confirm with
;        the code that launches the APs)
; Out:   does not return; far-jumps to ap_long_entry
; Clobb: eax, ecx, edx, ds, es, fs, gs, flags, CR0, CR3, CR4, EFER,
;        GDTR, IDTR
; Mem:   [AP_LOCK_ADDR]  lock byte, 0 = free, 1 = held
;        [AP_COUNT_ADDR] incremented once per AP while the lock is held
;-----------------------------------------------------------------------
%define AP_LOCK_ADDR    0x6000
%define AP_COUNT_ADDR   0x6001
%define SYSCTL_PORT_A   0x92
%define A20_ENABLE      (1 << 1)
%define LM_CODE_SEL     0x0008

ap_real_entry:
        cli
        cld

        ; Flat zero-based data segments so the absolute addresses below
        ; (lock byte, counter, bsp_cr3, descriptor tables) resolve as-is.
        xor     ax, ax
        mov     ds, ax
        mov     es, ax
        mov     fs, ax
        mov     gs, ax

        ; Only one core may execute this trampoline at a time.
        ; cmpxchg compares AL (0) with the lock byte: if equal it stores
        ; CL (1) and sets ZF; otherwise it loads the byte into AL and
        ; clears ZF, so AL must be re-zeroed on every attempt.
        mov     cl, 1
.wait_for_lock:
        pause
        xor     al, al
        lock cmpxchg byte [AP_LOCK_ADDR], cl
        jnz     short .wait_for_lock

        ; n_aps++ (safe without a lock prefix: we hold the trampoline lock)
        inc     byte [AP_COUNT_ADDR]

        ; Enable the A20 line ("fast" method). Skip the write when A20 is
        ; already on, and never write a 1 to bit 0 of this port: that bit
        ; is the fast-reset control.
        in      al, SYSCTL_PORT_A
        test    al, A20_ENABLE
        jnz     short .a20_enabled
        or      al, A20_ENABLE
        and     al, 0xFE
        out     SYSCTL_PORT_A, al
.a20_enabled:

        ; Duplicate the page-table root stored in bsp_cr3.
        mov     eax, dword [bsp_cr3]
        mov     cr3, eax

        ; EFER = NXE | LME | SCE. The whole MSR is overwritten (EDX:EAX),
        ; not read-modify-written.
        xor     edx, edx
        mov     eax, (SYSCALL|LONG|NX)
        mov     ecx, MSR_EFER
        wrmsr

        ; PAE must be on before paging is enabled with LME set.
        mov     eax, (OS_XSAVE|OS_XMM_FPEXC|OS_FXSR|PAE|DE)
        mov     cr4, eax

        ; Turn on protection and paging in a single CR0 write; together
        ; with EFER.LME this activates long mode.
        mov     eax, (PAGING|MONITOR_COPROCESSOR|PROTECTED_MODE_ENABLE)
        mov     cr0, eax

        ; Load the long-mode IDT and GDT, then reload CS with the 64-bit
        ; code selector to start executing 64-bit code.
        lidt    [lmidt]
        lgdt    [lmgdt]
        jmp     LM_CODE_SEL:ap_long_entry
;-----------------------------------------------------------------------
; ap_long_entry -- first 64-bit code executed by an AP.
;
; Enables the x87/SSE/AVX state components in XCR0 (restricted to what
; the processor reports as supported), releases the trampoline lock at
; 0x6000 so the next AP may proceed, and then parks the core in a
; hlt loop.
;
; In:    reached by far jump from ap_real_entry; CR4.OSXSAVE is set there
; Out:   never returns
; Clobb: rax, rbx, rcx, rdx, XCR0
;-----------------------------------------------------------------------
%define XCR0_X87        (1)
%define XCR0_SSE        (1 << 1)
%define XCR0_AVX        (1 << 2)
%define CPUID_XSTATE    0x0d

bits 64
ap_long_entry:
        ; CPUID leaf 0Dh, sub-leaf 0 returns in EDX:EAX the XCR0 bits the
        ; processor supports. Setting an unsupported bit with xsetbv
        ; raises #GP, so request only the intersection.
        mov     eax, CPUID_XSTATE
        xor     ecx, ecx
        cpuid                                   ; also overwrites ebx, ecx, edx
        and     eax, (XCR0_AVX|XCR0_SSE|XCR0_X87)

        xor     edx, edx                        ; upper half of XCR0 = 0
        xor     ecx, ecx                        ; XCR index 0
        xsetbv

        ; Unlock the AP initialization mutex (lock byte at 0x6000).
        mov     eax, 0x6000                     ; 32-bit write zero-extends to rax
        mov     byte [rax], 0

.loop:
        hlt
        jmp     .loop
; Long-mode descriptor tables and trampoline parameters.
;
; Attribute bits of an 8-byte segment descriptor, by bit position.
%define DESC_READ_WRITE (1 << 41)       ; readable (code) / writable (data)
%define DESC_EXECUTABLE (1 << 43)       ; code segment
%define DESC_CODE_DATA  (1 << 44)       ; S = 1: not a system descriptor
%define DESC_PRESENT    (1 << 47)
%define DESC_LONG_MODE  (1 << 53)       ; L = 1: 64-bit code

align 8
lmgdt_base:
        dq 0                                                    ; selector 0x00: null
        dq DESC_LONG_MODE | DESC_PRESENT | DESC_CODE_DATA | DESC_EXECUTABLE | DESC_READ_WRITE
                                                                ; selector 0x08: 64-bit code
        dq DESC_PRESENT | DESC_CODE_DATA | DESC_READ_WRITE      ; selector 0x10: data r/w

; Operand for lgdt: 16-bit limit (table size - 1) followed by the base.
lmgdt:
        dw lmgdt - lmgdt_base - 1
        dq lmgdt_base

; Value loaded into CR3 by ap_real_entry. Zero in the image; presumably
; patched by the BSP before the APs are started -- confirm with the caller.
bsp_cr3:
        dd 0

; Operand for lidt: limit 0, base 0.
lmidt:
        dw 0
        dq 0
|