aboutsummaryrefslogtreecommitdiff
path: root/bunny/src/ap_boot.s
blob: 5058b00c1d0bcc5028141df8ae521d2c4dd1e88d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
; Assemble the code that follows as 16-bit instructions (until the later
; `bits 64` directive).
bits 16

global ap_real_entry

; CR0 bits (OR'ed together and written to CR0 in ap_real_entry)
;   PROTECTED_MODE_ENABLE = bit 0  (PE)
;   MONITOR_COPROCESSOR   = bit 1  (MP)
;   PAGING                = bit 31 (PG)
%define PROTECTED_MODE_ENABLE (1)
%define MONITOR_COPROCESSOR   (1 << 1)
%define PAGING                (1 << 31)

; CR4 bits (OR'ed together and written to CR4 in ap_real_entry)
;   DE           = bit 3  (debugging extensions)
;   PAE          = bit 5  (physical address extension)
;   OS_FXSR      = bit 9  (OSFXSR)
;   OS_XMM_FPEXC = bit 10 (OSXMMEXCPT)
;   OS_XSAVE     = bit 18 (OSXSAVE)
%define DE           (1 << 3)
%define PAE          (1 << 5)
%define OS_FXSR      (1 << 9)
%define OS_XMM_FPEXC (1 << 10)
%define OS_XSAVE     (1 << 18)

; MSR index of the Extended Feature Enable Register (EFER).
%define MSR_EFER (0xc0000080)

; EFER bits
; SYSCALL|LONG|NX combined is binary 100100000001 (0x901):
;   SYSCALL = bit 0  (SCE, syscall enable)
;   LONG    = bit 8  (LME, long mode enable)
;   NX      = bit 11 (NXE, no-execute enable)
%define SYSCALL (1)
%define LONG    (1 << 8)
%define NX      (1 << 11)

; All labels below are resolved as if the image is loaded at address 0x8000.
org 0x8000
;-----------------------------------------------------------------------
; ap_real_entry -- 16-bit entry point of the trampoline (origin 0x8000).
;
; Per the existing comments this is run by each application processor
; (AP), one at a time. Sequence:
;   1. Disable interrupts, clear DF, zero DS/ES/FS/GS.
;   2. Spin until the lock byte at 0x6000 is acquired.
;   3. Increment the byte counter at 0x6001 ("n_aps").
;   4. Set the fast-A20 bit in port 0x92.
;   5. Load CR3 from bsp_cr3, write EFER, CR4 and CR0.
;   6. Load the IDT and GDT descriptors and far-jump to ap_long_entry
;      through selector 0x0008 (the 64-bit code descriptor in lmgdt_base).
;
; In:    nothing is read from registers.
; Out:   does not return; control continues at ap_long_entry.
; Clobb: ax/eax, bx, cx/ecx, edx, flags, DS, ES, FS, GS, CR0, CR3, CR4,
;        EFER, IDTR, GDTR.
; NOTE(review): SS and SP are never loaded here, so no stack is
; available; nothing in this block pushes or calls.
;-----------------------------------------------------------------------
ap_real_entry:
    cli
    cld

    ; Zero the data segment registers so [ds:bx] and the label references
    ; below are plain offsets from address 0.
    xor ax, ax
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    ; Only one core may execute this trampoline at a time.
    ; The byte at 0x6000 is a shared-memory lock enforcing that:
    ; 0 = free, 1 = held. It is released in ap_long_entry.
    mov bx, 0x6000
    mov cl, 1
.wait_for_lock:
    ; Compare AL (0) with [0x6000]. If equal, CL (1) is stored into
    ; [0x6000] and ZF is set, so we fall through holding the lock.
    ; If not equal, ZF is cleared and we loop. A failed cmpxchg also loads
    ; the memory byte into AL, which is why AL is re-zeroed every iteration.
    pause
    xor al, al
    lock cmpxchg byte [ds:bx], cl
    jnz short .wait_for_lock

    inc bx
    ; increment n_aps (0x6001); safe without LOCK because the lock byte
    ; at 0x6000 is held here.
    inc byte [ds:bx]
    ; NOTE(review): CX receives the new count, but ECX is overwritten by
    ; `mov ecx, MSR_EFER` below before any visible use -- the value is
    ; currently dead.
    movzx cx, byte [ds:bx]

    ; Enable the A20 address line using the "fast" method: set bit 1 of
    ; system control port 0x92.
    ; NOTE(review): the other bits of the port are written back as read;
    ; confirm that is acceptable on the target hardware.
    in al, 0x92
    or al, 2
    out 0x92, al

    ; Duplicate BSP's CR3. bsp_cr3 is a 32-bit field that is zero in this
    ; image; presumably it is filled in before the AP is started -- TODO
    ; confirm against the code that installs the trampoline.
    mov eax, dword [bsp_cr3]
    mov cr3, eax

    ; Set NXE (NX enable), LME (long mode enable), and SCE (syscall enable) in EFER.
    ; wrmsr writes EDX:EAX to the MSR selected by ECX; EDX is zeroed so the
    ; upper 32 bits are written as 0. The whole register is overwritten.
    xor edx, edx
    mov eax, (SYSCALL|LONG|NX)
    mov ecx, MSR_EFER
    wrmsr

    ; CR4 is overwritten (not read-modify-written) with exactly these bits.
    mov eax, (OS_XSAVE|OS_XMM_FPEXC|OS_FXSR|PAE|DE)
    mov cr4, eax

    ; CR0 is likewise overwritten: paging and protected mode are turned on
    ; in a single write, after CR3, EFER.LME and CR4.PAE are in place.
    mov eax, (PAGING|MONITOR_COPROCESSOR|PROTECTED_MODE_ENABLE)
    mov cr0, eax

    ; Load long mode IDT (lmidt has limit 0 and base 0).
    lidt [lmidt]

    ; Load long mode GDT
    lgdt [lmgdt]

    ; Far jump: selector 0x0008 is GDT entry 1, the 64-bit code
    ; descriptor, so execution continues at ap_long_entry as 64-bit code.
    jmp 0x0008:ap_long_entry



;-----------------------------------------------------------------------
; ap_long_entry -- target of the far jump at the end of ap_real_entry.
; Assembled as 64-bit code.
;
; Writes XCR0, releases the lock byte at 0x6000 taken in ap_real_entry,
; then halts forever.
;
; In:    nothing is read from registers.
; Out:   never returns.
; Clobb: eax/rax, ecx, edx, XCR0.
; NOTE(review): no stack pointer or data segment register is set up in
; this block, and nothing here uses a stack.
;-----------------------------------------------------------------------
bits 64
ap_long_entry:

    ; Enable AVX, SSE and x87
    ; xsetbv writes EDX:EAX to the extended control register selected by
    ; ECX; ECX = 0 selects XCR0, and 7 sets its bits 0 (x87), 1 (SSE) and
    ; 2 (AVX).
    mov edx, 0
    mov eax, 7
    mov ecx, 0
    xsetbv

    ; unlock AP initialization mutex (store 0 to the lock byte at 0x6000)
    mov rax, 0x6000
    mov byte [rax], 0

    ; Park this core. No `sti` follows the `cli` in ap_real_entry; the
    ; jump re-halts the core if `hlt` ever returns.
.loop:
    hlt
    jmp .loop

; ---------------------------------------------------------------------
; Trampoline data: GDT, GDT/IDT pseudo-descriptors and the CR3 slot.
; All of it is referenced from ap_real_entry.
; ---------------------------------------------------------------------

; Start the GDT on an 8-byte boundary (each descriptor is 8 bytes).
align 8
lmgdt_base:
    ; Selector 0x00
    dq 0x0000000000000000 ; Null descriptor
    ; Selector 0x08 -- used by the far jump into ap_long_entry
    dq 0x00209a0000000000 ; 64-bit, present, code
    ; Selector 0x10 -- not loaded into any segment register in this file
    dq 0x0000920000000000 ; Present, data r/w

; Pseudo-descriptor passed to lgdt: 16-bit limit followed by the base.
lmgdt:
    ; Limit = size of the table in bytes minus one (3 * 8 - 1 = 23).
    dw (lmgdt - lmgdt_base) - 1
    dq lmgdt_base

; 32-bit value loaded into CR3 by ap_real_entry. Zero in the assembled
; image; presumably written before the AP is started -- TODO confirm
; against the code that installs the trampoline.
bsp_cr3:
    dd 0
; Pseudo-descriptor passed to lidt: limit 0, base 0.
lmidt:
    dw 0
    dq 0