#include <arch/assembler.h>
#include <arch/page_types.h>
#include <arch/ptbls_types.h>
#include <arch/setup.h>
#include <nyan/kconfig.h>
#include <uapi/arch/boot_params.h>
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
ASM_EXTERN(xdbc_switch_from_pa_to_va)
ASM_EXTERN(usb_xdbc_map_pages)
ASM_EXTERN(usb_xdbc_map_mmio)
ASM_EXTERN(usb_xdbc_reserve)
ASM_EXTERN(memblk_init)
ASM_EXTERN(memblk_reserve)
ASM_EXTERN(kernel_pt256tib)
ASM_EXTERN(kernel_pt512gib)
ASM_EXTERN(kernel_image_pt512gib)
ASM_EXTERN(kernel_image_pt1gib)
ASM_EXTERN(vmemmap_pt512gib)
ASM_EXTERN(free_pages_init)
#define L(l) LOCAL__##l
// XXX: the code which will jump from the identity mapping to the kernel virtual address
// space must be in the first 2MiB page, because we keep only that page while remapping.
ASM_SECTION(.head_text_va_jmp_code,12) // 4KiB
//**************************************************************************************************
// Expectations on startup:
// - efi x64 hardware configuration (the specs are very incomplete -> we *will* have issues)
// - boot_params in rdi
// - aligned on a 2MiB page boundary.
ASM_GLOBAL_FUNCTION(startup) // XXX: alignment is enforced externally.
lea rsp, [rel L(init_stack_top)] // Do not reuse the boot firmware stack.
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
// Get an independent copy of the pa of the xdbc pages from the boot_params.
mov rax, qword ptr [BP + BP_DBC_PAGES]
mov qword ptr [rel xdbc_pages], rax
bt ax, 12 // LA57 bit, 5lvls or 4lvls ptbls.
jc L(need_5lvls_kernel)
DBGNL("nyankernel is starting")
//------------------------------------------------------------------------------------------
mov rsi, qword ptr [BP + BP_CMDLINE_PTR]
je L(cmdline_termnull_found)
jmp L(cmdline_next_byte)
L(cmdline_termnull_found):
inc rcx // = cmdline_end (includes the 0-terminating char)
sub rcx, rsi // = cmdline_bytes_n
cmp rcx, CMDLINE_BYTES_N_MAX
mov rcx, CMDLINE_BYTES_N_MAX
lea rdi, [rel cmdline]
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
lea rax, [rel cmdline]
DBG1NL("copied command line is %s",rax)
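// Illustrative sketch (not part of the build): the clamp-and-copy above is the assembly
// equivalent of the following C, assuming the elided copy instruction is a rep movsb:
//   n = strlen(cmdline_ptr) + 1;                          // include the 0 terminator
//   if (n > CMDLINE_BYTES_N_MAX) n = CMDLINE_BYTES_N_MAX; // clamp
//   memcpy(cmdline, cmdline_ptr, n);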
//------------------------------------------------------------------------------------------
call L(mem_reservations)
call L(memory_map_general_use_p2mib_init) // Using the efi memory map.
DBG1NL("memory map: 2MiB-aligned minimal general use physical address is 0x%p", qword ptr [rel pa_min])
DBG1NL("memory map: 2MiB-aligned maximal general use physical address is 0x%p", qword ptr [rel pa_max])
// mov rax, qword ptr [rip + memory_map_general_use_p2mib + 8 * 0]
//1: cmp rax, qword ptr [rip + memory_map_general_use_p2mib + 8 * 1]
// XDBC1("memory map:DESC=0x%p",rax);XDBC1(":start=0x%p",qword ptr [rax + 8 * 0]);XDBC1(":end=0x%p\n",qword ptr [rax + 8 * 1])
call L(vmemmap_prepare)
// install the kernel 512GiB entry in the kernel 256TiB page table
lea rax, [rel kernel_pt512gib]
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [rel kernel_pt256tib + ASM_SHL(CANONICAL_HI_PT256TIB_512GIBENT_IDX_START \
+ CONFIG_KERNEL_PT256TIB_512GIBENT_HI_IDX, PTS_ENTRY_BYTES_N_LOG2)], rax
DBG1NL("kernel pt256tib 512GiB entry=0x%p", rax)
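// Worked example (illustration only, the values are assumptions): with 4lvls paging the
// 256TiB page table has 512 entries of 8 bytes and the canonical high half starts at
// index 256. Assuming CANONICAL_HI_PT256TIB_512GIBENT_IDX_START = 256 and
// CONFIG_KERNEL_PT256TIB_512GIBENT_HI_IDX = 0, the byte offset of the entry written
// above is (256 + 0) << 3 = 0x800.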
call L(kernel_image_ptbls_build) // we will map the xdbc pages and mmio right after the kernel image
//DEBUG -- START ###########################################################################
//TODO: remove gas local label
// mov rcx, PTE_PT_ADDR_MSK
// lea r11, [rip + kernel_pt256tib]
// //and r11, 0xfffffffffffff000
// XDBC1("kernel_pt256tib=0x%p\n", r11)
// mov qword ptr [rsp + 8 * 0], rax // 512GiB entry end
//0: bt qword ptr [r11], PTE_PRESENT_BIT
// mov r10, qword ptr [r11]
// XDBC1("pt256tib_pt512gibent=0x%p\n", r10)
// mov qword ptr [rsp + 8 * 1], rax // 1GiB entry end
//2: bt qword ptr [r10], PTE_PRESENT_BIT
// bt qword ptr [r10], PTE_PAGE_SIZE_BIT
// XDBC1("1GiB entry is a page=0x%p\n",qword ptr [r10])
//4: mov r9, qword ptr [r10]
// XDBC1("pt512gib_pt1gibent=0x%p\n", r9)
// mov qword ptr [rsp + 8 * 2], rax // 2MiB entry end
//5: bt qword ptr [r9], PTE_PRESENT_BIT
// bt qword ptr [r9], PTE_PAGE_SIZE_BIT
// XDBC1("2MiB entry is a page=0x%p\n",qword ptr [r9])
//7: mov r8, qword ptr [r9]
// XDBC1("pt1gib_pt2mibent=0x%p\n", r8)
// mov qword ptr [rsp + 8 * 3], rax // 4KiB entry end
//8: bt qword ptr [r8], PTE_PRESENT_BIT
// XDBC1("4KiB entry is a page=0x%p\n",qword ptr [r8])
// cmp r8, qword ptr [rsp + 8 * 3]
// cmp r9, qword ptr [rsp + 8 * 2]
// cmp r10, qword ptr [rsp + 8 * 1]
// cmp r11, qword ptr [rsp + 8 * 0]
//DEBUG -- END #############################################################################
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
lea rdi, [rel startup]
call xdbc_switch_from_pa_to_va
//==========================================================================================
// Before switching to kernel virtual address space, we have to map in the kernel page
// tables or copy in the kernel image everything we need from the current identity mapping.
// We still have an identity map of the first 2MiB page of the kernel image in order to be
// able to run this code (we don't rely on the TLB cache).
lea rax, [rel kernel_pt256tib]
mov cr3, rax // XXX: bit 63 usage depends on PCIDE, if available and enabled by EFI.
// Classic way to perform a global-page tlb flush, which is not done by changing cr3: toggle CR4.PGE.
//==========================================================================================
// jumping to kernel virtual address space
mov rax, CANONICAL_HI_VA_MSK ASM_OR \
(ASM_SHL(1,39) * CONFIG_KERNEL_PT256TIB_512GIBENT_HI_IDX \
+ ASM_SHL(1,30) * CONFIG_KERNEL_PT512GIB_1GIBENT_IDX)
add rax, L(va_startup) - startup
jmp rax // right below
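// Worked example (illustration only, the values are assumptions): the target va is
// canonical_hi_msk | (pml4_idx << 39) | (pdpt_idx << 30), plus the offset of va_startup
// within the image. Assuming CANONICAL_HI_VA_MSK = 0xffff800000000000 and both CONFIG_*
// indices = 0, the jump lands at 0xffff800000000000 + (L(va_startup) - startup).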
// XXX: all the code from startup up to here must be in the first 2MiB page of the kernel
// image, since we identity map only one 2MiB page in our kernel virtual address space in
// order to still have the code to perform the jump. We use a linker assertion for this.
ASM_SECTION(.head_text, 12) // 4KiB
//==========================================================================================
//##################################################################################################
// We decided to do a major split between 5lvls and 4lvls kernels.
L(need_5lvls_kernel):
DBGNL("5lvls page tables are enabled and this is a 4lvls kernel, halting")
//**************************************************************************************************
ASM_ALIGN_NOPS(CACHE_LINE_BYTES_N_LOG2)
// Reset and install the stack in the kernel virtual address space.
lea rsp, [rel L(init_stack_top)]
DBGNL("now running in kernel virtual address space")
// Configure the fine-grained behavior of TLB cache invalidation.
DBG3NL("EFER(rcx=0x%lx):RDX=0x%lx:RAX=0x%lx",rcx,rdx,rax)
bts eax, EFER_TCE_BIT
DBGNL("EFER[TCE] enabled")
// Remove the identity map of the first 2MiB of the kernel image.
lea rax, [rel kernel_pt256tib]
mov rdx, qword ptr [rel L(identity_first_2mib_startup_pa)]
shr rcx, 39 // 512GiB
and rcx, ASM_SHL(1, PTS_BITS_N) - 1 // msk_lo(PTS_BITS_N) = index of the 512GiB entry in the 256TiB page table
mov qword ptr [rax + rcx * 8], 0
invlpg [rdx] // fine-grained (EFER[TCE] was enabled)
ASM_FATAL // END -- L(va_startup)
//**************************************************************************************************
// We have memory reservations to do: protect the hardware state, the kernel, many things from boot_params...
#define LL(l) LOCAL__mem_reservations__##l
#define DBG_HDR "mem_reservations:"
mov qword ptr [rsp + 8 * 0], rbx
// 6 qwords as misc variables.
DBGHNL("reserving kernel image")
lea rdi, [rel startup]
R_X86_64_64_PROLOG(img_bytes_n_load_imm64,2,ld_img_bytes_n)
mov rsi, R_X86_64_64_PLACEHOLDER(ld_img_bytes_n)
xor edx, edx // Not reclaimable.
jz LL(kernel_image_is_reserved)
DBGHNL("failed to reserve kernel image")
LL(kernel_image_is_reserved):
// boot_params -- START --------------------------------------------------------------------
DBGHNL("fine-grained reservations from boot_params -- START")
DBGHNL("reserving root boot_params structure")
mov rdx, 1 // Reclaimable.
jz LL(root_boot_params_is_reserved)
DBGHNL("failed to reserve root boot_params structure")
LL(root_boot_params_is_reserved):
DBGHNL("reserving efi memory map")
mov rdi, qword ptr [BP + BP_EFI_INFO_MEMMAP_PTR]
mov esi, dword ptr [BP + BP_EFI_INFO_MEMMAP_BYTES_N]
mov rdx, 1 // Reclaimable.
jz LL(efi_memory_map_is_reserved)
DBGHNL("failed to reserve the efi memory map")
LL(efi_memory_map_is_reserved):
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
call usb_xdbc_reserve
DBGHNL("fine-grained reservations from boot_params -- END")
// boot_params -- END ----------------------------------------------------------------------
// boot CPU state -- START =================================================================
DBGHNL("reserving current boot CPU hardware critical state -- START")
// boot CPU identity page tables -- START --------------------------------------------------
DBGHNL("reserving boot CPU 4levels page tables")
// Reserve the top 256TiB page table with its 512GiB entries.
mov rdx, PTE_PT_ADDR_MSK // Cleanup cr3.
mov rsi, ASM_SHL(1,12) // 4KiB
mov rdx, 1 // Reclaimable.
DBGH1NL("current page tables:reserving 256TiB page table at 0x%p",rdi)
mov rdx, PTE_PT_ADDR_MSK // cleanup cr3
#define PT256TIB_512GIBENT qword ptr [rsp + 8 * 1]
mov PT256TIB_512GIBENT, rax
add rax, ASM_SHL(1,12) // 4KiB
#define PT256TIB_512GIBENT_END qword ptr [rsp + 8 * 2]
mov PT256TIB_512GIBENT_END, rax
// loop over 512GiB entries of the 256TiB page table -- START ------------------------------
LL(pt256tib_512gibent_process):
mov rax, PT256TIB_512GIBENT
cmp PT256TIB_512GIBENT_END, rax
je LL(no_more_pt256tib_512gibents)
mov rdi, qword ptr [rax] // Load current 512GiB entry of the 256TiB page table.
bt rdi, PTE_PRESENT_BIT // Is the 512GiB entry a present page table?
jnc LL(next_pt256tib_512gibent) // Nope.
mov rdx, PTE_PT_ADDR_MSK
and rdi, rdx // = address of the 512GiB page table.
#define PT512GIB_1GIBENT qword ptr [rsp + 8 * 3]
mov PT512GIB_1GIBENT, rdi // Save the address of the 512GiB page table.
mov rsi, ASM_SHL(1,12) // 4KiB
mov rdx, 1 // Reclaimable.
DBGH1NL("current page tables:reserving 512GiB page table at 0x%p",rdi)
mov rax, PT512GIB_1GIBENT
add rax, ASM_SHL(1,12)
#define PT512GIB_1GIBENT_END qword ptr [rsp + 8 * 4]
mov PT512GIB_1GIBENT_END, rax
// loop over 1GiB entries of the 512GiB page table -- START --------------------------------
LL(pt512gib_1gibent_process):
mov rax, PT512GIB_1GIBENT
cmp PT512GIB_1GIBENT_END, rax
je LL(next_pt256tib_512gibent)
mov rdi, qword ptr [rax] // Load the 1GiB entry.
bt rdi, PTE_PRESENT_BIT // Is the 1GiB entry a present page table or page?
jnc LL(next_pt512gib_1gibent) // Nope.
bt rdi, PTE_PAGE_SIZE_BIT // Is this a 1GiB page?
jc LL(next_pt512gib_1gibent) // Yes.
mov rdx, PTE_PT_ADDR_MSK
and rdi, rdx // = address of the 1GiB page table.
#define PT1GIB_2MIBENT qword ptr [rsp + 8 * 5]
mov PT1GIB_2MIBENT, rdi
mov rsi, ASM_SHL(1,12) // 4KiB
mov rdx, 1 // Reclaimable.
DBGH1NL("current page tables:reserving 1GiB page table at 0x%p",rdi)
mov rax, PT1GIB_2MIBENT
add rax, ASM_SHL(1,12) // 4KiB
#define PT1GIB_2MIBENT_END qword ptr [rsp + 8 * 6]
mov PT1GIB_2MIBENT_END, rax
// loop over 2MiB entries of the 1GiB page table -- START ----------------------------------
LL(pt1gib_2mibent_process):
mov rax, PT1GIB_2MIBENT
cmp PT1GIB_2MIBENT_END, rax
je LL(next_pt512gib_1gibent)
mov rdi, qword ptr [rax] // Load the 2MiB entry.
bt rdi, PTE_PRESENT_BIT // Is the 2MiB entry a present page table or page?
jnc LL(next_pt1gib_2mibent) // Nope.
bt rdi, PTE_PAGE_SIZE_BIT // Is this a 2MiB page?
jc LL(next_pt1gib_2mibent) // Yes.
mov rdx, PTE_PT_ADDR_MSK
and rdi, rdx // = address of the page table of 4KiB entries.
mov rsi, ASM_SHL(1,12) // 4KiB
mov rdx, 1 // Reclaimable.
DBGH1NL("current page tables:reserving 2MiB page table at 0x%p",rdi)
LL(next_pt1gib_2mibent):
add PT1GIB_2MIBENT, ASM_SHL(1, PTS_ENTRY_BYTES_N_LOG2)
jmp LL(pt1gib_2mibent_process)
#undef PT1GIB_2MIBENT
#undef PT1GIB_2MIBENT_END
// loop over 2MiB entries of the 1GiB page table -- END ------------------------------------
LL(next_pt512gib_1gibent):
add PT512GIB_1GIBENT, ASM_SHL(1, PTS_ENTRY_BYTES_N_LOG2)
jmp LL(pt512gib_1gibent_process)
#undef PT512GIB_1GIBENT
#undef PT512GIB_1GIBENT_END
// loop over 1GiB entries of the 512GiB page table -- END ----------------------------------
LL(next_pt256tib_512gibent):
add PT256TIB_512GIBENT, ASM_SHL(1, PTS_ENTRY_BYTES_N_LOG2)
jmp LL(pt256tib_512gibent_process)
#undef PT256TIB_512GIBENT
#undef PT256TIB_512GIBENT_END
// loop over 512GiB entries of the 256TiB page table -- END --------------------------------
DBGHNL("failed to reserve the boot CPU page tables")
LL(no_more_pt256tib_512gibents):
// boot CPU identity page tables -- END ----------------------------------------------------
// We should not need to reserve the GDT/LDT/IDT before moving to kernel virtual address
// space, namely overwriting them should be benign.
DBGHNL("reserving current boot CPU hardware critical state -- END")
// boot CPU state -- END ===================================================================
mov rbx, qword ptr [rsp + 8 * 0]
//**************************************************************************************************
// The following are bit testers for the efi memory map descriptor type.
ASM_GLOBAL_OBJECT(efi_mmap_general_use_bits)
ASM_QWORDS ASM_SHL(1,EFI_LOADER_CODE) \
ASM_OR ASM_SHL(1, EFI_LOADER_DATA) \
ASM_OR ASM_SHL(1, EFI_BOOT_SERVICES_CODE) \
ASM_OR ASM_SHL(1, EFI_BOOT_SERVICES_DATA) \
ASM_OR ASM_SHL(1, EFI_CONVENTIONAL_MEMORY)
ASM_GLOBAL_OBJECT_END(efi_mmap_general_use_bits)
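// Illustrative sketch (not part of the build): these qwords are used as 64-entry bitsets
// indexed by the efi descriptor type, e.g. in C:
//   bool general_use = (efi_mmap_general_use_bits >> desc_type) & 1;
// which is what "bt qword ptr [rel efi_mmap_general_use_bits], rax" performs below.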
//==========================================================================================
ASM_GLOBAL_OBJECT(efi_mmap_runtime_services_bits)
ASM_QWORDS ASM_SHL(1, EFI_RUNTIME_SERVICES_CODE) \
ASM_OR ASM_SHL(1, EFI_RUNTIME_SERVICES_DATA)
ASM_GLOBAL_OBJECT_END(efi_mmap_runtime_services_bits)
//**************************************************************************************************
// Kernel image mapping: we give it a 1GiB page table, minus the xdbc pages.
#define LL(l) L(kernel_image_ptbls__##l)
#define DBG_HDR "kernel_image_ptbls_build:"
L(kernel_image_ptbls_build):
//------------------------------------------------------------------------------------------
// We keep the identity map of the first 2MiB page of the kernel image for the code jumping
// to the kernel virtual address space (we don't rely on the global page tlb cache).
lea r8, [rel kernel_pt256tib]
lea r9, [rel L(identity_first_2mib_map_pt512gib)]
lea r10, [rel L(identity_first_2mib_map_pt1gib)]
lea r11, [rel startup]
mov qword ptr [rel L(identity_first_2mib_startup_pa)], r11
shr rcx, 39 // 512GiB
and rcx, ASM_SHL(1,PTS_BITS_N) - 1 // msk_lo(pts_bits_n) = index of the 512GiB entry in the 256TiB page table.
DBGH1NL("first 2MiB identity map:pt256tib_512gibent_idx=0x%lx",rcx)
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [r8 + 8 * rcx], rax
and rcx, ASM_SHL(1,PTS_BITS_N) - 1 // msk_lo(pts_bits_n) = index of the 1GiB entry in the 512GiB page table.
DBGH1NL("first 2MiB identity map:pt512gib_1gibent_idx=0x%lx",rcx)
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [r9 + 8 * rcx], rax
and rcx, ASM_SHL(1,PTS_BITS_N) - 1 // msk_lo(pts_bits_n) = index of the 2MiB entry in the 1GiB page table.
DBGH1NL("first 2MiB identity map:pt1gib_2mibent_idx=0x%lx",rcx)
// 2MiB page, default PAT 0x0 is cacheable/WB.
or ax, ASM_SHL(1,PTE_GLOBAL_PAGE_BIT) ASM_OR ASM_SHL(1,PTE_PAGE_SIZE_BIT) \
ASM_OR ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [r10 + 8 * rcx], rax
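// Worked example (illustration only): for startup at pa 0x200000 (2MiB), the index
// computations above (partially elided) yield pt256tib_512gibent_idx = (pa >> 39) & 0x1ff
// = 0, pt512gib_1gibent_idx = (pa >> 30) & 0x1ff = 0 and pt1gib_2mibent_idx =
// (pa >> 21) & 0x1ff = 1.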
//------------------------------------------------------------------------------------------
// From here we build the kernel image page tables. We put the kernel image at the start
// of the kernel page tables.
// 1GiB entry in the kernel 512GiB page table.
lea rax, [rel kernel_image_pt1gib]
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [rel kernel_pt512gib + ASM_SHL(CONFIG_KERNEL_PT512GIB_1GIBENT_IDX,\
PTS_ENTRY_BYTES_N_LOG2)], rax
DBGH1NL("pt512gib_1gibent=0x%p", rax)
// XXX: startup must be 2MiB aligned.
lea rcx, [rel startup] // = KERNEL_IMAGE_PA_START = KERNEL_IMAGE_CURRENT_PA.
lea rdi, [rel kernel_image_pt1gib] // = PT1GIB_2MIBENT_FIRST, this is at a 1GiB boundary, then we start on the first 2MiB entry of the 1GiB page table.
R_X86_64_64_PROLOG(img_p2mibs_n_sz_imm64,2,ld_img_p2mibs_n_sz)
mov rdx, R_X86_64_64_PLACEHOLDER(ld_img_p2mibs_n_sz)
add rdx, rdi // = PT1GIB_2MIBENTS_END.
LL(next_pt1gib_2mibent):
mov rax, rcx // = KERNEL_IMAGE_CURRENT_PA.
// 2MiB page, default PAT 0x0 is WB.
or ax, ASM_SHL(1,PTE_GLOBAL_PAGE_BIT) ASM_OR ASM_SHL(1,PTE_PAGE_SIZE_BIT) \
ASM_OR ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [rdi], rax
DBGH2NL("pt1gib_2mibent=0x%p at 0x%p",rax,rdi)
add rcx, ASM_SHL(1,21) // 2MiB, = KERNEL_IMAGE_CURRENT_PA += P2MIB_BYTES_N, next kernel image physical address.
add rdi, ASM_SHL(1,PTS_ENTRY_BYTES_N_LOG2) // = PT1GIB_2MIBENT += 8, next 2MiB entry of the 1GiB page table.
jne LL(next_pt1gib_2mibent) // PT1GIB_2MIBENT != PT1GIB_2MIBENTS_END -> loop to the next 2MiB entry of the 1GiB page table.
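// Worked example (illustration only): a 2MiB kernel image page at pa 0x200000 gets the
// pte 0x200000 | G(0x100) | PS(0x80) | RW(0x2) | P(0x1) = 0x200183, assuming the usual
// x86 long mode bit positions for those flags.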
#ifdef CONFIG_EARLY_PRINTK_USB_XDBC
// rdi = PT1GIB_2MIBENT_FIRST
call usb_xdbc_map_pages
// rax = PT1GIB_2MIBENT_NEXT
call usb_xdbc_map_mmio
//**************************************************************************************************
// The memory map of 2MiB pages for general use (efi definition) will be smaller than the efi
// memory map, so we scan the efi memory map for general use memory, looking for a block the
// size of this very efi memory map which does not conflict with current memblk reservations.
#define LL(l) L(memory_map_general_use_p2mib_init__##l)
#define DBG_HDR "memory_map_general_use_p2mib_init:"
// IN: rdi = boot parameters
L(memory_map_general_use_p2mib_init):
mov qword ptr [rsp + 8 * 0], rbx
mov qword ptr [rsp + 8 * 1], rbp
mov qword ptr [rsp + 8 * 2], r15
mov qword ptr [rsp + 8 * 3], r14
mov qword ptr [rsp + 8 * 4], r13
mov qword ptr [rsp + 8 * 5], r12
DBGHNL("looking for memblk reservation into the efi memory map")
mov DESC, qword ptr [BP + BP_EFI_INFO_MEMMAP_PTR]
DBGH1NL("efi descriptors at 0x%p", DESC)
#define EFI_MMAP_END r15
#define EFI_MMAP_END_d r15d
mov EFI_MMAP_END_d, dword ptr [BP + BP_EFI_INFO_MEMMAP_BYTES_N]
add EFI_MMAP_END, DESC
DBGH1NL("end of efi descriptors at 0x%p", EFI_MMAP_END)
// loop over the descriptors -- START ======================================================
#define LLL(l) LL(reserve_memory_map__##l)
LLL(descriptor_process):
mov eax, dword ptr [BP + BP_EFI_INFO_MEMMAP_DESC_BYTES_N]
add rax, DESC // = desc_end.
cmp rax, EFI_MMAP_END // desc_end > mmap_end ?
jbe LLL(descriptor_is_valid)
DBGHNL("ERROR:unable to find memory room for our memory map, halting")
#define MMAP_PA_START r14
LLL(descriptor_is_valid):
mov MMAP_PA_START, qword ptr [DESC + EFI_MEMORY_DESCRIPTOR_PhysicalStart]
DBGH1NL("PhysicalStart=0x%p", MMAP_PA_START)
test MMAP_PA_START, 0xfff
jnz LLL(next_descriptor) // Not 4KiB aligned, mandated by the efi specs.
// Is this some memory we can add?
mov eax, dword ptr [DESC + EFI_MEMORY_DESCRIPTOR_Type]
DBGH1NL("type=0x%x", rax)
bt qword ptr [rel efi_mmap_general_use_bits], rax
jnc LLL(next_descriptor) // Don't touch this.
// Here, we have an efi descriptor we want to process.
#define DESC_PA_END r13
mov DESC_PA_END, qword ptr [DESC + EFI_MEMORY_DESCRIPTOR_NumberOfPages]
DBGH1NL("NumberOfPages=0x%lx", DESC_PA_END)
shl DESC_PA_END, 12 // 4KiB
add DESC_PA_END, MMAP_PA_START
// descriptor scan -- START ----------------------------------------------------------------
// This is a brutal qword scan.
#define MMAP_PA_END r12
#define MMAP_PA_END_d r12d
mov MMAP_PA_END_d, dword ptr [BP + BP_EFI_INFO_MEMMAP_BYTES_N] // Worst case scenario, the memory map of 2MiB pages for general use is as big as the efi memory map.
mov esi, MMAP_PA_END_d // For the memblk reservation below.
add MMAP_PA_END, MMAP_PA_START
// First test, do we fit in this efi descriptor?
cmp MMAP_PA_END, DESC_PA_END
jae LLL(next_descriptor) // MMAP_PA_END >= DESC_PA_END, go to the next efi descriptor.
// Try to reserve, it is reclaimable once our free lists are inited.
mov rdi, MMAP_PA_START
mov rdx, 1 // = reclaimable
jz LLL(reservation_successful) // Reservation successful.
add MMAP_PA_START, 8 // Brutal qword scan.
// descriptor scan -- END ------------------------------------------------------------------
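// Illustrative sketch (not part of the build) of the brutal qword scan above, in C,
// assuming memblk_reserve(pa, bytes_n, reclaimable) returns 0 on success:
//   for (pa = desc_start; pa + mmap_bytes_n < desc_end; pa += 8)
//       if (memblk_reserve(pa, mmap_bytes_n, 1) == 0)
//           goto reservation_successful;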
LLL(next_descriptor):
mov eax, dword ptr [BP + BP_EFI_INFO_MEMMAP_DESC_BYTES_N]
jmp LLL(descriptor_process)
// loop over the descriptors -- END ========================================================
LLL(reservation_successful):
mov qword ptr [rel memory_map_general_use_p2mib + 8 * 0], MMAP_PA_START
mov qword ptr [rel memory_map_general_use_p2mib + 8 * 1], MMAP_PA_START // Properly inited to start inserting our descriptors.
DBGH1("memblk reserved [0x%p", qword ptr [rel memory_map_general_use_p2mib + 8 * 0])
DBG1NL(",0x%p[ for our memory map", MMAP_PA_END)
//##########################################################################################
#define LLL(l) LL(merge__##l)
DBGHNL("now merge all 4KiB general use memory descriptors")
mov DESC, qword ptr [BP + BP_EFI_INFO_MEMMAP_PTR]
DBGH1NL("efi descriptors at 0x%p", DESC)
mov EFI_MMAP_END_d, dword ptr [BP + BP_EFI_INFO_MEMMAP_BYTES_N]
add EFI_MMAP_END, DESC
DBGH1NL("end of efi descriptors at 0x%p", EFI_MMAP_END)
// loop over the descriptors -- START ======================================================
LLL(descriptor_process):
mov eax, dword ptr [BP + BP_EFI_INFO_MEMMAP_DESC_BYTES_N]
add rax, DESC // = desc_end.
cmp rax, EFI_MMAP_END // desc_end > mmap_end ?
#define DESC_PA_START rdi
mov DESC_PA_START, qword ptr [DESC + EFI_MEMORY_DESCRIPTOR_PhysicalStart]
DBGH1NL("PhysicalStart=0x%p", DESC_PA_START)
test DESC_PA_START, 0xfff
jnz LLL(next_descriptor) // Not 4KiB aligned, mandated by the efi specs.
// Is this some memory we can add?
mov eax, dword ptr [DESC + EFI_MEMORY_DESCRIPTOR_Type]
DBGH1NL("type=0x%x", rax)
bt qword ptr [rel efi_mmap_general_use_bits], rax
jnc LLL(next_descriptor)
#define DESC_PA_END rsi
mov DESC_PA_END, qword ptr [DESC + EFI_MEMORY_DESCRIPTOR_NumberOfPages]
DBGH1NL("NumberOfPages=0x%lx", DESC_PA_END)
shl DESC_PA_END, 12 // 4KiB
add DESC_PA_END, DESC_PA_START
// rdi = DESC_PA_START.
// rsi = DESC_PA_END.
LLL(next_descriptor):
mov eax, dword ptr [BP + BP_EFI_INFO_MEMMAP_DESC_BYTES_N]
jmp LLL(descriptor_process)
// loop over the descriptors -- END ========================================================
#undef EFI_MMAP_END_d
//##########################################################################################
#define LLL(l) LL(chopping__##l)
// Now, chop off the non-2MiB-aligned heads and tails of the descriptors and compute pa_min
// and pa_max at the same time.
DBGHNL("chopping off heads and tails while getting minimum and maximum memory physical addresses")
#define MMAP_DESC_START (8 * 0)
#define MMAP_DESC_END (8 * 1)
#define DESC_PA_START (8 * 0)
#define DESC_PA_END (8 * 1)
#define DESC_INVALID 0xffffffffffffffff // We use the start physical address value for that.
#define DESC_BYTES_N 16 // Start and end.
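// Worked example (illustration only): ALIGN_UP_2MIB(x) = (x + 0x1fffff) & ~0x1fffff and
// ALIGN_DOWN_2MIB(x) = x & ~0x1fffff, so a descriptor [0x103000, 0x5ff000) is chopped to
// [0x200000, 0x400000), while [0x103000, 0x1ff000) aligns to an empty range and is
// invalidated.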
mov DESC, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_START]
LLL(descriptor_process):
cmp DESC, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_END]
je LLL(no_more_descriptors)
mov rax, qword ptr [DESC + DESC_PA_START]
mov rdx, qword ptr [DESC + DESC_PA_END]
// Align the start up and the end down (to 2MiB).
add rax, ASM_SHL(1,21) - 1 // msk_lo(2MiB)
and rax, -ASM_SHL(1,21) // msk_hi(2MiB), 32bits sign extended.
and rdx, -ASM_SHL(1,21) // msk_hi(2MiB), 32bits sign extended.
jb LLL(does_fit) // ALIGN_UP(DESC_PA_START) < ALIGN_DOWN(DESC_PA_END)
// Invalid blk, disable it.
mov qword ptr [DESC + DESC_PA_START], DESC_INVALID
jmp LLL(next_descriptor)
mov qword ptr [DESC + DESC_PA_START], rax
mov qword ptr [DESC + DESC_PA_END], rdx
// Now, the minimum and maximum physical addresses.
dec rdx // "End" to "last".
cmp rdx, qword ptr [rel pa_max]
jbe LLL(pa_min) // DESC_PA_END <= PA_MAX
mov qword ptr [rel pa_max], rdx // DESC_PA_END > PA_MAX
cmp rax, qword ptr [rel pa_min]
jae LLL(next_descriptor) // DESC_PA_START >= PA_MIN
mov qword ptr [rel pa_min], rax // DESC_PA_START < PA_MIN
LLL(next_descriptor):
add DESC, DESC_BYTES_N
jmp LLL(descriptor_process)
#undef MMAP_DESC_START
LLL(no_more_descriptors):
//##########################################################################################
mov rbx, qword ptr [rsp + 8 * 0]
mov rbp, qword ptr [rsp + 8 * 1]
mov r15, qword ptr [rsp + 8 * 2]
mov r14, qword ptr [rsp + 8 * 3]
mov r13, qword ptr [rsp + 8 * 4]
mov r12, qword ptr [rsp + 8 * 5]
//**************************************************************************************************
// TODO: cleanup the preprocessor namespace properly
#define LL(l) L(p4kibs_merge__##l)
#define DBG_HDR "blk_merge:"
// Here we merge the memory map descriptors as much as we can: we want as few holes as
// possible, to avoid as many non-2MiB-aligned heads and tails as possible (which we will
// be chopping off).
// We brutally loop over the descriptors until no more merge is happening, namely restarting
// each time a merge does happen.
// XXX: If we are in the internal candidate code path, the memory map is guaranteed to be non-empty
// with at least one valid descriptor.
// IN: rdi = EFI_DESC_PA_START
// IN: rsi = EFI_DESC_PA_END
#define DESC_CANDIDATE_PA_START rdi
#define DESC_CANDIDATE_PA_END rsi
#define DESC_INVALID 0xffffffffffffffff // We use the start physical address value for that.
#define DESC_BYTES_N 16 // Start and end.
// The following are offsets in structures.
#define MMAP_DESC_START (8 * 0)
#define MMAP_DESC_END (8 * 1)
#define DESC_PA_START (8 * 0)
#define DESC_PA_END (8 * 1)
#define DESC_CANDIDATE_IS_INTERNAL_BIT 0
#define EXIT_INSTEAD_OF_RESTART_BIT 1
DBGH2NL("external_candidate:pa_start=0x%p:pa_end=0x%p",DESC_CANDIDATE_PA_START,DESC_CANDIDATE_PA_END)
// On entry to this function we are not trying to merge a descriptor which is already in the
// memory map, namely we have an external candidate. Until we start trying to merge an
// already-present descriptor, an internal candidate, this variable is invalid; to know which
// type of candidate we are working with, internal or external, we have the
// DESC_CANDIDATE_IS_INTERNAL_BIT flag.
#define DESC_CANDIDATE_INTERNAL r10
// This is the descriptor we are trying to merge with the candidate (external or internal).
#define DESC_INSPECTED r9
LL(restart_inspection):
// We start to inspect from the first descriptor of the memory map.
mov DESC_INSPECTED, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_START]
LL(desc_inspected_validation):
// Fall-thru validation.
cmp DESC_INSPECTED, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_END]
je LL(desc_inspected_mmap_end_reached) // DESC_INSPECTED = MMAP_DESC_END, no merge did happen in this pass.
cmp qword ptr [DESC_INSPECTED + DESC_PA_START], DESC_INVALID
je LL(desc_inspected_next)
// Ofc, skip the internal candidate, if any.
bt FLAGS, DESC_CANDIDATE_IS_INTERNAL_BIT
jnc LL(desc_inspected_is_valid)
cmp DESC_INSPECTED, DESC_CANDIDATE_INTERNAL
jne LL(desc_inspected_is_valid)
LL(desc_inspected_next):
add DESC_INSPECTED, DESC_BYTES_N
jmp LL(desc_inspected_validation)
LL(desc_inspected_is_valid):
DBGH1("valid inspected:pa_start=0x%p",qword ptr [DESC_INSPECTED + DESC_PA_START])
DBG1NL(":pa_end=0x%p",qword ptr [DESC_INSPECTED + DESC_PA_END])
// candidate merging logic -- START ========================================================
DBGH2NL("merge:candidate:pa_start=0x%p:pa_end=0x%p", DESC_CANDIDATE_PA_START, DESC_CANDIDATE_PA_END)
cmp DESC_CANDIDATE_PA_START, qword ptr [DESC_INSPECTED + DESC_PA_END]
ja LL(desc_inspected_next)
cmp DESC_CANDIDATE_PA_END, qword ptr [DESC_INSPECTED + DESC_PA_START]
jb LL(desc_inspected_next)
// Here, we have a non-empty intersection: 2 parts, head merging and tail merging.
// head merging -- START -------------------------------------------------------------------
cmp DESC_CANDIDATE_PA_START, qword ptr [DESC_INSPECTED + DESC_PA_START]
jae LL(no_head_merge)
DBGHNL("merge:candidate_pa_start replaced inspected_pa_start")
mov qword ptr [DESC_INSPECTED + DESC_PA_START], DESC_CANDIDATE_PA_START
// head merging -- END ---------------------------------------------------------------------
// tail merging -- START -------------------------------------------------------------------
cmp DESC_CANDIDATE_PA_END, qword ptr [DESC_INSPECTED + DESC_PA_END]
jbe LL(no_tail_merge)
DBGHNL("merge:candidate_pa_end replaced inspected_pa_end")
mov qword ptr [DESC_INSPECTED + DESC_PA_END], DESC_CANDIDATE_PA_END
// tail merging -- END ---------------------------------------------------------------------
DBGHNL("merging done")
// candidate merging logic -- END ==========================================================
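// Illustrative sketch (not part of the build) of the merge above, in C; note that the
// ja/jb pair also accepts merely adjacent ranges, since the comparisons are not strict:
//   if (cand.start <= insp->end && cand.end >= insp->start) {    // overlap or adjacency
//       if (cand.start < insp->start) insp->start = cand.start;  // head merge
//       if (cand.end   > insp->end)   insp->end   = cand.end;    // tail merge
//   }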
// next candidate handling with restarts or exit -- START ==================================
bt FLAGS, DESC_CANDIDATE_IS_INTERNAL_BIT
jnc LL(switch_candidate_from_external_to_internal)
// Here, our merged candidate is internal.
// It is merged, remove it from the memory map by invalidating its start physical address.
mov qword ptr [DESC_CANDIDATE_INTERNAL + DESC_PA_START], DESC_INVALID
LL(candidate_internal_go_next): // XXX: Could be jumped to once the inspected descriptor reaches the end of the memory map.
add DESC_CANDIDATE_INTERNAL, DESC_BYTES_N
// If this new internal candidate is the last descriptor from the memory map, start again
// from the beginning.
cmp DESC_CANDIDATE_INTERNAL, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_END]
jne LL(candidate_internal_is_not_end)
// DESC_CANDIDATE_INTERNAL = MMAP_DESC_END.
//------------------------------------------------------------------------------------------
bt FLAGS, EXIT_INSTEAD_OF_RESTART_BIT
jnc LL(candidate_internal_restart) // Not the last valid internal candidate.
DBGHNL("last valid internal candidate processed, exiting")
ret // The last valid internal candidate was not merged, exit.
//------------------------------------------------------------------------------------------
LL(candidate_internal_restart):
DBGHNL("restarting internal candidates from the first valid one of the memory map")
mov DESC_CANDIDATE_INTERNAL, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_START]
// We are not on the last valid internal candidate anymore, so we no longer want to exit
// once all valid descriptors have been inspected.
btr FLAGS, EXIT_INSTEAD_OF_RESTART_BIT
// XXX: Fall-thru since we are guaranteed to have a non-empty memory map with at least one
// valid descriptor.
LL(candidate_internal_is_not_end):
mov DESC_CANDIDATE_PA_START, qword ptr [DESC_CANDIDATE_INTERNAL + DESC_PA_START]
cmp DESC_CANDIDATE_PA_START, DESC_INVALID
je LL(candidate_internal_go_next)
// Here the internal candidate is valid.
mov DESC_CANDIDATE_PA_END, qword ptr [DESC_CANDIDATE_INTERNAL + DESC_PA_END]
DBGH1("valid candidate_internal=0x%p",DESC_CANDIDATE_INTERNAL)
DBG1(":pa_start=0x%p",qword ptr [DESC_CANDIDATE_INTERNAL + DESC_PA_START])
DBG1NL(":pa_end=0x%p",qword ptr [DESC_CANDIDATE_INTERNAL + DESC_PA_END])
jmp LL(restart_inspection)
LL(switch_candidate_from_external_to_internal):
DBGHNL("merge:switching from external to internal candidate")
bts FLAGS, DESC_CANDIDATE_IS_INTERNAL_BIT
mov DESC_CANDIDATE_INTERNAL, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_START]
// XXX: here the memory map is guaranteed to be non-empty, with at least one valid descriptor.
jmp LL(candidate_internal_is_not_end)
// next candidate handling with restarts or exit -- END ====================================
LL(desc_inspected_mmap_end_reached):
DBGHNL("valid descriptor inspection reached the end of mmap")
// DESC_INSPECTED == MMAP_DESC_END: we reached the end, so no merge happened; what to do
// next depends on whether we were trying to merge an external or an internal candidate,
// the external candidate being the one provided by the caller.
bt FLAGS, DESC_CANDIDATE_IS_INTERNAL_BIT
jnc LL(end_of_mmap_candidate_external)
// The candidate is internal, and we do not have more descriptors to inspect; toggle the
// flag to tell the internal candidate code to exit if that internal candidate was the last
// valid one.
bts FLAGS, EXIT_INSTEAD_OF_RESTART_BIT
jmp LL(candidate_internal_go_next)
LL(end_of_mmap_candidate_external):
DBGHNL("appending external candidate to mmap and exiting")
// Here, we did reach the end of the memory map with an external candidate, namely with a
// new descriptor provided on entry by the caller. Append this new descriptor to the memory
// map.
// DESC_INSPECTED = MMAP_DESC_END
mov qword ptr [DESC_INSPECTED + DESC_PA_START], DESC_CANDIDATE_PA_START
mov qword ptr [DESC_INSPECTED + DESC_PA_END], DESC_CANDIDATE_PA_END
add qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_END], DESC_BYTES_N
#undef DESC_INSPECTED
#undef DESC_CANDIDATE_INTERNAL
#undef DESC_CANDIDATE_PA_START
#undef DESC_CANDIDATE_PA_END
//**************************************************************************************************
#define LL(l) L(vmemmap_init__##l)
#define DBG_HDR "vmemmap_init:"
// We don't use a sparse vmemmap: we consider the worst case scenario of maximum 2MiB
// fragmentation and allocation and don't really bother anymore (and we accept that). A
// "struct page" would have a power of 2 size which fits at least a cache line of 64 bytes,
// and then will stay cache line aligned.
// The "pfn", page frame number, in this case will be the offset of the struct page in vmemmap.
// On a 4lvls machine with physical addresses of 48bits the worst case scenario is:
// 256TiB(48bits) / 2MiB = 128Mi 2MiB pages
// Example: for a "struct page" of 512(64*8)/256(64*4) bytes, vmemmap will use 64/32GiB
// On a 5lvls machine with physical addresses of 52bits the worst case scenario is:
// 4PiB(52bits) / 2MiB = 2Gi 2MiB pages
// Example: for a "struct page" of 512(64*8)/256(64*4) bytes, vmemmap will use 1TiB/512GiB
// "Nowadays-real-life"(2022) "desktop-sized" scenario with the maximum physical address usually
// being the top memory physical address:
// 16/32/64/128GiB / 2MiB = 8/16/32/64Ki 2MiB pages
// Example: 16/32/64/128GiB, for a "struct page" of 512(64*8)/256(64*4) bytes, vmemmap will use
// (4/2)/(8/4)/(16/8)/(32/16)MiB
mov qword ptr [rsp + 8 * 0], rbx
mov qword ptr [rsp + 8 * 1], rbp
mov qword ptr [rsp + 8 * 2], r15
mov qword ptr [rsp + 8 * 3], r14
mov qword ptr [rsp + 8 * 4], r13
//##########################################################################################
// We are going to reserve the vmemmap *AND* the required 1GiB page tables, namely we will
// content ourselves with "only" one 512GiB page table, see the worst case scenarios above.
// We have to be careful with all the alignment constraints.
//------------------------------------------------------------------------------------------
// We compute the following here.
#define VMEMMAP_TOTAL_BYTES_N r15
#define VMEMMAP_PT1GIBS_START r14
#define VMEMMAP_PAGESTS_P2MIBS_N r13
mov r14, qword ptr [rel pa_max] // pa_max is the last byte of the last 2MiB page.
inc r14 // = pa_end, which is 2MiB aligned.
mov r15, r14 // Keep this one for the page table entries memory evaluation.
shr r14, 21 - PAGEST_BYTES_N_LOG2 // = memory_p2mibs_n * sizeof(struct page)
add r14, ASM_SHL(1,21) - 1 // msk_lo(2MiB)
and r14, -ASM_SHL(1,21) // msk_hi(2MiB) 32bits sign extended, ALIGN_UP_2MIB(memory_p2mibs_n * sizeof(struct page)), this is the offset for the installation of the page tables.
DBGH1NL("vmemmap_pagests_2mib_aligned_bytes_n=0x%lx",r14)
shr r13, 21 // ALIGN_UP_2MIB(memory_p2mibs_n * sizeof(struct page)) / p2mib_bytes_n
DBGH1NL("vmemmap needs exactly 0x%p page table 2MiB entries",r13)
// We will put the 1GiB page tables right after the vmemmap struct pages, and since the end
// of this area is 2MiB aligned, it fits the 4KiB alignment requirement for 1GiB page tables,
// and since we reserve them contiguously, all the 1GiB page tables are 4KiB aligned.
// We don't require the size of the area of the 1GiB page tables to be 2MiB aligned, since
// those page tables won't be accessed anymore; in other words we won't need a virtual
// address space mapping for them.
// There are 0x200(512) entries in a 1GiB page table.
add r15, ASM_SHL(1,PTS_BITS_N) - 1 // msk_lo(pts_bits_n)
and r15, -ASM_SHL(1,PTS_BITS_N) // msk_hi(pts_bits_n) 32bits sign extended, = ALIGN_UP_512_ENTS(vmemmap_pagests_p2mibs_n)
DBGH1NL("vmemmap needs 1GiB page table aligned 0x%p page table 2MiB entries",r15)
shl r15, PTS_ENTRY_BYTES_N_LOG2 // = ALIGN_UP_512_ENTS(vmemmap_pagests_p2mibs_n) * page_entry_bytes_n (8 bytes)
DBGH1NL("vmemmap_pt1gibs_bytes_n=0x%lx",r15)
DBGH1NL("vmemmap_total_bytes_to_reserve=0x%lx",r15)
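// Worked example (illustration only), assuming pa_end = 16GiB and a 512-byte struct page:
// memory_p2mibs_n = 16GiB / 2MiB = 8192, so the struct page array needs
// 8192 * 512B = 4MiB (already 2MiB aligned), i.e. exactly 2 page table 2MiB entries;
// aligned up to 512 entries that is 512 * 8B = 4KiB of 1GiB page tables (one page table),
// for a total reservation of 4MiB + 4KiB.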
//------------------------------------------------------------------------------------------
// Brute force, 2MiB page by 2MiB page, a non-reclaimable reservation from the memory map.
#define MMAP_DESC_START (8 * 0)
#define MMAP_DESC_END (8 * 1)
#define DESC_PA_START (8 * 0)
#define DESC_PA_END (8 * 1)
#define DESC_INVALID 0xffffffffffffffff
#define DESC_BYTES_N 16
mov DESC, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_START]
// descriptors loop -- START ===============================================================
cmp DESC, qword ptr [rel memory_map_general_use_p2mib + MMAP_DESC_END]
je LL(reservation_failure)
#define VMEMMAP_PA rbp
mov VMEMMAP_PA, qword ptr [DESC + DESC_PA_START]
cmp VMEMMAP_PA, DESC_INVALID
je LL(next_descriptor)
// reservation test loop -- START ----------------------------------------------------------
LL(reservation_test):
add rax, VMEMMAP_TOTAL_BYTES_N // = VMEMMAP_END
cmp rax, qword ptr [DESC + DESC_PA_END]
ja LL(next_descriptor)
// Here, VMEMMAP fits into this descriptor starting from VMEMMAP_PA, then try to reserve
// those 2MiB pages.
mov rsi, VMEMMAP_TOTAL_BYTES_N
xor edx,edx // Non-reclaimable memory.
jz LL(reservation_success)
// Here, those 2MiB pages are not available, brute force to the next 2MiB page.
add VMEMMAP_PA, ASM_SHL(1,21) // 2MiB
jmp LL(reservation_test)
// reservation test loop -- END ------------------------------------------------------------
LL(next_descriptor):
add DESC, DESC_BYTES_N
// descriptors loop -- END =================================================================
LL(reservation_success):
DBGH1NL("reservation successful at 0x%p",VMEMMAP_PA)
#undef MMAP_DESC_START
#undef MMAP_DESC_END
#undef DESC_PA_START
//##########################################################################################
mov qword ptr [rel vmemmap_pa], VMEMMAP_PA // Needed when we will initialize the struct pages.
// Zero the entire area.
mov rcx, VMEMMAP_TOTAL_BYTES_N
//##########################################################################################
#define LLL(l) LL(install__##l)
// Install the entry in the top page table.
lea rax, [rel vmemmap_pt512gib]
#define PT512GIBS_1GIBENT rsi
mov PT512GIBS_1GIBENT, rax // Don't forget to install the 512GiB page table 1GiB entries.
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [rel kernel_pt256tib + ASM_SHL(CANONICAL_HI_PT256TIB_512GIBENT_IDX_START \
+ CONFIG_VMEMMAP_PT256TIB_512GIBENT_HI_IDX, PTS_ENTRY_BYTES_N_LOG2)], rax
#define PT1GIBS_2MIBENT rcx
#define PT1GIBS_2MIBENTS_N rdx // A counter to know when to install a 512GiB page table 1GiB entry.
#define PT1GIBS_2MIBENTS_N_d edx
mov PT1GIBS_2MIBENT, VMEMMAP_PA
add PT1GIBS_2MIBENT, VMEMMAP_PT1GIBS_START // Add the offset we kept.
xor PT1GIBS_2MIBENTS_N_d,PT1GIBS_2MIBENTS_N_d
// All the 1GiB page table 2MiB entries are contiguous in physical memory.
LLL(pt1gibs_2mibents_process):
test PT1GIBS_2MIBENTS_N, 0x1ff // Do we need to install a 512GiB page table 1GiB entry?
jnz LLL(dont_need_pt512gib_1gibent_installation) // No.
mov rax, PT1GIBS_2MIBENT
or ax, ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
mov qword ptr [PT512GIBS_1GIBENT], rax
DBGH2NL("pt512gib_1gibent=0x%lx at 0x%p",rax,PT512GIBS_1GIBENT)
add PT512GIBS_1GIBENT, ASM_SHL(1,PTS_ENTRY_BYTES_N_LOG2)
LLL(dont_need_pt512gib_1gibent_installation):
or ax, ASM_SHL(1,PTE_GLOBAL_PAGE_BIT) ASM_OR ASM_SHL(1,PTE_PAGE_SIZE_BIT) \
ASM_OR ASM_SHL(1,PTE_RW_BIT) ASM_OR ASM_SHL(1,PTE_PRESENT_BIT)
bts rax, PTE_NX_BIT // No code there.
mov qword ptr [PT1GIBS_2MIBENT], rax
DBGH2NL("pt1gibs_2mibent=0x%lx at 0x%p",rax,PT1GIBS_2MIBENT)
add VMEMMAP_PA, ASM_SHL(1,21) // 2MiB
add PT1GIBS_2MIBENT, ASM_SHL(1,PTS_ENTRY_BYTES_N_LOG2)
inc PT1GIBS_2MIBENTS_N
dec VMEMMAP_PAGESTS_P2MIBS_N
jnz LLL(pt1gibs_2mibents_process)
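// Worked example (illustration only): the "test ..., 0x1ff" above installs a new 512GiB
// page table 1GiB entry whenever PT1GIBS_2MIBENTS_N & 0x1ff == 0, i.e. for 2MiB entries
// number 0, 512, 1024, ..., since each 1GiB page table holds exactly 512 of them.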
//------------------------------------------------------------------------------------------
#undef PT512GIBS_1GIBENT
#undef PT1GIBS_2MIBENT
#undef PT1GIBS_2MIBENTS_N
#undef PT1GIBS_2MIBENTS_N_d
#undef VMEMMAP_PAGESTS_P2MIBS_N
//##########################################################################################
mov rbx, qword ptr [rsp + 8 * 0]
mov rbp, qword ptr [rsp + 8 * 1]
mov r15, qword ptr [rsp + 8 * 2]
mov r14, qword ptr [rsp + 8 * 3]
mov r13, qword ptr [rsp + 8 * 4]
LL(reservation_failure):
DBGHNL("unable to reserve bytes for the vmemmap, halting.")
#define PT1GIBS_2MIBENT rcx
#undef VMEMMAP_TOTAL_BYTES_N
#undef VMEMMAP_PT1GIBS_START
//**************************************************************************************************
// This is our sparse memory map: an array of (start, end) address pairs (2MiB aligned, of
// course), which can have an INVALID state.
ASM_GLOBAL_OBJECT(memory_map_general_use_p2mib)
ASM_QWORDS 0xffffffffffffffff // start.
ASM_QWORDS 0xffffffffffffffff // end, inited to start.
ASM_GLOBAL_OBJECT_END(memory_map_general_use_p2mib)
//==================================================================================================
// pa_min is not used for now; pa_max is used for our vmemmap of struct pages.
ASM_GLOBAL_OBJECT(pa_min)
ASM_QWORDS 0xffffffffffffffff
ASM_GLOBAL_OBJECT_END(pa_min)
//==================================================================================================
ASM_GLOBAL_OBJECT(pa_max)
ASM_QWORDS 0 // It is guaranteed to be 4KiB aligned by efi.
ASM_GLOBAL_OBJECT_END(pa_max)
//==================================================================================================
ASM_GLOBAL_OBJECT(vmemmap_pa)
ASM_QWORDS 0xffffffffffffffff
ASM_GLOBAL_OBJECT_END(vmemmap_pa)
//==================================================================================================
// We need a 512GiB page table and a 1GiB page table for the identity mapping of the first 2MiB of
// the kernel image.
ASM_ALIGN_ZERO(12) // 4KiB
L(identity_first_2mib_map_pt512gib): ASM_ZERO(ASM_SHL(1,12)) // 4KiB
ASM_ALIGN_ZERO(12) // 4KiB
L(identity_first_2mib_map_pt1gib): ASM_ZERO(ASM_SHL(1,12)) // 4KiB
L(identity_first_2mib_startup_pa): ASM_QWORDS 0 // To invalidate the tlb.
//==================================================================================================
ASM_ALIGN_ZERO(STACK_ALIGN_BYTES_N_LOG2)
L(init_stack_bottom):
ASM_ZERO(ASM_SHL(1,12) * CONFIG_INIT_STACK_P4KIBS_N)
//==================================================================================================