/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>

#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/vdso.h>
#include <asm/acpi.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
#include "mmu.h"
#include "p2m.h"
/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;
/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	unsigned long	next_area_mfn;
	unsigned long	target_pfn;
	unsigned long	size;
	unsigned long	mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
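/*
 * Layout sketch of the remap chunk list built later by
 * xen_do_set_identity_and_remap_chunk().  Each chunk reuses the first page
 * of the range it describes, so no extra allocation is needed:
 *
 *   xen_remap_mfn -> [ next_area_mfn | target_pfn | size | mfns[0..size-1] ]
 *                           |
 *                           v
 *                     next chunk page -> ... -> INVALID_P2M_ENTRY (list end)
 *
 * Assuming 4 KiB pages on 64-bit, P2M_PER_PAGE is PAGE_SIZE /
 * sizeof(unsigned long) = 512, so REMAP_SIZE is 509: three slots of the
 * page hold the header fields above and the rest hold MFNs.
 */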
/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)
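/*
 * Worked example (assuming a 64-byte struct page and 4 KiB pages): each
 * page of base memory can hold page structures for 4096 / 64 = 64 pages
 * of extra memory, so the hard limit on base:extra is about 1:64.  A 10x
 * cap keeps the struct page overhead for a fully ballooned domain to
 * roughly 10 * 64 / 4096, i.e. about 16% of base memory, which still
 * leaves most of the base allocation usable.
 */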
static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size)
{
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
		if (xen_extra_mem[i].size == 0) {
			xen_extra_mem[i].start = start;
			xen_extra_mem[i].size  = size;
			break;
		}
		/* Append to existing region. */
		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
			xen_extra_mem[i].size += size;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

	memblock_reserve(start, size);
}
static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size)
{
	int i;
	phys_addr_t start_r, size_r;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		start_r = xen_extra_mem[i].start;
		size_r = xen_extra_mem[i].size;

		/* Start of region. */
		if (start_r == start) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].start += size;
			xen_extra_mem[i].size -= size;
			break;
		}
		/* End of region. */
		if (start_r + size_r == start + size) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].size -= size;
			break;
		}
		/* Mid of region. */
		if (start > start_r && start < start_r + size_r) {
			BUG_ON(start + size > start_r + size_r);
			xen_extra_mem[i].size = start - start_r;
			/* Calling memblock_reserve() again is okay. */
			xen_add_extra_mem(start + size, start_r + size_r -
					  (start + size));
			break;
		}
	}
	memblock_free(start, size);
}
/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;
	phys_addr_t addr = PFN_PHYS(pfn);

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (addr >= xen_extra_mem[i].start &&
		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
			return INVALID_P2M_ENTRY;
	}

	return IDENTITY_FRAME(pfn);
}
/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
	unsigned long pfn, pfn_s, pfn_e;
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (!xen_extra_mem[i].size)
			continue;
		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
		for (pfn = pfn_s; pfn < pfn_e; pfn++)
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
}
/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(
	const struct e820entry *list, size_t map_size,
	unsigned long *min_pfn)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after this */
		if (e_pfn < *min_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);

		/*
		 * If min_pfn falls within the E820 entry, we want to start
		 * at the min_pfn PFN.
		 */
		if (s_pfn <= *min_pfn) {
			done = e_pfn - *min_pfn;
		} else {
			done = e_pfn - s_pfn;
			*min_pfn = s_pfn;
		}
		break;
	}

	return done;
}
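/*
 * Worked example for xen_find_pfn_range() (hypothetical map): with RAM
 * entries covering pfns 0x100-0x800 and 0xA00-0x1000 and *min_pfn == 0x900,
 * the first entry is skipped because it ends below *min_pfn; the second
 * starts above it, so *min_pfn is updated to 0xA00 and 0x600 (the number
 * of usable pfns in that entry) is returned.
 */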
static int __init xen_free_mfn(unsigned long mfn)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);
	reservation.nr_extents = 1;

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}
/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
{
	unsigned long pfn, end;
	int ret;

	WARN_ON(start_pfn > end_pfn);

	/* Release pages first. */
	end = min(end_pfn, nr_pages);
	for (pfn = start_pfn; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		ret = xen_free_mfn(mfn);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

		if (ret == 1) {
			(*released)++;
			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
				break;
		} else
			break;
	}

	set_phys_range_identity(start_pfn, end_pfn);
}
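/*
 * Sketch of the fallback path above (hypothetical numbers): if pfns
 * 0xE0000-0xEFFFF correspond to a non-RAM e820 hole and no free pfn range
 * can be found to remap their backing frames to, those frames are handed
 * back to Xen one at a time via XENMEM_decrease_reservation and the whole
 * pfn range is then marked 1:1, so the guest-physical range simply refers
 * to the matching machine addresses from then on.
 */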
/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if (pfn >= PFN_UP(__pa(high_memory - 1)))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}
}
/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

	mfn_save = virt_to_mfn(buf);

	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}
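/*
 * Worked example for xen_do_set_identity_and_remap_chunk() (hypothetical
 * layout): for a non-RAM hole of 512 pfns at 0x80000 and remap_pfn
 * 0x100000, the loop builds two chunks (REMAP_SIZE pfns plus the
 * remainder, assuming REMAP_SIZE is 509).  The RAM frames that currently
 * back 0x80000-0x801ff are recorded, chunk by chunk, in the first page of
 * each chunk, the chunk pages are linked through next_area_mfn, and the
 * hole itself becomes a 1:1 range.  xen_remap_memory() later wires those
 * recorded frames back in at pfn 0x100000 onward.
 */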
/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
	unsigned long *released, unsigned long *remapped)
{
	unsigned long pfn;
	unsigned long i = 0;
	unsigned long n = end_pfn - start_pfn;

	while (i < n) {
		unsigned long cur_pfn = start_pfn + i;
		unsigned long left = n - i;
		unsigned long size = left;
		unsigned long remap_range_size;

		/* Do not remap pages beyond the current allocation */
		if (cur_pfn >= nr_pages) {
			/* Identity map remaining pages */
			set_phys_range_identity(cur_pfn, cur_pfn + size);
			break;
		}
		if (cur_pfn + size > nr_pages)
			size = nr_pages - cur_pfn;

		remap_range_size = xen_find_pfn_range(list, map_size,
						      &remap_pfn);
		if (!remap_range_size) {
			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
			xen_set_identity_and_release_chunk(cur_pfn,
				cur_pfn + left, nr_pages, released);
			break;
		}
		/* Adjust size to fit in current e820 RAM region */
		if (size > remap_range_size)
			size = remap_range_size;

		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

		/* Update variables to reflect new mappings. */
		i += size;
		remap_pfn += size;
		*remapped += size;
	}

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);

	return remap_pfn;
}
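/*
 * The updated remap_pfn is returned so that the caller
 * (xen_set_identity_and_remap()) can pass it back in for the next non-RAM
 * chunk: each chunk starts looking for remap target pfns where the
 * previous one stopped, so the target ranges never overlap.
 */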
static void __init xen_set_identity_and_remap(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
	unsigned long *released, unsigned long *remapped)
{
	phys_addr_t start = 0;
	unsigned long last_pfn = nr_pages;
	const struct e820entry *entry;
	unsigned long num_released = 0;
	unsigned long num_remapped = 0;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * remap the memory in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;
		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn)
				last_pfn = xen_set_identity_and_remap_chunk(
						list, map_size, start_pfn,
						end_pfn, nr_pages, last_pfn,
						&num_released, &num_remapped);
			start = end;
		}
	}

	*released = num_released;
	*remapped = num_remapped;

	pr_info("Released %ld page(s)\n", num_released);
}
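/*
 * Example of the rounding in xen_set_identity_and_remap() (hypothetical
 * map): if RAM ends at 0x9fc00 and a reserved region holding the DMI
 * tables starts right there, the non-RAM chunk is processed as pfns from
 * PFN_DOWN(0x9fc00) = 0x9f up to the start of the next RAM entry, so the
 * partial page 0x9f000-0x9ffff is included in the 1:1 mapping and the
 * tables at 0x9fc00 stay reachable.
 */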
/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained
 * in the to-be-remapped memory itself in a linked list anchored at
 * xen_remap_mfn. This scheme allows the different chunks to be remapped in
 * arbitrary order while the resulting mapping stays independent of that
 * order.
 */
void __init xen_remap_memory(void)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn, pfn;
	unsigned long remapped = 0;
	unsigned int i;
	unsigned long pfn_s = ~0UL;
	unsigned long len = 0;

	mfn_save = virt_to_mfn(buf);

	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
		/* Map the remap information */
		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

		pfn = xen_remap_buf.target_pfn;
		for (i = 0; i < xen_remap_buf.size; i++) {
			mfn = xen_remap_buf.mfns[i];
			xen_update_mem_tables(pfn, mfn);
			remapped++;
			pfn++;
		}
		if (pfn_s == ~0UL || pfn == pfn_s) {
			pfn_s = xen_remap_buf.target_pfn;
			len += xen_remap_buf.size;
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}

		xen_remap_mfn = xen_remap_buf.next_area_mfn;
	}

	if (pfn_s != ~0UL && len)
		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));

	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}
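/*
 * Example of the coalescing in xen_remap_memory() (hypothetical chunk
 * list): the list is walked newest first, so target ranges typically
 * appear in descending order, say pfns 0x10200-0x10400 followed by
 * 0x10000-0x10200.  The second chunk ends exactly at pfn_s, so the
 * accumulated range just grows downward and xen_del_extra_mem() is called
 * once for 0x10000-0x10400 rather than once per chunk.
 */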
static unsigned long __init xen_get_max_pages(void)
{
	unsigned long max_pages = MAX_DOMAIN_PAGES;
	domid_t domid = DOMID_SELF;
	int ret;

	/*
	 * For the initial domain we use the maximum reservation as
	 * the maximum page.
	 *
	 * For guest domains the current maximum reservation reflects
	 * the current maximum rather than the static maximum. In this
	 * case the e820 map provided to us will cover the static
	 * maximum region.
	 */
	if (xen_initial_domain()) {
		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
		if (ret > 0)
			max_pages = ret;
	}

	return min(max_pages, MAX_DOMAIN_PAGES);
}
static void __init xen_align_and_add_e820_region(phys_addr_t start,
						 phys_addr_t size, int type)
{
	phys_addr_t end = start + size;

	/* Align RAM regions to page boundaries. */
	if (type == E820_RAM) {
		start = PAGE_ALIGN(start);
		end &= ~((phys_addr_t)PAGE_SIZE - 1);
	}

	e820_add_region(start, end - start, type);
}
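/*
 * Worked example: a RAM entry covering 0x100184-0x108184 is trimmed inward
 * to the page-aligned range 0x101000-0x108000, i.e. start is rounded up
 * and end rounded down so only whole pages are reported as RAM; non-RAM
 * types are passed through unchanged.
 */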
static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size)
{
	struct e820entry *entry;
	unsigned int i;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		if (entry->type == E820_UNUSABLE)
			entry->type = E820_RAM;
	}
}
/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	phys_addr_t mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	unsigned long remapped_pages;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable(map, memmap.nr_entries);

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Set identity map on non-RAM pages and prepare remapping the
	 * underlying RAM.
	 */
	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
				   &xen_released_pages, &remapped_pages);

	extra_pages += xen_released_pages;
	extra_pages += remapped_pages;

	/*
	 * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
	 * factor of the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);
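	/*
	 * Worked example of the clamp (hypothetical 64-bit domain): booted
	 * with max_pfn = 0x40000 (1 GiB) and a maximum reservation of 4 GiB,
	 * plus a few thousand released/remapped pages, extra_pages comes to
	 * roughly 0xC1000.  That is below EXTRA_MEM_RATIO * 0x40000 =
	 * 0x280000, so it is kept as is; only domains whose maximum
	 * reservation exceeds ten times the initial allocation have their
	 * ballooning headroom trimmed here.
	 */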
	i = 0;
	while (i < memmap.nr_entries) {
		phys_addr_t addr = map[i].addr;
		phys_addr_t size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				size = min(size, PFN_PHYS(extra_pages));
				extra_pages -= PFN_DOWN(size);
				xen_add_extra_mem(addr, size);
				xen_max_p2m_pfn = PFN_DOWN(addr + size);
			} else
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
	 *
	 * PFNs above MAX_P2M_PFN are considered identity mapped as
	 * well.
	 */
	set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 * We tried to make the memblock_reserve more selective so
	 * that it would be clear what region is reserved. Sadly we ran
	 * into the problem wherein on a 64-bit hypervisor with a 32-bit
	 * initial domain, the pt_base has the cr3 value which is not
	 * necessarily where the pagetable starts! As Jan put it: "
	 * Actually, the adjustment turns out to be correct: The page
	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
	 * "first L2", "first L3", so the offset to the page table base is
	 * indeed 2. When reading xen/include/public/xen.h's comment
	 * very strictly, this is not a violation (since there nothing is said
	 * that the first thing in the page table space is pointed to by
	 * pt_base; I admit that this seems to be implied though, namely
	 * do I think that it is implied that the page table space is the
	 * range [pt_base, pt_base + nr_pt_frames), whereas that
	 * range here indeed is [pt_base - 2, pt_base - 2 + nr_pt_frames),
	 * which - without a priori knowledge - the kernel would have
	 * difficulty to figure out)." - so let's just fall back to the
	 * easy way and reserve the whole region.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}
/*
 * Machine specific memory setup for auto-translated guests.
 */
char * __init xen_auto_xlated_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	struct xen_memory_map memmap;
	int i;
	int rc;

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
	if (rc < 0)
		panic("No memory map (%d)\n", rc);

	sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);

	for (i = 0; i < memmap.nr_entries; i++)
		e820_add_region(map[i].addr, map[i].size, map[i].type);

	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	return "Xen";
}
/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	/*
	 * This could be called before selected_vdso32 is initialized, so
	 * just fiddle with both possible images.  vdso_image_32_syscall
	 * can't be selected, since it only exists on 64-bit systems.
	 */
	u32 *mask;
	mask = vdso_image_32_int80.data +
		vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = vdso_image_32_sysenter.data +
		vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}
static int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}
void xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}
void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}
void __init xen_pvmmu_arch_setup(void)
{
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();
}
/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
	xen_panic_handler_init();
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	/* Set up idle, making sure it calls safe_halt() pvop */
	disable_cpuidle();
	disable_cpufreq();
	WARN_ON(xen_set_default_idle());

	fiddle_vdso();
}