/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>

#include "xen-ops.h"
#include "vdso.h"
/* These are code, but not functions.  Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
/* Amount of extra memory space we add to the e820 ranges */
phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)
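/*
 * Rough arithmetic behind the ratio (illustrative numbers only: 4 KiB
 * pages and a struct page of about 64 bytes): at 10x, the page
 * structures for the extra memory cost about 10 * 64 / 4096, i.e.
 * ~16% of base memory; at 64x they would consume all of it.
 */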
static __init void xen_add_extra_mem(unsigned long pages)
{
	u64 size = (u64)pages * PAGE_SIZE;
	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;

	if (!pages)
		return;

	e820_add_region(extra_start, size, E820_RAM);
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
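	/*
	 * Keep the new range reserved in memblock even though it was
	 * just added to the e820 as RAM: no machine frames back it
	 * yet, so early allocations must not touch it (presumably it
	 * is populated later via the ballooning machinery).
	 */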
	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");

	xen_extra_mem_size += size;

	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
}
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;
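	/*
	 * PFN_UP/PFN_DOWN shrink the range to whole pages: with 4 KiB
	 * pages, start_addr = 0x1800 and end_addr = 0x5000 give
	 * start = 2 and end = 5, so only pfns 2..4 are released.
	 */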
	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for (pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			/* The machine frame is gone; invalidate the p2m entry too. */
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_CONT "%ld pages freed\n", len);

	return len;
}
static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = ISA_END_ADDRESS;
	unsigned long released = 0;
	int i;
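	/*
	 * Release the gaps between e820 entries.  Hypothetical example:
	 * with entries covering [1M, 3G) and [4G, 6G) and max_addr = 8G,
	 * the chunks handed back are [3G, 4G) and the tail [6G, 8G).
	 */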
	/* Free any unused memory above the low 1Mbyte. */
	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		if (last_end < end)
			released += xen_release_chunk(last_end, end);
		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
	}

	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	printk(KERN_INFO "released %ld pages of unused memory\n", released);
	return released;
}
/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long extra_pages = 0;
	unsigned long extra_limit;
	int op;
	int i;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);
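	/*
	 * Dom0 asks the hypervisor for the host's real (machine) E820;
	 * an unprivileged domain asks for its own pseudo-physical map.
	 */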
	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/* Hypercall unimplemented: synthesize a single flat RAM entry. */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	e820.nr_map = 0;
	xen_extra_mem_start = mem_end;
	for (i = 0; i < memmap.nr_entries; i++) {
		unsigned long long end = map[i].addr + map[i].size;

		if (map[i].type == E820_RAM) {
			if (map[i].addr < mem_end && end > mem_end) {
				/* Truncate region to max_mem. */
				u64 delta = end - mem_end;

				map[i].size -= delta;
				extra_pages += PFN_DOWN(delta);

				end = mem_end;
			}
		}

		if (end > xen_extra_mem_start)
			xen_extra_mem_start = end;

		/* If region is non-RAM or below mem_end, add what remains */
		if ((map[i].type != E820_RAM || map[i].addr < mem_end) &&
		    map[i].size > 0)
			e820_add_region(map[i].addr, map[i].size, map[i].type);
	}
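	/*
	 * Worked example (hypothetical numbers): with mem_end = 4G, a
	 * RAM entry [1M, 6G) is trimmed to end at 4G; the trimmed 2G is
	 * accounted in extra_pages, and xen_extra_mem_start ends up at 4G.
	 */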
	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 *
	 * In Dom0, the host E820 information can leave gaps in the
	 * ISA range, which would cause us to release those pages.  To
	 * avoid this, we unconditionally reserve them here.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);
	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
				   __pa(xen_start_info->pt_base),
				   "XEN START INFO");
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
	/*
	 * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
	 * factor of the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  max_pfn + extra_pages);

	if (extra_limit >= max_pfn)
		extra_pages = extra_limit - max_pfn;
	else
		extra_pages = 0;
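	/*
	 * Worked example (hypothetical 32-bit highmem numbers): with
	 * max_pfn = 512k pages (2G) and PFN_DOWN(MAXMEM) ~= 224k pages
	 * (~896M of lowmem), a huge extra_pages of 4M pages is clamped
	 * to min(10 * 224k, 512k + 4M) - 512k = 2240k - 512k = 1728k.
	 */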
	if (!xen_initial_domain())
		xen_add_extra_mem(extra_pages);

	return "Xen";
}
static void xen_idle(void)
{
	local_irq_disable();

	if (need_resched())
		local_irq_enable();
	else {
		/* Stop advertising TS_POLLING before halting, so wakeups
		   arrive as real interrupts rather than being missed. */
		current_thread_info()->status &= ~TS_POLLING;
		smp_mb__after_clear_bit();
		safe_halt();
		current_thread_info()->status |= TS_POLLING;
	}
}
/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	u32 *mask;
	mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}
static __cpuinit int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}
void __cpuinit xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}
void __cpuinit xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}
void __init xen_arch_setup(void)
{
	struct physdev_set_iopl set_iopl;
	int rc;

	xen_panic_handler_init();
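	/*
	 * Enable a couple of PV assists: segment fixups for the 4GB
	 * "nosegneg" case, and hypervisor-validated writes to page
	 * tables, so the guest can treat them as ordinary memory.
	 */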
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_pae_extended_cr3);
	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();
	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		printk(KERN_INFO "physdev_op failed %d\n", rc);
#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif
	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	pm_idle = xen_idle;

	fiddle_vdso();
}