/*
 * store hypervisor information instruction emulation functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Copyright IBM Corp. 2016
 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
 */
#include <linux/kvm_host.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/ratelimit.h>

#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/sysinfo.h>
#include <asm/ebcdic.h>

#include "kvm-s390.h"
#include "gaccess.h"
#include "trace.h"
#define DED_WEIGHT 0xffff
/*
 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
 * as they are justified with spaces.
 */
#define CP  0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL
	HDR_STACK_INCM = 0x20,
	u8 infhflg2; /* reserved */
	u8 infhval1; /* reserved */
	u8 infhval2; /* reserved */

	u8 infmflg1; /* reserved */
	u8 infmflg2; /* reserved */
	u8 infmval2; /* reserved */

	u8 infpflg2; /* reserved */
	u8 infpval2; /* reserved */
struct lpar_cpu_inf {
	struct cpu_inf cp;
	struct cpu_inf ifl;
};
static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
{
	return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
}
/*
 * Scales the cpu capping from the lpar range to the one expected in
 * the sthyi data.
 *
 * diag204 reports a cap in hundredths of processor units.
 * z/VM's range for one core is 0 - 0x10000.
 */
static u32 scale_cap(u32 in)
{
	return (0x10000 * in) / 100;
}
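/*
 * Worked example: a diag204 cap of 150 (i.e. 1.5 processor units) scales
 * to (0x10000 * 150) / 100 = 0x18000, one and a half times z/VM's
 * 0x10000-per-core range.
 */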
static void fill_hdr(struct sthyi_sctns *sctns)
{
	sctns->hdr.infhdln = sizeof(sctns->hdr);
	sctns->hdr.infmoff = sizeof(sctns->hdr);
	sctns->hdr.infmlen = sizeof(sctns->mac);
	sctns->hdr.infplen = sizeof(sctns->par);
	sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
	sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
}
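/*
 * The offsets above describe one contiguous layout: the machine section
 * starts right after the header (infmoff == infhdln), the partition
 * section follows the machine section, and infhtotl covers all three.
 */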
static void fill_stsi_mac(struct sthyi_sctns *sctns,
			  struct sysinfo_1_1_1 *sysinfo)
{
	if (stsi(sysinfo, 1, 1, 1))
		return;

	sclp_ocf_cpc_name_copy(sctns->mac.infmname);

	memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
	memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
	memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
	memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));

	sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
}
static void fill_stsi_par(struct sthyi_sctns *sctns,
			  struct sysinfo_2_2_2 *sysinfo)
{
	if (stsi(sysinfo, 2, 2, 2))
		return;

	sctns->par.infppnum = sysinfo->lpar_number;
	memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));

	sctns->par.infpval1 |= PAR_ID_VLD;
}
static void fill_stsi(struct sthyi_sctns *sctns)
{
	void *sysinfo;

	/* Errors are handled through the validity bits in the response. */
	sysinfo = (void *)__get_free_page(GFP_KERNEL);
	if (!sysinfo)
		return;

	/* The same scratch page is reused for the 1.1.1 and 2.2.2 data. */
	fill_stsi_mac(sctns, sysinfo);
	fill_stsi_par(sctns, sysinfo);

	free_pages((unsigned long)sysinfo, 0);
}
static void fill_diag_mac(struct sthyi_sctns *sctns,
			  struct diag204_x_phys_block *block,
			  void *diag224_buf)
{
	int i;

	for (i = 0; i < block->hdr.cpus; i++) {
		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			if (block->cpus[i].weight == DED_WEIGHT)
				sctns->mac.infmdcps++;
			else
				sctns->mac.infmscps++;
			break;
		case IFL:
			if (block->cpus[i].weight == DED_WEIGHT)
				sctns->mac.infmdifl++;
			else
				sctns->mac.infmsifl++;
			break;
		}
	}
	sctns->mac.infmval1 |= MAC_CNT_VLD;
}
/* Returns a pointer to the next partition block. */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
						 bool this_lpar,
						 void *diag224_buf,
						 struct diag204_x_part_block *block)
{
	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
	struct cpu_inf *cpu_inf;

	for (i = 0; i < block->hdr.rcpus; i++) {
		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
			continue;

		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			cpu_inf = &part_inf->cp;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_cp |= block->cpus[i].cur_weight;
			break;
		case IFL:
			cpu_inf = &part_inf->ifl;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_ifl |= block->cpus[i].cur_weight;
			break;
		default:
			continue;
		}

		/* Caps and weights are only collected for the calling lpar. */
		if (!this_lpar)
			continue;

		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;

		if (block->cpus[i].weight == DED_WEIGHT)
			cpu_inf->cpu_num_ded += 1;
		else
			cpu_inf->cpu_num_shd += 1;
	}

	if (this_lpar && capped) {
		part_inf->cp.lpar_weight = weight_cp;
		part_inf->ifl.lpar_weight = weight_ifl;
	}
	part_inf->cp.all_weight += weight_cp;
	part_inf->ifl.all_weight += weight_ifl;
	/* The entry after the last cpu is the start of the next block. */
	return (struct diag204_x_part_block *)&block->cpus[i];
}
static void fill_diag(struct sthyi_sctns *sctns)
{
	int i, r, pages;
	bool this_lpar;
	void *diag204_buf;
	void *diag224_buf = NULL;
	struct diag204_x_info_blk_hdr *ti_hdr;
	struct diag204_x_part_block *part_block;
	struct diag204_x_phys_block *phys_block;
	struct lpar_cpu_inf lpar_inf = {};
	/* Errors are handled through the validity bits in the response. */
	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
	if (pages <= 0)
		return;

	diag204_buf = vmalloc(PAGE_SIZE * pages);
	if (!diag204_buf)
		return;

	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
		    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
	if (r < 0)
		goto out;

	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!diag224_buf || diag224(diag224_buf))
		goto out;

	ti_hdr = diag204_buf;
	part_block = diag204_buf + sizeof(*ti_hdr);
	for (i = 0; i < ti_hdr->npar; i++) {
		/*
		 * For the calling lpar we also need to get the cpu
		 * caps and weights. The time information block header
		 * specifies the offset to the partition block of the
		 * caller lpar, so we know when we process its data.
		 */
		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
					  part_block);
	}
	phys_block = (struct diag204_x_phys_block *)part_block;
	part_block = diag204_buf + ti_hdr->this_part;
	if (part_block->hdr.mtid)
		sctns->par.infpflg1 = PAR_MT_EN;

	sctns->par.infpval1 |= PAR_GRP_VLD;
	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
	       sizeof(sctns->par.infplgnm));

	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
	sctns->par.infpval1 |= PAR_PCNT_VLD;

	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
	sctns->par.infpval1 |= PAR_ABS_VLD;
	/*
	 * Everything below needs global performance data to be
	 * meaningful.
	 */
	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
		goto out;
	}
	fill_diag_mac(sctns, phys_block, diag224_buf);
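	/*
	 * infpwbcp/infpwbif report the weight-based capacity: the number of
	 * shared cores times the 0x10000-per-core scale times this
	 * partition's share of the total weight. For illustration, with 4
	 * shared CPs, a partition weight of 100 and a total weight of 400
	 * this gives 4 * 0x10000 * 100 / 400 = 0x10000, i.e. one core.
	 */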
	if (lpar_inf.cp.lpar_weight) {
		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
	}

	if (lpar_inf.ifl.lpar_weight) {
		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
	}
	sctns->par.infpval1 |= PAR_WGHT_VLD;
out:
	free_page((unsigned long)diag224_buf);
	vfree(diag204_buf);
}
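/*
 * Issue STHYI directly when facility 74 is installed. The function code is
 * taken from general register 0 and the response buffer address from
 * general register 2; the condition code reports whether the buffer was
 * filled.
 */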
static int sthyi(u64 vaddr)
{
	register u64 code asm("0") = 0;
	register u64 addr asm("2") = vaddr;
	int cc;

	asm volatile(
		".insn   rre,0xB2560000,%[code],%[addr]\n"
		"ipm     %[cc]\n"
		"srl     %[cc],28\n"
		: [cc] "=d" (cc)
		: [code] "d" (code), [addr] "a" (addr)
		: "3", "memory", "cc");
	return cc;
}
int handle_sthyi(struct kvm_vcpu *vcpu)
{
	int reg1, reg2, r = 0;
	u64 code, addr, cc = 0;
	struct sthyi_sctns *sctns = NULL;
	/*
	 * STHYI requires extensive locking in the higher hypervisors
	 * and is very expensive computationally and in memory. Therefore
	 * we ratelimit the executions per VM.
	 */
	if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
		/* Rewind the PSW so the guest retries the instruction later. */
		kvm_s390_retry_instr(vcpu);
		return 0;
	}
	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
	code = vcpu->run->s.regs.gprs[reg1];
	addr = vcpu->run->s.regs.gprs[reg2];

	vcpu->stat.instruction_sthyi++;
	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
	if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	if (code & 0xffff) {
		cc = 3;
		goto out;
	}
	/*
	 * If the page has not yet been faulted in, we want to do that
	 * now and not after all the expensive calculations.
	 */
	r = write_guest(vcpu, addr, reg2, &cc, 1);
	if (r)
		return kvm_s390_inject_prog_cond(vcpu, r);
	sctns = (void *)get_zeroed_page(GFP_KERNEL);
	if (!sctns)
		return -ENOMEM;
	/*
	 * If we are a guest, we don't want to emulate an emulated
	 * instruction. We ask the hypervisor to provide the data.
	 */
	if (test_facility(74)) {
		cc = sthyi((u64)sctns);
	} else {
		fill_hdr(sctns);
		fill_stsi(sctns);
		fill_diag(sctns);
	}
	if (cc == 0) {
		r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
		if (r) {
			free_page((unsigned long)sctns);
			return kvm_s390_inject_prog_cond(vcpu, r);
		}
	}
out:
	free_page((unsigned long)sctns);
	vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
	kvm_s390_set_psw_cc(vcpu, cc);
	return r;
}