1 // SPDX-License-Identifier: GPL-2.0
3 * channel program interfaces
5 * Copyright IBM Corp. 2017
7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
11 #include <linux/ratelimit.h>
13 #include <linux/slab.h>
14 #include <linux/iommu.h>
15 #include <linux/vfio.h>
16 #include <asm/idals.h>
18 #include "vfio_ccw_cp.h"
21 /* Starting guest physical I/O address. */
22 unsigned long pa_iova
;
23 /* Array that stores the PFNs of the pages that need to be pinned. */
24 unsigned long *pa_iova_pfn
;
25 /* Array that receives PFNs of the pages pinned. */
26 unsigned long *pa_pfn
;
27 /* Number of pages pinned from @pa_iova. */
32 struct list_head next
;
34 /* Guest physical address of the current chain. */
36 /* Count of the valid ccws in chain. */
38 /* Pinned PAGEs for the original data. */
39 struct pfn_array
*ch_pa
;
43 * pfn_array_alloc() - alloc memory for PFNs
44 * @pa: pfn_array on which to perform the operation
45 * @iova: target guest physical address
46 * @len: number of bytes that should be pinned from @iova
48 * Attempt to allocate memory for PFNs.
51 * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
52 * this structure will be filled in by this function.
55 * 0 if PFNs are allocated
56 * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
57 * -ENOMEM if alloc failed
59 static int pfn_array_alloc(struct pfn_array
*pa
, u64 iova
, unsigned int len
)
63 if (pa
->pa_nr
|| pa
->pa_iova_pfn
)
68 pa
->pa_nr
= ((iova
& ~PAGE_MASK
) + len
+ (PAGE_SIZE
- 1)) >> PAGE_SHIFT
;
72 pa
->pa_iova_pfn
= kcalloc(pa
->pa_nr
,
73 sizeof(*pa
->pa_iova_pfn
) +
76 if (unlikely(!pa
->pa_iova_pfn
)) {
80 pa
->pa_pfn
= pa
->pa_iova_pfn
+ pa
->pa_nr
;
82 pa
->pa_iova_pfn
[0] = pa
->pa_iova
>> PAGE_SHIFT
;
83 pa
->pa_pfn
[0] = -1ULL;
84 for (i
= 1; i
< pa
->pa_nr
; i
++) {
85 pa
->pa_iova_pfn
[i
] = pa
->pa_iova_pfn
[i
- 1] + 1;
86 pa
->pa_pfn
[i
] = -1ULL;
93 * pfn_array_pin() - Pin user pages in memory
94 * @pa: pfn_array on which to perform the operation
95 * @mdev: the mediated device to perform pin operations
97 * Returns number of pages pinned upon success.
98 * If the pin request partially succeeds, or fails completely,
99 * all pages are left unpinned and a negative error value is returned.
101 static int pfn_array_pin(struct pfn_array
*pa
, struct device
*mdev
)
105 ret
= vfio_pin_pages(mdev
, pa
->pa_iova_pfn
, pa
->pa_nr
,
106 IOMMU_READ
| IOMMU_WRITE
, pa
->pa_pfn
);
110 } else if (ret
> 0 && ret
!= pa
->pa_nr
) {
111 vfio_unpin_pages(mdev
, pa
->pa_iova_pfn
, ret
);
124 /* Unpin the pages before releasing the memory. */
125 static void pfn_array_unpin_free(struct pfn_array
*pa
, struct device
*mdev
)
127 /* Only unpin if any pages were pinned to begin with */
129 vfio_unpin_pages(mdev
, pa
->pa_iova_pfn
, pa
->pa_nr
);
131 kfree(pa
->pa_iova_pfn
);
134 static bool pfn_array_iova_pinned(struct pfn_array
*pa
, unsigned long iova
)
136 unsigned long iova_pfn
= iova
>> PAGE_SHIFT
;
139 for (i
= 0; i
< pa
->pa_nr
; i
++)
140 if (pa
->pa_iova_pfn
[i
] == iova_pfn
)
145 /* Create the list of IDAL words for a pfn_array. */
146 static inline void pfn_array_idal_create_words(
147 struct pfn_array
*pa
,
148 unsigned long *idaws
)
153 * Idal words (except the first one) rely on the memory being 4k
154 * aligned. If a user virtual address is 4K aligned, then its
155 * corresponding kernel physical address will also be 4K aligned. Thus
156 * there will be no problem here to simply use the phys to create an
160 for (i
= 0; i
< pa
->pa_nr
; i
++)
161 idaws
[i
] = pa
->pa_pfn
[i
] << PAGE_SHIFT
;
163 /* Adjust the first IDAW, since it may not start on a page boundary */
164 idaws
[0] += pa
->pa_iova
& (PAGE_SIZE
- 1);
167 static void convert_ccw0_to_ccw1(struct ccw1
*source
, unsigned long len
)
170 struct ccw1
*pccw1
= source
;
173 for (i
= 0; i
< len
; i
++) {
174 ccw0
= *(struct ccw0
*)pccw1
;
175 if ((pccw1
->cmd_code
& 0x0f) == CCW_CMD_TIC
) {
176 pccw1
->cmd_code
= CCW_CMD_TIC
;
180 pccw1
->cmd_code
= ccw0
.cmd_code
;
181 pccw1
->flags
= ccw0
.flags
;
182 pccw1
->count
= ccw0
.count
;
184 pccw1
->cda
= ccw0
.cda
;
190 * Within the domain (@mdev), copy @n bytes from a guest physical
191 * address (@iova) to a host physical address (@to).
193 static long copy_from_iova(struct device
*mdev
,
197 struct pfn_array pa
= {0};
202 ret
= pfn_array_alloc(&pa
, iova
, n
);
206 ret
= pfn_array_pin(&pa
, mdev
);
208 pfn_array_unpin_free(&pa
, mdev
);
213 for (i
= 0; i
< pa
.pa_nr
; i
++) {
214 from
= pa
.pa_pfn
[i
] << PAGE_SHIFT
;
217 from
+= iova
& (PAGE_SIZE
- 1);
218 m
-= iova
& (PAGE_SIZE
- 1);
222 memcpy(to
+ (n
- l
), (void *)from
, m
);
229 pfn_array_unpin_free(&pa
, mdev
);
235 * Helpers to operate ccwchain.
237 #define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02) /* true when the low two bits of cmd_code equal 0x02 */
238 #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) /* true when the low four bits of cmd_code equal 0x0C */
239 #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) /* true when the low four bits of cmd_code equal CCW_CMD_BASIC_SENSE */
241 #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) /* exact (unmasked) match of cmd_code against CCW_CMD_NOOP */
243 #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) /* exact (unmasked) match of cmd_code against CCW_CMD_TIC */
245 #define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA) /* yields the masked flag bit (nonzero if CCW_FLAG_IDA is set), not a normalized 0/1 */
246 #define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP) /* yields the masked flag bit (nonzero if CCW_FLAG_SKIP is set), not a normalized 0/1 */
248 #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) /* nonzero if either CCW_FLAG_CC or CCW_FLAG_DC is set in flags */
251 * ccw_does_data_transfer()
253 * Determine whether a CCW will move any data, such that the guest pages
254 * would need to be pinned before performing the I/O.
256 * Returns 1 if yes, 0 if no.
258 static inline int ccw_does_data_transfer(struct ccw1
*ccw
)
260 /* If the count field is zero, then no data will be transferred */
264 /* If the command is a NOP, then no data will be transferred */
265 if (ccw_is_noop(ccw
))
268 /* If the skip flag is off, then data will be transferred */
269 if (!ccw_is_skip(ccw
))
273 * If the skip flag is on, it is only meaningful if the command
274 * code is a read, read backward, sense, or sense ID. In those
275 * cases, no data will be transferred.
277 if (ccw_is_read(ccw
) || ccw_is_read_backward(ccw
))
280 if (ccw_is_sense(ccw
))
283 /* The skip flag is on, but it is ignored for this command code. */
288 * is_cpa_within_range()
290 * @cpa: channel program address being questioned
291 * @head: address of the beginning of a CCW chain
292 * @len: number of CCWs within the chain
294 * Determine whether the address of a CCW (whether a new chain,
295 * or the target of a TIC) falls within a range (including the end points).
297 * Returns 1 if yes, 0 if no.
299 static inline int is_cpa_within_range(u32 cpa
, u32 head
, int len
)
301 u32 tail
= head
+ (len
- 1) * sizeof(struct ccw1
);
303 return (head
<= cpa
&& cpa
<= tail
);
306 static inline int is_tic_within_range(struct ccw1
*ccw
, u32 head
, int len
)
308 if (!ccw_is_tic(ccw
))
311 return is_cpa_within_range(ccw
->cda
, head
, len
);
314 static struct ccwchain
*ccwchain_alloc(struct channel_program
*cp
, int len
)
316 struct ccwchain
*chain
;
320 /* Make ccw address aligned to 8. */
321 size
= ((sizeof(*chain
) + 7L) & -8L) +
322 sizeof(*chain
->ch_ccw
) * len
+
323 sizeof(*chain
->ch_pa
) * len
;
324 chain
= kzalloc(size
, GFP_DMA
| GFP_KERNEL
);
328 data
= (u8
*)chain
+ ((sizeof(*chain
) + 7L) & -8L);
329 chain
->ch_ccw
= (struct ccw1
*)data
;
331 data
= (u8
*)(chain
->ch_ccw
) + sizeof(*chain
->ch_ccw
) * len
;
332 chain
->ch_pa
= (struct pfn_array
*)data
;
336 list_add_tail(&chain
->next
, &cp
->ccwchain_list
);
341 static void ccwchain_free(struct ccwchain
*chain
)
343 list_del(&chain
->next
);
347 /* Free resource for a ccw that allocated memory for its cda. */
348 static void ccwchain_cda_free(struct ccwchain
*chain
, int idx
)
350 struct ccw1
*ccw
= chain
->ch_ccw
+ idx
;
355 kfree((void *)(u64
)ccw
->cda
);
359 * ccwchain_calc_length - calculate the length of the ccw chain.
360 * @iova: guest physical address of the target ccw chain
361 * @cp: channel_program on which to perform the operation
363 * This is the chain length not considering any TICs.
364 * You need to do a new round for each TIC target.
366 * The program is also validated for absence of not yet supported
367 * indirect data addressing scenarios.
369 * Returns: the length of the ccw chain or -errno.
371 static int ccwchain_calc_length(u64 iova
, struct channel_program
*cp
)
373 struct ccw1
*ccw
= cp
->guest_cp
;
380 * As we don't want to fail direct addressing even if the
381 * orb specified one of the unsupported formats, we defer
382 * checking for IDAWs in unsupported formats to here.
384 if ((!cp
->orb
.cmd
.c64
|| cp
->orb
.cmd
.i2k
) && ccw_is_idal(ccw
))
388 * We want to keep counting if the current CCW has the
389 * command-chaining flag enabled, or if it is a TIC CCW
390 * that loops back into the current chain. The latter
391 * is used for device orientation, where the CCW PRIOR to
392 * the TIC can either jump to the TIC or a CCW immediately
393 * after the TIC, depending on the results of its operation.
395 if (!ccw_is_chain(ccw
) && !is_tic_within_range(ccw
, iova
, cnt
))
399 } while (cnt
< CCWCHAIN_LEN_MAX
+ 1);
401 if (cnt
== CCWCHAIN_LEN_MAX
+ 1)
407 static int tic_target_chain_exists(struct ccw1
*tic
, struct channel_program
*cp
)
409 struct ccwchain
*chain
;
412 list_for_each_entry(chain
, &cp
->ccwchain_list
, next
) {
413 ccw_head
= chain
->ch_iova
;
414 if (is_cpa_within_range(tic
->cda
, ccw_head
, chain
->ch_len
))
421 static int ccwchain_loop_tic(struct ccwchain
*chain
,
422 struct channel_program
*cp
);
424 static int ccwchain_handle_ccw(u32 cda
, struct channel_program
*cp
)
426 struct ccwchain
*chain
;
429 /* Copy 2K (the most we support today) of possible CCWs */
430 len
= copy_from_iova(cp
->mdev
, cp
->guest_cp
, cda
,
431 CCWCHAIN_LEN_MAX
* sizeof(struct ccw1
));
435 /* Convert any Format-0 CCWs to Format-1 */
436 if (!cp
->orb
.cmd
.fmt
)
437 convert_ccw0_to_ccw1(cp
->guest_cp
, CCWCHAIN_LEN_MAX
);
439 /* Count the CCWs in the current chain */
440 len
= ccwchain_calc_length(cda
, cp
);
444 /* Need to allocate a new chain for this one. */
445 chain
= ccwchain_alloc(cp
, len
);
448 chain
->ch_iova
= cda
;
450 /* Copy the actual CCWs into the new chain */
451 memcpy(chain
->ch_ccw
, cp
->guest_cp
, len
* sizeof(struct ccw1
));
453 /* Loop for tics on this new chain. */
454 ret
= ccwchain_loop_tic(chain
, cp
);
457 ccwchain_free(chain
);
463 static int ccwchain_loop_tic(struct ccwchain
*chain
, struct channel_program
*cp
)
468 for (i
= 0; i
< chain
->ch_len
; i
++) {
469 tic
= chain
->ch_ccw
+ i
;
471 if (!ccw_is_tic(tic
))
474 /* May transfer to an existing chain. */
475 if (tic_target_chain_exists(tic
, cp
))
478 /* Build a ccwchain for the next segment */
479 ret
= ccwchain_handle_ccw(tic
->cda
, cp
);
487 static int ccwchain_fetch_tic(struct ccwchain
*chain
,
489 struct channel_program
*cp
)
491 struct ccw1
*ccw
= chain
->ch_ccw
+ idx
;
492 struct ccwchain
*iter
;
495 list_for_each_entry(iter
, &cp
->ccwchain_list
, next
) {
496 ccw_head
= iter
->ch_iova
;
497 if (is_cpa_within_range(ccw
->cda
, ccw_head
, iter
->ch_len
)) {
498 ccw
->cda
= (__u32
) (addr_t
) (((char *)iter
->ch_ccw
) +
499 (ccw
->cda
- ccw_head
));
507 static int ccwchain_fetch_direct(struct ccwchain
*chain
,
509 struct channel_program
*cp
)
512 struct pfn_array
*pa
;
514 unsigned long *idaws
;
517 int idaw_nr
, idal_len
;
520 ccw
= chain
->ch_ccw
+ idx
;
525 /* Calculate size of IDAL */
526 if (ccw_is_idal(ccw
)) {
527 /* Read first IDAW to see if it's 4K-aligned or not. */
528 /* All subsequent IDAWs will be 4K-aligned. */
529 ret
= copy_from_iova(cp
->mdev
, &iova
, ccw
->cda
, sizeof(iova
));
535 idaw_nr
= idal_nr_words((void *)iova
, bytes
);
536 idal_len
= idaw_nr
* sizeof(*idaws
);
538 /* Allocate an IDAL from host storage */
539 idaws
= kcalloc(idaw_nr
, sizeof(*idaws
), GFP_DMA
| GFP_KERNEL
);
546 * Allocate an array of pfn's for pages to pin/translate.
547 * The number of pages is actually the count of the idaws
548 * required for the data transfer, since we only support
551 pa
= chain
->ch_pa
+ idx
;
552 ret
= pfn_array_alloc(pa
, iova
, bytes
);
556 if (ccw_is_idal(ccw
)) {
557 /* Copy guest IDAL into host IDAL */
558 ret
= copy_from_iova(cp
->mdev
, idaws
, ccw
->cda
, idal_len
);
563 * Copy guest IDAWs into pfn_array, in case the memory they
564 * occupy is not contiguous.
566 for (i
= 0; i
< idaw_nr
; i
++)
567 pa
->pa_iova_pfn
[i
] = idaws
[i
] >> PAGE_SHIFT
;
570 * No action is required here; the iova addresses in pfn_array
571 * were initialized sequentially in pfn_array_alloc() beginning
572 * with the contents of ccw->cda.
576 if (ccw_does_data_transfer(ccw
)) {
577 ret
= pfn_array_pin(pa
, cp
->mdev
);
584 ccw
->cda
= (__u32
) virt_to_phys(idaws
);
585 ccw
->flags
|= CCW_FLAG_IDA
;
587 /* Populate the IDAL with pinned/translated addresses from pfn */
588 pfn_array_idal_create_words(pa
, idaws
);
593 pfn_array_unpin_free(pa
, cp
->mdev
);
603 * To reduce memory copy, we'll pin the cda page in memory,
604 * and to get rid of the cda 2G limitation of ccw1, we'll translate
605 * direct ccws to idal ccws.
607 static int ccwchain_fetch_one(struct ccwchain
*chain
,
609 struct channel_program
*cp
)
611 struct ccw1
*ccw
= chain
->ch_ccw
+ idx
;
614 return ccwchain_fetch_tic(chain
, idx
, cp
);
616 return ccwchain_fetch_direct(chain
, idx
, cp
);
620 * cp_init() - allocate ccwchains for a channel program.
621 * @cp: channel_program on which to perform the operation
622 * @mdev: the mediated device to perform pin/unpin operations
623 * @orb: control block for the channel program from the guest
625 * This creates one or more ccwchain(s), and copies the raw data of
626 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
629 * 1. Supports idal(c64) ccw chaining.
630 * 2. Supports 4k idaw.
633 * %0 on success and a negative error value on failure.
635 int cp_init(struct channel_program
*cp
, struct device
*mdev
, union orb
*orb
)
637 /* custom ratelimit used to avoid flood during guest IPL */
638 static DEFINE_RATELIMIT_STATE(ratelimit_state
, 5 * HZ
, 1);
642 * We only support prefetching the channel program. We assume all channel
643 * programs executed by supported guests likewise support prefetching.
644 * Executing a channel program that does not specify prefetching will
645 * typically not cause an error, but a warning is issued to help identify
646 * the problem if something does break.
648 if (!orb
->cmd
.pfch
&& __ratelimit(&ratelimit_state
))
649 dev_warn(mdev
, "Prefetching channel program even though prefetch not specified in ORB");
651 INIT_LIST_HEAD(&cp
->ccwchain_list
);
652 memcpy(&cp
->orb
, orb
, sizeof(*orb
));
655 /* Build a ccwchain for the first CCW segment */
656 ret
= ccwchain_handle_ccw(orb
->cmd
.cpa
, cp
);
659 cp
->initialized
= true;
661 /* It is safe to force: if it was not set but idals used
662 * ccwchain_calc_length would have returned an error.
672 * cp_free() - free resources for channel program.
673 * @cp: channel_program on which to perform the operation
675 * This unpins the memory pages and frees the memory space occupied by
676 * @cp, which must have been returned by a previous call to cp_init().
677 * Otherwise, undefined behavior occurs.
679 void cp_free(struct channel_program
*cp
)
681 struct ccwchain
*chain
, *temp
;
684 if (!cp
->initialized
)
687 cp
->initialized
= false;
688 list_for_each_entry_safe(chain
, temp
, &cp
->ccwchain_list
, next
) {
689 for (i
= 0; i
< chain
->ch_len
; i
++) {
690 pfn_array_unpin_free(chain
->ch_pa
+ i
, cp
->mdev
);
691 ccwchain_cda_free(chain
, i
);
693 ccwchain_free(chain
);
698 * cp_prefetch() - translate a guest physical address channel program to
699 * a real-device runnable channel program.
700 * @cp: channel_program on which to perform the operation
702 * This function translates the guest-physical-address channel program
703 * and stores the result to ccwchain list. @cp must have been
704 * initialized by a previous call with cp_init(). Otherwise, undefined
706 * For each chain composing the channel program:
707 * - On entry ch_len holds the count of CCWs to be translated.
708 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
709 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
711 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
712 * as helpers to do ccw chain translation inside the kernel. Basically
713 * they accept a channel program issued by a virtual machine, and
714 * translate the channel program to a real-device runnable channel
717 * These APIs will copy the ccws into kernel-space buffers, and update
718 * the guest physical addresses with their corresponding host physical
719 * addresses. Then channel I/O device drivers could issue the
720 * translated channel program to real devices to perform an I/O
723 * These interfaces are designed to support translation only for
724 * channel programs, which are generated and formatted by a
725 * guest. Thus this will make it possible for things like VFIO to
726 * leverage the interfaces to passthrough a channel I/O mediated
729 * We support direct ccw chaining by translating them to idal ccws.
732 * %0 on success and a negative error value on failure.
734 int cp_prefetch(struct channel_program
*cp
)
736 struct ccwchain
*chain
;
739 /* this is an error in the caller */
740 if (!cp
->initialized
)
743 list_for_each_entry(chain
, &cp
->ccwchain_list
, next
) {
745 for (idx
= 0; idx
< len
; idx
++) {
746 ret
= ccwchain_fetch_one(chain
, idx
, cp
);
754 /* Only cleanup the chain elements that were actually translated. */
756 list_for_each_entry_continue(chain
, &cp
->ccwchain_list
, next
) {
763 * cp_get_orb() - get the orb of the channel program
764 * @cp: channel_program on which to perform the operation
765 * @intparm: new intparm for the returned orb
766 * @lpm: candidate value of the logical-path mask for the returned orb
768 * This function returns the address of the updated orb of the channel
769 * program. Channel I/O device drivers could use this orb to issue a
772 union orb
*cp_get_orb(struct channel_program
*cp
, u32 intparm
, u8 lpm
)
775 struct ccwchain
*chain
;
778 /* this is an error in the caller */
779 if (!cp
->initialized
)
784 orb
->cmd
.intparm
= intparm
;
786 orb
->cmd
.key
= PAGE_DEFAULT_KEY
>> 4;
788 if (orb
->cmd
.lpm
== 0)
791 chain
= list_first_entry(&cp
->ccwchain_list
, struct ccwchain
, next
);
793 orb
->cmd
.cpa
= (__u32
) __pa(cpa
);
799 * cp_update_scsw() - update scsw for a channel program.
800 * @cp: channel_program on which to perform the operation
801 * @scsw: I/O results of the channel program and also the target to be
804 * @scsw contains the I/O results of the channel program that is pointed
805 * to by @cp. However what @scsw->cpa stores is a host physical
806 * address, which is meaningless for the guest, which is waiting for
809 * This function updates @scsw->cpa to its corresponding guest physical
812 void cp_update_scsw(struct channel_program
*cp
, union scsw
*scsw
)
814 struct ccwchain
*chain
;
815 u32 cpa
= scsw
->cmd
.cpa
;
818 if (!cp
->initialized
)
823 * For now, only update the cmd.cpa part. We may need to deal with
824 * other portions of the schib as well, even if we don't return them
825 * in the ioctl directly. Path status changes etc.
827 list_for_each_entry(chain
, &cp
->ccwchain_list
, next
) {
828 ccw_head
= (u32
)(u64
)chain
->ch_ccw
;
830 * On successful execution, cpa points just beyond the end
833 if (is_cpa_within_range(cpa
, ccw_head
, chain
->ch_len
+ 1)) {
835 * (cpa - ccw_head) is the offset value of the host
836 * physical ccw to its chain head.
837 * Adding this value to the guest physical ccw chain
838 * head gets us the guest cpa.
840 cpa
= chain
->ch_iova
+ (cpa
- ccw_head
);
849 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
850 * @cp: channel_program on which to perform the operation
851 * @iova: the iova to check
853 * If the @iova is currently pinned for the ccw chain, return true;
856 bool cp_iova_pinned(struct channel_program
*cp
, u64 iova
)
858 struct ccwchain
*chain
;
861 if (!cp
->initialized
)
864 list_for_each_entry(chain
, &cp
->ccwchain_list
, next
) {
865 for (i
= 0; i
< chain
->ch_len
; i
++)
866 if (pfn_array_iova_pinned(chain
->ch_pa
+ i
, iova
))