1 /* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
8 #ifndef _LINUX_IOV_ITER_H
9 #define _LINUX_IOV_ITER_H
11 #include <linux/uio.h>
12 #include <linux/bvec.h>
13 #include <linux/folio_queue.h>
15 typedef size_t (*iov_step_f
)(void *iter_base
, size_t progress
, size_t len
,
16 void *priv
, void *priv2
);
17 typedef size_t (*iov_ustep_f
)(void __user
*iter_base
, size_t progress
, size_t len
,
18 void *priv
, void *priv2
);
23 static __always_inline
24 size_t iterate_ubuf(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
27 void __user
*base
= iter
->ubuf
;
28 size_t progress
= 0, remain
;
30 remain
= step(base
+ iter
->iov_offset
, 0, len
, priv
, priv2
);
31 progress
= len
- remain
;
32 iter
->iov_offset
+= progress
;
33 iter
->count
-= progress
;
40 static __always_inline
41 size_t iterate_iovec(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
44 const struct iovec
*p
= iter
->__iov
;
45 size_t progress
= 0, skip
= iter
->iov_offset
;
48 size_t remain
, consumed
;
49 size_t part
= min(len
, p
->iov_len
- skip
);
52 remain
= step(p
->iov_base
+ skip
, progress
, part
, priv
, priv2
);
53 consumed
= part
- remain
;
57 if (skip
< p
->iov_len
)
64 iter
->nr_segs
-= p
- iter
->__iov
;
66 iter
->iov_offset
= skip
;
67 iter
->count
-= progress
;
74 static __always_inline
75 size_t iterate_kvec(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
78 const struct kvec
*p
= iter
->kvec
;
79 size_t progress
= 0, skip
= iter
->iov_offset
;
82 size_t remain
, consumed
;
83 size_t part
= min(len
, p
->iov_len
- skip
);
86 remain
= step(p
->iov_base
+ skip
, progress
, part
, priv
, priv2
);
87 consumed
= part
- remain
;
91 if (skip
< p
->iov_len
)
98 iter
->nr_segs
-= p
- iter
->kvec
;
100 iter
->iov_offset
= skip
;
101 iter
->count
-= progress
;
108 static __always_inline
109 size_t iterate_bvec(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
112 const struct bio_vec
*p
= iter
->bvec
;
113 size_t progress
= 0, skip
= iter
->iov_offset
;
116 size_t remain
, consumed
;
117 size_t offset
= p
->bv_offset
+ skip
, part
;
118 void *kaddr
= kmap_local_page(p
->bv_page
+ offset
/ PAGE_SIZE
);
121 (size_t)(p
->bv_len
- skip
),
122 (size_t)(PAGE_SIZE
- offset
% PAGE_SIZE
));
123 remain
= step(kaddr
+ offset
% PAGE_SIZE
, progress
, part
, priv
, priv2
);
125 consumed
= part
- remain
;
127 progress
+= consumed
;
129 if (skip
>= p
->bv_len
) {
137 iter
->nr_segs
-= p
- iter
->bvec
;
139 iter
->iov_offset
= skip
;
140 iter
->count
-= progress
;
145 * Handle ITER_FOLIOQ.
147 static __always_inline
148 size_t iterate_folioq(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
151 const struct folio_queue
*folioq
= iter
->folioq
;
152 unsigned int slot
= iter
->folioq_slot
;
153 size_t progress
= 0, skip
= iter
->iov_offset
;
155 if (slot
== folioq_nr_slots(folioq
)) {
156 /* The iterator may have been extended. */
157 folioq
= folioq
->next
;
162 struct folio
*folio
= folioq_folio(folioq
, slot
);
163 size_t part
, remain
, consumed
;
170 fsize
= folioq_folio_size(folioq
, slot
);
171 base
= kmap_local_folio(folio
, skip
);
172 part
= umin(len
, PAGE_SIZE
- skip
% PAGE_SIZE
);
173 remain
= step(base
, progress
, part
, priv
, priv2
);
175 consumed
= part
- remain
;
177 progress
+= consumed
;
182 if (slot
== folioq_nr_slots(folioq
) && folioq
->next
) {
183 folioq
= folioq
->next
;
191 iter
->folioq_slot
= slot
;
192 iter
->folioq
= folioq
;
193 iter
->iov_offset
= skip
;
194 iter
->count
-= progress
;
199 * Handle ITER_XARRAY.
201 static __always_inline
202 size_t iterate_xarray(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
207 loff_t start
= iter
->xarray_start
+ iter
->iov_offset
;
208 pgoff_t index
= start
/ PAGE_SIZE
;
209 XA_STATE(xas
, iter
->xarray
, index
);
212 xas_for_each(&xas
, folio
, ULONG_MAX
) {
213 size_t remain
, consumed
, offset
, part
, flen
;
215 if (xas_retry(&xas
, folio
))
217 if (WARN_ON(xa_is_value(folio
)))
219 if (WARN_ON(folio_test_hugetlb(folio
)))
222 offset
= offset_in_folio(folio
, start
+ progress
);
223 flen
= min(folio_size(folio
) - offset
, len
);
226 void *base
= kmap_local_folio(folio
, offset
);
228 part
= min_t(size_t, flen
,
229 PAGE_SIZE
- offset_in_page(offset
));
230 remain
= step(base
, progress
, part
, priv
, priv2
);
233 consumed
= part
- remain
;
234 progress
+= consumed
;
237 if (remain
|| len
== 0)
246 iter
->iov_offset
+= progress
;
247 iter
->count
-= progress
;
252 * Handle ITER_DISCARD.
254 static __always_inline
255 size_t iterate_discard(struct iov_iter
*iter
, size_t len
, void *priv
, void *priv2
,
258 size_t progress
= len
;
260 iter
->count
-= progress
;
265 * iterate_and_advance2 - Iterate over an iterator
266 * @iter: The iterator to iterate over.
267 * @len: The amount to iterate over.
268 * @priv: Data for the step functions.
269 * @priv2: More data for the step functions.
270 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
271 * @step: Function for other iterators; given kernel addresses.
273 * Iterate over the next part of an iterator, up to the specified length. The
274 * buffer is presented in segments, which for kernel iteration are broken up by
275 * physical pages and mapped, with the mapped address being presented.
277 * Two step functions, @step and @ustep, must be provided, one for handling
278 * mapped kernel addresses and the other is given user addresses which have the
279 * potential to fault since no pinning is performed.
281 * The step functions are passed the address and length of the segment, @priv,
282 * @priv2 and the amount of data so far iterated over (which can, for example,
283 * be added to @priv to point to the right part of a second buffer). The step
284 * functions should return the amount of the segment they didn't process (ie. 0
285 * indicates complete processsing).
287 * This function returns the amount of data processed (ie. 0 means nothing was
288 * processed and the value of @len means processes to completion).
290 static __always_inline
291 size_t iterate_and_advance2(struct iov_iter
*iter
, size_t len
, void *priv
,
292 void *priv2
, iov_ustep_f ustep
, iov_step_f step
)
294 if (unlikely(iter
->count
< len
))
299 if (likely(iter_is_ubuf(iter
)))
300 return iterate_ubuf(iter
, len
, priv
, priv2
, ustep
);
301 if (likely(iter_is_iovec(iter
)))
302 return iterate_iovec(iter
, len
, priv
, priv2
, ustep
);
303 if (iov_iter_is_bvec(iter
))
304 return iterate_bvec(iter
, len
, priv
, priv2
, step
);
305 if (iov_iter_is_kvec(iter
))
306 return iterate_kvec(iter
, len
, priv
, priv2
, step
);
307 if (iov_iter_is_folioq(iter
))
308 return iterate_folioq(iter
, len
, priv
, priv2
, step
);
309 if (iov_iter_is_xarray(iter
))
310 return iterate_xarray(iter
, len
, priv
, priv2
, step
);
311 return iterate_discard(iter
, len
, priv
, priv2
, step
);
315 * iterate_and_advance - Iterate over an iterator
316 * @iter: The iterator to iterate over.
317 * @len: The amount to iterate over.
318 * @priv: Data for the step functions.
319 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
320 * @step: Function for other iterators; given kernel addresses.
322 * As iterate_and_advance2(), but priv2 is always NULL.
324 static __always_inline
325 size_t iterate_and_advance(struct iov_iter
*iter
, size_t len
, void *priv
,
326 iov_ustep_f ustep
, iov_step_f step
)
328 return iterate_and_advance2(iter
, len
, priv
, NULL
, ustep
, step
);
332 * iterate_and_advance_kernel - Iterate over a kernel-internal iterator
333 * @iter: The iterator to iterate over.
334 * @len: The amount to iterate over.
335 * @priv: Data for the step functions.
336 * @priv2: More data for the step functions.
337 * @step: Function for other iterators; given kernel addresses.
339 * Iterate over the next part of an iterator, up to the specified length. The
340 * buffer is presented in segments, which for kernel iteration are broken up by
341 * physical pages and mapped, with the mapped address being presented.
343 * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type
344 * iterators; it will not handle UBUF or IOVEC-type iterators.
346 * A step functions, @step, must be provided, one for handling mapped kernel
347 * addresses and the other is given user addresses which have the potential to
348 * fault since no pinning is performed.
350 * The step functions are passed the address and length of the segment, @priv,
351 * @priv2 and the amount of data so far iterated over (which can, for example,
352 * be added to @priv to point to the right part of a second buffer). The step
353 * functions should return the amount of the segment they didn't process (ie. 0
354 * indicates complete processsing).
356 * This function returns the amount of data processed (ie. 0 means nothing was
357 * processed and the value of @len means processes to completion).
359 static __always_inline
360 size_t iterate_and_advance_kernel(struct iov_iter
*iter
, size_t len
, void *priv
,
361 void *priv2
, iov_step_f step
)
363 if (unlikely(iter
->count
< len
))
367 if (iov_iter_is_bvec(iter
))
368 return iterate_bvec(iter
, len
, priv
, priv2
, step
);
369 if (iov_iter_is_kvec(iter
))
370 return iterate_kvec(iter
, len
, priv
, priv2
, step
);
371 if (iov_iter_is_folioq(iter
))
372 return iterate_folioq(iter
, len
, priv
, priv2
, step
);
373 if (iov_iter_is_xarray(iter
))
374 return iterate_xarray(iter
, len
, priv
, priv2
, step
);
375 return iterate_discard(iter
, len
, priv
, priv2
, step
);
378 #endif /* _LINUX_IOV_ITER_H */