// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
 *
 * Scatterlist handling helpers.
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/folio_queue.h>

/**
 * sg_next - return the next scatterlist entry in a list
 * @sg:		The current sg entry
 *
 * Description:
 *   Usually the next entry will be @sg@ + 1, but if this sg element is part
 *   of a chained scatterlist, it could jump to the start of a new
 *   scatterlist array.
 *
 **/
struct scatterlist *sg_next(struct scatterlist *sg)
{
	if (sg_is_last(sg))
		return NULL;

	sg++;
	if (unlikely(sg_is_chain(sg)))
		sg = sg_chain_ptr(sg);

	return sg;
}
EXPORT_SYMBOL(sg_next);

/**
 * sg_nents - return total count of entries in scatterlist
 * @sg:		The scatterlist
 *
 * Description:
 *   Returns the number of entries in @sg, taking chaining into account.
 *
 **/
int sg_nents(struct scatterlist *sg)
{
	int nents;

	for (nents = 0; sg; sg = sg_next(sg))
		nents++;

	return nents;
}
EXPORT_SYMBOL(sg_nents);

/**
 * sg_nents_for_len - return total count of entries in scatterlist
 *                    needed to satisfy the supplied length
 * @sg:		The scatterlist
 * @len:	The total required length
 *
 * Description:
 *   Determines the number of entries in @sg that are required to meet
 *   the supplied length, taking chaining into account as well.
 *
 * Returns:
 *   the number of sg entries needed, negative error on failure
 *
 **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	int nents;
	u64 total;

	if (!len)
		return 0;

	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
		nents++;
		total += sg->length;
		if (total >= len)
			return nents;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);

/**
 * sg_last - return the last scatterlist entry in a list
 * @sgl:	First entry in the scatterlist
 * @nents:	Number of entries in the scatterlist
 *
 * Description:
 *   Should only be used casually, it (currently) scans the entire list
 *   to get the last entry.
 *
 *   Note that the @sgl@ pointer passed in need not be the first one,
 *   the important bit is that @nents@ denotes the number of entries that
 *   exist from @sgl@.
 *
 **/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg, *ret = NULL;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i)
		ret = sg;

	BUG_ON(!sg_is_last(ret));
	return ret;
}
EXPORT_SYMBOL(sg_last);

/**
 * sg_init_table - Initialize SG table
 * @sgl:	The SG table
 * @nents:	Number of entries in table
 *
 * Notes:
 *   If this is part of a chained sg table, sg_mark_end() should be
 *   used only on the last table part.
 *
 **/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);

/**
 * sg_init_one - Initialize a single entry sg list
 * @sg:		SG entry
 * @buf:	Virtual address for IO
 * @buflen:	IO length
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);

/*
 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
 * helpers.
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		/*
		 * Kmemleak doesn't track page allocations as they are not
		 * commonly used (in a raw form) for kernel data structures.
		 * As we chain together a list of pages and then a normal
		 * kmalloc (tracked by kmemleak), in order for that last
		 * allocation not to become decoupled (and thus a
		 * false-positive) we need to inform kmemleak of all the
		 * intermediate allocations.
		 */
		void *ptr = (void *) __get_free_page(gfp_mask);

		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
		return ptr;
	} else
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		kmemleak_free(sg);
		free_page((unsigned long) sg);
	} else
		kfree(sg);
}

/**
 * __sg_free_table - Free a previously mapped sg table
 * @table:	The sg table header to use
 * @max_ents:	The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *	scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn:	Free function
 * @num_ents:	Number of entries in the table
 *
 * Description:
 *   Free an sg table previously allocated and setup with
 *   __sg_alloc_table().  The @max_ents value must be identical to
 *   that previously used with __sg_alloc_table().
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);

/**
 * sg_free_append_table - Free a previously allocated append sg table.
 * @table:	The mapped sg append table header
 *
 **/
void sg_free_append_table(struct sg_append_table *table)
{
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
EXPORT_SYMBOL(sg_free_append_table);

/**
 * sg_free_table - Free a previously allocated sg table
 * @table:	The mapped sg table header
 *
 **/
void sg_free_table(struct sg_table *table)
{
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);

/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @max_ents:	The maximum number of entries the allocator returns per call
 * @first_chunk: first SGL if preallocated (may be %NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *	scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask:	GFP allocation mask
 * @alloc_fn:	Allocator to use
 *
 * Description:
 *   This function sets up a @table that is @nents entries long. The allocator
 *   is defined to return scatterlist chunks of maximum size @max_ents.
 *   Thus if @nents is bigger than @max_ents, the scatterlists will be
 *   chained in units of @max_ents.
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   __sg_free_table() to cleanup any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage. Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);

/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @gfp_mask:	GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table. If @nents@ is larger than
 *   SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int ret;

	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
			       NULL, 0, gfp_mask, sg_kmalloc);
	if (unlikely(ret))
		sg_free_table(table);
	return ret;
}
EXPORT_SYMBOL(sg_alloc_table);

static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}

static bool pages_are_mergeable(struct page *a, struct page *b)
{
	if (page_to_pfn(a) != page_to_pfn(b) + 1)
		return false;
	if (!zone_device_pages_have_same_pgmap(a, b))
		return false;
	return true;
}

/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *                                    table from an array of pages
 * @sgt_append:	The sg append table to use
 * @pages:	Pointer to an array of page pointers
 * @n_pages:	Number of pages in the pages array
 * @offset:	Offset from start of the first page to the start of a buffer
 * @size:	Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages:	Number of pages the caller still has to add after this call
 * @gfp_mask:	GFP allocation mask
 *
 * Description:
 *   On the first call this allocates and initializes an sg table from a list
 *   of pages; on later calls it reuses the scatterlist from @sgt_append.
 *   Contiguous ranges of the pages are squashed into a single scatterlist
 *   entry up to the maximum size specified in @max_segment. A user may
 *   provide an offset at a start and a size of valid data in a buffer
 *   specified by the page array. The returned sg table is released by
 *   sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   sg_free_append_table() to cleanup any leftover allocations.
 *
 *   In the first call, @sgt_append must be initialized.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		unsigned long next_pfn =
			(page_to_phys(sg_page(sgt_append->prv)) +
			 sgt_append->prv->offset + sgt_append->prv->length) /
			PAGE_SIZE;

		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (prv_len)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);

/**
 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
 *                                     an array of pages and given maximum
 *                                     segment.
 * @sgt:	The sg table header to use
 * @pages:	Pointer to an array of page pointers
 * @n_pages:	Number of pages in the pages array
 * @offset:	Offset from start of the first page to the start of a buffer
 * @size:	Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @gfp_mask:	GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table from a list of pages. Contiguous
 *   ranges of the pages are squashed into a single scatterlist node up to the
 *   maximum size specified in @max_segment. A user may provide an offset at a
 *   start and a size of valid data in a buffer specified by the page array.
 *
 *   The returned sg table is released by sg_free_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (err) {
		sg_free_append_table(&append);
		return err;
	}
	memcpy(sgt, &append.sgt, sizeof(*sgt));
	WARN_ON(append.total_nents != sgt->orig_nents);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);

#ifdef CONFIG_SGL_ALLOC

/**
 * sgl_alloc_order - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist. Must be at least one
 * @order: Second argument for alloc_pages()
 * @chainable: Whether or not to allocate an extra element in the scatterlist
 *	for scatterlist chaining purposes
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist that have pages
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);

/**
 * sgl_alloc - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	return sgl_alloc_order(length, 0, false, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);

/**
 * sgl_free_n_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @nents: Maximum number of elements to free
 * @order: Second argument for __free_pages()
 *
 * Notes:
 * - If several scatterlists have been chained and each chain element is
 *   freed separately then it's essential to set nents correctly to avoid that a
 *   page would get freed twice.
 * - All pages in a chained scatterlist can be freed at once by setting @nents
 *   to a high number.
 */
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
{
	struct scatterlist *sg;
	struct page *page;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!sg)
			break;
		page = sg_page(sg);
		if (page)
			__free_pages(page, order);
	}
	kfree(sgl);
}
EXPORT_SYMBOL(sgl_free_n_order);

/**
 * sgl_free_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @order: Second argument for __free_pages()
 */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	sgl_free_n_order(sgl, INT_MAX, order);
}
EXPORT_SYMBOL(sgl_free_order);

/**
 * sgl_free - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 */
void sgl_free(struct scatterlist *sgl)
{
	sgl_free_order(sgl, 0);
}
EXPORT_SYMBOL(sgl_free);

#endif /* CONFIG_SGL_ALLOC */

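/*
 * Illustrative sketch (not part of the original file, requires
 * CONFIG_SGL_ALLOC): allocating backing pages together with the scatterlist
 * and releasing both. The requested size and variable names are hypothetical.
 *
 *	unsigned int nents;
 *	struct scatterlist *sgl;
 *
 *	sgl = sgl_alloc(16 * PAGE_SIZE, GFP_KERNEL, &nents);
 *	if (sgl) {
 *		// ... use the nents entries of sgl ...
 *		sgl_free(sgl);
 *	}
 */
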
void __sg_page_iter_start(struct sg_page_iter *piter,
			  struct scatterlist *sglist, unsigned int nents,
			  unsigned long pgoffset)
{
	piter->__pg_advance = 0;
	piter->__nents = nents;

	piter->sg = sglist;
	piter->sg_pgoffset = pgoffset;
}
EXPORT_SYMBOL(__sg_page_iter_start);

static int sg_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
}

bool __sg_page_iter_next(struct sg_page_iter *piter)
{
	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_next);

static int sg_dma_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}

bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
{
	struct sg_page_iter *piter = &dma_iter->base;

	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_dma_next);

/**
 * sg_miter_start - start mapping iteration over a sg list
 * @miter: sg mapping iter to be started
 * @sgl: sg list to iterate over
 * @nents: number of sg entries
 * @flags: sg iterator flags
 *
 * Description:
 *   Starts mapping iterator @miter.
 *
 * Context:
 *   Don't care.
 */
void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
		    unsigned int nents, unsigned int flags)
{
	memset(miter, 0, sizeof(struct sg_mapping_iter));

	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
	miter->__flags = flags;
}
EXPORT_SYMBOL(sg_miter_start);

static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		miter->__offset &= PAGE_SIZE - 1;
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}

/**
 * sg_miter_skip - reposition mapping iterator
 * @miter: sg mapping iter to be skipped
 * @offset: number of bytes to advance past the current location
 *
 * Description:
 *   Sets the offset of @miter to its current location plus @offset bytes.
 *   If mapping iterator @miter has been advanced by sg_miter_next(), this
 *   stops @miter.
 *
 * Context:
 *   Don't care.
 *
 * Returns:
 *   true if @miter contains the valid mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
{
	sg_miter_stop(miter);

	while (offset) {
		off_t consumed;

		if (!sg_miter_get_next_page(miter))
			return false;

		consumed = min_t(off_t, offset, miter->__remaining);
		miter->__offset += consumed;
		miter->__remaining -= consumed;
		offset -= consumed;
	}

	return true;
}
EXPORT_SYMBOL(sg_miter_skip);

/**
 * sg_miter_next - proceed mapping iterator to the next mapping
 * @miter: sg mapping iter to proceed
 *
 * Description:
 *   Proceeds @miter to the next mapping. @miter should have been started
 *   using sg_miter_start(). On successful return, @miter->page,
 *   @miter->addr and @miter->length point to the current mapping.
 *
 * Context:
 *   May sleep if !SG_MITER_ATOMIC.
 *
 * Returns:
 *   true if @miter contains the next mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_next(struct sg_mapping_iter *miter)
{
	sg_miter_stop(miter);

	/*
	 * Get to the next page if necessary.
	 * __remaining, __offset is adjusted by sg_miter_stop
	 */
	if (!sg_miter_get_next_page(miter))
		return false;

	miter->page = sg_page_iter_page(&miter->piter);
	miter->consumed = miter->length = miter->__remaining;

	if (miter->__flags & SG_MITER_ATOMIC)
		miter->addr = kmap_atomic(miter->page) + miter->__offset;
	else
		miter->addr = kmap(miter->page) + miter->__offset;

	return true;
}
EXPORT_SYMBOL(sg_miter_next);

/**
 * sg_miter_stop - stop mapping iteration
 * @miter: sg mapping iter to be stopped
 *
 * Description:
 *   Stops mapping iterator @miter. @miter should have been started
 *   using sg_miter_start(). A stopped iteration can be resumed by
 *   calling sg_miter_next() on it. This is useful when resources (kmap)
 *   need to be released during iteration.
 *
 * Context:
 *   Don't care otherwise.
 */
void sg_miter_stop(struct sg_mapping_iter *miter)
{
	WARN_ON(miter->consumed > miter->length);

	/* drop resources from the last iteration */
	if (miter->addr) {
		miter->__offset += miter->consumed;
		miter->__remaining -= miter->consumed;

		if (miter->__flags & SG_MITER_TO_SG)
			flush_dcache_page(miter->page);

		if (miter->__flags & SG_MITER_ATOMIC) {
			WARN_ON_ONCE(!pagefault_disabled());
			kunmap_atomic(miter->addr);
		} else
			kunmap(miter->page);

		miter->page = NULL;
		miter->addr = NULL;
		miter->length = 0;
		miter->consumed = 0;
	}
}
EXPORT_SYMBOL(sg_miter_stop);

/**
 * sg_copy_buffer - Copy data between a linear buffer and an SG list
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buf:	Where to copy from
 * @buflen:	The number of bytes to copy
 * @skip:	Number of bytes to skip before copying
 * @to_buffer:	transfer direction (true == from an sg list to a
 *		buffer, false == from a buffer to an sg list)
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC;

	if (to_buffer)
		sg_flags |= SG_MITER_FROM_SG;
	else
		sg_flags |= SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while ((offset < buflen) && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);

		if (to_buffer)
			memcpy(buf + offset, miter.addr, len);
		else
			memcpy(miter.addr, buf + offset, len);

		offset += len;
	}

	sg_miter_stop(&miter);

	return offset;
}
EXPORT_SYMBOL(sg_copy_buffer);

/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buf:	Where to copy from
 * @buflen:	The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);

/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buf:	Where to copy to
 * @buflen:	The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);

/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buf:	Where to copy from
 * @buflen:	The number of bytes to copy
 * @skip:	Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);

/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buf:	Where to copy to
 * @buflen:	The number of bytes to copy
 * @skip:	Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);

/**
 * sg_zero_buffer - Zero-out a part of a SG list
 * @sgl:	The SG list
 * @nents:	Number of SG entries
 * @buflen:	The number of bytes to zero out
 * @skip:	Number of bytes to skip before zeroing
 *
 * Returns the number of bytes zeroed.
 **/
size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
		      size_t buflen, off_t skip)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while (offset < buflen && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);
		memset(miter.addr, 0, len);

		offset += len;
	}

	sg_miter_stop(&miter);
	return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);

/*
 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
 * iterators, and add them to the scatterlist.
 */
static ssize_t extract_user_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct page **pages;
	unsigned int npages;
	ssize_t ret = 0, res;
	size_t len, off;

	/* We decant the page list into the tail of the scatterlist */
	pages = (void *)sgtable->sgl +
		array_size(sg_max, sizeof(struct scatterlist));
	pages -= sg_max;

	do {
		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
					     extraction_flags, &off);
		if (res <= 0)
			goto failed;

		len = res;
		maxsize -= len;
		ret += len;
		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
		sg_max -= npages;

		for (; npages > 0; npages--) {
			struct page *page = *pages;
			size_t seg = min_t(size_t, PAGE_SIZE - off, len);

			*pages++ = NULL;
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			len -= seg;
			off = 0;
		}
	} while (maxsize > 0 && sg_max > 0);

	return ret;

failed:
	while (sgtable->nents > sgtable->orig_nents)
		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
	return res;
}

/*
 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
 * scatterlist.  The pages are not pinned.
 */
static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct bio_vec *bv = iter->bvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		sg_set_page(sg, bv[i].bv_page, len, off);
		sgtable->nents++;
		sg++;
		sg_max--;

		ret += len;
		maxsize -= len;
		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
 * static buffers.  The pages are not pinned.
 */
static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct kvec *kv = iter->kvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		ret += len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);

			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page((void *)kaddr);

			sg_set_page(sg, page, len, off);
			sgtable->nents++;
			sg++;
			sg_max--;

			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && sg_max > 0);

		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	const struct folio_queue *folioq = iter->folioq;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned int slot = iter->folioq_slot;
	ssize_t ret = 0;
	size_t offset = iter->iov_offset;

	BUG_ON(!folioq);

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = folioq->next;
		if (WARN_ON_ONCE(!folioq))
			return 0;
		slot = 0;
	}

	do {
		struct folio *folio = folioq_folio(folioq, slot);
		size_t fsize = folioq_folio_size(folioq, slot);

		if (offset < fsize) {
			size_t part = umin(maxsize - ret, fsize - offset);

			sg_set_page(sg, folio_page(folio, 0), part, offset);
			sgtable->nents++;
			sg++;
			sg_max--;
			offset += part;
			ret += part;
		}

		if (offset >= fsize) {
			offset = 0;
			slot++;
			if (slot >= folioq_nr_slots(folioq)) {
				if (!folioq->next) {
					WARN_ON_ONCE(ret < iter->count);
					break;
				}
				folioq = folioq->next;
				slot = 0;
			}
		}
	} while (sg_max > 0 && ret < maxsize);

	iter->folioq = folioq;
	iter->folioq_slot = slot;
	iter->iov_offset = offset;
	iter->count -= ret;
	return ret;
}

/*
 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t offset, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - offset);

		sg_set_page(sg, folio_page(folio, 0), len, offset);
		sgtable->nents++;
		sg++;
		sg_max--;

		maxsize -= len;
		ret += len;
		if (maxsize <= 0 || sg_max == 0)
			break;
	}

	rcu_read_unlock();
	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/**
 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
 * @iter: The iterator to extract from
 * @maxsize: The amount of iterator to copy
 * @sgtable: The scatterlist table to fill in
 * @sg_max: Maximum number of elements in @sgtable that may be filled
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * add them to a scatterlist that refers to all of those bits, to a maximum
 * addition of @sg_max elements.
 *
 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
 * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
 * pinned; DISCARD-type is not supported.
 *
 * No end mark is placed on the scatterlist; that's left to the caller.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * If successful, @sgtable->nents is updated to include the number of elements
 * added and the number of bytes added is returned.  @sgtable->orig_nents is
 * left unaltered.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags)
{
	if (maxsize == 0)
		return 0;

	switch (iov_iter_type(iter)) {
	case ITER_UBUF:
	case ITER_IOVEC:
		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_BVEC:
		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_KVEC:
		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_FOLIOQ:
		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	case ITER_XARRAY:
		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	default:
		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
EXPORT_SYMBOL_GPL(extract_iter_to_sg);