2 * Copyright (c) 2020 iXsystems, Inc.
5 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 #include <sys/types.h>
29 #include <sys/param.h>
31 #include <sys/dmu_impl.h>
32 #include <sys/dmu_tx.h>
34 #include <sys/dnode.h>
35 #include <sys/zfs_context.h>
36 #include <sys/dmu_objset.h>
37 #include <sys/dmu_traverse.h>
38 #include <sys/dsl_dataset.h>
39 #include <sys/dsl_dir.h>
40 #include <sys/dsl_pool.h>
41 #include <sys/dsl_synctask.h>
42 #include <sys/dsl_prop.h>
43 #include <sys/dmu_zfetch.h>
44 #include <sys/zfs_ioctl.h>
46 #include <sys/zio_checksum.h>
47 #include <sys/zio_compress.h>
49 #include <sys/zfeature.h>
51 #include <sys/zfs_rlock.h>
52 #include <sys/racct.h>
54 #include <sys/zfs_znode.h>
55 #include <sys/zfs_vnops.h>
57 #include <sys/ccompat.h>
/* Convert a VM page index to a byte offset. */
#define	IDX_TO_OFF(idx)	(((vm_ooffset_t)(idx)) << PAGE_SHIFT)

/*
 * Busy-state flags used when grabbing read-behind/read-ahead pages.
 * Parenthesize the expansion so the macro composes safely with any
 * surrounding operator, not just bitwise OR.
 */
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
66 dmu_write_pages(objset_t
*os
, uint64_t object
, uint64_t offset
, uint64_t size
,
67 vm_page_t
*ma
, dmu_tx_t
*tx
)
77 err
= dmu_buf_hold_array(os
, object
, offset
, size
,
78 FALSE
, FTAG
, &numbufs
, &dbp
);
82 for (i
= 0; i
< numbufs
; i
++) {
83 int tocpy
, copied
, thiscpy
;
85 dmu_buf_t
*db
= dbp
[i
];
89 ASSERT3U(db
->db_size
, >=, PAGESIZE
);
91 bufoff
= offset
- db
->db_offset
;
92 tocpy
= (int)MIN(db
->db_size
- bufoff
, size
);
94 ASSERT(i
== 0 || i
== numbufs
-1 || tocpy
== db
->db_size
);
96 if (tocpy
== db
->db_size
)
97 dmu_buf_will_fill(db
, tx
, B_FALSE
);
99 dmu_buf_will_dirty(db
, tx
);
101 for (copied
= 0; copied
< tocpy
; copied
+= PAGESIZE
) {
102 ASSERT3U(ptoa((*ma
)->pindex
), ==,
103 db
->db_offset
+ bufoff
);
104 thiscpy
= MIN(PAGESIZE
, tocpy
- copied
);
105 va
= zfs_map_page(*ma
, &sf
);
106 memcpy((char *)db
->db_data
+ bufoff
, va
, thiscpy
);
112 if (tocpy
== db
->db_size
)
113 dmu_buf_fill_done(db
, tx
, B_FALSE
);
118 dmu_buf_rele_array(dbp
, numbufs
, FTAG
);
123 dmu_read_pages(objset_t
*os
, uint64_t object
, vm_page_t
*ma
, int count
,
124 int *rbehind
, int *rahead
, int last_size
)
133 int bufoff
, pgoff
, tocpy
;
137 ASSERT3U(ma
[0]->pindex
+ count
- 1, ==, ma
[count
- 1]->pindex
);
138 ASSERT3S(last_size
, <=, PAGE_SIZE
);
140 err
= dmu_buf_hold_array(os
, object
, IDX_TO_OFF(ma
[0]->pindex
),
141 IDX_TO_OFF(count
- 1) + last_size
, TRUE
, FTAG
, &numbufs
, &dbp
);
146 IMPLY(last_size
< PAGE_SIZE
, *rahead
== 0);
147 if (dbp
[0]->db_offset
!= 0 || numbufs
> 1) {
148 for (i
= 0; i
< numbufs
; i
++) {
149 ASSERT(ISP2(dbp
[i
]->db_size
));
150 ASSERT3U((dbp
[i
]->db_offset
% dbp
[i
]->db_size
), ==, 0);
151 ASSERT3U(dbp
[i
]->db_size
, ==, dbp
[0]->db_size
);
156 vmobj
= ma
[0]->object
;
159 for (i
= 0; i
< *rbehind
; i
++) {
160 m
= vm_page_grab_unlocked(vmobj
, ma
[0]->pindex
- 1 - i
,
161 VM_ALLOC_NORMAL
| VM_ALLOC_NOWAIT
| VM_ALLOC_BUSY_FLAGS
);
164 if (!vm_page_none_valid(m
)) {
165 ASSERT3U(m
->valid
, ==, VM_PAGE_BITS_ALL
);
169 ASSERT3U(m
->dirty
, ==, 0);
170 ASSERT(!pmap_page_is_write_mapped(m
));
172 ASSERT3U(db
->db_size
, >, PAGE_SIZE
);
173 bufoff
= IDX_TO_OFF(m
->pindex
) % db
->db_size
;
174 va
= zfs_map_page(m
, &sf
);
175 memcpy(va
, (char *)db
->db_data
+ bufoff
, PAGESIZE
);
178 if ((m
->busy_lock
& VPB_BIT_WAITERS
) != 0)
181 vm_page_deactivate(m
);
186 bufoff
= IDX_TO_OFF(ma
[0]->pindex
) % db
->db_size
;
188 for (mi
= 0, di
= 0; mi
< count
&& di
< numbufs
; ) {
191 if (m
!= bogus_page
) {
192 vm_page_assert_xbusied(m
);
193 ASSERT(vm_page_none_valid(m
));
194 ASSERT3U(m
->dirty
, ==, 0);
195 ASSERT(!pmap_page_is_write_mapped(m
));
196 va
= zfs_map_page(m
, &sf
);
202 if (m
!= bogus_page
) {
203 ASSERT3U(IDX_TO_OFF(m
->pindex
) + pgoff
, ==,
204 db
->db_offset
+ bufoff
);
208 * We do not need to clamp the copy size by the file
209 * size as the last block is zero-filled beyond the
210 * end of file anyway.
212 tocpy
= MIN(db
->db_size
- bufoff
, PAGESIZE
- pgoff
);
213 ASSERT3S(tocpy
, >=, 0);
215 memcpy(va
+ pgoff
, (char *)db
->db_data
+ bufoff
, tocpy
);
218 ASSERT3S(pgoff
, >=, 0);
219 ASSERT3S(pgoff
, <=, PAGESIZE
);
220 if (pgoff
== PAGESIZE
) {
221 if (m
!= bogus_page
) {
225 ASSERT3S(mi
, <, count
);
231 ASSERT3S(bufoff
, >=, 0);
232 ASSERT3S(bufoff
, <=, db
->db_size
);
233 if (bufoff
== db
->db_size
) {
234 ASSERT3S(di
, <, numbufs
);
242 * Three possibilities:
243 * - last requested page ends at a buffer boundary and , thus,
244 * all pages and buffers have been iterated;
245 * - all requested pages are filled, but the last buffer
246 * has not been exhausted;
247 * the read-ahead is possible only in this case;
248 * - all buffers have been read, but the last page has not been
250 * this is only possible if the file has only a single buffer
251 * with a size that is not a multiple of the page size.
254 ASSERT3S(di
, >=, numbufs
- 1);
255 IMPLY(*rahead
!= 0, di
== numbufs
- 1);
256 IMPLY(*rahead
!= 0, bufoff
!= 0);
260 ASSERT3S(mi
, >=, count
- 1);
262 IMPLY(pgoff
== 0, mi
== count
);
264 ASSERT3S(mi
, ==, count
- 1);
265 ASSERT3U((dbp
[0]->db_size
& PAGE_MASK
), !=, 0);
270 ASSERT3P(m
, !=, bogus_page
);
271 memset(va
+ pgoff
, 0, PAGESIZE
- pgoff
);
276 for (i
= 0; i
< *rahead
; i
++) {
277 m
= vm_page_grab_unlocked(vmobj
, ma
[count
- 1]->pindex
+ 1 + i
,
278 VM_ALLOC_NORMAL
| VM_ALLOC_NOWAIT
| VM_ALLOC_BUSY_FLAGS
);
281 if (!vm_page_none_valid(m
)) {
282 ASSERT3U(m
->valid
, ==, VM_PAGE_BITS_ALL
);
286 ASSERT3U(m
->dirty
, ==, 0);
287 ASSERT(!pmap_page_is_write_mapped(m
));
289 ASSERT3U(db
->db_size
, >, PAGE_SIZE
);
290 bufoff
= IDX_TO_OFF(m
->pindex
) % db
->db_size
;
291 tocpy
= MIN(db
->db_size
- bufoff
, PAGESIZE
);
292 va
= zfs_map_page(m
, &sf
);
293 memcpy(va
, (char *)db
->db_data
+ bufoff
, tocpy
);
294 if (tocpy
< PAGESIZE
) {
295 ASSERT3S(i
, ==, *rahead
- 1);
296 ASSERT3U((db
->db_size
& PAGE_MASK
), !=, 0);
297 memset(va
+ tocpy
, 0, PAGESIZE
- tocpy
);
301 if ((m
->busy_lock
& VPB_BIT_WAITERS
) != 0)
304 vm_page_deactivate(m
);
309 dmu_buf_rele_array(dbp
, numbufs
, FTAG
);