2 * Copyright (c) 2020 iXsystems, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/types.h>
29 #include <sys/param.h>
31 #include <sys/dmu_impl.h>
32 #include <sys/dmu_tx.h>
34 #include <sys/dnode.h>
35 #include <sys/zfs_context.h>
36 #include <sys/dmu_objset.h>
37 #include <sys/dmu_traverse.h>
38 #include <sys/dsl_dataset.h>
39 #include <sys/dsl_dir.h>
40 #include <sys/dsl_pool.h>
41 #include <sys/dsl_synctask.h>
42 #include <sys/dsl_prop.h>
43 #include <sys/dmu_zfetch.h>
44 #include <sys/zfs_ioctl.h>
46 #include <sys/zio_checksum.h>
47 #include <sys/zio_compress.h>
49 #include <sys/zfeature.h>
51 #include <sys/zfs_rlock.h>
52 #include <sys/racct.h>
54 #include <sys/zfs_znode.h>
55 #include <sys/zfs_vnops.h>
57 #include <sys/ccompat.h>
/*
 * Convert a page index into a byte offset: the index is first widened to
 * vm_ooffset_t and then shifted left by PAGE_SHIFT.
 */
60 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
/*
 * Busy-state flags OR-ed into the vm_page_grab_unlocked() allocation
 * requests made in this file.  The replacement list is parenthesized so
 * the macro always behaves as a single operand, whatever operators
 * surround it at the point of use.
 */
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
66 dmu_write_pages(objset_t
*os
, uint64_t object
, uint64_t offset
, uint64_t size
,
67 vm_page_t
*ma
, dmu_tx_t
*tx
)
77 err
= dmu_buf_hold_array(os
, object
, offset
, size
,
78 FALSE
, FTAG
, &numbufs
, &dbp
);
82 for (i
= 0; i
< numbufs
; i
++) {
83 int tocpy
, copied
, thiscpy
;
85 dmu_buf_t
*db
= dbp
[i
];
89 ASSERT3U(db
->db_size
, >=, PAGESIZE
);
91 bufoff
= offset
- db
->db_offset
;
92 tocpy
= (int)MIN(db
->db_size
- bufoff
, size
);
94 ASSERT(i
== 0 || i
== numbufs
-1 || tocpy
== db
->db_size
);
96 if (tocpy
== db
->db_size
)
97 dmu_buf_will_fill(db
, tx
, B_FALSE
);
99 dmu_buf_will_dirty(db
, tx
);
101 for (copied
= 0; copied
< tocpy
; copied
+= PAGESIZE
) {
102 ASSERT3U(ptoa((*ma
)->pindex
), ==,
103 db
->db_offset
+ bufoff
);
104 thiscpy
= MIN(PAGESIZE
, tocpy
- copied
);
105 va
= zfs_map_page(*ma
, &sf
);
106 ASSERT(db
->db_data
!= NULL
);
107 memcpy((char *)db
->db_data
+ bufoff
, va
, thiscpy
);
113 if (tocpy
== db
->db_size
)
114 dmu_buf_fill_done(db
, tx
, B_FALSE
);
119 dmu_buf_rele_array(dbp
, numbufs
, FTAG
);
124 dmu_read_pages(objset_t
*os
, uint64_t object
, vm_page_t
*ma
, int count
,
125 int *rbehind
, int *rahead
, int last_size
)
134 int bufoff
, pgoff
, tocpy
;
138 ASSERT3U(ma
[0]->pindex
+ count
- 1, ==, ma
[count
- 1]->pindex
);
139 ASSERT3S(last_size
, <=, PAGE_SIZE
);
141 err
= dmu_buf_hold_array(os
, object
, IDX_TO_OFF(ma
[0]->pindex
),
142 IDX_TO_OFF(count
- 1) + last_size
, TRUE
, FTAG
, &numbufs
, &dbp
);
147 IMPLY(last_size
< PAGE_SIZE
, *rahead
== 0);
148 if (dbp
[0]->db_offset
!= 0 || numbufs
> 1) {
149 for (i
= 0; i
< numbufs
; i
++) {
150 ASSERT(ISP2(dbp
[i
]->db_size
));
151 ASSERT3U((dbp
[i
]->db_offset
% dbp
[i
]->db_size
), ==, 0);
152 ASSERT3U(dbp
[i
]->db_size
, ==, dbp
[0]->db_size
);
157 vmobj
= ma
[0]->object
;
160 for (i
= 0; i
< *rbehind
; i
++) {
161 m
= vm_page_grab_unlocked(vmobj
, ma
[0]->pindex
- 1 - i
,
162 VM_ALLOC_NORMAL
| VM_ALLOC_NOWAIT
| VM_ALLOC_BUSY_FLAGS
);
165 if (!vm_page_none_valid(m
)) {
166 ASSERT3U(m
->valid
, ==, VM_PAGE_BITS_ALL
);
170 ASSERT3U(m
->dirty
, ==, 0);
171 ASSERT(!pmap_page_is_write_mapped(m
));
173 ASSERT3U(db
->db_size
, >, PAGE_SIZE
);
174 bufoff
= IDX_TO_OFF(m
->pindex
) % db
->db_size
;
175 va
= zfs_map_page(m
, &sf
);
176 ASSERT(db
->db_data
!= NULL
);
177 memcpy(va
, (char *)db
->db_data
+ bufoff
, PAGESIZE
);
180 if ((m
->busy_lock
& VPB_BIT_WAITERS
) != 0)
183 vm_page_deactivate(m
);
188 bufoff
= IDX_TO_OFF(ma
[0]->pindex
) % db
->db_size
;
190 for (mi
= 0, di
= 0; mi
< count
&& di
< numbufs
; ) {
193 if (m
!= bogus_page
) {
194 vm_page_assert_xbusied(m
);
195 ASSERT(vm_page_none_valid(m
));
196 ASSERT3U(m
->dirty
, ==, 0);
197 ASSERT(!pmap_page_is_write_mapped(m
));
198 va
= zfs_map_page(m
, &sf
);
204 if (m
!= bogus_page
) {
205 ASSERT3U(IDX_TO_OFF(m
->pindex
) + pgoff
, ==,
206 db
->db_offset
+ bufoff
);
210 * We do not need to clamp the copy size by the file
211 * size as the last block is zero-filled beyond the
212 * end of file anyway.
214 tocpy
= MIN(db
->db_size
- bufoff
, PAGESIZE
- pgoff
);
215 ASSERT3S(tocpy
, >=, 0);
216 if (m
!= bogus_page
) {
217 ASSERT(db
->db_data
!= NULL
);
218 memcpy(va
+ pgoff
, (char *)db
->db_data
+ bufoff
, tocpy
);
222 ASSERT3S(pgoff
, >=, 0);
223 ASSERT3S(pgoff
, <=, PAGESIZE
);
224 if (pgoff
== PAGESIZE
) {
225 if (m
!= bogus_page
) {
229 ASSERT3S(mi
, <, count
);
235 ASSERT3S(bufoff
, >=, 0);
236 ASSERT3S(bufoff
, <=, db
->db_size
);
237 if (bufoff
== db
->db_size
) {
238 ASSERT3S(di
, <, numbufs
);
246 * Three possibilities:
247 * - last requested page ends at a buffer boundary and, thus,
248 * all pages and buffers have been iterated;
249 * - all requested pages are filled, but the last buffer
250 * has not been exhausted;
251 * the read-ahead is possible only in this case;
252 * - all buffers have been read, but the last page has not been
254 * this is only possible if the file has only a single buffer
255 * with a size that is not a multiple of the page size.
258 ASSERT3S(di
, >=, numbufs
- 1);
259 IMPLY(*rahead
!= 0, di
== numbufs
- 1);
260 IMPLY(*rahead
!= 0, bufoff
!= 0);
264 ASSERT3S(mi
, >=, count
- 1);
266 IMPLY(pgoff
== 0, mi
== count
);
268 ASSERT3S(mi
, ==, count
- 1);
269 ASSERT3U((dbp
[0]->db_size
& PAGE_MASK
), !=, 0);
274 ASSERT3P(m
, !=, bogus_page
);
275 memset(va
+ pgoff
, 0, PAGESIZE
- pgoff
);
280 for (i
= 0; i
< *rahead
; i
++) {
281 m
= vm_page_grab_unlocked(vmobj
, ma
[count
- 1]->pindex
+ 1 + i
,
282 VM_ALLOC_NORMAL
| VM_ALLOC_NOWAIT
| VM_ALLOC_BUSY_FLAGS
);
285 if (!vm_page_none_valid(m
)) {
286 ASSERT3U(m
->valid
, ==, VM_PAGE_BITS_ALL
);
290 ASSERT3U(m
->dirty
, ==, 0);
291 ASSERT(!pmap_page_is_write_mapped(m
));
293 ASSERT3U(db
->db_size
, >, PAGE_SIZE
);
294 bufoff
= IDX_TO_OFF(m
->pindex
) % db
->db_size
;
295 tocpy
= MIN(db
->db_size
- bufoff
, PAGESIZE
);
296 va
= zfs_map_page(m
, &sf
);
297 ASSERT(db
->db_data
!= NULL
);
298 memcpy(va
, (char *)db
->db_data
+ bufoff
, tocpy
);
299 if (tocpy
< PAGESIZE
) {
300 ASSERT3S(i
, ==, *rahead
- 1);
301 ASSERT3U((db
->db_size
& PAGE_MASK
), !=, 0);
302 memset(va
+ tocpy
, 0, PAGESIZE
- tocpy
);
306 if ((m
->busy_lock
& VPB_BIT_WAITERS
) != 0)
309 vm_page_deactivate(m
);
314 dmu_buf_rele_array(dbp
, numbufs
, FTAG
);