// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/sched/mm.h>
#include <trace/events/erofs.h>
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (!buf->base)
		return;
	kunmap_local(buf->base);
	buf->base = NULL;
}
void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	folio_put(page_folio(buf->page));
	buf->page = NULL;
}
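
/*
 * erofs_bread() returns a kernel-mapped pointer to the metadata at @offset.
 * The previously cached page in @buf is reused if it still covers @offset;
 * otherwise the old reference is dropped and a new folio is read in.  With
 * EROFS_NO_KMAP only the page reference is kept and NULL is returned.
 */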
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
		  enum erofs_kmap_type type)
{
	pgoff_t index = offset >> PAGE_SHIFT;
	struct folio *folio = NULL;

	if (buf->page) {
		folio = page_folio(buf->page);
		if (folio_file_page(folio, index) != buf->page)
			erofs_unmap_metabuf(buf);
	}
	if (!folio || !folio_contains(folio, index)) {
		erofs_put_metabuf(buf);
		folio = read_mapping_folio(buf->mapping, index, buf->file);
		if (IS_ERR(folio))
			return folio;
	}
	buf->page = folio_file_page(folio, index);
	if (!buf->base && type == EROFS_KMAP)
		buf->base = kmap_local_page(buf->page);
	if (type == EROFS_NO_KMAP)
		return NULL;
	return buf->base + (offset & ~PAGE_MASK);
}
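
/*
 * Metadata may come from different backends depending on how the filesystem
 * is mounted: a backing file (file-backed I/O mode), an fscache cookie inode,
 * or the raw block device mapping.  erofs_init_metabuf() selects the right
 * address_space for subsequent erofs_bread() calls.
 */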
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	buf->file = NULL;
	if (erofs_is_fileio_mode(sbi)) {
		buf->file = sbi->fdev;	/* some filesystems like FUSE need it */
		buf->mapping = buf->file->f_mapping;
	} else if (erofs_is_fscache_mode(sb))
		buf->mapping = sbi->s_fscache->inode->i_mapping;
	else
		buf->mapping = sb->s_bdev->bd_mapping;
}
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, enum erofs_kmap_type type)
{
	erofs_init_metabuf(buf, sb);
	return erofs_bread(buf, offset, type);
}
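
/*
 * A minimal usage sketch (illustrative only, not part of this file): read a
 * piece of on-disk metadata at byte offset @pos and release it afterwards.
 * erofs_put_metabuf() is also safe on a buffer that was never populated.
 *
 *	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 *	void *ptr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
 *
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	// ... parse the metadata at ptr ...
 *	erofs_put_metabuf(&buf);
 */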
static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
	erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;

	map->m_flags = EROFS_MAP_MAPPED;	/* no hole in flat inodes */
	if (map->m_la < erofs_pos(sb, lastblk)) {
		map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
		map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
	} else {
		DBG_BUGON(!tailendpacking);
		map->m_pa = erofs_iloc(inode) + vi->inode_isize +
			vi->xattr_isize + erofs_blkoff(sb, map->m_la);
		map->m_plen = inode->i_size - map->m_la;

		/* inline data should be located in the same meta block */
		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
			erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
		map->m_flags |= EROFS_MAP_META;
	}
	return 0;
}
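
/*
 * For chunk-based inodes, erofs_map_blocks() looks up one chunk entry per
 * call: the entry covering logical offset m_la lives at
 *	pos = ALIGN(erofs_iloc(inode) + inode_isize + xattr_isize, unit) +
 *	      unit * chunknr
 * where chunknr = m_la >> chunkbits and unit is either the size of a chunk
 * index or EROFS_BLOCK_MAP_ENTRY_SIZE for the plain block map.
 */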
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u64 chunknr;
	unsigned int unit;
	erofs_off_t pos;
	void *kaddr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, 0);
	map->m_deviceid = 0;
	if (map->m_la >= inode->i_size) {
		/* leave out-of-bound access unmapped */
		map->m_flags = 0;
		map->m_plen = map->m_llen;
		goto out;
	}

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		err = erofs_map_blocks_flatmode(inode, map);
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
	if (IS_ERR(kaddr)) {
		err = PTR_ERR(kaddr);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			round_up(inode->i_size - map->m_la, sb->s_blocksize));

	/* handle block map */
	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
		__le32 *blkaddr = kaddr;

		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
			map->m_flags = 0;
		} else {
			map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
			map->m_flags = EROFS_MAP_MAPPED;
		}
		goto out_unlock;
	}
	/* parse chunk indexes */
	idx = kaddr;
	switch (le32_to_cpu(idx->blkaddr)) {
	case EROFS_NULL_ADDR:
		map->m_flags = 0;
		break;
	default:
		map->m_deviceid = le16_to_cpu(idx->device_id) &
			EROFS_SB(sb)->device_id_mask;
		map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
		map->m_flags = EROFS_MAP_MAPPED;
		break;
	}
out_unlock:
	erofs_put_metabuf(&buf);
out:
	if (!err)
		map->m_llen = map->m_plen;
	trace_erofs_map_blocks_exit(inode, map, 0, err);
	return err;
}
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
				    struct erofs_device_info *dif)
{
	map->m_bdev = NULL;
	map->m_fp = NULL;
	if (dif->file) {
		if (S_ISBLK(file_inode(dif->file)->i_mode))
			map->m_bdev = file_bdev(dif->file);
		else
			map->m_fp = dif->file;
	}
	map->m_daxdev = dif->dax_dev;
	map->m_dax_part_off = dif->dax_part_off;
	map->m_fscache = dif->fscache;
}
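
/*
 * erofs_map_dev() resolves a physical address to its backing device.  A
 * non-zero m_deviceid is a 1-based index into the device IDR; with a flat
 * multi-device layout (devs->flatdev), the address is only offset by the
 * device's mapped block address and the primary device is kept.
 */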
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	erofs_off_t startoff, length;
	int id;

	map->m_bdev = sb->s_bdev;
	map->m_daxdev = EROFS_SB(sb)->dax_dev;
	map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
	map->m_fscache = EROFS_SB(sb)->s_fscache;
	map->m_fp = EROFS_SB(sb)->fdev;

	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		if (devs->flatdev) {
			map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
			up_read(&devs->rwsem);
			return 0;
		}
		erofs_fill_from_devinfo(map, dif);
		up_read(&devs->rwsem);
	} else if (devs->extra_devices && !devs->flatdev) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			if (!dif->mapped_blkaddr)
				continue;

			startoff = erofs_pos(sb, dif->mapped_blkaddr);
			length = erofs_pos(sb, dif->blocks);
			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + length) {
				map->m_pa -= startoff;
				erofs_fill_from_devinfo(map, dif);
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}
/*
 * bit 30: I/O error occurred on this folio
 * bit 0 - 29: remaining parts to complete this folio
 */
#define EROFS_ONLINEFOLIO_EIO			(1 << 30)
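
/*
 * folio->private is used as an atomic "pending parts" counter while a
 * file-backed folio is under read: init sets it to 1, each split adds one,
 * and each end drops one, latching EROFS_ONLINEFOLIO_EIO on failure.  The
 * folio read is only completed once the count reaches zero, e.g. one init
 * plus N splits require N + 1 matching erofs_onlinefolio_end() calls.
 */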
void erofs_onlinefolio_init(struct folio *folio)
{
	union {
		atomic_t o;
		void *v;
	} u = { .o = ATOMIC_INIT(1) };

	folio->private = u.v;	/* valid only if file-backed folio is locked */
}
void erofs_onlinefolio_split(struct folio *folio)
{
	atomic_inc((atomic_t *)&folio->private);
}
void erofs_onlinefolio_end(struct folio *folio, int err)
{
	int orig, v;

	do {
		orig = atomic_read((atomic_t *)&folio->private);
		v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

	if (v & ~EROFS_ONLINEFOLIO_EIO)
		return;
	folio->private = 0;
	folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
}
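
/*
 * erofs_iomap_begin() translates an erofs extent into an iomap: unmapped
 * ranges become IOMAP_HOLE, tail-packed inline data becomes IOMAP_INLINE
 * (keeping the metabuf mapping alive via iomap->private until
 * erofs_iomap_end() releases it), and everything else is IOMAP_MAPPED.
 */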
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;

	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	iomap->offset = map.m_la;
	if (flags & IOMAP_DAX)
		iomap->dax_dev = mdev.m_daxdev;
	else
		iomap->bdev = mdev.m_bdev;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dax_part_off;
	}
	return 0;
}
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}
static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}
/*
 * Since there are no write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* no need to take the (shared) inode lock since it's a ro filesystem */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
				    NULL, 0, NULL, 0);
	return filemap_read(iocb, to, 0);
}
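
/*
 * Buffered reads go through the iomap-based aops below; erofs_file_read_iter()
 * above routes DAX inodes to dax_iomap_rw() and O_DIRECT on block devices to
 * iomap_dio_rw(), falling back to filemap_read() otherwise.
 */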
/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_aops = {
	.read_folio = erofs_read_folio,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
};
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		unsigned int order)
{
	return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault		= erofs_dax_fault,
	.huge_fault	= erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_readonly_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	vma->vm_ops = &erofs_dax_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE);
	return 0;
}
#else
#define erofs_file_mmap	generic_file_readonly_mmap
#endif
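
/*
 * SEEK_HOLE/SEEK_DATA are answered through iomap, using the compressed
 * (z_erofs) report ops where applicable; other whence values fall back to
 * generic_file_llseek().
 */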
static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return generic_file_llseek(file, offset, whence);
#endif

	if (whence == SEEK_HOLE)
		offset = iomap_seek_hole(inode, offset, ops);
	else if (whence == SEEK_DATA)
		offset = iomap_seek_data(inode, offset, ops);
	else
		return generic_file_llseek(file, offset, whence);

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
const struct file_operations erofs_file_fops = {
	.llseek		= erofs_file_llseek,
	.read_iter	= erofs_file_read_iter,
	.mmap		= erofs_file_mmap,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= filemap_splice_read,
};