// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Squashfs - a compressed read only filesystem for Linux
 *
 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
 * Phillip Lougher <phillip@squashfs.org.uk>
 */

/*
 * This file contains code for handling regular files.  A regular file
 * consists of a sequence of contiguous compressed blocks, and/or a
 * compressed fragment block (tail-end packed block).  The compressed size
 * of each datablock is stored in a block list contained within the
 * file inode (itself stored in one or more compressed metadata blocks).
 *
 * To speed up access to datablocks when reading 'large' files (256 Mbytes or
 * larger), the code implements an index cache that caches the mapping from
 * block index to datablock location on disk.
 *
 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
 * retaining a simple and space-efficient block list on disk.  The cache
 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
 * The index cache is designed to be memory efficient, and by default uses
 * 16 KiB.
 */

#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "page_actor.h"

45 * Locate cache slot in range [offset, index] for specified inode. If
46 * there's more than one return the slot closest to index.
48 static struct meta_index
*locate_meta_index(struct inode
*inode
, int offset
,
51 struct meta_index
*meta
= NULL
;
52 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
55 mutex_lock(&msblk
->meta_index_mutex
);
57 TRACE("locate_meta_index: index %d, offset %d\n", index
, offset
);
59 if (msblk
->meta_index
== NULL
)
62 for (i
= 0; i
< SQUASHFS_META_SLOTS
; i
++) {
63 if (msblk
->meta_index
[i
].inode_number
== inode
->i_ino
&&
64 msblk
->meta_index
[i
].offset
>= offset
&&
65 msblk
->meta_index
[i
].offset
<= index
&&
66 msblk
->meta_index
[i
].locked
== 0) {
67 TRACE("locate_meta_index: entry %d, offset %d\n", i
,
68 msblk
->meta_index
[i
].offset
);
69 meta
= &msblk
->meta_index
[i
];
70 offset
= meta
->offset
;
78 mutex_unlock(&msblk
->meta_index_mutex
);
85 * Find and initialise an empty cache slot for index offset.
87 static struct meta_index
*empty_meta_index(struct inode
*inode
, int offset
,
90 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
91 struct meta_index
*meta
= NULL
;
94 mutex_lock(&msblk
->meta_index_mutex
);
96 TRACE("empty_meta_index: offset %d, skip %d\n", offset
, skip
);
98 if (msblk
->meta_index
== NULL
) {
100 * First time cache index has been used, allocate and
101 * initialise. The cache index could be allocated at
102 * mount time but doing it here means it is allocated only
103 * if a 'large' file is read.
105 msblk
->meta_index
= kcalloc(SQUASHFS_META_SLOTS
,
106 sizeof(*(msblk
->meta_index
)), GFP_KERNEL
);
107 if (msblk
->meta_index
== NULL
) {
108 ERROR("Failed to allocate meta_index\n");
111 for (i
= 0; i
< SQUASHFS_META_SLOTS
; i
++) {
112 msblk
->meta_index
[i
].inode_number
= 0;
113 msblk
->meta_index
[i
].locked
= 0;
115 msblk
->next_meta_index
= 0;
118 for (i
= SQUASHFS_META_SLOTS
; i
&&
119 msblk
->meta_index
[msblk
->next_meta_index
].locked
; i
--)
120 msblk
->next_meta_index
= (msblk
->next_meta_index
+ 1) %
124 TRACE("empty_meta_index: failed!\n");
128 TRACE("empty_meta_index: returned meta entry %d, %p\n",
129 msblk
->next_meta_index
,
130 &msblk
->meta_index
[msblk
->next_meta_index
]);
132 meta
= &msblk
->meta_index
[msblk
->next_meta_index
];
133 msblk
->next_meta_index
= (msblk
->next_meta_index
+ 1) %
136 meta
->inode_number
= inode
->i_ino
;
137 meta
->offset
= offset
;
143 mutex_unlock(&msblk
->meta_index_mutex
);
148 static void release_meta_index(struct inode
*inode
, struct meta_index
*meta
)
150 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
151 mutex_lock(&msblk
->meta_index_mutex
);
153 mutex_unlock(&msblk
->meta_index_mutex
);
158 * Read the next n blocks from the block list, starting from
159 * metadata block <start_block, offset>.
161 static long long read_indexes(struct super_block
*sb
, int n
,
162 u64
*start_block
, int *offset
)
166 __le32
*blist
= kmalloc(PAGE_SIZE
, GFP_KERNEL
);
169 ERROR("read_indexes: Failed to allocate block_list\n");
174 int blocks
= min_t(int, n
, PAGE_SIZE
>> 2);
176 err
= squashfs_read_metadata(sb
, blist
, start_block
,
177 offset
, blocks
<< 2);
179 ERROR("read_indexes: reading block [%llx:%x]\n",
180 *start_block
, *offset
);
184 for (i
= 0; i
< blocks
; i
++) {
185 int size
= squashfs_block_size(blist
[i
]);
190 block
+= SQUASHFS_COMPRESSED_SIZE_BLOCK(size
);
205 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which
206 * can cache one index -> datablock/blocklist-block mapping. We wish
207 * to distribute these over the length of the file, entry[0] maps index x,
208 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on.
209 * The larger the file, the greater the skip factor. The skip factor is
210 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure
211 * the number of metadata blocks that need to be read fits into the cache.
212 * If the skip factor is limited in this way then the file will use multiple
215 static inline int calculate_skip(u64 blocks
)
217 u64 skip
= blocks
/ ((SQUASHFS_META_ENTRIES
+ 1)
218 * SQUASHFS_META_INDEXES
);
219 return min((u64
) SQUASHFS_CACHED_BLKS
- 1, skip
+ 1);
224 * Search and grow the index cache for the specified inode, returning the
225 * on-disk locations of the datablock and block list metadata block
226 * <index_block, index_offset> for index (scaled to nearest cache index).
228 static int fill_meta_index(struct inode
*inode
, int index
,
229 u64
*index_block
, int *index_offset
, u64
*data_block
)
231 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
232 int skip
= calculate_skip(i_size_read(inode
) >> msblk
->block_log
);
234 struct meta_index
*meta
;
235 struct meta_entry
*meta_entry
;
236 u64 cur_index_block
= squashfs_i(inode
)->block_list_start
;
237 int cur_offset
= squashfs_i(inode
)->offset
;
238 u64 cur_data_block
= squashfs_i(inode
)->start
;
242 * Scale index to cache index (cache slot entry)
244 index
/= SQUASHFS_META_INDEXES
* skip
;
246 while (offset
< index
) {
247 meta
= locate_meta_index(inode
, offset
+ 1, index
);
250 meta
= empty_meta_index(inode
, offset
+ 1, skip
);
254 offset
= index
< meta
->offset
+ meta
->entries
? index
:
255 meta
->offset
+ meta
->entries
- 1;
256 meta_entry
= &meta
->meta_entry
[offset
- meta
->offset
];
257 cur_index_block
= meta_entry
->index_block
+
259 cur_offset
= meta_entry
->offset
;
260 cur_data_block
= meta_entry
->data_block
;
261 TRACE("get_meta_index: offset %d, meta->offset %d, "
262 "meta->entries %d\n", offset
, meta
->offset
,
264 TRACE("get_meta_index: index_block 0x%llx, offset 0x%x"
265 " data_block 0x%llx\n", cur_index_block
,
266 cur_offset
, cur_data_block
);
270 * If necessary grow cache slot by reading block list. Cache
271 * slot is extended up to index or to the end of the slot, in
272 * which case further slots will be used.
274 for (i
= meta
->offset
+ meta
->entries
; i
<= index
&&
275 i
< meta
->offset
+ SQUASHFS_META_ENTRIES
; i
++) {
276 int blocks
= skip
* SQUASHFS_META_INDEXES
;
277 long long res
= read_indexes(inode
->i_sb
, blocks
,
278 &cur_index_block
, &cur_offset
);
281 if (meta
->entries
== 0)
283 * Don't leave an empty slot on read
284 * error allocated to this inode...
286 meta
->inode_number
= 0;
291 cur_data_block
+= res
;
292 meta_entry
= &meta
->meta_entry
[i
- meta
->offset
];
293 meta_entry
->index_block
= cur_index_block
-
295 meta_entry
->offset
= cur_offset
;
296 meta_entry
->data_block
= cur_data_block
;
301 TRACE("get_meta_index: meta->offset %d, meta->entries %d\n",
302 meta
->offset
, meta
->entries
);
304 release_meta_index(inode
, meta
);
308 *index_block
= cur_index_block
;
309 *index_offset
= cur_offset
;
310 *data_block
= cur_data_block
;
313 * Scale cache index (cache slot entry) to index
315 return offset
* SQUASHFS_META_INDEXES
* skip
;
318 release_meta_index(inode
, meta
);
324 * Get the on-disk location and compressed size of the datablock
325 * specified by index. Fill_meta_index() does most of the work.
327 static int read_blocklist(struct inode
*inode
, int index
, u64
*block
)
333 int res
= fill_meta_index(inode
, index
, &start
, &offset
, block
);
335 TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
336 " 0x%x, block 0x%llx\n", res
, index
, start
, offset
,
343 * res contains the index of the mapping returned by fill_meta_index(),
344 * this will likely be less than the desired index (because the
345 * meta_index cache works at a higher granularity). Read any
346 * extra block indexes needed.
349 blks
= read_indexes(inode
->i_sb
, index
- res
, &start
, &offset
);
356 * Read length of block specified by index.
358 res
= squashfs_read_metadata(inode
->i_sb
, &size
, &start
, &offset
,
362 return squashfs_block_size(size
);
365 void squashfs_fill_page(struct page
*page
, struct squashfs_cache_entry
*buffer
, int offset
, int avail
)
370 pageaddr
= kmap_atomic(page
);
371 copied
= squashfs_copy_data(pageaddr
, buffer
, offset
, avail
);
372 memset(pageaddr
+ copied
, 0, PAGE_SIZE
- copied
);
373 kunmap_atomic(pageaddr
);
375 flush_dcache_page(page
);
377 SetPageUptodate(page
);
380 /* Copy data into page cache */
381 void squashfs_copy_cache(struct page
*page
, struct squashfs_cache_entry
*buffer
,
382 int bytes
, int offset
)
384 struct inode
*inode
= page
->mapping
->host
;
385 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
386 int i
, mask
= (1 << (msblk
->block_log
- PAGE_SHIFT
)) - 1;
387 int start_index
= page
->index
& ~mask
, end_index
= start_index
| mask
;
390 * Loop copying datablock into pages. As the datablock likely covers
391 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly
392 * grab the pages from the page cache, except for the page that we've
393 * been called to fill.
395 for (i
= start_index
; i
<= end_index
&& bytes
> 0; i
++,
396 bytes
-= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
397 struct page
*push_page
;
398 int avail
= buffer
? min_t(int, bytes
, PAGE_SIZE
) : 0;
400 TRACE("bytes %d, i %d, available_bytes %d\n", bytes
, i
, avail
);
402 push_page
= (i
== page
->index
) ? page
:
403 grab_cache_page_nowait(page
->mapping
, i
);
408 if (PageUptodate(push_page
))
411 squashfs_fill_page(push_page
, buffer
, offset
, avail
);
413 unlock_page(push_page
);
414 if (i
!= page
->index
)
419 /* Read datablock stored packed inside a fragment (tail-end packed block) */
420 static int squashfs_readpage_fragment(struct page
*page
, int expected
)
422 struct inode
*inode
= page
->mapping
->host
;
423 struct squashfs_cache_entry
*buffer
= squashfs_get_fragment(inode
->i_sb
,
424 squashfs_i(inode
)->fragment_block
,
425 squashfs_i(inode
)->fragment_size
);
426 int res
= buffer
->error
;
429 ERROR("Unable to read page, block %llx, size %x\n",
430 squashfs_i(inode
)->fragment_block
,
431 squashfs_i(inode
)->fragment_size
);
433 squashfs_copy_cache(page
, buffer
, expected
,
434 squashfs_i(inode
)->fragment_offset
);
436 squashfs_cache_put(buffer
);
440 static int squashfs_readpage_sparse(struct page
*page
, int expected
)
442 squashfs_copy_cache(page
, NULL
, expected
, 0);
446 static int squashfs_read_folio(struct file
*file
, struct folio
*folio
)
448 struct page
*page
= &folio
->page
;
449 struct inode
*inode
= page
->mapping
->host
;
450 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
451 int index
= page
->index
>> (msblk
->block_log
- PAGE_SHIFT
);
452 int file_end
= i_size_read(inode
) >> msblk
->block_log
;
453 int expected
= index
== file_end
?
454 (i_size_read(inode
) & (msblk
->block_size
- 1)) :
459 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
460 page
->index
, squashfs_i(inode
)->start
);
462 if (page
->index
>= ((i_size_read(inode
) + PAGE_SIZE
- 1) >>
466 if (index
< file_end
|| squashfs_i(inode
)->fragment_block
==
467 SQUASHFS_INVALID_BLK
) {
470 res
= read_blocklist(inode
, index
, &block
);
475 res
= squashfs_readpage_sparse(page
, expected
);
477 res
= squashfs_readpage_block(page
, block
, res
, expected
);
479 res
= squashfs_readpage_fragment(page
, expected
);
485 pageaddr
= kmap_atomic(page
);
486 memset(pageaddr
, 0, PAGE_SIZE
);
487 kunmap_atomic(pageaddr
);
488 flush_dcache_page(page
);
490 SetPageUptodate(page
);
496 static int squashfs_readahead_fragment(struct page
**page
,
497 unsigned int pages
, unsigned int expected
, loff_t start
)
499 struct inode
*inode
= page
[0]->mapping
->host
;
500 struct squashfs_cache_entry
*buffer
= squashfs_get_fragment(inode
->i_sb
,
501 squashfs_i(inode
)->fragment_block
,
502 squashfs_i(inode
)->fragment_size
);
503 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
504 int i
, bytes
, copied
;
505 struct squashfs_page_actor
*actor
;
508 struct page
*last_page
;
513 actor
= squashfs_page_actor_init_special(msblk
, page
, pages
,
518 squashfs_actor_nobuff(actor
);
519 addr
= squashfs_first_page(actor
);
521 for (copied
= offset
= 0; offset
< expected
; offset
+= PAGE_SIZE
) {
522 int avail
= min_t(int, expected
- offset
, PAGE_SIZE
);
525 bytes
= squashfs_copy_data(addr
, buffer
, offset
+
526 squashfs_i(inode
)->fragment_offset
, avail
);
533 addr
= squashfs_next_page(actor
);
536 last_page
= squashfs_page_actor_free(actor
);
538 if (copied
== expected
&& !IS_ERR(last_page
)) {
539 /* Last page (if present) may have trailing bytes not filled */
540 bytes
= copied
% PAGE_SIZE
;
541 if (bytes
&& last_page
)
542 memzero_page(last_page
, bytes
, PAGE_SIZE
- bytes
);
544 for (i
= 0; i
< pages
; i
++) {
545 flush_dcache_page(page
[i
]);
546 SetPageUptodate(page
[i
]);
550 for (i
= 0; i
< pages
; i
++) {
551 unlock_page(page
[i
]);
555 squashfs_cache_put(buffer
);
559 squashfs_page_actor_free(actor
);
562 squashfs_cache_put(buffer
);
566 static void squashfs_readahead(struct readahead_control
*ractl
)
568 struct inode
*inode
= ractl
->mapping
->host
;
569 struct squashfs_sb_info
*msblk
= inode
->i_sb
->s_fs_info
;
570 size_t mask
= (1UL << msblk
->block_log
) - 1;
571 unsigned short shift
= msblk
->block_log
- PAGE_SHIFT
;
572 loff_t start
= readahead_pos(ractl
) & ~mask
;
573 size_t len
= readahead_length(ractl
) + readahead_pos(ractl
) - start
;
574 struct squashfs_page_actor
*actor
;
575 unsigned int nr_pages
= 0;
578 loff_t file_end
= i_size_read(inode
) >> msblk
->block_log
;
579 unsigned int max_pages
= 1UL << shift
;
581 readahead_expand(ractl
, start
, (len
| mask
) + 1);
583 pages
= kmalloc_array(max_pages
, sizeof(void *), GFP_KERNEL
);
590 unsigned int expected
;
591 struct page
*last_page
;
593 expected
= start
>> msblk
->block_log
== file_end
?
594 (i_size_read(inode
) & (msblk
->block_size
- 1)) :
597 max_pages
= (expected
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
599 nr_pages
= __readahead_batch(ractl
, pages
, max_pages
);
603 if (readahead_pos(ractl
) >= i_size_read(inode
))
606 if (start
>> msblk
->block_log
== file_end
&&
607 squashfs_i(inode
)->fragment_block
!= SQUASHFS_INVALID_BLK
) {
608 res
= squashfs_readahead_fragment(pages
, nr_pages
,
615 bsize
= read_blocklist(inode
, start
>> msblk
->block_log
, &block
);
619 actor
= squashfs_page_actor_init_special(msblk
, pages
, nr_pages
,
624 res
= squashfs_read_data(inode
->i_sb
, block
, bsize
, NULL
, actor
);
626 last_page
= squashfs_page_actor_free(actor
);
628 if (res
== expected
&& !IS_ERR(last_page
)) {
631 /* Last page (if present) may have trailing bytes not filled */
632 bytes
= res
% PAGE_SIZE
;
633 if (start
>> msblk
->block_log
== file_end
&& bytes
&& last_page
)
634 memzero_page(last_page
, bytes
,
637 for (i
= 0; i
< nr_pages
; i
++) {
638 flush_dcache_page(pages
[i
]);
639 SetPageUptodate(pages
[i
]);
643 for (i
= 0; i
< nr_pages
; i
++) {
644 unlock_page(pages
[i
]);
648 start
+= readahead_batch_length(ractl
);
655 for (i
= 0; i
< nr_pages
; i
++) {
656 unlock_page(pages
[i
]);
662 const struct address_space_operations squashfs_aops
= {
663 .read_folio
= squashfs_read_folio
,
664 .readahead
= squashfs_readahead