// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Squashfs - a compressed read only filesystem for Linux
 *
 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
 * Phillip Lougher <phillip@squashfs.org.uk>
 */

/*
 * This file contains code for handling regular files.  A regular file
 * consists of a sequence of contiguous compressed blocks, and/or a
 * compressed fragment block (tail-end packed block).  The compressed size
 * of each datablock is stored in a block list contained within the
 * file inode (itself stored in one or more compressed metadata blocks).
 *
 * To speed up access to datablocks when reading 'large' files (256 Mbytes or
 * larger), the code implements an index cache that caches the mapping from
 * block index to datablock location on disk.
 *
 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
 * retaining a simple and space-efficient block list on disk.  The cache
 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
 * The index cache is designed to be memory efficient, and by default uses
 * 16 KiB.
 */
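/*
 * A sizing sketch, assuming the constants defined in squashfs_fs.h
 * (SQUASHFS_METADATA_SIZE = 8192, SQUASHFS_META_ENTRIES = 127,
 * SQUASHFS_META_SLOTS = 8, SQUASHFS_CACHED_BLKS = 8):
 *
 *   indexes per metadata block = 8192 / sizeof(__le32)      = 2048
 *   maximum skip factor        = SQUASHFS_CACHED_BLKS - 1   = 7
 *   blocks mapped per slot     = 127 * 7 * 2048             = 1820672
 *   file size per slot         = 1820672 * 128 KiB          ~= 224 GiB
 *   all 8 slots                = 8 * 224 GiB                ~= 1.75 TiB
 *
 * Files smaller than 2048 blocks (256 MiB at the default 128 KiB block
 * size) are covered by a single metadata block of indexes and never
 * populate the cache, hence the 'large' file threshold above.
 */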
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
 * Locate cache slot in range [offset, index] for specified inode.  If
 * there's more than one return the slot closest to index.
 */
static struct meta_index *locate_meta_index(struct inode *inode, int offset,
				int index)
{
	struct meta_index *meta = NULL;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int i;

	mutex_lock(&msblk->meta_index_mutex);

	TRACE("locate_meta_index: index %d, offset %d\n", index, offset);

	if (msblk->meta_index == NULL)
		goto not_allocated;

	for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
		if (msblk->meta_index[i].inode_number == inode->i_ino &&
				msblk->meta_index[i].offset >= offset &&
				msblk->meta_index[i].offset <= index &&
				msblk->meta_index[i].locked == 0) {
			TRACE("locate_meta_index: entry %d, offset %d\n", i,
					msblk->meta_index[i].offset);
			meta = &msblk->meta_index[i];
			offset = meta->offset;
		}
	}

	if (meta)
		meta->locked = 1;

not_allocated:
	mutex_unlock(&msblk->meta_index_mutex);

	return meta;
}
/*
 * Find and initialise an empty cache slot for index offset.
 */
static struct meta_index *empty_meta_index(struct inode *inode, int offset,
				int skip)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	struct meta_index *meta = NULL;
	int i;

	mutex_lock(&msblk->meta_index_mutex);

	TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip);

	if (msblk->meta_index == NULL) {
		/*
		 * First time cache index has been used, allocate and
		 * initialise.  The cache index could be allocated at
		 * mount time but doing it here means it is allocated only
		 * if a 'large' file is read.
		 */
		msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS,
			sizeof(*(msblk->meta_index)), GFP_KERNEL);
		if (msblk->meta_index == NULL) {
			ERROR("Failed to allocate meta_index\n");
			goto failed;
		}
		for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
			msblk->meta_index[i].inode_number = 0;
			msblk->meta_index[i].locked = 0;
		}
		msblk->next_meta_index = 0;
	}

	for (i = SQUASHFS_META_SLOTS; i &&
			msblk->meta_index[msblk->next_meta_index].locked; i--)
		msblk->next_meta_index = (msblk->next_meta_index + 1) %
			SQUASHFS_META_SLOTS;

	if (i == 0) {
		TRACE("empty_meta_index: failed!\n");
		goto failed;
	}

	TRACE("empty_meta_index: returned meta entry %d, %p\n",
			msblk->next_meta_index,
			&msblk->meta_index[msblk->next_meta_index]);

	meta = &msblk->meta_index[msblk->next_meta_index];
	msblk->next_meta_index = (msblk->next_meta_index + 1) %
			SQUASHFS_META_SLOTS;

	meta->inode_number = inode->i_ino;
	meta->offset = offset;
	meta->skip = skip;
	meta->entries = 0;
	meta->locked = 1;

failed:
	mutex_unlock(&msblk->meta_index_mutex);
	return meta;
}
static void release_meta_index(struct inode *inode, struct meta_index *meta)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;

	mutex_lock(&msblk->meta_index_mutex);
	meta->locked = 0;
	mutex_unlock(&msblk->meta_index_mutex);
}
/*
 * Read the next n blocks from the block list, starting from
 * metadata block <start_block, offset>.
 */
static long long read_indexes(struct super_block *sb, int n,
				u64 *start_block, int *offset)
{
	int err, i;
	long long block = 0;
	__le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL);

	if (blist == NULL) {
		ERROR("read_indexes: Failed to allocate block_list\n");
		return -ENOMEM;
	}

	while (n) {
		int blocks = min_t(int, n, PAGE_SIZE >> 2);

		err = squashfs_read_metadata(sb, blist, start_block,
				offset, blocks << 2);
		if (err < 0) {
			ERROR("read_indexes: reading block [%llx:%x]\n",
				*start_block, *offset);
			goto failure;
		}

		for (i = 0; i < blocks; i++) {
			int size = squashfs_block_size(blist[i]);

			if (size < 0) {
				err = size;
				goto failure;
			}
			block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
		}
		n -= blocks;
	}

	kfree(blist);
	return block;

failure:
	kfree(blist);
	return err;
}
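/*
 * A note on the entry format, as encoded by the squashfs_fs.h macros:
 * each __le32 block list entry stores the on-disk length of the
 * datablock, with SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24) set when the
 * block is stored uncompressed.  SQUASHFS_COMPRESSED_SIZE_BLOCK() masks
 * that flag off, which is why the loop above can sum the lengths
 * directly to obtain the offset of the next datablock.
 */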
/*
 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which
 * can cache one index -> datablock/blocklist-block mapping.  We wish
 * to distribute these over the length of the file, entry[0] maps index x,
 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on.
 * The larger the file, the greater the skip factor.  The skip factor is
 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure
 * the number of metadata blocks that need to be read fits into the cache.
 * If the skip factor is limited in this way then the file will use multiple
 * slots.
 */
static inline int calculate_skip(int blocks)
{
	int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
		* SQUASHFS_META_INDEXES);
	return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
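/*
 * A worked example, assuming SQUASHFS_META_ENTRIES = 127,
 * SQUASHFS_META_INDEXES = 2048 and SQUASHFS_CACHED_BLKS = 8: a 100 GiB
 * file with 128 KiB blocks has 819200 blocks, giving
 * skip = min(7, 819200 / (128 * 2048) + 1) = 4, so each slot entry maps
 * every 4 * 2048 = 8192nd block index.  Files larger than ~224 GiB clamp
 * at the maximum skip of 7 and spill over into additional slots.
 */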
/*
 * Search and grow the index cache for the specified inode, returning the
 * on-disk locations of the datablock and block list metadata block
 * <index_block, index_offset> for index (scaled to nearest cache index).
 */
static int fill_meta_index(struct inode *inode, int index,
		u64 *index_block, int *index_offset, u64 *data_block)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int skip = calculate_skip(i_size_read(inode) >> msblk->block_log);
	int offset = 0;
	struct meta_index *meta;
	struct meta_entry *meta_entry;
	u64 cur_index_block = squashfs_i(inode)->block_list_start;
	int cur_offset = squashfs_i(inode)->offset;
	u64 cur_data_block = squashfs_i(inode)->start;
	int err, i;

	/*
	 * Scale index to cache index (cache slot entry)
	 */
	index /= SQUASHFS_META_INDEXES * skip;

	while (offset < index) {
		meta = locate_meta_index(inode, offset + 1, index);

		if (meta == NULL) {
			meta = empty_meta_index(inode, offset + 1, skip);
			if (meta == NULL)
				goto all_done;
		} else {
			offset = index < meta->offset + meta->entries ? index :
				meta->offset + meta->entries - 1;
			meta_entry = &meta->meta_entry[offset - meta->offset];
			cur_index_block = meta_entry->index_block +
				msblk->inode_table;
			cur_offset = meta_entry->offset;
			cur_data_block = meta_entry->data_block;
			TRACE("get_meta_index: offset %d, meta->offset %d, "
				"meta->entries %d\n", offset, meta->offset,
				meta->entries);
			TRACE("get_meta_index: index_block 0x%llx, offset 0x%x"
				" data_block 0x%llx\n", cur_index_block,
				cur_offset, cur_data_block);
		}

		/*
		 * If necessary grow cache slot by reading block list.  Cache
		 * slot is extended up to index or to the end of the slot, in
		 * which case further slots will be used.
		 */
		for (i = meta->offset + meta->entries; i <= index &&
				i < meta->offset + SQUASHFS_META_ENTRIES; i++) {
			int blocks = skip * SQUASHFS_META_INDEXES;
			long long res = read_indexes(inode->i_sb, blocks,
					&cur_index_block, &cur_offset);

			if (res < 0) {
				if (meta->entries == 0)
					/*
					 * Don't leave an empty slot on read
					 * error allocated to this inode...
					 */
					meta->inode_number = 0;
				err = res;
				goto failed;
			}

			cur_data_block += res;
			meta_entry = &meta->meta_entry[i - meta->offset];
			meta_entry->index_block = cur_index_block -
				msblk->inode_table;
			meta_entry->offset = cur_offset;
			meta_entry->data_block = cur_data_block;
			meta->entries++;
			offset++;
		}

		TRACE("get_meta_index: meta->offset %d, meta->entries %d\n",
				meta->offset, meta->entries);

		release_meta_index(inode, meta);
	}

all_done:
	*index_block = cur_index_block;
	*index_offset = cur_offset;
	*data_block = cur_data_block;

	/*
	 * Scale cache index (cache slot entry) to index
	 */
	return offset * SQUASHFS_META_INDEXES * skip;

failed:
	release_meta_index(inode, meta);
	return err;
}
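/*
 * A scaling sketch, assuming SQUASHFS_META_INDEXES = 2048: with skip = 2,
 * block index 81920 scales to cache entry 81920 / (2048 * 2) = 20, and a
 * fully grown slot returns 20 * 2048 * 2 = 81920, i.e. the exact index;
 * an index that falls between entries returns the nearest lower mapping.
 */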
/*
 * Get the on-disk location and compressed size of the datablock
 * specified by index.  Fill_meta_index() does most of the work.
 */
static int read_blocklist(struct inode *inode, int index, u64 *block)
{
	u64 start;
	long long blks;
	int offset;
	__le32 size;
	int res = fill_meta_index(inode, index, &start, &offset, block);

	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
		" 0x%x, block 0x%llx\n", res, index, start, offset, *block);

	if (res < 0)
		return res;

	/*
	 * res contains the index of the mapping returned by fill_meta_index(),
	 * this will likely be less than the desired index (because the
	 * meta_index cache works at a higher granularity).  Read any
	 * extra block indexes needed.
	 */
	if (res < index) {
		blks = read_indexes(inode->i_sb, index - res,
				&start, &offset);
		if (blks < 0)
			return (int) blks;
		*block += blks;
	}

	/*
	 * Read length of block specified by index.
	 */
	res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset,
			sizeof(size));
	if (res < 0)
		return res;
	return squashfs_block_size(size);
}
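/*
 * Granularity example (a sketch, with skip = 1 and SQUASHFS_META_INDEXES
 * = 2048): a request for block index 5000 hits the cached mapping for
 * index 4096, so fill_meta_index() returns 4096 and read_indexes() walks
 * the remaining 5000 - 4096 = 904 block list entries to reach the
 * desired datablock.
 */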
void squashfs_fill_page(struct page *page,
	struct squashfs_cache_entry *buffer, int offset, int avail)
{
	int copied;
	void *pageaddr;

	pageaddr = kmap_atomic(page);
	copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
	memset(pageaddr + copied, 0, PAGE_SIZE - copied);
	kunmap_atomic(pageaddr);

	flush_dcache_page(page);
	if (copied == avail)
		SetPageUptodate(page);
	else
		SetPageError(page);
}
/* Copy data into page cache  */
void squashfs_copy_cache(struct page *page,
	struct squashfs_cache_entry *buffer, int bytes, int offset)
{
	struct inode *inode = page->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
	int start_index = page->index & ~mask, end_index = start_index | mask;

	/*
	 * Loop copying datablock into pages.  As the datablock likely covers
	 * many PAGE_SIZE pages (default block size is 128 KiB), explicitly
	 * grab the pages from the page cache, except for the page that we've
	 * been called to fill.
	 */
	for (i = start_index; i <= end_index && bytes > 0; i++,
			bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
		struct page *push_page;
		int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0;

		TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);

		push_page = (i == page->index) ? page :
			grab_cache_page_nowait(page->mapping, i);

		if (!push_page)
			continue;

		if (PageUptodate(push_page))
			goto skip_page;

		squashfs_fill_page(push_page, buffer, offset, avail);
skip_page:
		unlock_page(push_page);
		if (i != page->index)
			put_page(push_page);
	}
}
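/*
 * Geometry sketch, assuming the default 128 KiB block size and 4 KiB
 * pages (block_log = 17, PAGE_SHIFT = 12): mask = (1 << 5) - 1 = 31, so
 * each datablock spans 32 pages and start_index/end_index bracket the
 * 32-page run containing page->index.
 */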
/* Read datablock stored packed inside a fragment (tail-end packed block) */
static int squashfs_readpage_fragment(struct page *page, int expected)
{
	struct inode *inode = page->mapping->host;
	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
		squashfs_i(inode)->fragment_block,
		squashfs_i(inode)->fragment_size);
	int res = buffer->error;

	if (res)
		ERROR("Unable to read page, block %llx, size %x\n",
			squashfs_i(inode)->fragment_block,
			squashfs_i(inode)->fragment_size);
	else
		squashfs_copy_cache(page, buffer, expected,
			squashfs_i(inode)->fragment_offset);

	squashfs_cache_put(buffer);
	return res;
}
static int squashfs_readpage_sparse(struct page *page, int expected)
{
	squashfs_copy_cache(page, NULL, expected, 0);
	return 0;
}
static int squashfs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int index = page->index >> (msblk->block_log - PAGE_SHIFT);
	int file_end = i_size_read(inode) >> msblk->block_log;
	int expected = index == file_end ?
			(i_size_read(inode) & (msblk->block_size - 1)) :
			 msblk->block_size;
	int res;
	void *pageaddr;

	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
				page->index, squashfs_i(inode)->start);

	if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
					PAGE_SHIFT))
		goto out;

	if (index < file_end || squashfs_i(inode)->fragment_block ==
					SQUASHFS_INVALID_BLK) {
		u64 block = 0;
		int bsize = read_blocklist(inode, index, &block);

		if (bsize < 0)
			goto error_out;

		if (bsize == 0)
			res = squashfs_readpage_sparse(page, expected);
		else
			res = squashfs_readpage_block(page, block, bsize,
				expected);
	} else
		res = squashfs_readpage_fragment(page, expected);

	if (!res)
		return 0;

error_out:
	SetPageError(page);
out:
	pageaddr = kmap_atomic(page);
	memset(pageaddr, 0, PAGE_SIZE);
	kunmap_atomic(pageaddr);
	flush_dcache_page(page);
	if (!PageError(page))
		SetPageUptodate(page);
	unlock_page(page);

	return 0;
}
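/*
 * Tail-end sketch: for a 200000 byte file with 128 KiB blocks, file_end
 * = 200000 >> 17 = 1, so a read of block index 1 (the final, partial
 * block) computes expected = 200000 & (131072 - 1) = 68928 valid bytes,
 * while block 0 uses the full msblk->block_size.
 */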
const struct address_space_operations squashfs_aops = {
	.readpage = squashfs_readpage
};