// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * TODO: try to use an extents tree (instead of an array).
 */

#include <linux/blkdev.h>
#include <linux/log2.h>

/* runs_tree is contiguous memory. Try to avoid large sizes. */
#define NTFS3_RUN_MAX_BYTES 0x10000
	CLST vcn; /* Virtual cluster number. */
	CLST len; /* Length in clusters. */
	CLST lcn; /* Logical cluster number. */
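/*
 * Illustrative sketch, not code from the driver: each ntfs_run describes
 * one contiguous extent of the runs_tree, so a vcn inside a run maps to
 * lcn + (vcn - run start), unless the run is sparse (lcn == SPARSE_LCN).
 * The helper below is hypothetical and assumes the caller has already
 * checked that r->vcn <= vcn < r->vcn + r->len.
 *
 *	static CLST example_run_vcn_to_lcn(const struct ntfs_run *r, CLST vcn)
 *	{
 *		return r->lcn == SPARSE_LCN ? SPARSE_LCN :
 *					      r->lcn + (vcn - r->vcn);
 *	}
 */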
/*
 * run_lookup - Lookup the index of the MCB entry that is first <= vcn.
 *
 * On success, returns true and sets @index to the index of the entry
 * found. If no such entry exists, @index is set to the insertion
 * position for the entry in question.
 */
static bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index)

	size_t min_idx, max_idx, mid_idx;
	max_idx = run->count - 1;

	/* Check boundary cases first; they cover the most frequent requests. */
	if (vcn < r->vcn + r->len) {

	if (vcn >= r->vcn + r->len) {

		mid_idx = min_idx + ((max_idx - min_idx) >> 1);
		r = run->runs + mid_idx;

			max_idx = mid_idx - 1;
		} else if (vcn >= r->vcn + r->len) {
			min_idx = mid_idx + 1;

	} while (min_idx <= max_idx);
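/*
 * Usage sketch, written here as an assumption rather than code from the
 * driver: run_lookup() is a binary search over the sorted 'runs' array.
 * The hypothetical snippet below shows the calling convention; note that
 * callers such as run_lookup_entry() still re-check the bounds of the
 * entry that was returned.
 *
 *	size_t index;
 *
 *	if (run_lookup(run, vcn, &index)) {
 *		const struct ntfs_run *r = run->runs + index;
 *		// Expected: r->vcn <= vcn < r->vcn + r->len.
 *	} else {
 *		// 'index' is the insertion position for a run starting at vcn.
 *	}
 */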
/*
 * run_consolidate - Consolidate runs starting from a given one.
 */
static void run_consolidate(struct runs_tree *run, size_t index)

	struct ntfs_run *r = run->runs + index;

	while (index + 1 < run->count) {
		/*
		 * Merge the current run with the next one if the start of
		 * the next run lies inside the one being tested.
		 */
		struct ntfs_run *n = r + 1;
		CLST end = r->vcn + r->len;

		/* Stop if the next run does not touch the one being tested. */

		/*
		 * If the range at @index overlaps with the next one,
		 * either adjust the start of the next range or
		 * (if it matches completely) remove it from the list.
		 */
			goto remove_next_range;
			if (n->lcn != SPARSE_LCN)

		/*
		 * Stop if the sparse mode of the current and next runs
		 * does not match.
		 */
		if ((n->lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) {
		/*
		 * Stop if the first volume block of the next run does not
		 * follow the last volume block of the current run.
		 */
		if (n->lcn != SPARSE_LCN && n->lcn != r->lcn + r->len)

		/* Next and current runs are siblings: merge them. */
		r->len += n->len - dl;

		i = run->count - (index + 1);

			memmove(n, n + 1, sizeof(*n) * (i - 1));
/*
 * Return: True if the range [svcn, evcn] is fully mapped.
 */
bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn)

	const struct ntfs_run *r, *end;

	if (!run_lookup(run, svcn, &i))

	end = run->runs + run->count;

		next_vcn = r->vcn + r->len;

		if (r->vcn != next_vcn)
bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn,
		      CLST *len, size_t *index)

	/* Fail immediately if the run has not been touched yet. */

	if (!run_lookup(run, vcn, &idx))

	if (vcn >= r->vcn + r->len)

	*lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + gap);
/*
 * run_truncate_head - Decommit the range before vcn.
 */
void run_truncate_head(struct runs_tree *run, CLST vcn)

	if (run_lookup(run, vcn, &index)) {
		r = run->runs + index;

			CLST dlen = vcn - r->vcn;

			if (r->lcn != SPARSE_LCN)

	memmove(r, r + index, sizeof(*r) * (run->count - index));
/*
 * run_truncate - Decommit the range after vcn.
 */
void run_truncate(struct runs_tree *run, CLST vcn)

	/*
	 * If we hit a range, it has to be truncated. If the truncated
	 * range becomes empty, it is removed entirely.
	 */
	if (run_lookup(run, vcn, &index)) {
		struct ntfs_run *r = run->runs + index;

		r->len = vcn - r->vcn;

	/*
	 * At this point 'index' is set to the first position that should
	 * be thrown away (including index itself). Simply set the new limit.
	 */

	/* Do not reallocate the array 'runs'. Only free it if possible. */
/*
 * run_truncate_around - Trim head and tail if necessary.
 */
void run_truncate_around(struct runs_tree *run, CLST vcn)

	run_truncate_head(run, vcn);

	if (run->count >= NTFS3_RUN_MAX_BYTES / sizeof(struct ntfs_run) / 2)
		run_truncate(run, (run->runs + (run->count >> 1))->vcn);
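/*
 * Rough numbers, stated as an assumption about a typical configuration
 * (CLST is 32 bits, so sizeof(struct ntfs_run) is 12 bytes): the cap of
 * NTFS3_RUN_MAX_BYTES (0x10000) corresponds to about 5461 cached runs,
 * so run_truncate_around() starts dropping everything above the middle
 * vcn once roughly 2730 runs are cached.
 */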
/*
 * run_add_entry - Sets the location to a known state.
 *
 * The run to be added may overlap with an existing location.
 *
 * Return: false if out of memory.
 */
bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
		   bool is_mft)

	CLST tail_vcn = 0, tail_len = 0, tail_lcn = 0;
	bool should_add_tail = false;

	/*
	 * Lookup the insertion point: binary search for the entry
	 * containing the start position in question.
	 */
	inrange = run_lookup(run, vcn, &index);
	/*
	 * Shortcut: the range was not found, but the one being added
	 * continues the previous run. In that case the existing range
	 * can be used directly as the start point.
	 */
	if (!inrange && index > 0) {
		struct ntfs_run *t = run->runs + index - 1;

		if (t->vcn + t->len == vcn &&
		    (t->lcn == SPARSE_LCN) == (lcn == SPARSE_LCN) &&
		    (lcn == SPARSE_LCN || lcn == t->lcn + t->len)) {
	/*
	 * At this point 'index' either points to the range containing
	 * the start position or to the insertion position for such a range.
	 * So first check whether the range being probed is already here.
	 */

		/*
		 * Range was not found.
		 * Insert at position 'index'.
		 */
		used = run->count * sizeof(struct ntfs_run);

		/*
		 * Check the allocated space.
		 * If it is not enough for one more entry,
		 * reallocate the array.
		 */
		if (run->allocated < used + sizeof(struct ntfs_run)) {
			struct ntfs_run *new_ptr;

			/* Use a power of 2 for 'bytes'. */
			} else if (used <= 16 * PAGE_SIZE) {
				if (is_power_of_2(run->allocated))
					bytes = run->allocated << 1;

						<< (2 + blksize_bits(used));

				bytes = run->allocated + (16 * PAGE_SIZE);

			WARN_ON(!is_mft && bytes > NTFS3_RUN_MAX_BYTES);

			new_ptr = kvmalloc(bytes, GFP_KERNEL);
			memcpy(new_ptr, run->runs,
			       index * sizeof(struct ntfs_run));
			memcpy(r + 1, run->runs + index,
			       sizeof(struct ntfs_run) * (run->count - index));

			run->allocated = bytes;

			size_t i = run->count - index;

			r = run->runs + index;
			/* memmove appears to be a bottleneck here... */
				memmove(r + 1, r, sizeof(struct ntfs_run) * i);
		r = run->runs + index;

		/*
		 * If one of the ranges was not allocated, we have to split
		 * the location we just matched and insert the current one.
		 * In the common case this requires the tail to be reinserted
		 * by a recursive call.
		 */
		if (((lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) ||
		    (lcn != SPARSE_LCN && lcn != r->lcn + (vcn - r->vcn))) {
			CLST to_eat = vcn - r->vcn;
			CLST Tovcn = to_eat + len;

			should_add_tail = Tovcn < r->len;

			if (should_add_tail) {
				tail_lcn = r->lcn == SPARSE_LCN ?
						   SPARSE_LCN :
						   (r->lcn + Tovcn);
				tail_vcn = r->vcn + Tovcn;
				tail_len = r->len - Tovcn;

			goto requires_new_range;
		/* lcn should match the one we are going to add. */

		/*
		 * If the existing range fits, we are done.
		 * Otherwise extend the found range and fall through to the
		 * range-merge code.
		 */
		if (r->vcn + r->len < vcn + len)
			r->len += len - ((r->vcn + r->len) - vcn);
	/*
	 * Normalize the list starting from the insertion point.
	 * It is possible that no insertion was needed, if the start point
	 * lies within the range of the entry that 'index' points to.
	 */
	if (inrange && index > 0)

	run_consolidate(run, index);
	run_consolidate(run, index + 1);

	/*
	 * We have to add an extra range as a tail.
	 */
	if (should_add_tail &&
	    !run_add_entry(run, tail_vcn, tail_lcn, tail_len, is_mft))
/*
 * run_collapse_range
 *
 * Helper for attr_collapse_range(),
 * which is a helper for fallocate(collapse_range).
 */
bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len)

	struct ntfs_run *r, *e, *eat_start, *eat_end;

	if (WARN_ON(!run_lookup(run, vcn, &index)))
		return true; /* Should never be here. */
	e = run->runs + run->count;
	r = run->runs + index;

		if (r->vcn + r->len <= end) {
			/* Collapse the tail of the run. */
			r->len = vcn - r->vcn;
		} else if (r->lcn == SPARSE_LCN) {
			/* Collapse the middle part of a sparse run. */

			/* Collapse the middle part of a normal run: split it. */
			if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))

			return run_collapse_range(run, vcn, len);
		if (r->vcn + r->len <= end) {

			if (r->lcn != SPARSE_LCN)

	eat = eat_end - eat_start;
	memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r));
/*
 * run_insert_range
 *
 * Helper for attr_insert_range(),
 * which is a helper for fallocate(insert_range).
 */
bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len)

	struct ntfs_run *r, *e;

	if (WARN_ON(!run_lookup(run, vcn, &index)))
		return false; /* Should never be here. */

	e = run->runs + run->count;
	r = run->runs + index;
	r = run->runs + index;

		/* Split the fragment. */
		CLST len1 = vcn - r->vcn;
		CLST len2 = r->len - len1;
		CLST lcn2 = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len1);

		if (!run_add_entry(run, vcn + len, lcn2, len2, false))

	if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
/*
 * run_get_entry - Return the index-th mapped region.
 */
bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
		   CLST *lcn, CLST *len)

	const struct ntfs_run *r;

	if (index >= run->count)

	r = run->runs + index;
/*
 * run_packed_size - Calculate the size of a packed int64.
 */
static inline int run_packed_size(const s64 n)

	const u8 *p = (const u8 *)&n + sizeof(n) - 1;

		if (p[-7] || p[-6] || p[-5] || p[-4])

		if (p[-7] != 0xff || p[-6] != 0xff || p[-5] != 0xff ||
		    p[-4] != 0xff)

		if (p[-3] != 0xff || p[-2] != 0xff)

	return (const u8 *)&n + sizeof(n) - p;
/* Fully trusted function. It does not check 'size' for errors. */
static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v)

	const u8 *p = (u8 *)&v;

/* Fully trusted function. It does not check 'size' for errors. */
static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v)
static inline int run_packed_size(const s64 n)

	const u8 *p = (const u8 *)&n;

		if (p[7] || p[6] || p[5] || p[4])

		if (p[7] != 0xff || p[6] != 0xff || p[5] != 0xff ||
		    p[4] != 0xff)

		if (p[3] != 0xff || p[2] != 0xff)

	return 1 + p - (const u8 *)&n;
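/*
 * Worked example (values chosen here for illustration): run_packed_size()
 * returns the minimal number of little-endian bytes that still round-trip
 * the signed value. E.g. 0x45 packs into 1 byte, 0x1234 into 2, -2 into 1,
 * and 0xFFFF7EFF into 5 (FF 7E FF FF 00), because an extra high-order zero
 * byte is needed to keep the value from being read back as negative.
 */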
/* Fully trusted function. It does not check 'size' for errors. */
static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v)

	const u8 *p = (u8 *)&v;

	/* memcpy(run_buf, &v, size); Is it faster? */

/* Fully trusted function. It does not check 'size' for errors. */
static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v)

	/* memcpy(&v, run_buf, size); Is it faster? */
/*
 * run_pack - Pack runs into a buffer.
 *
 * packed_vcns - How many VCNs have been packed.
 * packed_size - How many bytes of run_buf have been used.
 */
int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
	     u32 run_buf_size, CLST *packed_vcns)

	CLST next_vcn, vcn, lcn;
	CLST evcn1 = svcn + len;
	const struct ntfs_run *r, *r_end;

	int offset_size, size_size, tmp;
	/* Check that all required entries [svcn, evcn1) are available. */
	if (!run_lookup(run, svcn, &i))

	r_end = run->runs + run->count;

	for (next_vcn = r->vcn + r->len; next_vcn < evcn1;
	     next_vcn = r->vcn + r->len) {
		if (++r >= r_end || r->vcn != next_vcn)
	/* Repeat the cycle above and pack the runs. Assume no errors. */
	lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len);

		next_vcn = vcn + len;
		if (next_vcn > evcn1)

		/* How many bytes are required to pack len? */
		size_size = run_packed_size(len);

		/* offset_size - How many bytes the packed dlcn takes. */
		if (lcn == SPARSE_LCN) {

			/* NOTE: lcn can be less than prev_lcn! */
			dlcn = (s64)lcn - prev_lcn;
			offset_size = run_packed_size(dlcn);
		tmp = run_buf_size - packed_size - 2 - offset_size;

		/* Can we store this entire run? */

			/* Pack the run header. */
			run_buf[0] = ((u8)(size_size | (offset_size << 4)));

			/* Pack the length of the run. */
			run_pack_s64(run_buf, size_size, len);
			run_buf += size_size;
			/* Pack the offset from the previous LCN. */
			run_pack_s64(run_buf, offset_size, dlcn);
			run_buf += offset_size;

		packed_size += 1 + offset_size + size_size;

		if (packed_size + 1 >= run_buf_size || next_vcn >= evcn1)

	/* Store the last zero byte. */

	return packed_size + 1;
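/*
 * Worked example of the packed layout produced above (bytes invented for
 * illustration): a run of 0x38 clusters whose LCN lies 0x25 clusters after
 * the previous run would be emitted as
 *
 *	11 38 25 00
 *
 * where 0x11 is the header byte (size_size = 1 in the low nibble,
 * offset_size = 1 in the high nibble), 0x38 is the packed run length,
 * 0x25 is the signed LCN delta, and the trailing 0x00 is the terminator
 * stored at the end of run_pack(). A sparse run is written with
 * offset_size = 0, i.e. with no LCN delta bytes at all.
 */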
/*
 * run_unpack - Unpack packed runs from @run_buf.
 *
 * Return: Error if negative, otherwise the number of bytes consumed.
 */
int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
	       CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
	       int run_buf_size)

	u64 prev_lcn, vcn64, lcn, next_vcn;
	const u8 *run_last, *run_0;
	bool is_mft = ino == MFT_REC_MFT;

	if (run_buf_size < 0)

	/* Check for empty. */
	if (evcn + 1 == svcn)

	run_last = run_buf + run_buf_size;
939 run_last
= run_buf
+ run_buf_size
;
943 /* Read all runs the chain. */
944 /* size_size - How much bytes is packed len. */
945 while (run_buf
< run_last
) {
946 /* size_size - How much bytes is packed len. */
947 u8 size_size
= *run_buf
& 0xF;
948 /* offset_size - How much bytes is packed dlcn. */
949 u8 offset_size
= *run_buf
++ >> 4;
957 * NOTE: Runs are stored little endian order
958 * "len" is unsigned value, "dlcn" is signed.
959 * Large positive number requires to store 5 bytes
960 * e.g.: 05 FF 7E FF FF 00 00 00
		if (size_size > sizeof(len))

		len = run_unpack_s64(run_buf, size_size, 0);
		/* Skip size_size. */
		run_buf += size_size;

		else if (offset_size <= sizeof(s64)) {

			/* Initial value of dlcn is -1 or 0. */
			dlcn = (run_buf[offset_size - 1] & 0x80) ? (s64)-1 : 0;
			dlcn = run_unpack_s64(run_buf, offset_size, dlcn);
			/* Skip offset_size. */
			run_buf += offset_size;

			lcn = prev_lcn + dlcn;

			/* The size of 'dlcn' can't be > 8. */
		next_vcn = vcn64 + len;
		/* Check boundary. */
		if (next_vcn > evcn + 1)

#ifndef CONFIG_NTFS3_64BIT_CLUSTER
		if (next_vcn > 0x100000000ull || (lcn + len) > 0x100000000ull) {
				"This driver is compiled without CONFIG_NTFS3_64BIT_CLUSTER (like the Windows driver).\n"
				"Volume contains a 64-bit run: vcn %llx, lcn %llx, len %llx.\n"
				"Activate CONFIG_NTFS3_64BIT_CLUSTER to process this case",
		if (lcn != SPARSE_LCN64 && lcn + len > sbi->used.bitmap.nbits) {
			/* LCN range is out of volume. */

			; /* Called from check_attr(fslog.c) to check the run. */
		else if (run == RUN_DEALLOCATE) {
			/*
			 * Called from ni_delete_all to free clusters
			 * without storing them in the run.
			 */
			if (lcn != SPARSE_LCN64)
				mark_as_free_ex(sbi, lcn, len, true);
		} else if (vcn64 >= vcn) {
			if (!run_add_entry(run, vcn64, lcn, len, is_mft))
		} else if (next_vcn > vcn) {
			u64 dlen = vcn - vcn64;

			if (!run_add_entry(run, vcn, lcn + dlen, len - dlen,
					   is_mft))

	if (vcn64 != evcn + 1) {
		/* Unexpected length of unpacked runs. */

	return run_buf - run_0;
#ifdef NTFS3_CHECK_FREE_CLST
/*
 * run_unpack_ex - Unpack packed runs from "run_buf".
 *
 * Checks that the unpacked runs can be used in the bitmap.
 *
 * Return: Error if negative, otherwise the number of bytes consumed.
 */
int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
		  CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
		  int run_buf_size)

	CLST next_vcn, lcn, len;
	struct wnd_bitmap *wnd;

	ret = run_unpack(run, sbi, ino, svcn, evcn, vcn, run_buf, run_buf_size);
	if (!sbi->used.bitmap.sb || !run || run == RUN_DEALLOCATE)

	if (ino == MFT_REC_BADCLUST)

	next_vcn = vcn = svcn;
	wnd = &sbi->used.bitmap;

	for (ok = run_lookup_entry(run, vcn, &lcn, &len, &index);
	     ok = run_get_entry(run, ++index, &vcn, &lcn, &len)) {
		if (!ok || next_vcn != vcn)

		next_vcn = vcn + len;

		if (lcn == SPARSE_LCN)

		if (sbi->flags & NTFS_FLAGS_NEED_REPLAY)

		down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
		zone = max(wnd->zone_bit, lcn) < min(wnd->zone_end, lcn + len);
		/* Check for free blocks. */
		ok = !zone && wnd_is_used(wnd, lcn, len);
		up_read(&wnd->rw_lock);
			/* Looks like the volume is corrupted. */
			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);

			if (!down_write_trylock(&wnd->rw_lock))

				/*
				 * The range [lcn, lcn + len) intersects with the zone.
				 * To avoid complications with the zone, just turn it off.
				 */
				wnd_zone_set(wnd, 0, 0);

			/* Mark all zero bits as used in the range [lcn, lcn+len). */
			err = wnd_set_used_safe(wnd, lcn, len, &done);

				/* Restore the zone. Lock the MFT run. */
				struct rw_semaphore *lock =
					is_mounted(sbi) ? &sbi->mft.ni->file.run_lock :
							  NULL;

				ntfs_refresh_zone(sbi);

			up_write(&wnd->rw_lock);
/*
 * run_get_highest_vcn
 *
 * Return the highest VCN from a mapping pairs array.
 * It is used while replaying the log file.
 */
int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn)

	while ((size_size = *run_buf & 0xF)) {
		u8 offset_size = *run_buf++ >> 4;

		if (size_size > 8 || offset_size > 8)

		len = run_unpack_s64(run_buf, size_size, 0);

		run_buf += size_size + offset_size;

#ifndef CONFIG_NTFS3_64BIT_CLUSTER
	if (vcn64 > 0x100000000ull)

	*highest_vcn = vcn64 - 1;
/*
 * run_clone - Make a copy of the run.
 */
int run_clone(const struct runs_tree *run, struct runs_tree *new_run)

	size_t bytes = run->count * sizeof(struct ntfs_run);

	if (bytes > new_run->allocated) {
		struct ntfs_run *new_ptr = kvmalloc(bytes, GFP_KERNEL);

		kvfree(new_run->runs);
		new_run->runs = new_ptr;
		new_run->allocated = bytes;

	memcpy(new_run->runs, run->runs, bytes);
	new_run->count = run->count;