1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
5 /// \file string_conversion.c
6 /// \brief Conversion of strings to filter chain and vice versa
8 // Author: Lasse Collin
10 ///////////////////////////////////////////////////////////////////////////////
12 #include "filter_common.h"
19 /// How much memory to allocate for strings. For now, no realloc is used
20 /// so this needs to be big enough even though there of course is
21 /// an overflow check still.
23 /// FIXME? Using a fixed size is wasteful if the application doesn't free
24 /// the string fairly quickly but this can be improved later if needed.
25 #define STR_ALLOC_SIZE 800
35 str_init(lzma_str
*str
, const lzma_allocator
*allocator
)
37 str
->buf
= lzma_alloc(STR_ALLOC_SIZE
, allocator
);
39 return LZMA_MEM_ERROR
;
47 str_free(lzma_str
*str
, const lzma_allocator
*allocator
)
49 lzma_free(str
->buf
, allocator
);
55 str_is_full(const lzma_str
*str
)
57 return str
->pos
== STR_ALLOC_SIZE
- 1;
62 str_finish(char **dest
, lzma_str
*str
, const lzma_allocator
*allocator
)
64 if (str_is_full(str
)) {
65 // The preallocated buffer was too small.
66 // This shouldn't happen as STR_ALLOC_SIZE should
67 // be adjusted if new filters are added.
68 lzma_free(str
->buf
, allocator
);
71 return LZMA_PROG_ERROR
;
74 str
->buf
[str
->pos
] = '\0';
81 str_append_str(lzma_str
*str
, const char *s
)
83 const size_t len
= strlen(s
);
84 const size_t limit
= STR_ALLOC_SIZE
- 1 - str
->pos
;
85 const size_t copy_size
= my_min(len
, limit
);
87 memcpy(str
->buf
+ str
->pos
, s
, copy_size
);
88 str
->pos
+= copy_size
;
94 str_append_u32(lzma_str
*str
, uint32_t v
, bool use_byte_suffix
)
97 str_append_str(str
, "0");
99 // NOTE: Don't use plain "B" because xz and the parser in this
100 // file don't support it and at glance it may look like 8
101 // (there cannot be a space before the suffix).
102 static const char suffixes
[4][4] = { "", "KiB", "MiB", "GiB" };
105 if (use_byte_suffix
) {
106 while ((v
& 1023) == 0
107 && suf
< ARRAY_SIZE(suffixes
) - 1) {
113 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
114 // that initializing to "" initializes all elements to
115 // zero so '\0'-termination gets handled by this.
117 size_t pos
= sizeof(buf
) - 1;
120 buf
[--pos
] = '0' + (v
% 10);
124 str_append_str(str
, buf
+ pos
);
125 str_append_str(str
, suffixes
[suf
]);
132 //////////////////////////////////////////////
133 // Parsing and stringification declarations //
134 //////////////////////////////////////////////
136 /// Maximum length for filter and option names.
137 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
138 #define NAME_LEN_MAX 11
/// For option_map.flags: Use .u.map to convert the input value
142 /// to an integer. Without this flag, .u.range.{min,max} are used
143 /// as the allowed range for the integer.
144 #define OPTMAP_USE_NAME_VALUE_MAP 0x01
146 /// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
147 /// the stringified output if the value is an exact multiple of these.
148 /// This is used e.g. for LZMA1/2 dictionary size.
149 #define OPTMAP_USE_BYTE_SUFFIX 0x02
151 /// For option_map.flags: If the integer value is zero then this option
152 /// won't be included in the stringified output. It's used e.g. for
153 /// BCJ filter start offset which usually is zero.
154 #define OPTMAP_NO_STRFY_ZERO 0x04
156 /// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
157 /// it doesn't need to be specified in the initializers as it is
158 /// the implicit value.
161 OPTMAP_TYPE_LZMA_MODE
,
162 OPTMAP_TYPE_LZMA_MATCH_FINDER
,
163 OPTMAP_TYPE_LZMA_PRESET
,
167 /// This is for mapping string values in options to integers.
168 /// The last element of an array must have "" as the name.
169 /// It's used e.g. for match finder names in LZMA1/2.
171 const char name
[NAME_LEN_MAX
+ 1];
172 const uint32_t value
;
176 /// Each filter that has options needs an array of option_map structures.
177 /// The array doesn't need to be terminated as the functions take the
178 /// length of the array as an argument.
180 /// When converting a string to filter options structure, option values
181 /// will be handled in a few different ways:
183 /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
184 /// is handled specially.
186 /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
187 /// converted to an integer using the name_value_map pointed by .u.map.
188 /// The last element in .u.map must have .name = "" as the terminator.
190 /// (3) Otherwise the string is treated as a non-negative unsigned decimal
191 /// integer which must be in the range set in .u.range. If .flags has
192 /// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
194 /// The integer value from (2) or (3) is then stored to filter_options
195 /// at the offset specified in .offset using the type specified in .type
196 /// (default is uint32_t).
198 /// Stringifying a filter is done by processing a given number of options
199 /// in order from the beginning of an option_map array. The integer is
200 /// read from filter_options at .offset using the type from .type.
202 /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
203 /// option is skipped.
205 /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
206 /// to convert the option to a string. If the map doesn't contain a string
207 /// for the integer value then "UNKNOWN" is used.
209 /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
210 /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
211 /// MiB, or GiB suffix is used if the value is an exact multiple of these.
212 /// Plain "B" suffix is never used.
214 char name
[NAME_LEN_MAX
+ 1];
220 // NVHPC has problems with unions that contain pointers that
221 // are not the first members, so keep "map" at the top.
222 const name_value_map
*map
;
232 static const char *parse_options(const char **const str
, const char *str_end
,
233 void *filter_options
,
234 const option_map
*const optmap
, const size_t optmap_size
);
241 #if defined(HAVE_ENCODER_X86) \
242 || defined(HAVE_DECODER_X86) \
243 || defined(HAVE_ENCODER_ARM) \
244 || defined(HAVE_DECODER_ARM) \
245 || defined(HAVE_ENCODER_ARMTHUMB) \
246 || defined(HAVE_DECODER_ARMTHUMB) \
247 || defined(HAVE_ENCODER_ARM64) \
248 || defined(HAVE_DECODER_ARM64) \
249 || defined(HAVE_ENCODER_POWERPC) \
250 || defined(HAVE_DECODER_POWERPC) \
251 || defined(HAVE_ENCODER_IA64) \
252 || defined(HAVE_DECODER_IA64) \
253 || defined(HAVE_ENCODER_SPARC) \
254 || defined(HAVE_DECODER_SPARC) \
255 || defined(HAVE_ENCODER_RISCV) \
256 || defined(HAVE_DECODER_RISCV)
257 static const option_map bcj_optmap
[] = {
260 .flags
= OPTMAP_NO_STRFY_ZERO
| OPTMAP_USE_BYTE_SUFFIX
,
261 .offset
= offsetof(lzma_options_bcj
, start_offset
),
263 .u
.range
.max
= UINT32_MAX
,
269 parse_bcj(const char **const str
, const char *str_end
, void *filter_options
)
271 // filter_options was zeroed on allocation and that is enough
272 // for the default value.
273 return parse_options(str
, str_end
, filter_options
,
274 bcj_optmap
, ARRAY_SIZE(bcj_optmap
));
283 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
284 static const option_map delta_optmap
[] = {
287 .offset
= offsetof(lzma_options_delta
, dist
),
288 .u
.range
.min
= LZMA_DELTA_DIST_MIN
,
289 .u
.range
.max
= LZMA_DELTA_DIST_MAX
,
295 parse_delta(const char **const str
, const char *str_end
, void *filter_options
)
297 lzma_options_delta
*opts
= filter_options
;
298 opts
->type
= LZMA_DELTA_TYPE_BYTE
;
299 opts
->dist
= LZMA_DELTA_DIST_MIN
;
301 return parse_options(str
, str_end
, filter_options
,
302 delta_optmap
, ARRAY_SIZE(delta_optmap
));
311 /// Help string for presets
312 #define LZMA12_PRESET_STR "0-9[e]"
316 parse_lzma12_preset(const char **const str
, const char *str_end
,
319 assert(*str
< str_end
);
320 *preset
= (uint32_t)(**str
- '0');
322 // NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
323 while (++*str
< str_end
) {
326 *preset
|= LZMA_PRESET_EXTREME
;
330 return "Unsupported preset flag";
339 set_lzma12_preset(const char **const str
, const char *str_end
,
340 void *filter_options
)
343 const char *errmsg
= parse_lzma12_preset(str
, str_end
, &preset
);
347 lzma_options_lzma
*opts
= filter_options
;
348 if (lzma_lzma_preset(opts
, preset
))
349 return "Unsupported preset";
355 static const name_value_map lzma12_mode_map
[] = {
356 { "fast", LZMA_MODE_FAST
},
357 { "normal", LZMA_MODE_NORMAL
},
362 static const name_value_map lzma12_mf_map
[] = {
363 { "hc3", LZMA_MF_HC3
},
364 { "hc4", LZMA_MF_HC4
},
365 { "bt2", LZMA_MF_BT2
},
366 { "bt3", LZMA_MF_BT3
},
367 { "bt4", LZMA_MF_BT4
},
372 static const option_map lzma12_optmap
[] = {
375 .type
= OPTMAP_TYPE_LZMA_PRESET
,
378 .flags
= OPTMAP_USE_BYTE_SUFFIX
,
379 .offset
= offsetof(lzma_options_lzma
, dict_size
),
380 .u
.range
.min
= LZMA_DICT_SIZE_MIN
,
381 // FIXME? The max is really max for encoding but decoding
382 // would allow 4 GiB - 1 B.
383 .u
.range
.max
= (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
386 .offset
= offsetof(lzma_options_lzma
, lc
),
387 .u
.range
.min
= LZMA_LCLP_MIN
,
388 .u
.range
.max
= LZMA_LCLP_MAX
,
391 .offset
= offsetof(lzma_options_lzma
, lp
),
392 .u
.range
.min
= LZMA_LCLP_MIN
,
393 .u
.range
.max
= LZMA_LCLP_MAX
,
396 .offset
= offsetof(lzma_options_lzma
, pb
),
397 .u
.range
.min
= LZMA_PB_MIN
,
398 .u
.range
.max
= LZMA_PB_MAX
,
401 .type
= OPTMAP_TYPE_LZMA_MODE
,
402 .flags
= OPTMAP_USE_NAME_VALUE_MAP
,
403 .offset
= offsetof(lzma_options_lzma
, mode
),
404 .u
.map
= lzma12_mode_map
,
407 .offset
= offsetof(lzma_options_lzma
, nice_len
),
412 .type
= OPTMAP_TYPE_LZMA_MATCH_FINDER
,
413 .flags
= OPTMAP_USE_NAME_VALUE_MAP
,
414 .offset
= offsetof(lzma_options_lzma
, mf
),
415 .u
.map
= lzma12_mf_map
,
418 .offset
= offsetof(lzma_options_lzma
, depth
),
420 .u
.range
.max
= UINT32_MAX
,
426 parse_lzma12(const char **const str
, const char *str_end
, void *filter_options
)
428 lzma_options_lzma
*opts
= filter_options
;
431 const bool preset_ret
= lzma_lzma_preset(opts
, LZMA_PRESET_DEFAULT
);
435 const char *errmsg
= parse_options(str
, str_end
, filter_options
,
436 lzma12_optmap
, ARRAY_SIZE(lzma12_optmap
));
440 if (opts
->lc
+ opts
->lp
> LZMA_LCLP_MAX
)
441 return "The sum of lc and lp must not exceed 4";
447 /////////////////////////////////////////
448 // Generic parsing and stringification //
449 /////////////////////////////////////////
451 static const struct {
452 /// Name of the filter
453 char name
[NAME_LEN_MAX
+ 1];
455 /// For lzma_str_to_filters:
456 /// Size of the filter-specific options structure.
462 /// For lzma_str_to_filters:
463 /// Function to parse the filter-specific options. The filter_options
464 /// will already have been allocated using lzma_alloc_zero().
465 const char *(*parse
)(const char **str
, const char *str_end
,
466 void *filter_options
);
468 /// For lzma_str_from_filters:
469 /// If the flag LZMA_STR_ENCODER is used then the first
470 /// strfy_encoder elements of optmap are stringified.
471 /// With LZMA_STR_DECODER strfy_decoder is used.
472 /// Currently encoders use all options that decoders do but if
473 /// that changes then this needs to be changed too, for example,
474 /// add a new OPTMAP flag to skip printing some decoder-only options.
475 const option_map
*optmap
;
476 uint8_t strfy_encoder
;
477 uint8_t strfy_decoder
;
479 /// For lzma_str_from_filters:
480 /// If true, lzma_filter.options is allowed to be NULL. In that case,
481 /// only the filter name is printed without any options.
484 } filter_name_map
[] = {
485 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
486 { "lzma1", sizeof(lzma_options_lzma
), LZMA_FILTER_LZMA1
,
487 &parse_lzma12
, lzma12_optmap
, 9, 5, false },
490 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
491 { "lzma2", sizeof(lzma_options_lzma
), LZMA_FILTER_LZMA2
,
492 &parse_lzma12
, lzma12_optmap
, 9, 2, false },
495 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
496 { "x86", sizeof(lzma_options_bcj
), LZMA_FILTER_X86
,
497 &parse_bcj
, bcj_optmap
, 1, 1, true },
500 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
501 { "arm", sizeof(lzma_options_bcj
), LZMA_FILTER_ARM
,
502 &parse_bcj
, bcj_optmap
, 1, 1, true },
505 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
506 { "armthumb", sizeof(lzma_options_bcj
), LZMA_FILTER_ARMTHUMB
,
507 &parse_bcj
, bcj_optmap
, 1, 1, true },
510 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
511 { "arm64", sizeof(lzma_options_bcj
), LZMA_FILTER_ARM64
,
512 &parse_bcj
, bcj_optmap
, 1, 1, true },
515 #if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
516 { "riscv", sizeof(lzma_options_bcj
), LZMA_FILTER_RISCV
,
517 &parse_bcj
, bcj_optmap
, 1, 1, true },
520 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
521 { "powerpc", sizeof(lzma_options_bcj
), LZMA_FILTER_POWERPC
,
522 &parse_bcj
, bcj_optmap
, 1, 1, true },
525 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
526 { "ia64", sizeof(lzma_options_bcj
), LZMA_FILTER_IA64
,
527 &parse_bcj
, bcj_optmap
, 1, 1, true },
530 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
531 { "sparc", sizeof(lzma_options_bcj
), LZMA_FILTER_SPARC
,
532 &parse_bcj
, bcj_optmap
, 1, 1, true },
535 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
536 { "delta", sizeof(lzma_options_delta
), LZMA_FILTER_DELTA
,
537 &parse_delta
, delta_optmap
, 1, 1, false },
542 /// Decodes options from a string for one filter (name1=value1,name2=value2).
543 /// Caller must have allocated memory for filter_options already and set
544 /// the initial default values. This is called from the filter-specific
545 /// parse_* functions.
547 /// The input string starts at *str and the address in str_end is the first
548 /// char that is not part of the string anymore. So no '\0' terminator is
549 /// used. *str is advanced every time something has been decoded successfully.
551 parse_options(const char **const str
, const char *str_end
,
552 void *filter_options
,
553 const option_map
*const optmap
, const size_t optmap_size
)
555 while (*str
< str_end
&& **str
!= '\0') {
556 // Each option is of the form name=value.
557 // Commas (',') separate options. Extra commas are ignored.
// Ignoring extra commas makes it simpler if an optional
// option is stored in a shell variable which can be empty.
565 // Find where the next name=value ends.
566 const size_t str_len
= (size_t)(str_end
- *str
);
567 const char *name_eq_value_end
= memchr(*str
, ',', str_len
);
568 if (name_eq_value_end
== NULL
)
569 name_eq_value_end
= str_end
;
571 const char *equals_sign
= memchr(*str
, '=',
572 (size_t)(name_eq_value_end
- *str
));
574 // Fail if the '=' wasn't found or the option name is missing
575 // (the first char is '=').
576 if (equals_sign
== NULL
|| **str
== '=')
577 return "Options must be 'name=value' pairs separated "
580 // Reject a too long option name so that the memcmp()
581 // in the loop below won't read past the end of the
582 // string in optmap[i].name.
583 const size_t name_len
= (size_t)(equals_sign
- *str
);
584 if (name_len
> NAME_LEN_MAX
)
585 return "Unknown option name";
587 // Find the option name from optmap[].
590 if (i
== optmap_size
)
591 return "Unknown option name";
593 if (memcmp(*str
, optmap
[i
].name
, name_len
) == 0
594 && optmap
[i
].name
[name_len
] == '\0')
// The input string is good at least until the start of the option value.
602 *str
= equals_sign
+ 1;
604 // The code assumes that the option value isn't an empty
605 // string so check it here.
606 const size_t value_len
= (size_t)(name_eq_value_end
- *str
);
608 return "Option value cannot be empty";
610 // LZMA1/2 preset has its own parsing function.
611 if (optmap
[i
].type
== OPTMAP_TYPE_LZMA_PRESET
) {
612 const char *errmsg
= set_lzma12_preset(str
,
613 name_eq_value_end
, filter_options
);
620 // It's an integer value.
622 if (optmap
[i
].flags
& OPTMAP_USE_NAME_VALUE_MAP
) {
623 // The integer is picked from a string-to-integer map.
625 // Reject a too long value string so that the memcmp()
626 // in the loop below won't read past the end of the
627 // string in optmap[i].u.map[j].name.
628 if (value_len
> NAME_LEN_MAX
)
629 return "Invalid option value";
631 const name_value_map
*map
= optmap
[i
].u
.map
;
634 // The array is terminated with an empty name.
635 if (map
[j
].name
[0] == '\0')
636 return "Invalid option value";
638 if (memcmp(*str
, map
[j
].name
, value_len
) == 0
639 && map
[j
].name
[value_len
]
647 } else if (**str
< '0' || **str
> '9') {
648 // Note that "max" isn't supported while it is
649 // supported in xz. It's not useful here.
650 return "Value is not a non-negative decimal integer";
652 // strtoul() has locale-specific behavior so it cannot
653 // be relied on to get reproducible results since we
// cannot change the locale in a thread-safe library.
655 // It also needs '\0'-termination.
// Use a temporary pointer so that *str will point
// to the beginning of the value string in case an error occurs.
660 const char *p
= *str
;
663 if (v
> UINT32_MAX
/ 10)
664 return "Value out of range";
668 const uint32_t add
= (uint32_t)(*p
- '0');
669 if (UINT32_MAX
- add
< v
)
670 return "Value out of range";
674 } while (p
< name_eq_value_end
675 && *p
>= '0' && *p
<= '9');
677 if (p
< name_eq_value_end
) {
678 // Remember this position so that it can be
679 // used for error messages that are
680 // specifically about the suffix. (Out of
681 // range values are about the whole value
682 // and those error messages point to the
683 // beginning of the number part,
684 // not to the suffix.)
685 const char *multiplier_start
= p
;
687 // If multiplier suffix shouldn't be used
688 // then don't allow them even if the value
689 // would stay within limits. This is a somewhat
690 // unnecessary check but it rejects silly
691 // things like lzma2:pb=0MiB which xz allows.
692 if ((optmap
[i
].flags
& OPTMAP_USE_BYTE_SUFFIX
)
694 *str
= multiplier_start
;
695 return "This option does not support "
696 "any integer suffixes";
718 *str
= multiplier_start
;
719 return "Invalid multiplier suffix "
720 "(KiB, MiB, or GiB)";
725 // Allow "M", "Mi", "MB", "MiB" and the same
726 // for the other five characters from the
727 // switch-statement above. All are handled
728 // as base-2 (perhaps a mistake, perhaps not).
729 // Note that 'i' and 'B' are case sensitive.
730 if (p
< name_eq_value_end
&& *p
== 'i')
733 if (p
< name_eq_value_end
&& *p
== 'B')
736 // Now we must have no chars remaining.
737 if (p
< name_eq_value_end
) {
738 *str
= multiplier_start
;
739 return "Invalid multiplier suffix "
740 "(KiB, MiB, or GiB)";
743 if (v
> (UINT32_MAX
>> shift
))
744 return "Value out of range";
749 if (v
< optmap
[i
].u
.range
.min
750 || v
> optmap
[i
].u
.range
.max
)
751 return "Value out of range";
754 // Set the value in filter_options. Enums are handled
755 // specially since the underlying type isn't the same
756 // as uint32_t on all systems.
757 void *ptr
= (char *)filter_options
+ optmap
[i
].offset
;
758 switch (optmap
[i
].type
) {
759 case OPTMAP_TYPE_LZMA_MODE
:
760 *(lzma_mode
*)ptr
= (lzma_mode
)v
;
763 case OPTMAP_TYPE_LZMA_MATCH_FINDER
:
764 *(lzma_match_finder
*)ptr
= (lzma_match_finder
)v
;
768 *(uint32_t *)ptr
= v
;
772 // This option has been successfully handled.
773 *str
= name_eq_value_end
;
781 /// Finds the name of the filter at the beginning of the string and
782 /// calls filter_name_map[i].parse() to decode the filter-specific options.
783 /// The caller must have set str_end so that exactly one filter and its
784 /// options are present without any trailing characters.
786 parse_filter(const char **const str
, const char *str_end
, lzma_filter
*filter
,
787 const lzma_allocator
*allocator
, bool only_xz
)
789 // Search for a colon or equals sign that would separate the filter
790 // name from filter options. If neither is found, then the input
791 // string only contains a filter name and there are no options.
793 // First assume that a colon or equals sign won't be found:
794 const char *name_end
= str_end
;
795 const char *opts_start
= str_end
;
797 for (const char *p
= *str
; p
< str_end
; ++p
) {
798 if (*p
== ':' || *p
== '=') {
801 // Filter options (name1=value1,name2=value2,...)
802 // begin after the colon or equals sign.
808 // Reject a too long filter name so that the memcmp()
809 // in the loop below won't read past the end of the
810 // string in filter_name_map[i].name.
811 const size_t name_len
= (size_t)(name_end
- *str
);
812 if (name_len
> NAME_LEN_MAX
)
813 return "Unknown filter name";
815 for (size_t i
= 0; i
< ARRAY_SIZE(filter_name_map
); ++i
) {
816 if (memcmp(*str
, filter_name_map
[i
].name
, name_len
) == 0
817 && filter_name_map
[i
].name
[name_len
] == '\0') {
818 if (only_xz
&& filter_name_map
[i
].id
819 >= LZMA_FILTER_RESERVED_START
)
820 return "This filter cannot be used in "
823 // Allocate the filter-specific options and
824 // initialize the memory with zeros.
825 void *options
= lzma_alloc_zero(
826 filter_name_map
[i
].opts_size
,
829 return "Memory allocation failed";
831 // Filter name was found so the input string is good
832 // at least this far.
835 const char *errmsg
= filter_name_map
[i
].parse(
836 str
, str_end
, options
);
837 if (errmsg
!= NULL
) {
838 lzma_free(options
, allocator
);
842 // *filter is modified only when parsing is successful.
843 filter
->id
= filter_name_map
[i
].id
;
844 filter
->options
= options
;
849 return "Unknown filter name";
853 /// Converts the string to a filter chain (array of lzma_filter structures).
855 /// *str is advanced every time something has been decoded successfully.
856 /// This way the caller knows where in the string a possible error occurred.
858 str_to_filters(const char **const str
, lzma_filter
*filters
, uint32_t flags
,
859 const lzma_allocator
*allocator
)
863 // Skip leading spaces.
868 return "Empty string is not allowed, "
869 "try \"6\" if a default value is needed";
871 // Detect the type of the string.
873 // A string beginning with a digit or a string beginning with
874 // one dash and a digit are treated as presets. Trailing spaces
875 // will be ignored too (leading spaces were already ignored above).
877 // For example, "6", "7 ", "-9e", or " -3 " are treated as presets.
878 // Strings like "-" or "- " aren't preset.
879 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
880 if (MY_IS_DIGIT(**str
) || (**str
== '-' && MY_IS_DIGIT((*str
)[1]))) {
884 // Ignore trailing spaces.
885 const size_t str_len
= strlen(*str
);
886 const char *str_end
= memchr(*str
, ' ', str_len
);
887 if (str_end
!= NULL
) {
888 // There is at least one trailing space. Check that
889 // there are no chars other than spaces.
890 for (size_t i
= 1; str_end
[i
] != '\0'; ++i
)
891 if (str_end
[i
] != ' ')
892 return "Unsupported preset";
894 // There are no trailing spaces. Use the whole string.
895 str_end
= *str
+ str_len
;
899 errmsg
= parse_lzma12_preset(str
, str_end
, &preset
);
903 lzma_options_lzma
*opts
= lzma_alloc(sizeof(*opts
), allocator
);
905 return "Memory allocation failed";
907 if (lzma_lzma_preset(opts
, preset
)) {
908 lzma_free(opts
, allocator
);
909 return "Unsupported preset";
912 filters
[0].id
= LZMA_FILTER_LZMA2
;
913 filters
[0].options
= opts
;
914 filters
[1].id
= LZMA_VLI_UNKNOWN
;
915 filters
[1].options
= NULL
;
920 // Not a preset so it must be a filter chain.
922 // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
923 // can be used in .xz.
924 const bool only_xz
= (flags
& LZMA_STR_ALL_FILTERS
) == 0;
926 // Use a temporary array so that we don't modify the caller-supplied
927 // one until we know that no errors occurred.
928 lzma_filter temp_filters
[LZMA_FILTERS_MAX
+ 1];
932 if (i
== LZMA_FILTERS_MAX
) {
933 errmsg
= "The maximum number of filters is four";
937 // Skip "--" if present.
938 if ((*str
)[0] == '-' && (*str
)[1] == '-')
941 // Locate the end of "filter:name1=value1,name2=value2",
942 // stopping at the first "--" or a single space.
943 const char *filter_end
= *str
;
944 while (filter_end
[0] != '\0') {
945 if ((filter_end
[0] == '-' && filter_end
[1] == '-')
946 || filter_end
[0] == ' ')
952 // Inputs that have "--" at the end or "-- " in the middle
953 // will result in an empty filter name.
954 if (filter_end
== *str
) {
955 errmsg
= "Filter name is missing";
959 errmsg
= parse_filter(str
, filter_end
, &temp_filters
[i
],
964 // Skip trailing spaces.
969 } while (**str
!= '\0');
971 // Seems to be good, terminate the array so that
972 // basic validation can be done.
973 temp_filters
[i
].id
= LZMA_VLI_UNKNOWN
;
974 temp_filters
[i
].options
= NULL
;
976 // Do basic validation if the application didn't prohibit it.
977 if ((flags
& LZMA_STR_NO_VALIDATION
) == 0) {
979 const lzma_ret ret
= lzma_validate_chain(temp_filters
, &dummy
);
980 assert(ret
== LZMA_OK
|| ret
== LZMA_OPTIONS_ERROR
);
981 if (ret
!= LZMA_OK
) {
982 errmsg
= "Invalid filter chain "
983 "('lzma2' missing at the end?)";
988 // All good. Copy the filters to the application supplied array.
989 memcpy(filters
, temp_filters
, (i
+ 1) * sizeof(lzma_filter
));
993 // Free the filter options that were successfully decoded.
995 lzma_free(temp_filters
[i
].options
, allocator
);
1001 extern LZMA_API(const char *)
1002 lzma_str_to_filters(const char *str
, int *error_pos
, lzma_filter
*filters
,
1003 uint32_t flags
, const lzma_allocator
*allocator
)
1005 // If error_pos isn't NULL, *error_pos must always be set.
1006 // liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this
1007 // when str == NULL or filters == NULL or flags are unsupported.
1008 if (error_pos
!= NULL
)
1011 if (str
== NULL
|| filters
== NULL
)
1012 return "Unexpected NULL pointer argument(s) "
1013 "to lzma_str_to_filters()";
1015 // Validate the flags.
1016 const uint32_t supported_flags
1017 = LZMA_STR_ALL_FILTERS
1018 | LZMA_STR_NO_VALIDATION
;
1020 if (flags
& ~supported_flags
)
1021 return "Unsupported flags to lzma_str_to_filters()";
1023 const char *used
= str
;
1024 const char *errmsg
= str_to_filters(&used
, filters
, flags
, allocator
);
1026 if (error_pos
!= NULL
) {
1027 const size_t n
= (size_t)(used
- str
);
1028 *error_pos
= n
> INT_MAX
? INT_MAX
: (int)n
;
1035 /// Converts options of one filter to a string.
1037 /// The caller must have already put the filter name in the destination
1038 /// string. Since it is possible that no options will be needed, the caller
1039 /// won't have put a delimiter character (':' or '=') in the string yet.
1040 /// We will add it if at least one option will be added to the string.
1042 strfy_filter(lzma_str
*dest
, const char *delimiter
,
1043 const option_map
*optmap
, size_t optmap_count
,
1044 const void *filter_options
)
1046 for (size_t i
= 0; i
< optmap_count
; ++i
) {
1047 // No attempt is made to reverse LZMA1/2 preset.
1048 if (optmap
[i
].type
== OPTMAP_TYPE_LZMA_PRESET
)
1051 // All options have integer values, some just are mapped
1052 // to a string with a name_value_map. LZMA1/2 preset
1053 // isn't reversed back to preset=PRESET form.
1056 = (const char *)filter_options
+ optmap
[i
].offset
;
1057 switch (optmap
[i
].type
) {
1058 case OPTMAP_TYPE_LZMA_MODE
:
1059 v
= *(const lzma_mode
*)ptr
;
1062 case OPTMAP_TYPE_LZMA_MATCH_FINDER
:
1063 v
= *(const lzma_match_finder
*)ptr
;
1067 v
= *(const uint32_t *)ptr
;
1071 // Skip this if this option should be omitted from
1072 // the string when the value is zero.
1073 if (v
== 0 && (optmap
[i
].flags
& OPTMAP_NO_STRFY_ZERO
))
1076 // Before the first option we add whatever delimiter
1077 // the caller gave us. For later options a comma is used.
1078 str_append_str(dest
, delimiter
);
1081 // Add the option name and equals sign.
1082 str_append_str(dest
, optmap
[i
].name
);
1083 str_append_str(dest
, "=");
1085 if (optmap
[i
].flags
& OPTMAP_USE_NAME_VALUE_MAP
) {
1086 const name_value_map
*map
= optmap
[i
].u
.map
;
1089 if (map
[j
].name
[0] == '\0') {
1090 str_append_str(dest
, "UNKNOWN");
1094 if (map
[j
].value
== v
) {
1095 str_append_str(dest
, map
[j
].name
);
1102 str_append_u32(dest
, v
,
1103 optmap
[i
].flags
& OPTMAP_USE_BYTE_SUFFIX
);
1111 extern LZMA_API(lzma_ret
)
1112 lzma_str_from_filters(char **output_str
, const lzma_filter
*filters
,
1113 uint32_t flags
, const lzma_allocator
*allocator
)
1115 // On error *output_str is always set to NULL.
1116 // Do it as the very first step.
1117 if (output_str
== NULL
)
1118 return LZMA_PROG_ERROR
;
1122 if (filters
== NULL
)
1123 return LZMA_PROG_ERROR
;
1125 // Validate the flags.
1126 const uint32_t supported_flags
1129 | LZMA_STR_GETOPT_LONG
1130 | LZMA_STR_NO_SPACES
;
1132 if (flags
& ~supported_flags
)
1133 return LZMA_OPTIONS_ERROR
;
1135 // There must be at least one filter.
1136 if (filters
[0].id
== LZMA_VLI_UNKNOWN
)
1137 return LZMA_OPTIONS_ERROR
;
1139 // Allocate memory for the output string.
1141 return_if_error(str_init(&dest
, allocator
));
1143 const bool show_opts
= (flags
& (LZMA_STR_ENCODER
| LZMA_STR_DECODER
));
1145 const char *opt_delim
= (flags
& LZMA_STR_GETOPT_LONG
) ? "=" : ":";
1147 for (size_t i
= 0; filters
[i
].id
!= LZMA_VLI_UNKNOWN
; ++i
) {
1148 // If we reach LZMA_FILTERS_MAX, then the filters array
1149 // is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
1150 if (i
== LZMA_FILTERS_MAX
) {
1151 str_free(&dest
, allocator
);
1152 return LZMA_OPTIONS_ERROR
;
1155 // Don't add a space between filters if the caller
1156 // doesn't want them.
1157 if (i
> 0 && !(flags
& LZMA_STR_NO_SPACES
))
1158 str_append_str(&dest
, " ");
1160 // Use dashes for xz getopt_long() compatible syntax but also
1161 // use dashes to separate filters when spaces weren't wanted.
1162 if ((flags
& LZMA_STR_GETOPT_LONG
)
1163 || (i
> 0 && (flags
& LZMA_STR_NO_SPACES
)))
1164 str_append_str(&dest
, "--");
1168 if (j
== ARRAY_SIZE(filter_name_map
)) {
1169 // Filter ID in filters[i].id isn't supported.
1170 str_free(&dest
, allocator
);
1171 return LZMA_OPTIONS_ERROR
;
1174 if (filter_name_map
[j
].id
== filters
[i
].id
) {
1175 // Add the filter name.
1176 str_append_str(&dest
, filter_name_map
[j
].name
);
1178 // If only the filter names were wanted then
1179 // skip to the next filter. In this case
1180 // .options is ignored and may be NULL even
1181 // when the filter doesn't allow NULL options.
1185 if (filters
[i
].options
== NULL
) {
1186 if (!filter_name_map
[j
].allow_null
) {
1187 // Filter-specific options
1188 // are missing but with
1189 // this filter the options
1190 // structure is mandatory.
1191 str_free(&dest
, allocator
);
1192 return LZMA_OPTIONS_ERROR
;
1195 // .options is allowed to be NULL.
1196 // There is no need to add any
1197 // options to the string.
1201 // Options structure is available. Add
1202 // the filter options to the string.
1203 const size_t optmap_count
1204 = (flags
& LZMA_STR_ENCODER
)
1205 ? filter_name_map
[j
].strfy_encoder
1206 : filter_name_map
[j
].strfy_decoder
;
1207 strfy_filter(&dest
, opt_delim
,
1208 filter_name_map
[j
].optmap
,
1210 filters
[i
].options
);
1218 return str_finish(output_str
, &dest
, allocator
);
1222 extern LZMA_API(lzma_ret
)
1223 lzma_str_list_filters(char **output_str
, lzma_vli filter_id
, uint32_t flags
,
1224 const lzma_allocator
*allocator
)
1226 // On error *output_str is always set to NULL.
1227 // Do it as the very first step.
1228 if (output_str
== NULL
)
1229 return LZMA_PROG_ERROR
;
1233 // Validate the flags.
1234 const uint32_t supported_flags
1235 = LZMA_STR_ALL_FILTERS
1238 | LZMA_STR_GETOPT_LONG
;
1240 if (flags
& ~supported_flags
)
1241 return LZMA_OPTIONS_ERROR
;
1243 // Allocate memory for the output string.
1245 return_if_error(str_init(&dest
, allocator
));
1247 // If only listing the filter names then separate them with spaces.
1248 // Otherwise use newlines.
1249 const bool show_opts
= (flags
& (LZMA_STR_ENCODER
| LZMA_STR_DECODER
));
1250 const char *filter_delim
= show_opts
? "\n" : " ";
1252 const char *opt_delim
= (flags
& LZMA_STR_GETOPT_LONG
) ? "=" : ":";
1253 bool first_filter_printed
= false;
1255 for (size_t i
= 0; i
< ARRAY_SIZE(filter_name_map
); ++i
) {
1256 // If we are printing only one filter then skip others.
1257 if (filter_id
!= LZMA_VLI_UNKNOWN
1258 && filter_id
!= filter_name_map
[i
].id
)
1261 // If we are printing only .xz filters then skip the others.
1262 if (filter_name_map
[i
].id
>= LZMA_FILTER_RESERVED_START
1263 && (flags
& LZMA_STR_ALL_FILTERS
) == 0
1264 && filter_id
== LZMA_VLI_UNKNOWN
)
1267 // Add a new line if this isn't the first filter being
1268 // written to the string.
1269 if (first_filter_printed
)
1270 str_append_str(&dest
, filter_delim
);
1272 first_filter_printed
= true;
1274 if (flags
& LZMA_STR_GETOPT_LONG
)
1275 str_append_str(&dest
, "--");
1277 str_append_str(&dest
, filter_name_map
[i
].name
);
1279 // If only the filter names were wanted then continue
1280 // to the next filter.
1284 const option_map
*optmap
= filter_name_map
[i
].optmap
;
1285 const char *d
= opt_delim
;
1287 const size_t end
= (flags
& LZMA_STR_ENCODER
)
1288 ? filter_name_map
[i
].strfy_encoder
1289 : filter_name_map
[i
].strfy_decoder
;
1291 for (size_t j
= 0; j
< end
; ++j
) {
1292 // The first option is delimited from the filter
1293 // name using "=" or ":" and the rest of the options
1294 // are separated with ",".
1295 str_append_str(&dest
, d
);
1298 // optname=<possible_values>
1299 str_append_str(&dest
, optmap
[j
].name
);
1300 str_append_str(&dest
, "=<");
1302 if (optmap
[j
].type
== OPTMAP_TYPE_LZMA_PRESET
) {
1303 // LZMA1/2 preset has its custom help string.
1304 str_append_str(&dest
, LZMA12_PRESET_STR
);
1305 } else if (optmap
[j
].flags
1306 & OPTMAP_USE_NAME_VALUE_MAP
) {
1307 // Separate the possible option values by "|".
1308 const name_value_map
*m
= optmap
[j
].u
.map
;
1309 for (size_t k
= 0; m
[k
].name
[0] != '\0'; ++k
) {
1311 str_append_str(&dest
, "|");
1313 str_append_str(&dest
, m
[k
].name
);
1316 // Integer range is shown as min-max.
1317 const bool use_byte_suffix
= optmap
[j
].flags
1318 & OPTMAP_USE_BYTE_SUFFIX
;
1319 str_append_u32(&dest
, optmap
[j
].u
.range
.min
,
1321 str_append_str(&dest
, "-");
1322 str_append_u32(&dest
, optmap
[j
].u
.range
.max
,
1326 str_append_str(&dest
, ">");
1330 // If no filters were added to the string then it must be because
1331 // the caller provided an unsupported Filter ID.
1332 if (!first_filter_printed
) {
1333 str_free(&dest
, allocator
);
1334 return LZMA_OPTIONS_ERROR
;
1337 return str_finish(output_str
, &dest
, allocator
);