1 // SPDX-License-Identifier: 0BSD
3 ///////////////////////////////////////////////////////////////////////////////
5 /// \file 01_compress_easy.c
6 /// \brief Compress from stdin to stdout in multi-call mode
8 /// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE
10 /// Example: ./01_compress_easy 6 < foo > foo.xz
12 // Author: Lasse Collin
14 ///////////////////////////////////////////////////////////////////////////////
// Print a short usage message to stderr and terminate the program.
//
// argv0 is the program name to show in the message (normally argv[0]).
// This function does not return.
//
// NOTE(review): the return type and the exit() call were not present in
// the damaged listing; they are restored from the function's name and the
// way callers use it. Confirm against the pristine file.
static void
show_usage_and_exit(const char *argv0)
{
	fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n"
			"PRESET is a number 0-9 and can optionally be "
			"followed by 'e' to indicate extreme preset\n",
			argv0);
	exit(EXIT_FAILURE);
}
36 get_preset(int argc
, char **argv
)
38 // One argument whose first char must be 0-9.
39 if (argc
!= 2 || argv
[1][0] < '0' || argv
[1][0] > '9')
40 show_usage_and_exit(argv
[0]);
42 // Calculate the preste level 0-9.
43 uint32_t preset
= argv
[1][0] - '0';
45 // If there is a second char, it must be 'e'. It will set
46 // the LZMA_PRESET_EXTREME flag.
47 if (argv
[1][1] != '\0') {
48 if (argv
[1][1] != 'e' || argv
[1][2] != '\0')
49 show_usage_and_exit(argv
[0]);
51 preset
|= LZMA_PRESET_EXTREME
;
59 init_encoder(lzma_stream
*strm
, uint32_t preset
)
61 // Initialize the encoder using a preset. Set the integrity to check
62 // to CRC64, which is the default in the xz command line tool. If
63 // the .xz file needs to be decompressed with XZ Embedded, use
64 // LZMA_CHECK_CRC32 instead.
65 lzma_ret ret
= lzma_easy_encoder(strm
, preset
, LZMA_CHECK_CRC64
);
67 // Return successfully if the initialization went fine.
71 // Something went wrong. The possible errors are documented in
72 // lzma/container.h (src/liblzma/api/lzma/container.h in the source
73 // package or e.g. /usr/include/lzma/container.h depending on the
78 msg
= "Memory allocation failed";
81 case LZMA_OPTIONS_ERROR
:
82 msg
= "Specified preset is not supported";
85 case LZMA_UNSUPPORTED_CHECK
:
86 msg
= "Specified integrity check is not supported";
90 // This is most likely LZMA_PROG_ERROR indicating a bug in
91 // this program or in liblzma. It is inconvenient to have a
92 // separate error message for errors that should be impossible
93 // to occur, but knowing the error code is important for
94 // debugging. That's why it is good to print the error code
95 // at least when there is no good error message to show.
96 msg
= "Unknown error, possibly a bug";
100 fprintf(stderr
, "Error initializing the encoder: %s (error code %u)\n",
107 compress(lzma_stream
*strm
, FILE *infile
, FILE *outfile
)
109 // This will be LZMA_RUN until the end of the input file is reached.
110 // This tells lzma_code() when there will be no more input.
111 lzma_action action
= LZMA_RUN
;
113 // Buffers to temporarily hold uncompressed input
114 // and compressed output.
115 uint8_t inbuf
[BUFSIZ
];
116 uint8_t outbuf
[BUFSIZ
];
118 // Initialize the input and output pointers. Initializing next_in
119 // and avail_in isn't really necessary when we are going to encode
120 // just one file since LZMA_STREAM_INIT takes care of initializing
121 // those already. But it doesn't hurt much and it will be needed
122 // if encoding more than one file like we will in 02_decompress.c.
124 // While we don't care about strm->total_in or strm->total_out in this
125 // example, it is worth noting that initializing the encoder will
126 // always reset total_in and total_out to zero. But the encoder
127 // initialization doesn't touch next_in, avail_in, next_out, or
129 strm
->next_in
= NULL
;
131 strm
->next_out
= outbuf
;
132 strm
->avail_out
= sizeof(outbuf
);
134 // Loop until the file has been successfully compressed or until
137 // Fill the input buffer if it is empty.
138 if (strm
->avail_in
== 0 && !feof(infile
)) {
139 strm
->next_in
= inbuf
;
140 strm
->avail_in
= fread(inbuf
, 1, sizeof(inbuf
),
143 if (ferror(infile
)) {
144 fprintf(stderr
, "Read error: %s\n",
149 // Once the end of the input file has been reached,
150 // we need to tell lzma_code() that no more input
151 // will be coming and that it should finish the
154 action
= LZMA_FINISH
;
157 // Tell liblzma do the actual encoding.
159 // This reads up to strm->avail_in bytes of input starting
160 // from strm->next_in. avail_in will be decremented and
161 // next_in incremented by an equal amount to match the
162 // number of input bytes consumed.
164 // Up to strm->avail_out bytes of compressed output will be
165 // written starting from strm->next_out. avail_out and next_out
166 // will be incremented by an equal amount to match the number
167 // of output bytes written.
169 // The encoder has to do internal buffering, which means that
170 // it may take quite a bit of input before the same data is
171 // available in compressed form in the output buffer.
172 lzma_ret ret
= lzma_code(strm
, action
);
174 // If the output buffer is full or if the compression finished
175 // successfully, write the data from the output buffer to
177 if (strm
->avail_out
== 0 || ret
== LZMA_STREAM_END
) {
178 // When lzma_code() has returned LZMA_STREAM_END,
179 // the output buffer is likely to be only partially
180 // full. Calculate how much new data there is to
181 // be written to the output file.
182 size_t write_size
= sizeof(outbuf
) - strm
->avail_out
;
184 if (fwrite(outbuf
, 1, write_size
, outfile
)
186 fprintf(stderr
, "Write error: %s\n",
191 // Reset next_out and avail_out.
192 strm
->next_out
= outbuf
;
193 strm
->avail_out
= sizeof(outbuf
);
196 // Normally the return value of lzma_code() will be LZMA_OK
197 // until everything has been encoded.
198 if (ret
!= LZMA_OK
) {
199 // Once everything has been encoded successfully, the
200 // return value of lzma_code() will be LZMA_STREAM_END.
202 // It is important to check for LZMA_STREAM_END. Do not
203 // assume that getting ret != LZMA_OK would mean that
204 // everything has gone well.
205 if (ret
== LZMA_STREAM_END
)
208 // It's not LZMA_OK nor LZMA_STREAM_END,
209 // so it must be an error code. See lzma/base.h
210 // (src/liblzma/api/lzma/base.h in the source package
211 // or e.g. /usr/include/lzma/base.h depending on the
212 // install prefix) for the list and documentation of
213 // possible values. Most values listen in lzma_ret
214 // enumeration aren't possible in this example.
218 msg
= "Memory allocation failed";
221 case LZMA_DATA_ERROR
:
222 // This error is returned if the compressed
223 // or uncompressed size get near 8 EiB
224 // (2^63 bytes) because that's where the .xz
225 // file format size limits currently are.
226 // That is, the possibility of this error
227 // is mostly theoretical unless you are doing
228 // something very unusual.
230 // Note that strm->total_in and strm->total_out
231 // have nothing to do with this error. Changing
232 // those variables won't increase or decrease
233 // the chance of getting this error.
234 msg
= "File size limits exceeded";
238 // This is most likely LZMA_PROG_ERROR, but
239 // if this program is buggy (or liblzma has
240 // a bug), it may be e.g. LZMA_BUF_ERROR or
241 // LZMA_OPTIONS_ERROR too.
243 // It is inconvenient to have a separate
244 // error message for errors that should be
245 // impossible to occur, but knowing the error
246 // code is important for debugging. That's why
247 // it is good to print the error code at least
248 // when there is no good error message to show.
249 msg
= "Unknown error, possibly a bug";
253 fprintf(stderr
, "Encoder error: %s (error code %u)\n",
262 main(int argc
, char **argv
)
264 // Get the preset number from the command line.
265 uint32_t preset
= get_preset(argc
, argv
);
267 // Initialize a lzma_stream structure. When it is allocated on stack,
268 // it is simplest to use LZMA_STREAM_INIT macro like below. When it
269 // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
270 // works (as long as NULL pointers are represented with zero bits
271 // as they are on practically all computers today).
272 lzma_stream strm
= LZMA_STREAM_INIT
;
274 // Initialize the encoder. If it succeeds, compress from
276 bool success
= init_encoder(&strm
, preset
);
278 success
= compress(&strm
, stdin
, stdout
);
280 // Free the memory allocated for the encoder. If we were encoding
281 // multiple files, this would only need to be done after the last
282 // file. See 02_decompress.c for handling of multiple files.
284 // It is OK to call lzma_end() multiple times or when it hasn't been
285 // actually used except initialized with LZMA_STREAM_INIT.
288 // Close stdout to catch possible write errors that can occur
289 // when pending data is flushed from the stdio buffers.
290 if (fclose(stdout
)) {
291 fprintf(stderr
, "Write error: %s\n", strerror(errno
));
295 return success
? EXIT_SUCCESS
: EXIT_FAILURE
;