1 /*-------------------------------------------------------------------------
4 * Routines for archivers to write an uncompressed or compressed data
5 * stream.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * This file includes two APIs for dealing with compressed data. The first
11 * provides more flexibility, using callbacks to read/write data from the
12 * underlying stream. The second API is a wrapper around fopen and
13 * friends, providing a similar interface while abstracting away
14 * any compression in use. The second API is intended to produce
15 * files that can easily be manipulated with an external compression utility
16 * program.
21 * The interface for writing to an archive consists of three functions:
22 * AllocateCompressor, writeData, and EndCompressor. First you call
23 * AllocateCompressor, then write all the data by calling writeData as many
24 * times as needed, and finally EndCompressor. writeData will call the
25 * WriteFunc that was provided to AllocateCompressor for each chunk of
26 * compressed data.
28 * The interface for reading an archive consists of the same three functions:
29 * AllocateCompressor, readData, and EndCompressor. First you call
30 * AllocateCompressor, then read all the data by calling readData to read the
31 * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32 * returns the compressed data one chunk at a time. Then readData decompresses
33 * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34 * to signal EOF. The interface is the same for compressed and uncompressed
35 * streams.
37 * Compressed stream API
38 * ----------------------
40 * The compressed stream API provides a set of function pointers for
41 * opening, reading, writing, and finally closing files. The implemented
42 * function pointers are documented in the corresponding header file and are
43 * common for all streams. It allows the caller to use the same functions for
44 * both compressed and uncompressed streams.
46 * The interface consists of three functions, InitCompressFileHandle,
47 * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48 * compression is known, then start by calling InitCompressFileHandle,
49 * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50 * the function pointers as required for the read/write operations. Finally
51 * call EndCompressFileHandle to end the stream.
53 * InitDiscoverCompressFileHandle tries to infer the compression by the
54 * filename suffix. If the suffix is not yet known then it tries to simply
55 * open the file and if it fails, it tries to open the same file with
56 * compressed suffixes (.gz, .lz4 and .zst, in this order).
59 * src/bin/pg_dump/compress_io.c
61 *-------------------------------------------------------------------------
63 #include "postgres_fe.h"
68 #include "compress_gzip.h"
69 #include "compress_io.h"
70 #include "compress_lz4.h"
71 #include "compress_none.h"
72 #include "compress_zstd.h"
74 /*----------------------
76 *----------------------
80 * Checks whether support for a compression algorithm is implemented in
83 * On success returns NULL, otherwise returns a malloc'ed string which can be
84 * used by the caller in an error message.
87 supports_compression(const pg_compress_specification compression_spec
)
89 const pg_compress_algorithm algorithm
= compression_spec
.algorithm
;
90 bool supported
= false;
92 if (algorithm
== PG_COMPRESSION_NONE
)
95 if (algorithm
== PG_COMPRESSION_GZIP
)
99 if (algorithm
== PG_COMPRESSION_LZ4
)
103 if (algorithm
== PG_COMPRESSION_ZSTD
)
108 return psprintf(_("this build does not support compression with %s"),
109 get_compress_algorithm_name(algorithm
));
114 /*----------------------
116 *----------------------
120 * Allocate a new compressor.
123 AllocateCompressor(const pg_compress_specification compression_spec
,
124 ReadFunc readF
, WriteFunc writeF
)
128 cs
= (CompressorState
*) pg_malloc0(sizeof(CompressorState
));
132 if (compression_spec
.algorithm
== PG_COMPRESSION_NONE
)
133 InitCompressorNone(cs
, compression_spec
);
134 else if (compression_spec
.algorithm
== PG_COMPRESSION_GZIP
)
135 InitCompressorGzip(cs
, compression_spec
);
136 else if (compression_spec
.algorithm
== PG_COMPRESSION_LZ4
)
137 InitCompressorLZ4(cs
, compression_spec
);
138 else if (compression_spec
.algorithm
== PG_COMPRESSION_ZSTD
)
139 InitCompressorZstd(cs
, compression_spec
);
/*
 * NOTE(review): only the description and the signature of EndCompressor
 * are visible here; the function body is not part of this extract, so its
 * exact behavior must be confirmed against the complete file.  The
 * parameters are the archive handle and the compressor state to finish.
 */
145 * Terminate compression library context and flush its buffers.
148 EndCompressor(ArchiveHandle
*AH
, CompressorState
*cs
)
154 /*----------------------
155 * Compressed stream API
156 *----------------------
/*
 * Report whether 'filename' ends with 'suffix'.
 *
 * Returns 1 if the last strlen(suffix) bytes of filename equal suffix
 * (an empty suffix always matches), and 0 otherwise, including when the
 * filename is shorter than the suffix.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* strlen() returns size_t; keep that type to avoid narrowing. */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* Guard the subtraction below: a shorter name cannot match. */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(&filename[filenamelen - suffixlen],
				  suffix,
				  suffixlen) == 0;
}
/*
 * free() without changing errno; useful in several places below.
 *
 * errno is saved before the call and restored afterwards, so callers can
 * release memory on an error path without losing the error code they are
 * about to report.  Passing NULL is allowed, as with free().
 */
static void
free_keep_errno(void *p)
{
	int			save_errno = errno;

	free(p);
	errno = save_errno;
}
191 * Initialize a compress file handle for the specified compression algorithm.
194 InitCompressFileHandle(const pg_compress_specification compression_spec
)
196 CompressFileHandle
*CFH
;
198 CFH
= pg_malloc0(sizeof(CompressFileHandle
));
200 if (compression_spec
.algorithm
== PG_COMPRESSION_NONE
)
201 InitCompressFileHandleNone(CFH
, compression_spec
);
202 else if (compression_spec
.algorithm
== PG_COMPRESSION_GZIP
)
203 InitCompressFileHandleGzip(CFH
, compression_spec
);
204 else if (compression_spec
.algorithm
== PG_COMPRESSION_LZ4
)
205 InitCompressFileHandleLZ4(CFH
, compression_spec
);
206 else if (compression_spec
.algorithm
== PG_COMPRESSION_ZSTD
)
207 InitCompressFileHandleZstd(CFH
, compression_spec
);
/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * 'ext' is given without the leading dot; the tested name is "path.ext".
 *
 * The filename of the tested file is stored to fname buffer (the existing
 * buffer is freed, new buffer is allocated and returned through the pointer).
 * The buffer is replaced whether or not the file exists.
 *
 * Returns true if access() reports that the file exists.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	/* Drop the previous candidate without disturbing errno. */
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);
	return (access(*fname, F_OK) == 0);
}
227 * Open a file for reading. 'path' is the file to open, and 'mode' should
228 * be either "r" or "rb".
230 * If the file at 'path' contains the suffix of a supported compression method,
231 * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
232 * throughout. Otherwise the compression will be inferred by iteratively trying
233 * to open the file at 'path', first as is, then by appending known compression
234 * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
235 * "foo.{gz,lz4,zst}", trying in that order.
237 * On failure, return NULL with an error code in errno.
240 InitDiscoverCompressFileHandle(const char *path
, const char *mode
)
242 CompressFileHandle
*CFH
= NULL
;
245 pg_compress_specification compression_spec
= {0};
247 compression_spec
.algorithm
= PG_COMPRESSION_NONE
;
249 Assert(strcmp(mode
, PG_BINARY_R
) == 0);
251 fname
= pg_strdup(path
);
253 if (hasSuffix(fname
, ".gz"))
254 compression_spec
.algorithm
= PG_COMPRESSION_GZIP
;
255 else if (hasSuffix(fname
, ".lz4"))
256 compression_spec
.algorithm
= PG_COMPRESSION_LZ4
;
257 else if (hasSuffix(fname
, ".zst"))
258 compression_spec
.algorithm
= PG_COMPRESSION_ZSTD
;
261 if (stat(path
, &st
) == 0)
262 compression_spec
.algorithm
= PG_COMPRESSION_NONE
;
263 else if (check_compressed_file(path
, &fname
, "gz"))
264 compression_spec
.algorithm
= PG_COMPRESSION_GZIP
;
265 else if (check_compressed_file(path
, &fname
, "lz4"))
266 compression_spec
.algorithm
= PG_COMPRESSION_LZ4
;
267 else if (check_compressed_file(path
, &fname
, "zst"))
268 compression_spec
.algorithm
= PG_COMPRESSION_ZSTD
;
271 CFH
= InitCompressFileHandle(compression_spec
);
272 if (!CFH
->open_func(fname
, -1, mode
, CFH
))
274 free_keep_errno(CFH
);
277 free_keep_errno(fname
);
283 * Close an open file handle and release its memory.
285 * On failure, returns false and sets errno appropriately.
288 EndCompressFileHandle(CompressFileHandle
*CFH
)
292 if (CFH
->private_data
)
293 ret
= CFH
->close_func(CFH
);
295 free_keep_errno(CFH
);