Fix pgbench performance issue induced by commit af35fe501.
[pgsql.git] / src / bin / pg_rewind / file_ops.c
blob467845419edaa3f87fcaef7a755e19881ee1d23b
/*-------------------------------------------------------------------------
 *
 * file_ops.c
 *	  Helper functions for operating on files.
 *
 * Most of the functions in this file are helper functions for writing to
 * the target data directory. The functions check the --dry-run flag, and
 * do nothing if it's enabled. You should avoid accessing the target files
 * directly but if you do, make sure you honor the --dry-run mode!
 *
 * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres_fe.h"
17 #include <sys/stat.h>
18 #include <dirent.h>
19 #include <fcntl.h>
20 #include <unistd.h>
22 #include "common/file_perm.h"
23 #include "common/file_utils.h"
24 #include "file_ops.h"
25 #include "filemap.h"
26 #include "pg_rewind.h"
29 * Currently open target file.
31 static int dstfd = -1;
32 static char dstpath[MAXPGPATH] = "";
34 static void create_target_dir(const char *path);
35 static void remove_target_dir(const char *path);
36 static void create_target_symlink(const char *path, const char *link);
37 static void remove_target_symlink(const char *path);
39 static void recurse_dir(const char *datadir, const char *parentpath,
40 process_file_callback_t callback);
43 * Open a target file for writing. If 'trunc' is true and the file already
44 * exists, it will be truncated.
46 void
47 open_target_file(const char *path, bool trunc)
49 int mode;
51 if (dry_run)
52 return;
54 if (dstfd != -1 && !trunc &&
55 strcmp(path, &dstpath[strlen(datadir_target) + 1]) == 0)
56 return; /* already open */
58 close_target_file();
60 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
62 mode = O_WRONLY | O_CREAT | PG_BINARY;
63 if (trunc)
64 mode |= O_TRUNC;
65 dstfd = open(dstpath, mode, pg_file_create_mode);
66 if (dstfd < 0)
67 pg_fatal("could not open target file \"%s\": %m",
68 dstpath);
72 * Close target file, if it's open.
74 void
75 close_target_file(void)
77 if (dstfd == -1)
78 return;
80 if (close(dstfd) != 0)
81 pg_fatal("could not close target file \"%s\": %m",
82 dstpath);
84 dstfd = -1;
87 void
88 write_target_range(char *buf, off_t begin, size_t size)
90 size_t writeleft;
91 char *p;
93 /* update progress report */
94 fetch_done += size;
95 progress_report(false);
97 if (dry_run)
98 return;
100 if (lseek(dstfd, begin, SEEK_SET) == -1)
101 pg_fatal("could not seek in target file \"%s\": %m",
102 dstpath);
104 writeleft = size;
105 p = buf;
106 while (writeleft > 0)
108 ssize_t writelen;
110 errno = 0;
111 writelen = write(dstfd, p, writeleft);
112 if (writelen < 0)
114 /* if write didn't set errno, assume problem is no disk space */
115 if (errno == 0)
116 errno = ENOSPC;
117 pg_fatal("could not write file \"%s\": %m",
118 dstpath);
121 p += writelen;
122 writeleft -= writelen;
125 /* keep the file open, in case we need to copy more blocks in it */
129 void
130 remove_target(file_entry_t *entry)
132 Assert(entry->action == FILE_ACTION_REMOVE);
133 Assert(entry->target_exists);
135 switch (entry->target_type)
137 case FILE_TYPE_DIRECTORY:
138 remove_target_dir(entry->path);
139 break;
141 case FILE_TYPE_REGULAR:
142 remove_target_file(entry->path, false);
143 break;
145 case FILE_TYPE_SYMLINK:
146 remove_target_symlink(entry->path);
147 break;
149 case FILE_TYPE_UNDEFINED:
150 pg_fatal("undefined file type for \"%s\"", entry->path);
151 break;
155 void
156 create_target(file_entry_t *entry)
158 Assert(entry->action == FILE_ACTION_CREATE);
159 Assert(!entry->target_exists);
161 switch (entry->source_type)
163 case FILE_TYPE_DIRECTORY:
164 create_target_dir(entry->path);
165 break;
167 case FILE_TYPE_SYMLINK:
168 create_target_symlink(entry->path, entry->source_link_target);
169 break;
171 case FILE_TYPE_REGULAR:
172 /* can't happen. Regular files are created with open_target_file. */
173 pg_fatal("invalid action (CREATE) for regular file");
174 break;
176 case FILE_TYPE_UNDEFINED:
177 pg_fatal("undefined file type for \"%s\"", entry->path);
178 break;
183 * Remove a file from target data directory. If missing_ok is true, it
184 * is fine for the target file to not exist.
186 void
187 remove_target_file(const char *path, bool missing_ok)
189 char dstpath[MAXPGPATH];
191 if (dry_run)
192 return;
194 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
195 if (unlink(dstpath) != 0)
197 if (errno == ENOENT && missing_ok)
198 return;
200 pg_fatal("could not remove file \"%s\": %m",
201 dstpath);
205 void
206 truncate_target_file(const char *path, off_t newsize)
208 char dstpath[MAXPGPATH];
209 int fd;
211 if (dry_run)
212 return;
214 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
216 fd = open(dstpath, O_WRONLY, pg_file_create_mode);
217 if (fd < 0)
218 pg_fatal("could not open file \"%s\" for truncation: %m",
219 dstpath);
221 if (ftruncate(fd, newsize) != 0)
222 pg_fatal("could not truncate file \"%s\" to %u: %m",
223 dstpath, (unsigned int) newsize);
225 close(fd);
228 static void
229 create_target_dir(const char *path)
231 char dstpath[MAXPGPATH];
233 if (dry_run)
234 return;
236 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
237 if (mkdir(dstpath, pg_dir_create_mode) != 0)
238 pg_fatal("could not create directory \"%s\": %m",
239 dstpath);
242 static void
243 remove_target_dir(const char *path)
245 char dstpath[MAXPGPATH];
247 if (dry_run)
248 return;
250 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
251 if (rmdir(dstpath) != 0)
252 pg_fatal("could not remove directory \"%s\": %m",
253 dstpath);
256 static void
257 create_target_symlink(const char *path, const char *link)
259 char dstpath[MAXPGPATH];
261 if (dry_run)
262 return;
264 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
265 if (symlink(link, dstpath) != 0)
266 pg_fatal("could not create symbolic link at \"%s\": %m",
267 dstpath);
270 static void
271 remove_target_symlink(const char *path)
273 char dstpath[MAXPGPATH];
275 if (dry_run)
276 return;
278 snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
279 if (unlink(dstpath) != 0)
280 pg_fatal("could not remove symbolic link \"%s\": %m",
281 dstpath);
285 * Sync target data directory to ensure that modifications are safely on disk.
287 * We do this once, for the whole data directory, for performance reasons. At
288 * the end of pg_rewind's run, the kernel is likely to already have flushed
289 * most dirty buffers to disk. Additionally sync_pgdata uses a two-pass
290 * approach when fsync is specified (only initiating writeback in the first
291 * pass), which often reduces the overall amount of IO noticeably.
293 void
294 sync_target_dir(void)
296 if (!do_sync || dry_run)
297 return;
299 sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
304 * Read a file into memory. The file to be read is <datadir>/<path>.
305 * The file contents are returned in a malloc'd buffer, and *filesize
306 * is set to the length of the file.
308 * The returned buffer is always zero-terminated; the size of the returned
309 * buffer is actually *filesize + 1. That's handy when reading a text file.
310 * This function can be used to read binary files as well, you can just
311 * ignore the zero-terminator in that case.
313 char *
314 slurpFile(const char *datadir, const char *path, size_t *filesize)
316 int fd;
317 char *buffer;
318 struct stat statbuf;
319 char fullpath[MAXPGPATH];
320 int len;
321 int r;
323 snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
325 if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) == -1)
326 pg_fatal("could not open file \"%s\" for reading: %m",
327 fullpath);
329 if (fstat(fd, &statbuf) < 0)
330 pg_fatal("could not open file \"%s\" for reading: %m",
331 fullpath);
333 len = statbuf.st_size;
335 buffer = pg_malloc(len + 1);
337 r = read(fd, buffer, len);
338 if (r != len)
340 if (r < 0)
341 pg_fatal("could not read file \"%s\": %m",
342 fullpath);
343 else
344 pg_fatal("could not read file \"%s\": read %d of %zu",
345 fullpath, r, (Size) len);
347 close(fd);
349 /* Zero-terminate the buffer. */
350 buffer[len] = '\0';
352 if (filesize)
353 *filesize = len;
354 return buffer;
358 * Traverse through all files in a data directory, calling 'callback'
359 * for each file.
361 void
362 traverse_datadir(const char *datadir, process_file_callback_t callback)
364 recurse_dir(datadir, NULL, callback);
368 * recursive part of traverse_datadir
370 * parentpath is the current subdirectory's path relative to datadir,
371 * or NULL at the top level.
373 static void
374 recurse_dir(const char *datadir, const char *parentpath,
375 process_file_callback_t callback)
377 DIR *xldir;
378 struct dirent *xlde;
379 char fullparentpath[MAXPGPATH];
381 if (parentpath)
382 snprintf(fullparentpath, MAXPGPATH, "%s/%s", datadir, parentpath);
383 else
384 snprintf(fullparentpath, MAXPGPATH, "%s", datadir);
386 xldir = opendir(fullparentpath);
387 if (xldir == NULL)
388 pg_fatal("could not open directory \"%s\": %m",
389 fullparentpath);
391 while (errno = 0, (xlde = readdir(xldir)) != NULL)
393 struct stat fst;
394 char fullpath[MAXPGPATH * 2];
395 char path[MAXPGPATH * 2];
397 if (strcmp(xlde->d_name, ".") == 0 ||
398 strcmp(xlde->d_name, "..") == 0)
399 continue;
401 snprintf(fullpath, sizeof(fullpath), "%s/%s", fullparentpath, xlde->d_name);
403 if (lstat(fullpath, &fst) < 0)
405 if (errno == ENOENT)
408 * File doesn't exist anymore. This is ok, if the new primary
409 * is running and the file was just removed. If it was a data
410 * file, there should be a WAL record of the removal. If it
411 * was something else, it couldn't have been anyway.
413 * TODO: But complain if we're processing the target dir!
416 else
417 pg_fatal("could not stat file \"%s\": %m",
418 fullpath);
421 if (parentpath)
422 snprintf(path, sizeof(path), "%s/%s", parentpath, xlde->d_name);
423 else
424 snprintf(path, sizeof(path), "%s", xlde->d_name);
426 if (S_ISREG(fst.st_mode))
427 callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL);
428 else if (S_ISDIR(fst.st_mode))
430 callback(path, FILE_TYPE_DIRECTORY, 0, NULL);
431 /* recurse to handle subdirectories */
432 recurse_dir(datadir, path, callback);
434 else if (S_ISLNK(fst.st_mode))
436 char link_target[MAXPGPATH];
437 int len;
439 len = readlink(fullpath, link_target, sizeof(link_target));
440 if (len < 0)
441 pg_fatal("could not read symbolic link \"%s\": %m",
442 fullpath);
443 if (len >= sizeof(link_target))
444 pg_fatal("symbolic link \"%s\" target is too long",
445 fullpath);
446 link_target[len] = '\0';
448 callback(path, FILE_TYPE_SYMLINK, 0, link_target);
451 * If it's a symlink within pg_tblspc, we need to recurse into it,
452 * to process all the tablespaces. We also follow a symlink if
453 * it's for pg_wal. Symlinks elsewhere are ignored.
455 if ((parentpath && strcmp(parentpath, PG_TBLSPC_DIR) == 0) ||
456 strcmp(path, "pg_wal") == 0)
457 recurse_dir(datadir, path, callback);
461 if (errno)
462 pg_fatal("could not read directory \"%s\": %m",
463 fullparentpath);
465 if (closedir(xldir))
466 pg_fatal("could not close directory \"%s\": %m",
467 fullparentpath);