4 * file system operations
6 * Copyright (c) 2010-2025, PostgreSQL Global Development Group
7 * src/bin/pg_upgrade/file.c
10 #include "postgres_fe.h"
15 #ifdef HAVE_COPYFILE_H
19 #include <sys/ioctl.h>
23 #include "access/visibilitymapdefs.h"
24 #include "common/file_perm.h"
25 #include "pg_upgrade.h"
26 #include "storage/bufpage.h"
27 #include "storage/checksum.h"
28 #include "storage/checksum_impl.h"
34 * Clones/reflinks a relation file from src to dst.
36 * schemaName/relName are relation's SQL name (used for error messages only).
/*
 * cloneFile: clone the file src to dst and report any failure through
 * pg_fatal().  schemaName and relName are only used to build the error
 * messages.
 *
 * Two build-time variants are visible: one based on copyfile() with
 * COPYFILE_CLONE_FORCE and one based on the Linux FICLONE ioctl.
 *
 * NOTE(review): this listing carries the original line numbers of the file
 * and skips some of them (for example 41, 47-49, 60, 62-64 and everything
 * after 66), so the return type, braces, local declarations and any
 * statements on those lines are not visible here.  Confirm against the
 * complete file before changing this function.
 */
39 cloneFile(const char *src
, const char *dst
,
40 const char *schemaName
, const char *relName
)
/* Variant 1: copyfile() is available and supports COPYFILE_CLONE_FORCE. */
42 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
/* A negative return from copyfile() is treated as failure. */
43 if (copyfile(src
, dst
, NULL
, COPYFILE_CLONE_FORCE
) < 0)
44 pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
45 schemaName
, relName
, src
, dst
);
/* Variant 2: Linux with the FICLONE ioctl request defined. */
46 #elif defined(__linux__) && defined(FICLONE)
/* Open the source read-only. */
50 if ((src_fd
= open(src
, O_RDONLY
| PG_BINARY
, 0)) < 0)
51 pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %m",
52 schemaName
, relName
, src
);
/*
 * Create the destination.  O_CREAT together with O_EXCL makes open() fail
 * if dst already exists; the permission bits come from pg_file_create_mode.
 */
54 if ((dest_fd
= open(dst
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
55 pg_file_create_mode
)) < 0)
56 pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %m",
57 schemaName
, relName
, dst
);
/* Issue FICLONE on the destination descriptor, passing the source descriptor. */
59 if (ioctl(dest_fd
, FICLONE
, src_fd
) < 0)
/*
 * Capture errno immediately so that the message below reports the ioctl
 * failure via strerror() instead of %m.
 * NOTE(review): lines 62-64 are absent from this listing; presumably a
 * call there can overwrite errno -- confirm.
 */
61 int save_errno
= errno
;
65 pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
66 schemaName
, relName
, src
, dst
, strerror(save_errno
));
78 * Copies a relation file from src to dst.
79 * schemaName/relName are relation's SQL name (used for error messages only).
/*
 * copyFile: copy the file src to dst and report any failure through
 * pg_fatal().  schemaName and relName are only used to build the error
 * messages.
 *
 * The visible code has two paths: a read()/write() copy through a heap
 * buffer of COPY_BUF_SIZE bytes, and a single CopyFile() call.
 *
 * NOTE(review): the preprocessor lines that choose between the two paths,
 * the loop header around read()/write(), and the conditions guarding the
 * read-error report are on line numbers this listing skips.  Presumably the
 * CopyFile() path is the Windows build -- confirm against the complete file.
 */
82 copyFile(const char *src
, const char *dst
,
83 const char *schemaName
, const char *relName
)
/* Open the source read-only. */
90 if ((src_fd
= open(src
, O_RDONLY
| PG_BINARY
, 0)) < 0)
91 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
92 schemaName
, relName
, src
);
/*
 * Create the destination.  O_CREAT together with O_EXCL makes open() fail
 * if dst already exists; the permission bits come from pg_file_create_mode.
 */
94 if ((dest_fd
= open(dst
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
95 pg_file_create_mode
)) < 0)
96 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
97 schemaName
, relName
, dst
);
99 /* copy in fairly large chunks for best efficiency */
100 #define COPY_BUF_SIZE (50 * BLCKSZ)
/* The transfer buffer is heap-allocated with pg_malloc(). */
102 buffer
= (char *) pg_malloc(COPY_BUF_SIZE
);
104 /* perform data copying, i.e. read from the source and write to the destination */
/* Read up to COPY_BUF_SIZE bytes; nbytes is the count read() returned. */
107 ssize_t nbytes
= read(src_fd
, buffer
, COPY_BUF_SIZE
);
/*
 * Read-failure report.
 * NOTE(review): the condition guarding this call (line 109) is not visible
 * in this listing.
 */
110 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
111 schemaName
, relName
, src
);
/* Any write that does not transfer exactly nbytes bytes counts as a failure. */
117 if (write(dest_fd
, buffer
, nbytes
) != nbytes
)
119 /* if write didn't set errno, assume problem is no disk space */
122 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
123 schemaName
, relName
, dst
);
/*
 * CopyFile() path: a zero return is treated as failure.  In the Win32 API
 * the third argument (true here) asks CopyFile to fail if dst already exists.
 */
133 if (CopyFile(src
, dst
, true) == 0)
/*
 * Presumably translates the Win32 error code into errno so that the %m in
 * the message below is meaningful -- confirm.
 */
135 _dosmaperr(GetLastError());
136 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
137 schemaName
, relName
, src
, dst
);
147 * Copies a relation file from src to dst using copy_file_range().
148 * schemaName/relName are relation's SQL name (used for error messages only).
/*
 * copyFileByRange: copy the file src to dst with copy_file_range() and
 * report any failure through pg_fatal().  schemaName and relName are only
 * used to build the error messages.
 *
 * The visible body is compiled only when HAVE_COPY_FILE_RANGE is defined.
 *
 * NOTE(review): local declarations, the loop around copy_file_range(), the
 * condition guarding the final error report and the end of the #ifdef are
 * on line numbers this listing skips -- confirm against the complete file.
 */
151 copyFileByRange(const char *src
, const char *dst
,
152 const char *schemaName
, const char *relName
)
154 #ifdef HAVE_COPY_FILE_RANGE
/* Open the source read-only. */
159 if ((src_fd
= open(src
, O_RDONLY
| PG_BINARY
, 0)) < 0)
160 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
161 schemaName
, relName
, src
);
/*
 * Create the destination.  O_CREAT together with O_EXCL makes open() fail
 * if dst already exists; the permission bits come from pg_file_create_mode.
 */
163 if ((dest_fd
= open(dst
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
164 pg_file_create_mode
)) < 0)
165 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
166 schemaName
, relName
, dst
);
/*
 * Both offset pointers are NULL, so copy_file_range() works from each
 * descriptor's current file offset and advances it.  SSIZE_MAX asks for as
 * many bytes as the call is willing to copy; the flags argument is 0.
 */
170 nbytes
= copy_file_range(src_fd
, NULL
, dest_fd
, NULL
, SSIZE_MAX
, 0);
/*
 * Copy-failure report.
 * NOTE(review): the condition guarding this call (line 171) is not visible
 * in this listing.
 */
172 pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %m",
173 schemaName
, relName
, src
, dst
);
186 * Hard-links a relation file from src to dst.
187 * schemaName/relName are relation's SQL name (used for error messages only).
/*
 * linkFile: create dst as a hard link to src with link() and report a
 * failure through pg_fatal().  schemaName and relName are only used to
 * build the error message.
 *
 * NOTE(review): the return type and the braces are on line numbers this
 * listing skips (189, 192, 196).
 */
190 linkFile(const char *src
, const char *dst
,
191 const char *schemaName
, const char *relName
)
/* A negative return from link() is treated as failure; %m reports errno. */
193 if (link(src
, dst
) < 0)
194 pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
195 schemaName
, relName
, src
, dst
);
200 * rewriteVisibilityMap()
202 * Transform a visibility map file, copying from src to dst.
203 * schemaName/relName are relation's SQL name (used for error messages only).
205 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
206 * visibility map included one bit per heap page; it now includes two.
207 * When upgrading a cluster from before that time to a current PostgreSQL
208 * version, we could refuse to copy visibility maps from the old cluster
209 * to the new cluster; the next VACUUM would recreate them, but at the
210 * price of scanning the entire table. So, instead, we rewrite the old
211 * visibility maps in the new format. That way, the all-visible bits
212 * remain set for the pages for which they were set previously. The
213 * all-frozen bits are never set by this conversion; we leave that to VACUUM.
/*
 * rewriteVisibilityMap: read the visibility map file fromfile one BLCKSZ
 * page at a time and write a converted copy to tofile.  Every old payload
 * byte becomes two new bytes, so each old page is emitted as up to two new
 * pages that reuse the old page header.  Failures are reported through
 * pg_fatal(); schemaName and relName are only used in those messages.
 *
 * NOTE(review): this listing carries the original line numbers of the file
 * and skips many of them.  Several local declarations, all braces, the body
 * of the per-bit loop, the branch that picks between the two read-error
 * messages, and whatever advances old_cur and new_blkno are not visible.
 * Confirm against the complete file before changing this function.
 */
216 rewriteVisibilityMap(const char *fromfile
, const char *tofile
,
217 const char *schemaName
, const char *relName
)
/* One page-sized buffer for the old page being read ... */
221 PGIOAlignedBlock buffer
;
/* ... and one for the new page being assembled. */
222 PGIOAlignedBlock new_vmbuf
;
223 ssize_t totalBytesRead
= 0;
224 ssize_t src_filesize
;
225 int rewriteVmBytesPerPage
;
/* Block number handed to pg_checksum_page() for the page being written. */
226 BlockNumber new_blkno
= 0;
229 /* Compute number of old-format bytes per new page */
/*
 * Half of a page payload: each old byte is written out as two new bytes
 * further down, so this many old bytes fill one new page.
 */
230 rewriteVmBytesPerPage
= (BLCKSZ
- SizeOfPageHeaderData
) / 2;
/* Open the old map read-only. */
232 if ((src_fd
= open(fromfile
, O_RDONLY
| PG_BINARY
, 0)) < 0)
233 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
234 schemaName
, relName
, fromfile
);
/* The file size is needed below to recognise the last old page. */
236 if (fstat(src_fd
, &statbuf
) != 0)
237 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %m",
238 schemaName
, relName
, fromfile
);
/*
 * Create the new map.  O_CREAT together with O_EXCL makes open() fail if
 * tofile already exists; the permission bits come from pg_file_create_mode.
 */
240 if ((dst_fd
= open(tofile
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
241 pg_file_create_mode
)) < 0)
242 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
243 schemaName
, relName
, tofile
);
245 /* Save old file size */
246 src_filesize
= statbuf
.st_size
;
249 * Turn each visibility map page into 2 pages one by one. Each new page
250 * has the same page header as the old one. If the last section of the
251 * last page is empty, we skip it, mostly to avoid turning one-page
252 * visibility maps for small relations into two pages needlessly.
/* Outer loop: one iteration per old page until the whole file is consumed. */
254 while (totalBytesRead
< src_filesize
)
260 PageHeaderData pageheader
;
/* Anything other than a full BLCKSZ read is an error. */
263 if ((bytesRead
= read(src_fd
, buffer
.data
, BLCKSZ
)) != BLCKSZ
)
/*
 * Two distinct reports follow: a read error (with %m) and a short page.
 * NOTE(review): the condition selecting between them (lines 264-268) is not
 * visible in this listing.
 */
266 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
267 schemaName
, relName
, fromfile
);
269 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
270 schemaName
, relName
, fromfile
);
273 totalBytesRead
+= BLCKSZ
;
/* True once the page just read ends exactly at the end of the old file. */
274 old_lastblk
= (totalBytesRead
== src_filesize
);
276 /* Save the page header data */
277 memcpy(&pageheader
, buffer
.data
, SizeOfPageHeaderData
);
280 * These old_* variables point to old visibility map page. old_cur
281 * points to current position on old page. old_blkend points to end of
282 * old block. old_break is the end+1 position on the old page for the
283 * data that will be transferred to the current new page.
/* Start just past the page header of the old page. */
285 old_cur
= buffer
.data
+ SizeOfPageHeaderData
;
286 old_blkend
= buffer
.data
+ bytesRead
;
287 old_break
= old_cur
+ rewriteVmBytesPerPage
;
/* Middle loop: one iteration per new page produced from this old page. */
289 while (old_break
<= old_blkend
)
295 /* First, copy old page header to new page */
296 memcpy(new_vmbuf
.data
, &pageheader
, SizeOfPageHeaderData
);
298 /* Rewriting the last part of the last old page? */
299 old_lastpart
= old_lastblk
&& (old_break
== old_blkend
);
/* Output position: just past the header of the new page. */
301 new_cur
= new_vmbuf
.data
+ SizeOfPageHeaderData
;
303 /* Process old page bytes one by one, and turn it into new page. */
304 while (old_cur
< old_break
)
306 uint8 byte
= *(uint8
*) old_cur
;
/* Accumulates the 16 new-format bits derived from this one old byte. */
307 uint16 new_vmbits
= 0;
310 /* Generate new format bits while keeping old information */
311 for (i
= 0; i
< BITS_PER_BYTE
; i
++)
/*
 * Visible fragment: the all-visible flag shifted to the slot of heap block i.
 * NOTE(review): lines 312-316 are absent; presumably they test bit i of
 * byte, clear the empty flag and OR this value into new_vmbits -- confirm.
 */
317 VISIBILITYMAP_ALL_VISIBLE
<< (BITS_PER_HEAPBLOCK
* i
);
321 /* Copy new visibility map bytes to new-format page */
/* Low-order byte is stored first, then the high-order byte. */
322 new_cur
[0] = (char) (new_vmbits
& 0xFF);
323 new_cur
[1] = (char) (new_vmbits
>> 8);
/*
 * Step past the bytes just stored.
 * NOTE(review): the step is BITS_PER_HEAPBLOCK while two bytes were written
 * above, so this relies on that constant being 2 -- confirm.
 */
326 new_cur
+= BITS_PER_HEAPBLOCK
;
329 /* If the last part of the last page is empty, skip writing it */
330 if (old_lastpart
&& empty
)
333 /* Set new checksum for visibility map page, if enabled */
/* A nonzero data_checksum_version in the new cluster means checksums are on. */
334 if (new_cluster
.controldata
.data_checksum_version
!= 0)
335 ((PageHeader
) new_vmbuf
.data
)->pd_checksum
=
336 pg_checksum_page(new_vmbuf
.data
, new_blkno
);
/* Any write that does not transfer a full BLCKSZ page counts as a failure. */
339 if (write(dst_fd
, new_vmbuf
.data
, BLCKSZ
) != BLCKSZ
)
341 /* if write didn't set errno, assume problem is no disk space */
344 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
345 schemaName
, relName
, tofile
);
348 /* Advance for next new page */
349 old_break
+= rewriteVmBytesPerPage
;
/*
 * check_file_clone: probe whether a file can be cloned from the old data
 * directory into the new one.  It tries to clone PG_VERSION from
 * old_cluster.pgdata to PG_VERSION.clonetest under new_cluster.pgdata,
 * reports a failure through pg_fatal(), and removes the test file again.
 *
 * NOTE(review): the return type, braces, local declarations, the argument
 * lines of two pg_fatal() calls (379, 384) and the preprocessor lines around
 * the "not supported" fallback are on line numbers this listing skips.
 */
360 check_file_clone(void)
362 char existing_file
[MAXPGPATH
];
363 char new_link_file
[MAXPGPATH
];
/* Build the source path and the scratch target path. */
365 snprintf(existing_file
, sizeof(existing_file
), "%s/PG_VERSION", old_cluster
.pgdata
);
366 snprintf(new_link_file
, sizeof(new_link_file
), "%s/PG_VERSION.clonetest", new_cluster
.pgdata
);
/* Remove a leftover test file; the result is deliberately not checked. */
367 unlink(new_link_file
); /* might fail */
/* Variant 1: copyfile() is available and supports COPYFILE_CLONE_FORCE. */
369 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
370 if (copyfile(existing_file
, new_link_file
, NULL
, COPYFILE_CLONE_FORCE
) < 0)
371 pg_fatal("could not clone file between old and new data directories: %m");
/* Variant 2: Linux with the FICLONE ioctl request defined. */
372 #elif defined(__linux__) && defined(FICLONE)
/* Open the existing file read-only. */
377 if ((src_fd
= open(existing_file
, O_RDONLY
| PG_BINARY
, 0)) < 0)
378 pg_fatal("could not open file \"%s\": %m",
/* Create the scratch file; O_EXCL makes open() fail if it already exists. */
381 if ((dest_fd
= open(new_link_file
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
382 pg_file_create_mode
)) < 0)
383 pg_fatal("could not create file \"%s\": %m",
/* Issue FICLONE on the scratch descriptor, passing the source descriptor. */
386 if (ioctl(dest_fd
, FICLONE
, src_fd
) < 0)
387 pg_fatal("could not clone file between old and new data directories: %m");
/*
 * NOTE(review): presumably the branch taken when neither variant above is
 * compiled in; the directive selecting it is not visible -- confirm.
 */
393 pg_fatal("file cloning not supported on this platform");
/* Clean up the scratch file. */
396 unlink(new_link_file
);
/*
 * check_copy_file_range: probe whether copy_file_range() works from the old
 * data directory into the new one.  It copies PG_VERSION from
 * old_cluster.pgdata to PG_VERSION.copy_file_range_test under
 * new_cluster.pgdata, reports a failure through pg_fatal(), and removes the
 * test file again.
 *
 * NOTE(review): the return type, braces, local declarations, the argument
 * lines of two pg_fatal() calls (416, 421) and the preprocessor lines around
 * the "not supported" fallback are on line numbers this listing skips.
 */
400 check_copy_file_range(void)
402 char existing_file
[MAXPGPATH
];
403 char new_link_file
[MAXPGPATH
];
/* Build the source path and the scratch target path. */
405 snprintf(existing_file
, sizeof(existing_file
), "%s/PG_VERSION", old_cluster
.pgdata
);
406 snprintf(new_link_file
, sizeof(new_link_file
), "%s/PG_VERSION.copy_file_range_test", new_cluster
.pgdata
);
/* Remove a leftover test file; the result is deliberately not checked. */
407 unlink(new_link_file
); /* might fail */
/* The probe itself is compiled only when copy_file_range() is available. */
409 #if defined(HAVE_COPY_FILE_RANGE)
/* Open the existing file read-only. */
414 if ((src_fd
= open(existing_file
, O_RDONLY
| PG_BINARY
, 0)) < 0)
415 pg_fatal("could not open file \"%s\": %m",
/* Create the scratch file; O_EXCL makes open() fail if it already exists. */
418 if ((dest_fd
= open(new_link_file
, O_RDWR
| O_CREAT
| O_EXCL
| PG_BINARY
,
419 pg_file_create_mode
)) < 0)
420 pg_fatal("could not create file \"%s\": %m",
/*
 * Single call with NULL offsets (use each descriptor's current position) and
 * SSIZE_MAX as the length; only a negative return is treated as failure.
 */
423 if (copy_file_range(src_fd
, NULL
, dest_fd
, NULL
, SSIZE_MAX
, 0) < 0)
424 pg_fatal("could not copy file range between old and new data directories: %m");
/*
 * NOTE(review): presumably the branch taken when HAVE_COPY_FILE_RANGE is
 * not defined; the directive selecting it is not visible -- confirm.
 */
430 pg_fatal("copy_file_range not supported on this platform");
/* Clean up the scratch file. */
433 unlink(new_link_file
);
/*
 * check_hard_link: probe whether a hard link can be created from the old
 * data directory into the new one.  It links PG_VERSION from
 * old_cluster.pgdata to PG_VERSION.linktest under new_cluster.pgdata,
 * reports a failure through pg_fatal(), and removes the test link again.
 *
 * NOTE(review): the return type and the braces are on line numbers this
 * listing skips (436, 438, 451).
 */
437 check_hard_link(void)
439 char existing_file
[MAXPGPATH
];
440 char new_link_file
[MAXPGPATH
];
/* Build the source path and the scratch link path. */
442 snprintf(existing_file
, sizeof(existing_file
), "%s/PG_VERSION", old_cluster
.pgdata
);
443 snprintf(new_link_file
, sizeof(new_link_file
), "%s/PG_VERSION.linktest", new_cluster
.pgdata
);
/* Remove a leftover test link; the result is deliberately not checked. */
444 unlink(new_link_file
); /* might fail */
/*
 * A negative return from link() is treated as failure.  The message adds a
 * hint that both directories must be on the same file system.
 */
446 if (link(existing_file
, new_link_file
) < 0)
447 pg_fatal("could not create hard link between old and new data directories: %m\n"
448 "In link mode the old and new data directories must be on the same file system.");
/* Clean up the scratch link. */
450 unlink(new_link_file
);