/*-------------------------------------------------------------------------
 *
 * tablespace.c
 *	  Commands to manipulate table spaces
 *
 * Tablespaces in PostgreSQL are designed to allow users to determine
 * where the data file(s) for a given database object reside on the file
 * system.
 *
 * A tablespace represents a directory on the file system. At tablespace
 * creation time, the directory must be empty. To simplify things and
 * remove the possibility of having file name conflicts, we isolate
 * files within a tablespace into database-specific subdirectories.
 *
 * To support file access via the information given in RelFileLocator, we
 * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are
 * named by tablespace OIDs and point to the actual tablespace directories.
 * There is also a per-cluster version directory in each tablespace.
 * Thus the full path to an arbitrary file is
 *			$PGDATA/pg_tblspc/spcoid/PG_MAJORVER_CATVER/dboid/relfilenumber
 * e.g.
 *			$PGDATA/pg_tblspc/20981/PG_9.0_201002161/719849/83292814
 *
 * There are two tablespaces created at initdb time: pg_global (for shared
 * tables) and pg_default (for everything else).  For backwards compatibility
 * and to remain functional on platforms without symlinks, these tablespaces
 * are accessed specially: they are respectively
 *			$PGDATA/global/relfilenumber
 *			$PGDATA/base/dboid/relfilenumber
 *
 * To allow CREATE DATABASE to give a new database a default tablespace
 * that's different from the template database's default, we make the
 * provision that a zero in pg_class.reltablespace means the database's
 * default tablespace.  Without this, CREATE DATABASE would have to go in
 * and munge the system catalogs of the new database.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/tablespace.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/reloptions.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/binary_upgrade.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_tablespace.h"
#include "commands/comment.h"
#include "commands/seclabel.h"
#include "commands/tablespace.h"
#include "common/file_perm.h"
#include "miscadmin.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
#include "storage/standby.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/guc_hooks.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/varlena.h"
83 char *default_tablespace
= NULL
;
84 char *temp_tablespaces
= NULL
;
85 bool allow_in_place_tablespaces
= false;
87 Oid binary_upgrade_next_pg_tablespace_oid
= InvalidOid
;
89 static void create_tablespace_directories(const char *location
,
90 const Oid tablespaceoid
);
91 static bool destroy_tablespace_directories(Oid tablespaceoid
, bool redo
);
95 * Each database using a table space is isolated into its own name space
96 * by a subdirectory named for the database OID. On first creation of an
97 * object in the tablespace, create the subdirectory. If the subdirectory
98 * already exists, fall through quietly.
100 * isRedo indicates that we are creating an object during WAL replay.
101 * In this case we will cope with the possibility of the tablespace
102 * directory not being there either --- this could happen if we are
103 * replaying an operation on a table in a subsequently-dropped tablespace.
104 * We handle this by making a directory in the place where the tablespace
105 * symlink would normally be. This isn't an exact replay of course, but
106 * it's the best we can do given the available information.
108 * If tablespaces are not supported, we still need it in case we have to
109 * re-create a database subdirectory (of $PGDATA/base) during WAL replay.
112 TablespaceCreateDbspace(Oid spcOid
, Oid dbOid
, bool isRedo
)
118 * The global tablespace doesn't have per-database subdirectories, so
119 * nothing to do for it.
121 if (spcOid
== GLOBALTABLESPACE_OID
)
124 Assert(OidIsValid(spcOid
));
125 Assert(OidIsValid(dbOid
));
127 dir
= GetDatabasePath(dbOid
, spcOid
);
129 if (stat(dir
, &st
) < 0)
131 /* Directory does not exist? */
135 * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
136 * or TablespaceCreateDbspace is running concurrently.
138 LWLockAcquire(TablespaceCreateLock
, LW_EXCLUSIVE
);
141 * Recheck to see if someone created the directory while we were
144 if (stat(dir
, &st
) == 0 && S_ISDIR(st
.st_mode
))
146 /* Directory was created */
150 /* Directory creation failed? */
151 if (MakePGDirectory(dir
) < 0)
153 /* Failure other than not exists or not in WAL replay? */
154 if (errno
!= ENOENT
|| !isRedo
)
156 (errcode_for_file_access(),
157 errmsg("could not create directory \"%s\": %m",
161 * During WAL replay, it's conceivable that several levels
162 * of directories are missing if tablespaces are dropped
163 * further ahead of the WAL stream than we're currently
164 * replaying. An easy way forward is to create them as
165 * plain directories and hope they are removed by further
166 * WAL replay if necessary. If this also fails, there is
167 * trouble we cannot get out of, so just report that and
170 if (pg_mkdir_p(dir
, pg_dir_create_mode
) < 0)
172 (errcode_for_file_access(),
173 errmsg("could not create directory \"%s\": %m",
178 LWLockRelease(TablespaceCreateLock
);
183 (errcode_for_file_access(),
184 errmsg("could not stat directory \"%s\": %m", dir
)));
189 /* Is it not a directory? */
190 if (!S_ISDIR(st
.st_mode
))
192 (errcode(ERRCODE_WRONG_OBJECT_TYPE
),
193 errmsg("\"%s\" exists but is not a directory",
201 * Create a table space
203 * Only superusers can create a tablespace. This seems a reasonable restriction
204 * since we're determining the system layout and, anyway, we probably have
205 * root if we're doing this kind of activity
208 CreateTableSpace(CreateTableSpaceStmt
*stmt
)
211 Datum values
[Natts_pg_tablespace
];
212 bool nulls
[Natts_pg_tablespace
] = {0};
220 /* Must be superuser */
223 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
224 errmsg("permission denied to create tablespace \"%s\"",
225 stmt
->tablespacename
),
226 errhint("Must be superuser to create a tablespace.")));
228 /* However, the eventual owner of the tablespace need not be */
230 ownerId
= get_rolespec_oid(stmt
->owner
, false);
232 ownerId
= GetUserId();
234 /* Unix-ify the offered path, and strip any trailing slashes */
235 location
= pstrdup(stmt
->location
);
236 canonicalize_path(location
);
238 /* disallow quotes, else CREATE DATABASE would be at risk */
239 if (strchr(location
, '\''))
241 (errcode(ERRCODE_INVALID_NAME
),
242 errmsg("tablespace location cannot contain single quotes")));
244 in_place
= allow_in_place_tablespaces
&& strlen(location
) == 0;
247 * Allowing relative paths seems risky
249 * This also helps us ensure that location is not empty or whitespace,
250 * unless specifying a developer-only in-place tablespace.
252 if (!in_place
&& !is_absolute_path(location
))
254 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
255 errmsg("tablespace location must be an absolute path")));
258 * Check that location isn't too long. Remember that we're going to append
259 * 'PG_XXX/<dboid>/<relid>_<fork>.<nnn>'. FYI, we never actually
260 * reference the whole path here, but MakePGDirectory() uses the first two
263 if (strlen(location
) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY
) + 1 +
264 OIDCHARS
+ 1 + OIDCHARS
+ 1 + FORKNAMECHARS
+ 1 + OIDCHARS
> MAXPGPATH
)
266 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
267 errmsg("tablespace location \"%s\" is too long",
270 /* Warn if the tablespace is in the data directory. */
271 if (path_is_prefix_of_path(DataDir
, location
))
273 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
274 errmsg("tablespace location should not be inside the data directory")));
277 * Disallow creation of tablespaces named "pg_xxx"; we reserve this
278 * namespace for system purposes.
280 if (!allowSystemTableMods
&& IsReservedName(stmt
->tablespacename
))
282 (errcode(ERRCODE_RESERVED_NAME
),
283 errmsg("unacceptable tablespace name \"%s\"",
284 stmt
->tablespacename
),
285 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));
288 * If built with appropriate switch, whine when regression-testing
289 * conventions for tablespace names are violated.
291 #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
292 if (strncmp(stmt
->tablespacename
, "regress_", 8) != 0)
293 elog(WARNING
, "tablespaces created by regression test cases should have names starting with \"regress_\"");
297 * Check that there is no other tablespace by this name. (The unique
298 * index would catch this anyway, but might as well give a friendlier
301 if (OidIsValid(get_tablespace_oid(stmt
->tablespacename
, true)))
303 (errcode(ERRCODE_DUPLICATE_OBJECT
),
304 errmsg("tablespace \"%s\" already exists",
305 stmt
->tablespacename
)));
308 * Insert tuple into pg_tablespace. The purpose of doing this first is to
309 * lock the proposed tablename against other would-be creators. The
310 * insertion will roll back if we find problems below.
312 rel
= table_open(TableSpaceRelationId
, RowExclusiveLock
);
316 /* Use binary-upgrade override for tablespace oid */
317 if (!OidIsValid(binary_upgrade_next_pg_tablespace_oid
))
319 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
320 errmsg("pg_tablespace OID value not set when in binary upgrade mode")));
322 tablespaceoid
= binary_upgrade_next_pg_tablespace_oid
;
323 binary_upgrade_next_pg_tablespace_oid
= InvalidOid
;
326 tablespaceoid
= GetNewOidWithIndex(rel
, TablespaceOidIndexId
,
327 Anum_pg_tablespace_oid
);
328 values
[Anum_pg_tablespace_oid
- 1] = ObjectIdGetDatum(tablespaceoid
);
329 values
[Anum_pg_tablespace_spcname
- 1] =
330 DirectFunctionCall1(namein
, CStringGetDatum(stmt
->tablespacename
));
331 values
[Anum_pg_tablespace_spcowner
- 1] =
332 ObjectIdGetDatum(ownerId
);
333 nulls
[Anum_pg_tablespace_spcacl
- 1] = true;
335 /* Generate new proposed spcoptions (text array) */
336 newOptions
= transformRelOptions((Datum
) 0,
338 NULL
, NULL
, false, false);
339 (void) tablespace_reloptions(newOptions
, true);
340 if (newOptions
!= (Datum
) 0)
341 values
[Anum_pg_tablespace_spcoptions
- 1] = newOptions
;
343 nulls
[Anum_pg_tablespace_spcoptions
- 1] = true;
345 tuple
= heap_form_tuple(rel
->rd_att
, values
, nulls
);
347 CatalogTupleInsert(rel
, tuple
);
349 heap_freetuple(tuple
);
351 /* Record dependency on owner */
352 recordDependencyOnOwner(TableSpaceRelationId
, tablespaceoid
, ownerId
);
354 /* Post creation hook for new tablespace */
355 InvokeObjectPostCreateHook(TableSpaceRelationId
, tablespaceoid
, 0);
357 create_tablespace_directories(location
, tablespaceoid
);
359 /* Record the filesystem change in XLOG */
361 xl_tblspc_create_rec xlrec
;
363 xlrec
.ts_id
= tablespaceoid
;
366 XLogRegisterData((char *) &xlrec
,
367 offsetof(xl_tblspc_create_rec
, ts_path
));
368 XLogRegisterData((char *) location
, strlen(location
) + 1);
370 (void) XLogInsert(RM_TBLSPC_ID
, XLOG_TBLSPC_CREATE
);
374 * Force synchronous commit, to minimize the window between creating the
375 * symlink on-disk and marking the transaction committed. It's not great
376 * that there is any window at all, but definitely we don't want to make
377 * it larger than necessary.
383 /* We keep the lock on pg_tablespace until commit */
384 table_close(rel
, NoLock
);
386 return tablespaceoid
;
392 * Be careful to check that the tablespace is empty.
395 DropTableSpace(DropTableSpaceStmt
*stmt
)
397 char *tablespacename
= stmt
->tablespacename
;
398 TableScanDesc scandesc
;
401 Form_pg_tablespace spcform
;
402 ScanKeyData entry
[1];
408 * Find the target tuple
410 rel
= table_open(TableSpaceRelationId
, RowExclusiveLock
);
412 ScanKeyInit(&entry
[0],
413 Anum_pg_tablespace_spcname
,
414 BTEqualStrategyNumber
, F_NAMEEQ
,
415 CStringGetDatum(tablespacename
));
416 scandesc
= table_beginscan_catalog(rel
, 1, entry
);
417 tuple
= heap_getnext(scandesc
, ForwardScanDirection
);
419 if (!HeapTupleIsValid(tuple
))
421 if (!stmt
->missing_ok
)
424 (errcode(ERRCODE_UNDEFINED_OBJECT
),
425 errmsg("tablespace \"%s\" does not exist",
431 (errmsg("tablespace \"%s\" does not exist, skipping",
433 table_endscan(scandesc
);
434 table_close(rel
, NoLock
);
439 spcform
= (Form_pg_tablespace
) GETSTRUCT(tuple
);
440 tablespaceoid
= spcform
->oid
;
442 /* Must be tablespace owner */
443 if (!object_ownercheck(TableSpaceRelationId
, tablespaceoid
, GetUserId()))
444 aclcheck_error(ACLCHECK_NOT_OWNER
, OBJECT_TABLESPACE
,
447 /* Disallow drop of the standard tablespaces, even by superuser */
448 if (IsPinnedObject(TableSpaceRelationId
, tablespaceoid
))
449 aclcheck_error(ACLCHECK_NO_PRIV
, OBJECT_TABLESPACE
,
452 /* Check for pg_shdepend entries depending on this tablespace */
453 if (checkSharedDependencies(TableSpaceRelationId
, tablespaceoid
,
454 &detail
, &detail_log
))
456 (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST
),
457 errmsg("tablespace \"%s\" cannot be dropped because some objects depend on it",
459 errdetail_internal("%s", detail
),
460 errdetail_log("%s", detail_log
)));
462 /* DROP hook for the tablespace being removed */
463 InvokeObjectDropHook(TableSpaceRelationId
, tablespaceoid
, 0);
466 * Remove the pg_tablespace tuple (this will roll back if we fail below)
468 CatalogTupleDelete(rel
, &tuple
->t_self
);
470 table_endscan(scandesc
);
473 * Remove any comments or security labels on this tablespace.
475 DeleteSharedComments(tablespaceoid
, TableSpaceRelationId
);
476 DeleteSharedSecurityLabel(tablespaceoid
, TableSpaceRelationId
);
479 * Remove dependency on owner.
481 deleteSharedDependencyRecordsFor(TableSpaceRelationId
, tablespaceoid
, 0);
484 * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
485 * is running concurrently.
487 LWLockAcquire(TablespaceCreateLock
, LW_EXCLUSIVE
);
490 * Try to remove the physical infrastructure.
492 if (!destroy_tablespace_directories(tablespaceoid
, false))
495 * Not all files deleted? However, there can be lingering empty files
496 * in the directories, left behind by for example DROP TABLE, that
497 * have been scheduled for deletion at next checkpoint (see comments
498 * in mdunlink() for details). We could just delete them immediately,
499 * but we can't tell them apart from important data files that we
500 * mustn't delete. So instead, we force a checkpoint which will clean
501 * out any lingering files, and try again.
503 RequestCheckpoint(CHECKPOINT_IMMEDIATE
| CHECKPOINT_FORCE
| CHECKPOINT_WAIT
);
506 * On Windows, an unlinked file persists in the directory listing
507 * until no process retains an open handle for the file. The DDL
508 * commands that schedule files for unlink send invalidation messages
509 * directing other PostgreSQL processes to close the files, but
510 * nothing guarantees they'll be processed in time. So, we'll also
511 * use a global barrier to ask all backends to close all files, and
512 * wait until they're finished.
514 LWLockRelease(TablespaceCreateLock
);
515 WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE
));
516 LWLockAcquire(TablespaceCreateLock
, LW_EXCLUSIVE
);
518 /* And now try again. */
519 if (!destroy_tablespace_directories(tablespaceoid
, false))
521 /* Still not empty, the files must be important then */
523 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
524 errmsg("tablespace \"%s\" is not empty",
529 /* Record the filesystem change in XLOG */
531 xl_tblspc_drop_rec xlrec
;
533 xlrec
.ts_id
= tablespaceoid
;
536 XLogRegisterData((char *) &xlrec
, sizeof(xl_tblspc_drop_rec
));
538 (void) XLogInsert(RM_TBLSPC_ID
, XLOG_TBLSPC_DROP
);
542 * Note: because we checked that the tablespace was empty, there should be
543 * no need to worry about flushing shared buffers or free space map
544 * entries for relations in the tablespace.
548 * Force synchronous commit, to minimize the window between removing the
549 * files on-disk and marking the transaction committed. It's not great
550 * that there is any window at all, but definitely we don't want to make
551 * it larger than necessary.
556 * Allow TablespaceCreateDbspace again.
558 LWLockRelease(TablespaceCreateLock
);
560 /* We keep the lock on pg_tablespace until commit */
561 table_close(rel
, NoLock
);
566 * create_tablespace_directories
568 * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/
569 * to the specified directory
572 create_tablespace_directories(const char *location
, const Oid tablespaceoid
)
575 char *location_with_version_dir
;
579 linkloc
= psprintf("%s/%u", PG_TBLSPC_DIR
, tablespaceoid
);
582 * If we're asked to make an 'in place' tablespace, create the directory
583 * directly where the symlink would normally go. This is a developer-only
584 * option for now, to facilitate regression testing.
586 in_place
= strlen(location
) == 0;
590 if (MakePGDirectory(linkloc
) < 0 && errno
!= EEXIST
)
592 (errcode_for_file_access(),
593 errmsg("could not create directory \"%s\": %m",
597 location_with_version_dir
= psprintf("%s/%s", in_place
? linkloc
: location
,
598 TABLESPACE_VERSION_DIRECTORY
);
601 * Attempt to coerce target directory to safe permissions. If this fails,
602 * it doesn't exist or has the wrong owner. Not needed for in-place mode,
603 * because in that case we created the directory with the desired
606 if (!in_place
&& chmod(location
, pg_dir_create_mode
) != 0)
610 (errcode(ERRCODE_UNDEFINED_FILE
),
611 errmsg("directory \"%s\" does not exist", location
),
612 InRecovery
? errhint("Create this directory for the tablespace before "
613 "restarting the server.") : 0));
616 (errcode_for_file_access(),
617 errmsg("could not set permissions on directory \"%s\": %m",
622 * The creation of the version directory prevents more than one tablespace
623 * in a single location. This imitates TablespaceCreateDbspace(), but it
624 * ignores concurrency and missing parent directories. The chmod() would
625 * have failed in the absence of a parent. pg_tablespace_spcname_index
626 * prevents concurrency.
628 if (stat(location_with_version_dir
, &st
) < 0)
632 (errcode_for_file_access(),
633 errmsg("could not stat directory \"%s\": %m",
634 location_with_version_dir
)));
635 else if (MakePGDirectory(location_with_version_dir
) < 0)
637 (errcode_for_file_access(),
638 errmsg("could not create directory \"%s\": %m",
639 location_with_version_dir
)));
641 else if (!S_ISDIR(st
.st_mode
))
643 (errcode(ERRCODE_WRONG_OBJECT_TYPE
),
644 errmsg("\"%s\" exists but is not a directory",
645 location_with_version_dir
)));
646 else if (!InRecovery
)
648 (errcode(ERRCODE_OBJECT_IN_USE
),
649 errmsg("directory \"%s\" already in use as a tablespace",
650 location_with_version_dir
)));
653 * In recovery, remove old symlink, in case it points to the wrong place.
655 if (!in_place
&& InRecovery
)
656 remove_tablespace_symlink(linkloc
);
659 * Create the symlink under PGDATA
661 if (!in_place
&& symlink(location
, linkloc
) < 0)
663 (errcode_for_file_access(),
664 errmsg("could not create symbolic link \"%s\": %m",
668 pfree(location_with_version_dir
);
673 * destroy_tablespace_directories
675 * Attempt to remove filesystem infrastructure for the tablespace.
677 * 'redo' indicates we are redoing a drop from XLOG; in that case we should
678 * not throw an ERROR for problems, just LOG them. The worst consequence of
679 * not removing files here would be failure to release some disk space, which
680 * does not justify throwing an error that would require manual intervention
681 * to get the database running again.
683 * Returns true if successful, false if some subdirectory is not empty
686 destroy_tablespace_directories(Oid tablespaceoid
, bool redo
)
689 char *linkloc_with_version_dir
;
695 linkloc_with_version_dir
= psprintf("%s/%u/%s", PG_TBLSPC_DIR
, tablespaceoid
,
696 TABLESPACE_VERSION_DIRECTORY
);
699 * Check if the tablespace still contains any files. We try to rmdir each
700 * per-database directory we find in it. rmdir failure implies there are
701 * still files in that subdirectory, so give up. (We do not have to worry
702 * about undoing any already completed rmdirs, since the next attempt to
703 * use the tablespace from that database will simply recreate the
704 * subdirectory via TablespaceCreateDbspace.)
706 * Since we hold TablespaceCreateLock, no one else should be creating any
707 * fresh subdirectories in parallel. It is possible that new files are
708 * being created within subdirectories, though, so the rmdir call could
709 * fail. Worst consequence is a less friendly error message.
711 * If redo is true then ENOENT is a likely outcome here, and we allow it
712 * to pass without comment. In normal operation we still allow it, but
713 * with a warning. This is because even though ProcessUtility disallows
714 * DROP TABLESPACE in a transaction block, it's possible that a previous
715 * DROP failed and rolled back after removing the tablespace directories
716 * and/or symlink. We want to allow a new DROP attempt to succeed at
717 * removing the catalog entries (and symlink if still present), so we
718 * should not give a hard error here.
720 dirdesc
= AllocateDir(linkloc_with_version_dir
);
727 (errcode_for_file_access(),
728 errmsg("could not open directory \"%s\": %m",
729 linkloc_with_version_dir
)));
730 /* The symlink might still exist, so go try to remove it */
735 /* in redo, just log other types of error */
737 (errcode_for_file_access(),
738 errmsg("could not open directory \"%s\": %m",
739 linkloc_with_version_dir
)));
740 pfree(linkloc_with_version_dir
);
743 /* else let ReadDir report the error */
746 while ((de
= ReadDir(dirdesc
, linkloc_with_version_dir
)) != NULL
)
748 if (strcmp(de
->d_name
, ".") == 0 ||
749 strcmp(de
->d_name
, "..") == 0)
752 subfile
= psprintf("%s/%s", linkloc_with_version_dir
, de
->d_name
);
754 /* This check is just to deliver a friendlier error message */
755 if (!redo
&& !directory_is_empty(subfile
))
759 pfree(linkloc_with_version_dir
);
763 /* remove empty directory */
764 if (rmdir(subfile
) < 0)
765 ereport(redo
? LOG
: ERROR
,
766 (errcode_for_file_access(),
767 errmsg("could not remove directory \"%s\": %m",
775 /* remove version directory */
776 if (rmdir(linkloc_with_version_dir
) < 0)
778 ereport(redo
? LOG
: ERROR
,
779 (errcode_for_file_access(),
780 errmsg("could not remove directory \"%s\": %m",
781 linkloc_with_version_dir
)));
782 pfree(linkloc_with_version_dir
);
787 * Try to remove the symlink. We must however deal with the possibility
788 * that it's a directory instead of a symlink --- this could happen during
789 * WAL replay (see TablespaceCreateDbspace).
791 * Note: in the redo case, we'll return true if this final step fails;
792 * there's no point in retrying it. Also, ENOENT should provoke no more
796 linkloc
= pstrdup(linkloc_with_version_dir
);
797 get_parent_directory(linkloc
);
798 if (lstat(linkloc
, &st
) < 0)
800 int saved_errno
= errno
;
802 ereport(redo
? LOG
: (saved_errno
== ENOENT
? WARNING
: ERROR
),
803 (errcode_for_file_access(),
804 errmsg("could not stat file \"%s\": %m",
807 else if (S_ISDIR(st
.st_mode
))
809 if (rmdir(linkloc
) < 0)
811 int saved_errno
= errno
;
813 ereport(redo
? LOG
: (saved_errno
== ENOENT
? WARNING
: ERROR
),
814 (errcode_for_file_access(),
815 errmsg("could not remove directory \"%s\": %m",
819 else if (S_ISLNK(st
.st_mode
))
821 if (unlink(linkloc
) < 0)
823 int saved_errno
= errno
;
825 ereport(redo
? LOG
: (saved_errno
== ENOENT
? WARNING
: ERROR
),
826 (errcode_for_file_access(),
827 errmsg("could not remove symbolic link \"%s\": %m",
833 /* Refuse to remove anything that's not a directory or symlink */
834 ereport(redo
? LOG
: ERROR
,
835 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
836 errmsg("\"%s\" is not a directory or symbolic link",
840 pfree(linkloc_with_version_dir
);
/*
 * Check if a directory is empty.
 *
 * Returns false as soon as any entry other than "." and ".." is found,
 * true otherwise.
 *
 * This probably belongs somewhere else, but not sure where...
 */
bool
directory_is_empty(const char *path)
{
	DIR		   *dirdesc;
	struct dirent *de;

	dirdesc = AllocateDir(path);

	while ((de = ReadDir(dirdesc, path)) != NULL)
	{
		if (strcmp(de->d_name, ".") == 0 ||
			strcmp(de->d_name, "..") == 0)
			continue;
		FreeDir(dirdesc);
		return false;
	}

	FreeDir(dirdesc);
	return true;
}
874 * remove_tablespace_symlink
876 * This function removes symlinks in pg_tblspc. On Windows, junction points
877 * act like directories so we must be able to apply rmdir. This function
878 * works like the symlink removal code in destroy_tablespace_directories,
879 * except that failure to remove is always an ERROR. But if the file doesn't
880 * exist at all, that's OK.
883 remove_tablespace_symlink(const char *linkloc
)
887 if (lstat(linkloc
, &st
) < 0)
892 (errcode_for_file_access(),
893 errmsg("could not stat file \"%s\": %m", linkloc
)));
896 if (S_ISDIR(st
.st_mode
))
899 * This will fail if the directory isn't empty, but not if it's a
902 if (rmdir(linkloc
) < 0 && errno
!= ENOENT
)
904 (errcode_for_file_access(),
905 errmsg("could not remove directory \"%s\": %m",
908 else if (S_ISLNK(st
.st_mode
))
910 if (unlink(linkloc
) < 0 && errno
!= ENOENT
)
912 (errcode_for_file_access(),
913 errmsg("could not remove symbolic link \"%s\": %m",
918 /* Refuse to remove anything that's not a directory or symlink */
920 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
921 errmsg("\"%s\" is not a directory or symbolic link",
927 * Rename a tablespace
930 RenameTableSpace(const char *oldname
, const char *newname
)
934 ScanKeyData entry
[1];
938 Form_pg_tablespace newform
;
939 ObjectAddress address
;
941 /* Search pg_tablespace */
942 rel
= table_open(TableSpaceRelationId
, RowExclusiveLock
);
944 ScanKeyInit(&entry
[0],
945 Anum_pg_tablespace_spcname
,
946 BTEqualStrategyNumber
, F_NAMEEQ
,
947 CStringGetDatum(oldname
));
948 scan
= table_beginscan_catalog(rel
, 1, entry
);
949 tup
= heap_getnext(scan
, ForwardScanDirection
);
950 if (!HeapTupleIsValid(tup
))
952 (errcode(ERRCODE_UNDEFINED_OBJECT
),
953 errmsg("tablespace \"%s\" does not exist",
956 newtuple
= heap_copytuple(tup
);
957 newform
= (Form_pg_tablespace
) GETSTRUCT(newtuple
);
958 tspId
= newform
->oid
;
963 if (!object_ownercheck(TableSpaceRelationId
, tspId
, GetUserId()))
964 aclcheck_error(ACLCHECK_NO_PRIV
, OBJECT_TABLESPACE
, oldname
);
966 /* Validate new name */
967 if (!allowSystemTableMods
&& IsReservedName(newname
))
969 (errcode(ERRCODE_RESERVED_NAME
),
970 errmsg("unacceptable tablespace name \"%s\"", newname
),
971 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));
974 * If built with appropriate switch, whine when regression-testing
975 * conventions for tablespace names are violated.
977 #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
978 if (strncmp(newname
, "regress_", 8) != 0)
979 elog(WARNING
, "tablespaces created by regression test cases should have names starting with \"regress_\"");
982 /* Make sure the new name doesn't exist */
983 ScanKeyInit(&entry
[0],
984 Anum_pg_tablespace_spcname
,
985 BTEqualStrategyNumber
, F_NAMEEQ
,
986 CStringGetDatum(newname
));
987 scan
= table_beginscan_catalog(rel
, 1, entry
);
988 tup
= heap_getnext(scan
, ForwardScanDirection
);
989 if (HeapTupleIsValid(tup
))
991 (errcode(ERRCODE_DUPLICATE_OBJECT
),
992 errmsg("tablespace \"%s\" already exists",
997 /* OK, update the entry */
998 namestrcpy(&(newform
->spcname
), newname
);
1000 CatalogTupleUpdate(rel
, &newtuple
->t_self
, newtuple
);
1002 InvokeObjectPostAlterHook(TableSpaceRelationId
, tspId
, 0);
1004 ObjectAddressSet(address
, TableSpaceRelationId
, tspId
);
1006 table_close(rel
, NoLock
);
1012 * Alter table space options
1015 AlterTableSpaceOptions(AlterTableSpaceOptionsStmt
*stmt
)
1018 ScanKeyData entry
[1];
1019 TableScanDesc scandesc
;
1024 Datum repl_val
[Natts_pg_tablespace
];
1026 bool repl_null
[Natts_pg_tablespace
];
1027 bool repl_repl
[Natts_pg_tablespace
];
1030 /* Search pg_tablespace */
1031 rel
= table_open(TableSpaceRelationId
, RowExclusiveLock
);
1033 ScanKeyInit(&entry
[0],
1034 Anum_pg_tablespace_spcname
,
1035 BTEqualStrategyNumber
, F_NAMEEQ
,
1036 CStringGetDatum(stmt
->tablespacename
));
1037 scandesc
= table_beginscan_catalog(rel
, 1, entry
);
1038 tup
= heap_getnext(scandesc
, ForwardScanDirection
);
1039 if (!HeapTupleIsValid(tup
))
1041 (errcode(ERRCODE_UNDEFINED_OBJECT
),
1042 errmsg("tablespace \"%s\" does not exist",
1043 stmt
->tablespacename
)));
1045 tablespaceoid
= ((Form_pg_tablespace
) GETSTRUCT(tup
))->oid
;
1047 /* Must be owner of the existing object */
1048 if (!object_ownercheck(TableSpaceRelationId
, tablespaceoid
, GetUserId()))
1049 aclcheck_error(ACLCHECK_NOT_OWNER
, OBJECT_TABLESPACE
,
1050 stmt
->tablespacename
);
1052 /* Generate new proposed spcoptions (text array) */
1053 datum
= heap_getattr(tup
, Anum_pg_tablespace_spcoptions
,
1054 RelationGetDescr(rel
), &isnull
);
1055 newOptions
= transformRelOptions(isnull
? (Datum
) 0 : datum
,
1056 stmt
->options
, NULL
, NULL
, false,
1058 (void) tablespace_reloptions(newOptions
, true);
1060 /* Build new tuple. */
1061 memset(repl_null
, false, sizeof(repl_null
));
1062 memset(repl_repl
, false, sizeof(repl_repl
));
1063 if (newOptions
!= (Datum
) 0)
1064 repl_val
[Anum_pg_tablespace_spcoptions
- 1] = newOptions
;
1066 repl_null
[Anum_pg_tablespace_spcoptions
- 1] = true;
1067 repl_repl
[Anum_pg_tablespace_spcoptions
- 1] = true;
1068 newtuple
= heap_modify_tuple(tup
, RelationGetDescr(rel
), repl_val
,
1069 repl_null
, repl_repl
);
1071 /* Update system catalog. */
1072 CatalogTupleUpdate(rel
, &newtuple
->t_self
, newtuple
);
1074 InvokeObjectPostAlterHook(TableSpaceRelationId
, tablespaceoid
, 0);
1076 heap_freetuple(newtuple
);
1078 /* Conclude heap scan. */
1079 table_endscan(scandesc
);
1080 table_close(rel
, NoLock
);
1082 return tablespaceoid
;
1086 * Routines for handling the GUC variable 'default_tablespace'.
1089 /* check_hook: validate new default_tablespace */
1091 check_default_tablespace(char **newval
, void **extra
, GucSource source
)
1094 * If we aren't inside a transaction, or connected to a database, we
1095 * cannot do the catalog accesses necessary to verify the name. Must
1096 * accept the value on faith.
1098 if (IsTransactionState() && MyDatabaseId
!= InvalidOid
)
1100 if (**newval
!= '\0' &&
1101 !OidIsValid(get_tablespace_oid(*newval
, true)))
1104 * When source == PGC_S_TEST, don't throw a hard error for a
1105 * nonexistent tablespace, only a NOTICE. See comments in guc.h.
1107 if (source
== PGC_S_TEST
)
1110 (errcode(ERRCODE_UNDEFINED_OBJECT
),
1111 errmsg("tablespace \"%s\" does not exist",
1116 GUC_check_errdetail("Tablespace \"%s\" does not exist.",
1127 * GetDefaultTablespace -- get the OID of the current default tablespace
1129 * Temporary objects have different default tablespaces, hence the
1130 * relpersistence parameter must be specified. Also, for partitioned tables,
1131 * we disallow specifying the database default, so that needs to be specified
1134 * May return InvalidOid to indicate "use the database's default tablespace".
1136 * Note that caller is expected to check appropriate permissions for any
1137 * result other than InvalidOid.
1139 * This exists to hide (and possibly optimize the use of) the
1140 * default_tablespace GUC variable.
1143 GetDefaultTablespace(char relpersistence
, bool partitioned
)
1147 /* The temp-table case is handled elsewhere */
1148 if (relpersistence
== RELPERSISTENCE_TEMP
)
1150 PrepareTempTablespaces();
1151 return GetNextTempTableSpace();
1154 /* Fast path for default_tablespace == "" */
1155 if (default_tablespace
== NULL
|| default_tablespace
[0] == '\0')
1159 * It is tempting to cache this lookup for more speed, but then we would
1160 * fail to detect the case where the tablespace was dropped since the GUC
1161 * variable was set. Note also that we don't complain if the value fails
1162 * to refer to an existing tablespace; we just silently return InvalidOid,
1163 * causing the new object to be created in the database's tablespace.
1165 result
= get_tablespace_oid(default_tablespace
, true);
1168 * Allow explicit specification of database's default tablespace in
1169 * default_tablespace without triggering permissions checks. Don't allow
1170 * specifying that when creating a partitioned table, however, since the
1171 * result is confusing.
1173 if (result
== MyDatabaseTableSpace
)
1177 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
1178 errmsg("cannot specify default tablespace for partitioned relations")));
1179 result
= InvalidOid
;
/*
 * Routines for handling the GUC variable 'temp_tablespaces'.
 */
1191 /* Array of OIDs to be passed to SetTempTablespaces() */
1193 Oid tblSpcs
[FLEXIBLE_ARRAY_MEMBER
];
1194 } temp_tablespaces_extra
;
1196 /* check_hook: validate new temp_tablespaces */
1198 check_temp_tablespaces(char **newval
, void **extra
, GucSource source
)
1203 /* Need a modifiable copy of string */
1204 rawname
= pstrdup(*newval
);
1206 /* Parse string into list of identifiers */
1207 if (!SplitIdentifierString(rawname
, ',', &namelist
))
1209 /* syntax error in name list */
1210 GUC_check_errdetail("List syntax is invalid.");
1212 list_free(namelist
);
1217 * If we aren't inside a transaction, or connected to a database, we
1218 * cannot do the catalog accesses necessary to verify the name. Must
1219 * accept the value on faith. Fortunately, there's then also no need to
1220 * pass the data to fd.c.
1222 if (IsTransactionState() && MyDatabaseId
!= InvalidOid
)
1224 temp_tablespaces_extra
*myextra
;
1229 /* temporary workspace until we are done verifying the list */
1230 tblSpcs
= (Oid
*) palloc(list_length(namelist
) * sizeof(Oid
));
1232 foreach(l
, namelist
)
1234 char *curname
= (char *) lfirst(l
);
1236 AclResult aclresult
;
1238 /* Allow an empty string (signifying database default) */
1239 if (curname
[0] == '\0')
1241 /* InvalidOid signifies database's default tablespace */
1242 tblSpcs
[numSpcs
++] = InvalidOid
;
1247 * In an interactive SET command, we ereport for bad info. When
1248 * source == PGC_S_TEST, don't throw a hard error for a
1249 * nonexistent tablespace, only a NOTICE. See comments in guc.h.
1251 curoid
= get_tablespace_oid(curname
, source
<= PGC_S_TEST
);
1252 if (curoid
== InvalidOid
)
1254 if (source
== PGC_S_TEST
)
1256 (errcode(ERRCODE_UNDEFINED_OBJECT
),
1257 errmsg("tablespace \"%s\" does not exist",
1263 * Allow explicit specification of database's default tablespace
1264 * in temp_tablespaces without triggering permissions checks.
1266 if (curoid
== MyDatabaseTableSpace
)
1268 /* InvalidOid signifies database's default tablespace */
1269 tblSpcs
[numSpcs
++] = InvalidOid
;
1273 /* Check permissions, similarly complaining only if interactive */
1274 aclresult
= object_aclcheck(TableSpaceRelationId
, curoid
, GetUserId(),
1276 if (aclresult
!= ACLCHECK_OK
)
1278 if (source
>= PGC_S_INTERACTIVE
)
1279 aclcheck_error(aclresult
, OBJECT_TABLESPACE
, curname
);
1283 tblSpcs
[numSpcs
++] = curoid
;
1286 /* Now prepare an "extra" struct for assign_temp_tablespaces */
1287 myextra
= guc_malloc(LOG
, offsetof(temp_tablespaces_extra
, tblSpcs
) +
1288 numSpcs
* sizeof(Oid
));
1291 myextra
->numSpcs
= numSpcs
;
1292 memcpy(myextra
->tblSpcs
, tblSpcs
, numSpcs
* sizeof(Oid
));
1299 list_free(namelist
);
1304 /* assign_hook: do extra actions as needed */
1306 assign_temp_tablespaces(const char *newval
, void *extra
)
1308 temp_tablespaces_extra
*myextra
= (temp_tablespaces_extra
*) extra
;
1311 * If check_temp_tablespaces was executed inside a transaction, then pass
1312 * the list it made to fd.c. Otherwise, clear fd.c's list; we must be
1313 * still outside a transaction, or else restoring during transaction exit,
1314 * and in either case we can just let the next PrepareTempTablespaces call
1318 SetTempTablespaces(myextra
->tblSpcs
, myextra
->numSpcs
);
1320 SetTempTablespaces(NULL
, 0);
1324 * PrepareTempTablespaces -- prepare to use temp tablespaces
1326 * If we have not already done so in the current transaction, parse the
1327 * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
1331 PrepareTempTablespaces(void)
1339 /* No work if already done in current transaction */
1340 if (TempTablespacesAreSet())
1344 * Can't do catalog access unless within a transaction. This is just a
1345 * safety check in case this function is called by low-level code that
1346 * could conceivably execute outside a transaction. Note that in such a
1347 * scenario, fd.c will fall back to using the current database's default
1348 * tablespace, which should always be OK.
1350 if (!IsTransactionState())
1353 /* Need a modifiable copy of string */
1354 rawname
= pstrdup(temp_tablespaces
);
1356 /* Parse string into list of identifiers */
1357 if (!SplitIdentifierString(rawname
, ',', &namelist
))
1359 /* syntax error in name list */
1360 SetTempTablespaces(NULL
, 0);
1362 list_free(namelist
);
1366 /* Store tablespace OIDs in an array in TopTransactionContext */
1367 tblSpcs
= (Oid
*) MemoryContextAlloc(TopTransactionContext
,
1368 list_length(namelist
) * sizeof(Oid
));
1370 foreach(l
, namelist
)
1372 char *curname
= (char *) lfirst(l
);
1374 AclResult aclresult
;
1376 /* Allow an empty string (signifying database default) */
1377 if (curname
[0] == '\0')
1379 /* InvalidOid signifies database's default tablespace */
1380 tblSpcs
[numSpcs
++] = InvalidOid
;
1384 /* Else verify that name is a valid tablespace name */
1385 curoid
= get_tablespace_oid(curname
, true);
1386 if (curoid
== InvalidOid
)
1388 /* Skip any bad list elements */
1393 * Allow explicit specification of database's default tablespace in
1394 * temp_tablespaces without triggering permissions checks.
1396 if (curoid
== MyDatabaseTableSpace
)
1398 /* InvalidOid signifies database's default tablespace */
1399 tblSpcs
[numSpcs
++] = InvalidOid
;
1403 /* Check permissions similarly */
1404 aclresult
= object_aclcheck(TableSpaceRelationId
, curoid
, GetUserId(),
1406 if (aclresult
!= ACLCHECK_OK
)
1409 tblSpcs
[numSpcs
++] = curoid
;
1412 SetTempTablespaces(tblSpcs
, numSpcs
);
1415 list_free(namelist
);
1420 * get_tablespace_oid - given a tablespace name, look up the OID
1422 * If missing_ok is false, throw an error if tablespace name not found. If
1423 * true, just return InvalidOid.
1426 get_tablespace_oid(const char *tablespacename
, bool missing_ok
)
1430 TableScanDesc scandesc
;
1432 ScanKeyData entry
[1];
1435 * Search pg_tablespace. We use a heapscan here even though there is an
1436 * index on name, on the theory that pg_tablespace will usually have just
1437 * a few entries and so an indexed lookup is a waste of effort.
1439 rel
= table_open(TableSpaceRelationId
, AccessShareLock
);
1441 ScanKeyInit(&entry
[0],
1442 Anum_pg_tablespace_spcname
,
1443 BTEqualStrategyNumber
, F_NAMEEQ
,
1444 CStringGetDatum(tablespacename
));
1445 scandesc
= table_beginscan_catalog(rel
, 1, entry
);
1446 tuple
= heap_getnext(scandesc
, ForwardScanDirection
);
1448 /* We assume that there can be at most one matching tuple */
1449 if (HeapTupleIsValid(tuple
))
1450 result
= ((Form_pg_tablespace
) GETSTRUCT(tuple
))->oid
;
1452 result
= InvalidOid
;
1454 table_endscan(scandesc
);
1455 table_close(rel
, AccessShareLock
);
1457 if (!OidIsValid(result
) && !missing_ok
)
1459 (errcode(ERRCODE_UNDEFINED_OBJECT
),
1460 errmsg("tablespace \"%s\" does not exist",
1467 * get_tablespace_name - given a tablespace OID, look up the name
1469 * Returns a palloc'd string, or NULL if no such tablespace.
1472 get_tablespace_name(Oid spc_oid
)
1476 TableScanDesc scandesc
;
1478 ScanKeyData entry
[1];
1481 * Search pg_tablespace. We use a heapscan here even though there is an
1482 * index on oid, on the theory that pg_tablespace will usually have just a
1483 * few entries and so an indexed lookup is a waste of effort.
1485 rel
= table_open(TableSpaceRelationId
, AccessShareLock
);
1487 ScanKeyInit(&entry
[0],
1488 Anum_pg_tablespace_oid
,
1489 BTEqualStrategyNumber
, F_OIDEQ
,
1490 ObjectIdGetDatum(spc_oid
));
1491 scandesc
= table_beginscan_catalog(rel
, 1, entry
);
1492 tuple
= heap_getnext(scandesc
, ForwardScanDirection
);
1494 /* We assume that there can be at most one matching tuple */
1495 if (HeapTupleIsValid(tuple
))
1496 result
= pstrdup(NameStr(((Form_pg_tablespace
) GETSTRUCT(tuple
))->spcname
));
1500 table_endscan(scandesc
);
1501 table_close(rel
, AccessShareLock
);
1508 * TABLESPACE resource manager's routines
1511 tblspc_redo(XLogReaderState
*record
)
1513 uint8 info
= XLogRecGetInfo(record
) & ~XLR_INFO_MASK
;
1515 /* Backup blocks are not used in tblspc records */
1516 Assert(!XLogRecHasAnyBlockRefs(record
));
1518 if (info
== XLOG_TBLSPC_CREATE
)
1520 xl_tblspc_create_rec
*xlrec
= (xl_tblspc_create_rec
*) XLogRecGetData(record
);
1521 char *location
= xlrec
->ts_path
;
1523 create_tablespace_directories(location
, xlrec
->ts_id
);
1525 else if (info
== XLOG_TBLSPC_DROP
)
1527 xl_tblspc_drop_rec
*xlrec
= (xl_tblspc_drop_rec
*) XLogRecGetData(record
);
1529 /* Close all smgr fds in all backends. */
1530 WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE
));
1533 * If we issued a WAL record for a drop tablespace it implies that
1534 * there were no files in it at all when the DROP was done. That means
1535 * that no permanent objects can exist in it at this point.
1537 * It is possible for standby users to be using this tablespace as a
1538 * location for their temporary files, so if we fail to remove all
1539 * files then do conflict processing and try again, if currently
1542 * Other possible reasons for failure include bollixed file
1543 * permissions on a standby server when they were okay on the primary,
1544 * etc etc. There's not much we can do about that, so just remove what
1545 * we can and press on.
1547 if (!destroy_tablespace_directories(xlrec
->ts_id
, true))
1549 ResolveRecoveryConflictWithTablespace(xlrec
->ts_id
);
1552 * If we did recovery processing then hopefully the backends who
1553 * wrote temp files should have cleaned up and exited by now. So
1554 * retry before complaining. If we fail again, this is just a LOG
1555 * condition, because it's not worth throwing an ERROR for (as
1556 * that would crash the database and require manual intervention
1557 * before we could get past this WAL record on restart).
1559 if (!destroy_tablespace_directories(xlrec
->ts_id
, true))
1561 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
1562 errmsg("directories for tablespace %u could not be removed",
1564 errhint("You can remove the directories manually if necessary.")));
1568 elog(PANIC
, "tblspc_redo: unknown op code %u", info
);