1 /*-------------------------------------------------------------------------
4 * create shared memory and initialize shared memory data structures.
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/storage/ipc/shmem.c
13 *-------------------------------------------------------------------------
16 * POSTGRES processes share one or more regions of shared memory.
17 * The shared memory is created by a postmaster and is inherited
18 * by each backend via fork() (or, in some ports, via other OS-specific
19 * methods). The routines in this file are used for allocating and
20 * binding to shared memory data structures.
23 * (a) There are three kinds of shared memory data structures
24 * available to POSTGRES: fixed-size structures, queues and hash
25 * tables. Fixed-size structures contain things like global variables
26 * for a module and should never be allocated after the shared memory
27 * initialization phase. Hash tables have a fixed maximum size, but
28 * their actual size can vary dynamically. When entries are added
29 * to the table, more space is allocated. Queues link data structures
30 * that have been allocated either within fixed-size structures or as hash
31 * buckets. Each shared data structure has a string name to identify
32 * it (assigned in the module that declares it).
34 * (b) During initialization, each module looks for its
35 * shared data structures in a hash table called the "Shmem Index".
36 * If the data structure is not present, the caller can allocate
37 * a new one and initialize it. If the data structure is present,
38 * the caller "attaches" to the structure by initializing a pointer
39 * in the local address space.
40 * The shmem index has two purposes: first, it gives us
41 * a simple model of how the world looks when a backend process
42 * initializes. If something is present in the shmem index,
43 * it is initialized. If it is not, it is uninitialized. Second,
44 * the shmem index allows us to allocate shared memory on demand
45 * instead of trying to preallocate structures and hard-wire the
46 * sizes and locations in header files. If you are using a lot
47 * of shared memory in a lot of different places (and changing
48 * things during development), this is important.
50 * (c) In standard Unix-ish environments, individual backends do not
51 * need to re-establish their local pointers into shared memory, because
52 * they inherit correct values of those variables via fork() from the
53 * postmaster. However, this does not work in the EXEC_BACKEND case.
54 * In ports using EXEC_BACKEND, new backends have to set up their local
55 * pointers using the method described in (b) above.
57 * (d) memory allocation model: shared memory can never be
58 * freed, once allocated. Each hash table has its own free list,
59 * so hash buckets can be reused when an item is deleted. However,
60 * if one hash table grows very large and then shrinks, its space
61 * cannot be redistributed to other tables. We could build a simple
62 * hash bucket garbage collector if need be. Right now, it seems unnecessary.
70 #include "miscadmin.h"
71 #include "storage/lwlock.h"
72 #include "storage/pg_shmem.h"
73 #include "storage/shmem.h"
74 #include "storage/spin.h"
75 #include "utils/builtins.h"
/*
 * Forward declaration of the file-local raw allocator, so that it can be
 * called before its definition further down. It takes the requested size
 * and a pointer through which the number of bytes actually allocated is
 * reported, and it yields a pointer to the allocated space.
 */
77 static void *ShmemAllocRaw(Size size
, Size
*allocated_size
);
79 /* shared memory global variables */
/*
 * File-level state used by the allocator routines in this file:
 *
 *   ShmemSegHdr - header of the shared memory segment; assigned in
 *                 InitShmemAccess
 *   ShmemBase   - start address of the segment; assigned in InitShmemAccess
 *   ShmemEnd    - address one past the end of the segment; assigned in
 *                 InitShmemAccess
 *   ShmemLock   - spinlock taken around allocations in ShmemAllocRaw; the
 *                 only one of these variables not declared static
 *   ShmemIndex  - hash table of named shared structures; starts out NULL
 */
81 static PGShmemHeader
*ShmemSegHdr
; /* shared mem segment header */
83 static void *ShmemBase
; /* start address of shared memory */
85 static void *ShmemEnd
; /* end+1 address of shared memory */
87 slock_t
*ShmemLock
; /* spinlock for shared memory and LWLock
90 static HTAB
*ShmemIndex
= NULL
; /* primary index hashtable for shmem */
94 * InitShmemAccess() --- set up basic pointers to shared memory.
96 * Note: the argument should be declared "PGShmemHeader *seghdr",
97 * but we use void to avoid having to include ipc.h in shmem.h.
/*
 * Record where the shared memory segment lives in this process.
 *
 * seghdr: address of the segment header, received as void * and cast to
 *         PGShmemHeader * here.
 *
 * Sets ShmemSegHdr and ShmemBase to the header address, and ShmemEnd to
 * that address plus the totalsize field of the header. The segment itself
 * is only read, never written, by the statements shown.
 */
100 InitShmemAccess(void *seghdr
)
102 PGShmemHeader
*shmhdr
= (PGShmemHeader
*) seghdr
;
104 ShmemSegHdr
= shmhdr
;
105 ShmemBase
= (void *) shmhdr
;
106 ShmemEnd
= (char *) ShmemBase
+ shmhdr
->totalsize
;
110 * InitShmemAllocation() --- set up shared-memory space allocation.
112 * This should be called only in the postmaster or a standalone backend.
/*
 * Prepare the segment for allocations made through ShmemAlloc.
 *
 * Steps, in the order they appear below:
 *   1. Assert that ShmemSegHdr is non-NULL, i.e. the segment header has
 *      already been recorded.
 *   2. Obtain space for the allocator spinlock with ShmemAllocUnlocked
 *      (the locked allocator needs this very lock) and initialize it.
 *   3. Store a cache-line-aligned value back into the freeoffset field of
 *      the header, so the next allocation starts on a cache line boundary.
 *   4. Reset the index field of the header and the local ShmemIndex
 *      pointer to NULL; the index is created later.
 *
 * NOTE(review): the declaration of the variable named aligned and the
 * left-hand side of the CACHELINEALIGN assignment are not visible in this
 * listing; it presumably receives the rounded-up address.
 */
115 InitShmemAllocation(void)
117 PGShmemHeader
*shmhdr
= ShmemSegHdr
;
120 Assert(shmhdr
!= NULL
);
123 * Initialize the spinlock used by ShmemAlloc. We must use
124 * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
126 ShmemLock
= (slock_t
*) ShmemAllocUnlocked(sizeof(slock_t
));
128 SpinLockInit(ShmemLock
);
131 * Allocations after this point should go through ShmemAlloc, which
132 * expects to allocate everything on cache line boundaries. Make sure the
133 * first allocation begins on a cache line boundary.
136 (CACHELINEALIGN((((char *) shmhdr
) + shmhdr
->freeoffset
)));
137 shmhdr
->freeoffset
= aligned
- (char *) shmhdr
;
139 /* ShmemIndex can't be set up yet (need LWLocks first) */
140 shmhdr
->index
= NULL
;
141 ShmemIndex
= (HTAB
*) NULL
;
145 * ShmemAlloc -- allocate max-aligned chunk from shared memory
147 * Throws error if request cannot be satisfied.
149 * Assumes ShmemLock and ShmemSegHdr are initialized.
/*
 * Allocate size bytes by delegating to ShmemAllocRaw. Failure is reported
 * as an ERRCODE_OUT_OF_MEMORY error whose message includes the number of
 * bytes requested.
 *
 * NOTE(review): the test that triggers the error report and the return
 * statement are not visible in this listing. Going by the description
 * that precedes this function, the error is raised when the request
 * cannot be satisfied.
 */
152 ShmemAlloc(Size size
)
157 newSpace
= ShmemAllocRaw(size
, &allocated_size
);
160 (errcode(ERRCODE_OUT_OF_MEMORY
),
161 errmsg("out of shared memory (%zu bytes requested)",
167 * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
169 * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
/*
 * Thin wrapper: forwards size to ShmemAllocRaw and returns its result
 * unchanged. The allocated size written by ShmemAllocRaw is not used
 * any further here.
 */
172 ShmemAllocNoError(Size size
)
176 return ShmemAllocRaw(size
, &allocated_size
);
180 * ShmemAllocRaw -- allocate aligned chunk and return allocated size
182 * Also sets *allocated_size to the number of bytes allocated, which will
183 * be equal to the number requested plus any padding we choose to add.
/*
 * Core allocator: hand out the next chunk of the segment under ShmemLock.
 *
 * size:           bytes requested; rounded up with CACHELINEALIGN before
 *                 anything else happens.
 * allocated_size: output; receives the rounded-up size. It is written
 *                 before the space check, so it is set on every path.
 *
 * With the spinlock held, the current freeoffset is taken as the start of
 * the new chunk. If start plus size does not exceed totalsize, the chunk
 * address is ShmemBase plus start and freeoffset is advanced past it.
 * After the lock is released, an assertion checks that the resulting
 * address is cache line aligned.
 *
 * NOTE(review): the local declarations, the branch taken when the request
 * does not fit, and the return statement are not visible in this listing.
 * The comment on the final assertion implies newSpace is NULL in the
 * does-not-fit case.
 */
186 ShmemAllocRaw(Size size
, Size
*allocated_size
)
193 * Ensure all space is adequately aligned. We used to only MAXALIGN this
194 * space but experience has proved that on modern systems that is not good
195 * enough. Many parts of the system are very sensitive to critical data
196 * structures getting split across cache line boundaries. To avoid that,
197 * attempt to align the beginning of the allocation to a cache line
198 * boundary. The calling code will still need to be careful about how it
199 * uses the allocated space - e.g. by padding each element in an array of
200 * structures out to a power-of-two size - but without this, even that
201 * won't be sufficient.
203 size
= CACHELINEALIGN(size
);
204 *allocated_size
= size
;
206 Assert(ShmemSegHdr
!= NULL
);
208 SpinLockAcquire(ShmemLock
);
210 newStart
= ShmemSegHdr
->freeoffset
;
212 newFree
= newStart
+ size
;
213 if (newFree
<= ShmemSegHdr
->totalsize
)
215 newSpace
= (void *) ((char *) ShmemBase
+ newStart
);
216 ShmemSegHdr
->freeoffset
= newFree
;
221 SpinLockRelease(ShmemLock
);
223 /* note this assert is okay with newSpace == NULL */
224 Assert(newSpace
== (void *) CACHELINEALIGN(newSpace
));
230 * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
232 * Allocate space without locking ShmemLock. This should be used for,
233 * and only for, allocations that must happen before ShmemLock is ready.
235 * We consider maxalign, rather than cachealign, sufficient here.
/*
 * Allocate from the segment without taking ShmemLock.
 *
 * size: bytes requested; rounded up with MAXALIGN (not CACHELINEALIGN).
 *
 * The current freeoffset is the start of the new chunk. If start plus the
 * rounded size would exceed totalsize, an ERRCODE_OUT_OF_MEMORY error that
 * reports the requested byte count is raised. Otherwise freeoffset is
 * advanced and the chunk address is ShmemBase plus the old offset; an
 * assertion checks that this address is MAXALIGNed.
 *
 * NOTE(review): the local declarations, the opening of the error-report
 * call, and the return statement are not visible in this listing.
 */
238 ShmemAllocUnlocked(Size size
)
245 * Ensure allocated space is adequately aligned.
247 size
= MAXALIGN(size
);
249 Assert(ShmemSegHdr
!= NULL
);
251 newStart
= ShmemSegHdr
->freeoffset
;
253 newFree
= newStart
+ size
;
254 if (newFree
> ShmemSegHdr
->totalsize
)
256 (errcode(ERRCODE_OUT_OF_MEMORY
),
257 errmsg("out of shared memory (%zu bytes requested)",
259 ShmemSegHdr
->freeoffset
= newFree
;
261 newSpace
= (void *) ((char *) ShmemBase
+ newStart
);
263 Assert(newSpace
== (void *) MAXALIGN(newSpace
));
269 * ShmemAddrIsValid -- test if an address refers to shared memory
271 * Returns true if the pointer points within the shared memory segment.
/*
 * Range check against the segment bounds recorded by InitShmemAccess:
 * the result is true exactly when ShmemBase <= addr < ShmemEnd. The
 * pointer is only compared, never dereferenced.
 */
274 ShmemAddrIsValid(const void *addr
)
276 return (addr
>= ShmemBase
) && (addr
< ShmemEnd
);
280 * InitShmemIndex() --- set up or attach to shmem index table.
288 * Create the shared memory shmem index.
290 * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
291 * hashtable to exist already, we have a bit of a circularity problem in
292 * initializing the ShmemIndex itself. The special "ShmemIndex" hash
293 * table name will tell ShmemInitStruct to fake it.
/*
 * Body of InitShmemIndex (the signature line is not visible in this
 * listing).
 *
 * Fills the local info structure with the key size SHMEM_INDEX_KEYSIZE and
 * the entry size sizeof(ShmemIndexEnt), then calls ShmemInitHash for the
 * table named ShmemIndex. SHMEM_INDEX_SIZE is passed as both the initial
 * and the maximum size, and the flags are HASH_ELEM | HASH_STRINGS. The
 * resulting table pointer is stored in the file-level ShmemIndex variable.
 *
 * NOTE(review): the argument that hands info to ShmemInitHash is not
 * visible in this listing; presumably it is the address of info.
 */
295 info
.keysize
= SHMEM_INDEX_KEYSIZE
;
296 info
.entrysize
= sizeof(ShmemIndexEnt
);
298 ShmemIndex
= ShmemInitHash("ShmemIndex",
299 SHMEM_INDEX_SIZE
, SHMEM_INDEX_SIZE
,
301 HASH_ELEM
| HASH_STRINGS
);
305 * ShmemInitHash -- Create and initialize, or attach to, a
306 * shared memory hash table.
308 * We assume caller is doing some kind of synchronization
309 * so that two processes don't try to create/initialize the same
310 * table at once. (In practice, all creations are done in the postmaster
311 * process; child processes should always be attaching to existing tables.)
313 * max_size is the estimated maximum number of hashtable entries. This is
314 * not a hard limit, but the access efficiency will degrade if it is
315 * exceeded substantially (since it's used to compute directory size and
316 * the hash table buckets will get overfull).
318 * init_size is the number of hashtable entries to preallocate. For a table
319 * whose maximum size is certain, this should be equal to max_size; that
320 * ensures that no run-time out-of-shared-memory failures can occur.
322 * *infoP and hash_flags must specify at least the entry sizes and key
323 * comparison semantics (see hash_create()). Flag bits and values specific
324 * to shared-memory hash tables are added here, except that callers may
325 * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
327 * Note: before Postgres 9.0, this function returned NULL for some failure
328 * cases. Now, it always throws an error instead, so callers need not check for NULL.
/*
 * Create a hash table in shared memory, or attach to one that exists.
 *
 * name:       key under which the table header is registered through
 *             ShmemInitStruct; also passed to hash_create
 * init_size:  passed to hash_create as the initial number of entries
 * max_size:   fed to hash_select_dirsize to pick the directory size
 * infoP:      caller-supplied HASHCTL; this function overwrites its dsize,
 *             max_dsize, alloc and hctl fields
 * hash_flags: caller flags; HASH_SHARED_MEM, HASH_ALLOC and HASH_DIRSIZE
 *             are always OR-ed in
 *
 * dsize and max_dsize are set to the same value, so the directory has a
 * fixed size. ShmemAllocNoError is installed as the allocator. Space for
 * the table header comes from ShmemInitStruct, sized by
 * hash_get_shared_size. HASH_ATTACH is added for the already-exists case,
 * the header location is handed over through the hctl field, and the
 * result of hash_create is returned.
 *
 * NOTE(review): the local declarations, the found argument passed to
 * ShmemInitStruct, and the test guarding HASH_ATTACH are not visible in
 * this listing.
 */
332 ShmemInitHash(const char *name
, /* table string name for shmem index */
333 long init_size
, /* initial table size */
334 long max_size
, /* max size of the table */
335 HASHCTL
*infoP
, /* info about key and bucket size */
336 int hash_flags
) /* info about infoP */
342 * Hash tables allocated in shared memory have a fixed directory; it can't
343 * grow or other backends wouldn't be able to find it. So, make sure we
344 * make it big enough to start with.
346 * The shared memory allocator must be specified too.
348 infoP
->dsize
= infoP
->max_dsize
= hash_select_dirsize(max_size
);
349 infoP
->alloc
= ShmemAllocNoError
;
350 hash_flags
|= HASH_SHARED_MEM
| HASH_ALLOC
| HASH_DIRSIZE
;
352 /* look it up in the shmem index */
353 location
= ShmemInitStruct(name
,
354 hash_get_shared_size(infoP
, hash_flags
),
358 * if it already exists, attach to it rather than allocate and initialize
362 hash_flags
|= HASH_ATTACH
;
364 /* Pass location of hashtable header to hash_create */
365 infoP
->hctl
= (HASHHDR
*) location
;
367 return hash_create(name
, init_size
, infoP
, hash_flags
);
371 * ShmemInitStruct -- Create/attach to a structure in shared memory.
373 * This is called during initialization to find or allocate
374 * a data structure in shared memory. If no other process
375 * has created the structure, this routine allocates space
376 * for it. If it exists already, a pointer to the existing
377 * structure is returned.
379 * Returns: pointer to the object. *foundPtr is set true if the object was
380 * already in the shmem index (hence, already initialized).
382 * Note: before Postgres 9.0, this function returned NULL for some failure
383 * cases. Now, it always throws an error instead, so callers need not check for NULL.
/*
 * Find the named structure in shared memory, or allocate it.
 *
 * name:     key looked up in the shmem index
 * size:     size in bytes the caller expects the structure to have
 * foundPtr: output flag; passed straight to hash_search
 *
 * ShmemIndexLock is taken in exclusive mode at the start and released on
 * every exit path shown, including just before each error report.
 *
 * Special case, creating or attaching to the index itself (asserted by
 * comparing name with the string ShmemIndex):
 *   - when IsUnderPostmaster is set, the pointer saved in the index field
 *     of the segment header is reused;
 *   - otherwise space is obtained from ShmemAlloc and the pointer is saved
 *     in that header field.
 *
 * Normal case, hash_search on ShmemIndex with HASH_ENTER_NULL:
 *   - an ERRCODE_OUT_OF_MEMORY error naming the structure is reported for
 *     the case where no entry could be created;
 *   - for an existing entry, the stored size is compared with size; on a
 *     mismatch an error reporting both values is raised, otherwise the
 *     stored location is used;
 *   - for a new entry, space comes from ShmemAllocRaw. If that returns
 *     NULL, the just-added index entry is removed with HASH_REMOVE before
 *     the error is reported. On success the allocated size and location
 *     are recorded in the entry.
 *
 * The final assertions check that the pointer lies inside the segment and
 * is cache line aligned.
 *
 * NOTE(review): several conditions, some local declarations and the return
 * statement are missing from this listing; the branch structure described
 * above is inferred from the surviving comments and statements.
 */
387 ShmemInitStruct(const char *name
, Size size
, bool *foundPtr
)
389 ShmemIndexEnt
*result
;
392 LWLockAcquire(ShmemIndexLock
, LW_EXCLUSIVE
);
396 PGShmemHeader
*shmemseghdr
= ShmemSegHdr
;
398 /* Must be trying to create/attach to ShmemIndex itself */
399 Assert(strcmp(name
, "ShmemIndex") == 0);
401 if (IsUnderPostmaster
)
403 /* Must be initializing a (non-standalone) backend */
404 Assert(shmemseghdr
->index
!= NULL
);
405 structPtr
= shmemseghdr
->index
;
411 * If the shmem index doesn't exist, we are bootstrapping: we must
412 * be trying to init the shmem index itself.
414 * Notice that the ShmemIndexLock is released before the shmem
415 * index has been initialized. This should be OK because no other
416 * process can be accessing shared memory yet.
418 Assert(shmemseghdr
->index
== NULL
);
419 structPtr
= ShmemAlloc(size
);
420 shmemseghdr
->index
= structPtr
;
423 LWLockRelease(ShmemIndexLock
);
427 /* look it up in the shmem index */
428 result
= (ShmemIndexEnt
*)
429 hash_search(ShmemIndex
, name
, HASH_ENTER_NULL
, foundPtr
);
433 LWLockRelease(ShmemIndexLock
);
435 (errcode(ERRCODE_OUT_OF_MEMORY
),
436 errmsg("could not create ShmemIndex entry for data structure \"%s\"",
443 * Structure is in the shmem index so someone else has allocated it
444 * already. The size better be the same as the size we are trying to
445 * initialize to, or there is a name conflict (or worse).
447 if (result
->size
!= size
)
449 LWLockRelease(ShmemIndexLock
);
451 (errmsg("ShmemIndex entry size is wrong for data structure"
452 " \"%s\": expected %zu, actual %zu",
453 name
, size
, result
->size
)));
455 structPtr
= result
->location
;
461 /* It isn't in the table yet. allocate and initialize it */
462 structPtr
= ShmemAllocRaw(size
, &allocated_size
);
463 if (structPtr
== NULL
)
465 /* out of memory; remove the failed ShmemIndex entry */
466 hash_search(ShmemIndex
, name
, HASH_REMOVE
, NULL
);
467 LWLockRelease(ShmemIndexLock
);
469 (errcode(ERRCODE_OUT_OF_MEMORY
),
470 errmsg("not enough shared memory for data structure"
471 " \"%s\" (%zu bytes requested)",
475 result
->allocated_size
= allocated_size
;
476 result
->location
= structPtr
;
479 LWLockRelease(ShmemIndexLock
);
481 Assert(ShmemAddrIsValid(structPtr
));
483 Assert(structPtr
== (void *) CACHELINEALIGN(structPtr
));
490 * Add two Size values, checking for overflow
/*
 * Overflow-checked addition of two Size values.
 *
 * The check shown treats a result smaller than either operand as unsigned
 * wraparound and reports ERRCODE_PROGRAM_LIMIT_EXCEEDED with a message
 * saying the requested shared memory size overflows size_t.
 *
 * NOTE(review): the declaration and computation of result, and the return
 * statement, are not visible in this listing; result is presumably the sum
 * of s1 and s2.
 */
493 add_size(Size s1
, Size s2
)
498 /* We are assuming Size is an unsigned type here... */
499 if (result
< s1
|| result
< s2
)
501 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
502 errmsg("requested shared memory size overflows size_t")));
507 * Multiply two Size values, checking for overflow
/*
 * Overflow-checked multiplication of two Size values.
 *
 * A zero operand is tested for first, which keeps the later division by s2
 * from ever seeing a zero divisor. Overflow is then detected by dividing
 * the result by s2 and comparing with s1; a mismatch is reported as
 * ERRCODE_PROGRAM_LIMIT_EXCEEDED.
 *
 * NOTE(review): the action taken for a zero operand, the computation of
 * result, and the return statement are not visible in this listing.
 */
510 mul_size(Size s1
, Size s2
)
514 if (s1
== 0 || s2
== 0)
517 /* We are assuming Size is an unsigned type here... */
518 if (result
/ s2
!= s1
)
520 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
521 errmsg("requested shared memory size overflows size_t")));
525 /* SQL SRF showing allocated shared memory */
/*
 * Set-returning SQL function that lists shared memory allocations.
 *
 * Rows are built in the values and nulls arrays, which have
 * PG_GET_SHMEM_SIZES_COLS (4) slots, and appended to the tuplestore in
 * rsinfo. For index entries the slots hold: the entry key as text, the
 * byte offset of the entry location from ShmemSegHdr, the entry size, and
 * the entry allocated size.
 *
 * While holding ShmemIndexLock in shared mode, the function emits:
 *   1. one row per ShmemIndex entry, summing allocated_size into
 *      named_allocated as it goes;
 *   2. a row labelled <anonymous> whose size is freeoffset minus
 *      named_allocated, i.e. space handed out but not attributed to any
 *      index entry;
 *   3. a last row with offset freeoffset and size totalsize minus
 *      freeoffset, i.e. space not yet handed out.
 * For rows 2 and 3 the allocated-size slot is copied from the size slot.
 *
 * The nulls array is zeroed once before the loop.
 *
 * NOTE(review): the declaration of ent, the remaining arguments of the
 * tuplestore_putvalues call inside the loop, any changes to nulls for the
 * last two rows, the name slot of the last row, and the return statement
 * are not visible in this listing.
 */
527 pg_get_shmem_allocations(PG_FUNCTION_ARGS
)
529 #define PG_GET_SHMEM_SIZES_COLS 4
530 ReturnSetInfo
*rsinfo
= (ReturnSetInfo
*) fcinfo
->resultinfo
;
531 HASH_SEQ_STATUS hstat
;
533 Size named_allocated
= 0;
534 Datum values
[PG_GET_SHMEM_SIZES_COLS
];
535 bool nulls
[PG_GET_SHMEM_SIZES_COLS
];
537 InitMaterializedSRF(fcinfo
, 0);
539 LWLockAcquire(ShmemIndexLock
, LW_SHARED
);
541 hash_seq_init(&hstat
, ShmemIndex
);
543 /* output all allocated entries */
544 memset(nulls
, 0, sizeof(nulls
));
545 while ((ent
= (ShmemIndexEnt
*) hash_seq_search(&hstat
)) != NULL
)
547 values
[0] = CStringGetTextDatum(ent
->key
);
548 values
[1] = Int64GetDatum((char *) ent
->location
- (char *) ShmemSegHdr
);
549 values
[2] = Int64GetDatum(ent
->size
);
550 values
[3] = Int64GetDatum(ent
->allocated_size
);
551 named_allocated
+= ent
->allocated_size
;
553 tuplestore_putvalues(rsinfo
->setResult
, rsinfo
->setDesc
,
557 /* output shared memory allocated but not counted via the shmem index */
558 values
[0] = CStringGetTextDatum("<anonymous>");
560 values
[2] = Int64GetDatum(ShmemSegHdr
->freeoffset
- named_allocated
);
561 values
[3] = values
[2];
562 tuplestore_putvalues(rsinfo
->setResult
, rsinfo
->setDesc
, values
, nulls
);
564 /* output as-of-yet unused shared memory */
566 values
[1] = Int64GetDatum(ShmemSegHdr
->freeoffset
);
568 values
[2] = Int64GetDatum(ShmemSegHdr
->totalsize
- ShmemSegHdr
->freeoffset
);
569 values
[3] = values
[2];
570 tuplestore_putvalues(rsinfo
->setResult
, rsinfo
->setDesc
, values
, nulls
);
572 LWLockRelease(ShmemIndexLock
);