/*-------------------------------------------------------------------------
 *
 * dsm.c
 *    manage dynamic shared memory segments
 *
 * This file provides a set of services to make programming with dynamic
 * shared memory segments more convenient.  Unlike the low-level
 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
 * created using this module will be cleaned up automatically.  Mappings
 * will be removed when the resource owner under which they were created
 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
 * have session lifespan.  Segments will be removed when there are no
 * remaining mappings, or at postmaster shutdown in any case.  After a
 * hard postmaster crash, remaining segments will be removed, if they
 * still exist, at the next postmaster startup.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/storage/ipc/dsm.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <fcntl.h>
#include <unistd.h>
#ifndef WIN32
#include <sys/mman.h>
#endif
#include <sys/stat.h>

#include "common/pg_prng.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "storage/dsm.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "storage/shmem.h"
#include "utils/freepage.h"
#include "utils/memutils.h"
#include "utils/resowner.h"

#define PG_DYNSHMEM_CONTROL_MAGIC		0x9a503d32

#define PG_DYNSHMEM_FIXED_SLOTS			64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND	5

#define INVALID_CONTROL_SLOT		((uint32) -1)
/* Backend-local tracking for on-detach callbacks. */
typedef struct dsm_segment_detach_callback
{
	on_dsm_detach_callback function;
	Datum		arg;
	slist_node	node;
} dsm_segment_detach_callback;

/* Backend-local state for a dynamic shared memory segment. */
struct dsm_segment
{
	dlist_node	node;			/* List link in dsm_segment_list. */
	ResourceOwner resowner;		/* Resource owner. */
	dsm_handle	handle;			/* Segment name. */
	uint32		control_slot;	/* Slot in control segment. */
	void	   *impl_private;	/* Implementation-specific private data. */
	void	   *mapped_address; /* Mapping address, or NULL if unmapped. */
	Size		mapped_size;	/* Size of our mapping. */
	slist_head	on_detach;		/* On-detach callbacks. */
};

/* Shared-memory state for a dynamic shared memory segment. */
typedef struct dsm_control_item
{
	dsm_handle	handle;
	uint32		refcnt;			/* 2+ = active, 1 = moribund, 0 = gone */
	size_t		first_page;
	size_t		npages;
	void	   *impl_private_pm_handle; /* only needed on Windows */
	bool		pinned;
} dsm_control_item;

/* Layout of the dynamic shared memory control segment. */
typedef struct dsm_control_header
{
	uint32		magic;
	uint32		nitems;
	uint32		maxitems;
	dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;

static void dsm_cleanup_for_mmap(void);
static void dsm_postmaster_shutdown(int code, Datum arg);
static dsm_segment *dsm_create_descriptor(void);
static bool dsm_control_segment_sane(dsm_control_header *control,
									 Size mapped_size);
static uint64 dsm_control_bytes_needed(uint32 nitems);
static inline dsm_handle make_main_region_dsm_handle(int slot);
static inline bool is_main_region_dsm_handle(dsm_handle handle);
/* Has this backend initialized the dynamic shared memory system yet? */
static bool dsm_init_done = false;

/* Preallocated DSM space in the main shared memory region. */
static void *dsm_main_space_begin = NULL;

/*
 * List of dynamic shared memory segments used by this backend.
 *
 * At process exit time, we must decrement the reference count of each
 * segment we have attached; this list makes it possible to find all such
 * segments.
 *
 * This list should always be empty in the postmaster.  We could probably
 * allow the postmaster to map dynamic shared memory segments before it
 * begins to start child processes, provided that each process adjusted
 * the reference counts for those segments in the control segment at
 * startup time, but there's no obvious need for such a facility, which
 * would also be complex to handle in the EXEC_BACKEND case.  Once the
 * postmaster has begun spawning children, there's an additional problem:
 * each new mapping would require an update to the control segment,
 * which requires locking, in which the postmaster must not be involved.
 */
static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);

/*
 * Control segment information.
 *
 * Unlike ordinary shared memory segments, the control segment is not
 * reference counted; instead, it lasts for the postmaster's entire
 * life cycle.  For simplicity, it doesn't have a dsm_segment object either.
 */
static dsm_handle dsm_control_handle;
static dsm_control_header *dsm_control;
static Size dsm_control_mapped_size = 0;
static void *dsm_control_impl_private = NULL;

/* ResourceOwner callbacks to hold DSM segments */
static void ResOwnerReleaseDSM(Datum res);
static char *ResOwnerPrintDSM(Datum res);

static const ResourceOwnerDesc dsm_resowner_desc =
{
	.name = "dynamic shared memory segment",
	.release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
	.release_priority = RELEASE_PRIO_DSMS,
	.ReleaseResource = ResOwnerReleaseDSM,
	.DebugPrint = ResOwnerPrintDSM
};

/* Convenience wrappers over ResourceOwnerRemember/Forget */
static inline void
ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
{
	ResourceOwnerRemember(owner, PointerGetDatum(seg), &dsm_resowner_desc);
}

static inline void
ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
{
	ResourceOwnerForget(owner, PointerGetDatum(seg), &dsm_resowner_desc);
}
/*
 * Start up the dynamic shared memory system.
 *
 * This is called just once during each cluster lifetime, at postmaster
 * startup time.
 */
void
dsm_postmaster_startup(PGShmemHeader *shim)
{
	void	   *dsm_control_address = NULL;
	uint32		maxitems;
	Size		segsize;

	Assert(!IsUnderPostmaster);

	/*
	 * If we're using the mmap implementations, clean up any leftovers.
	 * Cleanup isn't needed on Windows, and happens earlier in startup for
	 * POSIX and System V shared memory, via a direct call to
	 * dsm_cleanup_using_control_segment.
	 */
	if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
		dsm_cleanup_for_mmap();

	/* Determine size for new control segment. */
	maxitems = PG_DYNSHMEM_FIXED_SLOTS
		+ PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
	elog(DEBUG2, "dynamic shared memory system will support %u segments",
		 maxitems);
	segsize = dsm_control_bytes_needed(maxitems);

	/*
	 * Loop until we find an unused identifier for the new control segment.
	 * We sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
	 * control segment", so avoid generating that value for a real handle.
	 */
	for (;;)
	{
		Assert(dsm_control_address == NULL);
		Assert(dsm_control_mapped_size == 0);
		/* Use even numbers only */
		dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1;
		if (dsm_control_handle == DSM_HANDLE_INVALID)
			continue;
		if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
						&dsm_control_impl_private, &dsm_control_address,
						&dsm_control_mapped_size, ERROR))
			break;
	}
	dsm_control = dsm_control_address;
	on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
	elog(DEBUG2,
		 "created dynamic shared memory control segment %u (%zu bytes)",
		 dsm_control_handle, segsize);
	shim->dsm_control = dsm_control_handle;

	/* Initialize control segment. */
	dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
	dsm_control->nitems = 0;
	dsm_control->maxitems = maxitems;
}
/*
 * Determine whether the control segment from the previous postmaster
 * invocation still exists.  If so, remove the dynamic shared memory
 * segments to which it refers, and then the control segment itself.
 */
void
dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
{
	void	   *mapped_address = NULL;
	void	   *junk_mapped_address = NULL;
	void	   *impl_private = NULL;
	void	   *junk_impl_private = NULL;
	Size		mapped_size = 0;
	Size		junk_mapped_size = 0;
	uint32		nitems;
	uint32		i;
	dsm_control_header *old_control;

	/*
	 * Try to attach the segment.  If this fails, it probably just means that
	 * the operating system has been rebooted and the segment no longer
	 * exists, or an unrelated process has used the same shm ID.  So just
	 * fall out quietly.
	 */
	if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
					 &mapped_address, &mapped_size, DEBUG1))
		return;

	/*
	 * We've managed to reattach it, but the contents might not be sane. If
	 * they aren't, we disregard the segment after all.
	 */
	old_control = (dsm_control_header *) mapped_address;
	if (!dsm_control_segment_sane(old_control, mapped_size))
	{
		dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
					&mapped_address, &mapped_size, LOG);
		return;
	}

	/*
	 * OK, the control segment looks basically valid, so we can use it to get
	 * a list of segments that need to be removed.
	 */
	nitems = old_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		dsm_handle	handle;
		uint32		refcnt;

		/* If the reference count is 0, the slot is actually unused. */
		refcnt = old_control->item[i].refcnt;
		if (refcnt == 0)
			continue;

		/* If it was using the main shmem area, there is nothing to do. */
		handle = old_control->item[i].handle;
		if (is_main_region_dsm_handle(handle))
			continue;

		/* Log debugging information. */
		elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
			 handle, refcnt);

		/* Destroy the referenced segment. */
		dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
					&junk_mapped_address, &junk_mapped_size, LOG);
	}

	/* Destroy the old control segment, too. */
	elog(DEBUG2,
		 "cleaning up dynamic shared memory control segment with ID %u",
		 old_control_handle);
	dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
				&mapped_address, &mapped_size, LOG);
}
/*
 * When we're using the mmap shared memory implementation, "shared memory"
 * segments might even manage to survive an operating system reboot.
 * But there's no guarantee as to exactly what will survive: some segments
 * may survive, and others may not, and the contents of some may be out
 * of date.  In particular, the control segment may be out of date, so we
 * can't rely on it to figure out what to remove.  However, since we know
 * what directory contains the files we used as shared memory, we can simply
 * scan the directory and blow everything away that shouldn't be there.
 */
static void
dsm_cleanup_for_mmap(void)
{
	DIR		   *dir;
	struct dirent *dent;

	/* Scan the directory for something with a name of the correct format. */
	dir = AllocateDir(PG_DYNSHMEM_DIR);

	while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
	{
		if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
					strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
		{
			char		buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];

			snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);

			elog(DEBUG2, "removing file \"%s\"", buf);

			/* We found a matching file; so remove it. */
			if (unlink(buf) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not remove file \"%s\": %m", buf)));
		}
	}

	/* Cleanup complete. */
	FreeDir(dir);
}
/*
 * At shutdown time, we iterate over the control segment and remove all
 * remaining dynamic shared memory segments.  We avoid throwing errors here;
 * the postmaster is shutting down either way, and this is just non-critical
 * resource cleanup.
 */
static void
dsm_postmaster_shutdown(int code, Datum arg)
{
	uint32		nitems;
	uint32		i;
	void	   *dsm_control_address;
	void	   *junk_mapped_address = NULL;
	void	   *junk_impl_private = NULL;
	Size		junk_mapped_size = 0;
	PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);

	/*
	 * If some other backend exited uncleanly, it might have corrupted the
	 * control segment while it was dying.  In that case, we warn and ignore
	 * the contents of the control segment.  This may end up leaving behind
	 * stray shared memory segments, but there's not much we can do about
	 * that if the metadata is gone.
	 */
	nitems = dsm_control->nitems;
	if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
	{
		ereport(LOG,
				(errmsg("dynamic shared memory control segment is corrupt")));
		return;
	}

	/* Remove any remaining segments. */
	for (i = 0; i < nitems; ++i)
	{
		dsm_handle	handle;

		/* If the reference count is 0, the slot is actually unused. */
		if (dsm_control->item[i].refcnt == 0)
			continue;

		handle = dsm_control->item[i].handle;
		if (is_main_region_dsm_handle(handle))
			continue;

		/* Log debugging information. */
		elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
			 handle);

		/* Destroy the segment. */
		dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
					&junk_mapped_address, &junk_mapped_size, LOG);
	}

	/* Remove the control segment itself. */
	elog(DEBUG2,
		 "cleaning up dynamic shared memory control segment with ID %u",
		 dsm_control_handle);
	dsm_control_address = dsm_control;
	dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
				&dsm_control_impl_private, &dsm_control_address,
				&dsm_control_mapped_size, LOG);
	dsm_control = dsm_control_address;
	shim->dsm_control = 0;
}
/*
 * Prepare this backend for dynamic shared memory usage.  Under EXEC_BACKEND,
 * we must reread the state file and map the control segment; in other cases,
 * we'll have inherited the postmaster's mapping and global variables.
 */
static void
dsm_backend_startup(void)
{
#ifdef EXEC_BACKEND
	if (IsUnderPostmaster)
	{
		void	   *control_address = NULL;

		/* Attach control segment. */
		Assert(dsm_control_handle != 0);
		dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
					&dsm_control_impl_private, &control_address,
					&dsm_control_mapped_size, ERROR);
		dsm_control = control_address;
		/* If control segment doesn't look sane, something is badly wrong. */
		if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
		{
			dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
						&dsm_control_impl_private, &control_address,
						&dsm_control_mapped_size, WARNING);
			ereport(FATAL,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("dynamic shared memory control segment is not valid")));
		}
	}
#endif

	dsm_init_done = true;
}

#ifdef EXEC_BACKEND
/*
 * When running under EXEC_BACKEND, we get a callback here when the main
 * shared memory segment is re-attached, so that we can record the control
 * handle retrieved from it.
 */
void
dsm_set_control_handle(dsm_handle h)
{
	Assert(dsm_control_handle == 0 && h != 0);
	dsm_control_handle = h;
}
#endif
/*
 * Reserve some space in the main shared memory segment for DSM segments.
 */
size_t
dsm_estimate_size(void)
{
	return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
}

/*
 * Initialize space in the main shared memory segment for DSM segments.
 */
void
dsm_shmem_init(void)
{
	size_t		size = dsm_estimate_size();
	bool		found;

	if (size == 0)
		return;

	dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
	if (!found)
	{
		FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
		size_t		first_page = 0;
		size_t		pages;

		/* Reserve space for the FreePageManager. */
		while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
			++first_page;

		/* Initialize it and give it all the rest of the space. */
		FreePageManagerInitialize(fpm, dsm_main_space_begin);
		pages = (size / FPM_PAGE_SIZE) - first_page;
		FreePageManagerPut(fpm, first_page, pages);
	}
}
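
/*
 * Worked example (illustrative only, not part of the module's API): the
 * min_dynamic_shared_memory GUC is expressed in megabytes, so a setting of
 * 256MB makes dsm_estimate_size() return 256 * 1024 * 1024 = 268435456
 * bytes.  dsm_shmem_init() then places a FreePageManager at the start of
 * that region, rounds the manager's own footprint up to whole
 * FPM_PAGE_SIZE pages, and donates the remaining pages to the free page
 * manager, from which dsm_create() can later carve out segments without
 * creating a separate OS-level object.
 */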
/*
 * Create a new dynamic shared memory segment.
 *
 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
 * with it and must be detached before the resource owner releases, or a
 * warning will be logged.  If CurrentResourceOwner is NULL, the segment
 * remains attached until explicitly detached or the session ends.
 * Creating with a NULL CurrentResourceOwner is equivalent to creating
 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
 */
dsm_segment *
dsm_create(Size size, int flags)
{
	dsm_segment *seg;
	uint32		i;
	uint32		nitems;
	size_t		npages = 0;
	size_t		first_page = 0;
	FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
	bool		using_main_dsm_region = false;

	/*
	 * Unsafe in postmaster.  It might seem pointless to allow use of dsm in
	 * single user mode, but otherwise some subsystems will need dedicated
	 * single user mode code paths.
	 */
	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);

	if (!dsm_init_done)
		dsm_backend_startup();

	/* Create a new segment descriptor. */
	seg = dsm_create_descriptor();

	/*
	 * Lock the control segment while we try to allocate from the main shared
	 * memory area, if configured.
	 */
	if (dsm_main_space_fpm)
	{
		npages = size / FPM_PAGE_SIZE;
		if (size % FPM_PAGE_SIZE > 0)
			++npages;

		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
		if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
		{
			/* We can carve out a piece of the main shared memory segment. */
			seg->mapped_address = (char *) dsm_main_space_begin +
				first_page * FPM_PAGE_SIZE;
			seg->mapped_size = npages * FPM_PAGE_SIZE;
			using_main_dsm_region = true;
			/* We'll choose a handle below. */
		}
	}

	if (!using_main_dsm_region)
	{
		/*
		 * We need to create a new memory segment.  Loop until we find an
		 * unused segment identifier.
		 */
		if (dsm_main_space_fpm)
			LWLockRelease(DynamicSharedMemoryControlLock);
		for (;;)
		{
			Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
			/* Use even numbers only */
			seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1;
			if (seg->handle == DSM_HANDLE_INVALID)	/* Reserve sentinel */
				continue;
			if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
							&seg->mapped_address, &seg->mapped_size, ERROR))
				break;
		}
		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	}

	/* Search the control segment for an unused slot. */
	nitems = dsm_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		if (dsm_control->item[i].refcnt == 0)
		{
			if (using_main_dsm_region)
			{
				seg->handle = make_main_region_dsm_handle(i);
				dsm_control->item[i].first_page = first_page;
				dsm_control->item[i].npages = npages;
			}
			else
				Assert(!is_main_region_dsm_handle(seg->handle));
			dsm_control->item[i].handle = seg->handle;
			/* refcnt of 1 triggers destruction, so start at 2 */
			dsm_control->item[i].refcnt = 2;
			dsm_control->item[i].impl_private_pm_handle = NULL;
			dsm_control->item[i].pinned = false;
			seg->control_slot = i;
			LWLockRelease(DynamicSharedMemoryControlLock);
			return seg;
		}
	}

	/* Verify that we can support an additional mapping. */
	if (nitems >= dsm_control->maxitems)
	{
		if (using_main_dsm_region)
			FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
		LWLockRelease(DynamicSharedMemoryControlLock);
		if (!using_main_dsm_region)
			dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
						&seg->mapped_address, &seg->mapped_size, WARNING);
		if (seg->resowner != NULL)
			ResourceOwnerForgetDSM(seg->resowner, seg);
		dlist_delete(&seg->node);
		pfree(seg);

		if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
			return NULL;
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("too many dynamic shared memory segments")));
	}

	/* Enter the handle into a new array slot. */
	if (using_main_dsm_region)
	{
		seg->handle = make_main_region_dsm_handle(nitems);
		dsm_control->item[i].first_page = first_page;
		dsm_control->item[i].npages = npages;
	}
	dsm_control->item[nitems].handle = seg->handle;
	/* refcnt of 1 triggers destruction, so start at 2 */
	dsm_control->item[nitems].refcnt = 2;
	dsm_control->item[nitems].impl_private_pm_handle = NULL;
	dsm_control->item[nitems].pinned = false;
	seg->control_slot = nitems;
	dsm_control->nitems++;
	LWLockRelease(DynamicSharedMemoryControlLock);

	return seg;
}
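
/*
 * Minimal caller-side sketch (illustrative only; the function and its
 * "shm_size" parameter are hypothetical and not part of this file).  It
 * shows the two decisions a caller of dsm_create() typically makes: whether
 * to tolerate running out of control-segment slots, and whether the mapping
 * should outlive the current resource owner.
 */
#ifdef NOT_USED
static void *
example_create_session_segment(Size shm_size)
{
	dsm_segment *seg;

	/* Ask for NULL instead of ERROR if all control slots are in use. */
	seg = dsm_create(shm_size, DSM_CREATE_NULL_IF_MAXSEGMENTS);
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("could not create dynamic shared memory segment")));

	/* Keep the mapping for the rest of the session, not just this query. */
	dsm_pin_mapping(seg);

	/* The caller can now lay out its data at the mapped address. */
	return dsm_segment_address(seg);
}
#endif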
/*
 * Attach a dynamic shared memory segment.
 *
 * See comments for dsm_segment_handle() for an explanation of how this
 * is intended to be used.
 *
 * This function will return NULL if the segment isn't known to the system.
 * This can happen if we're asked to attach the segment, but then everyone
 * else detaches it (causing it to be destroyed) before we get around to
 * attaching it.
 *
 * If there is a non-NULL CurrentResourceOwner, the attached segment is
 * associated with it and must be detached before the resource owner
 * releases, or a warning will be logged.  Otherwise the segment remains
 * attached until explicitly detached or the session ends.  See the note
 * atop dsm_create().
 */
dsm_segment *
dsm_attach(dsm_handle h)
{
	dsm_segment *seg;
	dlist_iter	iter;
	uint32		i;
	uint32		nitems;

	/* Unsafe in postmaster (and pointless in a stand-alone backend). */
	Assert(IsUnderPostmaster);

	if (!dsm_init_done)
		dsm_backend_startup();

	/*
	 * Since this is just a debugging cross-check, we could leave it out
	 * altogether, or include it only in assert-enabled builds.  But since
	 * the list of attached segments should normally be very short, let's
	 * include it always for right now.
	 *
	 * If you're hitting this error, you probably want to attempt to find an
	 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
	 * create a new one.
	 */
	dlist_foreach(iter, &dsm_segment_list)
	{
		seg = dlist_container(dsm_segment, node, iter.cur);
		if (seg->handle == h)
			elog(ERROR, "can't attach the same segment more than once");
	}

	/* Create a new segment descriptor. */
	seg = dsm_create_descriptor();
	seg->handle = h;

	/* Bump reference count for this segment in shared memory. */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	nitems = dsm_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		/*
		 * If the reference count is 0, the slot is actually unused.  If the
		 * reference count is 1, the slot is still in use, but the segment is
		 * in the process of going away; even if the handle matches, another
		 * slot may already have started using the same handle value by
		 * coincidence so we have to keep searching.
		 */
		if (dsm_control->item[i].refcnt <= 1)
			continue;

		/* If the handle doesn't match, it's not the slot we want. */
		if (dsm_control->item[i].handle != seg->handle)
			continue;

		/* Otherwise we've found a match. */
		dsm_control->item[i].refcnt++;
		seg->control_slot = i;
		if (is_main_region_dsm_handle(seg->handle))
		{
			seg->mapped_address = (char *) dsm_main_space_begin +
				dsm_control->item[i].first_page * FPM_PAGE_SIZE;
			seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
		}
		break;
	}
	LWLockRelease(DynamicSharedMemoryControlLock);

	/*
	 * If we didn't find the handle we're looking for in the control segment,
	 * it probably means that everyone else who had it mapped, including the
	 * original creator, died before we got to this point.  It's up to the
	 * caller to decide what to do about that.
	 */
	if (seg->control_slot == INVALID_CONTROL_SLOT)
	{
		dsm_detach(seg);
		return NULL;
	}

	/* Here's where we actually try to map the segment. */
	if (!is_main_region_dsm_handle(seg->handle))
		dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
					&seg->mapped_address, &seg->mapped_size, ERROR);

	return seg;
}
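
/*
 * Minimal recipient-side sketch (illustrative only; the function name is
 * hypothetical).  dsm_attach() returns NULL when the creator and all other
 * attachers have already gone away, so callers normally treat that as an
 * error rather than dereferencing the result.
 */
#ifdef NOT_USED
static void *
example_attach_segment(dsm_handle h)
{
	dsm_segment *seg;

	seg = dsm_attach(h);
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("could not attach to dynamic shared memory segment")));

	/* Optionally keep the mapping beyond the current resource owner. */
	dsm_pin_mapping(seg);

	return dsm_segment_address(seg);
}
#endif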
/*
 * At backend shutdown time, detach any segments that are still attached.
 * (This is similar to dsm_detach_all, except that there's no reason to
 * unmap the control segment before exiting, so we don't bother.)
 */
void
dsm_backend_shutdown(void)
{
	while (!dlist_is_empty(&dsm_segment_list))
	{
		dsm_segment *seg;

		seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
		dsm_detach(seg);
	}
}

/*
 * Detach all shared memory segments, including the control segments.  This
 * should be called, along with PGSharedMemoryDetach, in processes that
 * might inherit mappings but are not intended to be connected to dynamic
 * shared memory.
 */
void
dsm_detach_all(void)
{
	void	   *control_address = dsm_control;

	while (!dlist_is_empty(&dsm_segment_list))
	{
		dsm_segment *seg;

		seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
		dsm_detach(seg);
	}

	if (control_address != NULL)
		dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
					&dsm_control_impl_private, &control_address,
					&dsm_control_mapped_size, ERROR);
}
/*
 * Detach from a shared memory segment, destroying the segment if we
 * remove the last reference.
 *
 * This function should never fail.  It will often be invoked when aborting
 * a transaction, and a further error won't serve any purpose.  It's not a
 * complete disaster if we fail to unmap or destroy the segment; it means a
 * resource leak, but that doesn't necessarily preclude further operations.
 */
void
dsm_detach(dsm_segment *seg)
{
	/*
	 * Invoke registered callbacks.  Just in case one of those callbacks
	 * throws a further error that brings us back here, pop the callback
	 * before invoking it, to avoid infinite error recursion.  Don't allow
	 * interrupts while running the individual callbacks in non-error code
	 * paths, to avoid leaving cleanup work unfinished if we're interrupted
	 * by a statement timeout or similar.
	 */
	HOLD_INTERRUPTS();
	while (!slist_is_empty(&seg->on_detach))
	{
		slist_node *node;
		dsm_segment_detach_callback *cb;
		on_dsm_detach_callback function;
		Datum		arg;

		node = slist_pop_head_node(&seg->on_detach);
		cb = slist_container(dsm_segment_detach_callback, node, node);
		function = cb->function;
		arg = cb->arg;
		pfree(cb);

		function(seg, arg);
	}
	RESUME_INTERRUPTS();

	/*
	 * Try to remove the mapping, if one exists.  Normally, there will be,
	 * but maybe not, if we failed partway through a create or attach
	 * operation.  We remove the mapping before decrementing the reference
	 * count so that the process that sees a zero reference count can be
	 * certain that no remaining mappings exist.  Even if this fails, we
	 * pretend that it works, because retrying is likely to fail in the same
	 * way.
	 */
	if (seg->mapped_address != NULL)
	{
		if (!is_main_region_dsm_handle(seg->handle))
			dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
						&seg->mapped_address, &seg->mapped_size, WARNING);
		seg->impl_private = NULL;
		seg->mapped_address = NULL;
		seg->mapped_size = 0;
	}

	/* Reduce reference count, if we previously increased it. */
	if (seg->control_slot != INVALID_CONTROL_SLOT)
	{
		uint32		refcnt;
		uint32		control_slot = seg->control_slot;

		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
		Assert(dsm_control->item[control_slot].handle == seg->handle);
		Assert(dsm_control->item[control_slot].refcnt > 1);
		refcnt = --dsm_control->item[control_slot].refcnt;
		seg->control_slot = INVALID_CONTROL_SLOT;
		LWLockRelease(DynamicSharedMemoryControlLock);

		/* If new reference count is 1, try to destroy the segment. */
		if (refcnt == 1)
		{
			/* A pinned segment should never reach 1. */
			Assert(!dsm_control->item[control_slot].pinned);

			/*
			 * If we fail to destroy the segment here, or are killed before
			 * we finish doing so, the reference count will remain at 1,
			 * which will mean that nobody else can attach to the segment.
			 * At postmaster shutdown time, or when a new postmaster is
			 * started after a hard kill, another attempt will be made to
			 * remove the segment.
			 *
			 * The main case we're worried about here is being killed by a
			 * signal before we can finish removing the segment.  In that
			 * case, it's important to be sure that the segment still gets
			 * removed.  If we actually fail to remove the segment for some
			 * other reason, the postmaster may not have any better luck
			 * than we did.  There's not much we can do about that, though.
			 */
			if (is_main_region_dsm_handle(seg->handle) ||
				dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
							&seg->mapped_address, &seg->mapped_size, WARNING))
			{
				LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
				if (is_main_region_dsm_handle(seg->handle))
					FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
									   dsm_control->item[control_slot].first_page,
									   dsm_control->item[control_slot].npages);
				Assert(dsm_control->item[control_slot].handle == seg->handle);
				Assert(dsm_control->item[control_slot].refcnt == 1);
				dsm_control->item[control_slot].refcnt = 0;
				LWLockRelease(DynamicSharedMemoryControlLock);
			}
		}
	}

	/* Clean up our remaining backend-private data structures. */
	if (seg->resowner != NULL)
		ResourceOwnerForgetDSM(seg->resowner, seg);
	dlist_delete(&seg->node);
	pfree(seg);
}
/*
 * Keep a dynamic shared memory mapping until end of session.
 *
 * By default, mappings are owned by the current resource owner, which
 * typically means they stick around for the duration of the current query
 * only.
 */
void
dsm_pin_mapping(dsm_segment *seg)
{
	if (seg->resowner != NULL)
	{
		ResourceOwnerForgetDSM(seg->resowner, seg);
		seg->resowner = NULL;
	}
}

/*
 * Arrange to remove a dynamic shared memory mapping at cleanup time.
 *
 * dsm_pin_mapping() can be used to preserve a mapping for the entire
 * lifetime of a process; this function reverses that decision, making
 * the segment owned by the current resource owner.  This may be useful
 * just before performing some operation that will invalidate the segment
 * for future use by this backend.
 */
void
dsm_unpin_mapping(dsm_segment *seg)
{
	Assert(seg->resowner == NULL);
	ResourceOwnerEnlarge(CurrentResourceOwner);
	seg->resowner = CurrentResourceOwner;
	ResourceOwnerRememberDSM(seg->resowner, seg);
}
/*
 * Keep a dynamic shared memory segment until postmaster shutdown, or until
 * dsm_unpin_segment is called.
 *
 * This function should not be called more than once per segment, unless the
 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
 *
 * Note that this function does not arrange for the current process to
 * keep the segment mapped indefinitely; if that behavior is desired,
 * dsm_pin_mapping() should be used from each process that needs to
 * retain the mapping.
 */
void
dsm_pin_segment(dsm_segment *seg)
{
	void	   *handle = NULL;

	/*
	 * Bump reference count for this segment in shared memory.  This will
	 * ensure that even if there is no session which is attached to this
	 * segment, it will remain until postmaster shutdown or an explicit call
	 * to unpin.
	 */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	if (dsm_control->item[seg->control_slot].pinned)
		elog(ERROR, "cannot pin a segment that is already pinned");
	if (!is_main_region_dsm_handle(seg->handle))
		dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
	dsm_control->item[seg->control_slot].pinned = true;
	dsm_control->item[seg->control_slot].refcnt++;
	dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
	LWLockRelease(DynamicSharedMemoryControlLock);
}
/*
 * Unpin a dynamic shared memory segment that was previously pinned with
 * dsm_pin_segment.  This function should not be called unless
 * dsm_pin_segment was previously called for this segment.
 *
 * The argument is a dsm_handle rather than a dsm_segment in case you want
 * to unpin a segment to which you haven't attached.  This turns out to be
 * useful if, for example, a reference to one shared memory segment is
 * stored within another shared memory segment.  You might want to unpin
 * the referenced segment before destroying the referencing segment.
 */
void
dsm_unpin_segment(dsm_handle handle)
{
	uint32		control_slot = INVALID_CONTROL_SLOT;
	bool		destroy = false;
	uint32		i;

	/* Find the control slot for the given handle. */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	for (i = 0; i < dsm_control->nitems; ++i)
	{
		/* Skip unused slots and segments that are concurrently going away. */
		if (dsm_control->item[i].refcnt <= 1)
			continue;

		/* If we've found our handle, we can stop searching. */
		if (dsm_control->item[i].handle == handle)
		{
			control_slot = i;
			break;
		}
	}

	/*
	 * We should definitely have found the slot, and it should not already be
	 * in the process of going away, because this function should only be
	 * called on a segment which is pinned.
	 */
	if (control_slot == INVALID_CONTROL_SLOT)
		elog(ERROR, "cannot unpin unknown segment handle");
	if (!dsm_control->item[control_slot].pinned)
		elog(ERROR, "cannot unpin a segment that is not pinned");
	Assert(dsm_control->item[control_slot].refcnt > 1);

	/*
	 * Allow implementation-specific code to run.  We have to do this before
	 * releasing the lock, because impl_private_pm_handle may get modified by
	 * dsm_impl_unpin_segment.
	 */
	if (!is_main_region_dsm_handle(handle))
		dsm_impl_unpin_segment(handle,
							   &dsm_control->item[control_slot].impl_private_pm_handle);

	/* Note that 1 means no references (0 means unused slot). */
	if (--dsm_control->item[control_slot].refcnt == 1)
		destroy = true;
	dsm_control->item[control_slot].pinned = false;

	/* Now we can release the lock. */
	LWLockRelease(DynamicSharedMemoryControlLock);

	/* Clean up resources if that was the last reference. */
	if (destroy)
	{
		void	   *junk_impl_private = NULL;
		void	   *junk_mapped_address = NULL;
		Size		junk_mapped_size = 0;

		/*
		 * For an explanation of how error handling works in this case, see
		 * comments in dsm_detach.  Note that if we reach this point, the
		 * current process certainly does not have the segment mapped,
		 * because if it did, the reference count would have still been
		 * greater than 1 even after releasing the reference count held by
		 * the pin.  The fact that there can't be a dsm_segment for this
		 * handle makes it OK to pass the mapped size, mapped address, and
		 * private data as NULL here.
		 */
		if (is_main_region_dsm_handle(handle) ||
			dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
						&junk_mapped_address, &junk_mapped_size, WARNING))
		{
			LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
			if (is_main_region_dsm_handle(handle))
				FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
								   dsm_control->item[control_slot].first_page,
								   dsm_control->item[control_slot].npages);
			Assert(dsm_control->item[control_slot].handle == handle);
			Assert(dsm_control->item[control_slot].refcnt == 1);
			dsm_control->item[control_slot].refcnt = 0;
			LWLockRelease(DynamicSharedMemoryControlLock);
		}
	}
}
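
/*
 * Illustrative pin/unpin sketch (not part of this file; the function name
 * is hypothetical).  A segment that must outlive every attached backend is
 * pinned once after creation; later, some process that may never have
 * attached it can drop that extra reference by handle, after which the
 * segment is destroyed as soon as the last mapping goes away.
 */
#ifdef NOT_USED
static dsm_handle
example_pin_shared_state(Size shm_size)
{
	dsm_segment *seg = dsm_create(shm_size, 0);
	dsm_handle	handle = dsm_segment_handle(seg);

	/* Keep the segment alive even after this backend detaches. */
	dsm_pin_segment(seg);

	/* This backend no longer needs its own mapping. */
	dsm_detach(seg);

	/* Publish the handle somewhere; later, any backend may do: */
	/* dsm_unpin_segment(handle); */
	return handle;
}
#endif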
/*
 * Find an existing mapping for a shared memory segment, if there is one.
 */
dsm_segment *
dsm_find_mapping(dsm_handle handle)
{
	dlist_iter	iter;
	dsm_segment *seg;

	dlist_foreach(iter, &dsm_segment_list)
	{
		seg = dlist_container(dsm_segment, node, iter.cur);
		if (seg->handle == handle)
			return seg;
	}

	return NULL;
}

/*
 * Get the address at which a dynamic shared memory segment is mapped.
 */
void *
dsm_segment_address(dsm_segment *seg)
{
	Assert(seg->mapped_address != NULL);
	return seg->mapped_address;
}

/*
 * Get the size of a mapping.
 */
Size
dsm_segment_map_length(dsm_segment *seg)
{
	Assert(seg->mapped_address != NULL);
	return seg->mapped_size;
}

/*
 * Get a handle for a mapping.
 *
 * To establish communication via dynamic shared memory between two backends,
 * one of them should first call dsm_create() to establish a new shared
 * memory mapping.  That process should then call dsm_segment_handle() to
 * obtain a handle for the mapping, and pass that handle to the
 * coordinating backend via some means (e.g. bgw_main_arg, or via the
 * main shared memory segment).  The recipient, once in possession of the
 * handle, should call dsm_attach().
 */
dsm_handle
dsm_segment_handle(dsm_segment *seg)
{
	return seg->handle;
}
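
/*
 * Illustrative sketch of the handoff described above (not part of this
 * file; the function names are hypothetical).  A dsm_handle is a plain
 * 32-bit value, so it can be passed to a background worker by packing it
 * into the Datum-sized bgw_main_arg and unpacking it on the other side.
 */
#ifdef NOT_USED
static Datum
example_pack_handle(dsm_segment *seg)
{
	/* Creator side: hand the handle to the worker as its startup argument. */
	return UInt32GetDatum(dsm_segment_handle(seg));
}

static dsm_segment *
example_unpack_and_attach(Datum main_arg)
{
	/* Worker side: recover the handle and attach to the segment. */
	return dsm_attach(DatumGetUInt32(main_arg));
}
#endif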
/*
 * Register an on-detach callback for a dynamic shared memory segment.
 */
void
on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
{
	dsm_segment_detach_callback *cb;

	cb = MemoryContextAlloc(TopMemoryContext,
							sizeof(dsm_segment_detach_callback));
	cb->function = function;
	cb->arg = arg;
	slist_push_head(&seg->on_detach, &cb->node);
}
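
/*
 * Illustrative callback sketch (not part of this file; the callback and its
 * argument are hypothetical).  The callback runs inside dsm_detach(), after
 * being popped from the list, so it fires at most once even if it errors
 * out.
 */
#ifdef NOT_USED
static void
example_on_detach(dsm_segment *seg, Datum arg)
{
	elog(DEBUG1, "detaching from segment %u, cookie %d",
		 dsm_segment_handle(seg), DatumGetInt32(arg));
}

static void
example_register_cleanup(dsm_segment *seg)
{
	/* Run example_on_detach(seg, 42) when this mapping goes away. */
	on_dsm_detach(seg, example_on_detach, Int32GetDatum(42));
}
#endif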
/*
 * Unregister an on-detach callback for a dynamic shared memory segment.
 */
void
cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
					 Datum arg)
{
	slist_mutable_iter iter;

	slist_foreach_modify(iter, &seg->on_detach)
	{
		dsm_segment_detach_callback *cb;

		cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
		if (cb->function == function && cb->arg == arg)
		{
			slist_delete_current(&iter);
			pfree(cb);
			break;
		}
	}
}

/*
 * Discard all registered on-detach callbacks without executing them.
 */
void
reset_on_dsm_detach(void)
{
	dlist_iter	iter;

	dlist_foreach(iter, &dsm_segment_list)
	{
		dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);

		/* Throw away explicit on-detach actions one by one. */
		while (!slist_is_empty(&seg->on_detach))
		{
			slist_node *node;
			dsm_segment_detach_callback *cb;

			node = slist_pop_head_node(&seg->on_detach);
			cb = slist_container(dsm_segment_detach_callback, node, node);
			pfree(cb);
		}

		/*
		 * Decrementing the reference count is a sort of implicit on-detach
		 * action; make sure we don't do that, either.
		 */
		seg->control_slot = INVALID_CONTROL_SLOT;
	}
}
/*
 * Create a segment descriptor.
 */
static dsm_segment *
dsm_create_descriptor(void)
{
	dsm_segment *seg;

	if (CurrentResourceOwner)
		ResourceOwnerEnlarge(CurrentResourceOwner);

	seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
	dlist_push_head(&dsm_segment_list, &seg->node);

	/* seg->handle must be initialized by the caller */
	seg->control_slot = INVALID_CONTROL_SLOT;
	seg->impl_private = NULL;
	seg->mapped_address = NULL;
	seg->mapped_size = 0;

	seg->resowner = CurrentResourceOwner;
	if (CurrentResourceOwner)
		ResourceOwnerRememberDSM(CurrentResourceOwner, seg);

	slist_init(&seg->on_detach);

	return seg;
}
/*
 * Sanity check a control segment.
 *
 * The goal here isn't to detect everything that could possibly be wrong with
 * the control segment; there's not enough information for that.  Rather, the
 * goal is to make sure that someone can iterate over the items in the
 * segment without overrunning the end of the mapping and crashing.  We also
 * check the magic number since, if that's messed up, this may not even be
 * one of our segments at all.
 */
static bool
dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
{
	if (mapped_size < offsetof(dsm_control_header, item))
		return false;			/* Mapped size too short to read header. */
	if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
		return false;			/* Magic number doesn't match. */
	if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
		return false;			/* Max item count won't fit in map. */
	if (control->nitems > control->maxitems)
		return false;			/* Overfull. */
	return true;
}

/*
 * Compute the number of control-segment bytes needed to store a given
 * number of items.
 */
static uint64
dsm_control_bytes_needed(uint32 nitems)
{
	return offsetof(dsm_control_header, item)
		+ sizeof(dsm_control_item) * (uint64) nitems;
}
static inline dsm_handle
make_main_region_dsm_handle(int slot)
{
	dsm_handle	handle;

	/*
	 * We need to create a handle that doesn't collide with any existing
	 * extra segment created by dsm_impl_op(), so we'll make it odd.  It also
	 * mustn't collide with any other main area pseudo-segment, so we'll
	 * include the slot number in some of the bits.  We also want to make an
	 * effort to avoid newly created and recently destroyed handles from
	 * being confused, so we'll make the rest of the bits random.
	 */
	handle = 1;
	handle |= slot << 1;
	handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
	return handle;
}

static inline bool
is_main_region_dsm_handle(dsm_handle handle)
{
	return handle & 1;
}

/* ResourceOwner callbacks */

static void
ResOwnerReleaseDSM(Datum res)
{
	dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);

	seg->resowner = NULL;
	dsm_detach(seg);
}

static char *
ResOwnerPrintDSM(Datum res)
{
	dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);

	return psprintf("dynamic shared memory segment %u",
					dsm_segment_handle(seg));
}