/*-------------------------------------------------------------------------
 *
 * dsm.c
 *	  manage dynamic shared memory segments
 *
 * This file provides a set of services to make programming with dynamic
 * shared memory segments more convenient.  Unlike the low-level
 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
 * created using this module will be cleaned up automatically.  Mappings
 * will be removed when the resource owner under which they were created
 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
 * have session lifespan.  Segments will be removed when there are no
 * remaining mappings, or at postmaster shutdown in any case.  After a
 * hard postmaster crash, remaining segments will be removed, if they
 * still exist, at the next postmaster startup.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *	  src/backend/storage/ipc/dsm.c
 *
 *-------------------------------------------------------------------------
 */
36 #include "common/pg_prng.h"
37 #include "lib/ilist.h"
38 #include "miscadmin.h"
39 #include "port/pg_bitutils.h"
40 #include "storage/dsm.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/lwlock.h"
44 #include "storage/pg_shmem.h"
45 #include "storage/shmem.h"
46 #include "utils/freepage.h"
47 #include "utils/memutils.h"
48 #include "utils/resowner.h"
/* Magic number stored in the control segment header at creation time. */
#define PG_DYNSHMEM_CONTROL_MAGIC		0x9a503d32

/*
 * The control segment is sized for PG_DYNSHMEM_FIXED_SLOTS slots plus
 * PG_DYNSHMEM_SLOTS_PER_BACKEND slots for each of MaxBackends.
 */
#define PG_DYNSHMEM_FIXED_SLOTS			64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND	5

/* Marks a dsm_segment that has no slot in the control segment. */
#define INVALID_CONTROL_SLOT		((uint32) -1)
57 /* Backend-local tracking for on-detach callbacks. */
58 typedef struct dsm_segment_detach_callback
60 on_dsm_detach_callback function
;
63 } dsm_segment_detach_callback
;
65 /* Backend-local state for a dynamic shared memory segment. */
68 dlist_node node
; /* List link in dsm_segment_list. */
69 ResourceOwner resowner
; /* Resource owner. */
70 dsm_handle handle
; /* Segment name. */
71 uint32 control_slot
; /* Slot in control segment. */
72 void *impl_private
; /* Implementation-specific private data. */
73 void *mapped_address
; /* Mapping address, or NULL if unmapped. */
74 Size mapped_size
; /* Size of our mapping. */
75 slist_head on_detach
; /* On-detach callbacks. */
78 /* Shared-memory state for a dynamic shared memory segment. */
79 typedef struct dsm_control_item
82 uint32 refcnt
; /* 2+ = active, 1 = moribund, 0 = gone */
85 void *impl_private_pm_handle
; /* only needed on Windows */
89 /* Layout of the dynamic shared memory control segment. */
90 typedef struct dsm_control_header
95 dsm_control_item item
[FLEXIBLE_ARRAY_MEMBER
];
98 static void dsm_cleanup_for_mmap(void);
99 static void dsm_postmaster_shutdown(int code
, Datum arg
);
100 static dsm_segment
*dsm_create_descriptor(void);
101 static bool dsm_control_segment_sane(dsm_control_header
*control
,
103 static uint64
dsm_control_bytes_needed(uint32 nitems
);
104 static inline dsm_handle
make_main_region_dsm_handle(int slot
);
105 static inline bool is_main_region_dsm_handle(dsm_handle handle
);
107 /* Has this backend initialized the dynamic shared memory system yet? */
108 static bool dsm_init_done
= false;
110 /* Preallocated DSM space in the main shared memory region. */
111 static void *dsm_main_space_begin
= NULL
;
114 * List of dynamic shared memory segments used by this backend.
116 * At process exit time, we must decrement the reference count of each
117 * segment we have attached; this list makes it possible to find all such
120 * This list should always be empty in the postmaster. We could probably
121 * allow the postmaster to map dynamic shared memory segments before it
122 * begins to start child processes, provided that each process adjusted
123 * the reference counts for those segments in the control segment at
124 * startup time, but there's no obvious need for such a facility, which
125 * would also be complex to handle in the EXEC_BACKEND case. Once the
126 * postmaster has begun spawning children, there's an additional problem:
127 * each new mapping would require an update to the control segment,
128 * which requires locking, in which the postmaster must not be involved.
130 static dlist_head dsm_segment_list
= DLIST_STATIC_INIT(dsm_segment_list
);
133 * Control segment information.
135 * Unlike ordinary shared memory segments, the control segment is not
136 * reference counted; instead, it lasts for the postmaster's entire
137 * life cycle. For simplicity, it doesn't have a dsm_segment object either.
139 static dsm_handle dsm_control_handle
;
140 static dsm_control_header
*dsm_control
;
141 static Size dsm_control_mapped_size
= 0;
142 static void *dsm_control_impl_private
= NULL
;
145 /* ResourceOwner callbacks to hold DSM segments */
146 static void ResOwnerReleaseDSM(Datum res
);
147 static char *ResOwnerPrintDSM(Datum res
);
149 static const ResourceOwnerDesc dsm_resowner_desc
=
151 .name
= "dynamic shared memory segment",
152 .release_phase
= RESOURCE_RELEASE_BEFORE_LOCKS
,
153 .release_priority
= RELEASE_PRIO_DSMS
,
154 .ReleaseResource
= ResOwnerReleaseDSM
,
155 .DebugPrint
= ResOwnerPrintDSM
158 /* Convenience wrappers over ResourceOwnerRemember/Forget */
160 ResourceOwnerRememberDSM(ResourceOwner owner
, dsm_segment
*seg
)
162 ResourceOwnerRemember(owner
, PointerGetDatum(seg
), &dsm_resowner_desc
);
165 ResourceOwnerForgetDSM(ResourceOwner owner
, dsm_segment
*seg
)
167 ResourceOwnerForget(owner
, PointerGetDatum(seg
), &dsm_resowner_desc
);
171 * Start up the dynamic shared memory system.
173 * This is called just once during each cluster lifetime, at postmaster
177 dsm_postmaster_startup(PGShmemHeader
*shim
)
179 void *dsm_control_address
= NULL
;
183 Assert(!IsUnderPostmaster
);
186 * If we're using the mmap implementations, clean up any leftovers.
187 * Cleanup isn't needed on Windows, and happens earlier in startup for
188 * POSIX and System V shared memory, via a direct call to
189 * dsm_cleanup_using_control_segment.
191 if (dynamic_shared_memory_type
== DSM_IMPL_MMAP
)
192 dsm_cleanup_for_mmap();
194 /* Determine size for new control segment. */
195 maxitems
= PG_DYNSHMEM_FIXED_SLOTS
196 + PG_DYNSHMEM_SLOTS_PER_BACKEND
* MaxBackends
;
197 elog(DEBUG2
, "dynamic shared memory system will support %u segments",
199 segsize
= dsm_control_bytes_needed(maxitems
);
202 * Loop until we find an unused identifier for the new control segment. We
203 * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
204 * control segment", so avoid generating that value for a real handle.
208 Assert(dsm_control_address
== NULL
);
209 Assert(dsm_control_mapped_size
== 0);
210 /* Use even numbers only */
211 dsm_control_handle
= pg_prng_uint32(&pg_global_prng_state
) << 1;
212 if (dsm_control_handle
== DSM_HANDLE_INVALID
)
214 if (dsm_impl_op(DSM_OP_CREATE
, dsm_control_handle
, segsize
,
215 &dsm_control_impl_private
, &dsm_control_address
,
216 &dsm_control_mapped_size
, ERROR
))
219 dsm_control
= dsm_control_address
;
220 on_shmem_exit(dsm_postmaster_shutdown
, PointerGetDatum(shim
));
222 "created dynamic shared memory control segment %u (%zu bytes)",
223 dsm_control_handle
, segsize
);
224 shim
->dsm_control
= dsm_control_handle
;
226 /* Initialize control segment. */
227 dsm_control
->magic
= PG_DYNSHMEM_CONTROL_MAGIC
;
228 dsm_control
->nitems
= 0;
229 dsm_control
->maxitems
= maxitems
;
233 * Determine whether the control segment from the previous postmaster
234 * invocation still exists. If so, remove the dynamic shared memory
235 * segments to which it refers, and then the control segment itself.
238 dsm_cleanup_using_control_segment(dsm_handle old_control_handle
)
240 void *mapped_address
= NULL
;
241 void *junk_mapped_address
= NULL
;
242 void *impl_private
= NULL
;
243 void *junk_impl_private
= NULL
;
244 Size mapped_size
= 0;
245 Size junk_mapped_size
= 0;
248 dsm_control_header
*old_control
;
251 * Try to attach the segment. If this fails, it probably just means that
252 * the operating system has been rebooted and the segment no longer
253 * exists, or an unrelated process has used the same shm ID. So just fall
256 if (!dsm_impl_op(DSM_OP_ATTACH
, old_control_handle
, 0, &impl_private
,
257 &mapped_address
, &mapped_size
, DEBUG1
))
261 * We've managed to reattach it, but the contents might not be sane. If
262 * they aren't, we disregard the segment after all.
264 old_control
= (dsm_control_header
*) mapped_address
;
265 if (!dsm_control_segment_sane(old_control
, mapped_size
))
267 dsm_impl_op(DSM_OP_DETACH
, old_control_handle
, 0, &impl_private
,
268 &mapped_address
, &mapped_size
, LOG
);
273 * OK, the control segment looks basically valid, so we can use it to get
274 * a list of segments that need to be removed.
276 nitems
= old_control
->nitems
;
277 for (i
= 0; i
< nitems
; ++i
)
282 /* If the reference count is 0, the slot is actually unused. */
283 refcnt
= old_control
->item
[i
].refcnt
;
287 /* If it was using the main shmem area, there is nothing to do. */
288 handle
= old_control
->item
[i
].handle
;
289 if (is_main_region_dsm_handle(handle
))
292 /* Log debugging information. */
293 elog(DEBUG2
, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
296 /* Destroy the referenced segment. */
297 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
298 &junk_mapped_address
, &junk_mapped_size
, LOG
);
301 /* Destroy the old control segment, too. */
303 "cleaning up dynamic shared memory control segment with ID %u",
305 dsm_impl_op(DSM_OP_DESTROY
, old_control_handle
, 0, &impl_private
,
306 &mapped_address
, &mapped_size
, LOG
);
310 * When we're using the mmap shared memory implementation, "shared memory"
311 * segments might even manage to survive an operating system reboot.
312 * But there's no guarantee as to exactly what will survive: some segments
313 * may survive, and others may not, and the contents of some may be out
314 * of date. In particular, the control segment may be out of date, so we
315 * can't rely on it to figure out what to remove. However, since we know
316 * what directory contains the files we used as shared memory, we can simply
317 * scan the directory and blow everything away that shouldn't be there.
320 dsm_cleanup_for_mmap(void)
325 /* Scan the directory for something with a name of the correct format. */
326 dir
= AllocateDir(PG_DYNSHMEM_DIR
);
328 while ((dent
= ReadDir(dir
, PG_DYNSHMEM_DIR
)) != NULL
)
330 if (strncmp(dent
->d_name
, PG_DYNSHMEM_MMAP_FILE_PREFIX
,
331 strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX
)) == 0)
333 char buf
[MAXPGPATH
+ sizeof(PG_DYNSHMEM_DIR
)];
335 snprintf(buf
, sizeof(buf
), PG_DYNSHMEM_DIR
"/%s", dent
->d_name
);
337 elog(DEBUG2
, "removing file \"%s\"", buf
);
339 /* We found a matching file; so remove it. */
340 if (unlink(buf
) != 0)
342 (errcode_for_file_access(),
343 errmsg("could not remove file \"%s\": %m", buf
)));
347 /* Cleanup complete. */
352 * At shutdown time, we iterate over the control segment and remove all
353 * remaining dynamic shared memory segments. We avoid throwing errors here;
354 * the postmaster is shutting down either way, and this is just non-critical
358 dsm_postmaster_shutdown(int code
, Datum arg
)
362 void *dsm_control_address
;
363 void *junk_mapped_address
= NULL
;
364 void *junk_impl_private
= NULL
;
365 Size junk_mapped_size
= 0;
366 PGShmemHeader
*shim
= (PGShmemHeader
*) DatumGetPointer(arg
);
369 * If some other backend exited uncleanly, it might have corrupted the
370 * control segment while it was dying. In that case, we warn and ignore
371 * the contents of the control segment. This may end up leaving behind
372 * stray shared memory segments, but there's not much we can do about that
373 * if the metadata is gone.
375 nitems
= dsm_control
->nitems
;
376 if (!dsm_control_segment_sane(dsm_control
, dsm_control_mapped_size
))
379 (errmsg("dynamic shared memory control segment is corrupt")));
383 /* Remove any remaining segments. */
384 for (i
= 0; i
< nitems
; ++i
)
388 /* If the reference count is 0, the slot is actually unused. */
389 if (dsm_control
->item
[i
].refcnt
== 0)
392 handle
= dsm_control
->item
[i
].handle
;
393 if (is_main_region_dsm_handle(handle
))
396 /* Log debugging information. */
397 elog(DEBUG2
, "cleaning up orphaned dynamic shared memory with ID %u",
400 /* Destroy the segment. */
401 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
402 &junk_mapped_address
, &junk_mapped_size
, LOG
);
405 /* Remove the control segment itself. */
407 "cleaning up dynamic shared memory control segment with ID %u",
409 dsm_control_address
= dsm_control
;
410 dsm_impl_op(DSM_OP_DESTROY
, dsm_control_handle
, 0,
411 &dsm_control_impl_private
, &dsm_control_address
,
412 &dsm_control_mapped_size
, LOG
);
413 dsm_control
= dsm_control_address
;
414 shim
->dsm_control
= 0;
418 * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
419 * we must reread the state file and map the control segment; in other cases,
420 * we'll have inherited the postmaster's mapping and global variables.
423 dsm_backend_startup(void)
426 if (IsUnderPostmaster
)
428 void *control_address
= NULL
;
430 /* Attach control segment. */
431 Assert(dsm_control_handle
!= 0);
432 dsm_impl_op(DSM_OP_ATTACH
, dsm_control_handle
, 0,
433 &dsm_control_impl_private
, &control_address
,
434 &dsm_control_mapped_size
, ERROR
);
435 dsm_control
= control_address
;
436 /* If control segment doesn't look sane, something is badly wrong. */
437 if (!dsm_control_segment_sane(dsm_control
, dsm_control_mapped_size
))
439 dsm_impl_op(DSM_OP_DETACH
, dsm_control_handle
, 0,
440 &dsm_control_impl_private
, &control_address
,
441 &dsm_control_mapped_size
, WARNING
);
443 (errcode(ERRCODE_INTERNAL_ERROR
),
444 errmsg("dynamic shared memory control segment is not valid")));
449 dsm_init_done
= true;
#ifdef EXEC_BACKEND
/*
 * When running under EXEC_BACKEND, we get a callback here when the main
 * shared memory segment is re-attached, so that we can record the control
 * handle retrieved from it.
 */
void
dsm_set_control_handle(dsm_handle h)
{
	/* May be set only once, and only to a nonzero handle. */
	Assert(dsm_control_handle == 0 && h != 0);
	dsm_control_handle = h;
}
#endif
467 * Reserve some space in the main shared memory segment for DSM segments.
470 dsm_estimate_size(void)
472 return 1024 * 1024 * (size_t) min_dynamic_shared_memory
;
476 * Initialize space in the main shared memory segment for DSM segments.
481 size_t size
= dsm_estimate_size();
487 dsm_main_space_begin
= ShmemInitStruct("Preallocated DSM", size
, &found
);
490 FreePageManager
*fpm
= (FreePageManager
*) dsm_main_space_begin
;
491 size_t first_page
= 0;
494 /* Reserve space for the FreePageManager. */
495 while (first_page
* FPM_PAGE_SIZE
< sizeof(FreePageManager
))
498 /* Initialize it and give it all the rest of the space. */
499 FreePageManagerInitialize(fpm
, dsm_main_space_begin
);
500 pages
= (size
/ FPM_PAGE_SIZE
) - first_page
;
501 FreePageManagerPut(fpm
, first_page
, pages
);
506 * Create a new dynamic shared memory segment.
508 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
509 * with it and must be detached before the resource owner releases, or a
510 * warning will be logged. If CurrentResourceOwner is NULL, the segment
511 * remains attached until explicitly detached or the session ends.
512 * Creating with a NULL CurrentResourceOwner is equivalent to creating
513 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
516 dsm_create(Size size
, int flags
)
522 size_t first_page
= 0;
523 FreePageManager
*dsm_main_space_fpm
= dsm_main_space_begin
;
524 bool using_main_dsm_region
= false;
527 * Unsafe in postmaster. It might seem pointless to allow use of dsm in
528 * single user mode, but otherwise some subsystems will need dedicated
529 * single user mode code paths.
531 Assert(IsUnderPostmaster
|| !IsPostmasterEnvironment
);
534 dsm_backend_startup();
536 /* Create a new segment descriptor. */
537 seg
= dsm_create_descriptor();
540 * Lock the control segment while we try to allocate from the main shared
541 * memory area, if configured.
543 if (dsm_main_space_fpm
)
545 npages
= size
/ FPM_PAGE_SIZE
;
546 if (size
% FPM_PAGE_SIZE
> 0)
549 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
550 if (FreePageManagerGet(dsm_main_space_fpm
, npages
, &first_page
))
552 /* We can carve out a piece of the main shared memory segment. */
553 seg
->mapped_address
= (char *) dsm_main_space_begin
+
554 first_page
* FPM_PAGE_SIZE
;
555 seg
->mapped_size
= npages
* FPM_PAGE_SIZE
;
556 using_main_dsm_region
= true;
557 /* We'll choose a handle below. */
561 if (!using_main_dsm_region
)
564 * We need to create a new memory segment. Loop until we find an
565 * unused segment identifier.
567 if (dsm_main_space_fpm
)
568 LWLockRelease(DynamicSharedMemoryControlLock
);
571 Assert(seg
->mapped_address
== NULL
&& seg
->mapped_size
== 0);
572 /* Use even numbers only */
573 seg
->handle
= pg_prng_uint32(&pg_global_prng_state
) << 1;
574 if (seg
->handle
== DSM_HANDLE_INVALID
) /* Reserve sentinel */
576 if (dsm_impl_op(DSM_OP_CREATE
, seg
->handle
, size
, &seg
->impl_private
,
577 &seg
->mapped_address
, &seg
->mapped_size
, ERROR
))
580 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
583 /* Search the control segment for an unused slot. */
584 nitems
= dsm_control
->nitems
;
585 for (i
= 0; i
< nitems
; ++i
)
587 if (dsm_control
->item
[i
].refcnt
== 0)
589 if (using_main_dsm_region
)
591 seg
->handle
= make_main_region_dsm_handle(i
);
592 dsm_control
->item
[i
].first_page
= first_page
;
593 dsm_control
->item
[i
].npages
= npages
;
596 Assert(!is_main_region_dsm_handle(seg
->handle
));
597 dsm_control
->item
[i
].handle
= seg
->handle
;
598 /* refcnt of 1 triggers destruction, so start at 2 */
599 dsm_control
->item
[i
].refcnt
= 2;
600 dsm_control
->item
[i
].impl_private_pm_handle
= NULL
;
601 dsm_control
->item
[i
].pinned
= false;
602 seg
->control_slot
= i
;
603 LWLockRelease(DynamicSharedMemoryControlLock
);
608 /* Verify that we can support an additional mapping. */
609 if (nitems
>= dsm_control
->maxitems
)
611 if (using_main_dsm_region
)
612 FreePageManagerPut(dsm_main_space_fpm
, first_page
, npages
);
613 LWLockRelease(DynamicSharedMemoryControlLock
);
614 if (!using_main_dsm_region
)
615 dsm_impl_op(DSM_OP_DESTROY
, seg
->handle
, 0, &seg
->impl_private
,
616 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
);
617 if (seg
->resowner
!= NULL
)
618 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
619 dlist_delete(&seg
->node
);
622 if ((flags
& DSM_CREATE_NULL_IF_MAXSEGMENTS
) != 0)
625 (errcode(ERRCODE_INSUFFICIENT_RESOURCES
),
626 errmsg("too many dynamic shared memory segments")));
629 /* Enter the handle into a new array slot. */
630 if (using_main_dsm_region
)
632 seg
->handle
= make_main_region_dsm_handle(nitems
);
633 dsm_control
->item
[i
].first_page
= first_page
;
634 dsm_control
->item
[i
].npages
= npages
;
636 dsm_control
->item
[nitems
].handle
= seg
->handle
;
637 /* refcnt of 1 triggers destruction, so start at 2 */
638 dsm_control
->item
[nitems
].refcnt
= 2;
639 dsm_control
->item
[nitems
].impl_private_pm_handle
= NULL
;
640 dsm_control
->item
[nitems
].pinned
= false;
641 seg
->control_slot
= nitems
;
642 dsm_control
->nitems
++;
643 LWLockRelease(DynamicSharedMemoryControlLock
);
649 * Attach a dynamic shared memory segment.
651 * See comments for dsm_segment_handle() for an explanation of how this
652 * is intended to be used.
654 * This function will return NULL if the segment isn't known to the system.
655 * This can happen if we're asked to attach the segment, but then everyone
656 * else detaches it (causing it to be destroyed) before we get around to
659 * If there is a non-NULL CurrentResourceOwner, the attached segment is
660 * associated with it and must be detached before the resource owner releases,
661 * or a warning will be logged. Otherwise the segment remains attached until
662 * explicitly detached or the session ends. See the note atop dsm_create().
665 dsm_attach(dsm_handle h
)
672 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
673 Assert(IsUnderPostmaster
);
676 dsm_backend_startup();
679 * Since this is just a debugging cross-check, we could leave it out
680 * altogether, or include it only in assert-enabled builds. But since the
681 * list of attached segments should normally be very short, let's include
682 * it always for right now.
684 * If you're hitting this error, you probably want to attempt to find an
685 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
688 dlist_foreach(iter
, &dsm_segment_list
)
690 seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
691 if (seg
->handle
== h
)
692 elog(ERROR
, "can't attach the same segment more than once");
695 /* Create a new segment descriptor. */
696 seg
= dsm_create_descriptor();
699 /* Bump reference count for this segment in shared memory. */
700 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
701 nitems
= dsm_control
->nitems
;
702 for (i
= 0; i
< nitems
; ++i
)
705 * If the reference count is 0, the slot is actually unused. If the
706 * reference count is 1, the slot is still in use, but the segment is
707 * in the process of going away; even if the handle matches, another
708 * slot may already have started using the same handle value by
709 * coincidence so we have to keep searching.
711 if (dsm_control
->item
[i
].refcnt
<= 1)
714 /* If the handle doesn't match, it's not the slot we want. */
715 if (dsm_control
->item
[i
].handle
!= seg
->handle
)
718 /* Otherwise we've found a match. */
719 dsm_control
->item
[i
].refcnt
++;
720 seg
->control_slot
= i
;
721 if (is_main_region_dsm_handle(seg
->handle
))
723 seg
->mapped_address
= (char *) dsm_main_space_begin
+
724 dsm_control
->item
[i
].first_page
* FPM_PAGE_SIZE
;
725 seg
->mapped_size
= dsm_control
->item
[i
].npages
* FPM_PAGE_SIZE
;
729 LWLockRelease(DynamicSharedMemoryControlLock
);
732 * If we didn't find the handle we're looking for in the control segment,
733 * it probably means that everyone else who had it mapped, including the
734 * original creator, died before we got to this point. It's up to the
735 * caller to decide what to do about that.
737 if (seg
->control_slot
== INVALID_CONTROL_SLOT
)
743 /* Here's where we actually try to map the segment. */
744 if (!is_main_region_dsm_handle(seg
->handle
))
745 dsm_impl_op(DSM_OP_ATTACH
, seg
->handle
, 0, &seg
->impl_private
,
746 &seg
->mapped_address
, &seg
->mapped_size
, ERROR
);
752 * At backend shutdown time, detach any segments that are still attached.
753 * (This is similar to dsm_detach_all, except that there's no reason to
754 * unmap the control segment before exiting, so we don't bother.)
757 dsm_backend_shutdown(void)
759 while (!dlist_is_empty(&dsm_segment_list
))
763 seg
= dlist_head_element(dsm_segment
, node
, &dsm_segment_list
);
769 * Detach all shared memory segments, including the control segments. This
770 * should be called, along with PGSharedMemoryDetach, in processes that
771 * might inherit mappings but are not intended to be connected to dynamic
777 void *control_address
= dsm_control
;
779 while (!dlist_is_empty(&dsm_segment_list
))
783 seg
= dlist_head_element(dsm_segment
, node
, &dsm_segment_list
);
787 if (control_address
!= NULL
)
788 dsm_impl_op(DSM_OP_DETACH
, dsm_control_handle
, 0,
789 &dsm_control_impl_private
, &control_address
,
790 &dsm_control_mapped_size
, ERROR
);
794 * Detach from a shared memory segment, destroying the segment if we
795 * remove the last reference.
797 * This function should never fail. It will often be invoked when aborting
798 * a transaction, and a further error won't serve any purpose. It's not a
799 * complete disaster if we fail to unmap or destroy the segment; it means a
800 * resource leak, but that doesn't necessarily preclude further operations.
803 dsm_detach(dsm_segment
*seg
)
806 * Invoke registered callbacks. Just in case one of those callbacks
807 * throws a further error that brings us back here, pop the callback
808 * before invoking it, to avoid infinite error recursion. Don't allow
809 * interrupts while running the individual callbacks in non-error code
810 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
811 * a statement timeout or similar.
814 while (!slist_is_empty(&seg
->on_detach
))
817 dsm_segment_detach_callback
*cb
;
818 on_dsm_detach_callback function
;
821 node
= slist_pop_head_node(&seg
->on_detach
);
822 cb
= slist_container(dsm_segment_detach_callback
, node
, node
);
823 function
= cb
->function
;
832 * Try to remove the mapping, if one exists. Normally, there will be, but
833 * maybe not, if we failed partway through a create or attach operation.
834 * We remove the mapping before decrementing the reference count so that
835 * the process that sees a zero reference count can be certain that no
836 * remaining mappings exist. Even if this fails, we pretend that it
837 * works, because retrying is likely to fail in the same way.
839 if (seg
->mapped_address
!= NULL
)
841 if (!is_main_region_dsm_handle(seg
->handle
))
842 dsm_impl_op(DSM_OP_DETACH
, seg
->handle
, 0, &seg
->impl_private
,
843 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
);
844 seg
->impl_private
= NULL
;
845 seg
->mapped_address
= NULL
;
846 seg
->mapped_size
= 0;
849 /* Reduce reference count, if we previously increased it. */
850 if (seg
->control_slot
!= INVALID_CONTROL_SLOT
)
853 uint32 control_slot
= seg
->control_slot
;
855 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
856 Assert(dsm_control
->item
[control_slot
].handle
== seg
->handle
);
857 Assert(dsm_control
->item
[control_slot
].refcnt
> 1);
858 refcnt
= --dsm_control
->item
[control_slot
].refcnt
;
859 seg
->control_slot
= INVALID_CONTROL_SLOT
;
860 LWLockRelease(DynamicSharedMemoryControlLock
);
862 /* If new reference count is 1, try to destroy the segment. */
865 /* A pinned segment should never reach 1. */
866 Assert(!dsm_control
->item
[control_slot
].pinned
);
869 * If we fail to destroy the segment here, or are killed before we
870 * finish doing so, the reference count will remain at 1, which
871 * will mean that nobody else can attach to the segment. At
872 * postmaster shutdown time, or when a new postmaster is started
873 * after a hard kill, another attempt will be made to remove the
876 * The main case we're worried about here is being killed by a
877 * signal before we can finish removing the segment. In that
878 * case, it's important to be sure that the segment still gets
879 * removed. If we actually fail to remove the segment for some
880 * other reason, the postmaster may not have any better luck than
881 * we did. There's not much we can do about that, though.
883 if (is_main_region_dsm_handle(seg
->handle
) ||
884 dsm_impl_op(DSM_OP_DESTROY
, seg
->handle
, 0, &seg
->impl_private
,
885 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
))
887 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
888 if (is_main_region_dsm_handle(seg
->handle
))
889 FreePageManagerPut((FreePageManager
*) dsm_main_space_begin
,
890 dsm_control
->item
[control_slot
].first_page
,
891 dsm_control
->item
[control_slot
].npages
);
892 Assert(dsm_control
->item
[control_slot
].handle
== seg
->handle
);
893 Assert(dsm_control
->item
[control_slot
].refcnt
== 1);
894 dsm_control
->item
[control_slot
].refcnt
= 0;
895 LWLockRelease(DynamicSharedMemoryControlLock
);
900 /* Clean up our remaining backend-private data structures. */
901 if (seg
->resowner
!= NULL
)
902 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
903 dlist_delete(&seg
->node
);
908 * Keep a dynamic shared memory mapping until end of session.
910 * By default, mappings are owned by the current resource owner, which
911 * typically means they stick around for the duration of the current query
915 dsm_pin_mapping(dsm_segment
*seg
)
917 if (seg
->resowner
!= NULL
)
919 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
920 seg
->resowner
= NULL
;
925 * Arrange to remove a dynamic shared memory mapping at cleanup time.
927 * dsm_pin_mapping() can be used to preserve a mapping for the entire
928 * lifetime of a process; this function reverses that decision, making
929 * the segment owned by the current resource owner. This may be useful
930 * just before performing some operation that will invalidate the segment
931 * for future use by this backend.
934 dsm_unpin_mapping(dsm_segment
*seg
)
936 Assert(seg
->resowner
== NULL
);
937 ResourceOwnerEnlarge(CurrentResourceOwner
);
938 seg
->resowner
= CurrentResourceOwner
;
939 ResourceOwnerRememberDSM(seg
->resowner
, seg
);
943 * Keep a dynamic shared memory segment until postmaster shutdown, or until
944 * dsm_unpin_segment is called.
946 * This function should not be called more than once per segment, unless the
947 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
949 * Note that this function does not arrange for the current process to
950 * keep the segment mapped indefinitely; if that behavior is desired,
951 * dsm_pin_mapping() should be used from each process that needs to
952 * retain the mapping.
955 dsm_pin_segment(dsm_segment
*seg
)
960 * Bump reference count for this segment in shared memory. This will
961 * ensure that even if there is no session which is attached to this
962 * segment, it will remain until postmaster shutdown or an explicit call
965 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
966 if (dsm_control
->item
[seg
->control_slot
].pinned
)
967 elog(ERROR
, "cannot pin a segment that is already pinned");
968 if (!is_main_region_dsm_handle(seg
->handle
))
969 dsm_impl_pin_segment(seg
->handle
, seg
->impl_private
, &handle
);
970 dsm_control
->item
[seg
->control_slot
].pinned
= true;
971 dsm_control
->item
[seg
->control_slot
].refcnt
++;
972 dsm_control
->item
[seg
->control_slot
].impl_private_pm_handle
= handle
;
973 LWLockRelease(DynamicSharedMemoryControlLock
);
977 * Unpin a dynamic shared memory segment that was previously pinned with
978 * dsm_pin_segment. This function should not be called unless dsm_pin_segment
979 * was previously called for this segment.
981 * The argument is a dsm_handle rather than a dsm_segment in case you want
982 * to unpin a segment to which you haven't attached. This turns out to be
983 * useful if, for example, a reference to one shared memory segment is stored
984 * within another shared memory segment. You might want to unpin the
985 * referenced segment before destroying the referencing segment.
988 dsm_unpin_segment(dsm_handle handle
)
990 uint32 control_slot
= INVALID_CONTROL_SLOT
;
991 bool destroy
= false;
994 /* Find the control slot for the given handle. */
995 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
996 for (i
= 0; i
< dsm_control
->nitems
; ++i
)
998 /* Skip unused slots and segments that are concurrently going away. */
999 if (dsm_control
->item
[i
].refcnt
<= 1)
1002 /* If we've found our handle, we can stop searching. */
1003 if (dsm_control
->item
[i
].handle
== handle
)
1011 * We should definitely have found the slot, and it should not already be
1012 * in the process of going away, because this function should only be
1013 * called on a segment which is pinned.
1015 if (control_slot
== INVALID_CONTROL_SLOT
)
1016 elog(ERROR
, "cannot unpin unknown segment handle");
1017 if (!dsm_control
->item
[control_slot
].pinned
)
1018 elog(ERROR
, "cannot unpin a segment that is not pinned");
1019 Assert(dsm_control
->item
[control_slot
].refcnt
> 1);
1022 * Allow implementation-specific code to run. We have to do this before
1023 * releasing the lock, because impl_private_pm_handle may get modified by
1024 * dsm_impl_unpin_segment.
1026 if (!is_main_region_dsm_handle(handle
))
1027 dsm_impl_unpin_segment(handle
,
1028 &dsm_control
->item
[control_slot
].impl_private_pm_handle
);
1030 /* Note that 1 means no references (0 means unused slot). */
1031 if (--dsm_control
->item
[control_slot
].refcnt
== 1)
1033 dsm_control
->item
[control_slot
].pinned
= false;
1035 /* Now we can release the lock. */
1036 LWLockRelease(DynamicSharedMemoryControlLock
);
1038 /* Clean up resources if that was the last reference. */
1041 void *junk_impl_private
= NULL
;
1042 void *junk_mapped_address
= NULL
;
1043 Size junk_mapped_size
= 0;
1046 * For an explanation of how error handling works in this case, see
1047 * comments in dsm_detach. Note that if we reach this point, the
1048 * current process certainly does not have the segment mapped, because
1049 * if it did, the reference count would have still been greater than 1
1050 * even after releasing the reference count held by the pin. The fact
1051 * that there can't be a dsm_segment for this handle makes it OK to
1052 * pass the mapped size, mapped address, and private data as NULL
1055 if (is_main_region_dsm_handle(handle
) ||
1056 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
1057 &junk_mapped_address
, &junk_mapped_size
, WARNING
))
1059 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
1060 if (is_main_region_dsm_handle(handle
))
1061 FreePageManagerPut((FreePageManager
*) dsm_main_space_begin
,
1062 dsm_control
->item
[control_slot
].first_page
,
1063 dsm_control
->item
[control_slot
].npages
);
1064 Assert(dsm_control
->item
[control_slot
].handle
== handle
);
1065 Assert(dsm_control
->item
[control_slot
].refcnt
== 1);
1066 dsm_control
->item
[control_slot
].refcnt
= 0;
1067 LWLockRelease(DynamicSharedMemoryControlLock
);
1073 * Find an existing mapping for a shared memory segment, if there is one.
1076 dsm_find_mapping(dsm_handle handle
)
1081 dlist_foreach(iter
, &dsm_segment_list
)
1083 seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
1084 if (seg
->handle
== handle
)
1092 * Get the address at which a dynamic shared memory segment is mapped.
1095 dsm_segment_address(dsm_segment
*seg
)
1097 Assert(seg
->mapped_address
!= NULL
);
1098 return seg
->mapped_address
;
1102 * Get the size of a mapping.
1105 dsm_segment_map_length(dsm_segment
*seg
)
1107 Assert(seg
->mapped_address
!= NULL
);
1108 return seg
->mapped_size
;
1112 * Get a handle for a mapping.
1114 * To establish communication via dynamic shared memory between two backends,
1115 * one of them should first call dsm_create() to establish a new shared
1116 * memory mapping. That process should then call dsm_segment_handle() to
1117 * obtain a handle for the mapping, and pass that handle to the
1118 * coordinating backend via some means (e.g. bgw_main_arg, or via the
1119 * main shared memory segment). The recipient, once in possession of the
1120 * handle, should call dsm_attach().
1123 dsm_segment_handle(dsm_segment
*seg
)
1129 * Register an on-detach callback for a dynamic shared memory segment.
1132 on_dsm_detach(dsm_segment
*seg
, on_dsm_detach_callback function
, Datum arg
)
1134 dsm_segment_detach_callback
*cb
;
1136 cb
= MemoryContextAlloc(TopMemoryContext
,
1137 sizeof(dsm_segment_detach_callback
));
1138 cb
->function
= function
;
1140 slist_push_head(&seg
->on_detach
, &cb
->node
);
1144 * Unregister an on-detach callback for a dynamic shared memory segment.
1147 cancel_on_dsm_detach(dsm_segment
*seg
, on_dsm_detach_callback function
,
1150 slist_mutable_iter iter
;
1152 slist_foreach_modify(iter
, &seg
->on_detach
)
1154 dsm_segment_detach_callback
*cb
;
1156 cb
= slist_container(dsm_segment_detach_callback
, node
, iter
.cur
);
1157 if (cb
->function
== function
&& cb
->arg
== arg
)
1159 slist_delete_current(&iter
);
1167 * Discard all registered on-detach callbacks without executing them.
1170 reset_on_dsm_detach(void)
1174 dlist_foreach(iter
, &dsm_segment_list
)
1176 dsm_segment
*seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
1178 /* Throw away explicit on-detach actions one by one. */
1179 while (!slist_is_empty(&seg
->on_detach
))
1182 dsm_segment_detach_callback
*cb
;
1184 node
= slist_pop_head_node(&seg
->on_detach
);
1185 cb
= slist_container(dsm_segment_detach_callback
, node
, node
);
1190 * Decrementing the reference count is a sort of implicit on-detach
1191 * action; make sure we don't do that, either.
1193 seg
->control_slot
= INVALID_CONTROL_SLOT
;
1198 * Create a segment descriptor.
1200 static dsm_segment
*
1201 dsm_create_descriptor(void)
1205 if (CurrentResourceOwner
)
1206 ResourceOwnerEnlarge(CurrentResourceOwner
);
1208 seg
= MemoryContextAlloc(TopMemoryContext
, sizeof(dsm_segment
));
1209 dlist_push_head(&dsm_segment_list
, &seg
->node
);
1211 /* seg->handle must be initialized by the caller */
1212 seg
->control_slot
= INVALID_CONTROL_SLOT
;
1213 seg
->impl_private
= NULL
;
1214 seg
->mapped_address
= NULL
;
1215 seg
->mapped_size
= 0;
1217 seg
->resowner
= CurrentResourceOwner
;
1218 if (CurrentResourceOwner
)
1219 ResourceOwnerRememberDSM(CurrentResourceOwner
, seg
);
1221 slist_init(&seg
->on_detach
);
1227 * Sanity check a control segment.
1229 * The goal here isn't to detect everything that could possibly be wrong with
1230 * the control segment; there's not enough information for that. Rather, the
1231 * goal is to make sure that someone can iterate over the items in the segment
1232 * without overrunning the end of the mapping and crashing. We also check
1233 * the magic number since, if that's messed up, this may not even be one of
1234 * our segments at all.
1237 dsm_control_segment_sane(dsm_control_header
*control
, Size mapped_size
)
1239 if (mapped_size
< offsetof(dsm_control_header
, item
))
1240 return false; /* Mapped size too short to read header. */
1241 if (control
->magic
!= PG_DYNSHMEM_CONTROL_MAGIC
)
1242 return false; /* Magic number doesn't match. */
1243 if (dsm_control_bytes_needed(control
->maxitems
) > mapped_size
)
1244 return false; /* Max item count won't fit in map. */
1245 if (control
->nitems
> control
->maxitems
)
1246 return false; /* Overfull. */
1251 * Compute the number of control-segment bytes needed to store a given
1255 dsm_control_bytes_needed(uint32 nitems
)
1257 return offsetof(dsm_control_header
, item
)
1258 + sizeof(dsm_control_item
) * (uint64
) nitems
;
1261 static inline dsm_handle
1262 make_main_region_dsm_handle(int slot
)
1267 * We need to create a handle that doesn't collide with any existing extra
1268 * segment created by dsm_impl_op(), so we'll make it odd. It also
1269 * mustn't collide with any other main area pseudo-segment, so we'll
1270 * include the slot number in some of the bits. We also want to make an
1271 * effort to avoid newly created and recently destroyed handles from being
1272 * confused, so we'll make the rest of the bits random.
1275 handle
|= slot
<< 1;
1276 handle
|= pg_prng_uint32(&pg_global_prng_state
) << (pg_leftmost_one_pos32(dsm_control
->maxitems
) + 1);
1281 is_main_region_dsm_handle(dsm_handle handle
)
1286 /* ResourceOwner callbacks */
1289 ResOwnerReleaseDSM(Datum res
)
1291 dsm_segment
*seg
= (dsm_segment
*) DatumGetPointer(res
);
1293 seg
->resowner
= NULL
;
1297 ResOwnerPrintDSM(Datum res
)
1299 dsm_segment
*seg
= (dsm_segment
*) DatumGetPointer(res
);
1301 return psprintf("dynamic shared memory segment %u",
1302 dsm_segment_handle(seg
));