/*-------------------------------------------------------------------------
 *
 * shm_toc.c
 *	  shared memory segment table of contents
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/storage/ipc/shm_toc.c
 *
 *-------------------------------------------------------------------------
 */
16 #include "port/atomics.h"
17 #include "storage/shm_toc.h"
18 #include "storage/spin.h"
20 typedef struct shm_toc_entry
22 uint64 key
; /* Arbitrary identifier */
23 Size offset
; /* Offset, in bytes, from TOC start */
28 uint64 toc_magic
; /* Magic number identifying this TOC */
29 slock_t toc_mutex
; /* Spinlock for mutual exclusion */
30 Size toc_total_bytes
; /* Bytes managed by this TOC */
31 Size toc_allocated_bytes
; /* Bytes allocated of those managed */
32 uint32 toc_nentry
; /* Number of entries in TOC */
33 shm_toc_entry toc_entry
[FLEXIBLE_ARRAY_MEMBER
];
37 * Initialize a region of shared memory with a table of contents.
40 shm_toc_create(uint64 magic
, void *address
, Size nbytes
)
42 shm_toc
*toc
= (shm_toc
*) address
;
44 Assert(nbytes
> offsetof(shm_toc
, toc_entry
));
45 toc
->toc_magic
= magic
;
46 SpinLockInit(&toc
->toc_mutex
);
49 * The alignment code in shm_toc_allocate() assumes that the starting
50 * value is buffer-aligned.
52 toc
->toc_total_bytes
= BUFFERALIGN_DOWN(nbytes
);
53 toc
->toc_allocated_bytes
= 0;
60 * Attach to an existing table of contents. If the magic number found at
61 * the target address doesn't match our expectations, return NULL.
64 shm_toc_attach(uint64 magic
, void *address
)
66 shm_toc
*toc
= (shm_toc
*) address
;
68 if (toc
->toc_magic
!= magic
)
71 Assert(toc
->toc_total_bytes
>= toc
->toc_allocated_bytes
);
72 Assert(toc
->toc_total_bytes
> offsetof(shm_toc
, toc_entry
));
78 * Allocate shared memory from a segment managed by a table of contents.
80 * This is not a full-blown allocator; there's no way to free memory. It's
81 * just a way of dividing a single physical shared memory segment into logical
82 * chunks that may be used for different purposes.
84 * We allocate backwards from the end of the segment, so that the TOC entries
85 * can grow forward from the start of the segment.
88 shm_toc_allocate(shm_toc
*toc
, Size nbytes
)
90 volatile shm_toc
*vtoc
= toc
;
97 * Make sure request is well-aligned. XXX: MAXALIGN is not enough,
98 * because atomic ops might need a wider alignment. We don't have a
99 * proper definition for the minimum to make atomic ops safe, but
100 * BUFFERALIGN ought to be enough.
102 nbytes
= BUFFERALIGN(nbytes
);
104 SpinLockAcquire(&toc
->toc_mutex
);
106 total_bytes
= vtoc
->toc_total_bytes
;
107 allocated_bytes
= vtoc
->toc_allocated_bytes
;
108 nentry
= vtoc
->toc_nentry
;
109 toc_bytes
= offsetof(shm_toc
, toc_entry
) + nentry
* sizeof(shm_toc_entry
)
112 /* Check for memory exhaustion and overflow. */
113 if (toc_bytes
+ nbytes
> total_bytes
|| toc_bytes
+ nbytes
< toc_bytes
)
115 SpinLockRelease(&toc
->toc_mutex
);
117 (errcode(ERRCODE_OUT_OF_MEMORY
),
118 errmsg("out of shared memory")));
120 vtoc
->toc_allocated_bytes
+= nbytes
;
122 SpinLockRelease(&toc
->toc_mutex
);
124 return ((char *) toc
) + (total_bytes
- allocated_bytes
- nbytes
);
128 * Return the number of bytes that can still be allocated.
131 shm_toc_freespace(shm_toc
*toc
)
133 volatile shm_toc
*vtoc
= toc
;
135 Size allocated_bytes
;
139 SpinLockAcquire(&toc
->toc_mutex
);
140 total_bytes
= vtoc
->toc_total_bytes
;
141 allocated_bytes
= vtoc
->toc_allocated_bytes
;
142 nentry
= vtoc
->toc_nentry
;
143 SpinLockRelease(&toc
->toc_mutex
);
145 toc_bytes
= offsetof(shm_toc
, toc_entry
) + nentry
* sizeof(shm_toc_entry
);
146 Assert(allocated_bytes
+ BUFFERALIGN(toc_bytes
) <= total_bytes
);
147 return total_bytes
- (allocated_bytes
+ BUFFERALIGN(toc_bytes
));
151 * Insert a TOC entry.
153 * The idea here is that the process setting up the shared memory segment will
154 * register the addresses of data structures within the segment using this
155 * function. Each data structure will be identified using a 64-bit key, which
156 * is assumed to be a well-known or discoverable integer. Other processes
157 * accessing the shared memory segment can pass the same key to
158 * shm_toc_lookup() to discover the addresses of those data structures.
160 * Since the shared memory segment may be mapped at different addresses within
161 * different backends, we store relative rather than absolute pointers.
163 * This won't scale well to a large number of keys. Hopefully, that isn't
164 * necessary; if it proves to be, we might need to provide a more sophisticated
165 * data structure here. But the real idea here is just to give someone mapping
166 * a dynamic shared memory the ability to find the bare minimum number of
167 * pointers that they need to bootstrap. If you're storing a lot of stuff in
168 * the TOC, you're doing it wrong.
171 shm_toc_insert(shm_toc
*toc
, uint64 key
, void *address
)
173 volatile shm_toc
*vtoc
= toc
;
175 Size allocated_bytes
;
180 /* Relativize pointer. */
181 Assert(address
> (void *) toc
);
182 offset
= ((char *) address
) - (char *) toc
;
184 SpinLockAcquire(&toc
->toc_mutex
);
186 total_bytes
= vtoc
->toc_total_bytes
;
187 allocated_bytes
= vtoc
->toc_allocated_bytes
;
188 nentry
= vtoc
->toc_nentry
;
189 toc_bytes
= offsetof(shm_toc
, toc_entry
) + nentry
* sizeof(shm_toc_entry
)
192 /* Check for memory exhaustion and overflow. */
193 if (toc_bytes
+ sizeof(shm_toc_entry
) > total_bytes
||
194 toc_bytes
+ sizeof(shm_toc_entry
) < toc_bytes
||
195 nentry
>= PG_UINT32_MAX
)
197 SpinLockRelease(&toc
->toc_mutex
);
199 (errcode(ERRCODE_OUT_OF_MEMORY
),
200 errmsg("out of shared memory")));
203 Assert(offset
< total_bytes
);
204 vtoc
->toc_entry
[nentry
].key
= key
;
205 vtoc
->toc_entry
[nentry
].offset
= offset
;
208 * By placing a write barrier after filling in the entry and before
209 * updating the number of entries, we make it safe to read the TOC
216 SpinLockRelease(&toc
->toc_mutex
);
220 * Look up a TOC entry.
222 * If the key is not found, returns NULL if noError is true, otherwise
223 * throws elog(ERROR).
225 * Unlike the other functions in this file, this operation acquires no lock;
226 * it uses only barriers. It probably wouldn't hurt concurrency very much even
227 * if it did get a lock, but since it's reasonably likely that a group of
228 * worker processes could each read a series of entries from the same TOC
229 * right around the same time, there seems to be some value in avoiding it.
232 shm_toc_lookup(shm_toc
*toc
, uint64 key
, bool noError
)
238 * Read the number of entries before we examine any entry. We assume that
239 * reading a uint32 is atomic.
241 nentry
= toc
->toc_nentry
;
244 /* Now search for a matching entry. */
245 for (i
= 0; i
< nentry
; ++i
)
247 if (toc
->toc_entry
[i
].key
== key
)
248 return ((char *) toc
) + toc
->toc_entry
[i
].offset
;
251 /* No matching entry was found. */
253 elog(ERROR
, "could not find key " UINT64_FORMAT
" in shm TOC at %p",
259 * Estimate how much shared memory will be required to store a TOC and its
260 * dependent data structures.
263 shm_toc_estimate(shm_toc_estimator
*e
)
267 sz
= offsetof(shm_toc
, toc_entry
);
268 sz
= add_size(sz
, mul_size(e
->number_of_keys
, sizeof(shm_toc_entry
)));
269 sz
= add_size(sz
, e
->space_for_chunks
);
271 return BUFFERALIGN(sz
);