1 /*-------------------------------------------------------------------------
4 * Implement shared memory using SysV facilities
6 * These routines represent a fairly thin layer on top of SysV shared
7 * memory functionality.
9 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
15 *-------------------------------------------------------------------------
29 #ifdef HAVE_KERNEL_OS_H
30 #include <kernel/OS.h>
33 #include "miscadmin.h"
34 #include "storage/ipc.h"
35 #include "storage/pg_shmem.h"
38 typedef key_t IpcMemoryKey
; /* shared memory key passed to shmget(2) */
39 typedef int IpcMemoryId
; /* shared memory ID returned by shmget(2) */
41 #define IPCProtection (0600) /* access/modify by user only */
/*
 * PG_SHMAT_FLAGS is the flags argument given to every shmat() call in this
 * file: SHM_SHARE_MMU where that macro is defined, 0 otherwise.
 */
43 #ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
44 #define PG_SHMAT_FLAGS SHM_SHARE_MMU
46 #define PG_SHMAT_FLAGS 0
/*
 * Segment currently used by this process.  UsedShmemSegID starts at 0 and
 * receives the chosen key in PGSharedMemoryCreate.  UsedShmemSegAddr starts
 * as NULL, receives the attach address in PGSharedMemoryCreate, and is set
 * back to NULL by PGSharedMemoryDetach.  PGSharedMemoryAttach also passes
 * UsedShmemSegAddr to shmat() as the requested attach address.
 */
50 unsigned long UsedShmemSegID
= 0;
51 void *UsedShmemSegAddr
= NULL
;
/*
 * Forward declarations of file-local helpers.  IpcMemoryDetach and
 * IpcMemoryDelete take (int, Datum) because they are registered with
 * on_shmem_exit().
 */
53 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey
, Size size
);
54 static void IpcMemoryDetach(int status
, Datum shmaddr
);
55 static void IpcMemoryDelete(int status
, Datum shmId
);
56 static PGShmemHeader
*PGSharedMemoryAttach(IpcMemoryKey key
,
61 * InternalIpcMemoryCreate(memKey, size)
63 * Attempt to create a new shared memory segment with the specified key.
64 * Will fail (return NULL) if such a segment already exists. If successful,
65 * attach the segment to the current process and return its attached address.
66 * On success, callbacks are registered with on_shmem_exit to detach and
67 * delete the segment when on_shmem_exit is called.
69 * If we fail with a failure code other than collision-with-existing-segment,
70 * print out an error and abort. Other types of errors are not recoverable.
/*
 * InternalIpcMemoryCreate: create one SysV segment under memKey and attach it.
 *
 * memKey: key handed to shmget(2).
 * size:   requested segment size in bytes, passed unchanged to shmget(2).
 *
 * Steps visible below, in order:
 *   1. shmget() with IPC_CREAT | IPC_EXCL | IPCProtection, so a segment that
 *      already exists under memKey makes the call fail instead of being
 *      reused.
 *   2. An errno of EEXIST or EACCES is treated as a key collision.  Other
 *      failures are reported with the message beginning
 *      "could not create shared memory segment", a detail line showing the
 *      shmget() arguments, and one of three alternative hints.
 *   3. IpcMemoryDelete is registered with on_shmem_exit() before shmat() is
 *      attempted.
 *   4. shmat() with PG_SHMAT_FLAGS; a result of (void *) -1 is reported with
 *      elog(FATAL).
 *   5. IpcMemoryDetach is registered with on_shmem_exit(), then the key and
 *      ID are passed to RecordSharedMemoryInLockFile().
 *
 * NOTE(review): the error level of the shmget() report, the conditions that
 * select each hint, and the return statements are not visible in this
 * excerpt.  The prototype gives a void * result, presumably the attach
 * address on success and NULL on collision; confirm against the full file.
 */
73 InternalIpcMemoryCreate(IpcMemoryKey memKey
, Size size
)
78 shmid
= shmget(memKey
, size
, IPC_CREAT
| IPC_EXCL
| IPCProtection
);
83 * Fail quietly if error indicates a collision with existing segment.
84 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
85 * we could get a permission violation instead? Also, EIDRM might
86 * occur if an old seg is slated for destruction but not gone yet.
88 if (errno
== EEXIST
|| errno
== EACCES
96 * Else complain and abort
99 (errmsg("could not create shared memory segment: %m"),
100 errdetail("Failed system call was shmget(key=%lu, size=%lu, 0%o).",
101 (unsigned long) memKey
, (unsigned long) size
,
102 IPC_CREAT
| IPC_EXCL
| IPCProtection
),
104 errhint("This error usually means that PostgreSQL's request for a shared memory "
105 "segment exceeded your kernel's SHMMAX parameter. You can either "
106 "reduce the request size or reconfigure the kernel with larger SHMMAX. "
107 "To reduce the request size (currently %lu bytes), reduce "
108 "PostgreSQL's shared_buffers parameter (currently %d) and/or "
109 "its max_connections parameter (currently %d).\n"
110 "If the request size is already small, it's possible that it is less than "
111 "your kernel's SHMMIN parameter, in which case raising the request size or "
112 "reconfiguring SHMMIN is called for.\n"
113 "The PostgreSQL documentation contains more information about shared "
114 "memory configuration.",
115 (unsigned long) size
, NBuffers
, MaxBackends
) : 0,
117 errhint("This error usually means that PostgreSQL's request for a shared "
118 "memory segment exceeded available memory or swap space. "
119 "To reduce the request size (currently %lu bytes), reduce "
120 "PostgreSQL's shared_buffers parameter (currently %d) and/or "
121 "its max_connections parameter (currently %d).\n"
122 "The PostgreSQL documentation contains more information about shared "
123 "memory configuration.",
124 (unsigned long) size
, NBuffers
, MaxBackends
) : 0,
126 errhint("This error does *not* mean that you have run out of disk space. "
127 "It occurs either if all available shared memory IDs have been taken, "
128 "in which case you need to raise the SHMMNI parameter in your kernel, "
129 "or because the system's overall limit for shared memory has been "
130 "reached. If you cannot increase the shared memory limit, "
131 "reduce PostgreSQL's shared memory request (currently %lu bytes), "
132 "by reducing its shared_buffers parameter (currently %d) and/or "
133 "its max_connections parameter (currently %d).\n"
134 "The PostgreSQL documentation contains more information about shared "
135 "memory configuration.",
136 (unsigned long) size
, NBuffers
, MaxBackends
) : 0));
139 /* Register on-exit routine to delete the new segment */
140 on_shmem_exit(IpcMemoryDelete
, Int32GetDatum(shmid
));
142 /* OK, should be able to attach to the segment */
143 memAddress
= shmat(shmid
, NULL
, PG_SHMAT_FLAGS
);
145 if (memAddress
== (void *) -1)
146 elog(FATAL
, "shmat(id=%d) failed: %m", shmid
);
148 /* Register on-exit routine to detach new segment before deleting */
149 on_shmem_exit(IpcMemoryDetach
, PointerGetDatum(memAddress
));
151 /* Record key and ID in lockfile for data directory. */
152 RecordSharedMemoryInLockFile((unsigned long) memKey
,
153 (unsigned long) shmid
);
158 /****************************************************************************/
159 /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
160 /* from process' address space */
161 /* (called as an on_shmem_exit callback, hence funny argument list) */
162 /****************************************************************************/
/*
 * status:  not referenced in the body.
 * shmaddr: attach address of the segment, unwrapped with DatumGetPointer().
 *
 * Calls shmdt() on that address.  A negative result is only written to the
 * log at LOG level together with the address; nothing else is done about it.
 */
164 IpcMemoryDetach(int status
, Datum shmaddr
)
166 if (shmdt(DatumGetPointer(shmaddr
)) < 0)
167 elog(LOG
, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr
));
170 /****************************************************************************/
171 /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
172 /* (called as an on_shmem_exit callback, hence funny argument list) */
173 /****************************************************************************/
/*
 * status: not referenced in the body.
 * shmId:  segment ID, unwrapped with DatumGetInt32().
 *
 * Issues shmctl(IPC_RMID) on that ID with a NULL buffer.  A negative result
 * is only written to the log at LOG level, showing the ID and the numeric
 * value of IPC_RMID; nothing else is done about it.
 */
175 IpcMemoryDelete(int status
, Datum shmId
)
177 if (shmctl(DatumGetInt32(shmId
), IPC_RMID
, NULL
) < 0)
178 elog(LOG
, "shmctl(%d, %d, 0) failed: %m",
179 DatumGetInt32(shmId
), IPC_RMID
);
183 * PGSharedMemoryIsInUse
185 * Is a previously-existing shmem segment still existing and in use?
187 * The point of this exercise is to detect the case where a prior postmaster
188 * crashed, but it left child backends that are still running. Therefore
189 * we only care about shmem segments that are associated with the intended
190 * DataDir. This is an important consideration since accidental matches of
191 * shmem segment IDs are reasonably common.
/*
 * PGSharedMemoryIsInUse: judge whether an older segment may still be used by
 * processes that belong to this data directory.
 *
 * id1: not referenced in the lines visible here.
 * id2: segment ID; cast to IpcMemoryId and handed to shmctl() and shmat().
 *
 * Checks visible below, in order:
 *   - shmctl(IPC_STAT) on the ID; a failure is classified by errno as the
 *     comments on each case describe.
 *   - shm_nattch == 0 is tested to see whether any process is attached.
 *   - a failing stat(DataDir) returns true, the conservative answer.
 *   - a failing shmat() (result of -1) also returns true.
 *   - the attached header is compared against PGShmemMagic and against the
 *     st_dev and st_ino of DataDir; a mismatch in any of the three means the
 *     segment is not one for this data directory.
 *
 * NOTE(review): most return statements, the declarations of hdr and statbuf,
 * and any detach of the probe attachment are not visible in this excerpt;
 * confirm them against the full file before relying on this summary.
 */
194 PGSharedMemoryIsInUse(unsigned long id1
, unsigned long id2
)
196 IpcMemoryId shmId
= (IpcMemoryId
) id2
;
197 struct shmid_ds shmStat
;
202 * We detect whether a shared memory segment is in use by seeing whether
203 * it (a) exists and (b) has any processes attached to it.
205 if (shmctl(shmId
, IPC_STAT
, &shmStat
) < 0)
208 * EINVAL actually has multiple possible causes documented in the
209 * shmctl man page, but we assume it must mean the segment no longer
216 * EACCES implies that the segment belongs to some other userid, which
217 * means it is not a Postgres shmem segment (or at least, not one that
218 * is relevant to our data directory).
224 * Some Linux kernel versions (in fact, all of them as of July 2007)
225 * sometimes return EIDRM when EINVAL is correct. The Linux kernel
226 * actually does not have any internal state that would justify
227 * returning EIDRM, so we can get away with assuming that EIDRM is
228 * equivalent to EINVAL on that platform.
230 #ifdef HAVE_LINUX_EIDRM_BUG
236 * Otherwise, we had better assume that the segment is in use. The
237 * only likely case is EIDRM, which implies that the segment has been
238 * IPC_RMID'd but there are still processes attached to it.
243 /* If it has no attached processes, it's not in use */
244 if (shmStat
.shm_nattch
== 0)
248 * Try to attach to the segment and see if it matches our data directory.
249 * This avoids shmid-conflict problems on machines that are running
250 * several postmasters under the same userid.
252 if (stat(DataDir
, &statbuf
) < 0)
253 return true; /* if can't stat, be conservative */
255 hdr
= (PGShmemHeader
*) shmat(shmId
, NULL
, PG_SHMAT_FLAGS
);
257 if (hdr
== (PGShmemHeader
*) -1)
258 return true; /* if can't attach, be conservative */
260 if (hdr
->magic
!= PGShmemMagic
||
261 hdr
->device
!= statbuf
.st_dev
||
262 hdr
->inode
!= statbuf
.st_ino
)
265 * It's either not a Postgres segment, or not one for my data
266 * directory. In either case it poses no threat.
272 /* Trouble --- looks a lot like there's still live backends */
280 * PGSharedMemoryCreate
282 * Create a shared memory segment of the given size and initialize its
283 * standard header. Also, register an on_shmem_exit callback to release
286 * Dead Postgres segments are recycled if found, but we do not fail upon
287 * collision with non-Postgres shmem segments. The idea here is to detect and
288 * re-use keys that may have been assigned by a crashed postmaster or backend.
290 * makePrivate means to always create a new segment, rather than attach to
291 * or recycle any existing segment.
293 * The port number is passed for possible use as a key (for SysV, we use
294 * it to generate the starting shmem key). In a standalone backend,
295 * zero will be passed.
/*
 * PGSharedMemoryCreate: obtain a new segment of the given size and fill in
 * its PGShmemHeader.
 *
 * size:        total segment size in bytes; asserted to be larger than
 *              MAXALIGN(sizeof(PGShmemHeader)).
 * makePrivate: tested after a create attempt did not succeed; see the
 *              comment on that branch.
 * port:        seeds the key search.  The first key tried is
 *              port * 1000 + 1 and the key grows by one per iteration.
 *
 * For each candidate key the loop first calls InternalIpcMemoryCreate().
 * When that does not succeed, it attaches with PGSharedMemoryAttach() and
 * moves on to the next key if the attach returns NULL, or if the recorded
 * creatorPID differs from getpid() and kill(pid, 0) either succeeds or
 * fails with an errno other than ESRCH.  Otherwise it issues
 * shmctl(IPC_RMID) on the old segment and retries creation under the same
 * key.
 *
 * After a successful create the header fields are assigned in this order:
 * creatorPID, magic, device, inode, totalsize, freeoffset.  freeoffset is
 * set to MAXALIGN(sizeof(PGShmemHeader)).  Finally the attach address and
 * the key are stored in UsedShmemSegAddr and UsedShmemSegID.
 *
 * NOTE(review): the return type, local declarations, the tests on the
 * results of InternalIpcMemoryCreate(), the body of the makePrivate branch
 * and the return statement are not visible in this excerpt.
 */
298 PGSharedMemoryCreate(Size size
, bool makePrivate
, int port
)
300 IpcMemoryKey NextShmemSegID
;
306 /* Room for a header? */
307 Assert(size
> MAXALIGN(sizeof(PGShmemHeader
)));
309 /* Make sure PGSharedMemoryAttach doesn't fail without need */
310 UsedShmemSegAddr
= NULL
;
312 /* Loop till we find a free IPC key */
313 NextShmemSegID
= port
* 1000;
315 for (NextShmemSegID
++;; NextShmemSegID
++)
317 /* Try to create new segment */
318 memAddress
= InternalIpcMemoryCreate(NextShmemSegID
, size
);
320 break; /* successful create and attach */
322 /* Check shared memory and possibly remove and recreate */
324 if (makePrivate
) /* a standalone backend shouldn't do this */
327 if ((memAddress
= PGSharedMemoryAttach(NextShmemSegID
, &shmid
)) == NULL
)
328 continue; /* can't attach, not one of mine */
331 * If I am not the creator and it belongs to an extant process,
334 hdr
= (PGShmemHeader
*) memAddress
;
335 if (hdr
->creatorPID
!= getpid())
337 if (kill(hdr
->creatorPID
, 0) == 0 || errno
!= ESRCH
)
340 continue; /* segment belongs to a live process */
345 * The segment appears to be from a dead Postgres process, or from a
346 * previous cycle of life in this same process. Zap it, if possible.
347 * This probably shouldn't fail, but if it does, assume the segment
348 * belongs to someone else after all, and continue quietly.
351 if (shmctl(shmid
, IPC_RMID
, NULL
) < 0)
355 * Now try again to create the segment.
357 memAddress
= InternalIpcMemoryCreate(NextShmemSegID
, size
);
359 break; /* successful create and attach */
362 * Can only get here if some other process managed to create the same
363 * shmem key before we did. Let him have that one, loop around to try
369 * OK, we created a new segment. Mark it as created by this process. The
370 * order of assignments here is critical so that another Postgres process
371 * can't see the header as valid but belonging to an invalid PID!
373 hdr
= (PGShmemHeader
*) memAddress
;
374 hdr
->creatorPID
= getpid();
375 hdr
->magic
= PGShmemMagic
;
377 /* Fill in the data directory ID info, too */
378 if (stat(DataDir
, &statbuf
) < 0)
380 (errcode_for_file_access(),
381 errmsg("could not stat data directory \"%s\": %m",
383 hdr
->device
= statbuf
.st_dev
;
384 hdr
->inode
= statbuf
.st_ino
;
387 * Initialize space allocation status for segment.
389 hdr
->totalsize
= size
;
390 hdr
->freeoffset
= MAXALIGN(sizeof(PGShmemHeader
));
392 /* Save info for possible future use */
393 UsedShmemSegAddr
= memAddress
;
394 UsedShmemSegID
= (unsigned long) NextShmemSegID
;
402 * PGSharedMemoryReAttach
404 * Re-attach to an already existing shared memory segment. In the non
405 * EXEC_BACKEND case this is not used, because postmaster children inherit
406 * the shared memory segment attachment via fork().
408 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
409 * routine. The caller must have already restored them to the postmaster's
/*
 * PGSharedMemoryReAttach: attach again to the segment named by the globals
 * UsedShmemSegID and UsedShmemSegAddr.  Takes no arguments.
 *
 * Asserts that UsedShmemSegAddr is not NULL and that IsUnderPostmaster is
 * set.  The original address is saved up front because
 * PGSharedMemoryDetach() sets UsedShmemSegAddr to NULL; it is written back
 * afterward so that PGSharedMemoryAttach(), which passes UsedShmemSegAddr to
 * shmat(), asks for the same address.  A resulting address that differs from
 * the saved one is reported with elog(FATAL).  On the surviving path
 * UsedShmemSegAddr is assigned the address that was obtained.
 *
 * NOTE(review): the return type, the declarations of hdr and shmid, and the
 * conditions guarding the PGSharedMemoryDetach() call and the first
 * elog(FATAL) are not visible in this excerpt.
 */
413 PGSharedMemoryReAttach(void)
417 void *origUsedShmemSegAddr
= UsedShmemSegAddr
;
419 Assert(UsedShmemSegAddr
!= NULL
);
420 Assert(IsUnderPostmaster
);
423 /* cygipc (currently) appears to not detach on exec. */
424 PGSharedMemoryDetach();
425 UsedShmemSegAddr
= origUsedShmemSegAddr
;
428 elog(DEBUG3
, "attaching to %p", UsedShmemSegAddr
);
429 hdr
= (void *) PGSharedMemoryAttach((IpcMemoryKey
) UsedShmemSegID
, &shmid
);
431 elog(FATAL
, "could not reattach to shared memory (key=%d, addr=%p): %m",
432 (int) UsedShmemSegID
, UsedShmemSegAddr
);
433 if (hdr
!= origUsedShmemSegAddr
)
434 elog(FATAL
, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
435 hdr
, origUsedShmemSegAddr
);
437 UsedShmemSegAddr
= hdr
; /* probably redundant */
439 #endif /* EXEC_BACKEND */
442 * PGSharedMemoryDetach
444 * Detach from the shared memory segment, if still attached. This is not
445 * intended for use by the process that originally created the segment
446 * (it will have an on_shmem_exit callback registered to do that). Rather,
447 * this is for subprocesses that have inherited an attachment and want to
/*
 * PGSharedMemoryDetach: drop the attachment recorded in UsedShmemSegAddr.
 * Takes no arguments.
 *
 * Nothing is attempted unless UsedShmemSegAddr is non-NULL.  In that case
 * shmdt() is called on the address, a failure is written to the log at LOG
 * level, and UsedShmemSegAddr is set to NULL.
 *
 * NOTE(review): the additional condition compiled in when both EXEC_BACKEND
 * and __CYGWIN__ are defined is not visible in this excerpt.
 */
451 PGSharedMemoryDetach(void)
453 if (UsedShmemSegAddr
!= NULL
)
455 if ((shmdt(UsedShmemSegAddr
) < 0)
456 #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
457 /* Work-around for cygipc exec bug */
461 elog(LOG
, "shmdt(%p) failed: %m", UsedShmemSegAddr
);
462 UsedShmemSegAddr
= NULL
;
468 * Attach to shared memory and make sure it has a Postgres header
470 * Returns attach address if OK, else NULL
472 static PGShmemHeader
*
473 PGSharedMemoryAttach(IpcMemoryKey key
, IpcMemoryId
*shmid
)
477 if ((*shmid
= shmget(key
, sizeof(PGShmemHeader
), 0)) < 0)
480 hdr
= (PGShmemHeader
*) shmat(*shmid
, UsedShmemSegAddr
, PG_SHMAT_FLAGS
);
482 if (hdr
== (PGShmemHeader
*) -1)
483 return NULL
; /* failed: must be some other app's */
485 if (hdr
->magic
!= PGShmemMagic
)
488 return NULL
; /* segment belongs to a non-Postgres app */