2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
11 static const char sccsid
[] = "@(#)os_map.c 10.24 (Sleepycat) 10/12/98";
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
31 #include "common_ext.h"
34 static int __os_map
__P((char *, int, size_t, int, int, int, void **));
37 static int __os_shmget
__P((REGINFO
*));
42 * Return if this OS can support anonymous memory regions.
44 * PUBLIC: int __db_mapanon_ok __P((int));
47 __db_mapanon_ok(need_names
)
55 * If we don't have spinlocks, we have to have a file descriptor
56 * for fcntl(2) locking, which implies using mmap(2) to map in a
57 * regular file. Theoretically, we could probably find ways to
58 * get a file descriptor to lock other types of shared regions,
59 * but I don't see any reason to do so.
61 * If need_names is set, the application wants to share anonymous
62 * memory among multiple processes, so we have to have a way to
63 * name it. This requires shmget(2), on UNIX systems.
79 COMPQUIET(need_names
, 0);
80 #endif /* HAVE_MMAP */
81 #endif /* HAVE_SPINLOCKS */
88 * Return if shared regions need to be initialized.
90 * PUBLIC: int __db_mapinit __P((void));
96 * Historically, some systems required that all of the bytes of the
97 * region be written before it could be mmapped and accessed randomly.
98 * We have the option of setting REGION_INIT_NEEDED at configuration
99 * time if we're running on one of those systems.
101 #ifdef REGION_INIT_NEEDED
110 * Attach to a shared memory region.
112 * PUBLIC: int __db_mapregion __P((char *, REGINFO *));
115 __db_mapregion(path
, infop
)
124 /* If the user replaces the map call, call through their interface. */
125 if (__db_jump
.j_map
!= NULL
) {
126 F_SET(infop
, REGION_HOLDINGSYS
);
127 return (__db_jump
.j_map(path
, infop
->fd
, infop
->size
,
128 1, F_ISSET(infop
, REGION_ANONYMOUS
), 0, &infop
->addr
));
131 if (F_ISSET(infop
, REGION_ANONYMOUS
)) {
134 * If we're creating anonymous regions:
136 * If it's private, we use mmap(2). The problem with using
137 * shmget(2) is that we may be creating a region of which the
138 * application isn't aware, and if the application crashes
139 * we'll have no way to remove the system resources for the region.
142 * If it's not private, we use the shmget(2) interface if it's
143 * available, because it allows us to name anonymous memory.
144 * If shmget(2) isn't available, use the mmap(2) calls.
146 * In the case of anonymous memory, using mmap(2) means the
147 * memory isn't named and only the single process and its
148 * threads can access the region.
152 #define HAVE_MMAP_ANONYMOUS 1
155 #define HAVE_MMAP_ANONYMOUS 1
159 #ifdef HAVE_MMAP_ANONYMOUS
160 if (!called
&& F_ISSET(infop
, REGION_PRIVATE
)) {
163 infop
->fd
, infop
->size
, 1, 1, 0, &infop
->addr
);
169 ret
= __os_shmget(infop
);
174 * If we're trying to join an unnamed anonymous region, fail --
175 * that's not possible.
180 if (!F_ISSET(infop
, REGION_CREATED
)) {
181 __db_err(infop
->dbenv
,
182 "cannot join region in unnamed anonymous memory");
187 infop
->fd
, infop
->size
, 1, 1, 0, &infop
->addr
);
193 * If we're creating normal regions, we use the mmap(2)
194 * interface if it's available because it's POSIX 1003.1
195 * standard and we trust it more than we do shmget(2).
201 /* Mmap(2) regions that aren't anonymous can grow. */
202 F_SET(infop
, REGION_CANGROW
);
205 infop
->fd
, infop
->size
, 1, 0, 0, &infop
->addr
);
211 ret
= __os_shmget(infop
);
219 * __db_unmapregion --
220 * Detach from the shared memory region.
222 * PUBLIC: int __db_unmapregion __P((REGINFO *));
225 __db_unmapregion(infop
)
233 if (__db_jump
.j_unmap
!= NULL
)
234 return (__db_jump
.j_unmap(infop
->addr
, infop
->size
));
237 if (infop
->segid
!= INVALID_SEGID
) {
239 ret
= shmdt(infop
->addr
) ? errno
: 0;
245 ret
= munmap(infop
->addr
, infop
->size
) ? errno
: 0;
252 * __db_unlinkregion --
253 * Remove the shared memory region.
255 * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *));
258 __db_unlinkregion(name
, infop
)
267 if (__db_jump
.j_runlink
!= NULL
)
268 return (__db_jump
.j_runlink(name
));
271 if (infop
->segid
!= INVALID_SEGID
) {
273 ret
= shmctl(infop
->segid
, IPC_RMID
, NULL
) ? errno
: 0;
277 COMPQUIET(infop
, NULL
);
288 * Map in a shared memory file.
290 * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **));
293 __db_mapfile(path
, fd
, len
, is_rdonly
, addr
)
299 if (__db_jump
.j_map
!= NULL
)
300 return (__db_jump
.j_map(path
, fd
, len
, 0, 0, is_rdonly
, addr
));
303 return (__os_map(path
, fd
, len
, 0, 0, is_rdonly
, addr
));
311 * Unmap the shared memory file.
313 * PUBLIC: int __db_unmapfile __P((void *, size_t));
316 __db_unmapfile(addr
, len
)
320 if (__db_jump
.j_unmap
!= NULL
)
321 return (__db_jump
.j_unmap(addr
, len
));
324 return (munmap(addr
, len
) ? errno
: 0);
333 * Call the mmap(2) function.
336 __os_map(path
, fd
, len
, is_region
, is_anonymous
, is_rdonly
, addr
)
338 int fd
, is_region
, is_anonymous
, is_rdonly
;
345 COMPQUIET(path
, NULL
);
348 * If it's read-only, it's private, and if it's not, it's shared.
349 * Don't bother with an additional parameter.
351 flags
= is_rdonly
? MAP_PRIVATE
: MAP_SHARED
;
353 if (is_region
&& is_anonymous
) {
355 * BSD derived systems use MAP_ANON; Digital Unix and HP/UX use MAP_ANONYMOUS.
362 flags
|= MAP_ANONYMOUS
;
367 if (!is_region
|| !is_anonymous
) {
369 * Historically, MAP_FILE was required for mapping regular
370 * files, even though it was the default. Some systems have
371 * it, some don't, some that have it set it to 0.
378 * I know of no systems that implement the flag to tell the system
379 * that the region contains semaphores, but it's not an unreasonable
380 * thing to do, and has been part of the design since forever. I
381 * don't think anyone will object, but don't set it for read-only
382 * files, it doesn't make sense.
384 #ifdef MAP_HASSEMAPHORE
386 flags
|= MAP_HASSEMAPHORE
;
389 prot
= PROT_READ
| (is_rdonly
? 0 : PROT_WRITE
);
393 * Work around a bug in the VMS V7.1 mmap() implementation. To map a file
394 * into memory on VMS it needs to be opened in a certain way, originally.
395 * To get the file opened in that certain way, the VMS mmap() closes the
396 * file and re-opens it. When it does this, it doesn't flush any caches
397 * out to disk before closing. The problem this causes us is that when the
398 * memory cache doesn't get written out, the file isn't big enough to match
399 * the memory chunk and the mmap() call fails. This call to fsync() fixes
400 * the problem. DEC thinks this isn't a bug because of language in XPG5
401 * discussing user responsibility for on-disk and in-memory synchronization.
404 if (__os_fsync(fd
) == -1)
408 /* MAP_FAILED was not defined in early mmap implementations. */
410 #define MAP_FAILED -1
413 mmap(NULL
, len
, prot
, flags
, fd
, (off_t
)0)) == (void *)MAP_FAILED
)
424 * Call the shmget(2) family of functions.
430 if (F_ISSET(infop
, REGION_CREATED
) &&
431 (infop
->segid
= shmget(0, infop
->size
, IPC_PRIVATE
| 0600)) == -1)
434 if ((infop
->addr
= shmat(infop
->segid
, NULL
, 0)) == (void *)-1) {
436 * If we're trying to join the region and failing, assume
437 * that there was a reboot and the region no longer exists.
439 if (!F_ISSET(infop
, REGION_CREATED
))
444 F_SET(infop
, REGION_HOLDINGSYS
);