/* cache.c - routines to maintain an in-core cache of entries */
/* $OpenLDAP: pkg/ldap/servers/slapd/back-bdb/cache.c,v 1.120.2.15 2008/05/01 21:39:35 quanah Exp $ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2000-2008 The OpenLDAP Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

#include <ac/string.h>
#include <ac/socket.h>
#define bdb_cache_lru_purge	hdb_cache_lru_purge

static void bdb_cache_lru_purge( struct bdb_info *bdb );

static int bdb_cache_delete_internal( Cache *cache, EntryInfo *e, int decr );

static void bdb_lru_print( Cache *cache );

/* For concurrency experiments only! */
#if 0
#define	ldap_pvt_thread_rdwr_wlock(a)	0
#define	ldap_pvt_thread_rdwr_wunlock(a)	0
#define	ldap_pvt_thread_rdwr_rlock(a)	0
#define	ldap_pvt_thread_rdwr_runlock(a)	0
#define	ldap_pvt_thread_mutex_trylock(a) 0
#endif
static EntryInfo *
bdb_cache_entryinfo_new( Cache *cache )
{
	EntryInfo *ei = NULL;

	if ( cache->c_eifree ) {
		ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex );
		if ( cache->c_eifree ) {
			ei = cache->c_eifree;
			cache->c_eifree = ei->bei_lrunext;
			ei->bei_lrunext = NULL;
		}
		ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex );
	}
	if ( !ei ) {
		ei = ch_calloc(1, sizeof(EntryInfo));
		ldap_pvt_thread_mutex_init( &ei->bei_kids_mutex );
	}

	ei->bei_state = CACHE_ENTRY_REFERENCED;

	return ei;
}
static void
bdb_cache_entryinfo_free( Cache *cache, EntryInfo *ei )
{
	free( ei->bei_nrdn.bv_val );
	ei->bei_nrdn.bv_val = NULL;
	free( ei->bei_rdn.bv_val );
	ei->bei_rdn.bv_val = NULL;
	ei->bei_parent = NULL;
	ei->bei_lruprev = NULL;

	ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex );
	ei->bei_lrunext = cache->c_eifree;
	cache->c_eifree = ei;
	ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex );
}
#define LRU_DEL( c, e ) do { \
	if ( e == (c)->c_lruhead ) (c)->c_lruhead = e->bei_lruprev; \
	if ( e == (c)->c_lrutail ) (c)->c_lrutail = e->bei_lruprev; \
	e->bei_lrunext->bei_lruprev = e->bei_lruprev; \
	e->bei_lruprev->bei_lrunext = e->bei_lrunext; \
	e->bei_lruprev = NULL; \
} while ( 0 )
/* Note - we now use a Second-Chance / Clock algorithm instead of
 * Least-Recently-Used. This tremendously improves concurrency
 * because we no longer need to manipulate the lists every time an
 * entry is touched. We only need to lock the lists when adding
 * or deleting an entry. It's now a circular doubly-linked list.
 * We always append to the tail, but the head traverses the circle
 * during a purge operation.
 */
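/* Illustrative sketch (not from the original file): the purge loop further
 * below acts as the clock hand. For a node "e" on the circular list, the
 * second-chance step is roughly:
 *
 *	if ( e->bei_state & CACHE_ENTRY_REFERENCED ) {
 *		e->bei_state &= ~CACHE_ENTRY_REFERENCED;   (spare it this pass)
 *		e = e->bei_lrunext;                        (advance the hand)
 *	} else {
 *		(reclaim the idle node - see bdb_cache_lru_purge)
 *	}
 */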
static void
bdb_cache_lru_link( struct bdb_info *bdb, EntryInfo *ei )
{
	/* Already linked, ignore */
	if ( ei->bei_lruprev )
		return;

	/* Insert into circular LRU list */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );

	ei->bei_lruprev = bdb->bi_cache.c_lrutail;
	if ( bdb->bi_cache.c_lrutail ) {
		ei->bei_lrunext = bdb->bi_cache.c_lrutail->bei_lrunext;
		bdb->bi_cache.c_lrutail->bei_lrunext = ei;
		if ( ei->bei_lrunext )
			ei->bei_lrunext->bei_lruprev = ei;
	} else {
		ei->bei_lrunext = ei->bei_lruprev = ei;
		bdb->bi_cache.c_lruhead = ei;
	}
	bdb->bi_cache.c_lrutail = ei;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
}
/* #define NO_DB_LOCK 1 */
/* Note: The BerkeleyDB locks are much slower than regular
 * mutexes or rdwr locks. But the BDB implementation has the
 * advantage of using a fixed size lock table, instead of
 * allocating a lock object per entry in the DB. That's a
 * key benefit for scaling. It also frees us from worrying
 * about undetectable deadlocks between BDB activity and our
 * own cache activity. It's still worth exploring faster
 * alternatives though.
 */

/* Atomically release and reacquire a lock */
static int
bdb_cache_entry_db_relock(
	struct bdb_info *bdb,
	BDB_LOCKER locker,
	EntryInfo *ei,
	int rw,
	int tryOnly,
	DB_LOCK *lock )
{
	int	rc;
	DBT	lockobj;
	DB_LOCKREQ list[2];

	if ( !lock ) return 0;

	lockobj.data = &ei->bei_id;
	lockobj.size = sizeof(ei->bei_id) + 1;

	list[0].op = DB_LOCK_PUT;
	list[0].lock = *lock;
	list[1].op = DB_LOCK_GET;
	list[1].lock = *lock;
	list[1].mode = rw ? DB_LOCK_WRITE : DB_LOCK_READ;
	list[1].obj = &lockobj;
	rc = bdb->bi_dbenv->lock_vec(bdb->bi_dbenv, BDB_LOCKID(locker), tryOnly ? DB_LOCK_NOWAIT : 0,
		list, 2, NULL );

	if (rc && !tryOnly) {
		Debug( LDAP_DEBUG_TRACE,
			"bdb_cache_entry_db_relock: entry %ld, rw %d, rc %d\n",
			ei->bei_id, rw, rc );
	} else {
		*lock = list[1].lock;
	}
	return rc;
}
static int
bdb_cache_entry_db_lock( struct bdb_info *bdb, BDB_LOCKER locker, EntryInfo *ei,
	int rw, int tryOnly, DB_LOCK *lock )
{
#ifdef NO_DB_LOCK
	return 0;
#else
	int	rc;
	DBT	lockobj;
	int	db_rw;

	if ( !lock ) return 0;

	if (rw)
		db_rw = DB_LOCK_WRITE;
	else
		db_rw = DB_LOCK_READ;

	lockobj.data = &ei->bei_id;
	lockobj.size = sizeof(ei->bei_id) + 1;

	rc = LOCK_GET(bdb->bi_dbenv, BDB_LOCKID(locker), tryOnly ? DB_LOCK_NOWAIT : 0,
		&lockobj, db_rw, lock );
	if (rc && !tryOnly) {
		Debug( LDAP_DEBUG_TRACE,
			"bdb_cache_entry_db_lock: entry %ld, rw %d, rc %d\n",
			ei->bei_id, rw, rc );
	}
	return rc;
#endif /* NO_DB_LOCK */
}
int
bdb_cache_entry_db_unlock ( struct bdb_info *bdb, DB_LOCK *lock )
{
	int	rc;

	if ( !lock || lock->mode == DB_LOCK_NG ) return 0;

	rc = LOCK_PUT ( bdb->bi_dbenv, lock );
	return rc;
}
void
bdb_cache_return_entry_rw( struct bdb_info *bdb, Entry *e,
	int rw, DB_LOCK *lock )
{
	EntryInfo *ei;
	int free = 0;

	ei = e->e_private;
	if ( ei &&
		( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) &&
		( bdb_cache_entryinfo_trylock( ei ) == 0 )) {
		if ( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) {
			/* Releasing the entry can only be done when
			 * we know that nobody else is using it, i.e. we
			 * should have an entry_db writelock. But the
			 * flag is only set by the thread that loads the
			 * entry, and only if no other thread has found
			 * it while it was working. All other threads
			 * clear the flag, which means that we should be
			 * the only thread using the entry if the flag
			 * is set here.
			 */
			ei->bei_e = NULL;
			ei->bei_state ^= CACHE_ENTRY_NOT_CACHED;
			free = 1;
		}
		bdb_cache_entryinfo_unlock( ei );
	}
	bdb_cache_entry_db_unlock( bdb, lock );
	if ( free ) {
		e->e_private = NULL;
		bdb_entry_return( e );
	}
}
static int
bdb_cache_entryinfo_destroy( EntryInfo *e )
{
	ldap_pvt_thread_mutex_destroy( &e->bei_kids_mutex );
	free( e->bei_nrdn.bv_val );
	free( e->bei_rdn.bv_val );
	free( e );
	return 0;
}
/* Do a length-ordered sort on normalized RDNs */
static int
bdb_rdn_cmp( const void *v_e1, const void *v_e2 )
{
	const EntryInfo *e1 = v_e1, *e2 = v_e2;
	int rc = e1->bei_nrdn.bv_len - e2->bei_nrdn.bv_len;
	if (rc == 0) {
		rc = strncmp( e1->bei_nrdn.bv_val, e2->bei_nrdn.bv_val,
			e1->bei_nrdn.bv_len );
	}
	return rc;
}
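/* Example (explanatory note, not from the original file): with this
 * ordering "ou=hr" sorts before "ou=people" simply because it is shorter;
 * strncmp() only runs when two normalized RDNs have equal length, so most
 * AVL comparisons cost a single integer subtraction.
 */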
static int
bdb_id_cmp( const void *v_e1, const void *v_e2 )
{
	const EntryInfo *e1 = v_e1, *e2 = v_e2;
	return e1->bei_id - e2->bei_id;
}
static int
bdb_id_dup_err( void *v1, void *v2 )
{
	EntryInfo *e2 = v2;
	e2->bei_lrunext = v1;
	return -1;
}
/* Create an entryinfo in the cache. Caller must release the locks later.
 */
static int
bdb_entryinfo_add_internal(
	struct bdb_info *bdb,
	EntryInfo *ei,
	EntryInfo **res )
{
	EntryInfo *ei2 = NULL;

	*res = NULL;

	ei2 = bdb_cache_entryinfo_new( &bdb->bi_cache );

	ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
	bdb_cache_entryinfo_lock( ei->bei_parent );

	ei2->bei_id = ei->bei_id;
	ei2->bei_parent = ei->bei_parent;
	ei2->bei_rdn = ei->bei_rdn;
#ifdef SLAP_ZONE_ALLOC
	ei2->bei_bdb = bdb;
#endif

	/* Add to cache ID tree */
	if (avl_insert( &bdb->bi_cache.c_idtree, ei2, bdb_id_cmp,
		bdb_id_dup_err )) {
		EntryInfo *eix = ei2->bei_lrunext;
		bdb_cache_entryinfo_free( &bdb->bi_cache, ei2 );
		ei2 = eix;
		/* It got freed above because its value was
		 * assigned to ei2.
		 */
		ei->bei_rdn.bv_val = NULL;
	} else {
		int rc;

		bdb->bi_cache.c_eiused++;
		ber_dupbv( &ei2->bei_nrdn, &ei->bei_nrdn );

		/* This is a new leaf node. But if parent had no kids, then it was
		 * a leaf and we would be decrementing that. So, only increment if
		 * the parent already has kids.
		 */
		if ( ei->bei_parent->bei_kids || !ei->bei_parent->bei_id )
			bdb->bi_cache.c_leaves++;
		rc = avl_insert( &ei->bei_parent->bei_kids, ei2, bdb_rdn_cmp,
			avl_dup_error );
		if ( rc ) {
			/* This should never happen; entry cache is corrupt */
			bdb->bi_dbenv->log_flush( bdb->bi_dbenv, NULL );
			assert( !rc );
		}
		ei->bei_parent->bei_ckids++;
	}

	*res = ei2;
	return 0;
}
/* Find the EntryInfo for the requested DN. If the DN cannot be found, return
 * the info for its closest ancestor. *res should be NULL to process a
 * complete DN starting from the tree root. Otherwise *res must be the
 * immediate parent of the requested DN, and only the RDN will be searched.
 * The EntryInfo is locked upon return and must be unlocked by the caller.
 */
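/* Usage sketch (hedged; not from the original file - the authoritative
 * prototype lives in proto-bdb.h). A typical full-DN lookup from the root:
 *
 *	EntryInfo *eip = NULL;
 *	rc = bdb_cache_find_ndn( op, locker, &op->o_req_ndn, &eip );
 *	if ( eip ) {
 *		... eip is the entry or its closest cached ancestor, locked ...
 *		bdb_cache_entryinfo_unlock( eip );
 *	}
 */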
int
bdb_cache_find_ndn(
	Operation	*op,
	BDB_LOCKER	locker,
	struct berval	*ndn,
	EntryInfo	**res )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	EntryInfo	ei, *eip, *ei2;
	int rc = 0;
	char *ptr;
	/* this function is always called with normalized DN */

	/* we're doing a onelevel search for an RDN */
	ei.bei_nrdn.bv_val = ndn->bv_val;
	ei.bei_nrdn.bv_len = dn_rdnlen( op->o_bd, ndn );

	/* we're searching a full DN from the root */
	ptr = ndn->bv_val + ndn->bv_len - op->o_bd->be_nsuffix[0].bv_len;
	ei.bei_nrdn.bv_val = ptr;
	ei.bei_nrdn.bv_len = op->o_bd->be_nsuffix[0].bv_len;
	/* Skip to next rdn if suffix is empty */
	if ( ei.bei_nrdn.bv_len == 0 ) {
		for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
			&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
		if ( ptr >= ndn->bv_val ) {
			if (DN_SEPARATOR(*ptr)) ptr++;
			ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr;
			ei.bei_nrdn.bv_val = ptr;
		}
	}

	eip = &bdb->bi_cache.c_dntree;
	for ( bdb_cache_entryinfo_lock( eip ); eip; ) {
		eip->bei_state |= CACHE_ENTRY_REFERENCED;
		ei.bei_parent = eip;
		ei2 = (EntryInfo *)avl_find( eip->bei_kids, &ei, bdb_rdn_cmp );
		if ( !ei2 ) {
			DB_LOCK lock;
			int len = ei.bei_nrdn.bv_len;

			if ( BER_BVISEMPTY( ndn )) {
				*res = eip;
				return 0;
			}

			ei.bei_nrdn.bv_len = ndn->bv_len -
				(ei.bei_nrdn.bv_val - ndn->bv_val);
			bdb_cache_entryinfo_unlock( eip );

			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Reading %s",
				ei.bei_nrdn.bv_val );

			lock.mode = DB_LOCK_NG;
			rc = bdb_dn2id( op, &ei.bei_nrdn, &ei, locker, &lock );
			if (rc) {
				bdb_cache_entryinfo_lock( eip );
				bdb_cache_entry_db_unlock( bdb, &lock );
				*res = eip;
				return rc;
			}

			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Read got %s(%d)",
				ei.bei_nrdn.bv_val, ei.bei_id );

			/* DN exists but needs to be added to cache */
			ei.bei_nrdn.bv_len = len;
			rc = bdb_entryinfo_add_internal( bdb, &ei, &ei2 );
			/* add_internal left eip and c_rwlock locked */
			ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
			bdb_cache_entry_db_unlock( bdb, &lock );
			if ( rc ) {
				*res = eip;
				return rc;
			}
		} else if ( ei2->bei_state & CACHE_ENTRY_DELETED ) {
			/* In the midst of deleting? Give it a chance to
			 * complete.
			 */
			bdb_cache_entryinfo_unlock( eip );
			ldap_pvt_thread_yield();
			bdb_cache_entryinfo_lock( eip );
			*res = eip;
			return DB_NOTFOUND;
		}
		bdb_cache_entryinfo_unlock( eip );
		bdb_cache_entryinfo_lock( ei2 );

		eip = ei2;

		/* Advance to next lower RDN */
		for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
			&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
		if ( ptr >= ndn->bv_val ) {
			if (DN_SEPARATOR(*ptr)) ptr++;
			ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr - 1;
			ei.bei_nrdn.bv_val = ptr;
		}
		if ( ptr < ndn->bv_val ) {
			*res = eip;
			break;
		}
	}

	return rc;
}
/* Walk up the tree from a child node, looking for an ID that's already
 * been linked into the cache.
 */
int
hdb_cache_find_parent(
	Operation *op,
	BDB_LOCKER locker,
	ID id,
	EntryInfo **res )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL;
	int rc;

	ei.bei_id = id;
	ei.bei_kids = NULL;
	ei.bei_ckids = 0;
	for (;;) {
		rc = hdb_dn2id_parent( op, locker, &ei, &eip.bei_id );
		if ( rc ) break;

		/* Save the previous node, if any */
		ei2 = ein;

		/* Create a new node for the current ID */
		ein = bdb_cache_entryinfo_new( &bdb->bi_cache );
		ein->bei_id = ei.bei_id;
		ein->bei_kids = ei.bei_kids;
		ein->bei_nrdn = ei.bei_nrdn;
		ein->bei_rdn = ei.bei_rdn;
		ein->bei_ckids = ei.bei_ckids;
#ifdef SLAP_ZONE_ALLOC
		ein->bei_bdb = bdb;
#endif

		/* This node is not fully connected yet */
		ein->bei_state |= CACHE_ENTRY_NOT_LINKED;

		/* Insert this node into the ID tree */
		ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
		if ( avl_insert( &bdb->bi_cache.c_idtree, (caddr_t)ein,
			bdb_id_cmp, bdb_id_dup_err ) ) {
			EntryInfo *eix = ein->bei_lrunext;

			/* Someone else created this node just before us.
			 * Free our new copy and use the existing one.
			 */
			bdb_cache_entryinfo_free( &bdb->bi_cache, ein );
			ein = eix;

			/* Link in any kids we've already processed */
			if ( ei2 ) {
				bdb_cache_entryinfo_lock( ein );
				avl_insert( &ein->bei_kids, (caddr_t)ei2,
					bdb_rdn_cmp, avl_dup_error );
				bdb_cache_entryinfo_unlock( ein );
			}
			break;
		}

		/* If this is the first time, save this node
		 * to be returned later.
		 */
		if ( eir == NULL ) eir = ein;

		/* If there was a previous node, link it to this one */
		if ( ei2 ) ei2->bei_parent = ein;

		/* Look for this node's parent */
		if ( eip.bei_id ) {
			ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
				(caddr_t) &eip, bdb_id_cmp );
		} else {
			ei2 = &bdb->bi_cache.c_dntree;
		}
		bdb->bi_cache.c_eiused++;
		if ( ei2 && ( ei2->bei_kids || !ei2->bei_id ))
			bdb->bi_cache.c_leaves++;
		ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );

		/* Got the parent, link in and we're done. */
		if ( ei2 ) {
			bdb_cache_entryinfo_lock( eir );
			bdb_cache_entryinfo_lock( ei2 );
			ein->bei_parent = ei2;

			avl_insert( &ei2->bei_kids, (caddr_t)ein, bdb_rdn_cmp,
				avl_dup_error );

			/* Reset all the state info */
			for (ein = eir; ein != ei2; ein=ein->bei_parent)
				ein->bei_state &= ~CACHE_ENTRY_NOT_LINKED;

			bdb_cache_entryinfo_unlock( ei2 );

			*res = eir;
			break;
		}

		/* Parent is not cached yet; keep walking up */
		ei.bei_kids = NULL;
		ei.bei_id = eip.bei_id;
		ei.bei_ckids = 1;
		avl_insert( &ei.bei_kids, (caddr_t)ein, bdb_rdn_cmp,
			avl_dup_error );
	}
	return rc;
}
/* Used by hdb_dn2idl when loading the EntryInfo for all the children
 * of a given node
 */
int hdb_cache_load(
	struct bdb_info *bdb,
	EntryInfo *ei,
	EntryInfo **res )
{
	EntryInfo *ei2;
	int rc;

	/* See if we already have this one */
	bdb_cache_entryinfo_lock( ei->bei_parent );
	ei2 = (EntryInfo *)avl_find( ei->bei_parent->bei_kids, ei, bdb_rdn_cmp );
	bdb_cache_entryinfo_unlock( ei->bei_parent );

	if ( !ei2 ) {
		/* Not found, add it */
		struct berval bv;

		/* bei_rdn was not malloc'd before, do it now */
		ber_dupbv( &bv, &ei->bei_rdn );
		ei->bei_rdn = bv;

		rc = bdb_entryinfo_add_internal( bdb, ei, res );
		bdb_cache_entryinfo_unlock( ei->bei_parent );
		ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
	} else {
		/* Found, return it */
		*res = ei2;
		return 0;
	}
	return rc;
}
/* This is best-effort only. If all entries in the cache are
 * busy, they will all be kept. This is unlikely to happen
 * unless the cache is very much smaller than the working set.
 */
static void
bdb_cache_lru_purge( struct bdb_info *bdb )
{
	DB_LOCK		lock, *lockp;
	EntryInfo *elru, *elnext = NULL;
	int count, islocked, eimax;

	/* Wait for the mutex; we're the only one trying to purge. */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );

	if ( bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize ) {
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
		bdb->bi_cache.c_purging = 0;
		return;
	}

	if ( bdb->bi_cache.c_locker ) {
		lockp = &lock;
	} else {
		lockp = NULL;
	}

	count = 0;

	/* maximum number of EntryInfo leaves to cache. In slapcat
	 * we always free all leaf nodes.
	 */
	if ( slapMode & SLAP_TOOL_READONLY )
		eimax = 0;
	else
		eimax = bdb->bi_cache.c_eimax;

	/* Look for an unused entry to remove */
	for ( elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) {
		elnext = elru->bei_lrunext;

		if ( bdb_cache_entryinfo_trylock( elru ))
			goto bottom;

		/* This flag implements the clock replacement behavior */
		if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) {
			elru->bei_state &= ~CACHE_ENTRY_REFERENCED;
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* If this node is in the process of linking into the cache,
		 * or this node is being deleted, skip it.
		 */
		if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED |
			CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING )) ||
			elru->bei_finders > 0 ) {
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* entryinfo is locked */
		islocked = 1;

		/* If we can successfully writelock it, then
		 * the object is idle.
		 */
		if ( bdb_cache_entry_db_lock( bdb,
			bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) {

			/* Free entry for this node if it's present */
			if ( elru->bei_e ) {
				elru->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
				bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
#else
				bdb_entry_return( elru->bei_e );
#endif
				elru->bei_e = NULL;
				count++;
			}
			bdb_cache_entry_db_unlock( bdb, lockp );

			/*
			 * If it is a leaf node, and we're over the limit, free it.
			 */
			if ( elru->bei_kids ) {
				/* Drop from list, we ignore it... */
				LRU_DEL( &bdb->bi_cache, elru );
			} else if ( bdb->bi_cache.c_leaves > eimax ) {
				/* Too many leaf nodes, free this one */
				bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
				bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
				islocked = 0;
			}	/* Leave on list until we need to free it */
		}

		if ( islocked )
			bdb_cache_entryinfo_unlock( elru );

		if ( count >= bdb->bi_cache.c_minfree ) {
			ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
			bdb->bi_cache.c_cursize -= count;
			ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
			break;
		}
bottom:
		if ( elnext == bdb->bi_cache.c_lruhead )
			break;
	}

	bdb->bi_cache.c_lruhead = elnext;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
	bdb->bi_cache.c_purging = 0;
}
/* Look up an EntryInfo in the cache, given its entry ID */
EntryInfo *
bdb_cache_find_info(
	struct bdb_info *bdb,
	ID id )
{
	EntryInfo	ei = { 0 },
		*ei2;

	ei.bei_id = id;

	ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
	ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
		(caddr_t) &ei, bdb_id_cmp );
	ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
	return ei2;
}
/*
 * cache_find_id - find an entry in the cache, given id.
 * The entry is locked for Read upon return. Call with flag ID_LOCKED if
 * the supplied *eip was already locked.
 */
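/* Usage sketch (hedged; parameter order follows the calls visible in the
 * body below and may not match the original prototype exactly):
 *
 *	EntryInfo *eip = NULL;
 *	DB_LOCK lock;
 *	rc = bdb_cache_find_id( op, tid, id, &eip, 0, locker, &lock );
 *	if ( rc == 0 ) {
 *		... eip->bei_e is valid and read-locked here ...
 *		bdb_cache_return_entry_rw( bdb, eip->bei_e, 0, &lock );
 *	}
 */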
int
bdb_cache_find_id(
	Operation *op,
	DB_TXN	*tid,
	ID		id,
	EntryInfo **eip,
	int	flag,
	BDB_LOCKER locker,
	DB_LOCK	*lock )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	Entry	*ep = NULL;
	int	rc = 0, load = 0;
	EntryInfo ei = { 0 };

	ei.bei_id = id;

#ifdef SLAP_ZONE_ALLOC
	slap_zh_rlock(bdb->bi_cache.c_zctx);
#endif

	/* If we weren't given any info, see if we have it already cached */
	if ( !*eip ) {
again:	ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
		*eip = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
			(caddr_t) &ei, bdb_id_cmp );
		if ( *eip ) {
			/* If the lock attempt fails, the info is in use */
			if ( bdb_cache_entryinfo_trylock( *eip )) {
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
				/* If this node is being deleted, treat
				 * as if the delete has already finished
				 */
				if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
					return DB_NOTFOUND;
				}
				/* otherwise, wait for the info to free up */
				ldap_pvt_thread_yield();
				goto again;
			}
			/* If this info isn't hooked up to its parent yet,
			 * unlock and wait for it to be fully initialized
			 */
			if ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) {
				bdb_cache_entryinfo_unlock( *eip );
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
				ldap_pvt_thread_yield();
				goto again;
			}
			flag |= ID_LOCKED;
		}
		ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
	}

	/* See if the ID exists in the database; add it to the cache if so */
	if ( *eip == NULL ) {
#ifndef BDB_HIER
		rc = bdb_id2entry( op->o_bd, tid, locker, id, &ep );
		if ( rc == 0 ) {
			rc = bdb_cache_find_ndn( op, locker,
				&ep->e_nname, eip );
			if ( *eip ) flag |= ID_LOCKED;
			if ( rc ) {
				ep->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
				bdb_entry_return( bdb, ep, (*eip)->bei_zseq );
#else
				bdb_entry_return( ep );
#endif
				ep = NULL;
			}
		}
#else
		rc = hdb_cache_find_parent(op, locker, id, eip );
		if ( rc == 0 ) flag |= ID_LOCKED;
#endif
	}

	/* Ok, we found the info, do we have the entry? */
	if ( rc == 0 ) {
		if ( !( flag & ID_LOCKED )) {
			bdb_cache_entryinfo_lock( *eip );
			flag |= ID_LOCKED;
		}

		if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
			rc = DB_NOTFOUND;
		} else {
			(*eip)->bei_finders++;
			(*eip)->bei_state |= CACHE_ENTRY_REFERENCED;
			/* Make sure only one thread tries to load the entry */
load1:
#ifdef SLAP_ZONE_ALLOC
			if ((*eip)->bei_e && !slap_zn_validate(
					bdb->bi_cache.c_zctx, (*eip)->bei_e, (*eip)->bei_zseq)) {
				(*eip)->bei_e = NULL;
				(*eip)->bei_zseq = 0;
			}
#endif
			if ( !(*eip)->bei_e && !((*eip)->bei_state & CACHE_ENTRY_LOADING)) {
				load = 1;
				(*eip)->bei_state |= CACHE_ENTRY_LOADING;
			} else {
				/* Clear the uncached state if we are not
				 * loading it, i.e. it is already cached or
				 * another thread is currently loading it.
				 */
				(*eip)->bei_state &= ~CACHE_ENTRY_NOT_CACHED;
			}

			if ( flag & ID_LOCKED ) {
				bdb_cache_entryinfo_unlock( *eip );
				flag ^= ID_LOCKED;
			}
			rc = bdb_cache_entry_db_lock( bdb, locker, *eip, load, 0, lock );
			if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
				rc = DB_NOTFOUND;
				bdb_cache_entry_db_unlock( bdb, lock );
			} else if ( rc == 0 ) {
				if ( load ) {
					if ( !ep ) {
						rc = bdb_id2entry( op->o_bd, tid, locker, id, &ep );
					}
					if ( rc == 0 ) {
						ep->e_private = *eip;
						(*eip)->bei_e = ep;
#ifdef SLAP_ZONE_ALLOC
						(*eip)->bei_zseq = *((ber_len_t *)ep - 2);
#endif
						ep = NULL;
						bdb_cache_lru_link( bdb, *eip );
						if (( flag & ID_NOCACHE ) &&
							( bdb_cache_entryinfo_trylock( *eip ) == 0 )) {
							/* Set the cached state only if no other thread
							 * found the info while we were loading the entry.
							 */
							if ( (*eip)->bei_finders == 1 )
								(*eip)->bei_state |= CACHE_ENTRY_NOT_CACHED;
							bdb_cache_entryinfo_unlock( *eip );
						}
					}
					if ( rc == 0 ) {
						/* If we succeeded, downgrade back to a readlock. */
						rc = bdb_cache_entry_db_relock( bdb, locker,
							*eip, 0, 0, lock );
					} else {
						/* Otherwise, release the lock. */
						bdb_cache_entry_db_unlock( bdb, lock );
					}
				} else if ( !(*eip)->bei_e ) {
					/* Some other thread is trying to load the entry,
					 * wait for it to finish.
					 */
					bdb_cache_entry_db_unlock( bdb, lock );
					bdb_cache_entryinfo_lock( *eip );
					flag |= ID_LOCKED;
					goto load1;
				} else {
					/* Check for subtree renames
					 */
					rc = bdb_fix_dn( (*eip)->bei_e, 1 );
					if ( rc ) {
						bdb_cache_entry_db_relock( bdb,
							locker, *eip, 1, 0, lock );
						/* check again in case other modifier did it already */
						if ( bdb_fix_dn( (*eip)->bei_e, 1 ) )
							rc = bdb_fix_dn( (*eip)->bei_e, 2 );
						bdb_cache_entry_db_relock( bdb,
							locker, *eip, 0, 0, lock );
					}
				}
				bdb_cache_entryinfo_lock( *eip );
				(*eip)->bei_finders--;
				if ( load )
					(*eip)->bei_state ^= CACHE_ENTRY_LOADING;
				bdb_cache_entryinfo_unlock( *eip );
			}
		}
	}
	if ( flag & ID_LOCKED ) {
		bdb_cache_entryinfo_unlock( *eip );
	}
	if ( ep ) {
		ep->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
		bdb_entry_return( bdb, ep, (*eip)->bei_zseq );
#else
		bdb_entry_return( ep );
#endif
	}
	if ( rc == 0 ) {
		int purge = 0;

		if ( load && !( flag & ID_NOCACHE )) {
			ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
			bdb->bi_cache.c_cursize++;
			if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize &&
				!bdb->bi_cache.c_purging ) {
				purge = 1;
				bdb->bi_cache.c_purging = 1;
			}
			ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
		}
		if ( purge )
			bdb_cache_lru_purge( bdb );
	}

#ifdef SLAP_ZONE_ALLOC
	if (rc == 0 && (*eip)->bei_e ) {
		slap_zn_rlock(bdb->bi_cache.c_zctx, (*eip)->bei_e);
	}
	slap_zh_runlock(bdb->bi_cache.c_zctx);
#endif
	return rc;
}
int
bdb_cache_children(
	Operation *op,
	DB_TXN *txn,
	Entry *e )
{
	int rc;

	if ( BEI(e)->bei_kids ) {
		return 0;
	}
	if ( BEI(e)->bei_state & CACHE_ENTRY_NO_KIDS ) {
		return DB_NOTFOUND;
	}
	rc = bdb_dn2id_children( op, txn, e );
	if ( rc == DB_NOTFOUND ) {
		BEI(e)->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS;
	}
	return rc;
}
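/* Explanatory note (sketch, not from the original file): the NO_KIDS /
 * NO_GRANDKIDS bits cache negative results, so a repeated leaf-ness test
 * can short-circuit without another dn2id lookup:
 *
 *	if ( bdb_cache_children( op, txn, e ) == DB_NOTFOUND )
 *		... e is a leaf; the flag is now set and later calls return
 *		    DB_NOTFOUND without touching the database ...
 */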
/* Update the cache after a successful database Add. */
int
bdb_cache_add(
	struct bdb_info *bdb,
	EntryInfo *eip,
	Entry *e,
	struct berval *nrdn,
	BDB_LOCKER locker,
	DB_LOCK *lock )
{
	EntryInfo *new, ei = {0};
	int rc, purge = 0;
	struct berval rdn = e->e_name;

	ei.bei_id = e->e_id;
	ei.bei_parent = eip;
	ei.bei_nrdn = *nrdn;

	/* Lock this entry so that bdb_add can run to completion.
	 * It can only fail if BDB has run out of lock resources.
	 */
	rc = bdb_cache_entry_db_lock( bdb, locker, &ei, 0, 0, lock );
	if ( rc ) {
		bdb_cache_entryinfo_unlock( eip );
		return rc;
	}

	if ( nrdn->bv_len != e->e_nname.bv_len ) {
		char *ptr = ber_bvchr( &rdn, ',' );
		assert( ptr != NULL );
		rdn.bv_len = ptr - rdn.bv_val;
	}
	ber_dupbv( &ei.bei_rdn, &rdn );
	if ( eip->bei_dkids ) eip->bei_dkids++;

	rc = bdb_entryinfo_add_internal( bdb, &ei, &new );
	/* bdb_csn_commit can cause this when adding the database root entry */
	if ( new->bei_e ) {
		new->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
		bdb_entry_return( bdb, new->bei_e, new->bei_zseq );
#else
		bdb_entry_return( new->bei_e );
#endif
	}
	new->bei_e = e;
	e->e_private = new;
	new->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS;
	eip->bei_state &= ~CACHE_ENTRY_NO_KIDS;
	if (eip->bei_parent) {
		eip->bei_parent->bei_state &= ~CACHE_ENTRY_NO_GRANDKIDS;
	}
	bdb_cache_entryinfo_unlock( eip );

	ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
	++bdb->bi_cache.c_cursize;
	if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize &&
		!bdb->bi_cache.c_purging ) {
		purge = 1;
		bdb->bi_cache.c_purging = 1;
	}
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );

	bdb_cache_lru_link( bdb, new );

	if ( purge )
		bdb_cache_lru_purge( bdb );

	return rc;
}
int
bdb_cache_modify(
	struct bdb_info *bdb,
	Entry *e,
	Attribute *newAttrs,
	BDB_LOCKER locker,
	DB_LOCK *lock )
{
	EntryInfo *ei = BEI(e);
	int rc;

	/* Get write lock on data */
	rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock );

	/* If we've done repeated mods on a cached entry, then e_attrs
	 * is no longer contiguous with the entry, and must be freed.
	 */
	if ( (void *)e->e_attrs != (void *)(e+1) ) {
		attrs_free( e->e_attrs );
	}
	e->e_attrs = newAttrs;

	return rc;
}
/*
 * Change the rdn in the entryinfo. Also move to a new parent if needed.
 */
int
bdb_cache_modrdn(
	struct bdb_info *bdb,
	Entry *e,
	struct berval *nrdn,
	Entry *new,
	EntryInfo *ein,
	BDB_LOCKER locker,
	DB_LOCK *lock )
{
	EntryInfo *ei = BEI(e), *pei;
	int rc;
	struct berval rdn;

	/* Get write lock on data */
	rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock );
	if ( rc ) return rc;

	/* If we've done repeated mods on a cached entry, then e_attrs
	 * is no longer contiguous with the entry, and must be freed.
	 */
	if ( (void *)e->e_attrs != (void *)(e+1) ) {
		attrs_free( e->e_attrs );
	}
	e->e_attrs = new->e_attrs;
	if( e->e_nname.bv_val < e->e_bv.bv_val ||
		e->e_nname.bv_val > e->e_bv.bv_val + e->e_bv.bv_len )
	{
		ch_free(e->e_name.bv_val);
		ch_free(e->e_nname.bv_val);
	}
	e->e_name = new->e_name;
	e->e_nname = new->e_nname;

	/* Lock the parent's kids AVL tree */
	pei = ei->bei_parent;
	bdb_cache_entryinfo_lock( pei );
	avl_delete( &pei->bei_kids, (caddr_t) ei, bdb_rdn_cmp );
	free( ei->bei_nrdn.bv_val );
	ber_dupbv( &ei->bei_nrdn, nrdn );

	free( ei->bei_rdn.bv_val );

	rdn = e->e_name;
	if ( nrdn->bv_len != e->e_nname.bv_len ) {
		char *ptr = ber_bvchr(&rdn, ',');
		assert( ptr != NULL );
		rdn.bv_len = ptr - rdn.bv_val;
	}
	ber_dupbv( &ei->bei_rdn, &rdn );

	/* If new parent, decrement kid counts */
	if ( ein ) {
		pei->bei_ckids--;
		if ( pei->bei_dkids ) {
			pei->bei_dkids--;
			if ( pei->bei_dkids < 2 )
				pei->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS;
		}
	}

	if ( !ein ) {
		ein = ei->bei_parent;
	} else {
		ei->bei_parent = ein;
		bdb_cache_entryinfo_unlock( pei );
		bdb_cache_entryinfo_lock( ein );

		/* new parent now has kids */
		if ( ein->bei_state & CACHE_ENTRY_NO_KIDS )
			ein->bei_state ^= CACHE_ENTRY_NO_KIDS;
		/* grandparent has grandkids */
		if ( ein->bei_parent )
			ein->bei_parent->bei_state &= ~CACHE_ENTRY_NO_GRANDKIDS;

		/* parent might now have grandkids */
		if ( ein->bei_state & CACHE_ENTRY_NO_GRANDKIDS &&
			!(ei->bei_state & CACHE_ENTRY_NO_KIDS))
			ein->bei_state ^= CACHE_ENTRY_NO_GRANDKIDS;

		if ( ein->bei_dkids ) ein->bei_dkids++;
	}

	/* Record the generation number of this change */
	ldap_pvt_thread_mutex_lock( &bdb->bi_modrdns_mutex );
	bdb->bi_modrdns++;
	ei->bei_modrdns = bdb->bi_modrdns;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_modrdns_mutex );

	avl_insert( &ein->bei_kids, ei, bdb_rdn_cmp, avl_dup_error );
	bdb_cache_entryinfo_unlock( ein );
	return rc;
}
/*
 * cache_delete - delete the entry e from the cache.
 *
 * returns:	0	e was deleted ok
 *		1	e was not in the cache
 *		-1	something bad happened
 */
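/* Usage sketch (hedged; follows the call pattern used elsewhere in this
 * backend, not copied from the original file):
 *
 *	rc = bdb_cache_delete( bdb, e, locker, lock );
 *	if ( rc == 0 ) {
 *		bdb_cache_entryinfo_lock( BEI(e) );
 *		bdb_cache_delete_cleanup( &bdb->bi_cache, BEI(e) );
 *	}
 *
 * bdb_cache_delete_cleanup() expects the EntryInfo locked on entry and
 * releases it itself.
 */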
int
bdb_cache_delete(
	struct bdb_info *bdb,
	Entry *e,
	BDB_LOCKER locker,
	DB_LOCK *lock )
{
	EntryInfo *ei = BEI(e);
	int	rc;

	assert( e->e_private != NULL );

	/* Lock the entry's info */
	bdb_cache_entryinfo_lock( ei );

	/* Set this early, warn off any queriers */
	ei->bei_state |= CACHE_ENTRY_DELETED;

	bdb_cache_entryinfo_unlock( ei );

	/* Get write lock on the data */
	rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock );
	if ( rc ) {
		/* couldn't lock, undo and give up */
		ei->bei_state ^= CACHE_ENTRY_DELETED;
		return rc;
	}

	Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_delete( %ld )\n",
		e->e_id, 0, 0 );

	/* set lru mutex */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );

	rc = bdb_cache_delete_internal( &bdb->bi_cache, e->e_private, 1 );

	/* free lru mutex */
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );

	return rc;
}
void
bdb_cache_delete_cleanup(
	Cache *cache,
	EntryInfo *ei )
{
	/* Enter with ei locked */

	if ( ei->bei_e ) {
		ei->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
		bdb_entry_return( ei->bei_bdb, ei->bei_e, ei->bei_zseq );
#else
		bdb_entry_return( ei->bei_e );
#endif
		ei->bei_e = NULL;
	}

	bdb_cache_entryinfo_free( cache, ei );
	bdb_cache_entryinfo_unlock( ei );
}
static int
bdb_cache_delete_internal(
	Cache	*cache,
	EntryInfo	*e,
	int		decr )
{
	int rc = 0;	/* return code */
	int decr_leaf = 0;

	/* Lock the parent's kids tree */
	bdb_cache_entryinfo_lock( e->bei_parent );

	e->bei_parent->bei_ckids--;
	if ( decr && e->bei_parent->bei_dkids ) e->bei_parent->bei_dkids--;

	/* dn tree */
	if ( avl_delete( &e->bei_parent->bei_kids, (caddr_t) e, bdb_rdn_cmp )
		== NULL )
	{
		rc = -1;
	}
	if ( e->bei_parent->bei_kids )
		decr_leaf = 1;

	bdb_cache_entryinfo_unlock( e->bei_parent );

	ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
	/* id tree */
	if ( avl_delete( &cache->c_idtree, (caddr_t) e, bdb_id_cmp )) {
		cache->c_eiused--;
		if ( decr_leaf )
			cache->c_leaves--;
	} else {
		rc = -1;
	}
	ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );

	if ( rc == 0 ) {
		/* lru */
		LRU_DEL( cache, e );

		if ( e->bei_e ) {
			ldap_pvt_thread_mutex_lock( &cache->c_count_mutex );
			cache->c_cursize--;
			ldap_pvt_thread_mutex_unlock( &cache->c_count_mutex );
		}
	}

	return rc;
}
static void
bdb_entryinfo_release( void *data )
{
	EntryInfo *ei = (EntryInfo *)data;
	if ( ei->bei_kids ) {
		avl_free( ei->bei_kids, NULL );
	}
	if ( ei->bei_e ) {
		ei->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
		bdb_entry_return( ei->bei_bdb, ei->bei_e, ei->bei_zseq );
#else
		bdb_entry_return( ei->bei_e );
#endif
	}
	bdb_cache_entryinfo_destroy( ei );
}
void
bdb_cache_release_all( Cache *cache )
{
	/* set cache write lock */
	ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
	/* set lru mutex */
	ldap_pvt_thread_mutex_lock( &cache->c_lru_mutex );

	Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_release_all\n", 0, 0, 0 );

	avl_free( cache->c_dntree.bei_kids, NULL );
	avl_free( cache->c_idtree, bdb_entryinfo_release );
	for (;cache->c_eifree;cache->c_eifree = cache->c_lruhead) {
		cache->c_lruhead = cache->c_eifree->bei_lrunext;
		bdb_cache_entryinfo_destroy(cache->c_eifree);
	}
	cache->c_cursize = 0;
	cache->c_eiused = 0;
	cache->c_leaves = 0;
	cache->c_idtree = NULL;
	cache->c_lruhead = NULL;
	cache->c_lrutail = NULL;
	cache->c_dntree.bei_kids = NULL;

	/* free lru mutex */
	ldap_pvt_thread_mutex_unlock( &cache->c_lru_mutex );
	/* free cache write lock */
	ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );
}
static void
bdb_lru_print( Cache *cache )
{
	EntryInfo	*e;

	fprintf( stderr, "LRU circle head: %p\n", (void *) cache->c_lruhead );
	fprintf( stderr, "LRU circle (tail forward):\n" );
	for ( e = cache->c_lrutail; ; ) {
		fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n",
			(void *) e, (void *) e->bei_e, e->bei_id, e->bei_nrdn.bv_val );
		e = e->bei_lrunext;
		if ( e == cache->c_lrutail )
			break;
	}
	fprintf( stderr, "LRU circle (tail backward):\n" );
	for ( e = cache->c_lrutail; ; ) {
		fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n",
			(void *) e, (void *) e->bei_e, e->bei_id, e->bei_nrdn.bv_val );
		e = e->bei_lruprev;
		if ( e == cache->c_lrutail )
			break;
	}
}
#ifdef BDB_REUSE_LOCKERS
static void
bdb_locker_id_free( void *key, void *data )
{
	DB_ENV *env = key;
	u_int32_t lockid;
	int rc;

#if DB_VERSION_FULL >= 0x04060012
	BDB_LOCKER lptr = data;
	lockid = lptr->id;
#else
	lockid = (long)data;
#endif
	rc = XLOCK_ID_FREE( env, lockid );
	if ( rc == EINVAL ) {
		DB_LOCKREQ lr;
		Debug( LDAP_DEBUG_ANY,
			"bdb_locker_id_free: %lu err %s(%d)\n",
			(unsigned long) lockid, db_strerror(rc), rc );
		/* release all locks held by this locker. */
		lr.op = DB_LOCK_PUT_ALL;
		lr.obj = NULL;
		env->lock_vec( env, lockid, 0, &lr, 1, NULL );
		XLOCK_ID_FREE( env, lockid );
	}
}
/* free up any keys used by the main thread */
void
bdb_locker_flush( DB_ENV *env )
{
	void *data;
	void *ctx = ldap_pvt_thread_pool_context();

	if ( !ldap_pvt_thread_pool_getkey( ctx, env, &data, NULL ) ) {
		ldap_pvt_thread_pool_setkey( ctx, env, NULL, 0, NULL, NULL );
		bdb_locker_id_free( env, data );
	}
}
int
bdb_locker_id( Operation *op, DB_ENV *env, BDB_LOCKER *locker )
{
	int i, rc;
	u_int32_t lockid;
	void *data;
	void *ctx;

	if ( !env || !locker ) return -1;

	/* If no op was provided, try to find the ctx anyway... */
	if ( op ) {
		ctx = op->o_threadctx;
	} else {
		ctx = ldap_pvt_thread_pool_context();
	}

	/* Shouldn't happen unless we're single-threaded */
	if ( !ctx ) {
		*locker = 0;
		return 0;
	}

	if ( ldap_pvt_thread_pool_getkey( ctx, env, &data, NULL ) ) {
		for ( i=0, rc=1; rc != 0 && i<4; i++ ) {
			rc = XLOCK_ID( env, &lockid );
			if (rc) ldap_pvt_thread_yield();
		}
		if ( rc != 0 ) {
			return rc;
		}
#if DB_VERSION_FULL >= 0x04060012
		{
			BDB_LOCKER lptr;
			__lock_getlocker( env->lk_handle, lockid, 0, &lptr );
			data = lptr;
		}
#else
		data = (void *)((long)lockid);
#endif
		if ( ( rc = ldap_pvt_thread_pool_setkey( ctx, env,
			data, bdb_locker_id_free, NULL, NULL ) ) ) {
			XLOCK_ID_FREE( env, lockid );
			Debug( LDAP_DEBUG_ANY, "bdb_locker_id: err %s(%d)\n",
				db_strerror(rc), rc, 0 );

			return rc;
		}
	} else {
		lockid = (long)data;
	}
#if DB_VERSION_FULL >= 0x04060012
	*locker = data;
#else
	*locker = lockid;
#endif
	return 0;
}
#endif /* BDB_REUSE_LOCKERS */