2 Unix SMB/CIFS implementation.
3 Database interface wrapper around red-black trees
4 Copyright (C) Volker Lendecke 2007, 2008
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "dbwrap/dbwrap.h"
22 #include "dbwrap/dbwrap_private.h"
23 #include "dbwrap/dbwrap_rbt.h"
24 #include "../lib/util/rbtree.h"
25 #include "../lib/util/dlinklist.h"
/*
 * Round _size_ up to the next multiple of 16. The key and value payloads
 * are placed behind the node header at offsets computed with this macro.
 */
#define DBWRAP_RBT_ALIGN(_size_) (((_size_)+15)&~15)
31 struct db_rbt_node
*nodes
;
33 struct db_rbt_node
**traverse_nextp
;
/*
 * Backend-private part of a db_record (db_record->private_data).
 */
struct db_rbt_rec {
	/* Tree node backing the record, NULL if the key is not stored (yet) */
	struct db_rbt_node *node;
};
40 /* The structure that ends up in the tree */
43 struct rb_node rb_node
;
44 size_t keysize
, valuesize
;
45 struct db_rbt_node
*prev
, *next
;
49 * Hide the ugly pointer calculations in a function
52 static struct db_rbt_node
*db_rbt2node(struct rb_node
*node
)
54 return (struct db_rbt_node
*)
55 ((char *)node
- offsetof(struct db_rbt_node
, rb_node
));
62 static int db_rbt_compare(TDB_DATA a
, TDB_DATA b
)
66 res
= memcmp(a
.dptr
, b
.dptr
, MIN(a
.dsize
, b
.dsize
));
68 if ((res
< 0) || ((res
== 0) && (a
.dsize
< b
.dsize
))) {
71 if ((res
> 0) || ((res
== 0) && (a
.dsize
> b
.dsize
))) {
78 * dissect a db_rbt_node into its implicit key and value parts
81 static void db_rbt_parse_node(struct db_rbt_node
*node
,
82 TDB_DATA
*key
, TDB_DATA
*value
)
84 size_t key_offset
, value_offset
;
86 key_offset
= DBWRAP_RBT_ALIGN(sizeof(struct db_rbt_node
));
87 key
->dptr
= ((uint8_t *)node
) + key_offset
;
88 key
->dsize
= node
->keysize
;
90 value_offset
= DBWRAP_RBT_ALIGN(node
->keysize
);
91 value
->dptr
= key
->dptr
+ value_offset
;
92 value
->dsize
= node
->valuesize
;
95 static ssize_t
db_rbt_reclen(size_t keylen
, size_t valuelen
)
99 len
= DBWRAP_RBT_ALIGN(sizeof(struct db_rbt_node
));
101 tmp
= DBWRAP_RBT_ALIGN(keylen
);
112 if (len
< valuelen
) {
121 static NTSTATUS
db_rbt_storev(struct db_record
*rec
,
122 const TDB_DATA
*dbufs
, int num_dbufs
, int flag
)
124 struct db_rbt_ctx
*db_ctx
= talloc_get_type_abort(
125 rec
->db
->private_data
, struct db_rbt_ctx
);
126 struct db_rbt_rec
*rec_priv
= (struct db_rbt_rec
*)rec
->private_data
;
127 struct db_rbt_node
*node
;
130 struct rb_node
*parent
= NULL
;
131 struct db_rbt_node
*parent_node
= NULL
;
134 TDB_DATA data
, this_key
, this_val
;
135 void *to_free
= NULL
;
137 if (db_ctx
->traverse_read
> 0) {
138 return NT_STATUS_MEDIA_WRITE_PROTECTED
;
141 if ((flag
== TDB_INSERT
) && (rec_priv
->node
!= NULL
)) {
142 return NT_STATUS_OBJECT_NAME_COLLISION
;
145 if ((flag
== TDB_MODIFY
) && (rec_priv
->node
== NULL
)) {
146 return NT_STATUS_OBJECT_NAME_NOT_FOUND
;
149 if (num_dbufs
== 1) {
154 data
= (TDB_DATA
) {0};
155 status
= dbwrap_merge_dbufs(&data
, rec
, dbufs
, num_dbufs
);
156 if (!NT_STATUS_IS_OK(status
)) {
162 if (rec_priv
->node
!= NULL
) {
165 * The record was around previously
168 db_rbt_parse_node(rec_priv
->node
, &this_key
, &this_val
);
170 SMB_ASSERT(this_key
.dsize
== rec
->key
.dsize
);
171 SMB_ASSERT(memcmp(this_key
.dptr
, rec
->key
.dptr
,
172 this_key
.dsize
) == 0);
174 if (this_val
.dsize
>= data
.dsize
) {
176 * The new value fits into the old space
178 memcpy(this_val
.dptr
, data
.dptr
, data
.dsize
);
179 rec_priv
->node
->valuesize
= data
.dsize
;
180 TALLOC_FREE(to_free
);
185 reclen
= db_rbt_reclen(rec
->key
.dsize
, data
.dsize
);
187 TALLOC_FREE(to_free
);
188 return NT_STATUS_INSUFFICIENT_RESOURCES
;
191 node
= talloc_zero_size(db_ctx
, reclen
);
193 TALLOC_FREE(to_free
);
194 return NT_STATUS_NO_MEMORY
;
197 if (rec_priv
->node
!= NULL
) {
198 if (db_ctx
->traverse_nextp
!= NULL
) {
199 if (*db_ctx
->traverse_nextp
== rec_priv
->node
) {
200 *db_ctx
->traverse_nextp
= node
;
205 * We need to delete the key from the tree and start fresh,
206 * there's not enough space in the existing record
209 rb_erase(&rec_priv
->node
->rb_node
, &db_ctx
->tree
);
210 DLIST_REMOVE(db_ctx
->nodes
, rec_priv
->node
);
213 * Keep the existing node around for a while: If the record
214 * existed before, we reference the key data in there.
218 node
->keysize
= rec
->key
.dsize
;
219 node
->valuesize
= data
.dsize
;
221 db_rbt_parse_node(node
, &this_key
, &this_val
);
223 memcpy(this_key
.dptr
, rec
->key
.dptr
, node
->keysize
);
224 TALLOC_FREE(rec_priv
->node
);
225 rec_priv
->node
= node
;
227 if (node
->valuesize
> 0) {
228 memcpy(this_val
.dptr
, data
.dptr
, node
->valuesize
);
232 p
= &db_ctx
->tree
.rb_node
;
235 struct db_rbt_node
*r
;
236 TDB_DATA search_key
, search_val
;
244 db_rbt_parse_node(r
, &search_key
, &search_val
);
246 res
= db_rbt_compare(this_key
, search_key
);
255 smb_panic("someone messed with the tree");
259 rb_link_node(&node
->rb_node
, parent
, p
);
260 DLIST_ADD_AFTER(db_ctx
->nodes
, node
, parent_node
);
261 rb_insert_color(&node
->rb_node
, &db_ctx
->tree
);
263 TALLOC_FREE(to_free
);
268 static NTSTATUS
db_rbt_delete(struct db_record
*rec
)
270 struct db_rbt_ctx
*db_ctx
= talloc_get_type_abort(
271 rec
->db
->private_data
, struct db_rbt_ctx
);
272 struct db_rbt_rec
*rec_priv
= (struct db_rbt_rec
*)rec
->private_data
;
274 if (db_ctx
->traverse_read
> 0) {
275 return NT_STATUS_MEDIA_WRITE_PROTECTED
;
278 if (rec_priv
->node
== NULL
) {
282 if (db_ctx
->traverse_nextp
!= NULL
) {
283 if (*db_ctx
->traverse_nextp
== rec_priv
->node
) {
284 *db_ctx
->traverse_nextp
= rec_priv
->node
->next
;
288 rb_erase(&rec_priv
->node
->rb_node
, &db_ctx
->tree
);
289 DLIST_REMOVE(db_ctx
->nodes
, rec_priv
->node
);
290 TALLOC_FREE(rec_priv
->node
);
295 struct db_rbt_search_result
{
298 struct db_rbt_node
* node
;
301 static bool db_rbt_search_internal(struct db_context
*db
, TDB_DATA key
,
302 struct db_rbt_search_result
*result
)
304 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
305 db
->private_data
, struct db_rbt_ctx
);
309 struct db_rbt_node
*r
= NULL
;
310 TDB_DATA search_key
= { 0 };
311 TDB_DATA search_val
= { 0 };
313 n
= ctx
->tree
.rb_node
;
320 db_rbt_parse_node(r
, &search_key
, &search_val
);
322 res
= db_rbt_compare(key
, search_key
);
335 if (result
!= NULL
) {
337 result
->key
= search_key
;
338 result
->val
= search_val
;
341 ZERO_STRUCT(*result
);
347 static struct db_record
*db_rbt_fetch_locked(struct db_context
*db_ctx
,
351 struct db_rbt_rec
*rec_priv
;
352 struct db_record
*result
;
355 struct db_rbt_search_result res
;
357 found
= db_rbt_search_internal(db_ctx
, key
, &res
);
360 * In this low-level routine, play tricks to reduce the number of
361 * tallocs to one. Not recommended for general use, but here it pays
365 size
= DBWRAP_RBT_ALIGN(sizeof(struct db_record
))
366 + sizeof(struct db_rbt_rec
);
370 * We need to keep the key around for later store
375 result
= (struct db_record
*)talloc_size(mem_ctx
, size
);
376 if (result
== NULL
) {
380 rec_priv
= (struct db_rbt_rec
*)
381 ((char *)result
+ DBWRAP_RBT_ALIGN(sizeof(struct db_record
)));
383 result
->storev
= db_rbt_storev
;
384 result
->delete_rec
= db_rbt_delete
;
385 result
->private_data
= rec_priv
;
387 rec_priv
->node
= res
.node
;
388 result
->value
= res
.val
;
389 result
->value_valid
= true;
392 result
->key
= res
.key
;
395 result
->key
.dptr
= (uint8_t *)
396 ((char *)rec_priv
+ sizeof(*rec_priv
));
397 result
->key
.dsize
= key
.dsize
;
398 memcpy(result
->key
.dptr
, key
.dptr
, key
.dsize
);
404 static int db_rbt_exists(struct db_context
*db
, TDB_DATA key
)
406 return db_rbt_search_internal(db
, key
, NULL
);
409 static int db_rbt_wipe(struct db_context
*db
)
411 struct db_rbt_ctx
*old_ctx
= talloc_get_type_abort(
412 db
->private_data
, struct db_rbt_ctx
);
413 struct db_rbt_ctx
*new_ctx
= talloc_zero(db
, struct db_rbt_ctx
);
414 if (new_ctx
== NULL
) {
417 db
->private_data
= new_ctx
;
418 talloc_free(old_ctx
);
422 static NTSTATUS
db_rbt_parse_record(struct db_context
*db
, TDB_DATA key
,
423 void (*parser
)(TDB_DATA key
, TDB_DATA data
,
427 struct db_rbt_search_result res
;
428 bool found
= db_rbt_search_internal(db
, key
, &res
);
431 return NT_STATUS_NOT_FOUND
;
433 parser(res
.key
, res
.val
, private_data
);
437 static int db_rbt_traverse_internal(struct db_context
*db
,
438 int (*f
)(struct db_record
*db
,
440 void *private_data
, uint32_t* count
,
443 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
444 db
->private_data
, struct db_rbt_ctx
);
445 struct db_rbt_node
*cur
= NULL
;
446 struct db_rbt_node
*next
= NULL
;
449 for (cur
= ctx
->nodes
; cur
!= NULL
; cur
= next
) {
450 struct db_record rec
;
451 struct db_rbt_rec rec_priv
;
454 next
= rec_priv
.node
->next
;
458 rec
.private_data
= &rec_priv
;
459 rec
.storev
= db_rbt_storev
;
460 rec
.delete_rec
= db_rbt_delete
;
461 db_rbt_parse_node(rec_priv
.node
, &rec
.key
, &rec
.value
);
462 rec
.value_valid
= true;
465 ctx
->traverse_nextp
= &next
;
467 ret
= f(&rec
, private_data
);
470 ctx
->traverse_nextp
= NULL
;
475 if (rec_priv
.node
!= NULL
) {
476 next
= rec_priv
.node
->next
;
483 static int db_rbt_traverse_read(struct db_context
*db
,
484 int (*f
)(struct db_record
*db
,
488 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
489 db
->private_data
, struct db_rbt_ctx
);
493 ctx
->traverse_read
++;
494 ret
= db_rbt_traverse_internal(db
,
495 f
, private_data
, &count
,
497 ctx
->traverse_read
--;
501 if (count
> INT_MAX
) {
507 static int db_rbt_traverse(struct db_context
*db
,
508 int (*f
)(struct db_record
*db
,
512 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
513 db
->private_data
, struct db_rbt_ctx
);
517 if (ctx
->traverse_nextp
!= NULL
) {
521 if (ctx
->traverse_read
> 0) {
522 return db_rbt_traverse_read(db
, f
, private_data
);
525 ret
= db_rbt_traverse_internal(db
,
526 f
, private_data
, &count
,
531 if (count
> INT_MAX
) {
/*
 * Sequence number hook, installed as db_context->get_seqnum.
 *
 * NOTE(review): reports a constant 0, i.e. no change tracking -- confirm
 * that no caller relies on the sequence number of an rbt database.
 */
static int db_rbt_get_seqnum(struct db_context *db)
{
	return 0;
}
/*
 * Shared no-op for the transaction_start, transaction_commit and
 * transaction_cancel hooks. Always returns 0.
 */
static int db_rbt_trans_dummy(struct db_context *db)
{
	/*
	 * Transactions are pretty pointless in-memory, just return success.
	 */
	return 0;
}
/*
 * Write an identifier for db into id: the bytes of the db_context pointer
 * itself.
 *
 * db:    database to identify
 * id:    output buffer
 * idlen: size of the output buffer in bytes
 *
 * Returns the number of bytes the identifier needs. Nothing is written if
 * idlen is smaller than that, so callers can probe with idlen == 0.
 */
static size_t db_rbt_id(struct db_context *db, uint8_t *id, size_t idlen)
{
	if (idlen >= sizeof(struct db_context *)) {
		memcpy(id, &db, sizeof(struct db_context *));
	}

	return sizeof(struct db_context *);
}
558 struct db_context
*db_open_rbt(TALLOC_CTX
*mem_ctx
)
560 struct db_context
*result
;
562 result
= talloc_zero(mem_ctx
, struct db_context
);
564 if (result
== NULL
) {
568 result
->private_data
= talloc_zero(result
, struct db_rbt_ctx
);
570 if (result
->private_data
== NULL
) {
575 result
->fetch_locked
= db_rbt_fetch_locked
;
576 result
->traverse
= db_rbt_traverse
;
577 result
->traverse_read
= db_rbt_traverse_read
;
578 result
->get_seqnum
= db_rbt_get_seqnum
;
579 result
->transaction_start
= db_rbt_trans_dummy
;
580 result
->transaction_commit
= db_rbt_trans_dummy
;
581 result
->transaction_cancel
= db_rbt_trans_dummy
;
582 result
->exists
= db_rbt_exists
;
583 result
->wipe
= db_rbt_wipe
;
584 result
->parse_record
= db_rbt_parse_record
;
585 result
->id
= db_rbt_id
;
586 result
->name
= "dbwrap rbt";