1 /* $NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $ */
3 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc.
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Joerg Sonnenberger.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #if HAVE_NBTOOL_CONFIG_H
35 #include "nbtool_config.h"
38 #include <sys/cdefs.h>
39 __RCSID("$NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $");
41 #include "namespace.h"
43 #include <sys/endian.h>
44 #include <sys/queue.h>
51 __weak_alias(cdbw_close
,_cdbw_close
)
52 __weak_alias(cdbw_open
,_cdbw_open
)
53 __weak_alias(cdbw_output
,_cdbw_output
)
54 __weak_alias(cdbw_put
,_cdbw_put
)
55 __weak_alias(cdbw_put_data
,_cdbw_put_data
)
56 __weak_alias(cdbw_put_key
,_cdbw_put_key
)
60 SLIST_ENTRY(key_hash
) link
;
67 SLIST_HEAD(key_hash_head
, key_hash
);
71 size_t data_allocated
;
77 struct key_hash_head
*hash
;
81 /* Max. data counter that allows the index size to be 32bit. */
82 static const uint32_t max_data_counter
= 0xccccccccU
;
90 cdbw
= calloc(sizeof(*cdbw
), 1);
94 cdbw
->hash_size
= 1024;
95 cdbw
->hash
= calloc(cdbw
->hash_size
, sizeof(*cdbw
->hash
));
96 if (cdbw
->hash
== NULL
) {
101 for (i
= 0; i
< cdbw
->hash_size
; ++i
)
102 SLIST_INIT(cdbw
->hash
+ i
);
108 cdbw_put(struct cdbw
*cdbw
, const void *key
, size_t keylen
,
109 const void *data
, size_t datalen
)
114 rv
= cdbw_put_data(cdbw
, data
, datalen
, &idx
);
117 rv
= cdbw_put_key(cdbw
, key
, keylen
, idx
);
119 --cdbw
->data_counter
;
120 free(cdbw
->data_ptr
[cdbw
->data_counter
]);
121 cdbw
->data_size
-= datalen
;
128 cdbw_put_data(struct cdbw
*cdbw
, const void *data
, size_t datalen
,
132 if (cdbw
->data_counter
== max_data_counter
)
135 if (cdbw
->data_size
+ datalen
< cdbw
->data_size
||
136 cdbw
->data_size
+ datalen
> 0xffffffffU
)
137 return -1; /* Overflow */
139 if (cdbw
->data_allocated
== cdbw
->data_counter
) {
141 size_t *new_data_len
;
142 size_t new_allocated
;
144 if (cdbw
->data_allocated
== 0)
147 new_allocated
= cdbw
->data_allocated
* 2;
149 new_data_ptr
= realloc(cdbw
->data_ptr
,
150 sizeof(*cdbw
->data_ptr
) * new_allocated
);
151 if (new_data_ptr
== NULL
)
153 cdbw
->data_ptr
= new_data_ptr
;
155 new_data_len
= realloc(cdbw
->data_len
,
156 sizeof(*cdbw
->data_len
) * new_allocated
);
157 if (new_data_len
== NULL
)
159 cdbw
->data_len
= new_data_len
;
161 cdbw
->data_allocated
= new_allocated
;
164 cdbw
->data_ptr
[cdbw
->data_counter
] = malloc(datalen
);
165 if (cdbw
->data_ptr
[cdbw
->data_counter
] == NULL
)
167 memcpy(cdbw
->data_ptr
[cdbw
->data_counter
], data
, datalen
);
168 cdbw
->data_len
[cdbw
->data_counter
] = datalen
;
169 cdbw
->data_size
+= datalen
;
170 *idx
= cdbw
->data_counter
++;
175 cdbw_put_key(struct cdbw
*cdbw
, const void *key
, size_t keylen
, uint32_t idx
)
178 struct key_hash_head
*head
, *head2
, *new_head
;
179 struct key_hash
*key_hash
;
180 size_t new_hash_size
, i
;
182 if (idx
>= cdbw
->data_counter
||
183 cdbw
->key_counter
== max_data_counter
)
186 mi_vector_hash(key
, keylen
, 0, hashes
);
188 head
= cdbw
->hash
+ (hashes
[0] & (cdbw
->hash_size
- 1));
189 SLIST_FOREACH(key_hash
, head
, link
) {
190 if (key_hash
->keylen
!= keylen
)
192 if (key_hash
->hashes
[0] != hashes
[0])
194 if (key_hash
->hashes
[1] != hashes
[1])
196 if (key_hash
->hashes
[2] != hashes
[2])
198 if (memcmp(key
, key_hash
->key
, keylen
))
202 key_hash
= malloc(sizeof(*key_hash
));
203 if (key_hash
== NULL
)
205 key_hash
->key
= malloc(keylen
);
206 if (key_hash
->key
== NULL
) {
210 memcpy(key_hash
->key
, key
, keylen
);
211 key_hash
->hashes
[0] = hashes
[0];
212 key_hash
->hashes
[1] = hashes
[1];
213 key_hash
->hashes
[2] = hashes
[2];
214 key_hash
->keylen
= keylen
;
216 SLIST_INSERT_HEAD(head
, key_hash
, link
);
219 if (cdbw
->key_counter
<= cdbw
->hash_size
)
222 /* Try to resize the hash table, but ignore errors. */
223 new_hash_size
= cdbw
->hash_size
* 2;
224 new_head
= calloc(sizeof(*new_head
), new_hash_size
);
225 if (new_head
== NULL
)
228 head
= &cdbw
->hash
[hashes
[0] & (cdbw
->hash_size
- 1)];
229 for (i
= 0; i
< new_hash_size
; ++i
)
230 SLIST_INIT(new_head
+ i
);
232 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
233 head
= cdbw
->hash
+ i
;
235 while ((key_hash
= SLIST_FIRST(head
)) != NULL
) {
236 SLIST_REMOVE_HEAD(head
, link
);
238 (key_hash
->hashes
[0] & (new_hash_size
- 1));
239 SLIST_INSERT_HEAD(head2
, key_hash
, link
);
243 cdbw
->hash_size
= new_hash_size
;
244 cdbw
->hash
= new_head
;
250 cdbw_close(struct cdbw
*cdbw
)
252 struct key_hash_head
*head
;
253 struct key_hash
*key_hash
;
256 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
257 head
= cdbw
->hash
+ i
;
258 while ((key_hash
= SLIST_FIRST(head
)) != NULL
) {
259 SLIST_REMOVE_HEAD(head
, link
);
265 for (i
= 0; i
< cdbw
->data_counter
; ++i
)
266 free(cdbw
->data_ptr
[i
]);
267 free(cdbw
->data_ptr
);
268 free(cdbw
->data_len
);
273 #define unused 0xffffffffU
276 uint32_t l_edge
, m_edge
, r_edge
;
282 uint32_t left
, middle
, right
;
283 uint32_t l_prev
, m_prev
, l_next
;
284 uint32_t r_prev
, m_next
, r_next
;
288 uint32_t data_entries
;
296 struct vertex
*verts
;
298 uint32_t output_index
;
299 uint32_t *output_order
;
303 remove_vertex(struct state
*state
, struct vertex
*v
)
306 struct vertex
*vl
, *vm
, *vr
;
308 if (v
->l_edge
!= unused
&& v
->m_edge
!= unused
)
310 if (v
->l_edge
!= unused
&& v
->r_edge
!= unused
)
312 if (v
->m_edge
!= unused
&& v
->r_edge
!= unused
)
314 if (v
->l_edge
== unused
&& v
->m_edge
== unused
&& v
->r_edge
== unused
)
317 if (v
->l_edge
!= unused
) {
318 e
= &state
->edges
[v
->l_edge
];
319 if (e
->l_next
!= unused
)
321 } else if (v
->m_edge
!= unused
) {
322 e
= &state
->edges
[v
->m_edge
];
323 if (e
->m_next
!= unused
)
326 if (v
->r_edge
== unused
)
328 e
= &state
->edges
[v
->r_edge
];
329 if (e
->r_next
!= unused
)
333 state
->output_order
[--state
->output_index
] = e
- state
->edges
;
335 vl
= &state
->verts
[e
->left
];
336 vm
= &state
->verts
[e
->middle
];
337 vr
= &state
->verts
[e
->right
];
339 if (e
->l_prev
== unused
)
340 vl
->l_edge
= e
->l_next
;
342 state
->edges
[e
->l_prev
].l_next
= e
->l_next
;
343 if (e
->l_next
!= unused
)
344 state
->edges
[e
->l_next
].l_prev
= e
->l_prev
;
346 if (e
->m_prev
== unused
)
347 vm
->m_edge
= e
->m_next
;
349 state
->edges
[e
->m_prev
].m_next
= e
->m_next
;
350 if (e
->m_next
!= unused
)
351 state
->edges
[e
->m_next
].m_prev
= e
->m_prev
;
353 if (e
->r_prev
== unused
)
354 vr
->r_edge
= e
->r_next
;
356 state
->edges
[e
->r_prev
].r_next
= e
->r_next
;
357 if (e
->r_next
!= unused
)
358 state
->edges
[e
->r_next
].r_prev
= e
->r_prev
;
362 build_graph(struct cdbw
*cdbw
, struct state
*state
)
364 struct key_hash_head
*head
;
365 struct key_hash
*key_hash
;
372 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
373 head
= &cdbw
->hash
[i
];
374 SLIST_FOREACH(key_hash
, head
, link
) {
375 e
->idx
= key_hash
->idx
;
376 mi_vector_hash(key_hash
->key
, key_hash
->keylen
,
377 state
->seed
, hashes
);
378 e
->left
= hashes
[0] % state
->entries
;
379 e
->middle
= hashes
[1] % state
->entries
;
380 e
->right
= hashes
[2] % state
->entries
;
386 for (i
= 0; i
< state
->entries
; ++i
) {
387 v
= state
->verts
+ i
;
393 for (i
= 0; i
< state
->keys
; ++i
) {
394 e
= state
->edges
+ i
;
395 v
= state
->verts
+ e
->left
;
396 if (v
->l_edge
!= unused
)
397 state
->edges
[v
->l_edge
].l_prev
= i
;
398 e
->l_next
= v
->l_edge
;
402 v
= &state
->verts
[e
->middle
];
403 if (v
->m_edge
!= unused
)
404 state
->edges
[v
->m_edge
].m_prev
= i
;
405 e
->m_next
= v
->m_edge
;
409 v
= &state
->verts
[e
->right
];
410 if (v
->r_edge
!= unused
)
411 state
->edges
[v
->r_edge
].r_prev
= i
;
412 e
->r_next
= v
->r_edge
;
417 state
->output_index
= state
->keys
;
418 for (i
= 0; i
< state
->entries
; ++i
)
419 remove_vertex(state
, state
->verts
+ i
);
422 while (i
> 0 && i
> state
->output_index
) {
424 e
= state
->edges
+ state
->output_order
[i
];
425 remove_vertex(state
, state
->verts
+ e
->left
);
426 remove_vertex(state
, state
->verts
+ e
->middle
);
427 remove_vertex(state
, state
->verts
+ e
->right
);
430 return state
->output_index
== 0 ? 0 : -1;
434 assign_nodes(struct state
*state
)
439 for (i
= 0; i
< state
->keys
; ++i
) {
440 e
= state
->edges
+ state
->output_order
[i
];
442 if (!state
->visited
[e
->left
]) {
444 (2 * state
->data_entries
+ e
->idx
445 - state
->g
[e
->middle
] - state
->g
[e
->right
])
446 % state
->data_entries
;
447 } else if (!state
->visited
[e
->middle
]) {
448 state
->g
[e
->middle
] =
449 (2 * state
->data_entries
+ e
->idx
450 - state
->g
[e
->left
] - state
->g
[e
->right
])
451 % state
->data_entries
;
454 (2 * state
->data_entries
+ e
->idx
455 - state
->g
[e
->left
] - state
->g
[e
->middle
])
456 % state
->data_entries
;
458 state
->visited
[e
->left
] = 1;
459 state
->visited
[e
->middle
] = 1;
460 state
->visited
[e
->right
] = 1;
465 compute_size(uint32_t size
)
469 else if (size
< 0x10000)
475 #define COND_FLUSH_BUFFER(n) do { \
476 if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \
477 ret = write(fd, buf, cur_pos); \
478 if (ret == -1 || (size_t)ret != cur_pos) \
482 } while (/* CONSTCOND */ 0)
485 print_hash(struct cdbw
*cdbw
, struct state
*state
, int fd
, const char *descr
)
489 size_t i
, size
, size2
, cur_pos
;
492 memcpy(buf
, "NBCDB\n\0", 7);
494 strncpy((char *)buf
+ 8, descr
, 16);
495 le32enc(buf
+ 24, cdbw
->data_size
);
496 le32enc(buf
+ 28, cdbw
->data_counter
);
497 le32enc(buf
+ 32, state
->entries
);
498 le32enc(buf
+ 36, state
->seed
);
501 size
= compute_size(state
->entries
);
502 for (i
= 0; i
< state
->entries
; ++i
) {
503 COND_FLUSH_BUFFER(4);
504 le32enc(buf
+ cur_pos
, state
->g
[i
]);
507 size2
= compute_size(cdbw
->data_size
);
508 size
= size
* state
->entries
% size2
;
511 COND_FLUSH_BUFFER(4);
512 le32enc(buf
+ cur_pos
, 0);
515 for (data_size
= 0, i
= 0; i
< cdbw
->data_counter
; ++i
) {
516 COND_FLUSH_BUFFER(4);
517 le32enc(buf
+ cur_pos
, data_size
);
519 data_size
+= cdbw
->data_len
[i
];
521 COND_FLUSH_BUFFER(4);
522 le32enc(buf
+ cur_pos
, data_size
);
525 for (i
= 0; i
< cdbw
->data_counter
; ++i
) {
526 COND_FLUSH_BUFFER(cdbw
->data_len
[i
]);
527 if (cdbw
->data_len
[i
] < sizeof(buf
)) {
528 memcpy(buf
+ cur_pos
, cdbw
->data_ptr
[i
],
530 cur_pos
+= cdbw
->data_len
[i
];
532 ret
= write(fd
, cdbw
->data_ptr
[i
], cdbw
->data_len
[i
]);
533 if (ret
== -1 || (size_t)ret
!= cdbw
->data_len
[i
])
538 ret
= write(fd
, buf
, cur_pos
);
539 if (ret
== -1 || (size_t)ret
!= cur_pos
)
546 cdbw_output(struct cdbw
*cdbw
, int fd
, const char descr
[16],
547 uint32_t (*seedgen
)(void))
552 if (cdbw
->data_counter
== 0 || cdbw
->key_counter
== 0) {
555 print_hash(cdbw
, &state
, fd
, descr
);
560 seedgen
= arc4random
;
564 state
.keys
= cdbw
->key_counter
;
565 state
.data_entries
= cdbw
->data_counter
;
566 state
.entries
= state
.keys
+ (state
.keys
+ 3) / 4;
567 if (state
.entries
< 10)
570 #define NALLOC(var, n) var = calloc(sizeof(*var), n)
571 NALLOC(state
.g
, state
.entries
);
572 NALLOC(state
.visited
, state
.entries
);
573 NALLOC(state
.verts
, state
.entries
);
574 NALLOC(state
.edges
, state
.entries
);
575 NALLOC(state
.output_order
, state
.keys
);
578 if (state
.g
== NULL
|| state
.visited
== NULL
|| state
.verts
== NULL
||
579 state
.edges
== NULL
|| state
.output_order
== NULL
) {
585 state
.seed
= (*seedgen
)();
586 } while (build_graph(cdbw
, &state
));
588 assign_nodes(&state
);
589 rv
= print_hash(cdbw
, &state
, fd
, descr
);
596 free(state
.output_order
);