1 /* $NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $ */
3 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc.
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Joerg Sonnenberger.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #if HAVE_NBTOOL_CONFIG_H
35 #include "nbtool_config.h"
38 #include <sys/cdefs.h>
39 __RCSID("$NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $");
41 #include "namespace.h"
43 #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
44 #include <sys/endian.h>
46 #include <sys/queue.h>
53 __weak_alias(cdbw_close
,_cdbw_close
)
54 __weak_alias(cdbw_open
,_cdbw_open
)
55 __weak_alias(cdbw_output
,_cdbw_output
)
56 __weak_alias(cdbw_put
,_cdbw_put
)
57 __weak_alias(cdbw_put_data
,_cdbw_put_data
)
58 __weak_alias(cdbw_put_key
,_cdbw_put_key
)
62 SLIST_ENTRY(key_hash
) link
;
69 SLIST_HEAD(key_hash_head
, key_hash
);
73 size_t data_allocated
;
79 struct key_hash_head
*hash
;
83 /* Max. data counter that allows the index size to be 32bit. */
84 static const uint32_t max_data_counter
= 0xccccccccU
;
92 cdbw
= calloc(sizeof(*cdbw
), 1);
96 cdbw
->hash_size
= 1024;
97 cdbw
->hash
= calloc(cdbw
->hash_size
, sizeof(*cdbw
->hash
));
98 if (cdbw
->hash
== NULL
) {
103 for (i
= 0; i
< cdbw
->hash_size
; ++i
)
104 SLIST_INIT(cdbw
->hash
+ i
);
110 cdbw_put(struct cdbw
*cdbw
, const void *key
, size_t keylen
,
111 const void *data
, size_t datalen
)
116 rv
= cdbw_put_data(cdbw
, data
, datalen
, &idx
);
119 rv
= cdbw_put_key(cdbw
, key
, keylen
, idx
);
121 --cdbw
->data_counter
;
122 free(cdbw
->data_ptr
[cdbw
->data_counter
]);
123 cdbw
->data_size
-= datalen
;
130 cdbw_put_data(struct cdbw
*cdbw
, const void *data
, size_t datalen
,
134 if (cdbw
->data_counter
== max_data_counter
)
137 if (cdbw
->data_size
+ datalen
< cdbw
->data_size
||
138 cdbw
->data_size
+ datalen
> 0xffffffffU
)
139 return -1; /* Overflow */
141 if (cdbw
->data_allocated
== cdbw
->data_counter
) {
143 size_t *new_data_len
;
144 size_t new_allocated
;
146 if (cdbw
->data_allocated
== 0)
149 new_allocated
= cdbw
->data_allocated
* 2;
151 new_data_ptr
= realloc(cdbw
->data_ptr
,
152 sizeof(*cdbw
->data_ptr
) * new_allocated
);
153 if (new_data_ptr
== NULL
)
155 cdbw
->data_ptr
= new_data_ptr
;
157 new_data_len
= realloc(cdbw
->data_len
,
158 sizeof(*cdbw
->data_len
) * new_allocated
);
159 if (new_data_len
== NULL
)
161 cdbw
->data_len
= new_data_len
;
163 cdbw
->data_allocated
= new_allocated
;
166 cdbw
->data_ptr
[cdbw
->data_counter
] = malloc(datalen
);
167 if (cdbw
->data_ptr
[cdbw
->data_counter
] == NULL
)
169 memcpy(cdbw
->data_ptr
[cdbw
->data_counter
], data
, datalen
);
170 cdbw
->data_len
[cdbw
->data_counter
] = datalen
;
171 cdbw
->data_size
+= datalen
;
172 *idx
= cdbw
->data_counter
++;
177 cdbw_put_key(struct cdbw
*cdbw
, const void *key
, size_t keylen
, uint32_t idx
)
180 struct key_hash_head
*head
, *head2
, *new_head
;
181 struct key_hash
*key_hash
;
182 size_t new_hash_size
, i
;
184 if (idx
>= cdbw
->data_counter
||
185 cdbw
->key_counter
== max_data_counter
)
188 mi_vector_hash(key
, keylen
, 0, hashes
);
190 head
= cdbw
->hash
+ (hashes
[0] & (cdbw
->hash_size
- 1));
191 SLIST_FOREACH(key_hash
, head
, link
) {
192 if (key_hash
->keylen
!= keylen
)
194 if (key_hash
->hashes
[0] != hashes
[0])
196 if (key_hash
->hashes
[1] != hashes
[1])
198 if (key_hash
->hashes
[2] != hashes
[2])
200 if (memcmp(key
, key_hash
->key
, keylen
))
204 key_hash
= malloc(sizeof(*key_hash
));
205 if (key_hash
== NULL
)
207 key_hash
->key
= malloc(keylen
);
208 if (key_hash
->key
== NULL
) {
212 memcpy(key_hash
->key
, key
, keylen
);
213 key_hash
->hashes
[0] = hashes
[0];
214 key_hash
->hashes
[1] = hashes
[1];
215 key_hash
->hashes
[2] = hashes
[2];
216 key_hash
->keylen
= keylen
;
218 SLIST_INSERT_HEAD(head
, key_hash
, link
);
221 if (cdbw
->key_counter
<= cdbw
->hash_size
)
224 /* Try to resize the hash table, but ignore errors. */
225 new_hash_size
= cdbw
->hash_size
* 2;
226 new_head
= calloc(sizeof(*new_head
), new_hash_size
);
227 if (new_head
== NULL
)
230 head
= &cdbw
->hash
[hashes
[0] & (cdbw
->hash_size
- 1)];
231 for (i
= 0; i
< new_hash_size
; ++i
)
232 SLIST_INIT(new_head
+ i
);
234 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
235 head
= cdbw
->hash
+ i
;
237 while ((key_hash
= SLIST_FIRST(head
)) != NULL
) {
238 SLIST_REMOVE_HEAD(head
, link
);
240 (key_hash
->hashes
[0] & (new_hash_size
- 1));
241 SLIST_INSERT_HEAD(head2
, key_hash
, link
);
245 cdbw
->hash_size
= new_hash_size
;
246 cdbw
->hash
= new_head
;
252 cdbw_close(struct cdbw
*cdbw
)
254 struct key_hash_head
*head
;
255 struct key_hash
*key_hash
;
258 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
259 head
= cdbw
->hash
+ i
;
260 while ((key_hash
= SLIST_FIRST(head
)) != NULL
) {
261 SLIST_REMOVE_HEAD(head
, link
);
267 for (i
= 0; i
< cdbw
->data_counter
; ++i
)
268 free(cdbw
->data_ptr
[i
]);
269 free(cdbw
->data_ptr
);
270 free(cdbw
->data_len
);
276 cdbw_stable_seeder(void)
281 #define unused 0xffffffffU
284 uint32_t l_edge
, m_edge
, r_edge
;
290 uint32_t left
, middle
, right
;
291 uint32_t l_prev
, m_prev
, l_next
;
292 uint32_t r_prev
, m_next
, r_next
;
296 uint32_t data_entries
;
304 struct vertex
*verts
;
306 uint32_t output_index
;
307 uint32_t *output_order
;
311 remove_vertex(struct state
*state
, struct vertex
*v
)
314 struct vertex
*vl
, *vm
, *vr
;
316 if (v
->l_edge
!= unused
&& v
->m_edge
!= unused
)
318 if (v
->l_edge
!= unused
&& v
->r_edge
!= unused
)
320 if (v
->m_edge
!= unused
&& v
->r_edge
!= unused
)
322 if (v
->l_edge
== unused
&& v
->m_edge
== unused
&& v
->r_edge
== unused
)
325 if (v
->l_edge
!= unused
) {
326 e
= &state
->edges
[v
->l_edge
];
327 if (e
->l_next
!= unused
)
329 } else if (v
->m_edge
!= unused
) {
330 e
= &state
->edges
[v
->m_edge
];
331 if (e
->m_next
!= unused
)
334 if (v
->r_edge
== unused
)
336 e
= &state
->edges
[v
->r_edge
];
337 if (e
->r_next
!= unused
)
341 state
->output_order
[--state
->output_index
] = e
- state
->edges
;
343 vl
= &state
->verts
[e
->left
];
344 vm
= &state
->verts
[e
->middle
];
345 vr
= &state
->verts
[e
->right
];
347 if (e
->l_prev
== unused
)
348 vl
->l_edge
= e
->l_next
;
350 state
->edges
[e
->l_prev
].l_next
= e
->l_next
;
351 if (e
->l_next
!= unused
)
352 state
->edges
[e
->l_next
].l_prev
= e
->l_prev
;
354 if (e
->m_prev
== unused
)
355 vm
->m_edge
= e
->m_next
;
357 state
->edges
[e
->m_prev
].m_next
= e
->m_next
;
358 if (e
->m_next
!= unused
)
359 state
->edges
[e
->m_next
].m_prev
= e
->m_prev
;
361 if (e
->r_prev
== unused
)
362 vr
->r_edge
= e
->r_next
;
364 state
->edges
[e
->r_prev
].r_next
= e
->r_next
;
365 if (e
->r_next
!= unused
)
366 state
->edges
[e
->r_next
].r_prev
= e
->r_prev
;
370 build_graph(struct cdbw
*cdbw
, struct state
*state
)
372 struct key_hash_head
*head
;
373 struct key_hash
*key_hash
;
380 for (i
= 0; i
< cdbw
->hash_size
; ++i
) {
381 head
= &cdbw
->hash
[i
];
382 SLIST_FOREACH(key_hash
, head
, link
) {
383 e
->idx
= key_hash
->idx
;
384 mi_vector_hash(key_hash
->key
, key_hash
->keylen
,
385 state
->seed
, hashes
);
386 e
->left
= hashes
[0] % state
->entries
;
387 e
->middle
= hashes
[1] % state
->entries
;
388 e
->right
= hashes
[2] % state
->entries
;
390 if (e
->left
== e
->middle
)
392 if (e
->left
== e
->right
)
394 if (e
->middle
== e
->right
)
401 for (i
= 0; i
< state
->entries
; ++i
) {
402 v
= state
->verts
+ i
;
408 for (i
= 0; i
< state
->keys
; ++i
) {
409 e
= state
->edges
+ i
;
410 v
= state
->verts
+ e
->left
;
411 if (v
->l_edge
!= unused
)
412 state
->edges
[v
->l_edge
].l_prev
= i
;
413 e
->l_next
= v
->l_edge
;
417 v
= &state
->verts
[e
->middle
];
418 if (v
->m_edge
!= unused
)
419 state
->edges
[v
->m_edge
].m_prev
= i
;
420 e
->m_next
= v
->m_edge
;
424 v
= &state
->verts
[e
->right
];
425 if (v
->r_edge
!= unused
)
426 state
->edges
[v
->r_edge
].r_prev
= i
;
427 e
->r_next
= v
->r_edge
;
432 state
->output_index
= state
->keys
;
433 for (i
= 0; i
< state
->entries
; ++i
)
434 remove_vertex(state
, state
->verts
+ i
);
437 while (i
> 0 && i
> state
->output_index
) {
439 e
= state
->edges
+ state
->output_order
[i
];
440 remove_vertex(state
, state
->verts
+ e
->left
);
441 remove_vertex(state
, state
->verts
+ e
->middle
);
442 remove_vertex(state
, state
->verts
+ e
->right
);
445 return state
->output_index
== 0 ? 0 : -1;
449 assign_nodes(struct state
*state
)
454 for (i
= 0; i
< state
->keys
; ++i
) {
455 e
= state
->edges
+ state
->output_order
[i
];
457 if (!state
->visited
[e
->left
]) {
459 (2 * state
->data_entries
+ e
->idx
460 - state
->g
[e
->middle
] - state
->g
[e
->right
])
461 % state
->data_entries
;
462 } else if (!state
->visited
[e
->middle
]) {
463 state
->g
[e
->middle
] =
464 (2 * state
->data_entries
+ e
->idx
465 - state
->g
[e
->left
] - state
->g
[e
->right
])
466 % state
->data_entries
;
469 (2 * state
->data_entries
+ e
->idx
470 - state
->g
[e
->left
] - state
->g
[e
->middle
])
471 % state
->data_entries
;
473 state
->visited
[e
->left
] = 1;
474 state
->visited
[e
->middle
] = 1;
475 state
->visited
[e
->right
] = 1;
480 compute_size(uint32_t size
)
484 else if (size
< 0x10000)
490 #define COND_FLUSH_BUFFER(n) do { \
491 if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \
492 ret = write(fd, buf, cur_pos); \
493 if (ret == -1 || (size_t)ret != cur_pos) \
497 } while (/* CONSTCOND */ 0)
500 print_hash(struct cdbw
*cdbw
, struct state
*state
, int fd
, const char *descr
)
504 size_t i
, size
, size2
, cur_pos
;
507 memcpy(buf
, "NBCDB\n\0", 7);
509 strncpy((char *)buf
+ 8, descr
, 16);
510 le32enc(buf
+ 24, cdbw
->data_size
);
511 le32enc(buf
+ 28, cdbw
->data_counter
);
512 le32enc(buf
+ 32, state
->entries
);
513 le32enc(buf
+ 36, state
->seed
);
516 size
= compute_size(state
->entries
);
517 for (i
= 0; i
< state
->entries
; ++i
) {
518 COND_FLUSH_BUFFER(4);
519 le32enc(buf
+ cur_pos
, state
->g
[i
]);
522 size2
= compute_size(cdbw
->data_size
);
523 size
= size
* state
->entries
% size2
;
526 COND_FLUSH_BUFFER(4);
527 le32enc(buf
+ cur_pos
, 0);
530 for (data_size
= 0, i
= 0; i
< cdbw
->data_counter
; ++i
) {
531 COND_FLUSH_BUFFER(4);
532 le32enc(buf
+ cur_pos
, data_size
);
534 data_size
+= cdbw
->data_len
[i
];
536 COND_FLUSH_BUFFER(4);
537 le32enc(buf
+ cur_pos
, data_size
);
540 for (i
= 0; i
< cdbw
->data_counter
; ++i
) {
541 COND_FLUSH_BUFFER(cdbw
->data_len
[i
]);
542 if (cdbw
->data_len
[i
] < sizeof(buf
)) {
543 memcpy(buf
+ cur_pos
, cdbw
->data_ptr
[i
],
545 cur_pos
+= cdbw
->data_len
[i
];
547 ret
= write(fd
, cdbw
->data_ptr
[i
], cdbw
->data_len
[i
]);
548 if (ret
== -1 || (size_t)ret
!= cdbw
->data_len
[i
])
553 ret
= write(fd
, buf
, cur_pos
);
554 if (ret
== -1 || (size_t)ret
!= cur_pos
)
561 cdbw_output(struct cdbw
*cdbw
, int fd
, const char descr
[16],
562 uint32_t (*seedgen
)(void))
567 if (cdbw
->data_counter
== 0 || cdbw
->key_counter
== 0) {
570 print_hash(cdbw
, &state
, fd
, descr
);
574 #if HAVE_NBTOOL_CONFIG_H
576 seedgen
= cdbw_stable_seeder
;
579 seedgen
= arc4random
;
584 state
.keys
= cdbw
->key_counter
;
585 state
.data_entries
= cdbw
->data_counter
;
586 state
.entries
= state
.keys
+ (state
.keys
+ 3) / 4;
587 if (state
.entries
< 10)
590 #define NALLOC(var, n) var = calloc(sizeof(*var), n)
591 NALLOC(state
.g
, state
.entries
);
592 NALLOC(state
.visited
, state
.entries
);
593 NALLOC(state
.verts
, state
.entries
);
594 NALLOC(state
.edges
, state
.entries
);
595 NALLOC(state
.output_order
, state
.keys
);
598 if (state
.g
== NULL
|| state
.visited
== NULL
|| state
.verts
== NULL
||
599 state
.edges
== NULL
|| state
.output_order
== NULL
) {
606 if (seedgen
== cdbw_stable_seeder
)
609 state
.seed
= (*seedgen
)();
610 } while (build_graph(cdbw
, &state
));
612 assign_nodes(&state
);
613 rv
= print_hash(cdbw
, &state
, fd
, descr
);
620 free(state
.output_order
);