modified: myjupyterlab.sh
[GalaxyCodeBases.git] / c_cpp / lib / cstring / cstring.c
blob9946898c18eb7aa4b2cc2cbe8e6b3615d66007e2
1 #include "cstring.h"
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <assert.h>
6 #include <string.h>
7 #include <stdarg.h>
9 #define FORMAT_TEMP_SIZE 1024
11 #define INTERNING_POOL_SIZE 1024
12 // HASH_START_SIZE must be 2 pow
13 #define HASH_START_SIZE 16
15 struct string_node {
16 struct cstring_data str;
17 char buffer[CSTRING_INTERNING_SIZE];
18 struct string_node * next;
21 struct string_pool {
22 struct string_node node[INTERNING_POOL_SIZE];
25 struct string_interning {
26 int lock;
27 int size;
28 struct string_node ** hash;
29 struct string_pool * pool;
30 int index;
31 int total;
34 static struct string_interning S;
36 static inline void
37 LOCK() {
38 while (__sync_lock_test_and_set(&(S.lock),1)) {}
41 static inline void
42 UNLOCK() {
43 __sync_lock_release(&(S.lock));
46 static void
47 insert_node(struct string_node ** hash, int sz, struct string_node *n) {
48 uint32_t h = n->str.hash_size;
49 int index = h & (sz-1);
50 n->next = hash[index];
51 hash[index] = n;
54 static void
55 expand(struct string_interning * si) {
56 int new_size = si->size * 2;
57 if (new_size < HASH_START_SIZE) {
58 new_size = HASH_START_SIZE;
60 assert(new_size > si->total);
61 struct string_node ** new_hash = malloc(sizeof(struct string_node *) * new_size);
62 memset(new_hash, 0, sizeof(struct string_node *) * new_size);
63 int i;
64 for (i=0;i<si->size;i++) {
65 struct string_node *node = si->hash[i];
66 while (node) {
67 struct string_node * tmp = node->next;
68 insert_node(new_hash, new_size, node);
69 node = tmp;
72 free(si->hash);
73 si->hash = new_hash;
74 si->size = new_size;
77 static cstring
78 interning(struct string_interning * si, const char * cstr, size_t sz, uint32_t hash) {
79 if (si->hash == NULL) {
80 return NULL;
82 int index = (int)(hash & (si->size-1));
83 struct string_node * n = si->hash[index];
84 while(n) {
85 if (n->str.hash_size == hash) {
86 if (strcmp(n->str.cstr, cstr) == 0) {
87 return &n->str;
90 n = n->next;
92 // 80% (4/5) threshold
93 if (si->total * 5 >= si->size * 4) {
94 return NULL;
96 if (si->pool == NULL) {
97 // need not free pool
98 // todo: check memory alloc error
99 si->pool = malloc(sizeof(struct string_pool));
100 assert(si->pool);
101 si->index = 0;
103 n = &si->pool->node[si->index++];
104 memcpy(n->buffer, cstr, sz);
105 n->buffer[sz] = '\0';
107 cstring cs = &n->str;
108 cs->cstr = n->buffer;
109 cs->hash_size = hash;
110 cs->type = CSTRING_INTERNING;
111 cs->ref = 0;
113 n->next = si->hash[index];
114 si->hash[index] = n;
116 return cs;
119 static cstring
120 cstring_interning(const char * cstr, size_t sz, uint32_t hash) {
121 cstring ret;
122 LOCK();
123 ret = interning(&S, cstr, sz, hash);
124 if (ret == NULL) {
125 expand(&S);
126 ret = interning(&S, cstr, sz, hash);
128 ++S.total;
129 UNLOCK();
130 assert(ret);
131 return ret;
135 static uint32_t
136 hash_blob(const char * buffer, size_t len) {
137 const uint8_t * ptr = (const uint8_t *) buffer;
138 size_t h = len;
139 size_t step = (len>>5)+1;
140 size_t i;
141 for (i=len; i>=step; i-=step)
142 h = h ^ ((h<<5)+(h>>2)+ptr[i-1]);
143 if (h == 0)
144 return 1;
145 else
146 return h;
149 void
150 cstring_free_persist(cstring s) {
151 if (s->type == CSTRING_PERMANENT) {
152 free(s);
156 static cstring
157 cstring_clone(const char * cstr, size_t sz) {
158 if (sz < CSTRING_INTERNING_SIZE) {
159 return cstring_interning(cstr, sz, hash_blob(cstr,sz));
161 struct cstring_data * p = malloc(sizeof(struct cstring_data) + sz + 1);
162 // todo: memory alloc error
163 assert(p);
164 void * ptr = (void *)(p + 1);
165 p->cstr = ptr;
166 p->type = 0;
167 p->ref = 1;
168 memcpy(ptr, cstr, sz);
169 ((char *)ptr)[sz] = '\0';
170 p->hash_size = 0;
171 return p;
174 cstring
175 cstring_persist(const char * cstr, size_t sz) {
176 cstring s = cstring_clone(cstr, sz);
177 if (s->type == 0) {
178 s->type = CSTRING_PERMANENT;
179 s->ref = 0;
181 return s;
184 cstring
185 cstring_grab(cstring s) {
186 if (s->type & (CSTRING_PERMANENT | CSTRING_INTERNING)) {
187 return s;
189 if (s->type == CSTRING_ONSTACK) {
190 cstring tmp = cstring_clone(s->cstr, s->hash_size);
191 return tmp;
192 } else {
193 if (s->ref == 0) {
194 s->type = CSTRING_PERMANENT;
195 } else {
196 __sync_add_and_fetch(&s->ref,1);
198 return s;
202 void
203 cstring_release(cstring s) {
204 if (s->type != 0) {
205 return;
207 if (s->ref == 0) {
208 return;
210 if (__sync_sub_and_fetch(&s->ref,1) == 0) {
211 free(s);
215 uint32_t
216 cstring_hash(cstring s) {
217 if (s->type == CSTRING_ONSTACK)
218 return hash_blob(s->cstr, s->hash_size);
219 if (s->hash_size == 0) {
220 s->hash_size = hash_blob(s->cstr, strlen(s->cstr));
222 return s->hash_size;
225 int
226 cstring_equal(cstring a, cstring b) {
227 if (a == b)
228 return 1;
229 if ((a->type == CSTRING_INTERNING) &&
230 (b->type == CSTRING_INTERNING)) {
231 return 0;
233 if ((a->type == CSTRING_ONSTACK) &&
234 (b->type == CSTRING_ONSTACK)) {
235 if (a->hash_size != b->hash_size) {
236 return 0;
238 return memcmp(a->cstr, b->cstr, a->hash_size) == 0;
240 uint32_t hasha = cstring_hash(a);
241 uint32_t hashb = cstring_hash(b);
242 if (hasha != hashb) {
243 return 0;
245 return strcmp(a->cstr, b->cstr) == 0;
248 static cstring
249 cstring_cat2(const char * a, const char * b) {
250 size_t sa = strlen(a);
251 size_t sb = strlen(b);
252 if (sa + sb < CSTRING_INTERNING_SIZE) {
253 char tmp[CSTRING_INTERNING_SIZE];
254 memcpy(tmp, a, sa);
255 memcpy(tmp+sa, b, sb);
256 tmp[sa+sb] = '\0';
257 return cstring_interning(tmp, sa+sb, hash_blob(tmp,sa+sb));
259 struct cstring_data * p = malloc(sizeof(struct cstring_data) + sa + sb + 1);
260 // todo: memory alloc error
261 assert(p);
262 char * ptr = (char *)(p + 1);
263 p->cstr = ptr;
264 p->type = 0;
265 p->ref = 1;
266 memcpy(ptr, a, sa);
267 memcpy(ptr+sa, b, sb);
268 ptr[sa+sb] = '\0';
269 p->hash_size = 0;
270 return p;
273 cstring
274 cstring_cat(cstring_buffer sb, const char * str) {
275 cstring s = sb->str;
276 if (s->type == CSTRING_ONSTACK) {
277 int i = (int)s->hash_size;
278 while (i < CSTRING_STACK_SIZE-1) {
279 s->cstr[i] = *str;
280 if (*str == '\0') {
281 return s;
283 ++s->hash_size;
284 ++str;
285 ++i;
287 s->cstr[i] = '\0';
289 cstring tmp = s;
290 sb->str = cstring_cat2(tmp->cstr, str);
291 cstring_release(tmp);
292 return sb->str;
295 static cstring
296 cstring_format(const char * format, va_list ap) {
297 static char * cache = NULL;
298 char * result;
299 char * temp = cache;
300 // read cache buffer atomic
301 if (temp) {
302 temp = __sync_val_compare_and_swap(&cache, temp, NULL);
304 if (temp == NULL) {
305 temp = (char *)malloc(FORMAT_TEMP_SIZE);
306 // todo : check malloc
307 assert(temp);
309 va_list ap2;
310 va_copy(ap2, ap);
311 int n = vsnprintf(temp, FORMAT_TEMP_SIZE, format, ap2);
312 if (n >= FORMAT_TEMP_SIZE) {
313 int sz = FORMAT_TEMP_SIZE * 2;
314 for (;;) {
315 result = malloc(sz);
316 // todo : check malloc
317 assert(result);
318 va_copy(ap2, ap);
319 n = vsnprintf(result, sz, format, ap2);
320 if (n >= sz) {
321 free(result);
322 sz *= 2;
323 } else {
324 break;
327 } else {
328 result = temp;
330 cstring r = (cstring)malloc(sizeof(struct cstring_data) + n + 1);
331 // todo : check malloc
332 assert(r);
333 r->cstr = (char *)(r+1);
334 r->type = 0;
335 r->ref = 1;
336 r->hash_size = 0;
337 memcpy(r->cstr, result, n+1);
338 if (temp != result) {
339 free(result);
341 // save temp atomic
342 if (!__sync_bool_compare_and_swap(&cache, NULL, temp)) {
343 free(temp);
344 } else {
347 return r;
350 cstring
351 cstring_printf(cstring_buffer sb, const char * format, ...) {
352 cstring s = sb->str;
353 va_list ap;
354 va_start(ap, format);
355 if (s->type == CSTRING_ONSTACK) {
356 int n = vsnprintf(s->cstr, CSTRING_STACK_SIZE, format, ap);
357 if (n >= CSTRING_STACK_SIZE) {
358 va_end(ap);
359 va_start(ap, format);
360 s = cstring_format(format, ap);
361 sb->str = s;
362 } else {
363 s->hash_size = n;
365 } else {
366 cstring_release(sb->str);
367 s = cstring_format(format, ap);
368 sb->str = s;
370 va_end(ap);
371 return s;