Add (void*) parameter to HashFunc definition
[Rpkg-dict.git] / src / dict.c
blob47f9ea523d92911dc9175a9579c549acd516f010
1 #include "dict.h"
3 /* .Call registration */
4 static const R_CallMethodDef R_CallDef[] =
6 {"dict_new_dict", (DL_FUNC)&dict_new_dict, 3},
7 {"dict_copy_dict", (DL_FUNC)&dict_copy_dict, 1},
8 {"dict_get_info", (DL_FUNC)&dict_get_info, 1},
9 {"dict_get_item", (DL_FUNC)&dict_get_item, 2},
10 {"dict_getm", (DL_FUNC)&dict_getm, 3},
11 {"dict_set_item", (DL_FUNC)&dict_set_item, 3},
12 {"dict_del_item", (DL_FUNC)&dict_del_item, 2},
13 {"dict_get_keys", (DL_FUNC)&dict_get_keys, 2},
14 {"dict_list_hashfuns", (DL_FUNC)&dict_list_hashfuns, 0},
15 {"dict_get_hash_codes", (DL_FUNC)&dict_get_hash_codes, 3},
16 {"dict_num_items", (DL_FUNC)&dict_num_items, 1},
17 {"dict_to_list", (DL_FUNC)&dict_to_list, 1},
18 {"dict_xp_tag_address", (DL_FUNC)&dict_xp_tag_address, 1},
19 {NULL, NULL, 0},
22 /* number of hash functions in Dict_HashFunctions */
23 static int g_hash_func_count = 0;
25 void R_init_dict(DllInfo *info)
27 int i;
29 R_registerRoutines(info, NULL, R_CallDef, NULL, NULL);
31 for (i = 0; Dict_HashFunctions[i].hash != NULL; i++); /* empty */
32 g_hash_func_count = i;
35 int is_hash_func_idx(int idx)
37 return idx >= 0 && idx < g_hash_func_count;
40 SEXP dict_list_hashfuns()
42 SEXP ans, nms;
43 int i, *code;
45 PROTECT(ans = allocVector(INTSXP, g_hash_func_count));
46 PROTECT(nms = allocVector(STRSXP, g_hash_func_count));
47 SET_NAMES(ans, nms);
48 code = INTEGER(ans);
49 for (i = 0; i < g_hash_func_count; i++) {
50 SET_STRING_ELT(nms, i, mkChar(Dict_HashFunctions[i].name));
51 code[i] = i;
53 UNPROTECT(2);
54 return ans;
57 SEXP dict_get_hash_codes(SEXP keys, SEXP table_size, SEXP alg)
59 SEXP ans;
60 int i, size, *pans, mask;
62 if (!isInteger(alg) || !is_hash_func_idx(INTEGER(alg)[0]))
63 error("'alg' must be an integer code matching a hash function");
65 if (!isInteger(table_size))
66 error("'table_size' must be an integer");
68 if (!isString(keys))
69 error("'keys' must be character");
71 HashFunc hash = Dict_HashFunctions[INTEGER(alg)[0]].hash;
73 size = INTEGER(table_size)[0];
74 mask = size - 1;
75 PROTECT(ans = allocVector(INTSXP, length(keys)));
76 pans = INTEGER(ans);
77 for (i = 0; i < length(keys); i++) {
78 pans[i] = (*hash)(CHAR(STRING_ELT(keys, i)), NULL) & mask;
80 SET_NAMES(ans, NAMED(keys) ? duplicate(keys) : keys);
81 UNPROTECT(1);
82 return ans;
85 static SEXP hashStats(SEXP table)
87 SEXP chain, ans, chain_counts, nms;
88 int i, count;
90 PROTECT(ans = allocVector(VECSXP, 3));
91 PROTECT(nms = allocVector(STRSXP, 3));
92 SET_STRING_ELT(nms, 0, mkChar("size")); /* total size of hashtable */
93 SET_STRING_ELT(nms, 1, mkChar("items")); /* number of items in the table */
94 SET_STRING_ELT(nms, 2, mkChar("counts")); /* length of each chain */
95 SET_NAMES(ans, nms);
96 UNPROTECT(1);
98 SET_VECTOR_ELT(ans, 0, ScalarInteger(length(table)));
99 SET_VECTOR_ELT(ans, 1, ScalarInteger(HASHCOUNT(table)));
101 PROTECT(chain_counts = allocVector(INTSXP, length(table)));
102 for (i = 0; i < length(table); i++) {
103 chain = VECTOR_ELT(table, i);
104 count = 0;
105 for (; chain != R_NilValue ; chain = CDR(chain)) {
106 count++;
108 INTEGER(chain_counts)[i] = count;
111 SET_VECTOR_ELT(ans, 2, chain_counts);
113 UNPROTECT(2);
114 return ans;
117 SEXP getHashStats(SEXP env)
119 SEXP ans = R_NilValue; /* -Wall */
121 if (isEnvironment(env)) {
122 if (IS_HASHED(env))
123 ans = hashStats(HASHTAB(env));
124 else
125 ans = ScalarInteger(NA_INTEGER);
126 } else
127 error("argument must be a hashed environment");
128 return ans;
132 SEXP dict_num_items(SEXP xp)
134 SEXP dict = R_ExternalPtrTag(xp);
135 return ScalarInteger(DICT_COUNT(dict));
138 static SEXP new_dict(int size, int alg, int type)
140 SEXP details, dict, table = R_NilValue; /* -Wall */
141 int *d;
142 PROTECT(dict = allocVector(VECSXP, 2));
143 switch (type) {
144 case CHAIN_DICT:
145 PROTECT(table = allocVector(VECSXP, size));
146 break;
147 case OADDR_DICT:
148 error("dict type %d not implemented", OADDR_DICT);
149 break;
150 default:
151 error("unknown dict type: %d", type);
153 PROTECT(details = allocVector(INTSXP, 4));
154 d = INTEGER(details);
155 d[DICT_TYPE_IDX] = type;
156 d[HASHFUN_IDX] = alg;
157 d[COUNT_IDX] = 0;
159 SET_VECTOR_ELT(dict, 0, details);
160 SET_VECTOR_ELT(dict, 1, table);
162 UNPROTECT(3);
163 return dict;
166 SEXP dict_new_dict(SEXP size, SEXP alg, SEXP type)
168 SEXP table, xp;
169 /* TODO: add error checking and validation of size and alg args */
170 /* XXX: size must be 2^n */
171 PROTECT(table = new_dict(INTEGER(size)[0], INTEGER(alg)[0],
172 INTEGER(type)[0]));
173 PROTECT(xp = R_MakeExternalPtr(NULL, R_NilValue, R_NilValue));
174 R_SetExternalPtrTag(xp, table);
175 UNPROTECT(2);
176 return xp;
179 static SEXP get_info(SEXP dict)
181 SEXP ans, nms;
182 PROTECT(ans = allocVector(VECSXP, 4));
183 PROTECT(nms = allocVector(STRSXP, 4));
184 SET_STRING_ELT(nms, 0, mkChar("size"));
185 SET_STRING_ELT(nms, 1, mkChar("nitems"));
186 SET_STRING_ELT(nms, 2, mkChar("type"));
187 SET_STRING_ELT(nms, 3, mkChar("alg"));
188 SET_NAMES(ans, nms);
190 SET_VECTOR_ELT(ans, 0, ScalarInteger(DICT_SIZE(dict)));
191 SET_VECTOR_ELT(ans, 1, ScalarInteger(DICT_COUNT(dict)));
192 SET_VECTOR_ELT(ans, 2, mkString(GET_DICT_TYPE_NAME(dict)));
193 SET_VECTOR_ELT(ans, 3, mkString(GET_HASHNAME(dict)));
195 UNPROTECT(2);
196 return ans;
199 SEXP dict_get_info(SEXP xp)
201 SEXP dict;
202 dict = R_ExternalPtrTag(xp);
203 return get_info(dict);
206 static SEXP get_item(SEXP dict, const char *key)
208 SEXP (*get_item)(SEXP, const char*) = GET_DICTGET(dict);
209 return (*get_item)(dict, key);
212 SEXP dict_get_item(SEXP xp, SEXP key)
214 SEXP dict, item;
215 const char *s = NULL;
217 if (isString(key))
218 s = CHAR(STRING_ELT(key, 0));
219 else if (TYPEOF(key) == CHARSXP)
220 s = CHAR(key);
221 else
222 error("arg 'key' must be a character vector");
223 dict = R_ExternalPtrTag(xp);
224 item = get_item(dict, s);
225 /* NOTE: I'm not sure this is what we want, but it seems to help
226 the case when unnamed items are in the dict, get retrieved and
227 then modified. But I don't quite follow because subassign
228 looks like it only checks for NAMED(obj) == 2 and here we only
229 set it to 1 and so I don't yet see where it gets incremented to
230 trigger duplication (but it seems to work as desired). */
231 if (!isNull(item) && NAMED(item) == 0)
232 SET_NAMED(item, 1);
233 return item;
236 static SEXP set_item(SEXP dict, const char *key, SEXP value)
238 SEXP (*set_item)(SEXP, const char*, SEXP) = GET_DICTSET(dict);
239 return (*set_item)(dict, key, NAMED(value) ? duplicate(value) : value);
242 SEXP dict_set_item(SEXP xp, SEXP key, SEXP value)
244 SEXP dict;
245 const char *s;
247 if (!isString(key))
248 error("arg 'key' must be a character vector");
249 s = CHAR(STRING_ELT(key, 0));
250 dict = R_ExternalPtrTag(xp);
251 if (is_dict_full(dict)) {
252 dict = resize_dict(dict, DICT_SIZE(dict) * 2);
253 R_SetExternalPtrTag(xp, dict);
254 #ifdef DEBUG_DICT_RESIZE
255 Rprintf("resized to %d\n", DICT_SIZE(dict));
256 #endif
258 return set_item(dict, s, value);
261 static SEXP del_item(SEXP dict, const char *key)
263 SEXP (*del_item)(SEXP, const char*) = GET_DICTDEL(dict);
264 return (*del_item)(dict, key);
267 SEXP dict_del_item(SEXP xp, SEXP key)
269 SEXP dict;
270 const char *s;
272 if (!isString(key))
273 error("arg 'key' must be a character vector");
274 s = CHAR(STRING_ELT(key, 0));
275 dict = R_ExternalPtrTag(xp);
276 return del_item(dict, s);
279 static SEXP get_keys(SEXP dict, int sort)
281 SEXP keys;
282 SEXP (*get_keys)(SEXP) = GET_DICTKEYS(dict);
283 PROTECT(keys = (*get_keys)(dict));
284 if (sort)
285 error("sorry, R decided not to make sortVector available");
286 UNPROTECT(1);
287 return keys;
291 SEXP dict_get_keys(SEXP xp, SEXP sort)
293 int do_sort = 0;
294 SEXP dict = R_ExternalPtrTag(xp);
295 if (!isLogical(sort) || (do_sort = LOGICAL(sort)[0]) == NA_LOGICAL)
296 error("'sort' must be TRUE or FALSE");
297 return get_keys(dict, do_sort);
300 static SEXP to_list(SEXP dict)
302 SEXP ans, keys, val;
303 int i;
305 PROTECT(keys = get_keys(dict, 0));
306 PROTECT(ans = allocVector(VECSXP, length(keys)));
307 for (i = 0; i < length(keys); i++) {
308 val = get_item(dict, CHAR(STRING_ELT(keys, i)));
309 SET_VECTOR_ELT(ans, i, NAMED(val) ? duplicate(val) : val);
311 SET_NAMES(ans, keys);
312 UNPROTECT(2);
313 return ans;
316 SEXP dict_to_list(SEXP xp)
318 SEXP dict = R_ExternalPtrTag(xp);
319 return to_list(dict);
322 int is_dict_full(SEXP dict)
324 /* resize based on number of items compared to table size */
325 if (DICT_COUNT(dict) > (DICT_SIZE(dict) * 10))
326 return 1;
327 else
328 return 0;
331 SEXP resize_dict(SEXP dict, int size)
333 SEXP newd;
335 PROTECT(newd = new_dict(size, DICT_ALG(dict), DICT_TYPE(dict)));
336 UNPROTECT(1);
337 return copy_dict(dict, newd);
340 SEXP copy_dict(SEXP srcdict, SEXP destdict)
342 SEXP keys;
343 int i;
344 const char *key;
345 PROTECT(keys = get_keys(srcdict, 0));
346 for (i = 0; i < length(keys); i++) {
347 key = CHAR(STRING_ELT(keys, i));
348 set_item(destdict, key, get_item(srcdict, key));
350 UNPROTECT(1);
351 return destdict;
354 SEXP dict_copy_dict(SEXP xp)
356 SEXP newxp;
357 SEXP dict = R_ExternalPtrTag(xp);
358 PROTECT(newxp = R_MakeExternalPtr(NULL, R_NilValue, R_NilValue));
359 R_SetExternalPtrTag(newxp, resize_dict(dict, DICT_SIZE(dict)));
360 UNPROTECT(1);
361 return newxp;
364 SEXP dict_xp_tag_address(SEXP xp)
366 SEXP ans;
367 char buf[100];
368 int over;
369 over = snprintf(buf, 100, "%p", (void *) R_ExternalPtrTag(xp));
370 ans = mkString(buf);
371 return ans;
374 SEXP dict_getm(SEXP xp, SEXP keys, SEXP ifnotfound)
376 SEXP ans, nms, dict, val, curkey;
377 int i, j;
379 PROTECT(nms = allocVector(STRSXP, length(keys)));
380 PROTECT(ans = allocVector(VECSXP, length(keys)));
382 dict = R_ExternalPtrTag(xp);
384 for (i = 0, j = 0; i < length(keys); i++) {
385 curkey = STRING_ELT(keys, i);
386 val = get_item(dict, CHAR(curkey));
387 /* FIXME: what about putting NULL as the value? How to distinguish? */
388 if (val == R_NilValue) {
389 if (ifnotfound == R_NilValue)
390 continue;
391 /* needed? */
392 val = duplicate(ifnotfound);
394 SET_VECTOR_ELT(ans, j, val);
395 SET_STRING_ELT(nms, j++, curkey);
397 if (j < i) { /* then resize */
398 UNPROTECT(1);
399 SET_LENGTH(ans, j); /* XXX: this can alloc */
400 PROTECT(ans);
401 SET_LENGTH(nms, j);
403 SET_NAMES(ans, nms);
404 UNPROTECT(2);
405 return ans;
408 #if 0
411 static SEXP ht_table_rehash(SEXP table, int new_size, int alg)
413 SEXP new_table, chain, new_chain, val;
414 int counter, new_hashcode;
416 if (TYPEOF(table) != VECSXP)
417 error("ht_table_rehash: argument 'table' not of type VECSXP");
419 /* Allocate the new hash table */
420 new_table = ht_new_table(ScalarInteger(new_size));
421 PROTECT(new_table);
423 for (counter = 0; counter < length(table); counter++) {
424 chain = VECTOR_ELT(table, counter);
425 while (!isNull(chain)) {
426 val = CAR(chain);
427 new_hashcode = hashcode(CHAR(val), new_size, alg);
428 new_chain = VECTOR_ELT(new_table, new_hashcode);
429 SET_VECTOR_ELT(new_table, new_hashcode, CONS(val, new_chain));
430 chain = NEXT_CHAIN_EL(chain);
433 SET_HASHCOUNT(new_table, HASHCOUNT(table));
435 #ifdef DEBUG_HT_RESIZE
436 char *status = HASHPRI(new_table) > HASHPRI(table) ? " OK " : "FAIL";
437 Rprintf("Resized: size %d => %d\tpri %d => %d\t%s\n",
438 HASHSIZE(table), HASHSIZE(new_table),
439 HASHPRI(table), HASHPRI(new_table), status);
440 #endif
441 UNPROTECT(1);
442 return new_table;
445 SEXP ht_hashtable_getm(SEXP xp, SEXP keys, SEXP ifnotfound, SEXP hash)
447 SEXP ans, table, val;
448 int i, j;
450 PROTECT(ans = allocVector(VECSXP, length(keys)));
451 table = R_ExternalPtrTag(xp);
453 for (i = 0, j = 0; i < length(keys); i++) {
454 val = ht_table_get(table, ScalarString(STRING_ELT(keys, i)), hash);
455 /* FIXME: what about putting NULL as the value? How to distinguish? */
456 if (val == R_NilValue) {
457 if (ifnotfound == R_NilValue)
458 continue;
459 /* needed? */
460 val = duplicate(ifnotfound);
462 SET_VECTOR_ELT(ans, j++, val);
464 if (j < i) /* then resize */
465 SET_LENGTH(ans, j);
466 UNPROTECT(1);
467 return ans;
471 int ht_table_check_size1(SEXP table)
473 /* resize based on number of items compared to table size */
474 if ( (HASHSIZE(table) * 10) < HASHCOUNT(table))
475 return 1;
476 else
477 return 0;
480 SEXP ht_hashtable_aslist(SEXP xp)
482 SEXP ans, nms;
483 SEXP table, chain;
484 int i, j;
486 table = R_ExternalPtrTag(xp);
487 PROTECT(ans = allocVector(VECSXP, HASHCOUNT(table)));
488 PROTECT(nms = allocVector(STRSXP, HASHCOUNT(table)));
490 for (i = 0, j = 0; i < HASHSIZE(table); i++) {
491 chain = VECTOR_ELT(table, i);
492 for (; chain != R_NilValue; chain = CDR(chain)) {
493 SET_VECTOR_ELT(ans, j, duplicate(CAR(chain)));
494 SET_STRING_ELT(nms, j, duplicate(TAG(chain)));
495 j++;
499 setAttrib(ans, R_NamesSymbol, nms);
500 UNPROTECT(2);
501 return ans;
504 SEXP ht_hashtable_keys(SEXP xp)
506 SEXP nms;
507 SEXP table, chain;
508 int i, j;
510 table = R_ExternalPtrTag(xp);
511 PROTECT(nms = allocVector(STRSXP, HASHCOUNT(table)));
513 for (i = 0, j = 0; i < HASHSIZE(table); i++) {
514 chain = VECTOR_ELT(table, i);
515 for (; chain != R_NilValue; chain = CDR(chain)) {
516 SET_STRING_ELT(nms, j, duplicate(TAG(chain)));
517 j++;
521 UNPROTECT(1);
522 return nms;
528 #endif