More logically order libpq func. includes, e.g., group GUC vals
[pgsql.git] / contrib / hstore / hstore_gist.c
bloba3b08af385016c0bdf0db6379a6fe7f0a11cfaea
1 /*
2 * contrib/hstore/hstore_gist.c
3 */
4 #include "postgres.h"
6 #include "access/gist.h"
7 #include "access/reloptions.h"
8 #include "access/stratnum.h"
9 #include "catalog/pg_type.h"
10 #include "common/int.h"
11 #include "hstore.h"
12 #include "utils/pg_crc.h"
14 /* gist_hstore_ops opclass options */
15 typedef struct
17 int32 vl_len_; /* varlena header (do not touch directly!) */
18 int siglen; /* signature length in bytes */
19 } GistHstoreOptions;
21 /* bigint defines */
22 #define BITBYTE 8
23 #define SIGLEN_DEFAULT (sizeof(int32) * 4)
24 #define SIGLEN_MAX GISTMaxIndexKeySize
25 #define SIGLENBIT(siglen) ((siglen) * BITBYTE)
26 #define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
27 ((GistHstoreOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
28 SIGLEN_DEFAULT)
31 typedef char *BITVECP;
33 #define LOOPBYTE(siglen) \
34 for (i = 0; i < (siglen); i++)
36 #define LOOPBIT(siglen) \
37 for (i = 0; i < SIGLENBIT(siglen); i++)
39 /* beware of multiple evaluation of arguments to these macros! */
40 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
41 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
42 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
43 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
44 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
45 #define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
46 #define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
48 typedef struct
50 int32 vl_len_; /* varlena header (do not touch directly!) */
51 int32 flag;
52 char data[FLEXIBLE_ARRAY_MEMBER];
53 } GISTTYPE;
55 #define ALLISTRUE 0x04
57 #define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE )
59 #define GTHDRSIZE (VARHDRSZ + sizeof(int32))
60 #define CALCGTSIZE(flag, siglen) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : (siglen)) )
62 #define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
64 #define SUMBIT(val) ( \
65 GETBITBYTE((val),0) + \
66 GETBITBYTE((val),1) + \
67 GETBITBYTE((val),2) + \
68 GETBITBYTE((val),3) + \
69 GETBITBYTE((val),4) + \
70 GETBITBYTE((val),5) + \
71 GETBITBYTE((val),6) + \
72 GETBITBYTE((val),7) \
75 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
77 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
79 /* shorthand for calculating CRC-32 of a single chunk of data. */
80 static pg_crc32
81 crc32_sz(const char *buf, int size)
83 pg_crc32 crc;
85 INIT_TRADITIONAL_CRC32(crc);
86 COMP_TRADITIONAL_CRC32(crc, buf, size);
87 FIN_TRADITIONAL_CRC32(crc);
89 return crc;
93 PG_FUNCTION_INFO_V1(ghstore_in);
94 PG_FUNCTION_INFO_V1(ghstore_out);
97 Datum
98 ghstore_in(PG_FUNCTION_ARGS)
100 ereport(ERROR,
101 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
102 errmsg("cannot accept a value of type %s", "ghstore")));
104 PG_RETURN_VOID(); /* keep compiler quiet */
107 Datum
108 ghstore_out(PG_FUNCTION_ARGS)
110 ereport(ERROR,
111 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
112 errmsg("cannot display a value of type %s", "ghstore")));
114 PG_RETURN_VOID(); /* keep compiler quiet */
117 static GISTTYPE *
118 ghstore_alloc(bool allistrue, int siglen, BITVECP sign)
120 int flag = allistrue ? ALLISTRUE : 0;
121 int size = CALCGTSIZE(flag, siglen);
122 GISTTYPE *res = palloc(size);
124 SET_VARSIZE(res, size);
125 res->flag = flag;
127 if (!allistrue)
129 if (sign)
130 memcpy(GETSIGN(res), sign, siglen);
131 else
132 memset(GETSIGN(res), 0, siglen);
135 return res;
138 PG_FUNCTION_INFO_V1(ghstore_consistent);
139 PG_FUNCTION_INFO_V1(ghstore_compress);
140 PG_FUNCTION_INFO_V1(ghstore_decompress);
141 PG_FUNCTION_INFO_V1(ghstore_penalty);
142 PG_FUNCTION_INFO_V1(ghstore_picksplit);
143 PG_FUNCTION_INFO_V1(ghstore_union);
144 PG_FUNCTION_INFO_V1(ghstore_same);
145 PG_FUNCTION_INFO_V1(ghstore_options);
147 Datum
148 ghstore_compress(PG_FUNCTION_ARGS)
150 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
151 int siglen = GET_SIGLEN();
152 GISTENTRY *retval = entry;
154 if (entry->leafkey)
156 GISTTYPE *res = ghstore_alloc(false, siglen, NULL);
157 HStore *val = DatumGetHStoreP(entry->key);
158 HEntry *hsent = ARRPTR(val);
159 char *ptr = STRPTR(val);
160 int count = HS_COUNT(val);
161 int i;
163 for (i = 0; i < count; ++i)
165 int h;
167 h = crc32_sz((char *) HSTORE_KEY(hsent, ptr, i),
168 HSTORE_KEYLEN(hsent, i));
169 HASH(GETSIGN(res), h, siglen);
170 if (!HSTORE_VALISNULL(hsent, i))
172 h = crc32_sz((char *) HSTORE_VAL(hsent, ptr, i),
173 HSTORE_VALLEN(hsent, i));
174 HASH(GETSIGN(res), h, siglen);
178 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
179 gistentryinit(*retval, PointerGetDatum(res),
180 entry->rel, entry->page,
181 entry->offset,
182 false);
184 else if (!ISALLTRUE(DatumGetPointer(entry->key)))
186 int32 i;
187 GISTTYPE *res;
188 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
190 LOOPBYTE(siglen)
192 if ((sign[i] & 0xff) != 0xff)
193 PG_RETURN_POINTER(retval);
196 res = ghstore_alloc(true, siglen, NULL);
198 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
199 gistentryinit(*retval, PointerGetDatum(res),
200 entry->rel, entry->page,
201 entry->offset,
202 false);
205 PG_RETURN_POINTER(retval);
209 * Since type ghstore isn't toastable (and doesn't need to be),
210 * this function can be a no-op.
212 Datum
213 ghstore_decompress(PG_FUNCTION_ARGS)
215 PG_RETURN_POINTER(PG_GETARG_POINTER(0));
218 Datum
219 ghstore_same(PG_FUNCTION_ARGS)
221 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
222 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
223 bool *result = (bool *) PG_GETARG_POINTER(2);
224 int siglen = GET_SIGLEN();
227 if (ISALLTRUE(a) && ISALLTRUE(b))
228 *result = true;
229 else if (ISALLTRUE(a))
230 *result = false;
231 else if (ISALLTRUE(b))
232 *result = false;
233 else
235 int32 i;
236 BITVECP sa = GETSIGN(a),
237 sb = GETSIGN(b);
239 *result = true;
240 LOOPBYTE(siglen)
242 if (sa[i] != sb[i])
244 *result = false;
245 break;
249 PG_RETURN_POINTER(result);
252 static int32
253 sizebitvec(BITVECP sign, int siglen)
255 int32 size = 0,
258 LOOPBYTE(siglen)
260 size += SUMBIT(sign);
261 sign = (BITVECP) (((char *) sign) + 1);
263 return size;
266 static int
267 hemdistsign(BITVECP a, BITVECP b, int siglen)
269 int i,
270 dist = 0;
272 LOOPBIT(siglen)
274 if (GETBIT(a, i) != GETBIT(b, i))
275 dist++;
277 return dist;
280 static int
281 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
283 if (ISALLTRUE(a))
285 if (ISALLTRUE(b))
286 return 0;
287 else
288 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
290 else if (ISALLTRUE(b))
291 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
293 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
296 static int32
297 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
299 int32 i;
300 BITVECP sadd = GETSIGN(add);
302 if (ISALLTRUE(add))
303 return 1;
304 LOOPBYTE(siglen)
305 sbase[i] |= sadd[i];
306 return 0;
309 Datum
310 ghstore_union(PG_FUNCTION_ARGS)
312 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
313 int32 len = entryvec->n;
315 int *size = (int *) PG_GETARG_POINTER(1);
316 int siglen = GET_SIGLEN();
317 int32 i;
318 GISTTYPE *result = ghstore_alloc(false, siglen, NULL);
319 BITVECP base = GETSIGN(result);
321 for (i = 0; i < len; i++)
323 if (unionkey(base, GETENTRY(entryvec, i), siglen))
325 result->flag |= ALLISTRUE;
326 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
327 break;
331 *size = VARSIZE(result);
333 PG_RETURN_POINTER(result);
336 Datum
337 ghstore_penalty(PG_FUNCTION_ARGS)
339 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
340 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
341 float *penalty = (float *) PG_GETARG_POINTER(2);
342 int siglen = GET_SIGLEN();
343 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
344 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
346 *penalty = hemdist(origval, newval, siglen);
347 PG_RETURN_POINTER(penalty);
351 typedef struct
353 OffsetNumber pos;
354 int32 cost;
355 } SPLITCOST;
357 static int
358 comparecost(const void *a, const void *b)
360 return pg_cmp_s32(((const SPLITCOST *) a)->cost,
361 ((const SPLITCOST *) b)->cost);
365 Datum
366 ghstore_picksplit(PG_FUNCTION_ARGS)
368 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
369 OffsetNumber maxoff = entryvec->n - 2;
371 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
372 int siglen = GET_SIGLEN();
373 OffsetNumber k,
375 GISTTYPE *datum_l,
376 *datum_r;
377 BITVECP union_l,
378 union_r;
379 int32 size_alpha,
380 size_beta;
381 int32 size_waste,
382 waste = -1;
383 int32 nbytes;
384 OffsetNumber seed_1 = 0,
385 seed_2 = 0;
386 OffsetNumber *left,
387 *right;
388 BITVECP ptr;
389 int i;
390 SPLITCOST *costvector;
391 GISTTYPE *_k,
392 *_j;
394 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
395 v->spl_left = (OffsetNumber *) palloc(nbytes);
396 v->spl_right = (OffsetNumber *) palloc(nbytes);
398 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
400 _k = GETENTRY(entryvec, k);
401 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
403 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
404 if (size_waste > waste)
406 waste = size_waste;
407 seed_1 = k;
408 seed_2 = j;
413 left = v->spl_left;
414 v->spl_nleft = 0;
415 right = v->spl_right;
416 v->spl_nright = 0;
418 if (seed_1 == 0 || seed_2 == 0)
420 seed_1 = 1;
421 seed_2 = 2;
424 /* form initial .. */
425 datum_l = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
426 GETSIGN(GETENTRY(entryvec, seed_1)));
427 datum_r = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
428 GETSIGN(GETENTRY(entryvec, seed_2)));
430 maxoff = OffsetNumberNext(maxoff);
431 /* sort before ... */
432 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
433 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
435 costvector[j - 1].pos = j;
436 _j = GETENTRY(entryvec, j);
437 size_alpha = hemdist(datum_l, _j, siglen);
438 size_beta = hemdist(datum_r, _j, siglen);
439 costvector[j - 1].cost = abs(size_alpha - size_beta);
441 qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
443 union_l = GETSIGN(datum_l);
444 union_r = GETSIGN(datum_r);
446 for (k = 0; k < maxoff; k++)
448 j = costvector[k].pos;
449 if (j == seed_1)
451 *left++ = j;
452 v->spl_nleft++;
453 continue;
455 else if (j == seed_2)
457 *right++ = j;
458 v->spl_nright++;
459 continue;
461 _j = GETENTRY(entryvec, j);
462 size_alpha = hemdist(datum_l, _j, siglen);
463 size_beta = hemdist(datum_r, _j, siglen);
465 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
467 if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
469 if (!ISALLTRUE(datum_l))
470 memset(union_l, 0xff, siglen);
472 else
474 ptr = GETSIGN(_j);
475 LOOPBYTE(siglen)
476 union_l[i] |= ptr[i];
478 *left++ = j;
479 v->spl_nleft++;
481 else
483 if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
485 if (!ISALLTRUE(datum_r))
486 memset(union_r, 0xff, siglen);
488 else
490 ptr = GETSIGN(_j);
491 LOOPBYTE(siglen)
492 union_r[i] |= ptr[i];
494 *right++ = j;
495 v->spl_nright++;
499 *right = *left = FirstOffsetNumber;
501 v->spl_ldatum = PointerGetDatum(datum_l);
502 v->spl_rdatum = PointerGetDatum(datum_r);
504 PG_RETURN_POINTER(v);
508 Datum
509 ghstore_consistent(PG_FUNCTION_ARGS)
511 GISTTYPE *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
512 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
514 /* Oid subtype = PG_GETARG_OID(3); */
515 bool *recheck = (bool *) PG_GETARG_POINTER(4);
516 int siglen = GET_SIGLEN();
517 bool res = true;
518 BITVECP sign;
520 /* All cases served by this function are inexact */
521 *recheck = true;
523 if (ISALLTRUE(entry))
524 PG_RETURN_BOOL(true);
526 sign = GETSIGN(entry);
528 if (strategy == HStoreContainsStrategyNumber ||
529 strategy == HStoreOldContainsStrategyNumber)
531 HStore *query = PG_GETARG_HSTORE_P(1);
532 HEntry *qe = ARRPTR(query);
533 char *qv = STRPTR(query);
534 int count = HS_COUNT(query);
535 int i;
537 for (i = 0; res && i < count; ++i)
539 int crc = crc32_sz((char *) HSTORE_KEY(qe, qv, i),
540 HSTORE_KEYLEN(qe, i));
542 if (GETBIT(sign, HASHVAL(crc, siglen)))
544 if (!HSTORE_VALISNULL(qe, i))
546 crc = crc32_sz((char *) HSTORE_VAL(qe, qv, i),
547 HSTORE_VALLEN(qe, i));
548 if (!GETBIT(sign, HASHVAL(crc, siglen)))
549 res = false;
552 else
553 res = false;
556 else if (strategy == HStoreExistsStrategyNumber)
558 text *query = PG_GETARG_TEXT_PP(1);
559 int crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
561 res = (GETBIT(sign, HASHVAL(crc, siglen))) ? true : false;
563 else if (strategy == HStoreExistsAllStrategyNumber)
565 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
566 Datum *key_datums;
567 bool *key_nulls;
568 int key_count;
569 int i;
571 deconstruct_array_builtin(query, TEXTOID, &key_datums, &key_nulls, &key_count);
573 for (i = 0; res && i < key_count; ++i)
575 int crc;
577 if (key_nulls[i])
578 continue;
579 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
580 if (!(GETBIT(sign, HASHVAL(crc, siglen))))
581 res = false;
584 else if (strategy == HStoreExistsAnyStrategyNumber)
586 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
587 Datum *key_datums;
588 bool *key_nulls;
589 int key_count;
590 int i;
592 deconstruct_array_builtin(query, TEXTOID, &key_datums, &key_nulls, &key_count);
594 res = false;
596 for (i = 0; !res && i < key_count; ++i)
598 int crc;
600 if (key_nulls[i])
601 continue;
602 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
603 if (GETBIT(sign, HASHVAL(crc, siglen)))
604 res = true;
607 else
608 elog(ERROR, "Unsupported strategy number: %d", strategy);
610 PG_RETURN_BOOL(res);
613 Datum
614 ghstore_options(PG_FUNCTION_ARGS)
616 local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
618 init_local_reloptions(relopts, sizeof(GistHstoreOptions));
619 add_local_int_reloption(relopts, "siglen",
620 "signature length in bytes",
621 SIGLEN_DEFAULT, 1, SIGLEN_MAX,
622 offsetof(GistHstoreOptions, siglen));
624 PG_RETURN_VOID();