1 /*-------------------------------------------------------------------------
4 * GiST support functions for tsvector_ops
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
12 *-------------------------------------------------------------------------
17 #include "access/gist.h"
18 #include "access/tuptoaster.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/pg_crc.h"
24 #define SIGLENINT 31 /* >121 => key will toast, so it will not work
27 #define SIGLEN ( sizeof(int4) * SIGLENINT )
28 #define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
30 typedef char BITVEC
[SIGLEN
];
31 typedef char *BITVECP
;
36 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
37 #define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
38 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
39 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
40 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
42 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
43 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
45 #define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
48 * type of GiST index key
53 int32 vl_len_
; /* varlena header (do not touch directly!) */
60 #define ALLISTRUE 0x04
62 #define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
63 #define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY )
64 #define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE )
66 #define GTHDRSIZE ( VARHDRSZ + sizeof(int4) )
67 #define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
69 #define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
70 #define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
71 #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
73 /* Number of one-bits in an unsigned byte */
74 static const uint8 number_of_ones
[256] = {
75 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
76 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
77 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
78 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
79 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
80 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
81 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
82 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
83 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
84 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
85 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
86 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
87 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
88 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
89 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
90 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
93 static int4
sizebitvec(BITVECP sign
);
96 gtsvectorin(PG_FUNCTION_ARGS
)
99 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
100 errmsg("gtsvector_in not implemented")));
104 #define SINGOUTSTR "%d true bits, %d false bits"
105 #define ARROUTSTR "%d unique words"
106 #define EXTRALEN ( 2*13 )
108 static int outbuf_maxlen
= 0;
111 gtsvectorout(PG_FUNCTION_ARGS
)
113 SignTSVector
*key
= (SignTSVector
*) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
116 if (outbuf_maxlen
== 0)
117 outbuf_maxlen
= 2 * EXTRALEN
+ Max(strlen(SINGOUTSTR
), strlen(ARROUTSTR
)) + 1;
118 outbuf
= palloc(outbuf_maxlen
);
121 sprintf(outbuf
, ARROUTSTR
, (int) ARRNELEM(key
));
124 int cnttrue
= (ISALLTRUE(key
)) ? SIGLENBIT
: sizebitvec(GETSIGN(key
));
126 sprintf(outbuf
, SINGOUTSTR
, cnttrue
, (int) SIGLENBIT
- cnttrue
);
129 PG_FREE_IF_COPY(key
, 0);
130 PG_RETURN_POINTER(outbuf
);
134 compareint(const void *va
, const void *vb
)
136 int4 a
= *((int4
*) va
);
137 int4 b
= *((int4
*) vb
);
141 return (a
> b
) ? 1 : -1;
145 * Removes duplicates from an array of int4. 'l' is
146 * size of the input array. Returns the new size of the array.
149 uniqueint(int4
*a
, int4 l
)
159 qsort((void *) a
, l
, sizeof(int4
), compareint
);
170 makesign(BITVECP sign
, SignTSVector
*a
)
174 int4
*ptr
= GETARR(a
);
176 MemSet((void *) sign
, 0, sizeof(BITVEC
));
177 for (k
= 0; k
< len
; k
++)
182 gtsvector_compress(PG_FUNCTION_ARGS
)
184 GISTENTRY
*entry
= (GISTENTRY
*) PG_GETARG_POINTER(0);
185 GISTENTRY
*retval
= entry
;
190 TSVector val
= DatumGetTSVector(entry
->key
);
193 WordEntry
*ptr
= ARRPTR(val
);
194 char *words
= STRPTR(val
);
196 len
= CALCGTSIZE(ARRKEY
, val
->size
);
197 res
= (SignTSVector
*) palloc(len
);
198 SET_VARSIZE(res
, len
);
207 COMP_CRC32(c
, words
+ ptr
->pos
, ptr
->len
);
215 len
= uniqueint(GETARR(res
), val
->size
);
216 if (len
!= val
->size
)
219 * there is a collision of hash-function; len is always less than
222 len
= CALCGTSIZE(ARRKEY
, len
);
223 res
= (SignTSVector
*) repalloc((void *) res
, len
);
224 SET_VARSIZE(res
, len
);
227 /* make signature, if array is too long */
228 if (VARSIZE(res
) > TOAST_INDEX_TARGET
)
230 SignTSVector
*ressign
;
232 len
= CALCGTSIZE(SIGNKEY
, 0);
233 ressign
= (SignTSVector
*) palloc(len
);
234 SET_VARSIZE(ressign
, len
);
235 ressign
->flag
= SIGNKEY
;
236 makesign(GETSIGN(ressign
), res
);
240 retval
= (GISTENTRY
*) palloc(sizeof(GISTENTRY
));
241 gistentryinit(*retval
, PointerGetDatum(res
),
242 entry
->rel
, entry
->page
,
243 entry
->offset
, FALSE
);
245 else if (ISSIGNKEY(DatumGetPointer(entry
->key
)) &&
246 !ISALLTRUE(DatumGetPointer(entry
->key
)))
251 BITVECP sign
= GETSIGN(DatumGetPointer(entry
->key
));
255 if ((sign
[i
] & 0xff) != 0xff)
256 PG_RETURN_POINTER(retval
);
259 len
= CALCGTSIZE(SIGNKEY
| ALLISTRUE
, 0);
260 res
= (SignTSVector
*) palloc(len
);
261 SET_VARSIZE(res
, len
);
262 res
->flag
= SIGNKEY
| ALLISTRUE
;
264 retval
= (GISTENTRY
*) palloc(sizeof(GISTENTRY
));
265 gistentryinit(*retval
, PointerGetDatum(res
),
266 entry
->rel
, entry
->page
,
267 entry
->offset
, FALSE
);
269 PG_RETURN_POINTER(retval
);
273 gtsvector_decompress(PG_FUNCTION_ARGS
)
275 GISTENTRY
*entry
= (GISTENTRY
*) PG_GETARG_POINTER(0);
276 SignTSVector
*key
= (SignTSVector
*) DatumGetPointer(PG_DETOAST_DATUM(entry
->key
));
278 if (key
!= (SignTSVector
*) DatumGetPointer(entry
->key
))
280 GISTENTRY
*retval
= (GISTENTRY
*) palloc(sizeof(GISTENTRY
));
282 gistentryinit(*retval
, PointerGetDatum(key
),
283 entry
->rel
, entry
->page
,
284 entry
->offset
, FALSE
);
286 PG_RETURN_POINTER(retval
);
289 PG_RETURN_POINTER(entry
);
299 * is there value 'val' in array or not ?
302 checkcondition_arr(void *checkval
, QueryOperand
*val
)
304 int4
*StopLow
= ((CHKVAL
*) checkval
)->arrb
;
305 int4
*StopHigh
= ((CHKVAL
*) checkval
)->arre
;
308 /* Loop invariant: StopLow <= val < StopHigh */
311 * we are not able to find a a prefix by hash value
316 while (StopLow
< StopHigh
)
318 StopMiddle
= StopLow
+ (StopHigh
- StopLow
) / 2;
319 if (*StopMiddle
== val
->valcrc
)
321 else if (*StopMiddle
< val
->valcrc
)
322 StopLow
= StopMiddle
+ 1;
324 StopHigh
= StopMiddle
;
331 checkcondition_bit(void *checkval
, QueryOperand
*val
)
334 * we are not able to find a a prefix in signature tree
338 return GETBIT(checkval
, HASHVAL(val
->valcrc
));
342 gtsvector_consistent(PG_FUNCTION_ARGS
)
344 GISTENTRY
*entry
= (GISTENTRY
*) PG_GETARG_POINTER(0);
345 TSQuery query
= PG_GETARG_TSQUERY(1);
347 /* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
348 /* Oid subtype = PG_GETARG_OID(3); */
349 bool *recheck
= (bool *) PG_GETARG_POINTER(4);
350 SignTSVector
*key
= (SignTSVector
*) DatumGetPointer(entry
->key
);
352 /* All cases served by this function are inexact */
356 PG_RETURN_BOOL(false);
361 PG_RETURN_BOOL(true);
363 PG_RETURN_BOOL(TS_execute(
365 (void *) GETSIGN(key
), false,
370 { /* only leaf pages */
373 chkval
.arrb
= GETARR(key
);
374 chkval
.arre
= chkval
.arrb
+ ARRNELEM(key
);
375 PG_RETURN_BOOL(TS_execute(
377 (void *) &chkval
, true,
384 unionkey(BITVECP sbase
, SignTSVector
*add
)
390 BITVECP sadd
= GETSIGN(add
);
400 int4
*ptr
= GETARR(add
);
402 for (i
= 0; i
< ARRNELEM(add
); i
++)
410 gtsvector_union(PG_FUNCTION_ARGS
)
412 GistEntryVector
*entryvec
= (GistEntryVector
*) PG_GETARG_POINTER(0);
413 int *size
= (int *) PG_GETARG_POINTER(1);
418 SignTSVector
*result
;
420 MemSet((void *) base
, 0, sizeof(BITVEC
));
421 for (i
= 0; i
< entryvec
->n
; i
++)
423 if (unionkey(base
, GETENTRY(entryvec
, i
)))
431 len
= CALCGTSIZE(flag
, 0);
432 result
= (SignTSVector
*) palloc(len
);
434 SET_VARSIZE(result
, len
);
436 if (!ISALLTRUE(result
))
437 memcpy((void *) GETSIGN(result
), (void *) base
, sizeof(BITVEC
));
439 PG_RETURN_POINTER(result
);
443 gtsvector_same(PG_FUNCTION_ARGS
)
445 SignTSVector
*a
= (SignTSVector
*) PG_GETARG_POINTER(0);
446 SignTSVector
*b
= (SignTSVector
*) PG_GETARG_POINTER(1);
447 bool *result
= (bool *) PG_GETARG_POINTER(2);
450 { /* then b also ISSIGNKEY */
451 if (ISALLTRUE(a
) && ISALLTRUE(b
))
453 else if (ISALLTRUE(a
))
455 else if (ISALLTRUE(b
))
460 BITVECP sa
= GETSIGN(a
),
475 { /* a and b ISARRKEY */
476 int4 lena
= ARRNELEM(a
),
483 int4
*ptra
= GETARR(a
),
488 for (i
= 0; i
< lena
; i
++)
489 if (ptra
[i
] != ptrb
[i
])
497 PG_RETURN_POINTER(result
);
501 sizebitvec(BITVECP sign
)
507 size
+= number_of_ones
[(unsigned char) sign
[i
]];
512 hemdistsign(BITVECP a
, BITVECP b
)
520 diff
= (unsigned char) (a
[i
] ^ b
[i
]);
521 dist
+= number_of_ones
[diff
];
527 hemdist(SignTSVector
*a
, SignTSVector
*b
)
534 return SIGLENBIT
- sizebitvec(GETSIGN(b
));
536 else if (ISALLTRUE(b
))
537 return SIGLENBIT
- sizebitvec(GETSIGN(a
));
539 return hemdistsign(GETSIGN(a
), GETSIGN(b
));
543 gtsvector_penalty(PG_FUNCTION_ARGS
)
545 GISTENTRY
*origentry
= (GISTENTRY
*) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
546 GISTENTRY
*newentry
= (GISTENTRY
*) PG_GETARG_POINTER(1);
547 float *penalty
= (float *) PG_GETARG_POINTER(2);
548 SignTSVector
*origval
= (SignTSVector
*) DatumGetPointer(origentry
->key
);
549 SignTSVector
*newval
= (SignTSVector
*) DatumGetPointer(newentry
->key
);
550 BITVECP orig
= GETSIGN(origval
);
554 if (ISARRKEY(newval
))
558 makesign(sign
, newval
);
560 if (ISALLTRUE(origval
))
561 *penalty
= ((float) (SIGLENBIT
- sizebitvec(sign
))) / (float) (SIGLENBIT
+ 1);
563 *penalty
= hemdistsign(sign
, orig
);
566 *penalty
= hemdist(origval
, newval
);
567 PG_RETURN_POINTER(penalty
);
577 fillcache(CACHESIGN
*item
, SignTSVector
*key
)
579 item
->allistrue
= false;
581 makesign(item
->sign
, key
);
582 else if (ISALLTRUE(key
))
583 item
->allistrue
= true;
585 memcpy((void *) item
->sign
, (void *) GETSIGN(key
), sizeof(BITVEC
));
588 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
596 comparecost(const void *va
, const void *vb
)
598 SPLITCOST
*a
= (SPLITCOST
*) va
;
599 SPLITCOST
*b
= (SPLITCOST
*) vb
;
601 if (a
->cost
== b
->cost
)
604 return (a
->cost
> b
->cost
) ? 1 : -1;
609 hemdistcache(CACHESIGN
*a
, CACHESIGN
*b
)
616 return SIGLENBIT
- sizebitvec(b
->sign
);
618 else if (b
->allistrue
)
619 return SIGLENBIT
- sizebitvec(a
->sign
);
621 return hemdistsign(a
->sign
, b
->sign
);
625 gtsvector_picksplit(PG_FUNCTION_ARGS
)
627 GistEntryVector
*entryvec
= (GistEntryVector
*) PG_GETARG_POINTER(0);
628 GIST_SPLITVEC
*v
= (GIST_SPLITVEC
*) PG_GETARG_POINTER(1);
631 SignTSVector
*datum_l
,
640 OffsetNumber seed_1
= 0,
648 SPLITCOST
*costvector
;
650 maxoff
= entryvec
->n
- 2;
651 nbytes
= (maxoff
+ 2) * sizeof(OffsetNumber
);
652 v
->spl_left
= (OffsetNumber
*) palloc(nbytes
);
653 v
->spl_right
= (OffsetNumber
*) palloc(nbytes
);
655 cache
= (CACHESIGN
*) palloc(sizeof(CACHESIGN
) * (maxoff
+ 2));
656 fillcache(&cache
[FirstOffsetNumber
], GETENTRY(entryvec
, FirstOffsetNumber
));
658 for (k
= FirstOffsetNumber
; k
< maxoff
; k
= OffsetNumberNext(k
))
660 for (j
= OffsetNumberNext(k
); j
<= maxoff
; j
= OffsetNumberNext(j
))
662 if (k
== FirstOffsetNumber
)
663 fillcache(&cache
[j
], GETENTRY(entryvec
, j
));
665 size_waste
= hemdistcache(&(cache
[j
]), &(cache
[k
]));
666 if (size_waste
> waste
)
677 right
= v
->spl_right
;
680 if (seed_1
== 0 || seed_2
== 0)
686 /* form initial .. */
687 if (cache
[seed_1
].allistrue
)
689 datum_l
= (SignTSVector
*) palloc(CALCGTSIZE(SIGNKEY
| ALLISTRUE
, 0));
690 SET_VARSIZE(datum_l
, CALCGTSIZE(SIGNKEY
| ALLISTRUE
, 0));
691 datum_l
->flag
= SIGNKEY
| ALLISTRUE
;
695 datum_l
= (SignTSVector
*) palloc(CALCGTSIZE(SIGNKEY
, 0));
696 SET_VARSIZE(datum_l
, CALCGTSIZE(SIGNKEY
, 0));
697 datum_l
->flag
= SIGNKEY
;
698 memcpy((void *) GETSIGN(datum_l
), (void *) cache
[seed_1
].sign
, sizeof(BITVEC
));
700 if (cache
[seed_2
].allistrue
)
702 datum_r
= (SignTSVector
*) palloc(CALCGTSIZE(SIGNKEY
| ALLISTRUE
, 0));
703 SET_VARSIZE(datum_r
, CALCGTSIZE(SIGNKEY
| ALLISTRUE
, 0));
704 datum_r
->flag
= SIGNKEY
| ALLISTRUE
;
708 datum_r
= (SignTSVector
*) palloc(CALCGTSIZE(SIGNKEY
, 0));
709 SET_VARSIZE(datum_r
, CALCGTSIZE(SIGNKEY
, 0));
710 datum_r
->flag
= SIGNKEY
;
711 memcpy((void *) GETSIGN(datum_r
), (void *) cache
[seed_2
].sign
, sizeof(BITVEC
));
714 union_l
= GETSIGN(datum_l
);
715 union_r
= GETSIGN(datum_r
);
716 maxoff
= OffsetNumberNext(maxoff
);
717 fillcache(&cache
[maxoff
], GETENTRY(entryvec
, maxoff
));
718 /* sort before ... */
719 costvector
= (SPLITCOST
*) palloc(sizeof(SPLITCOST
) * maxoff
);
720 for (j
= FirstOffsetNumber
; j
<= maxoff
; j
= OffsetNumberNext(j
))
722 costvector
[j
- 1].pos
= j
;
723 size_alpha
= hemdistcache(&(cache
[seed_1
]), &(cache
[j
]));
724 size_beta
= hemdistcache(&(cache
[seed_2
]), &(cache
[j
]));
725 costvector
[j
- 1].cost
= Abs(size_alpha
- size_beta
);
727 qsort((void *) costvector
, maxoff
, sizeof(SPLITCOST
), comparecost
);
729 for (k
= 0; k
< maxoff
; k
++)
731 j
= costvector
[k
].pos
;
738 else if (j
== seed_2
)
745 if (ISALLTRUE(datum_l
) || cache
[j
].allistrue
)
747 if (ISALLTRUE(datum_l
) && cache
[j
].allistrue
)
750 size_alpha
= SIGLENBIT
- sizebitvec(
751 (cache
[j
].allistrue
) ? GETSIGN(datum_l
) : GETSIGN(cache
[j
].sign
)
755 size_alpha
= hemdistsign(cache
[j
].sign
, GETSIGN(datum_l
));
757 if (ISALLTRUE(datum_r
) || cache
[j
].allistrue
)
759 if (ISALLTRUE(datum_r
) && cache
[j
].allistrue
)
762 size_beta
= SIGLENBIT
- sizebitvec(
763 (cache
[j
].allistrue
) ? GETSIGN(datum_r
) : GETSIGN(cache
[j
].sign
)
767 size_beta
= hemdistsign(cache
[j
].sign
, GETSIGN(datum_r
));
769 if (size_alpha
< size_beta
+ WISH_F(v
->spl_nleft
, v
->spl_nright
, 0.1))
771 if (ISALLTRUE(datum_l
) || cache
[j
].allistrue
)
773 if (!ISALLTRUE(datum_l
))
774 MemSet((void *) GETSIGN(datum_l
), 0xff, sizeof(BITVEC
));
780 union_l
[i
] |= ptr
[i
];
787 if (ISALLTRUE(datum_r
) || cache
[j
].allistrue
)
789 if (!ISALLTRUE(datum_r
))
790 MemSet((void *) GETSIGN(datum_r
), 0xff, sizeof(BITVEC
));
796 union_r
[i
] |= ptr
[i
];
803 *right
= *left
= FirstOffsetNumber
;
804 v
->spl_ldatum
= PointerGetDatum(datum_l
);
805 v
->spl_rdatum
= PointerGetDatum(datum_r
);
807 PG_RETURN_POINTER(v
);