5 * GiST support for ltree[]
6 * Teodor Sigaev <teodor@stack.net>
10 #include "access/gist.h"
11 #include "access/skey.h"
12 #include "utils/array.h"
17 PG_FUNCTION_INFO_V1(_ltree_compress
);
18 Datum
_ltree_compress(PG_FUNCTION_ARGS
);
20 PG_FUNCTION_INFO_V1(_ltree_same
);
21 Datum
_ltree_same(PG_FUNCTION_ARGS
);
23 PG_FUNCTION_INFO_V1(_ltree_union
);
24 Datum
_ltree_union(PG_FUNCTION_ARGS
);
26 PG_FUNCTION_INFO_V1(_ltree_penalty
);
27 Datum
_ltree_penalty(PG_FUNCTION_ARGS
);
29 PG_FUNCTION_INFO_V1(_ltree_picksplit
);
30 Datum
_ltree_picksplit(PG_FUNCTION_ARGS
);
32 PG_FUNCTION_INFO_V1(_ltree_consistent
);
33 Datum
_ltree_consistent(PG_FUNCTION_ARGS
);
35 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
36 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
38 /* Number of one-bits in an unsigned byte */
39 static const uint8 number_of_ones
[256] = {
40 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
41 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
42 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
43 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
44 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
45 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
46 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
47 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
48 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
53 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
54 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
55 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
58 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
62 hashing(BITVECP sign
, ltree
* t
)
64 int tlen
= t
->numlevel
;
65 ltree_level
*cur
= LTREE_FIRST(t
);
70 hash
= ltree_crc32_sz(cur
->name
, cur
->len
);
72 cur
= LEVEL_NEXT(cur
);
78 _ltree_compress(PG_FUNCTION_ARGS
)
80 GISTENTRY
*entry
= (GISTENTRY
*) PG_GETARG_POINTER(0);
81 GISTENTRY
*retval
= entry
;
86 ArrayType
*val
= DatumGetArrayTypeP(entry
->key
);
87 int4 len
= LTG_HDRSIZE
+ ASIGLEN
;
88 int num
= ArrayGetNItems(ARR_NDIM(val
), ARR_DIMS(val
));
89 ltree
*item
= (ltree
*) ARR_DATA_PTR(val
);
91 if (ARR_NDIM(val
) != 1)
93 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
94 errmsg("array must be one-dimensional")));
97 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED
),
98 errmsg("array must not contain nulls")));
100 key
= (ltree_gist
*) palloc(len
);
101 SET_VARSIZE(key
, len
);
104 MemSet(LTG_SIGN(key
), 0, ASIGLEN
);
107 hashing(LTG_SIGN(key
), item
);
109 item
= NEXTVAL(item
);
112 retval
= (GISTENTRY
*) palloc(sizeof(GISTENTRY
));
113 gistentryinit(*retval
, PointerGetDatum(key
),
114 entry
->rel
, entry
->page
,
115 entry
->offset
, FALSE
);
117 else if (!LTG_ISALLTRUE(entry
->key
))
123 BITVECP sign
= LTG_SIGN(DatumGetPointer(entry
->key
));
127 if ((sign
[i
] & 0xff) != 0xff)
128 PG_RETURN_POINTER(retval
);
131 key
= (ltree_gist
*) palloc(len
);
132 SET_VARSIZE(key
, len
);
133 key
->flag
= LTG_ALLTRUE
;
135 retval
= (GISTENTRY
*) palloc(sizeof(GISTENTRY
));
136 gistentryinit(*retval
, PointerGetDatum(key
),
137 entry
->rel
, entry
->page
,
138 entry
->offset
, FALSE
);
140 PG_RETURN_POINTER(retval
);
144 _ltree_same(PG_FUNCTION_ARGS
)
146 ltree_gist
*a
= (ltree_gist
*) PG_GETARG_POINTER(0);
147 ltree_gist
*b
= (ltree_gist
*) PG_GETARG_POINTER(1);
148 bool *result
= (bool *) PG_GETARG_POINTER(2);
150 if (LTG_ISALLTRUE(a
) && LTG_ISALLTRUE(b
))
152 else if (LTG_ISALLTRUE(a
))
154 else if (LTG_ISALLTRUE(b
))
159 BITVECP sa
= LTG_SIGN(a
),
172 PG_RETURN_POINTER(result
);
176 unionkey(BITVECP sbase
, ltree_gist
* add
)
179 BITVECP sadd
= LTG_SIGN(add
);
181 if (LTG_ISALLTRUE(add
))
190 _ltree_union(PG_FUNCTION_ARGS
)
192 GistEntryVector
*entryvec
= (GistEntryVector
*) PG_GETARG_POINTER(0);
193 int *size
= (int *) PG_GETARG_POINTER(1);
200 MemSet((void *) base
, 0, sizeof(ABITVEC
));
201 for (i
= 0; i
< entryvec
->n
; i
++)
203 if (unionkey(base
, GETENTRY(entryvec
, i
)))
210 len
= LTG_HDRSIZE
+ ((flag
& LTG_ALLTRUE
) ? 0 : ASIGLEN
);
211 result
= (ltree_gist
*) palloc(len
);
212 SET_VARSIZE(result
, len
);
214 if (!LTG_ISALLTRUE(result
))
215 memcpy((void *) LTG_SIGN(result
), (void *) base
, sizeof(ABITVEC
));
218 PG_RETURN_POINTER(result
);
222 sizebitvec(BITVECP sign
)
228 size
+= number_of_ones
[(unsigned char) sign
[i
]];
233 hemdistsign(BITVECP a
, BITVECP b
)
241 diff
= (unsigned char) (a
[i
] ^ b
[i
]);
242 dist
+= number_of_ones
[diff
];
248 hemdist(ltree_gist
* a
, ltree_gist
* b
)
250 if (LTG_ISALLTRUE(a
))
252 if (LTG_ISALLTRUE(b
))
255 return ASIGLENBIT
- sizebitvec(LTG_SIGN(b
));
257 else if (LTG_ISALLTRUE(b
))
258 return ASIGLENBIT
- sizebitvec(LTG_SIGN(a
));
260 return hemdistsign(LTG_SIGN(a
), LTG_SIGN(b
));
265 _ltree_penalty(PG_FUNCTION_ARGS
)
267 ltree_gist
*origval
= (ltree_gist
*) DatumGetPointer(((GISTENTRY
*) PG_GETARG_POINTER(0))->key
);
268 ltree_gist
*newval
= (ltree_gist
*) DatumGetPointer(((GISTENTRY
*) PG_GETARG_POINTER(1))->key
);
269 float *penalty
= (float *) PG_GETARG_POINTER(2);
271 *penalty
= hemdist(origval
, newval
);
272 PG_RETURN_POINTER(penalty
);
282 comparecost(const void *a
, const void *b
)
284 return ((SPLITCOST
*) a
)->cost
- ((SPLITCOST
*) b
)->cost
;
288 _ltree_picksplit(PG_FUNCTION_ARGS
)
290 GistEntryVector
*entryvec
= (GistEntryVector
*) PG_GETARG_POINTER(0);
291 GIST_SPLITVEC
*v
= (GIST_SPLITVEC
*) PG_GETARG_POINTER(1);
303 OffsetNumber seed_1
= 0,
310 SPLITCOST
*costvector
;
314 maxoff
= entryvec
->n
- 2;
315 nbytes
= (maxoff
+ 2) * sizeof(OffsetNumber
);
316 v
->spl_left
= (OffsetNumber
*) palloc(nbytes
);
317 v
->spl_right
= (OffsetNumber
*) palloc(nbytes
);
319 for (k
= FirstOffsetNumber
; k
< maxoff
; k
= OffsetNumberNext(k
))
321 _k
= GETENTRY(entryvec
, k
);
322 for (j
= OffsetNumberNext(k
); j
<= maxoff
; j
= OffsetNumberNext(j
))
324 size_waste
= hemdist(_k
, GETENTRY(entryvec
, j
));
325 if (size_waste
> waste
)
336 right
= v
->spl_right
;
339 if (seed_1
== 0 || seed_2
== 0)
345 /* form initial .. */
346 if (LTG_ISALLTRUE(GETENTRY(entryvec
, seed_1
)))
348 datum_l
= (ltree_gist
*) palloc(LTG_HDRSIZE
);
349 SET_VARSIZE(datum_l
, LTG_HDRSIZE
);
350 datum_l
->flag
= LTG_ALLTRUE
;
354 datum_l
= (ltree_gist
*) palloc(LTG_HDRSIZE
+ ASIGLEN
);
355 SET_VARSIZE(datum_l
, LTG_HDRSIZE
+ ASIGLEN
);
357 memcpy((void *) LTG_SIGN(datum_l
), (void *) LTG_SIGN(GETENTRY(entryvec
, seed_1
)), sizeof(ABITVEC
));
359 if (LTG_ISALLTRUE(GETENTRY(entryvec
, seed_2
)))
361 datum_r
= (ltree_gist
*) palloc(LTG_HDRSIZE
);
362 SET_VARSIZE(datum_r
, LTG_HDRSIZE
);
363 datum_r
->flag
= LTG_ALLTRUE
;
367 datum_r
= (ltree_gist
*) palloc(LTG_HDRSIZE
+ ASIGLEN
);
368 SET_VARSIZE(datum_r
, LTG_HDRSIZE
+ ASIGLEN
);
370 memcpy((void *) LTG_SIGN(datum_r
), (void *) LTG_SIGN(GETENTRY(entryvec
, seed_2
)), sizeof(ABITVEC
));
373 maxoff
= OffsetNumberNext(maxoff
);
374 /* sort before ... */
375 costvector
= (SPLITCOST
*) palloc(sizeof(SPLITCOST
) * maxoff
);
376 for (j
= FirstOffsetNumber
; j
<= maxoff
; j
= OffsetNumberNext(j
))
378 costvector
[j
- 1].pos
= j
;
379 _j
= GETENTRY(entryvec
, j
);
380 size_alpha
= hemdist(datum_l
, _j
);
381 size_beta
= hemdist(datum_r
, _j
);
382 costvector
[j
- 1].cost
= Abs(size_alpha
- size_beta
);
384 qsort((void *) costvector
, maxoff
, sizeof(SPLITCOST
), comparecost
);
386 union_l
= LTG_SIGN(datum_l
);
387 union_r
= LTG_SIGN(datum_r
);
389 for (k
= 0; k
< maxoff
; k
++)
391 j
= costvector
[k
].pos
;
398 else if (j
== seed_2
)
404 _j
= GETENTRY(entryvec
, j
);
405 size_alpha
= hemdist(datum_l
, _j
);
406 size_beta
= hemdist(datum_r
, _j
);
408 if (size_alpha
< size_beta
+ WISH_F(v
->spl_nleft
, v
->spl_nright
, 0.00001))
410 if (LTG_ISALLTRUE(datum_l
) || LTG_ISALLTRUE(_j
))
412 if (!LTG_ISALLTRUE(datum_l
))
413 MemSet((void *) union_l
, 0xff, sizeof(ABITVEC
));
419 union_l
[i
] |= ptr
[i
];
426 if (LTG_ISALLTRUE(datum_r
) || LTG_ISALLTRUE(_j
))
428 if (!LTG_ISALLTRUE(datum_r
))
429 MemSet((void *) union_r
, 0xff, sizeof(ABITVEC
));
435 union_r
[i
] |= ptr
[i
];
442 *right
= *left
= FirstOffsetNumber
;
444 v
->spl_ldatum
= PointerGetDatum(datum_l
);
445 v
->spl_rdatum
= PointerGetDatum(datum_r
);
447 PG_RETURN_POINTER(v
);
451 gist_te(ltree_gist
* key
, ltree
* query
)
453 ltree_level
*curq
= LTREE_FIRST(query
);
454 BITVECP sign
= LTG_SIGN(key
);
455 int qlen
= query
->numlevel
;
458 if (LTG_ISALLTRUE(key
))
463 hv
= ltree_crc32_sz(curq
->name
, curq
->len
);
464 if (!GETBIT(sign
, AHASHVAL(hv
)))
466 curq
= LEVEL_NEXT(curq
);
474 checkcondition_bit(void *checkval
, ITEM
* val
)
476 return (FLG_CANLOOKSIGN(val
->flag
)) ? GETBIT(checkval
, AHASHVAL(val
->val
)) : true;
480 gist_qtxt(ltree_gist
* key
, ltxtquery
* query
)
482 if (LTG_ISALLTRUE(key
))
485 return ltree_execute(
487 (void *) LTG_SIGN(key
), false,
493 gist_qe(ltree_gist
* key
, lquery
* query
)
495 lquery_level
*curq
= LQUERY_FIRST(query
);
496 BITVECP sign
= LTG_SIGN(key
);
497 int qlen
= query
->numlevel
;
499 if (LTG_ISALLTRUE(key
))
504 if (curq
->numvar
&& LQL_CANLOOKSIGN(curq
))
506 bool isexist
= false;
507 int vlen
= curq
->numvar
;
508 lquery_variant
*curv
= LQL_FIRST(curq
);
512 if (GETBIT(sign
, AHASHVAL(curv
->val
)))
517 curv
= LVAR_NEXT(curv
);
524 curq
= LQL_NEXT(curq
);
532 _arrq_cons(ltree_gist
* key
, ArrayType
*_query
)
534 lquery
*query
= (lquery
*) ARR_DATA_PTR(_query
);
535 int num
= ArrayGetNItems(ARR_NDIM(_query
), ARR_DIMS(_query
));
537 if (ARR_NDIM(_query
) != 1)
539 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
540 errmsg("array must be one-dimensional")));
541 if (ARR_HASNULL(_query
))
543 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED
),
544 errmsg("array must not contain nulls")));
548 if (gist_qe(key
, query
))
551 query
= (lquery
*) NEXTVAL(query
);
557 _ltree_consistent(PG_FUNCTION_ARGS
)
559 GISTENTRY
*entry
= (GISTENTRY
*) PG_GETARG_POINTER(0);
560 char *query
= (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
561 StrategyNumber strategy
= (StrategyNumber
) PG_GETARG_UINT16(2);
562 /* Oid subtype = PG_GETARG_OID(3); */
563 bool *recheck
= (bool *) PG_GETARG_POINTER(4);
564 ltree_gist
*key
= (ltree_gist
*) DatumGetPointer(entry
->key
);
567 /* All cases served by this function are inexact */
574 res
= gist_te(key
, (ltree
*) query
);
578 res
= gist_qe(key
, (lquery
*) query
);
582 res
= gist_qtxt(key
, (ltxtquery
*) query
);
586 res
= _arrq_cons(key
, (ArrayType
*) query
);
590 elog(ERROR
, "unrecognized StrategyNumber: %d", strategy
);
592 PG_FREE_IF_COPY(query
, 1);