Force a checkpoint in CREATE DATABASE before starting to copy the files,
[PostgreSQL.git] / src / backend / access / gist / gistsplit.c
blob56d0f66e0c3f30dd2626866c42e2f6c8f615d4d3
1 /*-------------------------------------------------------------------------
3 * gistsplit.c
4 * Split page algorithm
7 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include "access/gist_private.h"
18 #include "utils/rel.h"
20 typedef struct
22 Datum *attr;
23 int len;
24 OffsetNumber *entries;
25 bool *isnull;
26 bool *equiv;
27 } GistSplitUnion;
31 * Forms unions of subkeys after page split, but
32 * uses only tuples aren't in groups of equalent tuples
34 static void
35 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
36 GistSplitUnion *gsvp, int startkey)
38 IndexTuple *cleanedItVec;
39 int i,
40 cleanedLen = 0;
42 cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
44 for (i = 0; i < gsvp->len; i++)
46 if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
47 continue;
49 cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
52 gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
53 gsvp->attr, gsvp->isnull);
55 pfree(cleanedItVec);
59 * unions subkeys for after user picksplit over attno-1 column
61 static void
62 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, int attno)
64 GistSplitUnion gsvp;
66 gsvp.equiv = spl->spl_equiv;
68 gsvp.attr = spl->spl_lattr;
69 gsvp.len = spl->splitVector.spl_nleft;
70 gsvp.entries = spl->splitVector.spl_left;
71 gsvp.isnull = spl->spl_lisnull;
73 gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
75 gsvp.attr = spl->spl_rattr;
76 gsvp.len = spl->splitVector.spl_nright;
77 gsvp.entries = spl->splitVector.spl_right;
78 gsvp.isnull = spl->spl_risnull;
80 gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
84 * find group in vector with equivalent value
86 static int
87 gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
89 int i;
90 GISTENTRY entry;
91 int len = 0;
94 * attno key is always not null (see gistSplitByKey), so we may not check
95 * for nulls
97 gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
98 for (i = 0; i < spl->splitVector.spl_nleft; i++)
100 float penalty = gistpenalty(giststate, attno, &entry, false,
101 &valvec[spl->splitVector.spl_left[i]], false);
103 if (penalty == 0.0)
105 spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
106 len++;
110 gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
111 for (i = 0; i < spl->splitVector.spl_nright; i++)
113 float penalty = gistpenalty(giststate, attno, &entry, false,
114 &valvec[spl->splitVector.spl_right[i]], false);
116 if (penalty == 0.0)
118 spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
119 len++;
123 return len;
126 static void
127 cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
129 int curlen,
131 OffsetNumber *curwpos;
133 curlen = *len;
134 curwpos = a;
135 for (i = 0; i < *len; i++)
137 if (equiv[a[i]] == FALSE)
139 *curwpos = a[i];
140 curwpos++;
142 else
144 /* corner case: we shouldn't make void array */
145 if (curlen == 1)
147 equiv[a[i]] = FALSE; /* mark item as non-equivalent */
148 i--; /* redo the same */
149 *LenEquiv -= 1;
150 continue;
152 else
153 curlen--;
157 *len = curlen;
160 static void
161 placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
163 GISTENTRY identry[INDEX_MAX_KEYS];
164 bool isnull[INDEX_MAX_KEYS];
165 bool toLeft = true;
167 gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
169 for (; attno < giststate->tupdesc->natts; attno++)
171 float lpenalty,
172 rpenalty;
173 GISTENTRY entry;
175 gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
176 lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
177 gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
178 rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
180 if (lpenalty != rpenalty)
182 if (lpenalty > rpenalty)
183 toLeft = false;
184 break;
188 if (toLeft)
189 v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
190 else
191 v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
194 #define SWAPVAR( s, d, t ) \
195 do { \
196 (t) = (s); \
197 (s) = (d); \
198 (d) = (t); \
199 } while(0)
202 * adjust left and right unions according to splits by previous
203 * split by firsts columns. This function is called only in case
204 * when pickSplit doesn't support subspplit.
207 static void
208 supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
210 bool leaveOnLeft = true,
211 tmpBool;
212 GISTENTRY entryL,
213 entryR,
214 entrySL,
215 entrySR;
217 gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
218 gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
219 gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
220 gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
222 if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
224 float penalty1,
225 penalty2;
227 penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
228 gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
229 penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
230 gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
232 if (penalty1 > penalty2)
233 leaveOnLeft = false;
236 else
238 GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
239 float penalty1,
240 penalty2;
243 * there is only one previously defined union, so we just choose swap
244 * or not by lowest penalty
247 penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
248 penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
250 if (penalty1 < penalty2)
251 leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
252 else
253 leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
256 if (leaveOnLeft == false)
259 * swap left and right
261 OffsetNumber *off,
262 noff;
263 Datum datum;
265 SWAPVAR(sv->spl_left, sv->spl_right, off);
266 SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
267 SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
268 gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
269 gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
272 if (sv->spl_ldatum_exists)
273 gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
274 &sv->spl_ldatum, &tmpBool);
276 if (sv->spl_rdatum_exists)
277 gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
278 &sv->spl_rdatum, &tmpBool);
280 sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
284 * Calls user picksplit method for attno columns to split vector to
285 * two vectors. May use attno+n columns data to
286 * get better split.
287 * Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
288 * so caller may find better split
289 * Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
291 static bool
292 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
293 IndexTuple *itup, int len, GISTSTATE *giststate)
295 GIST_SPLITVEC *sv = &v->splitVector;
298 * now let the user-defined picksplit function set up the split vector; in
299 * entryvec have no null value!!
302 sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
303 sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
304 sv->spl_ldatum = v->spl_lattr[attno];
305 sv->spl_rdatum = v->spl_rattr[attno];
307 FunctionCall2(&giststate->picksplitFn[attno],
308 PointerGetDatum(entryvec),
309 PointerGetDatum(sv));
311 /* compatibility with old code */
312 if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
313 sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
314 if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
315 sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
317 if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
319 elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
320 attno + 1, RelationGetRelationName(r));
322 supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
325 v->spl_lattr[attno] = sv->spl_ldatum;
326 v->spl_rattr[attno] = sv->spl_rdatum;
327 v->spl_lisnull[attno] = false;
328 v->spl_risnull[attno] = false;
331 * if index is multikey, then we must to try get smaller bounding box for
332 * subkey(s)
334 v->spl_equiv = NULL;
336 if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
338 if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
341 * Left and right key's unions are equial, so we can get better
342 * split by following columns. Note, unions for attno columns are
343 * already done.
346 return true;
348 else
350 int LenEquiv;
352 v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
354 LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
357 * if possible, we should distribute equivalent tuples
359 if (LenEquiv == 0)
361 gistunionsubkey(giststate, itup, v, attno + 1);
363 else
365 cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
366 cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
368 gistunionsubkey(giststate, itup, v, attno + 1);
369 if (LenEquiv == 1)
372 * In case with one tuple we just choose left-right by
373 * penalty. It's simplify user-defined pickSplit
375 OffsetNumber toMove = InvalidOffsetNumber;
377 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
378 if (v->spl_equiv[toMove])
379 break;
380 Assert(toMove < entryvec->n);
382 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
385 * redo gistunionsubkey(): it will not degradate
386 * performance, because it's very rarely
388 v->spl_equiv = NULL;
389 gistunionsubkey(giststate, itup, v, attno + 1);
391 return false;
393 else if (LenEquiv > 1)
394 return true;
399 return false;
403 * simple split page
405 static void
406 gistSplitHalf(GIST_SPLITVEC *v, int len)
408 int i;
410 v->spl_nright = v->spl_nleft = 0;
411 v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
412 v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
413 for (i = 1; i <= len; i++)
414 if (i < len / 2)
415 v->spl_right[v->spl_nright++] = i;
416 else
417 v->spl_left[v->spl_nleft++] = i;
421 * if it was invalid tuple then we need special processing.
422 * We move all invalid tuples on right page.
424 * if there is no place on left page, gistSplit will be called one more
425 * time for left page.
427 * Normally, we never exec this code, but after crash replay it's possible
428 * to get 'invalid' tuples (probability is low enough)
430 static void
431 gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len)
433 int i;
434 static OffsetNumber offInvTuples[MaxOffsetNumber];
435 int nOffInvTuples = 0;
437 for (i = 1; i <= len; i++)
438 if (GistTupleIsInvalid(itup[i - 1]))
439 offInvTuples[nOffInvTuples++] = i;
441 if (nOffInvTuples == len)
443 /* corner case, all tuples are invalid */
444 v->spl_rightvalid = v->spl_leftvalid = false;
445 gistSplitHalf(&v->splitVector, len);
447 else
449 GistSplitUnion gsvp;
451 v->splitVector.spl_right = offInvTuples;
452 v->splitVector.spl_nright = nOffInvTuples;
453 v->spl_rightvalid = false;
455 v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
456 v->splitVector.spl_nleft = 0;
457 for (i = 1; i <= len; i++)
458 if (!GistTupleIsInvalid(itup[i - 1]))
459 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
460 v->spl_leftvalid = true;
462 gsvp.equiv = NULL;
463 gsvp.attr = v->spl_lattr;
464 gsvp.len = v->splitVector.spl_nleft;
465 gsvp.entries = v->splitVector.spl_left;
466 gsvp.isnull = v->spl_lisnull;
468 gistunionsubkeyvec(giststate, itup, &gsvp, 0);
473 * trys to split page by attno key, in a case of null
474 * values move its to separate page.
476 void
477 gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
478 GistSplitVector *v, GistEntryVector *entryvec, int attno)
480 int i;
481 static OffsetNumber offNullTuples[MaxOffsetNumber];
482 int nOffNullTuples = 0;
484 for (i = 1; i <= len; i++)
486 Datum datum;
487 bool IsNull;
489 if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
491 gistSplitByInvalid(giststate, v, itup, len);
492 return;
495 datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
496 gistdentryinit(giststate, attno, &(entryvec->vector[i]),
497 datum, r, page, i,
498 FALSE, IsNull);
499 if (IsNull)
500 offNullTuples[nOffNullTuples++] = i;
503 v->spl_leftvalid = v->spl_rightvalid = true;
505 if (nOffNullTuples == len)
508 * Corner case: All keys in attno column are null, we should try to
509 * split by keys in next column. It all keys in all columns are NULL
510 * just split page half by half
512 v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
514 if (attno + 1 == r->rd_att->natts)
515 gistSplitHalf(&v->splitVector, len);
516 else
517 gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
519 else if (nOffNullTuples > 0)
521 int j = 0;
524 * We don't want to mix NULLs and not-NULLs keys on one page, so move
525 * nulls to right page
527 v->splitVector.spl_right = offNullTuples;
528 v->splitVector.spl_nright = nOffNullTuples;
529 v->spl_risnull[attno] = TRUE;
531 v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
532 v->splitVector.spl_nleft = 0;
533 for (i = 1; i <= len; i++)
534 if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
535 j++;
536 else
537 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
539 v->spl_equiv = NULL;
540 gistunionsubkey(giststate, itup, v, attno);
542 else
545 * all keys are not-null
547 entryvec->n = len + 1;
549 if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
552 * Splitting on attno column is not optimized: there is a tuples
553 * which can be freely left or right page, we will try to split
554 * page by following columns
556 if (v->spl_equiv == NULL)
559 * simple case: left and right keys for attno column are
560 * equial
562 gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
564 else
566 /* we should clean up vector from already distributed tuples */
567 IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
568 OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
569 int newlen = 0;
570 GIST_SPLITVEC backupSplit = v->splitVector;
572 for (i = 0; i < len; i++)
573 if (v->spl_equiv[i + 1])
575 map[newlen] = i + 1;
576 newitup[newlen++] = itup[i];
579 Assert(newlen > 0);
581 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
582 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
583 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
584 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
586 gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
588 /* merge result of subsplit */
589 for (i = 0; i < v->splitVector.spl_nleft; i++)
590 backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
591 for (i = 0; i < v->splitVector.spl_nright; i++)
592 backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
594 v->splitVector = backupSplit;
595 /* reunion left and right datums */
596 gistunionsubkey(giststate, itup, v, attno);