src/budb/db_hash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12
  13
  14 #ifdef AFS_NT40_ENV
  15 #include <winsock2.h>
  16 #else
  17 #include <netinet/in.h>
  18 #endif
  19 #include <string.h>
  20 #include <sys/types.h>
  21 #include <afs/stds.h>
  22 #include <ubik.h>
  23 #include <afs/bubasics.h>
  24 #include "budb_errs.h"
  25 #include "database.h"
  26 #include "budb_internal.h"
  27 #include "error_macros.h"
  28
  29
  30 int sizeFunctions[HT_MAX_FUNCTION + 1];
  31 int nHTBuckets = NhtBucketS;    /* testing: we need small HT blocks */
  32
  33 int ht_minHBlocks(struct memoryHashTable *mht);
  34
  35 /* ht_TableSize - return the size of table necessary to represent a hashtable
  36  * of given length in memory.  It basically rounds the length up by the number
  37  * of buckets per block. */
  38
  39 int
  40 ht_TableSize(int length)
  41 {
  42     int n;
  43     if (length == 0)
  44         return 0;
  45     n = (length + nHTBuckets - 1) / nHTBuckets;
  46     return n * sizeof(struct memoryHTBlock *);
  47 }
  48
  49 /* ht_ResetT - resets the in-memory representation of a hashtable block array.
  50  * It also resets the global variable nHTBuckets. */
  51
  52 static void
  53 ht_ResetT(struct memoryHTBlock ***blocksP, int *sizeP, int length)
  54 {
  55     struct memoryHTBlock **b = *blocksP;
  56     int newsize;
  57     int n;
  58     int i;
  59
  60     nHTBuckets = ntohl(db.h.nHTBuckets);
  61     if (b) {
  62         n = *sizeP / sizeof(b[0]);
  63         newsize = ht_TableSize(length);
  64         if (*sizeP != newsize) {
  65             /* free all blocks in the old array */
  66             for (i = 0; i < n; i++)
  67                 if (b[i])
  68                     free(b[i]);
  69             free(b);
  70             *sizeP = 0;
  71             *blocksP = 0;
  72         } else {
  73             /* invalidate the blocks of the array  */
  74             for (i = 0; i < n; i++)
  75                 if (b[i])
  76                     b[i]->valid = 0;
  77         }
  78     }
  79 }
  80
  81 /* ht_Reset
  82  *      reinitialize a memory hash table.
  83  *      Calls ht_ResetT to invalidate the two block arrays.
  84  */
  85
  86 void
  87 ht_Reset(struct memoryHashTable *mht)
  88 {
  89     struct hashTable *ht = NULL;
  90
  91     if (!(mht && (ht = mht->ht)))
  92         db_panic("some ht called with bad mht");
  93     mht->threadOffset = ntohl(ht->threadOffset);
  94     mht->length = ntohl(ht->length);
  95     mht->oldLength = ntohl(ht->oldLength);
  96     mht->progress = ntohl(ht->progress);
  97     ht_ResetT(&mht->blocks, &mht->size, mht->length);
  98     ht_ResetT(&mht->oldBlocks, &mht->oldSize, mht->oldLength);
  99 }
 100
 101 /* InitDBhash - When server starts, do hash table initialization.
 102      test - initialization parameters: bit 4 is small ht. */
 103
 104 afs_int32
 105 InitDBhash(void)
 106 {
 107     sizeFunctions[0] = 0;
 108
 109     sizeFunctions[HT_dumpIden_FUNCTION] = sizeof(struct dump);
 110     sizeFunctions[HT_dumpName_FUNCTION] = sizeof(struct dump);
 111     sizeFunctions[HT_volName_FUNCTION] = sizeof(struct volInfo);
 112     sizeFunctions[HT_tapeName_FUNCTION] = sizeof(struct tape);
 113
 114     db.volName.ht = &db.h.volName;
 115     db.tapeName.ht = &db.h.tapeName;
 116     db.dumpName.ht = &db.h.dumpName;
 117     db.dumpIden.ht = &db.h.dumpIden;
 118     return 0;
 119 }
 120
 121 /* ht_DBInit - When rebuilding database, this sets up the hash tables. */
 122
 123 void
 124 ht_DBInit(void)
 125 {
 126     db.h.nHTBuckets = htonl(nHTBuckets);
 127
 128     {
 129         struct volInfo *s = 0;
 130         db.h.volName.threadOffset =
 131             htonl((char *)&s->nameHashChain - (char *)s);
 132         db.h.volName.functionType = htonl(HT_volName_FUNCTION);
 133     }
 134     {
 135         struct tape *s = 0;
 136         db.h.tapeName.threadOffset =
 137             htonl((char *)&s->nameHashChain - (char *)s);
 138         db.h.tapeName.functionType = htonl(HT_tapeName_FUNCTION);
 139     }
 140     {
 141         struct dump *s = 0;
 142         db.h.dumpName.threadOffset =
 143             htonl((char *)&s->nameHashChain - (char *)s);
 144         db.h.dumpName.functionType = htonl(HT_dumpName_FUNCTION);
 145
 146         db.h.dumpIden.threadOffset =
 147             htonl((char *)&s->idHashChain - (char *)s);
 148         db.h.dumpIden.functionType = htonl(HT_dumpIden_FUNCTION);
 149     }
 150     ht_Reset(&db.volName);
 151     ht_Reset(&db.tapeName);
 152     ht_Reset(&db.dumpName);
 153     ht_Reset(&db.dumpIden);
 154 }
 155
 156 afs_int32
 157 ht_AllocTable(struct ubik_trans *ut, struct memoryHashTable *mht)
 158 {
 159     struct hashTable *ht = NULL;
 160     afs_int32 code;
 161     int len;
 162     int nb, mnb;                /* number of blocks for hashTable */
 163     int i;
 164     struct memoryHTBlock **b;
 165     afs_int32 *plen;
 166
 167     if (!(mht && (ht = mht->ht)))
 168         db_panic("some ht called with bad mht");
 169     if (ht->length || mht->blocks)
 170         db_panic("previous table still allocated");
 171
 172     len = ntohl(ht->entries) * 2;       /* allow room to grow */
 173     nb = (len + nHTBuckets - 1) / nHTBuckets;
 174     mnb = ht_minHBlocks(mht);
 175     if (nb < mnb)
 176         nb = mnb;               /* use minimum */
 177     len = nb * nHTBuckets;      /* new hash table length */
 178
 179     mht->size = nb * sizeof(struct memoryHTBlock *);
 180     b = mht->blocks = (struct memoryHTBlock **)malloc(mht->size);
 181     memset(b, 0, mht->size);
 182
 183     for (i = 0; i < nb; i++) {
 184         b[i] = (struct memoryHTBlock *)malloc(sizeof(struct memoryHTBlock));
 185         code = AllocBlock(ut, (struct block *)&b[i]->b, &b[i]->a);
 186         if (code)
 187             return code;
 188         b[i]->valid = 0;
 189
 190         b[i]->b.h.type = hashTable_BLOCK;
 191
 192         /* thread the blocks */
 193         if (i)
 194             b[i - 1]->b.h.next = htonl(b[i]->a);
 195     }
 196     for (i = 0; i < nb; i++) {
 197         code =
 198             dbwrite(ut, b[i]->a, (char *)&b[i]->b,
 199                     sizeof(struct htBlock) + (nHTBuckets -
 200                                               NhtBucketS) * sizeof(dbadr));
 201         if (code)
 202             return code;
 203     }
 204     if ((code = set_word_addr(ut, 0, &db.h, &ht->table, htonl(b[0]->a))))
 205         return code;
 206
 207     plen = &ht->length;
 208     if ((code = set_word_addr(ut, 0, &db.h, plen, htonl(len))))
 209         return code;
 210     mht->length = len;
 211     return 0;
 212 }
 213
 214 afs_int32
 215 ht_FreeTable(struct ubik_trans *ut, struct memoryHashTable *mht)
 216 {
 217     struct hashTable *ht = NULL;
 218     afs_int32 code;
 219     struct blockHeader bh;
 220     dbadr a, na;
 221     afs_int32 *plen, *pprog;
 222
 223     if (!(mht && (ht = mht->ht)))
 224         db_panic("some ht called with bad mht");
 225     if (ht->oldLength == 0)
 226         db_panic("no table to free");
 227
 228     ht_ResetT(&mht->oldBlocks, &mht->oldSize, 0);
 229
 230     for (a = ntohl(ht->oldTable); a; a = na) {
 231         if (dbread(ut, a, (char *)&bh, sizeof(bh))) {
 232             Log("ht_FreeTable: dbread failed\n");
 233             return BUDB_IO;
 234         }
 235         na = ntohl(bh.next);
 236         if ((code = FreeBlock(ut, &bh, a)))
 237             return code;
 238     }
 239     plen = &ht->oldLength;
 240     pprog = &ht->progress;
 241     if (set_word_addr(ut, 0, &db.h, &ht->oldTable, 0)
 242         || set_word_addr(ut, 0, &db.h, plen, 0)
 243         || set_word_addr(ut, 0, &db.h, pprog, 0))
 244         return BUDB_IO;
 245     mht->oldLength = mht->progress = 0;
 246     return 0;
 247 }
 248
 249 afs_int32
 250 ht_GetTableBlock(struct ubik_trans *ut, struct memoryHashTable *mht,
 251                  afs_uint32 hash, int old, struct memoryHTBlock **blockP,
 252                  int *boP)
 253 {
 254     struct hashTable *ht = NULL;
 255     struct memoryHTBlock **b;
 256     int hi, bi;
 257     struct memoryHTBlock ***blocksP;
 258     int *sizeP;
 259     int n;
 260     int i;
 261     int length;
 262     dbadr ta = 0;
 263
 264     if ((mht == 0)
 265         || ((ht = mht->ht) == 0)
 266         ) {
 267         db_panic("some ht called with bad mht");
 268     }
 269
 270     *blockP = 0;
 271
 272     if (old) {
 273         if ((length = mht->oldLength) == 0)
 274             return 0;           /* no entries */
 275         hi = hash % length;
 276         if (hi < mht->progress)
 277             return 0;           /* no such entry */
 278         blocksP = &mht->oldBlocks;
 279         sizeP = &mht->oldSize;
 280     } else {
 281         if ((length = mht->length) == 0)
 282             return 0;           /* no entries */
 283         hi = hash % length;
 284         blocksP = &mht->blocks;
 285         sizeP = &mht->size;
 286     }
 287
 288     bi = hi / nHTBuckets;       /* block index */
 289     *boP = hi - bi * nHTBuckets;        /* block offset ptr */
 290
 291     if (*blocksP == 0) {
 292         *sizeP = ht_TableSize(length);
 293         *blocksP = (struct memoryHTBlock **)malloc(*sizeP);
 294         memset(*blocksP, 0, *sizeP);
 295     }
 296     n = *sizeP / sizeof(struct memoryHTBlock *);
 297     if (bi >= n)
 298         db_panic("table size inconsistent");
 299     b = *blocksP;
 300
 301     /* find an allocated block or the beginning of the block array */
 302     for (i = bi; (i > 0) && (b[i] == 0); i--);
 303
 304     while (1) {
 305         if (b[i] == 0) {
 306             if (i == 0) {       /* the first block is found from the hashTable */
 307                 ta = ntohl(old ? ht->oldTable : ht->table);
 308                 if (ta == 0)
 309                     db_panic("non-zero length, but no table");
 310             }
 311             /* else ta is set from last time around loop */
 312             b[i] =
 313                 (struct memoryHTBlock *)malloc(sizeof(struct memoryHTBlock));
 314             b[i]->a = ta;
 315             b[i]->valid = 0;
 316         }
 317
 318         if (!b[i]->valid) {
 319             if (dbread(ut, b[i]->a, (char *)&b[i]->b, sizeof(struct htBlock)))
 320                 return BUDB_IO;
 321             b[i]->valid = 1;
 322         }
 323
 324         if (i == bi) {
 325             *blockP = b[bi];
 326             /* printf("ht_GetTableBlock: hash %d block %d offset %d\n",
 327              * hash, *blockP, *boP); */
 328             return 0;
 329         }
 330
 331         ta = ntohl(b[i++]->b.h.next);   /* get next ptr from current block */
 332     }
 333 }
 334
 335 /* ht_MaybeAdjust
 336  *      Decide when to push the current hash table to the old hash table.
 337  *      The entries in the old hash table are VALID, and are slowly hashed
 338  *      into the current table.
 339  */
 340
 341 static afs_int32
 342 ht_MaybeAdjust(struct ubik_trans *ut, struct memoryHashTable *mht)
 343 {
 344     struct hashTable *ht = mht->ht;
 345     int numberEntries = ntohl(ht->entries);
 346
 347     /* old hash table must be empty */
 348     if (mht->oldLength != 0)
 349         return (0);
 350
 351     /*
 352      * It costs a lot to grow and shrink the hash table. Therefore, we will not
 353      * shrink the hash table (only grow it). If the table is more than 2 entries per
 354      * chain (average) we need to grow: push the entries to the old hash table.
 355      *
 356      * Don't shrink it:
 357      * || ((mht->length > nHTBuckets) && (numberEntries*8 < mht->length))
 358      */
 359
 360     /* Only grow a hash table if the number of entries is twice the
 361      * number of hash length and is less than 20,450 (20 hash blocks). This
 362      * means that the volname hash table will not grow (its initial
 363      * hashtable size contains 30,600 buckets). Earlier revisions of
 364      * the buserver have the initial size at 510 and 5,100 buckets -
 365      * in which case we do want to grow it). We don't grow anything larger
 366      * than 20,450 entries because it's expensive to re-hash everything.
 367      */
 368     if ((numberEntries > mht->length * 2) && (numberEntries < 20450)) { /* push current hash table to old hash table */
 369         ht->oldLength = ht->length;
 370         ht->oldTable = ht->table;
 371         ht->progress = 0;
 372         ht->length = 0;
 373         ht->table = 0;
 374         if (dbwrite
 375             (ut, ((char *)ht - (char *)&db.h), (char *)ht, sizeof(*ht)))
 376             return BUDB_IO;
 377
 378         ht_Reset(mht);
 379         LogDebug(2, "ht_MaybeAdjust: push ht to old\n");
 380     }
 381     return 0;
 382 }
 383
 384 dbadr
 385 ht_LookupBucket(struct ubik_trans *ut, struct memoryHashTable *mht,
 386                 afs_uint32 hash, int old)
 387 {
 388     struct memoryHTBlock *block;
 389     int bo;
 390     afs_int32 code;
 391
 392     if ((old ? mht->oldLength : mht->length) == 0)
 393         return 0;
 394     code = ht_GetTableBlock(ut, mht, hash, old, &block, &bo);
 395     if (code || (block == 0))
 396         return 0;
 397     return ntohl(block->b.bucket[bo]);
 398 }
 399
 400 /* This function is not too bad, for small hash tables, but suffers, I think,
 401  * from insufficient mixing of the hash information. */
 402
 403 afs_uint32
 404 Old2StringHashFunction(unsigned char *str)
 405 {
 406     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 407     while (*str)
 408         hash = (hash << 1) + (hash >> 31) + *str++;
 409     return hash;
 410 }
 411
 412 /* This was actually a coding error, and produces dreadful results.  The
 413  * problem is that the hash needs to be mixed up not the incoming character. */
 414
 415 afs_uint32
 416 Old3StringHashFunction(unsigned char *str)
 417 {
 418     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 419     while (*str)
 420         hash += (*str++) * 0x072a51a4;
 421     return hash;
 422 }
 423
 424 /* This function is pretty good.  Its main problem is that the low two bits of
 425  * the hash multiplier are zero which tends to shift information too far left.
 426  * It behaves especially badly for hash tables whose size is a power of two. */
 427
 428 afs_uint32
 429 Old4StringHashFunction(unsigned char *str)
 430 {
 431     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 432     while (*str)
 433         hash = (*str++) + hash * 0x072a51a4;
 434     return hash;
 435 }
 436
 437 /* While this is good for a hash table with 500 buckets it is nearly as bad as
 438  * #3 with a hash table as big as 8200. */
 439
 440 afs_uint32
 441 Old5StringHashFunction(unsigned char *str)
 442 {
 443     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 444     while (*str)
 445         hash += (*str++);
 446     return hash;
 447 }
 448
 449 /* This was an attempt to produce a hash function with the smallest and
 450  * simplest mixing multiplier.  This is only a little worse than the real one,
 451  * and the difference seems to be smaller with larger hash tables.  It behaves
 452  * better than the random hash function. */
 453
 454 afs_uint32
 455 Old6StringHashFunction(unsigned char *str)
 456 {
 457     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 458     while (*str)
 459         hash = hash * 0x81 + (*str++);
 460     return hash;
 461 }
 462
 463 /* This actually seems to be little better then the real one.  Having the same
 464  * number of bits but only 5 bits apart seems to produce worse results but
 465  * having the bits spanning the same range farther apart also doesn't do as
 466  * well.  All these differences are fairly small, however. */
 467
 468 afs_uint32
 469 Old7StringHashFunction(unsigned char *str)
 470 {
 471     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 472     while (*str)
 473         hash = hash * 0x42108421 + (*str++);
 474     return hash;
 475 }
 476
 477 /* This function tries to provide some non-linearity by providing some feedback
 478  * from higher-order bits in the word.  It also uses shifts instead of
 479  * multiplies, which may be faster on some architectures. */
 480
 481 afs_uint32
 482 Old8StringHashFunction(unsigned char *str)
 483 {
 484     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 485     while (*str)
 486         hash =
 487             hash + (hash << 7) + (hash << 14) + (hash << 21) + (hash << 28) +
 488             (hash >> 17) + *str++;
 489     return hash;
 490 }
 491
 492 /* This is the result of the above search for good hash functions.  It seems
 493  * that the choice of multipliers is somewhat arbitrary but has several
 494  * constraints.  It shouldn't have too many or too few one bits and should be
 495  * odd.  It behaves beeter than the random hash function. */
 496
 497 afs_uint32
 498 StringHashFunction(unsigned char *str)
 499 {
 500     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 501     /* The multiplicative constant should be odd and have a goodly number of
 502      * one bits. */
 503     while (*str)
 504         hash = (*str++) + hash * 0x10204081;
 505     return hash;
 506 }
 507
 508 afs_uint32
 509 IdHashFunction(afs_uint32 id)
 510 {
 511     afs_uint32 l, r;
 512     id *= 81847;
 513     l = id | 0xaaaaaaaa;
 514     r = id | 0x55555555;
 515     return (l * r);
 516 }
 517
 518 /* The minimum hash table blocks to allocate. Each block contains 510
 519  * buckets. They hash table grows when the number of entries reaches
 520  * twice the number of buckets.
 521  */
 522 int
 523 ht_minHBlocks(struct memoryHashTable *mht)
 524 {
 525     int retval;
 526
 527     switch (ntohl(mht->ht->functionType)) {
 528     case HT_dumpIden_FUNCTION:
 529     case HT_dumpName_FUNCTION:  /* hash table able to handle (befor it grows) ... */
 530         retval = 2;             /*     1,020 dump entries */
 531         break;
 532
 533     case HT_tapeName_FUNCTION:
 534         retval = 4;             /*      2,040 tape entries */
 535         break;
 536
 537     case HT_volName_FUNCTION:
 538         retval = 60;            /*     61,200 volInfo entries (with different names) */
 539         break;
 540
 541     default:
 542         db_panic("Illegal hash function type");
 543         retval = -1; /* not reached */
 544     }
 545     return (retval);
 546 }
 547
 548 afs_uint32
 549 ht_HashEntry(struct memoryHashTable *mht,
 550              char *e)                           /* entry's address (in b) */
 551 {
 552     int type = ntohl(mht->ht->functionType);
 553     afs_uint32 retval;
 554
 555     switch (type) {
 556     case HT_dumpIden_FUNCTION:
 557         retval = IdHashFunction(ntohl(((struct dump *)e)->id));
 558         LogDebug(5, "HashEntry: dumpid returns %d\n", retval);
 559         break;
 560
 561     case HT_dumpName_FUNCTION:
 562         retval = StringHashFunction((unsigned char *)((struct dump *)e)->dumpName);
 563         LogDebug(5, "HashEntry: dumpname returns %d\n", retval);
 564         break;
 565
 566     case HT_tapeName_FUNCTION:
 567         retval = StringHashFunction((unsigned char *)((struct tape *)e)->name);
 568         LogDebug(5, "HashEntry: tapename returns %d\n", retval);
 569         break;
 570
 571     case HT_volName_FUNCTION:
 572         retval = StringHashFunction((unsigned char *)((struct volInfo *)e)->name);
 573         LogDebug(5, "HashEntry: volname returns %d\n", retval);
 574         break;
 575
 576     default:
 577         db_panic("illegal hash function");
 578         retval = -1; /* not reached */
 579     }
 580
 581     return (retval);
 582 }
 583
 584
 585 /* ht_GetType
 586  *      returns a ptr to the memory hash table for the specified hash
 587  *      list.
 588  */
 589
 590 struct memoryHashTable *
 591 ht_GetType(int type, int *e_sizeP)
 592 {
 593     struct memoryHashTable *mht;
 594
 595     if ((type <= 0) || (type > HT_MAX_FUNCTION))
 596         return 0;
 597
 598     if (e_sizeP)
 599         *e_sizeP = sizeFunctions[type];
 600     switch (type) {
 601     case HT_dumpIden_FUNCTION:
 602         mht = &db.dumpIden;
 603         break;
 604
 605     case HT_dumpName_FUNCTION:
 606         mht = &db.dumpName;
 607         break;
 608
 609     case HT_tapeName_FUNCTION:
 610         mht = &db.tapeName;
 611         break;
 612
 613     case HT_volName_FUNCTION:
 614         mht = &db.volName;
 615         break;
 616
 617     default:
 618         return 0;
 619     }
 620     if (ntohl(mht->ht->functionType) != type)
 621         db_panic("ht types don't match");
 622     return mht;
 623 }
 624
 625 static int
 626 ht_KeyMatch(int type, char *key, char *e)
 627 {
 628     switch (type) {
 629     case HT_dumpIden_FUNCTION:
 630         return *(dumpId *) key == ntohl(((struct dump *)e)->id);
 631     case HT_dumpName_FUNCTION:
 632         return strcmp(key, ((struct dump *)e)->dumpName) == 0;
 633     case HT_tapeName_FUNCTION:
 634         return strcmp(key, ((struct tape *)e)->name) == 0;
 635     case HT_volName_FUNCTION:
 636         return strcmp(key, ((struct volInfo *)e)->name) == 0;
 637
 638     default:
 639         db_panic("illegal hash function");
 640     }
 641     /* not reached */
 642     return 0;
 643 }
 644
 645 /* ht_LookupEntry
 646  * entry:
 647  *      ut - ubik transaction
 648  *      mht - memory hash table ptr
 649  *      key - hash and lookup key
 650  * exit:
 651  *      eaP - dbaddr of entry found or zero if failed
 652   *     e - contents of located entry
 653  */
 654
 655 afs_int32
 656 ht_LookupEntry(struct ubik_trans *ut,
 657                struct memoryHashTable *mht,
 658                void *key,       /* pointer to lookup key to match */
 659                dbadr *eaP,      /* db addr of entry found or zero */
 660                void *e)         /* contents of located entry */
 661 {
 662     struct hashTable *ht = NULL;
 663     int type;
 664     int e_size;
 665     int old;
 666     afs_uint32 hash;
 667     dbadr a;
 668
 669     if (!key || !eaP || !e)
 670         db_panic("null ptrs passed to LookupEntry");
 671     if (!(mht && (ht = mht->ht)))
 672         db_panic("some ht called with bad mht");
 673
 674     *eaP = 0;                   /* initialize not-found indicator */
 675
 676     type = ntohl(ht->functionType);
 677     e_size = sizeFunctions[type];
 678     if (type == HT_dumpIden_FUNCTION)
 679         hash = IdHashFunction(*(dumpId *) key);
 680     else
 681         hash = StringHashFunction(key);
 682
 683     for (old = 0;; old++) {
 684         a = ht_LookupBucket(ut, mht, hash, old);
 685         while (a) {
 686             if (dbread(ut, a, e, e_size))
 687                 return BUDB_IO;
 688             if (ht_KeyMatch(type, key, e)) {
 689                 *eaP = a;
 690                 return 0;
 691             }
 692             a = ntohl(*(dbadr *) ((char *)e + mht->threadOffset));
 693         }
 694         if (old)
 695             return 0;
 696     }
 697 }
 698
 699 /* ht_HashInList
 700  * entry:
 701  *      opQuota - max # of items to move
 702  * exit:
 703  *      opQuota - adjusted to reflect # of moves
 704  */
 705
 706 static afs_int32
 707 ht_HashInList(struct ubik_trans *ut, struct memoryHashTable *mht,
 708               int *opQuota, struct memoryHTBlock *block, int blockOffset)
 709 {
 710     struct hashTable *ht = mht->ht;
 711     afs_int32 code;
 712     dbadr ea, next_ea;
 713     dbadr listA;
 714     char e[sizeof(struct block)];       /* unnecessarily conservative */
 715     int e_size = sizeFunctions[ntohl(ht->functionType)];
 716
 717     if (mht->length == 0) {
 718         if ((code = ht_AllocTable(ut, mht))) {
 719             Log("ht_HashInList: ht_AllocTable failed\n");
 720             return code;
 721         }
 722     }
 723
 724     listA = ntohl(block->b.bucket[blockOffset]);
 725
 726     if (listA == 0) {
 727         Log("ht_HashInList: expecting non-zero bucket\n");
 728         return 0;
 729     }
 730
 731     for (ea = listA; ea; ea = next_ea) {        /*f */
 732
 733         LogDebug(3, "ht_HashInList: move entry at %d, type %d\n", ea,
 734                  ntohl(mht->ht->functionType));
 735
 736         if (dbread(ut, ea, e, e_size))
 737             return BUDB_IO;
 738
 739         /* LogNetDump((struct dump *) e); */
 740
 741         /* get the address of the next item on the list */
 742         next_ea = ntohl(*(dbadr *) (e + mht->threadOffset));
 743
 744         /* write the link into the bucket */
 745         code =
 746             set_word_addr(ut, block->a, &block->b,
 747                           &block->b.bucket[blockOffset], htonl(next_ea));
 748         if (code) {
 749             Log("ht_HashInList: bucket update failed\n");
 750             return (code);
 751         }
 752
 753         {
 754             struct memoryHTBlock *block;
 755             int bo;
 756             afs_uint32 hash;
 757
 758             /* get the hash value */
 759             hash = ht_HashEntry(mht, e) % mht->length;
 760             LogDebug(4, "ht_HashInList: moved to %d\n", hash);
 761
 762             /* get the new hash table block */
 763             code = ht_GetTableBlock(ut, mht, hash, 0 /*old */ , &block, &bo);
 764             if (code) {
 765                 Log("ht_HashInList: ht_GetTableBlock failed\n");
 766                 return code;
 767             }
 768             if (block == 0) {
 769                 Log("ht_HashInList: ht_GetTableBlock returned 0\n");
 770                 return BUDB_INTERNALERROR;
 771             }
 772
 773             /* Chain entry at front of bucket;
 774              * first threadOffset of entry = bucket
 775              * then bucket = addr of entry
 776              */
 777             if (set_word_offset
 778                 (ut, ea, e, mht->threadOffset, block->b.bucket[bo])
 779                 || set_word_addr(ut, block->a, &block->b,
 780                                  &block->b.bucket[bo], htonl(ea)))
 781                 return BUDB_IO;
 782         }
 783
 784         if (--(*opQuota) == 0)
 785             break;
 786     }                           /*f */
 787     return 0;
 788 }
 789
 790
 791 /* ht_MoveEntries
 792  *      The hash table is needs to be re-sized. Move entries from the old
 793  *      to the new.
 794  */
 795
 796 static afs_int32
 797 ht_MoveEntries(struct ubik_trans *ut, struct memoryHashTable *mht)
 798 {
 799     struct memoryHTBlock *block;
 800     afs_uint32 hash;
 801     int count;
 802     int bo;
 803     afs_int32 code;
 804     afs_int32 *pprog;
 805
 806     if (mht->oldLength == 0)
 807         return 0;
 808
 809     LogDebug(3, "ht_MoveEntries:\n");
 810     /* we assume here that the hash function will map numbers smaller than the
 811      * size of the hash table straight through to hash table indexes.
 812      */
 813     hash = mht->progress;
 814
 815     /* get hash table block ? */
 816     code = ht_GetTableBlock(ut, mht, hash, 1 /*old */ , &block, &bo);
 817     if (code)
 818         return code;
 819
 820     if (block == 0)
 821         return BUDB_INTERNALERROR;
 822
 823     count = 10;                 /* max. # entries to move */
 824
 825     do {
 826         if (block->b.bucket[bo]) {
 827             code = ht_HashInList(ut, mht, &count, block, bo);
 828             if (code) {
 829                 Log("ht_MoveEntries: ht_HashInList failed\n");
 830                 return (BUDB_IO);
 831             }
 832         }
 833
 834         if (block->b.bucket[bo] == 0) {
 835             /* this bucket is now empty */
 836             mht->progress++;
 837         }
 838
 839         /* don't exceed the quota of items to be moved */
 840         if (count == 0)
 841             break;
 842
 843     } while (++bo < nHTBuckets);
 844
 845     if (mht->progress >= mht->oldLength)
 846         return (ht_FreeTable(ut, mht));
 847
 848     pprog = &mht->ht->progress;
 849     if (set_word_addr(ut, 0, &db.h, pprog, htonl(mht->progress))) {
 850         Log("ht_MoveEntries: progress set failed\n");
 851         return BUDB_IO;
 852     }
 853     return 0;
 854 }
 855
 856
 857 #ifdef notdef
 858 static afs_int32
 859 ht_MoveEntries(struct ubik_trans *ut, struct memoryHashTable *mht)
 860 {
 861     afs_uint32 hash;
 862     int bo;
 863     struct memoryHTBlock *block;
 864     afs_int32 code;
 865
 866     if (mht->oldLength == 0)
 867         return 0;
 868
 869     LogDebug(3, "ht_MoveEntries:\n");
 870     /* we assume here that the hash function will map numbers smaller than the
 871      * size of the hash table straight through to hash table indexes.
 872      */
 873     hash = mht->progress;
 874
 875     /* get hash table block ? */
 876     code = ht_GetTableBlock(ut, mht, hash, 1 /*old */ , &block, &bo);
 877     if (code)
 878         return code;
 879
 880     if (block == 0)
 881         return BUDB_INTERNALERROR;
 882
 883     do {
 884         mht->progress++;
 885         if (block->b.bucket[bo]) {
 886             code = ht_HashInList(ut, mht, ntohl(block->b.bucket[bo]));
 887             if (code) {
 888                 Log("ht_MoveEntries: ht_HashInList failed\n");
 889                 return (BUDB_IO);
 890             }
 891             code =
 892                 set_word_addr(ut, block->a, &block->b, &block->b.bucket[bo],
 893                               0);
 894             if (code) {
 895                 Log("ht_MoveEntries: clear old entry failed\n");
 896                 return BUDB_IO;
 897             }
 898             break;
 899         }
 900     } while (++bo < nHTBuckets);
 901
 902     if (mht->progress >= mht->oldLength)
 903         return (ht_FreeTable(ut, mht));
 904
 905     if (set_word_addr(ut, 0, &db.h, &mht->ht->progress, htonl(mht->progress))) {
 906         Log("ht_MoveEntries: progress set failed\n");
 907         return BUDB_IO;
 908     }
 909     return 0;
 910 }
 911 #endif /* notdef */
 912
 913 afs_int32
 914 ht_HashIn(struct ubik_trans *ut,
 915           struct memoryHashTable *mht,
 916           dbadr ea,                     /* block db address */
 917           void *e)                      /* entry's address (in b) */
 918 {
 919     struct hashTable *ht = NULL;
 920     afs_uint32 hash;
 921     struct memoryHTBlock *block;
 922     int bo;
 923     afs_int32 code;
 924     afs_int32 *pentries;
 925
 926     if (!(mht && (ht = mht->ht)))
 927         db_panic("some ht called with bad mht");
 928
 929     if ((code = ht_MaybeAdjust(ut, mht)))
 930         return code;
 931     if (mht->length == 0)
 932         if ((code = ht_AllocTable(ut, mht)))
 933             return code;
 934
 935     hash = ht_HashEntry(mht, e);
 936     code = ht_GetTableBlock(ut, mht, hash, 0 /*old */ , &block, &bo);
 937     if (code)
 938         return code;
 939     if (!block)
 940         return BUDB_INTERNALERROR;
 941
 942     code = set_word_offset(ut, ea, e, mht->threadOffset, block->b.bucket[bo]);
 943     if (code)
 944         return BUDB_IO;
 945     LogDebug(5, "Hashin: set %d to %d\n", mht->threadOffset,
 946              block->b.bucket[bo]);
 947
 948     code =
 949         set_word_addr(ut, block->a, &block->b, &block->b.bucket[bo],
 950                       htonl(ea));
 951     if (code)
 952         return BUDB_IO;
 953     LogDebug(5, "Hashin: set %"AFS_PTR_FMT" to %d\n",
 954              &block->b.bucket[bo], htonl(ea));
 955
 956     pentries = &ht->entries;
 957     code =
 958         set_word_addr(ut, 0, &db.h, pentries,
 959                       htonl(ntohl(ht->entries) + 1));
 960     if (code)
 961         return BUDB_IO;
 962
 963     return ht_MoveEntries(ut, mht);
 964 }
 965
 966 /* RemoveFromList - generic procedure to delete an entry from a list given its
 967  * head and thread offset.  Only a single long is modified by this routine.
 968  * The head pointer is modified, in place, using set_word_addr if the entry is
 969  * at the head of the list, otherwise only the thread of the previous entry is
 970  * modified.  The entry pointer is only used to calculate the thread offset,
 971  * but is not otherwise used. */
 972
 973 afs_int32
 974 RemoveFromList(struct ubik_trans *ut,
 975                dbadr ea,        /* db addr of head structure */
 976                void *e,         /* head structure */
 977                dbadr *head,     /* address of head pointer */
 978                dbadr ta,        /* db addr of strucure to be removed */
 979                void *t,         /* structure being removed */
 980                dbadr *thread)   /* pointer to thread pointer */
 981 {
 982     afs_int32 code;
 983     int threadOffset = ((char *)thread - (char *)t);
 984     dbadr next_a;               /* db addr of next element in list */
 985     dbadr loop_a;               /* db addr of current list element */
 986
 987     if (*head == 0)
 988         return -1;              /* empty list: not found */
 989     next_a = ntohl(*head);      /* start at head of list */
 990     if (next_a == ta) {         /* remove from head of list */
 991         code = set_word_addr(ut, ea, e, head, *thread);
 992         return code;
 993     }
 994     do {
 995         loop_a = next_a;
 996         code =
 997             dbread(ut, loop_a + threadOffset, (char *)&next_a, sizeof(dbadr));
 998         if (code)
 999             return code;
1000         if (next_a == 0)
1001             return -1;          /* end of list: not found */
1002     } while (ta != (next_a = ntohl(next_a)));
1003     code = dbwrite(ut, loop_a + threadOffset, (char *)thread, sizeof(dbadr));
1004     return code;
1005 }
1006
1007 afs_int32
1008 ht_HashOutT(struct ubik_trans *ut, struct memoryHashTable *mht,
1009             afs_uint32 hash, dbadr ea, char *e, int old)
1010 {
1011     struct memoryHTBlock *block;
1012     int bo;
1013     afs_int32 code;
1014     afs_int32 *pentries;
1015
1016     if ((old ? mht->oldLength : mht->length) == 0)
1017         return -1;
1018     code = ht_GetTableBlock(ut, mht, hash, old, &block, &bo);
1019     if (code)
1020         return code;
1021     if ((block == 0) || (block->b.bucket[bo] == 0))
1022         return -1;
1023
1024     code =
1025         RemoveFromList(ut, block->a, (char *)&block->b, &block->b.bucket[bo],
1026                        ea, e, (dbadr *) (e + mht->threadOffset));
1027     if (code)
1028         return code;
1029 #if 0
1030     net_ea = htonl(ea);
1031     unthread_ea = *(afs_int32 *) ((char *)e + mht->threadOffset);
1032     if (block->b.bucket[bo] == net_ea) {
1033         if (set_word_addr
1034             (ut, block->a, &block->b, &block->b.bucket[bo], unthread_ea))
1035             return BUDB_IO;
1036         goto done;
1037     }
1038     loop_a = ntohl(block->b.bucket[bo]);
1039     while (1) {
1040         if (dbread
1041             (ut, loop_a + mht->threadOffset, (char *)&next_loop_a,
1042              sizeof(dbadr)))
1043             return BUDB_IO;
1044         if (next_loop_a == 0)
1045             return -1;          /* not found */
1046         if (net_ea == next_loop_a) {
1047             if (dbwrite
1048                 (ut, loop_a + mht->threadOffset, (char *)&unthread_ea,
1049                  sizeof(dbadr)))
1050                 return BUDB_IO;
1051             goto done;
1052         }
1053         loop_a = ntohl(next_loop_a);
1054     }
1055   done:
1056 #endif
1057     pentries = &mht->ht->entries;
1058     if (set_word_addr
1059         (ut, 0, &db.h, pentries, htonl(ntohl(mht->ht->entries) - 1)))
1060         return BUDB_IO;
1061     return 0;
1062 }
1063
1064 afs_int32
1065 ht_HashOut(struct ubik_trans *ut, struct memoryHashTable *mht, dbadr ea,
1066            void *e)
1067 {
1068     afs_uint32 hash;
1069     afs_int32 code;
1070
1071     if (!mht)
1072         db_panic("some ht called with bad mht");
1073     hash = ht_HashEntry(mht, e);
1074     if (mht->oldLength) {
1075         code = ht_HashOutT(ut, mht, hash, ea, e, 1 /*old */ );
1076         if (code == 0)
1077             return 0;
1078         else if (code != -1)
1079             ERROR(code);
1080     }
1081     if (mht->length == 0)       /* not found */
1082         ERROR(BUDB_INTERNALERROR);
1083     code = ht_HashOutT(ut, mht, hash, ea, e, 0 /*old */ );
1084     if (code == -1)
1085         ERROR(BUDB_NOENT);
1086     if (code)
1087         ERROR(code);
1088
1089     code = ht_MoveEntries(ut, mht);
1090     if (code)
1091         ERROR(code);
1092     code = ht_MaybeAdjust(ut, mht);
1093     if (code)
1094         ERROR(code);
1095
1096   error_exit:
1097     return (code);
1098 }
1099
1100 /* generic hash table traversal routines */
1101
1102
1103 afs_int32
1104 scanHashTableBlock(struct ubik_trans *ut,
1105                    struct memoryHashTable *mhtPtr,
1106                    struct htBlock *htBlockPtr,
1107                    int old,
1108                    afs_int32 length,    /* size of whole hash table */
1109                    int index,           /* base index of this block */
1110                    int (*selectFn) (dbadr, void *, void *),
1111                    int (*operationFn) (dbadr, void *, void *),
1112                    void *rockPtr)
1113 {
1114     int type;                   /* hash table type */
1115     int entrySize;              /* hashed entry size */
1116
1117     char entry[sizeof(struct block)];
1118     dbadr entryAddr, nextEntryAddr;
1119
1120     int i;
1121
1122     type = ntohl(mhtPtr->ht->functionType);
1123     entrySize = sizeFunctions[type];
1124
1125     /* step through this hash table block, being careful to stop
1126      * before the end of the overall hash table
1127      */
1128
1129     for (i = 0; (i < nHTBuckets) && (index < length); i++, index++) {   /*f */
1130         entryAddr = 0;
1131         nextEntryAddr = ntohl(htBlockPtr->bucket[i]);
1132
1133         /* if this is the old hash table, all entries below the progress mark
1134          * should have been moved to the new hash table
1135          */
1136         if (old && (index < mhtPtr->progress) && nextEntryAddr)
1137             return BUDB_INTERNALERROR;
1138
1139         /* now walk down the chain of each bucket */
1140         while (nextEntryAddr) { /*w */
1141
1142             entryAddr = nextEntryAddr;
1143             if (dbread(ut, entryAddr, &entry[0], entrySize))
1144                 return (BUDB_INTERNALERROR);
1145
1146             if ((*selectFn) (entryAddr, &entry[0], rockPtr)) {
1147                 (*operationFn) (entryAddr, &entry[0], rockPtr);
1148             }
1149
1150             nextEntryAddr =
1151                 ntohl(*((dbadr *) (entry + mhtPtr->threadOffset)));
1152         }                       /*w */
1153
1154     }                           /*f */
1155
1156     return (0);
1157 }
1158
1159 afs_int32
1160 scanHashTable(struct ubik_trans *ut, struct memoryHashTable *mhtPtr,
1161               int (*selectFn) (dbadr, void *, void *),
1162               int (*operationFn) (dbadr, void *, void *),
1163               void *rockPtr)
1164 {
1165     struct htBlock hashTableBlock;
1166     dbadr tableAddr;            /* disk addr of hash block */
1167     int tableLength;            /* # entries */
1168     int blockLength;            /* # blocks */
1169     int hashIndex;
1170     int old;
1171     int i;
1172     afs_int32 code = 0;
1173
1174     extern int nHTBuckets;      /* # buckets in a hash table */
1175
1176     for (old = 0; old <= 1; old++) {    /*fo */
1177         if (old) {
1178             /* check the old hash table */
1179             tableLength = mhtPtr->oldLength;
1180             if (tableLength == 0)
1181                 continue;       /* nothing to do */
1182
1183             tableAddr = ntohl(mhtPtr->ht->oldTable);
1184         } else {
1185             /* check current hash table */
1186             tableLength = mhtPtr->length;
1187             if (tableLength == 0)
1188                 continue;       /* nothing to do */
1189
1190             tableAddr = ntohl(mhtPtr->ht->table);
1191         }
1192
1193         blockLength = (tableLength - 1) / nHTBuckets;
1194         hashIndex = 0;
1195
1196         /* follow the hash chain */
1197         for (i = 0; i <= blockLength; i++) {    /*fi */
1198             /* chain too short */
1199             if (tableAddr == 0)
1200                 ERROR(BUDB_DATABASEINCONSISTENT);
1201
1202             code =
1203                 dbread(ut, tableAddr, &hashTableBlock,
1204                        sizeof(hashTableBlock));
1205             if (code)
1206                 goto error_exit;
1207
1208             code =
1209                 scanHashTableBlock(ut, mhtPtr, &hashTableBlock, old,
1210                                    tableLength, hashIndex, selectFn,
1211                                    operationFn, rockPtr);
1212             if (code)
1213                 goto error_exit;
1214
1215             hashIndex += nHTBuckets;
1216             tableAddr = ntohl(hashTableBlock.h.next);
1217         }                       /*fi */
1218     }                           /*fo */
1219
1220   error_exit:
1221     return (code);
1222 }