src/apps/mail/Words.cpp

   1 /*
   2 Open Tracker License
   3
   4 Terms and Conditions
   5
   6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a copy of
   9 this software and associated documentation files (the "Software"), to deal in
  10 the Software without restriction, including without limitation the rights to
  11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  12 of the Software, and to permit persons to whom the Software is furnished to do
  13 so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice applies to all licensees
  16 and shall be included in all copies or substantial portions of the Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
  23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24
  25 Except as contained in this notice, the name of Be Incorporated shall not be
  26 used in advertising or otherwise to promote the sale, use or other dealings in
  27 this Software without prior written authorization from Be Incorporated.
  28
  29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
  30 of Be Incorporated in the United States and other countries. Other brand product
  31 names are registered trademarks or trademarks of their respective holders.
  32 All rights reserved.
  33 */
  34
  35
  36 #include "Words.h"
  37
  38 #include <ctype.h>
  39 #include <stdio.h>
  40 #include <string.h>
  41
  42
  43 enum {
  44         FIND_WORD,
  45         GET_WORD,
  46         GET_FLAGS
  47 };
  48
  49
  50 /*
  51         MAXMETAPH is the length of the Metaphone code.
  52
  53         Four is a good compromise value for English names. For comparing words
  54         which are not names or for some non-English names, use a longer code
  55         length for more precise matches.
  56
  57         The default here is 5.
  58 */
  59
  60 #define MAXMETAPH 6
  61
  62
  63 // Character coding array, A-Z
  64 static char vsvfn[26] = { 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0,
  65         2, 4, 4, 1, 0, 0, 0, 8, 0};
  66
  67
  68 static const char* gCmpKey;
  69
  70
  71 static int
  72 word_cmp(BString** firstArg, BString** secondArg)
  73 {
  74         return word_match(gCmpKey, (*firstArg)->String()) - word_match(gCmpKey,
  75                 (*secondArg)->String());
  76 }
  77
  78
  79 Words::Words(bool useMetaphone)
  80         :
  81         fUseMetaphone(useMetaphone)
  82 {
  83
  84 }
  85
  86
  87 Words::Words(BPositionIO* thes, bool useMetaphone)
  88         :
  89         WIndex(thes),
  90         fUseMetaphone(useMetaphone)
  91 {
  92
  93 }
  94
  95
  96 Words::~Words(void)
  97 {
  98
  99 }
 100
 101
 102 Words::Words(const char* dataPath, const char* indexPath, bool useMetaphone)
 103         :
 104         fUseMetaphone(useMetaphone)
 105 {
 106         if (!useMetaphone)
 107                 fEntrySize = sizeof(uint32);
 108         SetTo(dataPath, indexPath);
 109 }
 110
 111
 112 status_t
 113 Words::BuildIndex(void)
 114 {
 115         // Parse the Words file...
 116
 117         // Buffer Stuff
 118         char                    buffer[16384];
 119         char                    *nptr, *eptr;
 120         int64                   blockOffset;
 121         int32                   blockSize;
 122
 123         // The Word Entry
 124         WIndexEntry             entry;
 125         char                    entryName[256], *namePtr = entryName;
 126         char                    suffixName[256];
 127         char                    flags[32], *flagsPtr = flags;
 128
 129         // State Info
 130         int32                   state = FIND_WORD;
 131
 132         // Make sure we are at start of file
 133         fDataFile->Seek(0, SEEK_SET);
 134         entry.offset = -1;
 135
 136         // Read blocks from thes until eof
 137         while (true) {
 138                 // Get next block
 139                 blockOffset = fDataFile->Position();
 140                 if ((blockSize = fDataFile->Read(buffer, 16384)) == 0)
 141                         break;
 142
 143                 // parse block
 144                 for (nptr = buffer, eptr = buffer + blockSize; nptr < eptr; nptr++) {
 145                         // Looking for start of word?
 146                         if (state == FIND_WORD) {
 147                                 // Is start of word?
 148                                 if (isalpha(*nptr)) {
 149                                         state = GET_WORD;
 150                                         *namePtr++ = *nptr; // copy word
 151                                         entry.offset = blockOffset + (nptr - buffer);
 152                                 } else {
 153                                         entry.offset++;
 154                                 }
 155                         } else if ((*nptr == '\n') || (*nptr == '\r')) {
 156                                 // End of word?
 157
 158                                 if (namePtr != entryName) {
 159                                         // Add previous entry to word index
 160                                         *namePtr = 0; // terminate word
 161                                         *flagsPtr = 0; // terminate flags
 162                                         NormalizeWord(entryName, entryName);
 163                                         // Add base word
 164                                         entry.key = GetKey(entryName);
 165                                         AddItem(&entry);
 166
 167                                         // Add suffixed words if any
 168                                         if (flagsPtr != flags) {
 169                                                 // printf("Base: %s, flags: %s\n", entryName, flags);
 170                                                 for (flagsPtr = flags; *flagsPtr != 0; flagsPtr++) {
 171                                                         if (suffix_word(suffixName, entryName, *flagsPtr)) {
 172                                                                 // printf("Suffix: %s\n", suffixName);
 173                                                                 entry.key = GetKey(suffixName);
 174                                                                 AddItem(&entry);
 175                                                         }
 176                                                 }
 177                                         }
 178                                 }
 179                                 // Init new entry
 180                                 state = FIND_WORD;
 181                                 namePtr = entryName;
 182                                 flagsPtr = flags;
 183                         } else if (state == GET_WORD) {
 184                                 // Start of flags?
 185                                 if (*nptr == '/') {
 186                                         *namePtr = 0; // terminate word
 187                                         // printf("Found word: %s\n", entryName);
 188
 189                                         state = GET_FLAGS;
 190                                 } else {
 191                                         *namePtr++ = *nptr; // copy word
 192                                 }
 193                         } else if (state == GET_FLAGS) // Are we getting the flags?
 194                                 *flagsPtr++ = *nptr; // copy flag
 195                 }       // End for (nptr = buffer, eptr = buffer + blockSize;
 196                                 // nptr < eptr; nptr++, entry.size++)
 197         } // End while (true)
 198
 199         SortItems();
 200         return B_OK;
 201 }
 202
 203
 204 int32
 205 Words::GetKey(const char* s)
 206 {
 207         if (fUseMetaphone) {
 208                 char            Metaph[12];
 209                 const char      *sPtr;
 210                 int32           key = 0;
 211                 int32           offset;
 212                 char            c;
 213
 214                 metaphone(s, Metaph, GENERATE);
 215                 // Compact Metaphone from 6 bytes to 4
 216
 217                 // printf("%s -> %s: \n", s, Metaph);
 218
 219                 for (sPtr = Metaph, offset = 25; *sPtr; sPtr++, offset -= 5) {
 220                         c = *sPtr - 'A';
 221                         // printf("%d,", int16(c));
 222                         key |= int32(c) << offset;
 223                 }
 224                 for (; offset >= 0; offset -= 5)
 225                         key |= int32(31) << offset;
 226                 // printf(": %ld\n", key);
 227                 return key;
 228         } else {
 229                 return WIndex::GetKey(s);
 230         }
 231 }
 232
 233
 234 // Macros to access the character coding array
 235 #define vowel(x)        (vsvfn[(x) - 'A'] & 1)  // AEIOU
 236 #define same(x)         (vsvfn[(x) - 'A'] & 2)  // FJLMNR
 237 #define varson(x)       (vsvfn[(x) - 'A'] & 4)  // CGPST
 238 #define frontv(x)       (vsvfn[(x) - 'A'] & 8)  // EIY
 239 #define noghf(x)        (vsvfn[(x) - 'A'] & 16) // BDH
 240 #define NUL '\0'
 241
 242 /*
 243         metaphone()
 244
 245         Arguments:
 246         1 - The word to be converted to a metaphone code.
 247         2 - A MAXMETAPH + 1 char field for the result.
 248         3 - Function flag:
 249         If 0: Compute the Metaphone code for the first argument,
 250                 then compare it to the Metaphone code passed in the second argument.
 251         If 1: Compute the Metaphone code for the first argument,
 252                 then store the result in the area pointed to by the second argument.
 253
 254         Returns:
 255         If function code is 0, returns Success_ for a match, else Error_.
 256         If function code is 1, returns Success_.
 257 */
 258
 259
 260 bool
 261 metaphone(const char* Word, char* Metaph, metaphlag Flag)
 262 {
 263         char *n, *n_start, *n_end;                      // Pointers to string
 264         char *metaph = NULL, *metaph_end;       // Pointers to metaph
 265         char ntrans[512];                                       // Word with uppercase letters
 266         char newm[MAXMETAPH + 4];                       // New metaph for comparison
 267         int KSflag;                                                     // State flag for X translation
 268
 269         // Copy word to internal buffer, dropping non-alphabetic characters
 270         // and converting to upper case.
 271
 272         for (n = ntrans + 1, n_end = ntrans + sizeof(ntrans) - 2;
 273                 *Word && n < n_end; ++Word) {
 274                 if (isalpha(*Word))
 275                         *n++ = toupper(*Word);
 276         }
 277
 278         if (n == ntrans + 1)
 279                 return false;           // Return if zero characters
 280         else
 281                         n_end = n;              //      Set end of string pointer
 282
 283         // Pad with NULs, front and rear
 284
 285         *n++ = NUL;
 286         *n = NUL;
 287         n = ntrans;
 288         *n++ = NUL;
 289
 290         // If doing comparison, redirect pointers
 291
 292         if (COMPARE == Flag) {
 293                 metaph = Metaph;
 294                 Metaph = newm;
 295         }
 296
 297         // Check for PN, KN, GN, WR, WH, and X at start
 298
 299         switch (*n) {
 300                 case 'P':
 301                 case 'K':
 302                 case 'G':
 303                         if ('N' == *(n + 1))
 304                                 *n++ = NUL;
 305                         break;
 306
 307                 case 'A':
 308                         if ('E' == *(n + 1))
 309                                 *n++ = NUL;
 310                         break;
 311
 312                 case 'W':
 313                         if ('R' == *(n + 1)) {
 314                                 *n++ = NUL;
 315                         } else if ('H' == *(n + 1)) {
 316                                 *(n + 1) = *n;
 317                                 *n++ = NUL;
 318                         }
 319                         break;
 320
 321                 case 'X':
 322                         *n = 'S';
 323                         break;
 324         }
 325
 326         // Now loop through the string, stopping at the end of the string
 327         // or when the computed Metaphone code is MAXMETAPH characters long.
 328
 329         KSflag = false; // State flag for KStranslation
 330
 331         for (metaph_end = Metaph + MAXMETAPH, n_start = n;
 332                 n <= n_end && Metaph < metaph_end; ++n) {
 333                 if (KSflag) {
 334                         KSflag = false;
 335                         *Metaph++ = *n;
 336                 } else {
 337                         // Drop duplicates except for CC
 338
 339                         if (*(n - 1) == *n && *n != 'C')
 340                                 continue;
 341
 342                         // Check for F J L M N R or first letter vowel
 343
 344                         if (same(*n) || (n == n_start && vowel(*n))) {
 345                                 *Metaph++ = *n;
 346                         } else {
 347                                 switch (*n) {
 348                                         case 'B':
 349                                                 if (n < n_end || *(n - 1) != 'M')
 350                                                         *Metaph++ = *n;
 351                                                 break;
 352
 353                                         case 'C':
 354                                                 if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
 355                                                         if ('I' == *(n + 1) && 'A' == *(n + 2))
 356                                                                 *Metaph++ = 'X';
 357                                                         else if (frontv(*(n + 1)))
 358                                                                 *Metaph++ = 'S';
 359                                                         else if ('H' == *(n + 1)) {
 360                                                                 *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
 361                                                                         || 'S' == *(n - 1)) ? 'K' : 'X';
 362                                                         } else {
 363                                                                 *Metaph++ = 'K';
 364                                                         }
 365                                                 }
 366                                                 break;
 367
 368                                         case 'D':
 369                                                 *Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2)))
 370                                                         ? 'J' : 'T';
 371                                                 break;
 372
 373                                         case 'G':
 374                                                 if ((*(n + 1) != 'H' || vowel(*(n + 2)))
 375                                                         && (*(n + 1) != 'N' || ((n + 1) < n_end
 376                                                                 && (*(n + 2) != 'E' || *(n + 3) != 'D')))
 377                                                         && (*(n - 1) != 'D' || !frontv(*(n + 1)))) {
 378                                                         *Metaph++ = (frontv(*(n + 1))
 379                                                                 && *(n + 2) != 'G') ? 'J' : 'K';
 380                                                 } else if ('H' == *(n + 1) && !noghf(*(n - 3))
 381                                                         && *(n - 4) != 'H') {
 382                                                         *Metaph++ = 'F';
 383                                                 }
 384                                                 break;
 385
 386                                         case 'H':
 387                                                 if (!varson(*(n - 1))
 388                                                         && (!vowel(*(n - 1)) || vowel(*(n + 1)))) {
 389                                                         *Metaph++ = 'H';
 390                                                 }
 391                                                 break;
 392
 393                                         case 'K':
 394                                                 if (*(n - 1) != 'C')
 395                                                         *Metaph++ = 'K';
 396                                                 break;
 397
 398                                         case 'P':
 399                                                 *Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
 400                                                 break;
 401
 402                                         case 'Q':
 403                                                 *Metaph++ = 'K';
 404                                                 break;
 405
 406                                         case 'S':
 407                                                 *Metaph++ = ('H' == *(n + 1) || ('I' == *(n + 1)
 408                                                                 && ('O' == *(n + 2) || 'A' == *(n + 2))))
 409                                                         ? 'X' : 'S';
 410                                                 break;
 411
 412                                         case 'T':
 413                                                 if ('I' == *(n + 1)
 414                                                         && ('O' == *(n + 2) || 'A' == *(n + 2))) {
 415                                                         *Metaph++ = 'X';
 416                                                 } else if ('H' == *(n + 1)) {
 417                                                         *Metaph++ = 'O';
 418                                                 } else if (*(n + 1) != 'C' || *(n + 2) != 'H') {
 419                                                         *Metaph++ = 'T';
 420                                                 }
 421                                                 break;
 422
 423                                         case 'V':
 424                                                 *Metaph++ = 'F';
 425                                                 break;
 426
 427                                         case 'W':
 428                                         case 'Y':
 429                                                 if (vowel(*(n + 1)))
 430                                                         *Metaph++ = *n;
 431                                                 break;
 432
 433                                         case 'X':
 434                                                 if (n == n_start) {
 435                                                         *Metaph++ = 'S';
 436                                                 } else {
 437                                                         *Metaph++ = 'K';
 438                                                         KSflag = true;
 439                                                 }
 440                                                 break;
 441
 442                                         case 'Z':
 443                                                 *Metaph++ = 'S';
 444                                                 break;
 445                                 }
 446                         }
 447                 }
 448
 449                 // Compare new Metaphone code with old
 450                 if (COMPARE == Flag
 451                         && *(Metaph - 1) != metaph[(Metaph - newm) - 1]) {
 452                         return false;
 453                 }
 454         }
 455
 456         // If comparing, check if Metaphone codes were equal in length
 457         if (COMPARE == Flag && metaph[Metaph - newm])
 458                 return false;
 459
 460         *Metaph = NUL;
 461         return true;
 462 }
 463
 464
 465 int
 466 word_match(const char* reference, const char* test)
 467 {
 468         const char      *s1, *s2;
 469         int32           x = 0;
 470         char            c1, c2;
 471         s1 = test;
 472         s2 = reference;
 473
 474         bool a, b;
 475
 476         while (*s2 || *s1) {
 477                 c1 = tolower(*s1);
 478                 c2 = tolower(*s2);
 479
 480                 if (*s2 && *s1) {
 481                         if (c1 != c2) {
 482                                 a = (tolower(s1[1]) == c2);
 483                                 b = (tolower(s2[1]) == c1);
 484                                 // Reversed pair
 485                                 if (a && b) {
 486                                         x += 1;
 487                                         s1++;
 488                                         s2++;
 489                                 }
 490                                 // Extra character
 491                                 if (a) {
 492                                         x += 1;
 493                                         s1++;
 494                                 }
 495                                 // Missing Character
 496                                 else if (b) {
 497                                         x += 1;
 498                                         s2++;
 499                                 }
 500                                 // Equivalent Character
 501                                 else if (vsvfn[(unsigned)c1] == vsvfn[(unsigned)c2])
 502                                         x++;
 503                                 // Unrelated Character
 504                                 else
 505                                         x += 3;
 506                         }
 507                 } else {
 508                         x += 1;
 509                 }
 510
 511                 if (*s2)
 512                         s2++;
 513                 if (*s1)
 514                         s1++;
 515         }
 516
 517         return x;
 518 }
 519
 520
 521 int32
 522 suffix_word(char* dst, const char* src, char flag)
 523 {
 524         char* end;
 525
 526         end = stpcpy(dst, src);
 527         flag = toupper(flag);
 528         switch(flag) {
 529                 case 'V':
 530                         switch(end[-1]) {
 531                                 case 'e':
 532                                         end = stpcpy(end - 1, "ive");
 533                                         break;
 534                                 default:
 535                                         end = stpcpy(end, "ive");
 536                                         break;
 537                         }
 538                         break;
 539                 case 'N':
 540                         switch(end[-1]) {
 541                                 case 'e':
 542                                         end = stpcpy(end - 1, "ion");
 543                                         break;
 544                                 case 'y':
 545                                         end = stpcpy(end - 1, "ication");
 546                                         break;
 547                                 default:
 548                                         end = stpcpy(end, "en");
 549                                         break;
 550                         }
 551                         break;
 552                 case 'X':
 553                         switch(end[-1]) {
 554                                 case 'e':
 555                                         end = stpcpy(end - 1, "ions");
 556                                         break;
 557                                 case 'y':
 558                                         end = stpcpy(end - 1, "ications");
 559                                         break;
 560                                 default:
 561                                         end = stpcpy(end, "ens");
 562                                         break;
 563                         }
 564                         break;
 565                 case 'H':
 566                         switch(end[-1]) {
 567                                 case 'y':
 568                                         end = stpcpy(end - 1, "ieth");
 569                                         break;
 570                                 default:
 571                                         end = stpcpy(end, "th");
 572                                         break;
 573                         }
 574                         break;
 575                 case 'Y':
 576                         end = stpcpy(end, "ly");
 577                         break;
 578                 case 'G':
 579                         switch(end[-1]) {
 580                                 case 'e':
 581                                         end = stpcpy(end - 1, "ing");
 582                                         break;
 583                                 default:
 584                                         end = stpcpy(end, "ing");
 585                                         break;
 586                         }
 587                         break;
 588                 case 'J':
 589                         switch(end[-1]) {
 590                                 case 'e':
 591                                         end = stpcpy(end - 1, "ings");
 592                                         break;
 593                                 default:
 594                                         end = stpcpy(end, "ings");
 595                                         break;
 596                         }
 597                         break;
 598                 case 'D':
 599                         switch(end[-1]) {
 600                                 case 'e':
 601                                         end = stpcpy(end - 1, "ed");
 602                                         break;
 603                                 case 'y':
 604                                         if (!strchr("aeiou", end[-2])) {
 605                                                 end = stpcpy(end - 1, "ied");
 606                                                 break;
 607                                         }
 608                                         // Fall through
 609                                 default:
 610                                         end = stpcpy(end, "ed");
 611                                         break;
 612                         }
 613                         break;
 614                 case 'T':
 615                         switch(end[-1]) {
 616                                 case 'e':
 617                                         end = stpcpy(end - 1, "est");
 618                                         break;
 619                                 case 'y':
 620                                         if (!strchr("aeiou", end[-2])) {
 621                                                 end = stpcpy(end - 1, "iest");
 622                                                 break;
 623                                         }
 624                                         // Fall through
 625                                 default:
 626                                         end = stpcpy(end, "est");
 627                                         break;
 628                         }
 629                         break;
 630                 case 'R':
 631                         switch(end[-1]) {
 632                                 case 'e':
 633                                         end = stpcpy(end - 1, "er");
 634                                         break;
 635                                 case 'y':
 636                                         if (!strchr("aeiou", end[-2])) {
 637                                                 end = stpcpy(end - 1, "ier");
 638                                                 break;
 639                                         }
 640                                         // Fall through
 641                                 default:
 642                                         end = stpcpy(end, "er");
 643                                         break;
 644                         }
 645                         break;
 646                 case 'Z':
 647                         switch(end[-1]) {
 648                                 case 'e':
 649                                         end = stpcpy(end - 1, "ers");
 650                                         break;
 651                                 case 'y':
 652                                         if (!strchr("aeiou", end[-2])) {
 653                                                 end = stpcpy(end - 1, "iers");
 654                                                 break;
 655                                         }
 656                                         // Fall through
 657                                 default:
 658                                         end = stpcpy(end, "ers");
 659                                         break;
 660                         }
 661                         break;
 662                 case 'S':
 663                         switch(end[-1]) {
 664                                 case 's':
 665                                 case 'x':
 666                                 case 'z':
 667                                 case 'h':
 668                                         end = stpcpy(end, "es");
 669                                         break;
 670                                 case 'y':
 671                                         if (!strchr("aeiou", end[-2])) {
 672                                                 end = stpcpy(end - 1, "ies");
 673                                                 break;
 674                                         }
 675                                         // Fall through
 676                                 default:
 677                                         end = stpcpy(end, "s");
 678                                         break;
 679                         }
 680                         break;
 681                 case 'P':
 682                         switch(end[-1]) {
 683                                 case 'y':
 684                                         if (!strchr("aeiou", end[-2])) {
 685                                                 end = stpcpy(end - 1, "iness");
 686                                                 break;
 687                                         }
 688                                         // Fall through
 689                                 default:
 690                                         end = stpcpy(end, "ness");
 691                                         break;
 692                         }
 693                         break;
 694                 case 'M':
 695                         end = stpcpy(end, "'s");
 696                         break;
 697                 default:
 698                         return 0;
 699         }
 700         return end - dst;
 701 }
 702
 703
 704 int32
 705 Words::FindBestMatches(BList* matches, const char* s)
 706 {
 707         int32           index;
 708         // printf("*** Looking for %s: ***\n", s);
 709
 710         if ((index = FindFirst(s)) >= 0) {
 711                 BString srcWord(s);
 712                 FileEntry* entry;
 713                 WIndexEntry* indexEntry;
 714
 715                 int32           key = (ItemAt(index))->key;
 716                 int32           suffixLength;
 717                 char            word[128], suffixWord[128];
 718                 const char      *src, *testWord;
 719                 const char      *suffixFlags;
 720                 char            *dst;
 721
 722                 gCmpKey = srcWord.String();
 723
 724                 uint8           hashTable[32];
 725                 uint8           hashValue, highHash, lowHash;
 726                 for (int32 i = 0; i < 32; i++)
 727                         hashTable[i] = 0;
 728
 729                 do {
 730                         indexEntry = ItemAt(index);
 731                         // Hash the entry offset; we use this to make sure we don't add
 732                         // the same word file entry twice;
 733                         // It is possible for the same entry in the words file to have
 734                         // multiple entries in the index.
 735
 736                         hashValue = indexEntry->offset % 256;
 737                         highHash = hashValue >> 3;
 738                         lowHash = 0x01 << (hashValue & 0x07);
 739
 740                         // printf("Testing Entry: %ld: hash=%d, highHash=%d, lowHash=%d\n",
 741                         // indexEntry->offset, hashValue, (uint16)highHash,
 742                         // (uint16)lowHash);
 743
 744                         // Has this entry offset been seen before?
 745                         if (!(hashTable[highHash] & lowHash)) {
 746                                 // printf("New Entry\n");
 747                                 hashTable[highHash] |= lowHash; // Mark this offset so we don't add it twice
 748
 749                                 entry = GetEntry(index);
 750                                 src = entry->String();
 751                                 while (*src && !isalpha(*src))
 752                                         src++;
 753                                 dst = word;
 754                                 while (*src && *src != '/')
 755                                         *dst++ = *src++;
 756                                 *dst = 0;
 757                                 if (*src == '/')
 758                                         suffixFlags = src + 1;
 759                                 else
 760                                         suffixFlags = src;
 761
 762                                 // printf("Base Word: %s\n", word);
 763                                 // printf("Flags: %s\n", suffixFlags);
 764                                 testWord = word; // Test the base word first
 765                                 do {
 766                                         // printf("Testing: %s\n", testWord);
 767                                         // Does this word match the key
 768                                         if ((GetKey(testWord) == key)
 769                                         // And does it look close enough to the compare key?
 770                                         // word_match(gCmpKey, testWord)
 771                                         //      <= int32((strlen(gCmpKey)-1)/2))
 772                                                 && word_match(gCmpKey, testWord)
 773                                                         <= int32(float(strlen(gCmpKey)-1)*.75)) {
 774                                                 // printf("Added: %s\n", testWord);
 775                                                 matches->AddItem((void*)(new BString(testWord)));
 776                                         }
 777
 778                                         // If suffix, transform and test
 779                                         if (*suffixFlags) {
 780                                                 // Repeat until valid suffix found or end is reached
 781                                                 suffixLength = 0;
 782                                                 while (*suffixFlags
 783                                                         && !(suffixLength = suffix_word(suffixWord,
 784                                                                 word, *suffixFlags++))) {}
 785                                                 if (suffixLength)
 786                                                         testWord = suffixWord;
 787                                                 else
 788                                                         testWord = NULL;
 789                                         } else {
 790                                                 testWord = NULL;
 791                                         }
 792                                 } while (testWord);
 793                                 delete entry;
 794                         }
 795                         // else printf("Redundant entry\n");
 796
 797                         index++;
 798                 } while (key == (ItemAt(index))->key);
 799
 800                 return matches->CountItems();
 801         } else {
 802                 return 0;
 803         }
 804 }
 805
 806
 807 void
 808 sort_word_list(BList* matches, const char* reference)
 809 {
 810         if (matches->CountItems() > 0) {
 811                 BString srcWord(reference);
 812                 gCmpKey = srcWord.String();
 813                 matches->SortItems((int(*)(const void*, const void*))word_cmp);
 814         }
 815 }
 816