1 Index: src/hunspell/affixmgr.cxx
2 ===================================================================
3 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/affixmgr.cxx,v
4 retrieving revision 1.41
5 diff -u -r1.41 affixmgr.cxx
6 --- src/hunspell/affixmgr.cxx 16 Dec 2011 09:15:34 -0000 1.41
7 +++ src/hunspell/affixmgr.cxx 29 May 2014 01:05:07 -0000
12 +#ifdef HUNSPELL_CHROME_CLIENT
13 +AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md)
15 + bdict_reader = reader;
17 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
20 // register hash manager and load affix data from aff file
27 +#ifdef HUNSPELL_CHROME_CLIENT
28 + // Define dummy parameters for parse_file() to avoid changing the parameters
29 + // of parse_file(). This may make it easier to merge the changes of the
30 + // original hunspell.
31 + const char* affpath = NULL;
32 + const char* key = NULL;
34 for (int j=0; j < CONTSIZE; j++) {
39 if (parse_file(affpath, key)) {
40 HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
42 char * line; // io buffers
43 char ft; // affix type
45 +#ifdef HUNSPELL_CHROME_CLIENT
46 + // open the affix file
47 + // We're always UTF-8
50 + // A BDICT file stores PFX and SFX lines in a special section and it provides
51 + // a special line iterator for reading PFX and SFX lines.
52 + // We create a FileMgr object from this iterator and parse PFX and SFX lines
53 + // before parsing other lines.
54 + hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator();
55 + FileMgr* iterator = new FileMgr(&affix_iterator);
57 + HUNSPELL_WARNING(stderr,
58 + "error: could not create a FileMgr from an affix line iterator.\n");
62 + while ((line = iterator->getline())) {
64 + if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
65 + if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
67 + parse_affix(line, ft, iterator, NULL);
71 + // Create a FileMgr object for reading lines except PFX and SFX lines.
72 + // We don't need to change the loop below since our FileMgr emulates the
74 + hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator();
75 + FileMgr * afflst = new FileMgr(&other_iterator);
77 + HUNSPELL_WARNING(stderr,
78 + "error: could not create a FileMgr from an other line iterator.\n");
82 // checking flag duplication
83 char dupflags[CONTSIZE];
84 char dupflags_ini = 1;
86 HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
91 // step one is to parse the affix file building up the internal
92 // affix data structures
94 while ((line = afflst->getline()) != NULL) {
97 +#ifndef HUNSPELL_CHROME_CLIENT
98 /* remove byte order mark */
102 memmove(line, line+3, strlen(line+3)+1);
107 /* parse in the keyboard string */
108 if (strncmp(line,"KEY",3) == 0) {
113 +#ifndef HUNSPELL_CHROME_CLIENT
114 /* parse in the typical fault correcting table */
115 if (strncmp(line,"REP",3) == 0) {
116 if (parse_reptable(line, afflst)) {
123 /* parse in the input conversion table */
124 if (strncmp(line,"ICONV",5) == 0) {
129 +#ifndef HUNSPELL_CHROME_CLIENT
130 /* parse this affix: P - prefix, S - suffix */
132 if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
140 finishFileMgr(afflst);
141 @@ -1307,6 +1365,26 @@
145 +#ifdef HUNSPELL_CHROME_CLIENT
146 + const char *pattern, *pattern2;
147 + hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();
148 + while (iterator.GetNext(&pattern, &pattern2)) {
150 + lenr = strlen(pattern2);
151 + lenp = strlen(pattern);
153 + // search every occurrence of the pattern in the word
154 + while ((r=strstr(r, pattern)) != NULL) {
155 + strcpy(candidate, word);
156 + if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
157 + strcpy(candidate+(r-word), pattern2);
158 + strcpy(candidate+(r-word)+lenr, r+lenp);
159 + if (candidate_check(candidate,strlen(candidate))) return 1;
160 + r++; // search for the next letter
165 if ((wl < 2) || !numrep) return 0;
167 for (int i=0; i < numrep; i++ ) {
168 @@ -1323,6 +1401,7 @@
169 r++; // search for the next letter
176 @@ -4219,6 +4298,7 @@
179 aflag = pHMgr->decode_flag(piece);
180 +#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.
181 if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
182 ((at == 'P') && (dupflags[aflag] & dupPFX))) {
183 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
184 @@ -4226,6 +4306,7 @@
185 // return 1; XXX permissive mode for bad dictionaries
187 dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
191 // piece 3 - is cross product indicator
192 Index: src/hunspell/affixmgr.hxx
193 ===================================================================
194 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/affixmgr.hxx,v
195 retrieving revision 1.15
196 diff -u -r1.15 affixmgr.hxx
197 --- src/hunspell/affixmgr.hxx 13 Oct 2011 13:41:54 -0000 1.15
198 +++ src/hunspell/affixmgr.hxx 29 May 2014 01:05:07 -0000
203 +#ifdef HUNSPELL_CHROME_CLIENT
207 +// This class provides an implementation of the contclasses array in AffixMgr
208 +// that is normally a large static array. We should almost never need more than
209 +// 256 elements, so this class only allocates that much to start off with. If
210 +// elements higher than that are actually used, we'll automatically expand.
214 + // Pre-allocate a buffer so that typically, we'll never have to resize.
218 + char& operator[](size_t index) {
219 + EnsureSizeIs(index + 1);
220 + return data[index];
223 + void EnsureSizeIs(size_t new_size) {
224 + if (data.size() >= new_size)
225 + return; // Nothing to do.
227 + size_t old_size = data.size();
228 + data.resize(new_size);
229 + memset(&data[old_size], 0, new_size - old_size);
232 + std::vector<char> data;
235 +#endif // HUNSPELL_CHROME_CLIENT
237 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
240 @@ -106,12 +140,20 @@
243 int havecontclass; // boolean variable
244 +#ifdef HUNSPELL_CHROME_CLIENT
245 + ContClasses contclasses;
247 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
252 +#ifdef HUNSPELL_CHROME_CLIENT
253 + AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md);
255 AffixMgr(const char * affpath, HashMgr** ptr, int * md,
256 const char * key = NULL);
259 struct hentry * affix_check(const char * word, int len,
260 const unsigned short needflag = (unsigned short) 0,
262 int get_fullstrip() const;
265 +#ifdef HUNSPELL_CHROME_CLIENT
266 + // Not owned by us, owned by the Hunspell object.
267 + hunspell::BDictReader* bdict_reader;
269 int parse_file(const char * affpath, const char * key);
270 int parse_flag(char * line, unsigned short * out, FileMgr * af);
271 int parse_num(char * line, int * out, FileMgr * af);
277 Index: src/hunspell/filemgr.cxx
278 ===================================================================
279 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/filemgr.cxx,v
280 retrieving revision 1.5
281 diff -u -r1.5 filemgr.cxx
282 --- src/hunspell/filemgr.cxx 23 Jun 2011 09:21:50 -0000 1.5
283 +++ src/hunspell/filemgr.cxx 29 May 2014 01:05:07 -0000
286 #include "filemgr.hxx"
288 +#ifdef HUNSPELL_CHROME_CLIENT
289 +#include "third_party/hunspell/google/bdict_reader.h"
291 +FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) {
294 +FileMgr::~FileMgr() {
297 +char * FileMgr::getline() {
298 + // Read one line from a BDICT file and store the line to our line buffer.
299 + // To emulate the original FileMgr::getline(), this function returns
300 + // the pointer to our line buffer if we can read a line without errors.
301 + // Otherwise, this function returns NULL.
302 + bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1);
303 + return result ? line_ : NULL;
306 +int FileMgr::getlinenum() {
307 + // This function is used only for displaying the line number that caused a
308 + // parser error. For a BDICT file, a line number does not help much in
309 + // identifying the place that caused a parser error, since it is a
310 + // binary file. So, we just return 0.
314 int FileMgr::fail(const char * err, const char * par) {
315 fprintf(stderr, err, par);
318 int FileMgr::getlinenum() {
322 Index: src/hunspell/filemgr.hxx
323 ===================================================================
324 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/filemgr.hxx,v
325 retrieving revision 1.3
326 diff -u -r1.3 filemgr.hxx
327 --- src/hunspell/filemgr.hxx 15 Apr 2010 11:22:08 -0000 1.3
328 +++ src/hunspell/filemgr.hxx 29 May 2014 01:05:07 -0000
330 #include "hunzip.hxx"
333 +#ifdef HUNSPELL_CHROME_CLIENT
334 +namespace hunspell {
336 +} // namespace hunspell
338 +// A class which encapsulates operations of reading a BDICT file.
339 +// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is
340 +// a binary file converted from a DIC file and an AFF file. (See
341 +// "bdict_reader.h" for its format.)
342 +// This class encapsulates the operations of reading a BDICT file and emulates
343 +// the original FileMgr operations for AffixMgr so that it can read a BDICT
344 +// file without so many changes.
347 + FileMgr(hunspell::LineIterator* iterator);
353 + hunspell::LineIterator* iterator_;
354 + char line_[BUFSIZE + 50]; // input buffer
357 class LIBHUNSPELL_DLL_EXPORTED FileMgr
365 Index: src/hunspell/hashmgr.cxx
366 ===================================================================
367 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/hashmgr.cxx,v
368 retrieving revision 1.12
369 diff -u -r1.12 hashmgr.cxx
370 --- src/hunspell/hashmgr.cxx 23 Jun 2011 09:21:50 -0000 1.12
371 +++ src/hunspell/hashmgr.cxx 29 May 2014 01:05:07 -0000
374 // build a hash table from a munched word list
376 +#ifdef HUNSPELL_CHROME_CLIENT
377 +HashMgr::HashMgr(hunspell::BDictReader* reader)
379 + bdict_reader = reader;
381 HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
386 flag_mode = FLAG_CHAR;
390 forbiddenword = FORBIDDENWORD; // forbidden word signing flag
391 +#ifdef HUNSPELL_CHROME_CLIENT
392 + // No tables to load, just the AF lines.
393 + load_config(NULL, NULL);
394 + int ec = LoadAFLines();
396 load_config(apath, key);
397 int ec = load_tables(tpath, key);
400 /* error condition - what should we do here */
401 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
403 if (ignorechars) free(ignorechars);
404 if (ignorechars_utf16) free(ignorechars_utf16);
406 +#ifdef HUNSPELL_CHROME_CLIENT
407 + EmptyHentryCache();
408 + for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
409 + it != pointer_to_strings_.end(); ++it) {
413 #ifdef MOZILLA_CLIENT
418 +#ifdef HUNSPELL_CHROME_CLIENT
419 +void HashMgr::EmptyHentryCache() {
420 + // We need to delete each cache entry, and each additional one in the linked
421 + // list of homonyms.
422 + for (HEntryCache::iterator i = hentry_cache.begin();
423 + i != hentry_cache.end(); ++i) {
424 + hentry* cur = i->second;
426 + hentry* next = cur->next_homonym;
427 + DeleteHashEntry(cur);
431 + hentry_cache.clear();
435 // lookup a root word in the hashtable
437 struct hentry * HashMgr::lookup(const char *word) const
439 +#ifdef HUNSPELL_CHROME_CLIENT
440 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
441 + int affix_count = bdict_reader->FindWord(word, affix_ids);
442 + if (affix_count == 0) { // look for custom added word
443 + std::map<base::StringPiece, int>::const_iterator iter =
444 + custom_word_to_affix_id_map_.find(word);
445 + if (iter != custom_word_to_affix_id_map_.end()) {
447 + affix_ids[0] = iter->second;
451 + static const int kMaxWordLen = 128;
452 + static char word_buf[kMaxWordLen];
453 + // To leave room for the null terminator, we copy at most 127 bytes.
454 + strncpy(word_buf, word, kMaxWordLen - 1);
456 + return AffixIDsToHentry(word_buf, affix_ids, affix_count);
460 dp = tableptr[hash(word)];
461 @@ -109,12 +164,14 @@
468 // add a word to the hash table (private)
469 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
470 int al, const char * desc, bool onlyupcase)
472 +#ifndef HUNSPELL_CHROME_CLIENT
473 bool upcasehomonym = false;
474 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
475 // variable-length hash record with word and optional fields
477 if (hp->astr) free(hp->astr);
481 + std::map<base::StringPiece, int>::iterator iter =
482 + custom_word_to_affix_id_map_.find(word);
483 + if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
484 + std::string* new_string_word = new std::string(word);
485 + pointer_to_strings_.push_back(new_string_word);
486 + base::StringPiece sp(*(new_string_word));
487 + custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
495 // remove word (personal dictionary function for standalone applications)
496 int HashMgr::remove(const char * word)
498 +#ifdef HUNSPELL_CHROME_CLIENT
499 + std::map<base::StringPiece, int>::iterator iter =
500 + custom_word_to_affix_id_map_.find(word);
501 + if (iter != custom_word_to_affix_id_map_.end())
502 + custom_word_to_affix_id_map_.erase(iter);
504 struct hentry * dp = lookup(word);
506 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
509 dp = dp->next_homonym;
516 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
517 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
519 +#ifdef HUNSPELL_CHROME_CLIENT
520 + // Return NULL if dictionary is not valid.
521 + if (!bdict_reader->IsValid())
524 + // This function is only ever called by one place and not nested. We can
525 + // therefore keep static state between calls and use |col| as a "reset" flag
526 + // to avoid changing the API. It is set to -1 for the first call.
527 + // Allocate the iterator on the heap to prevent an exit time destructor.
528 + static hunspell::WordIterator& word_iterator =
529 + *new hunspell::WordIterator(bdict_reader->GetAllWordIterator());
532 + word_iterator = bdict_reader->GetAllWordIterator();
535 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
536 + static const int kMaxWordLen = 128;
537 + static char word[kMaxWordLen];
538 + int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);
539 + if (affix_count == 0)
541 + short word_len = static_cast<short>(strlen(word));
543 + // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,
544 + // i.e. a struct which uses its array 'word[1]' as a variable-length array.
545 + // As noted above, this function is not nested. So, we just use a static
546 + // struct which consists of an hentry and a char[kMaxWordLen], and initialize
547 + // the static struct and return it for now.
548 + // No need to create linked lists for the extra affixes.
551 + char word[kMaxWordLen];
554 + return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),
555 + &word[0], word_len, affix_ids[0]);
557 if (hp && hp->next != NULL) return hp->next;
558 for (col++; col < tablesize; col++) {
559 if (tableptr[col]) return tableptr[col];
560 @@ -346,11 +459,13 @@
561 // null at end and reset to start
567 // load a munched word list and build a hash table on the fly
568 int HashMgr::load_tables(const char * tpath, const char * key)
570 +#ifndef HUNSPELL_CHROME_CLIENT
584 int HashMgr::hash(const char * word) const
586 +#ifdef HUNSPELL_CHROME_CLIENT
590 for (int i=0; i < 4 && *word != 0; i++)
591 hv = (hv << 8) | (*word++);
595 return (unsigned long) hv % tablesize;
599 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
603 // open the affix file
604 +#ifdef HUNSPELL_CHROME_CLIENT
605 + hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
606 + FileMgr * afflst = new FileMgr(&iterator);
608 FileMgr * afflst = new FileMgr(affpath, key);
611 HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
613 @@ -802,6 +927,121 @@
617 +#ifdef HUNSPELL_CHROME_CLIENT
618 +int HashMgr::LoadAFLines()
620 + utf8 = 1; // We always use UTF-8.
622 + // Read in all the AF lines which tell us the rules for each affix group ID.
623 + hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();
624 + FileMgr afflst(&iterator);
625 + while (char* line = afflst.getline()) {
626 + int rv = parse_aliasf(line, &afflst);
634 +hentry* HashMgr::InitHashEntry(hentry* entry,
638 + int affix_index) const {
639 + // Return if the given buffer doesn't have enough space for a hentry struct
640 + // or the given word is too long.
641 + // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is
642 + // better to return an error if the given word is too long and prevent
643 + // an unexpected result caused by a long word.
644 + const int kMaxWordLen = 128;
645 + if (item_size < sizeof(hentry) + word_length + 1 ||
646 + word_length >= kMaxWordLen)
649 + // Initialize a hentry struct with the given parameters, and
650 + // append the given string at the end of this hentry struct.
651 + memset(entry, 0, item_size);
653 + entry->alen = static_cast<short>(
654 + const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));
655 + entry->blen = static_cast<unsigned char>(word_length);
656 + memcpy(&entry->word, word, word_length);
661 +hentry* HashMgr::CreateHashEntry(const char* word,
663 + int affix_index) const {
664 + // Return if the given word is too long.
665 + // (See the comment in HashMgr::InitHashEntry().)
666 + const int kMaxWordLen = 128;
667 + if (word_length >= kMaxWordLen)
670 + const size_t kEntrySize = sizeof(hentry) + word_length + 1;
671 + struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize));
673 + InitHashEntry(entry, kEntrySize, word, word_length, affix_index);
678 +void HashMgr::DeleteHashEntry(hentry* entry) const {
682 +hentry* HashMgr::AffixIDsToHentry(char* word,
684 + int affix_count) const
686 + if (affix_count == 0)
689 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
690 + std::string std_word(word);
691 + HEntryCache::iterator found = cache.find(std_word);
692 + if (found != cache.end()) {
693 + // We must return an existing hentry for the same word if we've previously
694 + // handed one out. Hunspell will compare pointers in some cases to see if
695 + // two words it has found are the same.
696 + return found->second;
699 + short word_len = static_cast<short>(strlen(word));
701 + // We can get a number of prefixes per word. There will normally be only one,
702 + // but if not, there will be a linked list of "hentry"s for the "homonym"s
704 + struct hentry* first_he = NULL;
705 + struct hentry* prev_he = NULL; // For making linked list.
706 + for (int i = 0; i < affix_count; i++) {
707 + struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);
713 + prev_he->next_homonym = he;
717 + cache[std_word] = first_he; // Save this word in the cache for later.
721 +hentry* HashMgr::GetHentryFromHEntryCache(char* word) {
722 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
723 + std::string std_word(word);
724 + HEntryCache::iterator found = cache.find(std_word);
725 + if (found != cache.end())
726 + return found->second;
732 int HashMgr::is_aliasf() {
733 return (aliasf != NULL);
735 Index: src/hunspell/hashmgr.hxx
736 ===================================================================
737 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/hashmgr.hxx,v
738 retrieving revision 1.3
739 diff -u -r1.3 hashmgr.hxx
740 --- src/hunspell/hashmgr.hxx 15 Apr 2010 11:22:08 -0000 1.3
741 +++ src/hunspell/hashmgr.hxx 29 May 2014 01:05:07 -0000
743 #include "htypes.hxx"
744 #include "filemgr.hxx"
746 +#ifdef HUNSPELL_CHROME_CLIENT
750 +#include "base/stl_util.h"
751 +#include "base/strings/string_piece.h"
752 +#include "third_party/hunspell/google/bdict_reader.h"
755 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
757 class LIBHUNSPELL_DLL_EXPORTED HashMgr
759 +#ifdef HUNSPELL_CHROME_CLIENT
760 + // Not owned by this class, owned by the Hunspell object.
761 + hunspell::BDictReader* bdict_reader;
762 + std::map<base::StringPiece, int> custom_word_to_affix_id_map_;
763 + std::vector<std::string*> pointer_to_strings_;
766 struct hentry ** tableptr;
772 +#ifdef HUNSPELL_CHROME_CLIENT
773 + HashMgr(hunspell::BDictReader* reader);
775 + // Return the hentry corresponding to the given word. Returns NULL if the
776 + // word is not there in the cache.
777 + hentry* GetHentryFromHEntryCache(char* word);
779 + // Called before we do a new operation. This will empty the cache of pointers
780 + // to hentries that we have cached. In Chrome, we make these on-demand, but
781 + // they must live as long as the single spellcheck operation that they're part
782 + // of since Hunspell will save pointers to various ones as it works.
784 + // This function allows that cache to be emptied and not grow infinitely.
785 + void EmptyHentryCache();
787 HashMgr(const char * tpath, const char * apath, const char * key = NULL);
791 struct hentry * lookup(const char *) const;
793 int al, const char * desc, bool onlyupcase);
794 int load_config(const char * affpath, const char * key);
795 int parse_aliasf(char * line, FileMgr * af);
797 +#ifdef HUNSPELL_CHROME_CLIENT
798 + // Loads the AF lines from a BDICT.
799 + // A BDICT file compresses its AF lines to save memory.
800 + // This function decompresses each AF line and calls parse_aliasf().
803 + // Helper functions that create a new hentry struct, initialize it, and
805 + // These functions encapsulate non-trivial operations in creating and
806 + // initializing a hentry struct from BDICT data to avoid changing code so much
807 + // even when a hentry struct is changed.
808 + hentry* InitHashEntry(hentry* entry,
812 + int affix_index) const;
813 + hentry* CreateHashEntry(const char* word,
815 + int affix_index) const;
816 + void DeleteHashEntry(hentry* entry) const;
818 + // Converts the list of affix IDs to a linked list of hentry structures. The
819 + // hentry structures will point to the given word. The returned pointer will
820 + // be a statically allocated variable that will change for the next call. The
821 + // |word| buffer must be the same.
822 + hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const;
824 + // See EmptyHentryCache above. Note that each one is actually a linked list
825 + // followed by the homonym pointer.
826 + typedef std::map<std::string, hentry*> HEntryCache;
827 + HEntryCache hentry_cache;
830 int add_hidden_capitalized_word(char * word, int wbl, int wcl,
831 unsigned short * flags, int al, char * dp, int captype);
832 int parse_aliasm(char * line, FileMgr * af);
833 Index: src/hunspell/htypes.hxx
834 ===================================================================
835 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/htypes.hxx,v
836 retrieving revision 1.3
837 diff -u -r1.3 htypes.hxx
838 --- src/hunspell/htypes.hxx 6 Sep 2010 07:58:53 -0000 1.3
839 +++ src/hunspell/htypes.hxx 29 May 2014 01:05:07 -0000
844 +#ifdef HUNSPELL_CHROME_CLIENT
845 +// This is a workaround for preventing errors in parsing Turkish BDICs, which
846 +// contain very long AF lines (~ 12,000 chars).
847 +// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse
848 +// longer lines than MAXDELEN.
849 +#define MAXDELEN (8192 * 2)
851 +#define MAXDELEN 8192
852 +#endif // HUNSPELL_CHROME_CLIENT
856 #define ROTATE(v,q) \
857 Index: src/hunspell/hunspell.cxx
858 ===================================================================
859 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/hunspell.cxx,v
860 retrieving revision 1.29
861 diff -u -r1.29 hunspell.cxx
862 --- src/hunspell/hunspell.cxx 23 Jun 2011 09:21:50 -0000 1.29
863 +++ src/hunspell/hunspell.cxx 29 May 2014 01:05:07 -0000
866 #include "hunspell.hxx"
867 #include "hunspell.h"
868 +#ifndef HUNSPELL_CHROME_CLIENT
869 #ifndef MOZILLA_CLIENT
873 #include "csutil.hxx"
875 +#ifdef HUNSPELL_CHROME_CLIENT
876 +Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
878 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
885 +#ifndef HUNSPELL_CHROME_CLIENT
886 affixpath = mystrdup(affpath);
890 +#ifdef HUNSPELL_CHROME_CLIENT
891 + bdict_reader = new hunspell::BDictReader;
892 + bdict_reader->Init(bdict_data, bdict_length);
894 + pHMgr[0] = new HashMgr(bdict_reader);
895 + if (pHMgr[0]) maxdic = 1;
897 + pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
899 /* first set up the hash manager */
900 pHMgr[0] = new HashMgr(dpath, affpath, key);
901 if (pHMgr[0]) maxdic = 1;
903 /* next set up the affix manager */
904 /* it needs access to the hash manager lookup methods */
905 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
908 /* get the preferred try string and the dictionary */
909 /* encoding from the Affix Manager for that dictionary */
911 wordbreak = pAMgr->get_breaktable();
913 /* and finally set up the suggestion manager */
914 +#ifdef HUNSPELL_CHROME_CLIENT
915 + pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);
917 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
919 if (try_string) free(try_string);
924 if (encoding) free(encoding);
926 +#ifdef HUNSPELL_CHROME_CLIENT
927 + if (bdict_reader) delete bdict_reader;
928 + bdict_reader = NULL;
930 if (affixpath) free(affixpath);
935 +#ifndef HUNSPELL_CHROME_CLIENT
936 // load extra dictionaries
937 int Hunspell::add_dic(const char * dpath, const char * key) {
938 if (maxdic == MAXDIC || !affixpath) return 1;
940 if (pHMgr[maxdic]) maxdic++; else return 1;
945 // make a copy of src at destination while removing all leading
946 // blanks and removing any trailing periods after recording
949 int Hunspell::spell(const char * word, int * info, char ** root)
951 +#ifdef HUNSPELL_CHROME_CLIENT
952 + if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
954 struct hentry * rv=NULL;
955 // need larger vector. For example, Turkish capital letter I converted a
956 // 2-byte UTF-8 character (dotless i) by mkallsmall.
961 +#ifdef HUNSPELL_CHROME_CLIENT
962 + // We need to check if the word length is valid to make coverity (Event
963 + // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.
964 + if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN))
968 // word reversing wrapper for complex prefixes
969 if (complexprefixes) {
973 int Hunspell::suggest(char*** slst, const char * word)
975 +#ifdef HUNSPELL_CHROME_CLIENT
976 + if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
979 char cw[MAXWORDUTF8LEN];
980 char wspace[MAXWORDUTF8LEN];
981 @@ -1921,13 +1963,21 @@
983 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
985 +#ifdef HUNSPELL_CHROME_CLIENT
988 return (Hunhandle*)(new Hunspell(affpath, dpath));
992 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
995 +#ifdef HUNSPELL_CHROME_CLIENT
998 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1002 void Hunspell_destroy(Hunhandle *pHunspell)
1003 Index: src/hunspell/hunspell.hxx
1004 ===================================================================
1005 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/hunspell.hxx,v
1006 retrieving revision 1.6
1007 diff -u -r1.6 hunspell.hxx
1008 --- src/hunspell/hunspell.hxx 21 Jan 2011 17:30:41 -0000 1.6
1009 +++ src/hunspell/hunspell.hxx 29 May 2014 01:05:07 -0000
1011 #include "suggestmgr.hxx"
1012 #include "langnum.hxx"
1014 +#ifdef HUNSPELL_CHROME_CLIENT
1015 +#include "third_party/hunspell/google/bdict_reader.h"
1018 #define SPELL_XML "<?xml?>"
1022 HashMgr* pHMgr[MAXDIC];
1025 +#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.
1029 struct cs_info * csconv;
1032 int complexprefixes;
1035 +#ifdef HUNSPELL_CHROME_CLIENT
1036 + // Owned by this Hunspell object (allocated and deleted in hunspell.cxx).
1037 + hunspell::BDictReader* bdict_reader;
1042 /* Hunspell(aff, dic) - constructor of Hunspell class
1043 * input: path of affix file and dictionary file
1046 +#ifdef HUNSPELL_CHROME_CLIENT
1047 + Hunspell(const unsigned char* bdict_data, size_t bdict_length);
1049 Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
1053 +#ifndef HUNSPELL_CHROME_CLIENT
1054 /* load extra dictionaries (only dic files) */
1055 int add_dic(const char * dpath, const char * key = NULL);
1058 /* spell(word) - spellcheck word
1059 * output: 0 = bad word, not 0 = good word
1060 Index: src/hunspell/replist.hxx
1061 ===================================================================
1062 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/replist.hxx,v
1063 retrieving revision 1.2
1064 diff -u -r1.2 replist.hxx
1065 --- src/hunspell/replist.hxx 15 Apr 2010 11:22:09 -0000 1.2
1066 +++ src/hunspell/replist.hxx 29 May 2014 01:05:07 -0000
1068 #ifndef _REPLIST_HXX_
1069 #define _REPLIST_HXX_
1071 +#ifdef HUNSPELL_CHROME_CLIENT
1072 +// When compiling spellchecker.cc, 'near' is defined as a macro, therefore
1073 +// removing it here solves that problem.
1077 #include "hunvisapi.h"
1079 #include "w_char.hxx"
1080 Index: src/hunspell/suggestmgr.cxx
1081 ===================================================================
1082 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/suggestmgr.cxx,v
1083 retrieving revision 1.24
1084 diff -u -r1.24 suggestmgr.cxx
1085 --- src/hunspell/suggestmgr.cxx 14 Feb 2011 21:47:24 -0000 1.24
1086 +++ src/hunspell/suggestmgr.cxx 29 May 2014 01:05:07 -0000
1089 const w_char W_VLINE = { '\0', '|' };
1091 +#ifdef HUNSPELL_CHROME_CLIENT
1093 +// A simple class which creates temporary hentry objects which are available
1094 +// only in a scope. To conceal memory operations from SuggestMgr functions,
1095 +// this object automatically deletes all hentry objects created through
1096 +// CreateScopedHashEntry() calls in its destructor. So, the following snippet
1097 +// raises a memory error.
1099 +// hentry* bad_copy = NULL;
1101 +// ScopedHashEntryFactory factory;
1102 +// hentry* scoped_copy = factory.CreateScopedHashEntry(0, source);
1104 +// bad_copy = scoped_copy;
1106 +// if (bad_copy->word[0]) // memory for scoped_copy has been deleted!
1108 +// As listed in the above snippet, it is simple to use this class.
1109 +// 1. Declare an instance of this ScopedHashEntryFactory, and;
1110 +// 2. Call its CreateScopedHashEntry() member instead of using 'new hentry' or
1113 +class ScopedHashEntryFactory {
1115 + ScopedHashEntryFactory();
1116 + ~ScopedHashEntryFactory();
1118 + // Creates a temporary copy of the given hentry struct.
1119 + // The returned copy is available only while this object is available.
1120 + // NOTE: this function just calls memcpy() in creating a copy of the given
1121 + // hentry struct, i.e. it does NOT copy objects referred by pointers of the
1122 + // given hentry struct.
1123 + hentry* CreateScopedHashEntry(int index, const hentry* source);
1126 + // A struct which encapsulates the new hentry struct introduced in hunspell
1127 + // 1.2.8. For a pointer to an hentry struct 'h', hunspell 1.2.8 stores a word
1128 + // (including a NUL character) into 'h->word[0]',...,'h->word[h->blen]' even
1129 + // though arraysize(h->word[]) is 1. Also, it changed 'astr' to a pointer so
1130 + // it can store affix flags into 'h->astr[0]',...,'h->astr[alen-1]'. To handle
1131 + // this new hentry struct, we define a struct which combines three values: an
1132 + // hentry struct 'entry'; a char array 'word[kMaxWordLen]', and; an unsigned
1133 + // short array 'astr' so a hentry struct 'h' returned from
1134 + // CreateScopedHashEntry() satisfies the following equations:
1135 + // hentry* h = factory.CreateScopedHashEntry(0, source);
1136 + // h->word[0] == ((HashEntryItem*)h)->entry.word[0].
1137 + // h->word[1] == ((HashEntryItem*)h)->word[0].
1139 + // h->word[h->blen] == ((HashEntryItem*)h)->word[h->blen-1].
1140 + // h->astr[0] == ((HashEntryItem*)h)->astr[0].
1141 + // h->astr[1] == ((HashEntryItem*)h)->astr[1].
1143 + // h->astr[h->alen-1] == ((HashEntryItem*)h)->astr[h->alen-1].
1145 + kMaxWordLen = 128,
1148 + struct HashEntryItem {
1150 + char word[kMaxWordLen];
1151 + unsigned short astr[kMaxAffixLen];
1154 + HashEntryItem hash_items_[MAX_ROOTS];
1157 +ScopedHashEntryFactory::ScopedHashEntryFactory() {
1158 + memset(&hash_items_[0], 0, sizeof(hash_items_));
1161 +ScopedHashEntryFactory::~ScopedHashEntryFactory() {
1164 +hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index,
1165 + const hentry* source) {
1166 + if (index >= MAX_ROOTS || source->blen >= kMaxWordLen)
1169 + // Retrieve a HashEntryItem struct from our pool, initialize it, and
1170 + // return the address of its 'entry' member.
1171 + size_t source_size = sizeof(hentry) + source->blen + 1;
1172 + HashEntryItem* hash_item = &hash_items_[index];
1173 + memcpy(&hash_item->entry, source, source_size);
1174 + if (source->astr) {
1175 + hash_item->entry.alen = source->alen;
1176 + if (hash_item->entry.alen > kMaxAffixLen)
1177 + hash_item->entry.alen = kMaxAffixLen;
1178 + memcpy(hash_item->astr, source->astr, hash_item->entry.alen * sizeof(hash_item->astr[0]));
1179 + hash_item->entry.astr = &hash_item->astr[0];
1181 + return &hash_item->entry;
1188 +#ifdef HUNSPELL_CHROME_CLIENT
1189 +SuggestMgr::SuggestMgr(hunspell::BDictReader* reader,
1190 + const char * tryme, int maxn,
1193 + bdict_reader = reader;
1195 SuggestMgr::SuggestMgr(const char * tryme, int maxn,
1200 // register affix manager and check in string of chars to
1201 // try when building candidate suggestions
1202 @@ -407,6 +512,49 @@
1204 int wl = strlen(word);
1205 if (wl < 2 || ! pAMgr) return ns;
1207 +#ifdef HUNSPELL_CHROME_CLIENT
1208 + const char *pattern, *pattern2;
1209 + hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();
1210 + while (iterator.GetNext(&pattern, &pattern2)) {
1212 + lenr = strlen(pattern2);
1213 + lenp = strlen(pattern);
1215 + // search every occurrence of the pattern in the word
1216 + while ((r=strstr(r, pattern)) != NULL) {
1217 + strcpy(candidate, word);
1218 + if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
1219 + strcpy(candidate+(r-word), pattern2);
1220 + strcpy(candidate+(r-word)+lenr, r+lenp);
1221 + ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL);
1222 + if (ns == -1) return -1;
1223 + // check REP suggestions with space
1224 + char * sp = strchr(candidate, ' ');
1226 + char * prev = candidate;
1229 + if (checkword(prev, strlen(prev), 0, NULL, NULL)) {
1232 + ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL);
1233 + if (ns == -1) return -1;
1235 + free(wlst[ns - 1]);
1236 + wlst[ns - 1] = mystrdup(candidate);
1237 + if (!wlst[ns - 1]) return -1;
1242 + sp = strchr(prev, ' ');
1245 + r++; // search for the next letter
1249 int numrep = pAMgr->get_numrep();
1250 struct replentry* reptable = pAMgr->get_reptable();
1251 if (reptable==NULL) return ns;
1253 r++; // search for the next letter
1261 // error is missing a letter it needs
1262 int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
1264 - char candidate[MAXSWUTF8L];
1265 + // TODO(rouslan): Remove the interim change below when this patch lands:
1266 + // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
1267 + char candidate[MAXSWUTF8L + 4];
1269 clock_t timelimit = clock();
1270 int timer = MINTIMER;
1271 @@ -700,8 +851,10 @@
1272 // error is missing a letter it needs
1273 int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
1275 - w_char candidate_utf[MAXSWL];
1276 - char candidate[MAXSWUTF8L];
1277 + // TODO(rouslan): Remove the interim change below when this patch lands:
1278 + // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
1279 + w_char candidate_utf[MAXSWL + 1];
1280 + char candidate[MAXSWUTF8L + 4];
1282 clock_t timelimit = clock();
1283 int timer = MINTIMER;
1284 @@ -1057,6 +1210,9 @@
1286 struct hentry* hp = NULL;
1288 +#ifdef HUNSPELL_CHROME_CLIENT
1289 + ScopedHashEntryFactory hash_entry_factory;
1291 phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
1292 char target[MAXSWUTF8L];
1293 char candidate[MAXSWUTF8L];
1294 @@ -1115,7 +1271,11 @@
1296 if (sc > scores[lp]) {
1298 +#ifdef HUNSPELL_CHROME_CLIENT
1299 + roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp);
1304 for (j=0; j < MAX_ROOTS; j++)
1305 if (scores[j] < lval) {
1306 @@ -1948,16 +2108,14 @@
1310 - c = (char *) malloc((m + 1) * (n + 1));
1311 - b = (char *) malloc((m + 1) * (n + 1));
1312 + c = (char *) calloc(m + 1, n + 1);
1313 + b = (char *) calloc(m + 1, n + 1);
1320 - for (i = 1; i <= m; i++) c[i*(n+1)] = 0;
1321 - for (j = 0; j <= n; j++) c[j] = 0;
1322 for (i = 1; i <= m; i++) {
1323 for (j = 1; j <= n; j++) {
1324 if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1)))
1325 Index: src/hunspell/suggestmgr.hxx
1326 ===================================================================
1327 RCS file: /cvsroot/hunspell/hunspell/src/hunspell/suggestmgr.hxx,v
1328 retrieving revision 1.5
1329 diff -u -r1.5 suggestmgr.hxx
1330 --- src/hunspell/suggestmgr.hxx 21 Jan 2011 22:10:24 -0000 1.5
1331 +++ src/hunspell/suggestmgr.hxx 29 May 2014 01:05:07 -0000
1336 +#ifdef HUNSPELL_CHROME_CLIENT
1337 + SuggestMgr(hunspell::BDictReader* reader, const char * tryme, int maxn, AffixMgr *aptr);
1339 SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
1343 int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
1345 char * suggest_morph_for_spelling_error(const char * word);
1348 +#ifdef HUNSPELL_CHROME_CLIENT
1349 + // Not owned by us, owned by the Hunspell object.
1350 + hunspell::BDictReader* bdict_reader;
1352 int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
1353 int * timer, clock_t * timelimit);
1354 int checkword(const char *, int, int, int *, clock_t *);