// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <map>
#include <string>
#include <utility>

#include "base/file_util.h"
#include "base/format_macros.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/tools/convert_dict/aff_reader.h"
#include "chrome/tools/convert_dict/dic_reader.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/hunspell/google/bdict_reader.h"
#include "third_party/hunspell/google/bdict_writer.h"
21 // Compares the given word list with the serialized trie to make sure they
23 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc").
24 bool VerifyWords(const convert_dict::DicReader::WordList
& org_words
,
25 const std::string
& serialized
) {
26 hunspell::BDictReader reader
;
28 reader
.Init(reinterpret_cast<const unsigned char*>(serialized
.data()),
31 hunspell::WordIterator iter
= reader
.GetAllWordIterator();
33 int affix_ids
[hunspell::BDict::MAX_AFFIXES_PER_WORD
];
35 static const int kBufSize
= 128;
37 for (size_t i
= 0; i
< org_words
.size(); i
++) {
38 SCOPED_TRACE(base::StringPrintf(
39 "org_words[%" PRIuS
"]: %s", i
, org_words
[i
].first
.c_str()));
41 int affix_matches
= iter
.Advance(buf
, kBufSize
, affix_ids
);
42 EXPECT_NE(0, affix_matches
);
43 EXPECT_EQ(org_words
[i
].first
, std::string(buf
));
44 EXPECT_EQ(affix_matches
, static_cast<int>(org_words
[i
].second
.size()));
46 // Check the individual affix indices.
47 for (size_t affix_index
= 0; affix_index
< org_words
[i
].second
.size();
49 EXPECT_EQ(affix_ids
[affix_index
], org_words
[i
].second
[affix_index
]);
56 // Implements the test process used by ConvertDictTest.
57 // This function encapsulates all complicated operations used by
58 // ConvertDictTest so we can conceal them from the tests themselves.
59 // This function consists of the following parts:
60 // * Creates a dummy affix file and a dictionary file.
61 // * Reads the dummy files.
62 // * Creates bdict data.
63 // * Verify the bdict data.
64 void RunDictionaryTest(const char* codepage
,
65 const std::map
<base::string16
, bool>& word_list
) {
66 // Create an affix data and a dictionary data.
67 std::string
aff_data(base::StringPrintf("SET %s\n", codepage
));
69 std::string
dic_data(base::StringPrintf("%" PRIuS
"\n", word_list
.size()));
70 for (std::map
<base::string16
, bool>::const_iterator it
= word_list
.begin();
71 it
!= word_list
.end(); ++it
) {
72 std::string encoded_word
;
73 EXPECT_TRUE(UTF16ToCodepage(it
->first
,
75 base::OnStringConversionError::FAIL
,
77 dic_data
+= encoded_word
;
81 // Create a temporary affix file and a dictionary file from the test data.
82 base::FilePath aff_file
;
83 base::CreateTemporaryFile(&aff_file
);
84 base::WriteFile(aff_file
, aff_data
.c_str(), aff_data
.length());
86 base::FilePath dic_file
;
87 base::CreateTemporaryFile(&dic_file
);
88 base::WriteFile(dic_file
, dic_data
.c_str(), dic_data
.length());
91 // Read the above affix file with AffReader and read the dictionary file
92 // with DicReader, respectively.
93 convert_dict::AffReader
aff_reader(aff_file
);
94 EXPECT_TRUE(aff_reader
.Read());
96 convert_dict::DicReader
dic_reader(dic_file
);
97 EXPECT_TRUE(dic_reader
.Read(&aff_reader
));
99 // Verify this DicReader includes all the input words.
100 EXPECT_EQ(word_list
.size(), dic_reader
.words().size());
101 for (size_t i
= 0; i
< dic_reader
.words().size(); ++i
) {
102 SCOPED_TRACE(base::StringPrintf("dic_reader.words()[%" PRIuS
"]: %s",
103 i
, dic_reader
.words()[i
].first
.c_str()));
104 base::string16
word(base::UTF8ToUTF16(dic_reader
.words()[i
].first
));
105 EXPECT_TRUE(word_list
.find(word
) != word_list
.end());
108 // Create BDICT data and verify it.
109 hunspell::BDictWriter writer
;
110 writer
.SetComment(aff_reader
.comments());
111 writer
.SetAffixRules(aff_reader
.affix_rules());
112 writer
.SetAffixGroups(aff_reader
.GetAffixGroups());
113 writer
.SetReplacements(aff_reader
.replacements());
114 writer
.SetOtherCommands(aff_reader
.other_commands());
115 writer
.SetWords(dic_reader
.words());
117 std::string bdict_data
= writer
.GetBDict();
118 VerifyWords(dic_reader
.words(), bdict_data
);
119 EXPECT_TRUE(hunspell::BDict::Verify(bdict_data
.data(), bdict_data
.size()));
121 // Trim the end of this BDICT and verify our verifier tells these trimmed
122 // BDICTs are corrupted.
123 for (size_t i
= 1; i
< bdict_data
.size(); ++i
) {
124 SCOPED_TRACE(base::StringPrintf("i = %" PRIuS
, i
));
125 EXPECT_FALSE(hunspell::BDict::Verify(bdict_data
.data(),
126 bdict_data
.size() - i
));
130 // Deletes the temporary files.
131 // We need to delete them after the above AffReader and DicReader are deleted
132 // since they close the input files in their destructors.
133 base::DeleteFile(aff_file
, false);
134 base::DeleteFile(dic_file
, false);
139 // Tests whether or not our DicReader can read all the input English words
140 TEST(ConvertDictTest
, English
) {
141 const char kCodepage
[] = "UTF-8";
142 const wchar_t* kWords
[] = {
152 std::map
<base::string16
, bool> word_list
;
153 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(kWords
); ++i
)
155 std::make_pair
<base::string16
, bool>(base::WideToUTF16(kWords
[i
]),
158 RunDictionaryTest(kCodepage
, word_list
);
161 // Tests whether or not our DicReader can read all the input Russian words.
162 TEST(ConvertDictTest
, Russian
) {
163 const char kCodepage
[] = "KOI8-R";
164 const wchar_t* kWords
[] = {
168 L
"\x043e\x043d\x0430",
169 L
"\x043e\x043d\x043e",
172 L
"\x043e\x043d\x0438",
175 std::map
<base::string16
, bool> word_list
;
176 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(kWords
); ++i
)
178 std::make_pair
<base::string16
, bool>(base::WideToUTF16(kWords
[i
]),
181 RunDictionaryTest(kCodepage
, word_list
);
184 // Tests whether or not our DicReader can read all the input Hungarian words.
185 TEST(ConvertDictTest
, Hungarian
) {
186 const char kCodepage
[] = "ISO8859-2";
187 const wchar_t* kWords
[] = {
192 L
"\x006d\x0061\x0067\x0061",
196 L
"\x00f6\x006e\x00f6\x006b",
197 L
"\x006d\x0061\x0067\x0075\x006b",
200 std::map
<base::string16
, bool> word_list
;
201 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(kWords
); ++i
)
203 std::make_pair
<base::string16
, bool>(base::WideToUTF16(kWords
[i
]),
206 RunDictionaryTest(kCodepage
, word_list
);