Change "percentage change" function and add comments/test for it.
[chromium-blink-merge.git] / chrome / tools / convert_dict / convert_dict_unittest.cc
blob eb7ed65446346d949b102b407129d5f1ae2c728a
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <map>
6 #include <string>
8 #include "base/file_util.h"
9 #include "base/format_macros.h"
10 #include "base/i18n/icu_string_conversions.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "chrome/tools/convert_dict/aff_reader.h"
14 #include "chrome/tools/convert_dict/dic_reader.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "third_party/hunspell/google/bdict_reader.h"
17 #include "third_party/hunspell/google/bdict_writer.h"
19 namespace {
21 // Compares the given word list with the serialized trie to make sure they
22 // are the same.
23 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc").
24 bool VerifyWords(const convert_dict::DicReader::WordList& org_words,
25 const std::string& serialized) {
26 hunspell::BDictReader reader;
27 EXPECT_TRUE(
28 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()),
29 serialized.size()));
31 hunspell::WordIterator iter = reader.GetAllWordIterator();
33 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
35 static const int kBufSize = 128;
36 char buf[kBufSize];
37 for (size_t i = 0; i < org_words.size(); i++) {
38 SCOPED_TRACE(base::StringPrintf(
39 "org_words[%" PRIuS "]: %s", i, org_words[i].first.c_str()));
41 int affix_matches = iter.Advance(buf, kBufSize, affix_ids);
42 EXPECT_NE(0, affix_matches);
43 EXPECT_EQ(org_words[i].first, std::string(buf));
44 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size()));
46 // Check the individual affix indices.
47 for (size_t affix_index = 0; affix_index < org_words[i].second.size();
48 affix_index++) {
49 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]);
53 return true;
56 // Implements the test process used by ConvertDictTest.
57 // This function encapsulates all complicated operations used by
58 // ConvertDictTest so we can conceal them from the tests themselves.
59 // This function consists of the following parts:
60 // * Creates a dummy affix file and a dictionary file.
61 // * Reads the dummy files.
62 // * Creates bdict data.
63 // * Verify the bdict data.
64 void RunDictionaryTest(const char* codepage,
65 const std::map<base::string16, bool>& word_list) {
66 // Create an affix data and a dictionary data.
67 std::string aff_data(base::StringPrintf("SET %s\n", codepage));
69 std::string dic_data(base::StringPrintf("%" PRIuS "\n", word_list.size()));
70 for (std::map<base::string16, bool>::const_iterator it = word_list.begin();
71 it != word_list.end(); ++it) {
72 std::string encoded_word;
73 EXPECT_TRUE(UTF16ToCodepage(it->first,
74 codepage,
75 base::OnStringConversionError::FAIL,
76 &encoded_word));
77 dic_data += encoded_word;
78 dic_data += "\n";
81 // Create a temporary affix file and a dictionary file from the test data.
82 base::FilePath aff_file;
83 base::CreateTemporaryFile(&aff_file);
84 base::WriteFile(aff_file, aff_data.c_str(), aff_data.length());
86 base::FilePath dic_file;
87 base::CreateTemporaryFile(&dic_file);
88 base::WriteFile(dic_file, dic_data.c_str(), dic_data.length());
91 // Read the above affix file with AffReader and read the dictionary file
92 // with DicReader, respectively.
93 convert_dict::AffReader aff_reader(aff_file);
94 EXPECT_TRUE(aff_reader.Read());
96 convert_dict::DicReader dic_reader(dic_file);
97 EXPECT_TRUE(dic_reader.Read(&aff_reader));
99 // Verify this DicReader includes all the input words.
100 EXPECT_EQ(word_list.size(), dic_reader.words().size());
101 for (size_t i = 0; i < dic_reader.words().size(); ++i) {
102 SCOPED_TRACE(base::StringPrintf("dic_reader.words()[%" PRIuS "]: %s",
103 i, dic_reader.words()[i].first.c_str()));
104 base::string16 word(base::UTF8ToUTF16(dic_reader.words()[i].first));
105 EXPECT_TRUE(word_list.find(word) != word_list.end());
108 // Create BDICT data and verify it.
109 hunspell::BDictWriter writer;
110 writer.SetComment(aff_reader.comments());
111 writer.SetAffixRules(aff_reader.affix_rules());
112 writer.SetAffixGroups(aff_reader.GetAffixGroups());
113 writer.SetReplacements(aff_reader.replacements());
114 writer.SetOtherCommands(aff_reader.other_commands());
115 writer.SetWords(dic_reader.words());
117 std::string bdict_data = writer.GetBDict();
118 VerifyWords(dic_reader.words(), bdict_data);
119 EXPECT_TRUE(hunspell::BDict::Verify(bdict_data.data(), bdict_data.size()));
121 // Trim the end of this BDICT and verify our verifier tells these trimmed
122 // BDICTs are corrupted.
123 for (size_t i = 1; i < bdict_data.size(); ++i) {
124 SCOPED_TRACE(base::StringPrintf("i = %" PRIuS, i));
125 EXPECT_FALSE(hunspell::BDict::Verify(bdict_data.data(),
126 bdict_data.size() - i));
130 // Deletes the temporary files.
131 // We need to delete them after the above AffReader and DicReader are deleted
132 // since they close the input files in their destructors.
133 base::DeleteFile(aff_file, false);
134 base::DeleteFile(dic_file, false);
137 } // namespace
139 // Tests whether or not our DicReader can read all the input English words
140 TEST(ConvertDictTest, English) {
141 const char kCodepage[] = "UTF-8";
142 const wchar_t* kWords[] = {
143 L"I",
144 L"he",
145 L"she",
146 L"it",
147 L"we",
148 L"you",
149 L"they",
152 std::map<base::string16, bool> word_list;
153 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
154 word_list.insert(
155 std::make_pair<base::string16, bool>(base::WideToUTF16(kWords[i]),
156 true));
158 RunDictionaryTest(kCodepage, word_list);
161 // Tests whether or not our DicReader can read all the input Russian words.
162 TEST(ConvertDictTest, Russian) {
163 const char kCodepage[] = "KOI8-R";
164 const wchar_t* kWords[] = {
165 L"\x044f",
166 L"\x0442\x044b",
167 L"\x043e\x043d",
168 L"\x043e\x043d\x0430",
169 L"\x043e\x043d\x043e",
170 L"\x043c\x044b",
171 L"\x0432\x044b",
172 L"\x043e\x043d\x0438",
175 std::map<base::string16, bool> word_list;
176 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
177 word_list.insert(
178 std::make_pair<base::string16, bool>(base::WideToUTF16(kWords[i]),
179 true));
181 RunDictionaryTest(kCodepage, word_list);
184 // Tests whether or not our DicReader can read all the input Hungarian words.
185 TEST(ConvertDictTest, Hungarian) {
186 const char kCodepage[] = "ISO8859-2";
187 const wchar_t* kWords[] = {
188 L"\x00e9\x006e",
189 L"\x0074\x0065",
190 L"\x0151",
191 L"\x00f6\x006e",
192 L"\x006d\x0061\x0067\x0061",
193 L"\x006d\x0069",
194 L"\x0074\x0069",
195 L"\x0151\x006b",
196 L"\x00f6\x006e\x00f6\x006b",
197 L"\x006d\x0061\x0067\x0075\x006b",
200 std::map<base::string16, bool> word_list;
201 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
202 word_list.insert(
203 std::make_pair<base::string16, bool>(base::WideToUTF16(kWords[i]),
204 true));
206 RunDictionaryTest(kCodepage, word_list);