Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / chrome / tools / convert_dict / aff_reader.h
blob6a1aae8729baa4580e81d5ae80dad0f974162b00
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
6 #define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
8 #include <map>
9 #include <stdio.h>
10 #include <string>
11 #include <vector>
namespace base {
class FilePath;
}  // namespace base

namespace convert_dict {

// Reads a hunspell affix (.aff) file and exposes the data parsed from it:
// the encoding, the affix groups and rules, the replacement table, and any
// other commands found in the file.
class AffReader {
 public:
  // |path| is presumably the .aff file to read; the constructor itself is
  // defined elsewhere.
  explicit AffReader(const base::FilePath& path);
  ~AffReader();

  // Reads the file. The getters below describe data that is only available
  // once this has been called.
  // NOTE(review): presumably returns true on success -- confirm against the
  // implementation.
  bool Read();

  // Returns whether this file uses indexed affixes, or, on false, whether the
  // rule string will be specified literally in the .dic file. This must be
  // called after Read().
  bool has_indexed_affixes() const { return has_indexed_affixes_; }

  // Returns a string representing the encoding of the dictionary. This will
  // default to ISO-8859-1 if the .aff file does not specify it. The returned
  // pointer refers to storage owned by this object.
  const char* encoding() const { return encoding_.c_str(); }

  // Converts the given string from the file encoding to UTF-8, returning true
  // on success.
  bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;

  // Adds a new affix string, returning the index. If it already exists,
  // returns the index of the existing one. This is used to convert .dic files
  // which list the rule string literally (see has_indexed_affixes()).
  // NOTE(review): the original sentence was truncated after "which list
  // the"; confirm the intended wording.
  //
  // You must not call this until after Read().
  int GetAFIndexForAFString(const std::string& af_string);

  // Getters for the computed data.
  const std::string& comments() const { return intro_comment_; }
  const std::vector<std::string>& affix_rules() const { return affix_rules_; }
  const std::vector< std::pair<std::string, std::string> >&
      replacements() const {
    return replacements_;
  }
  const std::vector<std::string>& other_commands() const {
    return other_commands_;
  }

  // Returns the affix groups ("AF" lines) for this file. The group IDs are
  // 1-based, but the returned vector does not set aside a 0th item, so
  // lookups will have to subtract one from the ID to get the index. This is
  // how hunspell stores this data.
  std::vector<std::string> GetAffixGroups() const;

 private:
  // Command-specific handlers. These are given the string following the
  // command. The input rule may be modified arbitrarily by the function.
  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
  void AddAffix(std::string* rule);  // SFX/PFX
  void AddReplacement(std::string* rule);
  // void HandleFlag(std::string* rule);

  // Used to handle "other" commands. The "raw" just saves the line as-is.
  // The "encoded" version converts the line to UTF-8 and saves it.
  void HandleRawCommand(const std::string& line);
  void HandleEncodedCommand(const std::string& line);

  // Handle used to read the input file.
  FILE* file_;

  // Comments from the beginning of the file. This is everything before the
  // first command. We want to store this since it often contains the
  // copyright information.
  std::string intro_comment_;

  // Encoding of the source words.
  std::string encoding_;

  // Affix rules. These are populated by "AF" commands. The .dic file can
  // refer to these by index. They are indexed by their string value (the
  // list of characters representing rules), and map to the numeric affix IDs.
  //
  // These can also be added using GetAFIndexForAFString.
  std::map<std::string, int> affix_groups_;

  // True when the affixes were specified in the .aff file using indices. The
  // dictionary reader uses this to see how it should treat the stuff after
  // the word on each line.
  bool has_indexed_affixes_;

  // SFX and PFX commands. This is a list of each of those lines in the order
  // they appear in the file. They have been re-encoded.
  std::vector<std::string> affix_rules_;

  // Replacement commands. The first string is a possible input, and the
  // second is the replacement.
  std::vector< std::pair<std::string, std::string> > replacements_;

  // All other commands.
  std::vector<std::string> other_commands_;

  // This class holds a raw FILE* and declares a destructor, so an implicit
  // member-wise copy would leave two objects sharing the same handle.
  // Copying is therefore disallowed: these two members are declared private
  // and intentionally left undefined.
  AffReader(const AffReader&);
  AffReader& operator=(const AffReader&);
};

}  // namespace convert_dict
110 #endif // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__