Don't preload rarely seen large images
[chromium-blink-merge.git] / third_party / cld / encodings / proto / encodings.pb.h
blob41d93f30be8661612d32e76221d59d522b17a65d
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef ENCODINGS_PROTO_ENCODINGS_PB_H_
6 #define ENCODINGS_PROTO_ENCODINGS_PB_H_
// Identifiers for the character encodings referred to by this library.
//
// Every enumerator has an explicit numeric value, and the values run
// contiguously from 0 to NUM_ENCODINGS - 1.
// NOTE(review): this header lives under proto/, so the numbers are presumably
// part of a serialized format -- confirm before renumbering or reusing a
// value.  New encodings go immediately before NUM_ENCODINGS, which must then
// be increased to match.
enum Encoding {
  ISO_8859_1 = 0,           // Teragram ASCII
  ISO_8859_2 = 1,           // Teragram Latin2
  ISO_8859_3 = 2,           // in BasisTech but not in Teragram
  ISO_8859_4 = 3,           // Teragram Latin4
  ISO_8859_5 = 4,           // Teragram ISO-8859-5
  ISO_8859_6 = 5,           // Teragram Arabic
  ISO_8859_7 = 6,           // Teragram Greek
  ISO_8859_8 = 7,           // Teragram Hebrew
  ISO_8859_9 = 8,           // in BasisTech but not in Teragram
  ISO_8859_10 = 9,          // in BasisTech but not in Teragram
  JAPANESE_EUC_JP = 10,     // Teragram EUC_JP
  JAPANESE_SHIFT_JIS = 11,  // Teragram SJS
  JAPANESE_JIS = 12,        // Teragram JIS
  CHINESE_BIG5 = 13,        // Teragram BIG5
  CHINESE_GB = 14,          // Teragram GB
  CHINESE_EUC_CN = 15,      // Misnamed.  Should be EUC_TW.  Was Basis Tech
                            // CNS11643EUC, before that Teragram EUC-CN(!)
                            // See //i18n/basistech/basistech_encodings.h
  KOREAN_EUC_KR = 16,       // Teragram KSC
  UNICODE = 17,             // Teragram Unicode
  CHINESE_EUC_DEC = 18,     // Misnamed.  Should be EUC_TW.  Was Basis Tech
                            // CNS11643EUC, before that Teragram EUC.
  CHINESE_CNS = 19,         // Misnamed.  Should be EUC_TW.  Was Basis Tech
                            // CNS11643EUC, before that Teragram CNS.
  CHINESE_BIG5_CP950 = 20,  // Teragram BIG5_CP950
  JAPANESE_CP932 = 21,      // Teragram CP932
  UTF8 = 22,
  UNKNOWN_ENCODING = 23,
  ASCII_7BIT = 24,          // ISO_8859_1 with all characters <= 127.
                            // Should be present only in the crawler
                            // and in the repository,
                            // *never* as a result of Document::encoding().
  RUSSIAN_KOI8_R = 25,      // Teragram KOI8R
  RUSSIAN_CP1251 = 26,      // Teragram CP1251

  //----------------------------------------------------------
  // These are _not_ output from teragram.  Instead, they are as
  // detected in the headers of usenet articles.
  MSFT_CP1252 = 27,         // 27: CP1252 aka MSFT euro ascii
  RUSSIAN_KOI8_RU = 28,     // CP21866 aka KOI8-U, used for Ukrainian.
                            // Misnamed, this is _not_ KOI8-RU but KOI8-U.
                            // KOI8-U is used much more often than KOI8-RU.
  MSFT_CP1250 = 29,         // CP1250 aka MSFT eastern european
  ISO_8859_15 = 30,         // aka ISO_8859_0 aka ISO_8859_1 euroized
  //----------------------------------------------------------

  //----------------------------------------------------------
  // These are in BasisTech but not in Teragram.  They are
  // needed for new interface languages.  Now detected by
  // research langid
  MSFT_CP1254 = 31,         // used for Turkish
  MSFT_CP1257 = 32,         // used in Baltic countries
  //----------------------------------------------------------

  //----------------------------------------------------------
  //----------------------------------------------------------
  // New encodings detected by Teragram
  ISO_8859_11 = 33,         // aka TIS-620, used for Thai
  MSFT_CP874 = 34,          // used for Thai
  MSFT_CP1256 = 35,         // used for Arabic

  //----------------------------------------------------------
  // Detected as ISO_8859_8 by Teragram, but can be found in META tags
  MSFT_CP1255 = 36,         // Logical Hebrew Microsoft
  ISO_8859_8_I = 37,        // Iso Hebrew Logical
  HEBREW_VISUAL = 38,       // Iso Hebrew Visual
  //----------------------------------------------------------

  //----------------------------------------------------------
  // Detected by research langid
  CZECH_CP852 = 39,
  CZECH_CSN_369103 = 40,    // aka ISO_IR_139 aka KOI8_CS
  MSFT_CP1253 = 41,         // used for Greek
  RUSSIAN_CP866 = 42,
  //----------------------------------------------------------

  //----------------------------------------------------------
  // Handled by iconv in glibc
  ISO_8859_13 = 43,
  ISO_2022_KR = 44,
  GBK = 45,
  GB18030 = 46,
  BIG5_HKSCS = 47,
  ISO_2022_CN = 48,

  //-----------------------------------------------------------
  // Detected by xin liu's detector
  // Handled by transcoder
  // (Indic encodings)

  TSCII = 49,
  TAMIL_MONO = 50,
  TAMIL_BI = 51,
  JAGRAN = 52,

  MACINTOSH_ROMAN = 53,
  UTF7 = 54,
  BHASKAR = 55,             // Indic encoding - Devanagari
  HTCHANAKYA = 56,          // 56 Indic encoding - Devanagari

  //-----------------------------------------------------------
  // These allow a single place (inputconverter and outputconverter)
  // to do UTF-16 <==> UTF-8 bulk conversions and UTF-32 <==> UTF-8
  // bulk conversions, with interchange-valid checking on input and
  // fallback if needed on output.
  UTF16BE = 57,             // big-endian UTF-16
  UTF16LE = 58,             // little-endian UTF-16
  UTF32BE = 59,             // big-endian UTF-32
  UTF32LE = 60,             // little-endian UTF-32
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // An encoding that means "This is not text, but it may have some
  // simple ASCII text embedded".  Intended input conversion (not yet
  // implemented) is to keep strings of >=4 seven-bit ASCII characters
  // (follow each kept string with an ASCII space), delete the rest of
  // the bytes.  This will pick up and allow indexing of e.g. captions
  // in JPEGs.  No output conversion needed.
  BINARYENC = 61,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
  // ~{ ... ~} for 2-byte pairs, and the browsers support this.
  HZ_GB_2312 = 62,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some external vendors make the common input error of
  // converting MSFT_CP1252 to UTF8 *twice*.  No output conversion needed.
  UTF8UTF8 = 63,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Handled by transcoder for tamil language specific font
  // encodings without the support for detection at present.
  TAM_ELANGO = 64,          // Elango - Tamil
  TAM_LTTMBARANI = 65,      // Barani - Tamil
  TAM_SHREE = 66,           // Shree - Tamil
  TAM_TBOOMIS = 67,         // TBoomis - Tamil
  TAM_TMNEWS = 68,          // TMNews - Tamil
  TAM_WEBTAMIL = 69,        // Webtamil - Tamil
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Shift_JIS variants used by Japanese cell phone carriers.
  KDDI_SHIFT_JIS = 70,
  DOCOMO_SHIFT_JIS = 71,
  SOFTBANK_SHIFT_JIS = 72,
  // ISO-2022-JP variants used by KDDI and SoftBank.
  KDDI_ISO_2022_JP = 73,
  SOFTBANK_ISO_2022_JP = 74,
  //-----------------------------------------------------------

  NUM_ENCODINGS = 75,       // Always keep this at the end.  It is not a
                            // valid Encoding enum, it is only used to
                            // indicate the total number of Encodings.
};
169 #endif // ENCODINGS_PROTO_ENCODINGS_PB_H_