Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / languagetag / simple-langtag.cxx
blobd96f721dbeefba53c3b95fb727e402b92cf1b928
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
/** Cheap and cheesy replacement code for liblangtag on systems that do not
    allow / want LGPL code or dependencies on glib.

    XXX NOTE: This code does not check language tags for validity or if they
    are registered with IANA, does not canonicalize or strip default script
    tags if included nor does it do any other fancy stuff that liblangtag is
    capable of. It just makes depending code work without it.
 */
19 #include <cstdlib>
20 #include <cstring>
21 #include <cstdio>
23 namespace {
/** Boolean result of the lt_* functions in this file: 0 means failure,
    any non-zero value means success.
 */
typedef int lt_bool_t;

/** Minimal error object. It only carries one untyped pointer, which a newly
    constructed object initializes to NULL.
 */
struct lt_error_t
{
    void *something;

    lt_error_t()
        : something(NULL)
    {
    }
};
/** Stand-in for glib's g_malloc() that simply forwards to malloc().

    @param s  number of bytes requested
    @return   the pointer malloc() returned, passed on unchecked
 */
static void* g_malloc(size_t s)
{
    void* const pBlock = malloc( s);
    return pBlock;
}
/** Stand-in for glib's g_free() that forwards to free().

    @param p  block to release. NULL is allowed: free() is defined to do
              nothing for a null pointer, so no extra check is needed here.
 */
static void g_free(void* p)
{
    free( p);
}
43 static void lt_error_unref(lt_error_t *error)
45 if (error)
47 g_free( error->something);
48 g_free( error);
52 struct my_ref
54 sal_uInt32 mnRef;
55 explicit my_ref() : mnRef(1) {}
56 virtual ~my_ref() {}
57 void incRef() { ++mnRef; }
58 void decRef() { if (--mnRef == 0) delete this; }
61 struct my_t_impl : public my_ref
63 char* mpStr;
64 explicit my_t_impl() : my_ref(), mpStr(NULL) {}
65 virtual ~my_t_impl() { g_free( mpStr); }
66 explicit my_t_impl( const my_t_impl& r )
68 my_ref(),
69 mpStr(r.mpStr ? strdup( r.mpStr) : NULL)
72 my_t_impl& operator=( const my_t_impl& r )
74 if (this == &r)
75 return *this;
76 g_free( mpStr);
77 mpStr = (r.mpStr ? strdup( r.mpStr) : NULL);
78 return *this;
80 void assign( const char* str )
82 g_free( mpStr);
83 mpStr = (str ? strdup( str) : NULL);
85 void assign( const char* str, const char* stop )
87 g_free( mpStr);
88 if (str && str < stop)
90 mpStr = static_cast<char*>(g_malloc( stop - str + 1));
91 memcpy( mpStr, str, stop - str);
92 mpStr[stop - str] = 0;
94 else
95 mpStr = NULL;
97 void append( const char* str, const char* stop )
99 if (str && str < stop)
101 size_t nOld = mpStr ? strlen( mpStr) : 0;
102 size_t nNew = nOld + (stop - str) + 1;
103 char* p = static_cast<char*>(g_malloc( nNew));
104 if (nOld)
105 memcpy( p, mpStr, nOld);
106 memcpy( p + nOld, str, stop - str);
107 p[nNew-1] = 0;
108 g_free( mpStr);
109 mpStr = p;
112 void zero()
114 g_free( mpStr);
115 mpStr = NULL;
119 struct lt_lang_t : public my_t_impl
121 explicit lt_lang_t() : my_t_impl() {}
122 virtual ~lt_lang_t() {}
125 struct lt_script_t : public my_t_impl
127 explicit lt_script_t() : my_t_impl() {}
128 virtual ~lt_script_t() {}
131 struct lt_region_t : public my_t_impl
133 explicit lt_region_t() : my_t_impl() {}
134 virtual ~lt_region_t() {}
137 struct lt_tag_t : public my_t_impl
139 lt_lang_t maLanguage;
140 lt_script_t maScript;
141 lt_region_t maRegion;
142 explicit lt_tag_t() : my_t_impl(), maLanguage(), maScript(), maRegion() {}
143 virtual ~lt_tag_t() {}
144 explicit lt_tag_t( const lt_tag_t& r )
146 my_t_impl( r),
147 maLanguage( r.maLanguage),
148 maScript( r.maScript),
149 maRegion( r.maRegion)
152 lt_tag_t& operator=( const lt_tag_t& r )
154 if (this == &r)
155 return *this;
156 my_t_impl::operator=( r);
157 maLanguage = r.maLanguage;
158 maScript = r.maScript;
159 maRegion = r.maRegion;
160 return *this;
162 void assign( const char* str )
164 maLanguage.zero();
165 maScript.zero();
166 maRegion.zero();
167 my_t_impl::assign( str);
/** Does nothing; present so that code written against the liblangtag API
    still compiles and runs.
 */
static void lt_db_initialize()
{
}
/** Does nothing; present so that code written against the liblangtag API
    still compiles and runs.
 */
static void lt_db_finalize()
{
}
/** Does nothing; present so that code written against the liblangtag API
    still compiles and runs.

    @param dir  ignored
 */
static void lt_db_set_datadir( const char* dir )
{
    (void) dir;
}
175 static lt_tag_t* lt_tag_new(void)
177 return new lt_tag_t;
180 static lt_tag_t* lt_tag_copy(lt_tag_t *tag)
182 return (tag ? new lt_tag_t( *tag) : NULL);
185 static void lt_tag_unref(lt_tag_t *tag)
187 if (tag)
188 tag->decRef();
191 /** See http://tools.ietf.org/html/rfc5646
193 We are simply ignorant of grandfathered (irregular and regular) subtags and
194 may either bail out or accept them, sorry (or not). However, we do accept
195 any i-* irregular and x-* privateuse. Subtags are not checked for validity
196 (alpha, digit, registered, ...).
198 static lt_bool_t lt_tag_parse(lt_tag_t *tag,
199 const char *tag_string,
200 lt_error_t **error)
202 (void) error;
203 if (!tag)
204 return 0;
205 tag->assign( tag_string);
206 if (!tag_string)
207 return 0;
208 // In case we supported other subtags this would get more complicated.
209 my_t_impl* aSubtags[] = { &tag->maLanguage, &tag->maScript, &tag->maRegion, NULL };
210 my_t_impl** ppSub = &aSubtags[0];
211 const char* pStart = tag_string;
212 const char* p = pStart;
213 const char* pEnd = pStart + strlen( pStart); // scanning includes \0
214 bool bStartLang = true;
215 bool bPrivate = false;
216 for ( ; p <= pEnd && ppSub && *ppSub; ++p)
218 if (p == pEnd || *p == '-')
220 size_t nLen = p - pStart;
221 if (*ppSub == &tag->maLanguage)
223 if (bStartLang)
225 bStartLang = false;
226 switch (nLen)
228 case 1: // irregular or privateuse
229 if (*pStart == 'i' || *pStart == 'x')
231 (*ppSub)->assign( pStart, p);
232 bPrivate = true;
234 else
235 return 0; // bad
236 break;
237 case 2: // ISO 639 alpha-2
238 case 3: // ISO 639 alpha-3
239 (*ppSub)->assign( pStart, p);
240 break;
241 case 4: // reserved for future use
242 return 0; // bad
243 break;
244 case 5:
245 case 6:
246 case 7:
247 case 8: // registered language subtag
248 (*ppSub++)->assign( pStart, p);
249 break;
250 default:
251 return 0; // bad
254 else
256 if (nLen > 8)
257 return 0; // bad
258 if (bPrivate)
260 // Any combination of "x" 1*("-" (2*8alphanum))
261 // allowed, store first as language and return ok.
262 // For i-* simply assume the same.
263 (*ppSub)->append( pStart-1, p);
264 return !0; // ok
266 else if (nLen == 3)
268 // extlang subtag, 1 to 3 allowed we don't check that.
269 // But if it's numeric it's a region UN M.49 code
270 // instead and no script subtag is present, so advance.
271 if ('0' <= *pStart && *pStart <= '9')
273 ppSub += 2; // &tag->maRegion XXX watch this when inserting fields
274 --p;
275 continue; // for
277 else
278 (*ppSub)->append( pStart-1, p);
280 else
282 // Not part of language subtag, advance.
283 ++ppSub;
284 --p;
285 continue; // for
289 else if (*ppSub == &tag->maScript)
291 switch (nLen)
293 case 4:
294 // script subtag, or a (DIGIT 3alphanum) variant with
295 // no script and no region in which case we stop
296 // parsing.
297 if ('0' <= *pStart && *pStart <= '9')
298 ppSub = NULL;
299 else
300 (*ppSub++)->assign( pStart, p);
301 break;
302 case 3:
303 // This may be a region UN M.49 code if 3DIGIT and no
304 // script code present. Just check first character and
305 // advance.
306 if ('0' <= *pStart && *pStart <= '9')
308 ++ppSub;
309 --p;
310 continue; // for
312 else
313 return 0; // bad
314 break;
315 case 2:
316 // script omitted, region subtag, advance.
317 ++ppSub;
318 --p;
319 continue; // for
320 break;
321 case 1:
322 // script omitted, region omitted, extension subtag
323 // with singleton, stop parsing
324 ppSub = NULL;
325 break;
326 case 5:
327 case 6:
328 case 7:
329 case 8:
330 // script omitted, region omitted, variant subtag, stop
331 // parsing.
332 ppSub = NULL;
333 default:
334 return 0; // bad
337 else if (*ppSub == &tag->maRegion)
339 if (nLen == 2 || nLen == 3)
340 (*ppSub++)->assign( pStart, p);
341 else
342 return 0; // bad
344 pStart = p+1;
347 return !0;
350 static char* lt_tag_canonicalize(lt_tag_t *tag,
351 lt_error_t **error)
353 (void) error;
354 return tag && tag->mpStr ? strdup( tag->mpStr) : NULL;
357 static const lt_lang_t* lt_tag_get_language(const lt_tag_t *tag)
359 return tag && tag->maLanguage.mpStr ? &tag->maLanguage : NULL;
362 static const lt_script_t *lt_tag_get_script(const lt_tag_t *tag)
364 return tag && tag->maScript.mpStr ? &tag->maScript : NULL;
367 static const lt_region_t *lt_tag_get_region(const lt_tag_t *tag)
369 return tag && tag->maRegion.mpStr ? &tag->maRegion : NULL;
372 static const char *lt_lang_get_tag(const lt_lang_t *lang)
374 return lang ? lang->mpStr : NULL;
377 static const char *lt_script_get_tag(const lt_script_t *script)
379 return script ? script->mpStr : NULL;
382 static const char *lt_region_get_tag(const lt_region_t *region)
384 return region ? region->mpStr : NULL;
#ifdef erDEBUG
/** Debug helper, only compiled if erDEBUG is defined: writes the stored tag
    string and its language, script and region parts to stderr.

    Parts that hold no string are printed as "(null)"; handing a NULL pointer
    to a %s conversion is undefined behaviour, so it is substituted here.

    @param tag  tag to dump; a NULL pointer is ignored
 */
static void lt_tag_dump(const lt_tag_t *tag)
{
    if (!tag)
        return;
    const char* const pNone = "(null)";
    fprintf( stderr, "\n");
    fprintf( stderr, "SimpleLangtag langtag: %s\n", tag->mpStr ? tag->mpStr : pNone);
    fprintf( stderr, "SimpleLangtag language: %s\n", tag->maLanguage.mpStr ? tag->maLanguage.mpStr : pNone);
    fprintf( stderr, "SimpleLangtag script: %s\n", tag->maScript.mpStr ? tag->maScript.mpStr : pNone);
    fprintf( stderr, "SimpleLangtag region: %s\n", tag->maRegion.mpStr ? tag->maRegion.mpStr : pNone);
}
#endif
400 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */