1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 /** Cheap and cheesy replacement code for liblangtag on systems that do not
11 allow / want LGPL code or dependencies on glib.
13 XXX NOTE: This code does not check language tags for validity or if they
14 are registered with IANA, does not canonicalize or strip default script
15 tags if included nor does it do any other fancy stuff that liblangtag is
16 capable of. It just makes depending code work without.
/** Boolean-like result type used by the liblangtag-style functions in this
    file (for example the return type of lt_tag_parse()); a plain int. */
typedef int lt_bool_t;
29 lt_error_t() : something(NULL
) {}
32 static void* g_malloc(size_t s
)
37 static void g_free(void* p
)
43 static void lt_error_unref(lt_error_t
*error
)
47 g_free( error
->something
);
55 explicit my_ref() : mnRef(1) {}
57 void incRef() { ++mnRef
; }
58 void decRef() { if (--mnRef
== 0) delete this; }
61 struct my_t_impl
: public my_ref
64 explicit my_t_impl() : my_ref(), mpStr(NULL
) {}
65 virtual ~my_t_impl() { g_free( mpStr
); }
66 explicit my_t_impl( const my_t_impl
& r
)
69 mpStr(r
.mpStr
? strdup( r
.mpStr
) : NULL
)
72 my_t_impl
& operator=( const my_t_impl
& r
)
77 mpStr
= (r
.mpStr
? strdup( r
.mpStr
) : NULL
);
80 void assign( const char* str
)
83 mpStr
= (str
? strdup( str
) : NULL
);
85 void assign( const char* str
, const char* stop
)
88 if (str
&& str
< stop
)
90 mpStr
= static_cast<char*>(g_malloc( stop
- str
+ 1));
91 memcpy( mpStr
, str
, stop
- str
);
92 mpStr
[stop
- str
] = 0;
97 void append( const char* str
, const char* stop
)
99 if (str
&& str
< stop
)
101 size_t nOld
= mpStr
? strlen( mpStr
) : 0;
102 size_t nNew
= nOld
+ (stop
- str
) + 1;
103 char* p
= static_cast<char*>(g_malloc( nNew
));
105 memcpy( p
, mpStr
, nOld
);
106 memcpy( p
+ nOld
, str
, stop
- str
);
119 struct lt_lang_t
: public my_t_impl
121 explicit lt_lang_t() : my_t_impl() {}
122 virtual ~lt_lang_t() {}
125 struct lt_script_t
: public my_t_impl
127 explicit lt_script_t() : my_t_impl() {}
128 virtual ~lt_script_t() {}
131 struct lt_region_t
: public my_t_impl
133 explicit lt_region_t() : my_t_impl() {}
134 virtual ~lt_region_t() {}
137 struct lt_tag_t
: public my_t_impl
139 lt_lang_t maLanguage
;
140 lt_script_t maScript
;
141 lt_region_t maRegion
;
142 explicit lt_tag_t() : my_t_impl(), maLanguage(), maScript(), maRegion() {}
143 virtual ~lt_tag_t() {}
144 explicit lt_tag_t( const lt_tag_t
& r
)
147 maLanguage( r
.maLanguage
),
148 maScript( r
.maScript
),
149 maRegion( r
.maRegion
)
152 lt_tag_t
& operator=( const lt_tag_t
& r
)
156 my_t_impl::operator=( r
);
157 maLanguage
= r
.maLanguage
;
158 maScript
= r
.maScript
;
159 maRegion
= r
.maRegion
;
162 void assign( const char* str
)
167 my_t_impl::assign( str
);
/** Intentionally empty: this implementation performs no initialization. */
static void lt_db_initialize(void)
{
}
/** Intentionally empty: this implementation has nothing to tear down. */
static void lt_db_finalize(void)
{
}
/** Intentionally empty: the directory argument is accepted and ignored. */
static void lt_db_set_datadir( const char* /* dir */ )
{
}
175 static lt_tag_t
* lt_tag_new(void)
180 static lt_tag_t
* lt_tag_copy(lt_tag_t
*tag
)
182 return (tag
? new lt_tag_t( *tag
) : NULL
);
185 static void lt_tag_unref(lt_tag_t
*tag
)
191 /** See http://tools.ietf.org/html/rfc5646
193 We are simply ignorant of grandfathered (irregular and regular) subtags and
194 may either bail out or accept them, sorry (or not). However, we do accept
195 any i-* irregular and x-* privateuse. Subtags are not checked for validity
196 (alpha, digit, registered, ...).
198 static lt_bool_t
lt_tag_parse(lt_tag_t
*tag
,
199 const char *tag_string
,
205 tag
->assign( tag_string
);
208 // In case we supported other subtags this would get more complicated.
209 my_t_impl
* aSubtags
[] = { &tag
->maLanguage
, &tag
->maScript
, &tag
->maRegion
, NULL
};
210 my_t_impl
** ppSub
= &aSubtags
[0];
211 const char* pStart
= tag_string
;
212 const char* p
= pStart
;
213 const char* pEnd
= pStart
+ strlen( pStart
); // scanning includes \0
214 bool bStartLang
= true;
215 bool bPrivate
= false;
216 for ( ; p
<= pEnd
&& ppSub
&& *ppSub
; ++p
)
218 if (p
== pEnd
|| *p
== '-')
220 size_t nLen
= p
- pStart
;
221 if (*ppSub
== &tag
->maLanguage
)
228 case 1: // irregular or privateuse
229 if (*pStart
== 'i' || *pStart
== 'x')
231 (*ppSub
)->assign( pStart
, p
);
237 case 2: // ISO 639 alpha-2
238 case 3: // ISO 639 alpha-3
239 (*ppSub
)->assign( pStart
, p
);
241 case 4: // reserved for future use
247 case 8: // registered language subtag
248 (*ppSub
++)->assign( pStart
, p
);
260 // Any combination of "x" 1*("-" (2*8alphanum))
261 // allowed, store first as language and return ok.
262 // For i-* simply assume the same.
263 (*ppSub
)->append( pStart
-1, p
);
268 // extlang subtag, 1 to 3 allowed we don't check that.
269 // But if it's numeric it's a region UN M.49 code
270 // instead and no script subtag is present, so advance.
271 if ('0' <= *pStart
&& *pStart
<= '9')
273 ppSub
+= 2; // &tag->maRegion XXX watch this when inserting fields
278 (*ppSub
)->append( pStart
-1, p
);
282 // Not part of language subtag, advance.
289 else if (*ppSub
== &tag
->maScript
)
294 // script subtag, or a (DIGIT 3alphanum) variant with
295 // no script and no region in which case we stop
297 if ('0' <= *pStart
&& *pStart
<= '9')
300 (*ppSub
++)->assign( pStart
, p
);
303 // This may be a region UN M.49 code if 3DIGIT and no
304 // script code present. Just check first character and
306 if ('0' <= *pStart
&& *pStart
<= '9')
316 // script omitted, region subtag, advance.
322 // script omitted, region omitted, extension subtag
323 // with singleton, stop parsing
330 // script omitted, region omitted, variant subtag, stop
337 else if (*ppSub
== &tag
->maRegion
)
339 if (nLen
== 2 || nLen
== 3)
340 (*ppSub
++)->assign( pStart
, p
);
350 static char* lt_tag_canonicalize(lt_tag_t
*tag
,
354 return tag
&& tag
->mpStr
? strdup( tag
->mpStr
) : NULL
;
357 static const lt_lang_t
* lt_tag_get_language(const lt_tag_t
*tag
)
359 return tag
&& tag
->maLanguage
.mpStr
? &tag
->maLanguage
: NULL
;
362 static const lt_script_t
*lt_tag_get_script(const lt_tag_t
*tag
)
364 return tag
&& tag
->maScript
.mpStr
? &tag
->maScript
: NULL
;
367 static const lt_region_t
*lt_tag_get_region(const lt_tag_t
*tag
)
369 return tag
&& tag
->maRegion
.mpStr
? &tag
->maRegion
: NULL
;
372 static const char *lt_lang_get_tag(const lt_lang_t
*lang
)
374 return lang
? lang
->mpStr
: NULL
;
377 static const char *lt_script_get_tag(const lt_script_t
*script
)
379 return script
? script
->mpStr
: NULL
;
382 static const char *lt_region_get_tag(const lt_region_t
*region
)
384 return region
? region
->mpStr
: NULL
;
388 static void lt_tag_dump(const lt_tag_t
*tag
)
390 fprintf( stderr
, "\n");
391 fprintf( stderr
, "SimpleLangtag langtag: %s\n", tag
->mpStr
);
392 fprintf( stderr
, "SimpleLangtag language: %s\n", tag
->maLanguage
.mpStr
);
393 fprintf( stderr
, "SimpleLangtag script: %s\n", tag
->maScript
.mpStr
);
394 fprintf( stderr
, "SimpleLangtag region: %s\n", tag
->maRegion
.mpStr
);
400 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */