utf8: add unit test for g_utf8_make_valid
[glib.git] / glib / ggettext.c
blob19f5f4178be7093c635c5ec84edc04b65c7eb13a
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1998 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
25 #include "config.h"
27 #include "ggettext.h"
28 #include "glibintl.h"
29 #include "glib-private.h"
31 #include "galloca.h"
32 #include "gthread.h"
33 #include "gmem.h"
34 #ifdef G_OS_WIN32
35 #include "gwin32.h"
36 #include "gfileutils.h"
37 #include "gstrfuncs.h"
38 #include "glib-init.h"
39 #endif
41 #include <string.h>
42 #include <locale.h>
43 #include <libintl.h>
45 #ifdef G_OS_WIN32
47 /**
48 * _glib_get_locale_dir:
50 * Return the path to the share\locale or lib\locale subfolder of the
51 * GLib installation folder. The path is in the system codepage. We
52 * have to use system codepage as bindtextdomain() doesn't have a
53 * UTF-8 interface.
55 gchar *
56 _glib_get_locale_dir (void)
58 gchar *install_dir = NULL, *locale_dir;
59 gchar *retval = NULL;
61 if (glib_dll != NULL)
62 install_dir = g_win32_get_package_installation_directory_of_module (glib_dll);
64 if (install_dir)
67 * Append "/share/locale" or "/lib/locale" depending on whether
68 * autoconfigury detected GNU gettext or not.
70 const char *p = GLIB_LOCALE_DIR + strlen (GLIB_LOCALE_DIR);
71 while (*--p != '/')
73 while (*--p != '/')
76 locale_dir = g_build_filename (install_dir, p, NULL);
78 retval = g_win32_locale_filename_from_utf8 (locale_dir);
80 g_free (install_dir);
81 g_free (locale_dir);
84 if (retval)
85 return retval;
86 else
87 return g_strdup ("");
90 #undef GLIB_LOCALE_DIR
92 #endif /* G_OS_WIN32 */
95 static void
96 ensure_gettext_initialized (void)
98 static gsize initialised;
100 if (g_once_init_enter (&initialised))
102 #ifdef G_OS_WIN32
103 gchar *tmp = _glib_get_locale_dir ();
104 bindtextdomain (GETTEXT_PACKAGE, tmp);
105 g_free (tmp);
106 #else
107 bindtextdomain (GETTEXT_PACKAGE, GLIB_LOCALE_DIR);
108 #endif
109 # ifdef HAVE_BIND_TEXTDOMAIN_CODESET
110 bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
111 # endif
112 g_once_init_leave (&initialised, TRUE);
117 * glib_gettext:
118 * @str: The string to be translated
120 * Returns the translated string from the glib translations.
121 * This is an internal function and should only be used by
122 * the internals of glib (such as libgio).
124 * Returns: the transation of @str to the current locale
126 const gchar *
127 glib_gettext (const gchar *str)
129 ensure_gettext_initialized ();
131 return g_dgettext (GETTEXT_PACKAGE, str);
135 * glib_pgettext:
136 * @msgctxtid: a combined message context and message id, separated
137 * by a \004 character
138 * @msgidoffset: the offset of the message id in @msgctxid
140 * This function is a variant of glib_gettext() which supports
141 * a disambiguating message context. See g_dpgettext() for full
142 * details.
144 * This is an internal function and should only be used by
145 * the internals of glib (such as libgio).
147 * Returns: the translation of @str to the current locale
149 const gchar *
150 glib_pgettext (const gchar *msgctxtid,
151 gsize msgidoffset)
153 ensure_gettext_initialized ();
155 return g_dpgettext (GETTEXT_PACKAGE, msgctxtid, msgidoffset);
159 * g_strip_context:
160 * @msgid: a string
161 * @msgval: another string
163 * An auxiliary function for gettext() support (see Q_()).
165 * Returns: @msgval, unless @msgval is identical to @msgid
166 * and contains a '|' character, in which case a pointer to
167 * the substring of msgid after the first '|' character is returned.
169 * Since: 2.4
171 const gchar *
172 g_strip_context (const gchar *msgid,
173 const gchar *msgval)
175 if (msgval == msgid)
177 const char *c = strchr (msgid, '|');
178 if (c != NULL)
179 return c + 1;
182 return msgval;
186 * g_dpgettext:
187 * @domain: (nullable): the translation domain to use, or %NULL to use
188 * the domain set with textdomain()
189 * @msgctxtid: a combined message context and message id, separated
190 * by a \004 character
191 * @msgidoffset: the offset of the message id in @msgctxid
193 * This function is a variant of g_dgettext() which supports
194 * a disambiguating message context. GNU gettext uses the
195 * '\004' character to separate the message context and
196 * message id in @msgctxtid.
197 * If 0 is passed as @msgidoffset, this function will fall back to
198 * trying to use the deprecated convention of using "|" as a separation
199 * character.
201 * This uses g_dgettext() internally. See that functions for differences
202 * with dgettext() proper.
204 * Applications should normally not use this function directly,
205 * but use the C_() macro for translations with context.
207 * Returns: The translated string
209 * Since: 2.16
211 const gchar *
212 g_dpgettext (const gchar *domain,
213 const gchar *msgctxtid,
214 gsize msgidoffset)
216 const gchar *translation;
217 gchar *sep;
219 translation = g_dgettext (domain, msgctxtid);
221 if (translation == msgctxtid)
223 if (msgidoffset > 0)
224 return msgctxtid + msgidoffset;
225 sep = strchr (msgctxtid, '|');
227 if (sep)
229 /* try with '\004' instead of '|', in case
230 * xgettext -kQ_:1g was used
232 gchar *tmp = g_alloca (strlen (msgctxtid) + 1);
233 strcpy (tmp, msgctxtid);
234 tmp[sep - msgctxtid] = '\004';
236 translation = g_dgettext (domain, tmp);
238 if (translation == tmp)
239 return sep + 1;
243 return translation;
246 /* This function is taken from gettext.h
247 * GNU gettext uses '\004' to separate context and msgid in .mo files.
250 * g_dpgettext2:
251 * @domain: (nullable): the translation domain to use, or %NULL to use
252 * the domain set with textdomain()
253 * @context: the message context
254 * @msgid: the message
256 * This function is a variant of g_dgettext() which supports
257 * a disambiguating message context. GNU gettext uses the
258 * '\004' character to separate the message context and
259 * message id in @msgctxtid.
261 * This uses g_dgettext() internally. See that functions for differences
262 * with dgettext() proper.
264 * This function differs from C_() in that it is not a macro and
265 * thus you may use non-string-literals as context and msgid arguments.
267 * Returns: The translated string
269 * Since: 2.18
271 const gchar *
272 g_dpgettext2 (const gchar *domain,
273 const gchar *msgctxt,
274 const gchar *msgid)
276 size_t msgctxt_len = strlen (msgctxt) + 1;
277 size_t msgid_len = strlen (msgid) + 1;
278 const char *translation;
279 char* msg_ctxt_id;
281 msg_ctxt_id = g_alloca (msgctxt_len + msgid_len);
283 memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
284 msg_ctxt_id[msgctxt_len - 1] = '\004';
285 memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
287 translation = g_dgettext (domain, msg_ctxt_id);
289 if (translation == msg_ctxt_id)
291 /* try the old way of doing message contexts, too */
292 msg_ctxt_id[msgctxt_len - 1] = '|';
293 translation = g_dgettext (domain, msg_ctxt_id);
295 if (translation == msg_ctxt_id)
296 return msgid;
299 return translation;
302 static gboolean
303 _g_dgettext_should_translate (void)
305 static gsize translate = 0;
306 enum {
307 SHOULD_TRANSLATE = 1,
308 SHOULD_NOT_TRANSLATE = 2
311 if (G_UNLIKELY (g_once_init_enter (&translate)))
313 gboolean should_translate = TRUE;
315 const char *default_domain = textdomain (NULL);
316 const char *translator_comment = gettext ("");
317 #ifndef G_OS_WIN32
318 const char *translate_locale = setlocale (LC_MESSAGES, NULL);
319 #else
320 const char *translate_locale = g_win32_getlocale ();
321 #endif
322 /* We should NOT translate only if all the following hold:
323 * - user has called textdomain() and set textdomain to non-default
324 * - default domain has no translations
325 * - locale does not start with "en_" and is not "C"
327 * Rationale:
328 * - If text domain is still the default domain, maybe user calls
329 * it later. Continue with old behavior of translating.
330 * - If locale starts with "en_", we can continue using the
331 * translations even if the app doesn't have translations for
332 * this locale. That is, en_UK and en_CA for example.
333 * - If locale is "C", maybe user calls setlocale(LC_ALL,"") later.
334 * Continue with old behavior of translating.
336 if (!default_domain || !translator_comment || !translate_locale ||
337 (0 != strcmp (default_domain, "messages") &&
338 '\0' == *translator_comment &&
339 0 != strncmp (translate_locale, "en_", 3) &&
340 0 != strcmp (translate_locale, "C")))
341 should_translate = FALSE;
343 g_once_init_leave (&translate,
344 should_translate ?
345 SHOULD_TRANSLATE :
346 SHOULD_NOT_TRANSLATE);
349 return translate == SHOULD_TRANSLATE;
353 * g_dgettext:
354 * @domain: (nullable): the translation domain to use, or %NULL to use
355 * the domain set with textdomain()
356 * @msgid: message to translate
358 * This function is a wrapper of dgettext() which does not translate
359 * the message if the default domain as set with textdomain() has no
360 * translations for the current locale.
362 * The advantage of using this function over dgettext() proper is that
363 * libraries using this function (like GTK+) will not use translations
364 * if the application using the library does not have translations for
365 * the current locale. This results in a consistent English-only
366 * interface instead of one having partial translations. For this
367 * feature to work, the call to textdomain() and setlocale() should
368 * precede any g_dgettext() invocations. For GTK+, it means calling
369 * textdomain() before gtk_init or its variants.
371 * This function disables translations if and only if upon its first
372 * call all the following conditions hold:
374 * - @domain is not %NULL
376 * - textdomain() has been called to set a default text domain
378 * - there is no translations available for the default text domain
379 * and the current locale
381 * - current locale is not "C" or any English locales (those
382 * starting with "en_")
384 * Note that this behavior may not be desired for example if an application
385 * has its untranslated messages in a language other than English. In those
386 * cases the application should call textdomain() after initializing GTK+.
388 * Applications should normally not use this function directly,
389 * but use the _() macro for translations.
391 * Returns: The translated string
393 * Since: 2.18
395 const gchar *
396 g_dgettext (const gchar *domain,
397 const gchar *msgid)
399 if (domain && G_UNLIKELY (!_g_dgettext_should_translate ()))
400 return msgid;
402 return dgettext (domain, msgid);
406 * g_dcgettext:
407 * @domain: (nullable): the translation domain to use, or %NULL to use
408 * the domain set with textdomain()
409 * @msgid: message to translate
410 * @category: a locale category
412 * This is a variant of g_dgettext() that allows specifying a locale
413 * category instead of always using `LC_MESSAGES`. See g_dgettext() for
414 * more information about how this functions differs from calling
415 * dcgettext() directly.
417 * Returns: the translated string for the given locale category
419 * Since: 2.26
421 const gchar *
422 g_dcgettext (const gchar *domain,
423 const gchar *msgid,
424 gint category)
426 if (domain && G_UNLIKELY (!_g_dgettext_should_translate ()))
427 return msgid;
429 return dcgettext (domain, msgid, category);
433 * g_dngettext:
434 * @domain: (nullable): the translation domain to use, or %NULL to use
435 * the domain set with textdomain()
436 * @msgid: message to translate
437 * @msgid_plural: plural form of the message
438 * @n: the quantity for which translation is needed
440 * This function is a wrapper of dngettext() which does not translate
441 * the message if the default domain as set with textdomain() has no
442 * translations for the current locale.
444 * See g_dgettext() for details of how this differs from dngettext()
445 * proper.
447 * Returns: The translated string
449 * Since: 2.18
451 const gchar *
452 g_dngettext (const gchar *domain,
453 const gchar *msgid,
454 const gchar *msgid_plural,
455 gulong n)
457 if (domain && G_UNLIKELY (!_g_dgettext_should_translate ()))
458 return n == 1 ? msgid : msgid_plural;
460 return dngettext (domain, msgid, msgid_plural, n);
465 * SECTION:i18n
466 * @title: Internationalization
467 * @short_description: gettext support macros
468 * @see_also: the gettext manual
470 * GLib doesn't force any particular localization method upon its users.
471 * But since GLib itself is localized using the gettext() mechanism, it seems
472 * natural to offer the de-facto standard gettext() support macros in an
473 * easy-to-use form.
475 * In order to use these macros in an application, you must include
476 * `<glib/gi18n.h>`. For use in a library, you must include
477 * `<glib/gi18n-lib.h>`
478 * after defining the %GETTEXT_PACKAGE macro suitably for your library:
479 * |[<!-- language="C" -->
480 * #define GETTEXT_PACKAGE "gtk20"
481 * #include <glib/gi18n-lib.h>
482 * ]|
483 * For an application, note that you also have to call bindtextdomain(),
484 * bind_textdomain_codeset(), textdomain() and setlocale() early on in your
485 * main() to make gettext() work. For example:
486 * |[<!-- language="C" -->
487 * #include <glib/gi18n.h>
488 * #include <locale.h>
490 * int
491 * main (int argc, char **argv)
493 * setlocale (LC_ALL, "");
494 * bindtextdomain (GETTEXT_PACKAGE, DATADIR "/locale");
495 * bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
496 * textdomain (GETTEXT_PACKAGE);
498 * // Rest of your application.
500 * ]|
501 * where `DATADIR` is as typically provided by automake.
503 * For a library, you only have to call bindtextdomain() and
504 * bind_textdomain_codeset() in your initialization function. If your library
505 * doesn't have an initialization function, you can call the functions before
506 * the first translated message.
508 * The
509 * [gettext manual](http://www.gnu.org/software/gettext/manual/gettext.html#Maintainers)
510 * covers details of how to integrate gettext into a project’s build system and
511 * workflow.
515 * _:
516 * @String: the string to be translated
518 * Marks a string for translation, gets replaced with the translated string
519 * at runtime.
521 * Since: 2.4
525 * Q_:
526 * @String: the string to be translated, with a '|'-separated prefix
527 * which must not be translated
529 * Like _(), but handles context in message ids. This has the advantage
530 * that the string can be adorned with a prefix to guarantee uniqueness
531 * and provide context to the translator.
533 * One use case given in the gettext manual is GUI translation, where one
534 * could e.g. disambiguate two "Open" menu entries as "File|Open" and
535 * "Printer|Open". Another use case is the string "Russian" which may
536 * have to be translated differently depending on whether it's the name
537 * of a character set or a language. This could be solved by using
538 * "charset|Russian" and "language|Russian".
540 * See the C_() macro for a different way to mark up translatable strings
541 * with context.
543 * If you are using the Q_() macro, you need to make sure that you pass
544 * `--keyword=Q_` to xgettext when extracting messages.
545 * If you are using GNU gettext >= 0.15, you can also use
546 * `--keyword=Q_:1g` to let xgettext split the context
547 * string off into a msgctxt line in the po file.
549 * Returns: the translated message
551 * Since: 2.4
555 * C_:
556 * @Context: a message context, must be a string literal
557 * @String: a message id, must be a string literal
559 * Uses gettext to get the translation for @String. @Context is
560 * used as a context. This is mainly useful for short strings which
561 * may need different translations, depending on the context in which
562 * they are used.
563 * |[<!-- language="C" -->
564 * label1 = C_("Navigation", "Back");
565 * label2 = C_("Body part", "Back");
566 * ]|
568 * If you are using the C_() macro, you need to make sure that you pass
569 * `--keyword=C_:1c,2` to xgettext when extracting messages.
570 * Note that this only works with GNU gettext >= 0.15.
572 * Returns: the translated message
574 * Since: 2.16
578 * N_:
579 * @String: the string to be translated
581 * Only marks a string for translation. This is useful in situations
582 * where the translated strings can't be directly used, e.g. in string
583 * array initializers. To get the translated string, call gettext()
584 * at runtime.
585 * |[<!-- language="C" -->
587 * static const char *messages[] = {
588 * N_("some very meaningful message"),
589 * N_("and another one")
590 * };
591 * const char *string;
592 * ...
593 * string
594 * = index > 1 ? _("a default message") : gettext (messages[index]);
596 * fputs (string, stdout);
597 * ...
599 * ]|
601 * Since: 2.4
605 * NC_:
606 * @Context: a message context, must be a string literal
607 * @String: a message id, must be a string literal
609 * Only marks a string for translation, with context.
610 * This is useful in situations where the translated strings can't
611 * be directly used, e.g. in string array initializers. To get the
612 * translated string, you should call g_dpgettext2() at runtime.
614 * |[<!-- language="C" -->
616 * static const char *messages[] = {
617 * NC_("some context", "some very meaningful message"),
618 * NC_("some context", "and another one")
619 * };
620 * const char *string;
621 * ...
622 * string
623 * = index > 1 ? g_dpgettext2 (NULL, "some context", "a default message")
624 * : g_dpgettext2 (NULL, "some context", messages[index]);
626 * fputs (string, stdout);
627 * ...
629 * ]|
631 * If you are using the NC_() macro, you need to make sure that you pass
632 * `--keyword=NC_:1c,2` to xgettext when extracting messages.
633 * Note that this only works with GNU gettext >= 0.15. Intltool has support
634 * for the NC_() macro since version 0.40.1.
636 * Since: 2.18