2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
24 #include <apr_strings.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
34 #include "svn_private_config.h"
35 #include "win32_xlate.h"
39 #define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
40 #define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
43 #define SVN_APR_UTF8_CHARSET "UTF-8"
45 #define SVN_APR_UTF8_CHARSET (const char*)1208
49 static apr_thread_mutex_t
*xlate_handle_mutex
= NULL
;
52 /* The xlate handle cache is a global hash table with linked lists of xlate
53 * handles. In multi-threaded environments, a thread "borrows" an xlate
54 * handle from the cache during a translation and puts it back afterwards.
55 * This avoids holding a global lock for all translations.
56 * If there is no handle for a particular key when needed, a new
57 * handle is created and put in the cache after use.
58 * This means that there will be at most N handles open for a key, where N
59 * is the number of simultaneous handles in use for that key. */
61 typedef struct xlate_handle_node_t
{
63 /* FALSE if the handle is not valid, since its pool is being
66 /* The name of a char encoding or APR_LOCALE_CHARSET. */
67 const char *frompage
, *topage
;
68 struct xlate_handle_node_t
*next
;
69 } xlate_handle_node_t
;
71 /* This maps const char * userdata_key strings to xlate_handle_node_t **
72 handles to the first entry in the linked list of xlate handles. We don't
73 store the pointer to the list head directly in the hash table, since we
74 remove/insert entries at the head in the list in the code below, and
75 we can't use apr_hash_set() in each character translation because that
76 function allocates memory in each call where the value is non-NULL.
77 Since these allocations take place in a global pool, this would be a
79 static apr_hash_t
*xlate_handle_hash
= NULL
;
81 /* Clean up the xlate handle cache. */
83 xlate_cleanup(void *arg
)
85 /* We set the cache variables to NULL so that translation works in other
86 cleanup functions, even if it isn't cached then. */
88 apr_thread_mutex_destroy(xlate_handle_mutex
);
89 xlate_handle_mutex
= NULL
;
91 xlate_handle_hash
= NULL
;
96 /* Set the handle of ARG to NULL. */
98 xlate_handle_node_cleanup(void *arg
)
100 xlate_handle_node_t
*node
= arg
;
107 svn_utf_initialize(apr_pool_t
*pool
)
111 apr_thread_mutex_t
*mutex
;
114 if (!xlate_handle_hash
)
116 /* We create our own subpool, which we protect with the mutex.
117 We can't use the pool passed to us by the caller, since we will
118 use it for xlate handle allocations, possibly in multiple threads,
119 and pool allocation is not thread-safe. */
120 subpool
= svn_pool_create(pool
);
122 if (apr_thread_mutex_create(&mutex
, APR_THREAD_MUTEX_DEFAULT
, subpool
)
124 xlate_handle_mutex
= mutex
;
129 xlate_handle_hash
= apr_hash_make(subpool
);
130 apr_pool_cleanup_register(subpool
, NULL
, xlate_cleanup
,
131 apr_pool_cleanup_null
);
135 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
136 * FROMPAGE can be any valid arguments of the same name to
137 * apr_xlate_open(). Allocate the returned string in POOL. */
139 get_xlate_key(const char *topage
,
140 const char *frompage
,
144 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
145 * topage/frompage is really an int, not a valid string. So generate a
146 * unique key accordingly. */
147 if (frompage
== SVN_APR_LOCALE_CHARSET
)
148 frompage
= "APR_LOCALE_CHARSET";
149 else if (frompage
== SVN_APR_DEFAULT_CHARSET
)
150 frompage
= "APR_DEFAULT_CHARSET";
152 if (topage
== SVN_APR_LOCALE_CHARSET
)
153 topage
= "APR_LOCALE_CHARSET";
154 else if (topage
== SVN_APR_DEFAULT_CHARSET
)
155 topage
= "APR_DEFAULT_CHARSET";
157 return apr_pstrcat(pool
, "svn-utf-", frompage
, "to", topage
,
158 "-xlate-handle", NULL
);
160 /* OS400 code pages are always ints. */
161 return apr_psprintf(pool
, "svn-utf-%dto%d-xlate-handle", (int)frompage
,
166 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
167 creating the handle node if it doesn't exist in USERDATA_KEY.
168 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
169 APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other
170 reason, return the error.
172 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
173 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
174 in the pool of xlate_handle_hash. */
176 get_xlate_handle_node(xlate_handle_node_t
**ret
,
177 const char *topage
, const char *frompage
,
178 const char *userdata_key
, apr_pool_t
*pool
)
180 xlate_handle_node_t
**old_node_p
;
181 xlate_handle_node_t
*old_node
= NULL
;
182 apr_status_t apr_err
;
184 svn_error_t
*err
= NULL
;
186 /* If we already have a handle, just return it. */
189 if (xlate_handle_hash
)
192 apr_err
= apr_thread_mutex_lock(xlate_handle_mutex
);
193 if (apr_err
!= APR_SUCCESS
)
194 return svn_error_create(apr_err
, NULL
,
195 _("Can't lock charset translation mutex"));
197 old_node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
198 APR_HASH_KEY_STRING
);
200 old_node
= *old_node_p
;
203 /* Ensure that the handle is still valid. */
206 /* Remove from the list. */
207 *old_node_p
= old_node
->next
;
208 old_node
->next
= NULL
;
210 apr_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
211 if (apr_err
!= APR_SUCCESS
)
212 return svn_error_create(apr_err
, NULL
,
213 _("Can't unlock charset "
214 "translation mutex"));
224 /* We fall back on a per-pool cache instead. */
225 apr_pool_userdata_get(&p
, userdata_key
, pool
);
227 /* Ensure that the handle is still valid. */
228 if (old_node
&& old_node
->valid
)
236 /* Note that we still have the mutex locked (if it is initialized), so we
237 can use the global pool for creating the new xlate handle. */
239 /* The error handling doesn't support the following cases, since we don't
240 use them currently. Catch this here. */
242 /* On OS400 V5R4 with UTF support, APR_DEFAULT_CHARSET and
243 * APR_LOCALE_CHARSET are both UTF-8 (CCSID 1208), so we won't get far
244 * with this assert active. */
245 assert(frompage
!= SVN_APR_DEFAULT_CHARSET
246 && topage
!= SVN_APR_DEFAULT_CHARSET
247 && (frompage
!= SVN_APR_LOCALE_CHARSET
248 || topage
!= SVN_APR_LOCALE_CHARSET
));
251 /* Use the correct pool for creating the handle. */
252 if (userdata_key
&& xlate_handle_hash
)
253 pool
= apr_hash_pool_get(xlate_handle_hash
);
255 /* Try to create a handle. */
257 apr_err
= svn_subr__win32_xlate_open((win32_xlate_t
**)&handle
, topage
,
260 apr_err
= apr_xlate_open(&handle
, (int)topage
, (int)frompage
, pool
);
262 apr_err
= apr_xlate_open(&handle
, topage
, frompage
, pool
);
265 if (APR_STATUS_IS_EINVAL(apr_err
) || APR_STATUS_IS_ENOTIMPL(apr_err
))
267 else if (apr_err
!= APR_SUCCESS
)
270 /* Can't use svn_error_wrap_apr here because it calls functions in
271 this file, leading to infinite recursion. */
273 if (frompage
== SVN_APR_LOCALE_CHARSET
)
274 errstr
= apr_psprintf(pool
,
275 _("Can't create a character converter from "
276 "native encoding to '%s'"), topage
);
277 else if (topage
== SVN_APR_LOCALE_CHARSET
)
278 errstr
= apr_psprintf(pool
,
279 _("Can't create a character converter from "
280 "'%s' to native encoding"), frompage
);
282 errstr
= apr_psprintf(pool
,
283 _("Can't create a character converter from "
284 "'%s' to '%s'"), frompage
, topage
);
286 /* Handle the error condition normally prevented by the assert
288 errstr
= apr_psprintf(pool
,
289 _("Can't create a character converter from "
290 "'%i' to '%i'"), frompage
, topage
);
292 err
= svn_error_create(apr_err
, NULL
, errstr
);
296 /* Allocate and initialize the node. */
297 *ret
= apr_palloc(pool
, sizeof(xlate_handle_node_t
));
298 (*ret
)->handle
= handle
;
299 (*ret
)->valid
= TRUE
;
300 (*ret
)->frompage
= ((frompage
!= SVN_APR_LOCALE_CHARSET
)
301 ? apr_pstrdup(pool
, frompage
) : frompage
);
302 (*ret
)->topage
= ((topage
!= SVN_APR_LOCALE_CHARSET
)
303 ? apr_pstrdup(pool
, topage
) : topage
);
306 /* If we are called from inside a pool cleanup handler, the just created
307 xlate handle will be closed when that handler returns by a newly
308 registered cleanup handler, however, the handle is still cached by us.
309 To prevent this, we register a cleanup handler that will reset the valid
310 flag of our node, so we don't use an invalid handle. */
312 apr_pool_cleanup_register(pool
, *ret
, xlate_handle_node_cleanup
,
313 apr_pool_cleanup_null
);
316 /* Don't need the lock anymore. */
318 if (userdata_key
&& xlate_handle_hash
)
320 apr_status_t unlock_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
321 if (unlock_err
!= APR_SUCCESS
)
322 return svn_error_create(unlock_err
, NULL
,
323 _("Can't unlock charset translation mutex"));
330 /* Put back NODE into the xlate handle cache for use by other calls.
331 If there is no global cache, store the handle in POOL.
332 Ignore errors related to locking/unlocking the mutex.
333 ### Mutex errors here are very weird. Should we handle them "correctly"
334 ### even if that complicates error handling in the routines below? */
336 put_xlate_handle_node(xlate_handle_node_t
*node
,
337 const char *userdata_key
,
340 assert(node
->next
== NULL
);
343 if (xlate_handle_hash
)
345 xlate_handle_node_t
**node_p
;
347 if (apr_thread_mutex_lock(xlate_handle_mutex
) != APR_SUCCESS
)
350 node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
351 APR_HASH_KEY_STRING
);
354 userdata_key
= apr_pstrdup(apr_hash_pool_get(xlate_handle_hash
),
356 node_p
= apr_palloc(apr_hash_pool_get(xlate_handle_hash
),
359 apr_hash_set(xlate_handle_hash
, userdata_key
,
360 APR_HASH_KEY_STRING
, node_p
);
362 node
->next
= *node_p
;
365 if (apr_thread_mutex_unlock(xlate_handle_mutex
) != APR_SUCCESS
)
371 /* Store it in the per-pool cache. */
372 apr_pool_userdata_set(node
, userdata_key
, apr_pool_cleanup_null
, pool
);
376 /* Return the apr_xlate handle for converting native characters to UTF-8. */
378 get_ntou_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
380 return get_xlate_handle_node(ret
, SVN_APR_UTF8_CHARSET
,
381 SVN_APR_LOCALE_CHARSET
,
382 SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
386 /* Return the apr_xlate handle for converting UTF-8 to native characters.
387 Create one if it doesn't exist. If unable to find a handle, or
388 unable to create one because apr_xlate_open returned APR_EINVAL, then
389 set *RET to null and return SVN_NO_ERROR; if fail for some other
390 reason, return error. */
392 get_uton_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
394 return get_xlate_handle_node(ret
, SVN_APR_LOCALE_CHARSET
,
395 SVN_APR_UTF8_CHARSET
,
396 SVN_UTF_UTON_XLATE_HANDLE
, pool
);
400 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
401 sequences, allocating the result in POOL. */
403 fuzzy_escape(const char *src
, apr_size_t len
, apr_pool_t
*pool
)
405 const char *src_orig
= src
, *src_end
= src
+ len
;
406 apr_size_t new_len
= 0;
408 const char *new_orig
;
410 /* First count how big a dest string we'll need. */
411 while (src
< src_end
)
413 if (! svn_ctype_isascii(*src
) || *src
== '\0')
414 new_len
+= 5; /* 5 slots, for "?\XXX" */
416 new_len
+= 1; /* one slot for the 7-bit char */
421 /* Allocate that amount. */
422 new = apr_palloc(pool
, new_len
+ 1);
426 /* And fill it up. */
427 while (src_orig
< src_end
)
429 if (! svn_ctype_isascii(*src_orig
) || src_orig
== '\0')
431 /* This is the same format as svn_xml_fuzzy_escape uses, but that
432 function escapes different characters. Please keep in sync!
433 ### If we add another fuzzy escape somewhere, we should abstract
434 ### this out to a common function. */
435 sprintf(new, "?\\%03u", (unsigned char) *src_orig
);
452 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
453 in *DEST, which is allocated in POOL. */
455 convert_to_stringbuf(xlate_handle_node_t
*node
,
456 const char *src_data
,
457 apr_size_t src_length
,
458 svn_stringbuf_t
**dest
,
462 apr_status_t apr_err
;
464 apr_err
= svn_subr__win32_xlate_to_stringbuf((win32_xlate_t
*) node
->handle
,
465 src_data
, src_length
,
468 apr_size_t buflen
= src_length
* 2;
469 apr_status_t apr_err
;
470 apr_size_t srclen
= src_length
;
471 apr_size_t destlen
= buflen
;
474 /* Initialize *DEST to an empty stringbuf.
475 A 1:2 ratio of input bytes to output bytes (as assigned above)
476 should be enough for most translations, and if it turns out not
477 to be enough, we'll grow the buffer again, sizing it based on a
478 1:3 ratio of the remainder of the string. */
479 *dest
= svn_stringbuf_create_ensure(buflen
+ 1, pool
);
480 destbuf
= (*dest
)->data
;
482 /* Not only does it not make sense to convert an empty string, but
483 apr-iconv is quite unreasonable about not allowing that. */
489 /* Set up state variables for xlate. */
490 destlen
= buflen
- (*dest
)->len
;
492 /* Attempt the conversion. */
493 apr_err
= apr_xlate_conv_buffer(node
->handle
,
494 src_data
+ (src_length
- srclen
),
496 (*dest
)->data
+ (*dest
)->len
,
499 /* Now, update the *DEST->len to track the amount of output data
500 churned out so far from this loop. */
501 (*dest
)->len
+= ((buflen
- (*dest
)->len
) - destlen
);
502 buflen
+= srclen
* 3; /* 3 is middle ground, 2 wasn't enough
503 for all characters in the buffer, 4 is
504 maximum character size (currently) */
507 } while (apr_err
== APR_SUCCESS
&& srclen
!= 0);
510 /* If we exited the loop with an error, return the error. */
516 /* Can't use svn_error_wrap_apr here because it calls functions in
517 this file, leading to infinite recursion. */
519 if (node
->frompage
== SVN_APR_LOCALE_CHARSET
)
520 errstr
= apr_psprintf
521 (pool
, _("Can't convert string from native encoding to '%s':"),
523 else if (node
->topage
== SVN_APR_LOCALE_CHARSET
)
524 errstr
= apr_psprintf
525 (pool
, _("Can't convert string from '%s' to native encoding:"),
528 errstr
= apr_psprintf
529 (pool
, _("Can't convert string from '%s' to '%s':"),
530 node
->frompage
, node
->topage
);
532 /* On OS400 V5R4 every possible node->topage and node->frompage
533 * *really* is an int. */
534 errstr
= apr_psprintf
535 (pool
, _("Can't convert string from CCSID '%i' to CCSID '%i'"),
536 node
->frompage
, node
->topage
);
538 err
= svn_error_create(apr_err
, NULL
, fuzzy_escape(src_data
,
540 return svn_error_create(apr_err
, err
, errstr
);
542 /* Else, exited due to success. Trim the result buffer down to the
544 (*dest
)->data
[(*dest
)->len
] = '\0';
550 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
551 other than seven-bit, non-control (except for whitespace) ASCII
552 characters, finding the error pool from POOL. Otherwise, return
555 check_non_ascii(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
557 const char *data_start
= data
;
559 for (; len
> 0; --len
, data
++)
561 if ((! apr_isascii(*data
))
562 || ((! apr_isspace(*data
))
563 && apr_iscntrl(*data
)))
565 /* Show the printable part of the data, followed by the
566 decimal code of the questionable character. Because if a
567 user ever gets this error, she's going to have to spend
568 time tracking down the non-ASCII data, so we want to help
569 as much as possible. And yes, we just call the unsafe
570 data "non-ASCII", even though the actual constraint is
571 somewhat more complex than that. */
573 if (data
- data_start
)
575 const char *error_data
576 = apr_pstrndup(pool
, data_start
, (data
- data_start
));
578 return svn_error_createf
580 _("Safe data '%s' was followed by non-ASCII byte %d: "
581 "unable to convert to/from UTF-8"),
582 error_data
, *((const unsigned char *) data
));
586 return svn_error_createf
588 _("Non-ASCII character (code %d) detected, "
589 "and unable to convert to/from UTF-8"),
590 *((const unsigned char *) data
));
598 /* Construct an error with a suitable message to describe the invalid UTF-8
599 * sequence DATA of length LEN (which may have embedded NULLs). We can't
600 * simply print the data, almost by definition we don't really know how it
604 invalid_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
606 const char *last
= svn_utf__last_valid(data
, len
);
607 const char *valid_txt
= "", *invalid_txt
= "";
608 int i
, valid
, invalid
;
610 /* We will display at most 24 valid octets (this may split a leading
611 multi-byte character) as that should fit on one 80 character line. */
615 for (i
= 0; i
< valid
; ++i
)
616 valid_txt
= apr_pstrcat(pool
, valid_txt
,
617 apr_psprintf(pool
, " %02x",
618 (unsigned char)last
[i
-valid
]), NULL
);
620 /* 4 invalid octets will guarantee that the faulty octet is displayed */
621 invalid
= data
+ len
- last
;
624 for (i
= 0; i
< invalid
; ++i
)
625 invalid_txt
= apr_pstrcat(pool
, invalid_txt
,
626 apr_psprintf(pool
, " %02x",
627 (unsigned char)last
[i
]), NULL
);
629 return svn_error_createf(APR_EINVAL
, NULL
,
630 _("Valid UTF-8 data\n(hex:%s)\n"
631 "followed by invalid UTF-8 sequence\n(hex:%s)"),
632 valid_txt
, invalid_txt
);
635 /* Verify that the sequence DATA of length LEN is valid UTF-8 */
637 check_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
639 if (! svn_utf__is_valid(data
, len
))
640 return invalid_utf8(data
, len
, pool
);
644 /* Verify that the NULL terminated sequence DATA is valid UTF-8 */
646 check_cstring_utf8(const char *data
, apr_pool_t
*pool
)
649 if (! svn_utf__cstring_is_valid(data
))
650 return invalid_utf8(data
, strlen(data
), pool
);
656 svn_utf_stringbuf_to_utf8(svn_stringbuf_t
**dest
,
657 const svn_stringbuf_t
*src
,
660 xlate_handle_node_t
*node
;
663 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
667 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
669 err
= check_utf8((*dest
)->data
, (*dest
)->len
, pool
);
673 err
= check_non_ascii(src
->data
, src
->len
, pool
);
675 *dest
= svn_stringbuf_dup(src
, pool
);
678 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
685 svn_utf_string_to_utf8(const svn_string_t
**dest
,
686 const svn_string_t
*src
,
689 svn_stringbuf_t
*destbuf
;
690 xlate_handle_node_t
*node
;
693 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
697 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, &destbuf
, pool
);
699 err
= check_utf8(destbuf
->data
, destbuf
->len
, pool
);
701 *dest
= svn_string_create_from_buf(destbuf
, pool
);
705 err
= check_non_ascii(src
->data
, src
->len
, pool
);
707 *dest
= svn_string_dup(src
, pool
);
710 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
716 /* Common implementation for svn_utf_cstring_to_utf8,
717 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
718 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
719 the translator and allocating from POOL. */
721 convert_cstring(const char **dest
,
723 xlate_handle_node_t
*node
,
728 svn_stringbuf_t
*destbuf
;
729 SVN_ERR(convert_to_stringbuf(node
, src
, strlen(src
),
731 *dest
= destbuf
->data
;
735 apr_size_t len
= strlen(src
);
736 SVN_ERR(check_non_ascii(src
, len
, pool
));
737 *dest
= apr_pstrmemdup(pool
, src
, len
);
744 svn_utf_cstring_to_utf8(const char **dest
,
748 xlate_handle_node_t
*node
;
751 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
752 err
= convert_cstring(dest
, src
, node
, pool
);
753 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
755 SVN_ERR(check_cstring_utf8(*dest
, pool
));
762 svn_utf_cstring_to_utf8_ex2(const char **dest
,
764 const char *frompage
,
767 xlate_handle_node_t
*node
;
769 const char *convset_key
= get_xlate_key(SVN_APR_UTF8_CHARSET
, frompage
,
772 SVN_ERR(get_xlate_handle_node(&node
, SVN_APR_UTF8_CHARSET
, frompage
,
774 err
= convert_cstring(dest
, src
, node
, pool
);
775 put_xlate_handle_node(node
, convset_key
, pool
);
777 SVN_ERR(check_cstring_utf8(*dest
, pool
));
784 svn_utf_cstring_to_utf8_ex(const char **dest
,
786 const char *frompage
,
787 const char *convset_key
,
790 return svn_utf_cstring_to_utf8_ex2(dest
, src
, frompage
, pool
);
795 svn_utf_stringbuf_from_utf8(svn_stringbuf_t
**dest
,
796 const svn_stringbuf_t
*src
,
799 xlate_handle_node_t
*node
;
802 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
806 err
= check_utf8(src
->data
, src
->len
, pool
);
808 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
812 err
= check_non_ascii(src
->data
, src
->len
, pool
);
814 *dest
= svn_stringbuf_dup(src
, pool
);
817 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
824 svn_utf_string_from_utf8(const svn_string_t
**dest
,
825 const svn_string_t
*src
,
828 svn_stringbuf_t
*dbuf
;
829 xlate_handle_node_t
*node
;
832 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
836 err
= check_utf8(src
->data
, src
->len
, pool
);
838 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
841 *dest
= svn_string_create_from_buf(dbuf
, pool
);
845 err
= check_non_ascii(src
->data
, src
->len
, pool
);
847 *dest
= svn_string_dup(src
, pool
);
850 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
857 svn_utf_cstring_from_utf8(const char **dest
,
861 xlate_handle_node_t
*node
;
864 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
866 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
867 err
= convert_cstring(dest
, src
, node
, pool
);
868 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
875 svn_utf_cstring_from_utf8_ex2(const char **dest
,
880 xlate_handle_node_t
*node
;
882 const char *convset_key
= get_xlate_key(topage
, SVN_APR_UTF8_CHARSET
,
885 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
887 SVN_ERR(get_xlate_handle_node(&node
, topage
, SVN_APR_UTF8_CHARSET
,
889 err
= convert_cstring(dest
, src
, node
, pool
);
890 put_xlate_handle_node(node
, convset_key
, pool
);
897 svn_utf_cstring_from_utf8_ex(const char **dest
,
900 const char *convset_key
,
903 return svn_utf_cstring_from_utf8_ex2(dest
, src
, topage
, pool
);
908 svn_utf__cstring_from_utf8_fuzzy(const char *src
,
910 svn_error_t
*(*convert_from_utf8
)
911 (const char **, const char *, apr_pool_t
*))
913 const char *escaped
, *converted
;
916 escaped
= fuzzy_escape(src
, strlen(src
), pool
);
918 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
919 contain only 7-bit bytes :-). Recode to native... */
920 err
= convert_from_utf8(((const char **) &converted
), escaped
, pool
);
924 svn_error_clear(err
);
930 /* ### Check the client locale, maybe we can avoid that second
931 * conversion! See Ulrich Drepper's patch at
932 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
938 svn_utf_cstring_from_utf8_fuzzy(const char *src
,
941 return svn_utf__cstring_from_utf8_fuzzy(src
, pool
,
942 svn_utf_cstring_from_utf8
);
947 svn_utf_cstring_from_utf8_stringbuf(const char **dest
,
948 const svn_stringbuf_t
*src
,
951 svn_stringbuf_t
*destbuf
;
953 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf
, src
, pool
));
954 *dest
= destbuf
->data
;
961 svn_utf_cstring_from_utf8_string(const char **dest
,
962 const svn_string_t
*src
,
965 svn_stringbuf_t
*dbuf
;
966 xlate_handle_node_t
*node
;
969 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
973 err
= check_utf8(src
->data
, src
->len
, pool
);
975 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
982 err
= check_non_ascii(src
->data
, src
->len
, pool
);
984 *dest
= apr_pstrmemdup(pool
, src
->data
, src
->len
);
987 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);