2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
24 #include <apr_strings.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
34 #include "svn_private_config.h"
35 #include "win32_xlate.h"
39 #define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
40 #define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
43 #define SVN_APR_UTF8_CHARSET "UTF-8"
45 #define SVN_APR_UTF8_CHARSET (const char*)1208
49 static apr_thread_mutex_t
*xlate_handle_mutex
= NULL
;
52 /* The xlate handle cache is a global hash table with linked lists of xlate
53 * handles. In multi-threaded environments, a thread "borrows" an xlate
54 * handle from the cache during a translation and puts it back afterwards.
55 * This avoids holding a global lock for all translations.
56 * If there is no handle for a particular key when needed, a new
57 * handle is created and put in the cache after use.
58 * This means that there will be at most N handles open for a key, where N
59 * is the number of simultaneous handles in use for that key. */
61 typedef struct xlate_handle_node_t
{
63 /* FALSE if the handle is not valid, since its pool is being
66 /* The name of a char encoding or APR_LOCALE_CHARSET. */
67 const char *frompage
, *topage
;
68 struct xlate_handle_node_t
*next
;
69 } xlate_handle_node_t
;
71 /* This maps const char * userdata_key strings to xlate_handle_node_t **
72 handles to the first entry in the linked list of xlate handles. We don't
73 store the pointer to the list head directly in the hash table, since we
74 remove/insert entries at the head in the list in the code below, and
75 we can't use apr_hash_set() in each character translation because that
76 function allocates memory in each call where the value is non-NULL.
77 Since these allocations take place in a global pool, this would be a
79 static apr_hash_t
*xlate_handle_hash
= NULL
;
81 /* Clean up the xlate handle cache. */
83 xlate_cleanup(void *arg
)
85 /* We set the cache variables to NULL so that translation works in other
86 cleanup functions, even if it isn't cached then. */
88 apr_thread_mutex_destroy(xlate_handle_mutex
);
89 xlate_handle_mutex
= NULL
;
91 xlate_handle_hash
= NULL
;
96 /* Set the handle of ARG to NULL. */
98 xlate_handle_node_cleanup(void *arg
)
100 xlate_handle_node_t
*node
= arg
;
107 svn_utf_initialize(apr_pool_t
*pool
)
111 apr_thread_mutex_t
*mutex
;
114 if (!xlate_handle_hash
)
116 /* We create our own subpool, which we protect with the mutex.
117 We can't use the pool passed to us by the caller, since we will
118 use it for xlate handle allocations, possibly in multiple threads,
119 and pool allocation is not thread-safe. */
120 subpool
= svn_pool_create(pool
);
122 if (apr_thread_mutex_create(&mutex
, APR_THREAD_MUTEX_DEFAULT
, subpool
)
124 xlate_handle_mutex
= mutex
;
129 xlate_handle_hash
= apr_hash_make(subpool
);
130 apr_pool_cleanup_register(subpool
, NULL
, xlate_cleanup
,
131 apr_pool_cleanup_null
);
135 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
136 * FROMPAGE can be any valid arguments of the same name to
137 * apr_xlate_open(). Allocate the returned string in POOL. */
139 get_xlate_key(const char *topage
,
140 const char *frompage
,
144 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
145 * topage/frompage is really an int, not a valid string. So generate a
146 * unique key accordingly. */
147 if (frompage
== SVN_APR_LOCALE_CHARSET
)
148 frompage
= "APR_LOCALE_CHARSET";
149 else if (frompage
== SVN_APR_DEFAULT_CHARSET
)
150 frompage
= "APR_DEFAULT_CHARSET";
152 if (topage
== SVN_APR_LOCALE_CHARSET
)
153 topage
= "APR_LOCALE_CHARSET";
154 else if (topage
== SVN_APR_DEFAULT_CHARSET
)
155 topage
= "APR_DEFAULT_CHARSET";
157 return apr_pstrcat(pool
, "svn-utf-", frompage
, "to", topage
,
158 "-xlate-handle", NULL
);
160 /* OS400 code pages are always ints. */
161 return apr_psprintf(pool
, "svn-utf-%dto%d-xlate-handle", (int)frompage
,
166 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
167 creating the handle node if it doesn't exist in USERDATA_KEY.
168 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
169 APR_ENOTIMPL, set (*RET)->handle to NULL. If it fails for any other
170 reason, return the error.
172 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
173 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
174 in the pool of xlate_handle_hash. */
176 get_xlate_handle_node(xlate_handle_node_t
**ret
,
177 const char *topage
, const char *frompage
,
178 const char *userdata_key
, apr_pool_t
*pool
)
180 xlate_handle_node_t
**old_node_p
;
181 xlate_handle_node_t
*old_node
= NULL
;
182 apr_status_t apr_err
;
184 svn_error_t
*err
= NULL
;
186 /* If we already have a handle, just return it. */
189 if (xlate_handle_hash
)
192 apr_err
= apr_thread_mutex_lock(xlate_handle_mutex
);
193 if (apr_err
!= APR_SUCCESS
)
194 return svn_error_create(apr_err
, NULL
,
195 _("Can't lock charset translation mutex"));
197 old_node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
198 APR_HASH_KEY_STRING
);
200 old_node
= *old_node_p
;
203 /* Ensure that the handle is still valid. */
206 /* Remove from the list. */
207 *old_node_p
= old_node
->next
;
208 old_node
->next
= NULL
;
210 apr_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
211 if (apr_err
!= APR_SUCCESS
)
212 return svn_error_create(apr_err
, NULL
,
213 _("Can't unlock charset "
214 "translation mutex"));
224 /* We fall back on a per-pool cache instead. */
225 apr_pool_userdata_get(&p
, userdata_key
, pool
);
227 /* Ensure that the handle is still valid. */
228 if (old_node
&& old_node
->valid
)
236 /* Note that we still have the mutex locked (if it is initialized), so we
237 can use the global pool for creating the new xlate handle. */
239 /* The error handling doesn't support the following cases, since we don't
240 use them currently. Catch this here. */
242 /* On OS400 V5R4 with UTF support, APR_DEFAULT_CHARSET and
243 * APR_LOCALE_CHARSET are both UTF-8 (CCSID 1208), so we won't get far
244 * with this assert active. */
245 assert(frompage
!= SVN_APR_DEFAULT_CHARSET
246 && topage
!= SVN_APR_DEFAULT_CHARSET
247 && (frompage
!= SVN_APR_LOCALE_CHARSET
248 || topage
!= SVN_APR_LOCALE_CHARSET
));
251 /* Use the correct pool for creating the handle. */
252 if (userdata_key
&& xlate_handle_hash
)
253 pool
= apr_hash_pool_get(xlate_handle_hash
);
255 /* Try to create a handle. */
257 apr_err
= svn_subr__win32_xlate_open((win32_xlate_t
**)&handle
, topage
,
260 apr_err
= apr_xlate_open(&handle
, (int)topage
, (int)frompage
, pool
);
262 apr_err
= apr_xlate_open(&handle
, topage
, frompage
, pool
);
265 if (APR_STATUS_IS_EINVAL(apr_err
) || APR_STATUS_IS_ENOTIMPL(apr_err
))
267 else if (apr_err
!= APR_SUCCESS
)
270 /* Can't use svn_error_wrap_apr here because it calls functions in
271 this file, leading to infinite recursion. */
273 if (frompage
== SVN_APR_LOCALE_CHARSET
)
274 errstr
= apr_psprintf(pool
,
275 _("Can't create a character converter from "
276 "native encoding to '%s'"), topage
);
277 else if (topage
== SVN_APR_LOCALE_CHARSET
)
278 errstr
= apr_psprintf(pool
,
279 _("Can't create a character converter from "
280 "'%s' to native encoding"), frompage
);
282 errstr
= apr_psprintf(pool
,
283 _("Can't create a character converter from "
284 "'%s' to '%s'"), frompage
, topage
);
286 /* Handle the error condition normally prevented by the assert
288 errstr
= apr_psprintf(pool
,
289 _("Can't create a character converter from "
290 "'%i' to '%i'"), frompage
, topage
);
292 err
= svn_error_create(apr_err
, NULL
, errstr
);
296 /* Allocate and initialize the node. */
297 *ret
= apr_palloc(pool
, sizeof(xlate_handle_node_t
));
298 (*ret
)->handle
= handle
;
299 (*ret
)->valid
= TRUE
;
300 (*ret
)->frompage
= ((frompage
!= SVN_APR_LOCALE_CHARSET
)
301 ? apr_pstrdup(pool
, frompage
) : frompage
);
302 (*ret
)->topage
= ((topage
!= SVN_APR_LOCALE_CHARSET
)
303 ? apr_pstrdup(pool
, topage
) : topage
);
306 /* If we are called from inside a pool cleanup handler, the just created
307 xlate handle will be closed when that handler returns by a newly
308 registered cleanup handler, however, the handle is still cached by us.
309 To prevent this, we register a cleanup handler that will reset the valid
310 flag of our node, so we don't use an invalid handle. */
312 apr_pool_cleanup_register(pool
, *ret
, xlate_handle_node_cleanup
,
313 apr_pool_cleanup_null
);
316 /* Don't need the lock anymore. */
318 if (userdata_key
&& xlate_handle_hash
)
320 apr_status_t unlock_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
321 if (unlock_err
!= APR_SUCCESS
)
322 return svn_error_create(unlock_err
, NULL
,
323 _("Can't unlock charset translation mutex"));
330 /* Put back NODE into the xlate handle cache for use by other calls.
331 If there is no global cache, store the handle in POOL.
332 Ignore errors related to locking/unlocking the mutex.
333 ### Mutex errors here are very weird. Should we handle them "correctly"
334 ### even if that complicates error handling in the routines below? */
336 put_xlate_handle_node(xlate_handle_node_t
*node
,
337 const char *userdata_key
,
340 assert(node
->next
== NULL
);
343 if (xlate_handle_hash
)
345 xlate_handle_node_t
**node_p
;
347 if (apr_thread_mutex_lock(xlate_handle_mutex
) != APR_SUCCESS
)
350 node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
351 APR_HASH_KEY_STRING
);
354 userdata_key
= apr_pstrdup(apr_hash_pool_get(xlate_handle_hash
),
356 node_p
= apr_palloc(apr_hash_pool_get(xlate_handle_hash
),
359 apr_hash_set(xlate_handle_hash
, userdata_key
,
360 APR_HASH_KEY_STRING
, node_p
);
362 node
->next
= *node_p
;
365 if (apr_thread_mutex_unlock(xlate_handle_mutex
) != APR_SUCCESS
)
371 /* Store it in the per-pool cache. */
372 apr_pool_userdata_set(node
, userdata_key
, apr_pool_cleanup_null
, pool
);
376 /* Return the apr_xlate handle for converting native characters to UTF-8. */
378 get_ntou_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
380 return get_xlate_handle_node(ret
, SVN_APR_UTF8_CHARSET
,
381 SVN_APR_LOCALE_CHARSET
,
382 SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
386 /* Return the apr_xlate handle for converting UTF-8 to native characters.
387 Create one if it doesn't exist. If unable to find a handle, or
388 unable to create one because apr_xlate_open returned APR_EINVAL, then
389 set (*RET)->handle to NULL and return SVN_NO_ERROR; if it fails for
390 some other reason, return the error. */
392 get_uton_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
394 return get_xlate_handle_node(ret
, SVN_APR_LOCALE_CHARSET
,
395 SVN_APR_UTF8_CHARSET
,
396 SVN_UTF_UTON_XLATE_HANDLE
, pool
);
400 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
401 sequences, allocating the result in POOL. */
403 fuzzy_escape(const char *src
, apr_size_t len
, apr_pool_t
*pool
)
405 const char *src_orig
= src
, *src_end
= src
+ len
;
406 apr_size_t new_len
= 0;
408 const char *new_orig
;
410 /* First count how big a dest string we'll need. */
411 while (src
< src_end
)
413 if (! svn_ctype_isascii(*src
) || *src
== '\0')
414 new_len
+= 5; /* 5 slots, for "?\XXX" */
416 new_len
+= 1; /* one slot for the 7-bit char */
421 /* Allocate that amount. */
422 new = apr_palloc(pool
, new_len
+ 1);
426 /* And fill it up. */
427 while (src_orig
< src_end
)
429 if (! svn_ctype_isascii(*src_orig
) || src_orig
== '\0')
431 /* This is the same format as svn_xml_fuzzy_escape uses, but that
432 function escapes different characters. Please keep in sync!
433 ### If we add another fuzzy escape somewhere, we should abstract
434 ### this out to a common function. */
435 sprintf(new, "?\\%03u", (unsigned char) *src_orig
);
452 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
453 in *DEST, which is allocated in POOL. */
455 convert_to_stringbuf(xlate_handle_node_t
*node
,
456 const char *src_data
,
457 apr_size_t src_length
,
458 svn_stringbuf_t
**dest
,
462 apr_status_t apr_err
;
464 apr_err
= svn_subr__win32_xlate_to_stringbuf((win32_xlate_t
*) node
->handle
,
465 src_data
, src_length
,
468 apr_size_t buflen
= src_length
* 2;
469 apr_status_t apr_err
;
470 apr_size_t srclen
= src_length
;
471 apr_size_t destlen
= buflen
;
474 /* Initialize *DEST to an empty stringbuf. */
475 *dest
= svn_stringbuf_create("", pool
);
476 destbuf
= (*dest
)->data
;
478 /* Not only does it not make sense to convert an empty string, but
479 apr-iconv is quite unreasonable about not allowing that. */
485 /* A 1:2 ratio of input bytes to output bytes (as assigned above)
486 should be enough for most translations, and if it turns out not
487 to be enough, we'll grow the buffer again, sizing it based on a
488 1:3 ratio of the remainder of the string. */
490 svn_stringbuf_ensure(*dest
, buflen
+ 1);
492 /* Set up state variables for xlate. */
493 destlen
= buflen
- (*dest
)->len
;
495 /* Attempt the conversion. */
496 apr_err
= apr_xlate_conv_buffer(node
->handle
,
497 src_data
+ (src_length
- srclen
),
499 (*dest
)->data
+ (*dest
)->len
,
502 /* Now, update the *DEST->len to track the amount of output data
503 churned out so far from this loop. */
504 (*dest
)->len
+= ((buflen
- (*dest
)->len
) - destlen
);
505 buflen
+= srclen
* 3; /* 3 is middle ground, 2 wasn't enough
506 for all characters in the buffer, 4 is
507 maximum character size (currently) */
510 } while (apr_err
== APR_SUCCESS
&& srclen
!= 0);
513 /* If we exited the loop with an error, return the error. */
519 /* Can't use svn_error_wrap_apr here because it calls functions in
520 this file, leading to infinite recursion. */
522 if (node
->frompage
== SVN_APR_LOCALE_CHARSET
)
523 errstr
= apr_psprintf
524 (pool
, _("Can't convert string from native encoding to '%s':"),
526 else if (node
->topage
== SVN_APR_LOCALE_CHARSET
)
527 errstr
= apr_psprintf
528 (pool
, _("Can't convert string from '%s' to native encoding:"),
531 errstr
= apr_psprintf
532 (pool
, _("Can't convert string from '%s' to '%s':"),
533 node
->frompage
, node
->topage
);
535 /* On OS400 V5R4 every possible node->topage and node->frompage
536 * *really* is an int. */
537 errstr
= apr_psprintf
538 (pool
, _("Can't convert string from CCSID '%i' to CCSID '%i'"),
539 node
->frompage
, node
->topage
);
541 err
= svn_error_create(apr_err
, NULL
, fuzzy_escape(src_data
,
543 return svn_error_create(apr_err
, err
, errstr
);
545 /* Else, exited due to success. Trim the result buffer down to the
547 (*dest
)->data
[(*dest
)->len
] = '\0';
553 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
554 other than seven-bit, non-control (except for whitespace) ASCII
555 characters, finding the error pool from POOL. Otherwise, return
558 check_non_ascii(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
560 const char *data_start
= data
;
562 for (; len
> 0; --len
, data
++)
564 if ((! apr_isascii(*data
))
565 || ((! apr_isspace(*data
))
566 && apr_iscntrl(*data
)))
568 /* Show the printable part of the data, followed by the
569 decimal code of the questionable character. Because if a
570 user ever gets this error, she's going to have to spend
571 time tracking down the non-ASCII data, so we want to help
572 as much as possible. And yes, we just call the unsafe
573 data "non-ASCII", even though the actual constraint is
574 somewhat more complex than that. */
576 if (data
- data_start
)
578 const char *error_data
579 = apr_pstrndup(pool
, data_start
, (data
- data_start
));
581 return svn_error_createf
583 _("Safe data '%s' was followed by non-ASCII byte %d: "
584 "unable to convert to/from UTF-8"),
585 error_data
, *((const unsigned char *) data
));
589 return svn_error_createf
591 _("Non-ASCII character (code %d) detected, "
592 "and unable to convert to/from UTF-8"),
593 *((const unsigned char *) data
));
601 /* Construct an error with a suitable message to describe the invalid UTF-8
602 * sequence DATA of length LEN (which may have embedded NULLs). We can't
603 * simply print the data, almost by definition we don't really know how it
607 invalid_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
609 const char *last
= svn_utf__last_valid(data
, len
);
610 const char *valid_txt
= "", *invalid_txt
= "";
611 int i
, valid
, invalid
;
613 /* We will display at most 24 valid octets (this may split a leading
614 multi-byte character) as that should fit on one 80 character line. */
618 for (i
= 0; i
< valid
; ++i
)
619 valid_txt
= apr_pstrcat(pool
, valid_txt
,
620 apr_psprintf(pool
, " %02x",
621 (unsigned char)last
[i
-valid
]), NULL
);
623 /* 4 invalid octets will guarantee that the faulty octet is displayed */
624 invalid
= data
+ len
- last
;
627 for (i
= 0; i
< invalid
; ++i
)
628 invalid_txt
= apr_pstrcat(pool
, invalid_txt
,
629 apr_psprintf(pool
, " %02x",
630 (unsigned char)last
[i
]), NULL
);
632 return svn_error_createf(APR_EINVAL
, NULL
,
633 _("Valid UTF-8 data\n(hex:%s)\n"
634 "followed by invalid UTF-8 sequence\n(hex:%s)"),
635 valid_txt
, invalid_txt
);
638 /* Verify that the sequence DATA of length LEN is valid UTF-8 */
640 check_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
642 if (! svn_utf__is_valid(data
, len
))
643 return invalid_utf8(data
, len
, pool
);
647 /* Verify that the NULL terminated sequence DATA is valid UTF-8 */
649 check_cstring_utf8(const char *data
, apr_pool_t
*pool
)
652 if (! svn_utf__cstring_is_valid(data
))
653 return invalid_utf8(data
, strlen(data
), pool
);
659 svn_utf_stringbuf_to_utf8(svn_stringbuf_t
**dest
,
660 const svn_stringbuf_t
*src
,
663 xlate_handle_node_t
*node
;
666 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
670 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
672 err
= check_utf8((*dest
)->data
, (*dest
)->len
, pool
);
676 err
= check_non_ascii(src
->data
, src
->len
, pool
);
678 *dest
= svn_stringbuf_dup(src
, pool
);
681 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
688 svn_utf_string_to_utf8(const svn_string_t
**dest
,
689 const svn_string_t
*src
,
692 svn_stringbuf_t
*destbuf
;
693 xlate_handle_node_t
*node
;
696 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
700 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, &destbuf
, pool
);
702 err
= check_utf8(destbuf
->data
, destbuf
->len
, pool
);
704 *dest
= svn_string_create_from_buf(destbuf
, pool
);
708 err
= check_non_ascii(src
->data
, src
->len
, pool
);
710 *dest
= svn_string_dup(src
, pool
);
713 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
719 /* Common implementation for svn_utf_cstring_to_utf8,
720 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
721 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
722 the translator and allocating from POOL. */
724 convert_cstring(const char **dest
,
726 xlate_handle_node_t
*node
,
731 svn_stringbuf_t
*destbuf
;
732 SVN_ERR(convert_to_stringbuf(node
, src
, strlen(src
),
734 *dest
= destbuf
->data
;
738 apr_size_t len
= strlen(src
);
739 SVN_ERR(check_non_ascii(src
, len
, pool
));
740 *dest
= apr_pstrmemdup(pool
, src
, len
);
747 svn_utf_cstring_to_utf8(const char **dest
,
751 xlate_handle_node_t
*node
;
754 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
755 err
= convert_cstring(dest
, src
, node
, pool
);
756 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
758 SVN_ERR(check_cstring_utf8(*dest
, pool
));
765 svn_utf_cstring_to_utf8_ex2(const char **dest
,
767 const char *frompage
,
770 xlate_handle_node_t
*node
;
772 const char *convset_key
= get_xlate_key(SVN_APR_UTF8_CHARSET
, frompage
,
775 SVN_ERR(get_xlate_handle_node(&node
, SVN_APR_UTF8_CHARSET
, frompage
,
777 err
= convert_cstring(dest
, src
, node
, pool
);
778 put_xlate_handle_node(node
, convset_key
, pool
);
780 SVN_ERR(check_cstring_utf8(*dest
, pool
));
787 svn_utf_cstring_to_utf8_ex(const char **dest
,
789 const char *frompage
,
790 const char *convset_key
,
793 return svn_utf_cstring_to_utf8_ex2(dest
, src
, frompage
, pool
);
798 svn_utf_stringbuf_from_utf8(svn_stringbuf_t
**dest
,
799 const svn_stringbuf_t
*src
,
802 xlate_handle_node_t
*node
;
805 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
809 err
= check_utf8(src
->data
, src
->len
, pool
);
811 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
815 err
= check_non_ascii(src
->data
, src
->len
, pool
);
817 *dest
= svn_stringbuf_dup(src
, pool
);
820 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
827 svn_utf_string_from_utf8(const svn_string_t
**dest
,
828 const svn_string_t
*src
,
831 svn_stringbuf_t
*dbuf
;
832 xlate_handle_node_t
*node
;
835 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
839 err
= check_utf8(src
->data
, src
->len
, pool
);
841 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
844 *dest
= svn_string_create_from_buf(dbuf
, pool
);
848 err
= check_non_ascii(src
->data
, src
->len
, pool
);
850 *dest
= svn_string_dup(src
, pool
);
853 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
860 svn_utf_cstring_from_utf8(const char **dest
,
864 xlate_handle_node_t
*node
;
867 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
869 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
870 err
= convert_cstring(dest
, src
, node
, pool
);
871 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
878 svn_utf_cstring_from_utf8_ex2(const char **dest
,
883 xlate_handle_node_t
*node
;
885 const char *convset_key
= get_xlate_key(topage
, SVN_APR_UTF8_CHARSET
,
888 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
890 SVN_ERR(get_xlate_handle_node(&node
, topage
, SVN_APR_UTF8_CHARSET
,
892 err
= convert_cstring(dest
, src
, node
, pool
);
893 put_xlate_handle_node(node
, convset_key
, pool
);
900 svn_utf_cstring_from_utf8_ex(const char **dest
,
903 const char *convset_key
,
906 return svn_utf_cstring_from_utf8_ex2(dest
, src
, topage
, pool
);
911 svn_utf__cstring_from_utf8_fuzzy(const char *src
,
913 svn_error_t
*(*convert_from_utf8
)
914 (const char **, const char *, apr_pool_t
*))
916 const char *escaped
, *converted
;
919 escaped
= fuzzy_escape(src
, strlen(src
), pool
);
921 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
922 contain only 7-bit bytes :-). Recode to native... */
923 err
= convert_from_utf8(((const char **) &converted
), escaped
, pool
);
927 svn_error_clear(err
);
933 /* ### Check the client locale, maybe we can avoid that second
934 * conversion! See Ulrich Drepper's patch at
935 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
941 svn_utf_cstring_from_utf8_fuzzy(const char *src
,
944 return svn_utf__cstring_from_utf8_fuzzy(src
, pool
,
945 svn_utf_cstring_from_utf8
);
950 svn_utf_cstring_from_utf8_stringbuf(const char **dest
,
951 const svn_stringbuf_t
*src
,
954 svn_stringbuf_t
*destbuf
;
956 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf
, src
, pool
));
957 *dest
= destbuf
->data
;
964 svn_utf_cstring_from_utf8_string(const char **dest
,
965 const svn_string_t
*src
,
968 svn_stringbuf_t
*dbuf
;
969 xlate_handle_node_t
*node
;
972 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
976 err
= check_utf8(src
->data
, src
->len
, pool
);
978 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
985 err
= check_non_ascii(src
->data
, src
->len
, pool
);
987 *dest
= apr_pstrmemdup(pool
, src
->data
, src
->len
);
990 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);