In the command-line client, forbid
[svn.git] / subversion / libsvn_subr / utf.c
blobf4022e8949d9fbed5298fdaecf8932c172e150df
1 /*
2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
24 #include <apr_strings.h>
25 #include <apr_lib.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
32 #include "svn_utf.h"
33 #include "utf_impl.h"
34 #include "svn_private_config.h"
35 #include "win32_xlate.h"
/* Cache keys under which the native-to-UTF-8 (NTOU) and UTF-8-to-native
   (UTON) translation handle nodes are stored. */
39 #define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
40 #define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
/* The UTF-8 charset argument used when opening xlate handles: a charset
   name, except on AS400 where it is the integer 1208 cast to a pointer. */
42 #ifndef AS400
43 #define SVN_APR_UTF8_CHARSET "UTF-8"
44 #else
45 #define SVN_APR_UTF8_CHARSET (const char*)1208
46 #endif
/* Mutex locked around accesses to the global xlate handle cache.  Only
   present when APR has thread support; NULL until svn_utf_initialize()
   creates it. */
48 #if APR_HAS_THREADS
49 static apr_thread_mutex_t *xlate_handle_mutex = NULL;
50 #endif
52 /* The xlate handle cache is a global hash table with linked lists of xlate
53 * handles. In multi-threaded environments, a thread "borrows" an xlate
54 * handle from the cache during a translation and puts it back afterwards.
55 * This avoids holding a global lock for all translations.
56 * If there is no handle for a particular key when needed, a new is
57 * handle is created and put in the cache after use.
58 * This means that there will be at most N handles open for a key, where N
59 * is the number of simultanous handles in use for that key. */
61 typedef struct xlate_handle_node_t {
62 apr_xlate_t *handle;
63 /* FALSE if the handle is not valid, since its pool is being
64 destroyed. */
65 svn_boolean_t valid;
66 /* The name of a char encoding or APR_LOCALE_CHARSET. */
67 const char *frompage, *topage;
68 struct xlate_handle_node_t *next;
69 } xlate_handle_node_t;
71 /* Global cache: maps const char * userdata_key strings to
   xlate_handle_node_t ** values, each pointing at the head pointer of
   that key's linked list of xlate handles.  We don't store the pointer
   to the list head directly in the hash table, since entries are removed
   from and inserted at the head of the list in the code below, and we
   can't call apr_hash_set() for each character translation because that
   function allocates memory in each call where the value is non-NULL.
   Since these allocations take place in a global pool, this would be a
   memory leak.  NULL until svn_utf_initialize() creates the table, and
   reset to NULL by xlate_cleanup(). */
79 static apr_hash_t *xlate_handle_hash = NULL;
81 /* Clean up the xlate handle cache. */
82 static apr_status_t
83 xlate_cleanup(void *arg)
85 /* We set the cache variables to NULL so that translation works in other
86 cleanup functions, even if it isn't cached then. */
87 #if APR_HAS_THREADS
88 apr_thread_mutex_destroy(xlate_handle_mutex);
89 xlate_handle_mutex = NULL;
90 #endif
91 xlate_handle_hash = NULL;
93 return APR_SUCCESS;
96 /* Set the handle of ARG to NULL. */
97 static apr_status_t
98 xlate_handle_node_cleanup(void *arg)
100 xlate_handle_node_t *node = arg;
102 node->valid = FALSE;
103 return APR_SUCCESS;
106 void
107 svn_utf_initialize(apr_pool_t *pool)
109 apr_pool_t *subpool;
110 #if APR_HAS_THREADS
111 apr_thread_mutex_t *mutex;
112 #endif
114 if (!xlate_handle_hash)
116 /* We create our own subpool, which we protect with the mutex.
117 We can't use the pool passed to us by the caller, since we will
118 use it for xlate handle allocations, possibly in multiple threads,
119 and pool allocation is not thread-safe. */
120 subpool = svn_pool_create(pool);
121 #if APR_HAS_THREADS
122 if (apr_thread_mutex_create(&mutex, APR_THREAD_MUTEX_DEFAULT, subpool)
123 == APR_SUCCESS)
124 xlate_handle_mutex = mutex;
125 else
126 return;
127 #endif
129 xlate_handle_hash = apr_hash_make(subpool);
130 apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
131 apr_pool_cleanup_null);
135 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
136 * FROMPAGE can be any valid arguments of the same name to
137 * apr_xlate_open(). Allocate the returned string in POOL. */
138 static const char*
139 get_xlate_key(const char *topage,
140 const char *frompage,
141 apr_pool_t *pool)
143 #ifndef AS400
144 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
145 * topage/frompage is really an int, not a valid string. So generate a
146 * unique key accordingly. */
147 if (frompage == SVN_APR_LOCALE_CHARSET)
148 frompage = "APR_LOCALE_CHARSET";
149 else if (frompage == SVN_APR_DEFAULT_CHARSET)
150 frompage = "APR_DEFAULT_CHARSET";
152 if (topage == SVN_APR_LOCALE_CHARSET)
153 topage = "APR_LOCALE_CHARSET";
154 else if (topage == SVN_APR_DEFAULT_CHARSET)
155 topage = "APR_DEFAULT_CHARSET";
157 return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage,
158 "-xlate-handle", NULL);
159 #else
160 /* OS400 code pages are always ints. */
161 return apr_psprintf(pool, "svn-utf-%dto%d-xlate-handle", (int)frompage,
162 (int)topage);
163 #endif
166 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
167 creating the handle node if it doesn't exist in USERDATA_KEY.
168 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
169 APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other
170 reason, return the error.
172 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
173 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
174 in the pool of xlate_handle_hash. */
175 static svn_error_t *
176 get_xlate_handle_node(xlate_handle_node_t **ret,
177 const char *topage, const char *frompage,
178 const char *userdata_key, apr_pool_t *pool)
180 xlate_handle_node_t **old_node_p;
181 xlate_handle_node_t *old_node = NULL;
182 apr_status_t apr_err;
183 apr_xlate_t *handle;
184 svn_error_t *err = NULL;
186 /* If we already have a handle, just return it. */
187 if (userdata_key)
189 if (xlate_handle_hash)
191 #if APR_HAS_THREADS
192 apr_err = apr_thread_mutex_lock(xlate_handle_mutex);
193 if (apr_err != APR_SUCCESS)
194 return svn_error_create(apr_err, NULL,
195 _("Can't lock charset translation mutex"));
196 #endif
197 old_node_p = apr_hash_get(xlate_handle_hash, userdata_key,
198 APR_HASH_KEY_STRING);
199 if (old_node_p)
200 old_node = *old_node_p;
201 if (old_node)
203 /* Ensure that the handle is still valid. */
204 if (old_node->valid)
206 /* Remove from the list. */
207 *old_node_p = old_node->next;
208 old_node->next = NULL;
209 #if APR_HAS_THREADS
210 apr_err = apr_thread_mutex_unlock(xlate_handle_mutex);
211 if (apr_err != APR_SUCCESS)
212 return svn_error_create(apr_err, NULL,
213 _("Can't unlock charset "
214 "translation mutex"));
215 #endif
216 *ret = old_node;
217 return SVN_NO_ERROR;
221 else
223 void *p;
224 /* We fall back on a per-pool cache instead. */
225 apr_pool_userdata_get(&p, userdata_key, pool);
226 old_node = p;
227 /* Ensure that the handle is still valid. */
228 if (old_node && old_node->valid)
230 *ret = old_node;
231 return SVN_NO_ERROR;
236 /* Note that we still have the mutex locked (if it is initialized), so we
237 can use the global pool for creating the new xlate handle. */
239 /* The error handling doesn't support the following cases, since we don't
240 use them currently. Catch this here. */
241 #ifndef AS400
242 /* On OS400 V5R4 with UTF support, APR_DEFAULT_CHARSET and
243 * APR_LOCALE_CHARSET are both UTF-8 (CCSID 1208), so we won't get far
244 * with this assert active. */
245 assert(frompage != SVN_APR_DEFAULT_CHARSET
246 && topage != SVN_APR_DEFAULT_CHARSET
247 && (frompage != SVN_APR_LOCALE_CHARSET
248 || topage != SVN_APR_LOCALE_CHARSET));
249 #endif
251 /* Use the correct pool for creating the handle. */
252 if (userdata_key && xlate_handle_hash)
253 pool = apr_hash_pool_get(xlate_handle_hash);
255 /* Try to create a handle. */
256 #if defined( WIN32)
257 apr_err = svn_subr__win32_xlate_open((win32_xlate_t **)&handle, topage,
258 frompage, pool);
259 #elif defined(AS400)
260 apr_err = apr_xlate_open(&handle, (int)topage, (int)frompage, pool);
261 #else
262 apr_err = apr_xlate_open(&handle, topage, frompage, pool);
263 #endif
265 if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err))
266 handle = NULL;
267 else if (apr_err != APR_SUCCESS)
269 const char *errstr;
270 /* Can't use svn_error_wrap_apr here because it calls functions in
271 this file, leading to infinite recursion. */
272 #ifndef AS400
273 if (frompage == SVN_APR_LOCALE_CHARSET)
274 errstr = apr_psprintf(pool,
275 _("Can't create a character converter from "
276 "native encoding to '%s'"), topage);
277 else if (topage == SVN_APR_LOCALE_CHARSET)
278 errstr = apr_psprintf(pool,
279 _("Can't create a character converter from "
280 "'%s' to native encoding"), frompage);
281 else
282 errstr = apr_psprintf(pool,
283 _("Can't create a character converter from "
284 "'%s' to '%s'"), frompage, topage);
285 #else
286 /* Handle the error condition normally prevented by the assert
287 * above. */
288 errstr = apr_psprintf(pool,
289 _("Can't create a character converter from "
290 "'%i' to '%i'"), frompage, topage);
291 #endif
292 err = svn_error_create(apr_err, NULL, errstr);
293 goto cleanup;
296 /* Allocate and initialize the node. */
297 *ret = apr_palloc(pool, sizeof(xlate_handle_node_t));
298 (*ret)->handle = handle;
299 (*ret)->valid = TRUE;
300 (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET)
301 ? apr_pstrdup(pool, frompage) : frompage);
302 (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET)
303 ? apr_pstrdup(pool, topage) : topage);
304 (*ret)->next = NULL;
306 /* If we are called from inside a pool cleanup handler, the just created
307 xlate handle will be closed when that handler returns by a newly
308 registered cleanup handler, however, the handle is still cached by us.
309 To prevent this, we register a cleanup handler that will reset the valid
310 flag of our node, so we don't use an invalid handle. */
311 if (handle)
312 apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup,
313 apr_pool_cleanup_null);
315 cleanup:
316 /* Don't need the lock anymore. */
317 #if APR_HAS_THREADS
318 if (userdata_key && xlate_handle_hash)
320 apr_status_t unlock_err = apr_thread_mutex_unlock(xlate_handle_mutex);
321 if (unlock_err != APR_SUCCESS)
322 return svn_error_create(unlock_err, NULL,
323 _("Can't unlock charset translation mutex"));
325 #endif
327 return err;
330 /* Put back NODE into the xlate handle cache for use by other calls.
331 If there is no global cache, store the handle in POOL.
332 Ignore errors related to locking/unlocking the mutex.
333 ### Mutex errors here are very weird. Should we handle them "correctly"
334 ### even if that complicates error handling in the routines below? */
335 static void
336 put_xlate_handle_node(xlate_handle_node_t *node,
337 const char *userdata_key,
338 apr_pool_t *pool)
340 assert(node->next == NULL);
341 if (!userdata_key)
342 return;
343 if (xlate_handle_hash)
345 xlate_handle_node_t **node_p;
346 #if APR_HAS_THREADS
347 if (apr_thread_mutex_lock(xlate_handle_mutex) != APR_SUCCESS)
348 abort();
349 #endif
350 node_p = apr_hash_get(xlate_handle_hash, userdata_key,
351 APR_HASH_KEY_STRING);
352 if (node_p == NULL)
354 userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
355 userdata_key);
356 node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
357 sizeof(*node_p));
358 *node_p = NULL;
359 apr_hash_set(xlate_handle_hash, userdata_key,
360 APR_HASH_KEY_STRING, node_p);
362 node->next = *node_p;
363 *node_p = node;
364 #if APR_HAS_THREADS
365 if (apr_thread_mutex_unlock(xlate_handle_mutex) != APR_SUCCESS)
366 abort();
367 #endif
369 else
371 /* Store it in the per-pool cache. */
372 apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
376 /* Return the apr_xlate handle for converting native characters to UTF-8. */
377 static svn_error_t *
378 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
380 return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
381 SVN_APR_LOCALE_CHARSET,
382 SVN_UTF_NTOU_XLATE_HANDLE, pool);
386 /* Return the apr_xlate handle for converting UTF-8 to native characters.
387 Create one if it doesn't exist. If unable to find a handle, or
388 unable to create one because apr_xlate_open returned APR_EINVAL, then
389 set *RET to null and return SVN_NO_ERROR; if fail for some other
390 reason, return error. */
391 static svn_error_t *
392 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
394 return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET,
395 SVN_APR_UTF8_CHARSET,
396 SVN_UTF_UTON_XLATE_HANDLE, pool);
400 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
401 sequences, allocating the result in POOL. */
402 static const char *
403 fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool)
405 const char *src_orig = src, *src_end = src + len;
406 apr_size_t new_len = 0;
407 char *new;
408 const char *new_orig;
410 /* First count how big a dest string we'll need. */
411 while (src < src_end)
413 if (! svn_ctype_isascii(*src) || *src == '\0')
414 new_len += 5; /* 5 slots, for "?\XXX" */
415 else
416 new_len += 1; /* one slot for the 7-bit char */
418 src++;
421 /* Allocate that amount. */
422 new = apr_palloc(pool, new_len + 1);
424 new_orig = new;
426 /* And fill it up. */
427 while (src_orig < src_end)
429 if (! svn_ctype_isascii(*src_orig) || src_orig == '\0')
431 /* This is the same format as svn_xml_fuzzy_escape uses, but that
432 function escapes different characters. Please keep in sync!
433 ### If we add another fuzzy escape somewhere, we should abstract
434 ### this out to a common function. */
435 sprintf(new, "?\\%03u", (unsigned char) *src_orig);
436 new += 5;
438 else
440 *new = *src_orig;
441 new += 1;
444 src_orig++;
447 *new = '\0';
449 return new_orig;
452 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
453 in *DEST, which is allocated in POOL. */
454 static svn_error_t *
455 convert_to_stringbuf(xlate_handle_node_t *node,
456 const char *src_data,
457 apr_size_t src_length,
458 svn_stringbuf_t **dest,
459 apr_pool_t *pool)
461 #ifdef WIN32
462 apr_status_t apr_err;
464 apr_err = svn_subr__win32_xlate_to_stringbuf((win32_xlate_t *) node->handle,
465 src_data, src_length,
466 dest, pool);
467 #else
468 apr_size_t buflen = src_length * 2;
469 apr_status_t apr_err;
470 apr_size_t srclen = src_length;
471 apr_size_t destlen = buflen;
472 char *destbuf;
474 /* Initialize *DEST to an empty stringbuf. */
475 *dest = svn_stringbuf_create("", pool);
476 destbuf = (*dest)->data;
478 /* Not only does it not make sense to convert an empty string, but
479 apr-iconv is quite unreasonable about not allowing that. */
480 if (src_length == 0)
481 return SVN_NO_ERROR;
485 /* A 1:2 ratio of input bytes to output bytes (as assigned above)
486 should be enough for most translations, and if it turns out not
487 to be enough, we'll grow the buffer again, sizing it based on a
488 1:3 ratio of the remainder of the string. */
490 svn_stringbuf_ensure(*dest, buflen + 1);
492 /* Set up state variables for xlate. */
493 destlen = buflen - (*dest)->len;
495 /* Attempt the conversion. */
496 apr_err = apr_xlate_conv_buffer(node->handle,
497 src_data + (src_length - srclen),
498 &srclen,
499 (*dest)->data + (*dest)->len,
500 &destlen);
502 /* Now, update the *DEST->len to track the amount of output data
503 churned out so far from this loop. */
504 (*dest)->len += ((buflen - (*dest)->len) - destlen);
505 buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough
506 for all characters in the buffer, 4 is
507 maximum character size (currently) */
510 } while (apr_err == APR_SUCCESS && srclen != 0);
511 #endif
513 /* If we exited the loop with an error, return the error. */
514 if (apr_err)
516 const char *errstr;
517 svn_error_t *err;
519 /* Can't use svn_error_wrap_apr here because it calls functions in
520 this file, leading to infinite recursion. */
521 #ifndef AS400
522 if (node->frompage == SVN_APR_LOCALE_CHARSET)
523 errstr = apr_psprintf
524 (pool, _("Can't convert string from native encoding to '%s':"),
525 node->topage);
526 else if (node->topage == SVN_APR_LOCALE_CHARSET)
527 errstr = apr_psprintf
528 (pool, _("Can't convert string from '%s' to native encoding:"),
529 node->frompage);
530 else
531 errstr = apr_psprintf
532 (pool, _("Can't convert string from '%s' to '%s':"),
533 node->frompage, node->topage);
534 #else
535 /* On OS400 V5R4 every possible node->topage and node->frompage
536 * *really* is an int. */
537 errstr = apr_psprintf
538 (pool, _("Can't convert string from CCSID '%i' to CCSID '%i'"),
539 node->frompage, node->topage);
540 #endif
541 err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data,
542 src_length, pool));
543 return svn_error_create(apr_err, err, errstr);
545 /* Else, exited due to success. Trim the result buffer down to the
546 right length. */
547 (*dest)->data[(*dest)->len] = '\0';
549 return SVN_NO_ERROR;
553 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
554 other than seven-bit, non-control (except for whitespace) ASCII
555 characters, finding the error pool from POOL. Otherwise, return
556 SVN_NO_ERROR. */
557 static svn_error_t *
558 check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool)
560 const char *data_start = data;
562 for (; len > 0; --len, data++)
564 if ((! apr_isascii(*data))
565 || ((! apr_isspace(*data))
566 && apr_iscntrl(*data)))
568 /* Show the printable part of the data, followed by the
569 decimal code of the questionable character. Because if a
570 user ever gets this error, she's going to have to spend
571 time tracking down the non-ASCII data, so we want to help
572 as much as possible. And yes, we just call the unsafe
573 data "non-ASCII", even though the actual constraint is
574 somewhat more complex than that. */
576 if (data - data_start)
578 const char *error_data
579 = apr_pstrndup(pool, data_start, (data - data_start));
581 return svn_error_createf
582 (APR_EINVAL, NULL,
583 _("Safe data '%s' was followed by non-ASCII byte %d: "
584 "unable to convert to/from UTF-8"),
585 error_data, *((const unsigned char *) data));
587 else
589 return svn_error_createf
590 (APR_EINVAL, NULL,
591 _("Non-ASCII character (code %d) detected, "
592 "and unable to convert to/from UTF-8"),
593 *((const unsigned char *) data));
598 return SVN_NO_ERROR;
601 /* Construct an error with a suitable message to describe the invalid UTF-8
602 * sequence DATA of length LEN (which may have embedded NULLs). We can't
603 * simply print the data, almost by definition we don't really know how it
604 * is encoded.
606 static svn_error_t *
607 invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
609 const char *last = svn_utf__last_valid(data, len);
610 const char *valid_txt = "", *invalid_txt = "";
611 int i, valid, invalid;
613 /* We will display at most 24 valid octets (this may split a leading
614 multi-byte character) as that should fit on one 80 character line. */
615 valid = last - data;
616 if (valid > 24)
617 valid = 24;
618 for (i = 0; i < valid; ++i)
619 valid_txt = apr_pstrcat(pool, valid_txt,
620 apr_psprintf(pool, " %02x",
621 (unsigned char)last[i-valid]), NULL);
623 /* 4 invalid octets will guarantee that the faulty octet is displayed */
624 invalid = data + len - last;
625 if (invalid > 4)
626 invalid = 4;
627 for (i = 0; i < invalid; ++i)
628 invalid_txt = apr_pstrcat(pool, invalid_txt,
629 apr_psprintf(pool, " %02x",
630 (unsigned char)last[i]), NULL);
632 return svn_error_createf(APR_EINVAL, NULL,
633 _("Valid UTF-8 data\n(hex:%s)\n"
634 "followed by invalid UTF-8 sequence\n(hex:%s)"),
635 valid_txt, invalid_txt);
638 /* Verify that the sequence DATA of length LEN is valid UTF-8 */
639 static svn_error_t *
640 check_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
642 if (! svn_utf__is_valid(data, len))
643 return invalid_utf8(data, len, pool);
644 return SVN_NO_ERROR;
647 /* Verify that the NULL terminated sequence DATA is valid UTF-8 */
648 static svn_error_t *
649 check_cstring_utf8(const char *data, apr_pool_t *pool)
652 if (! svn_utf__cstring_is_valid(data))
653 return invalid_utf8(data, strlen(data), pool);
654 return SVN_NO_ERROR;
658 svn_error_t *
659 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
660 const svn_stringbuf_t *src,
661 apr_pool_t *pool)
663 xlate_handle_node_t *node;
664 svn_error_t *err;
666 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
668 if (node->handle)
670 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
671 if (! err)
672 err = check_utf8((*dest)->data, (*dest)->len, pool);
674 else
676 err = check_non_ascii(src->data, src->len, pool);
677 if (! err)
678 *dest = svn_stringbuf_dup(src, pool);
681 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
683 return err;
687 svn_error_t *
688 svn_utf_string_to_utf8(const svn_string_t **dest,
689 const svn_string_t *src,
690 apr_pool_t *pool)
692 svn_stringbuf_t *destbuf;
693 xlate_handle_node_t *node;
694 svn_error_t *err;
696 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
698 if (node->handle)
700 err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool);
701 if (! err)
702 err = check_utf8(destbuf->data, destbuf->len, pool);
703 if (! err)
704 *dest = svn_string_create_from_buf(destbuf, pool);
706 else
708 err = check_non_ascii(src->data, src->len, pool);
709 if (! err)
710 *dest = svn_string_dup(src, pool);
713 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
715 return err;
719 /* Common implementation for svn_utf_cstring_to_utf8,
720 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
721 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
722 the translator and allocating from POOL. */
723 static svn_error_t *
724 convert_cstring(const char **dest,
725 const char *src,
726 xlate_handle_node_t *node,
727 apr_pool_t *pool)
729 if (node->handle)
731 svn_stringbuf_t *destbuf;
732 SVN_ERR(convert_to_stringbuf(node, src, strlen(src),
733 &destbuf, pool));
734 *dest = destbuf->data;
736 else
738 apr_size_t len = strlen(src);
739 SVN_ERR(check_non_ascii(src, len, pool));
740 *dest = apr_pstrmemdup(pool, src, len);
742 return SVN_NO_ERROR;
746 svn_error_t *
747 svn_utf_cstring_to_utf8(const char **dest,
748 const char *src,
749 apr_pool_t *pool)
751 xlate_handle_node_t *node;
752 svn_error_t *err;
754 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
755 err = convert_cstring(dest, src, node, pool);
756 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
757 SVN_ERR(err);
758 SVN_ERR(check_cstring_utf8(*dest, pool));
760 return SVN_NO_ERROR;
764 svn_error_t *
765 svn_utf_cstring_to_utf8_ex2(const char **dest,
766 const char *src,
767 const char *frompage,
768 apr_pool_t *pool)
770 xlate_handle_node_t *node;
771 svn_error_t *err;
772 const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage,
773 pool);
775 SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage,
776 convset_key, pool));
777 err = convert_cstring(dest, src, node, pool);
778 put_xlate_handle_node(node, convset_key, pool);
779 SVN_ERR(err);
780 SVN_ERR(check_cstring_utf8(*dest, pool));
782 return SVN_NO_ERROR;
786 svn_error_t *
787 svn_utf_cstring_to_utf8_ex(const char **dest,
788 const char *src,
789 const char *frompage,
790 const char *convset_key,
791 apr_pool_t *pool)
793 return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool);
797 svn_error_t *
798 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
799 const svn_stringbuf_t *src,
800 apr_pool_t *pool)
802 xlate_handle_node_t *node;
803 svn_error_t *err;
805 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
807 if (node->handle)
809 err = check_utf8(src->data, src->len, pool);
810 if (! err)
811 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
813 else
815 err = check_non_ascii(src->data, src->len, pool);
816 if (! err)
817 *dest = svn_stringbuf_dup(src, pool);
820 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
822 return err;
826 svn_error_t *
827 svn_utf_string_from_utf8(const svn_string_t **dest,
828 const svn_string_t *src,
829 apr_pool_t *pool)
831 svn_stringbuf_t *dbuf;
832 xlate_handle_node_t *node;
833 svn_error_t *err;
835 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
837 if (node->handle)
839 err = check_utf8(src->data, src->len, pool);
840 if (! err)
841 err = convert_to_stringbuf(node, src->data, src->len,
842 &dbuf, pool);
843 if (! err)
844 *dest = svn_string_create_from_buf(dbuf, pool);
846 else
848 err = check_non_ascii(src->data, src->len, pool);
849 if (! err)
850 *dest = svn_string_dup(src, pool);
853 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
855 return err;
859 svn_error_t *
860 svn_utf_cstring_from_utf8(const char **dest,
861 const char *src,
862 apr_pool_t *pool)
864 xlate_handle_node_t *node;
865 svn_error_t *err;
867 SVN_ERR(check_utf8(src, strlen(src), pool));
869 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
870 err = convert_cstring(dest, src, node, pool);
871 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
873 return err;
877 svn_error_t *
878 svn_utf_cstring_from_utf8_ex2(const char **dest,
879 const char *src,
880 const char *topage,
881 apr_pool_t *pool)
883 xlate_handle_node_t *node;
884 svn_error_t *err;
885 const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET,
886 pool);
888 SVN_ERR(check_utf8(src, strlen(src), pool));
890 SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET,
891 convset_key, pool));
892 err = convert_cstring(dest, src, node, pool);
893 put_xlate_handle_node(node, convset_key, pool);
895 return err;
899 svn_error_t *
900 svn_utf_cstring_from_utf8_ex(const char **dest,
901 const char *src,
902 const char *topage,
903 const char *convset_key,
904 apr_pool_t *pool)
906 return svn_utf_cstring_from_utf8_ex2(dest, src, topage, pool);
910 const char *
911 svn_utf__cstring_from_utf8_fuzzy(const char *src,
912 apr_pool_t *pool,
913 svn_error_t *(*convert_from_utf8)
914 (const char **, const char *, apr_pool_t *))
916 const char *escaped, *converted;
917 svn_error_t *err;
919 escaped = fuzzy_escape(src, strlen(src), pool);
921 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
922 contain only 7-bit bytes :-). Recode to native... */
923 err = convert_from_utf8(((const char **) &converted), escaped, pool);
925 if (err)
927 svn_error_clear(err);
928 return escaped;
930 else
931 return converted;
933 /* ### Check the client locale, maybe we can avoid that second
934 * conversion! See Ulrich Drepper's patch at
935 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
940 const char *
941 svn_utf_cstring_from_utf8_fuzzy(const char *src,
942 apr_pool_t *pool)
944 return svn_utf__cstring_from_utf8_fuzzy(src, pool,
945 svn_utf_cstring_from_utf8);
949 svn_error_t *
950 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
951 const svn_stringbuf_t *src,
952 apr_pool_t *pool)
954 svn_stringbuf_t *destbuf;
956 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool));
957 *dest = destbuf->data;
959 return SVN_NO_ERROR;
963 svn_error_t *
964 svn_utf_cstring_from_utf8_string(const char **dest,
965 const svn_string_t *src,
966 apr_pool_t *pool)
968 svn_stringbuf_t *dbuf;
969 xlate_handle_node_t *node;
970 svn_error_t *err;
972 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
974 if (node->handle)
976 err = check_utf8(src->data, src->len, pool);
977 if (! err)
978 err = convert_to_stringbuf(node, src->data, src->len,
979 &dbuf, pool);
980 if (! err)
981 *dest = dbuf->data;
983 else
985 err = check_non_ascii(src->data, src->len, pool);
986 if (! err)
987 *dest = apr_pstrmemdup(pool, src->data, src->len);
990 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
992 return err;