Reorganize the output to "svnserve --help".
[svn.git] / subversion / libsvn_subr / utf.c
blobccbcc038a1e6c6f5eedafcec4a34d5552555ef98
1 /*
2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
24 #include <apr_strings.h>
25 #include <apr_lib.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
32 #include "svn_utf.h"
33 #include "utf_impl.h"
34 #include "svn_private_config.h"
35 #include "win32_xlate.h"
/* Cache keys under which the native->UTF-8 ("ntou") and UTF-8->native
   ("uton") translation handle nodes are looked up and stored. */
39 #define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
40 #define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
/* The value passed to apr_xlate_open() to name UTF-8.  On AS400 code
   pages are integers smuggled through a pointer; 1208 is the CCSID
   for UTF-8. */
42 #ifndef AS400
43 #define SVN_APR_UTF8_CHARSET "UTF-8"
44 #else
45 #define SVN_APR_UTF8_CHARSET (const char*)1208
46 #endif
/* Mutex guarding the global xlate handle cache and its pool.  It is
   NULL until svn_utf_initialize() has created it, and is reset to NULL
   by xlate_cleanup(). */
48 #if APR_HAS_THREADS
49 static apr_thread_mutex_t *xlate_handle_mutex = NULL;
50 #endif
52 /* The xlate handle cache is a global hash table with linked lists of xlate
53 * handles. In multi-threaded environments, a thread "borrows" an xlate
54 * handle from the cache during a translation and puts it back afterwards.
55 * This avoids holding a global lock for all translations.
56 * If there is no handle for a particular key when needed, a new is
57 * handle is created and put in the cache after use.
58 * This means that there will be at most N handles open for a key, where N
59 * is the number of simultanous handles in use for that key. */
61 typedef struct xlate_handle_node_t {
62 apr_xlate_t *handle;
63 /* FALSE if the handle is not valid, since its pool is being
64 destroyed. */
65 svn_boolean_t valid;
66 /* The name of a char encoding or APR_LOCALE_CHARSET. */
67 const char *frompage, *topage;
68 struct xlate_handle_node_t *next;
69 } xlate_handle_node_t;
71 /* Global cache of idle xlate handles; NULL until svn_utf_initialize()
   has run, and reset to NULL by xlate_cleanup().

   This maps const char * userdata_key strings to xlate_handle_node_t **
   values, each pointing at the head pointer of the linked list of xlate
   handles for that key.  We don't store the pointer to the list head
   directly in the hash table, since we remove/insert entries at the
   head of the list in the code below, and we can't use apr_hash_set()
   in each character translation because that function allocates memory
   in each call where the value is non-NULL.  Since these allocations
   take place in a global pool, this would be a memory leak. */
79 static apr_hash_t *xlate_handle_hash = NULL;
81 /* Clean up the xlate handle cache. */
82 static apr_status_t
83 xlate_cleanup(void *arg)
85 /* We set the cache variables to NULL so that translation works in other
86 cleanup functions, even if it isn't cached then. */
87 #if APR_HAS_THREADS
88 apr_thread_mutex_destroy(xlate_handle_mutex);
89 xlate_handle_mutex = NULL;
90 #endif
91 xlate_handle_hash = NULL;
93 return APR_SUCCESS;
96 /* Set the handle of ARG to NULL. */
97 static apr_status_t
98 xlate_handle_node_cleanup(void *arg)
100 xlate_handle_node_t *node = arg;
102 node->valid = FALSE;
103 return APR_SUCCESS;
106 void
107 svn_utf_initialize(apr_pool_t *pool)
109 apr_pool_t *subpool;
110 #if APR_HAS_THREADS
111 apr_thread_mutex_t *mutex;
112 #endif
114 if (!xlate_handle_hash)
116 /* We create our own subpool, which we protect with the mutex.
117 We can't use the pool passed to us by the caller, since we will
118 use it for xlate handle allocations, possibly in multiple threads,
119 and pool allocation is not thread-safe. */
120 subpool = svn_pool_create(pool);
121 #if APR_HAS_THREADS
122 if (apr_thread_mutex_create(&mutex, APR_THREAD_MUTEX_DEFAULT, subpool)
123 == APR_SUCCESS)
124 xlate_handle_mutex = mutex;
125 else
126 return;
127 #endif
129 xlate_handle_hash = apr_hash_make(subpool);
130 apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
131 apr_pool_cleanup_null);
135 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
136 * FROMPAGE can be any valid arguments of the same name to
137 * apr_xlate_open(). Allocate the returned string in POOL. */
138 static const char*
139 get_xlate_key(const char *topage,
140 const char *frompage,
141 apr_pool_t *pool)
143 #ifndef AS400
144 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
145 * topage/frompage is really an int, not a valid string. So generate a
146 * unique key accordingly. */
147 if (frompage == SVN_APR_LOCALE_CHARSET)
148 frompage = "APR_LOCALE_CHARSET";
149 else if (frompage == SVN_APR_DEFAULT_CHARSET)
150 frompage = "APR_DEFAULT_CHARSET";
152 if (topage == SVN_APR_LOCALE_CHARSET)
153 topage = "APR_LOCALE_CHARSET";
154 else if (topage == SVN_APR_DEFAULT_CHARSET)
155 topage = "APR_DEFAULT_CHARSET";
157 return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage,
158 "-xlate-handle", NULL);
159 #else
160 /* OS400 code pages are always ints. */
161 return apr_psprintf(pool, "svn-utf-%dto%d-xlate-handle", (int)frompage,
162 (int)topage);
163 #endif
166 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
167 creating the handle node if it doesn't exist in USERDATA_KEY.
168 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
169 APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other
170 reason, return the error.
172 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
173 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
174 in the pool of xlate_handle_hash. */
175 static svn_error_t *
176 get_xlate_handle_node(xlate_handle_node_t **ret,
177 const char *topage, const char *frompage,
178 const char *userdata_key, apr_pool_t *pool)
180 xlate_handle_node_t **old_node_p;
181 xlate_handle_node_t *old_node = NULL;
182 apr_status_t apr_err;
183 apr_xlate_t *handle;
184 svn_error_t *err = NULL;
186 /* If we already have a handle, just return it. */
187 if (userdata_key)
189 if (xlate_handle_hash)
191 #if APR_HAS_THREADS
192 apr_err = apr_thread_mutex_lock(xlate_handle_mutex);
193 if (apr_err != APR_SUCCESS)
194 return svn_error_create(apr_err, NULL,
195 _("Can't lock charset translation mutex"));
196 #endif
197 old_node_p = apr_hash_get(xlate_handle_hash, userdata_key,
198 APR_HASH_KEY_STRING);
199 if (old_node_p)
200 old_node = *old_node_p;
201 if (old_node)
203 /* Ensure that the handle is still valid. */
204 if (old_node->valid)
206 /* Remove from the list. */
207 *old_node_p = old_node->next;
208 old_node->next = NULL;
209 #if APR_HAS_THREADS
210 apr_err = apr_thread_mutex_unlock(xlate_handle_mutex);
211 if (apr_err != APR_SUCCESS)
212 return svn_error_create(apr_err, NULL,
213 _("Can't unlock charset "
214 "translation mutex"));
215 #endif
216 *ret = old_node;
217 return SVN_NO_ERROR;
221 else
223 void *p;
224 /* We fall back on a per-pool cache instead. */
225 apr_pool_userdata_get(&p, userdata_key, pool);
226 old_node = p;
227 /* Ensure that the handle is still valid. */
228 if (old_node && old_node->valid)
230 *ret = old_node;
231 return SVN_NO_ERROR;
236 /* Note that we still have the mutex locked (if it is initialized), so we
237 can use the global pool for creating the new xlate handle. */
239 /* The error handling doesn't support the following cases, since we don't
240 use them currently. Catch this here. */
241 #ifndef AS400
242 /* On OS400 V5R4 with UTF support, APR_DEFAULT_CHARSET and
243 * APR_LOCALE_CHARSET are both UTF-8 (CCSID 1208), so we won't get far
244 * with this assert active. */
245 assert(frompage != SVN_APR_DEFAULT_CHARSET
246 && topage != SVN_APR_DEFAULT_CHARSET
247 && (frompage != SVN_APR_LOCALE_CHARSET
248 || topage != SVN_APR_LOCALE_CHARSET));
249 #endif
251 /* Use the correct pool for creating the handle. */
252 if (userdata_key && xlate_handle_hash)
253 pool = apr_hash_pool_get(xlate_handle_hash);
255 /* Try to create a handle. */
256 #if defined( WIN32)
257 apr_err = svn_subr__win32_xlate_open((win32_xlate_t **)&handle, topage,
258 frompage, pool);
259 #elif defined(AS400)
260 apr_err = apr_xlate_open(&handle, (int)topage, (int)frompage, pool);
261 #else
262 apr_err = apr_xlate_open(&handle, topage, frompage, pool);
263 #endif
265 if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err))
266 handle = NULL;
267 else if (apr_err != APR_SUCCESS)
269 const char *errstr;
270 /* Can't use svn_error_wrap_apr here because it calls functions in
271 this file, leading to infinite recursion. */
272 #ifndef AS400
273 if (frompage == SVN_APR_LOCALE_CHARSET)
274 errstr = apr_psprintf(pool,
275 _("Can't create a character converter from "
276 "native encoding to '%s'"), topage);
277 else if (topage == SVN_APR_LOCALE_CHARSET)
278 errstr = apr_psprintf(pool,
279 _("Can't create a character converter from "
280 "'%s' to native encoding"), frompage);
281 else
282 errstr = apr_psprintf(pool,
283 _("Can't create a character converter from "
284 "'%s' to '%s'"), frompage, topage);
285 #else
286 /* Handle the error condition normally prevented by the assert
287 * above. */
288 errstr = apr_psprintf(pool,
289 _("Can't create a character converter from "
290 "'%i' to '%i'"), frompage, topage);
291 #endif
292 err = svn_error_create(apr_err, NULL, errstr);
293 goto cleanup;
296 /* Allocate and initialize the node. */
297 *ret = apr_palloc(pool, sizeof(xlate_handle_node_t));
298 (*ret)->handle = handle;
299 (*ret)->valid = TRUE;
300 (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET)
301 ? apr_pstrdup(pool, frompage) : frompage);
302 (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET)
303 ? apr_pstrdup(pool, topage) : topage);
304 (*ret)->next = NULL;
306 /* If we are called from inside a pool cleanup handler, the just created
307 xlate handle will be closed when that handler returns by a newly
308 registered cleanup handler, however, the handle is still cached by us.
309 To prevent this, we register a cleanup handler that will reset the valid
310 flag of our node, so we don't use an invalid handle. */
311 if (handle)
312 apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup,
313 apr_pool_cleanup_null);
315 cleanup:
316 /* Don't need the lock anymore. */
317 #if APR_HAS_THREADS
318 if (userdata_key && xlate_handle_hash)
320 apr_status_t unlock_err = apr_thread_mutex_unlock(xlate_handle_mutex);
321 if (unlock_err != APR_SUCCESS)
322 return svn_error_create(unlock_err, NULL,
323 _("Can't unlock charset translation mutex"));
325 #endif
327 return err;
330 /* Put back NODE into the xlate handle cache for use by other calls.
331 If there is no global cache, store the handle in POOL.
332 Ignore errors related to locking/unlocking the mutex.
333 ### Mutex errors here are very weird. Should we handle them "correctly"
334 ### even if that complicates error handling in the routines below? */
335 static void
336 put_xlate_handle_node(xlate_handle_node_t *node,
337 const char *userdata_key,
338 apr_pool_t *pool)
340 assert(node->next == NULL);
341 if (!userdata_key)
342 return;
343 if (xlate_handle_hash)
345 xlate_handle_node_t **node_p;
346 #if APR_HAS_THREADS
347 if (apr_thread_mutex_lock(xlate_handle_mutex) != APR_SUCCESS)
348 abort();
349 #endif
350 node_p = apr_hash_get(xlate_handle_hash, userdata_key,
351 APR_HASH_KEY_STRING);
352 if (node_p == NULL)
354 userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
355 userdata_key);
356 node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
357 sizeof(*node_p));
358 *node_p = NULL;
359 apr_hash_set(xlate_handle_hash, userdata_key,
360 APR_HASH_KEY_STRING, node_p);
362 node->next = *node_p;
363 *node_p = node;
364 #if APR_HAS_THREADS
365 if (apr_thread_mutex_unlock(xlate_handle_mutex) != APR_SUCCESS)
366 abort();
367 #endif
369 else
371 /* Store it in the per-pool cache. */
372 apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
376 /* Return the apr_xlate handle for converting native characters to UTF-8. */
377 static svn_error_t *
378 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
380 return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
381 SVN_APR_LOCALE_CHARSET,
382 SVN_UTF_NTOU_XLATE_HANDLE, pool);
386 /* Return the apr_xlate handle for converting UTF-8 to native characters.
387 Create one if it doesn't exist. If unable to find a handle, or
388 unable to create one because apr_xlate_open returned APR_EINVAL, then
389 set *RET to null and return SVN_NO_ERROR; if fail for some other
390 reason, return error. */
391 static svn_error_t *
392 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
394 return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET,
395 SVN_APR_UTF8_CHARSET,
396 SVN_UTF_UTON_XLATE_HANDLE, pool);
400 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
401 sequences, allocating the result in POOL. */
402 static const char *
403 fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool)
405 const char *src_orig = src, *src_end = src + len;
406 apr_size_t new_len = 0;
407 char *new;
408 const char *new_orig;
410 /* First count how big a dest string we'll need. */
411 while (src < src_end)
413 if (! svn_ctype_isascii(*src) || *src == '\0')
414 new_len += 5; /* 5 slots, for "?\XXX" */
415 else
416 new_len += 1; /* one slot for the 7-bit char */
418 src++;
421 /* Allocate that amount. */
422 new = apr_palloc(pool, new_len + 1);
424 new_orig = new;
426 /* And fill it up. */
427 while (src_orig < src_end)
429 if (! svn_ctype_isascii(*src_orig) || src_orig == '\0')
431 /* This is the same format as svn_xml_fuzzy_escape uses, but that
432 function escapes different characters. Please keep in sync!
433 ### If we add another fuzzy escape somewhere, we should abstract
434 ### this out to a common function. */
435 sprintf(new, "?\\%03u", (unsigned char) *src_orig);
436 new += 5;
438 else
440 *new = *src_orig;
441 new += 1;
444 src_orig++;
447 *new = '\0';
449 return new_orig;
452 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
453 in *DEST, which is allocated in POOL. */
454 static svn_error_t *
455 convert_to_stringbuf(xlate_handle_node_t *node,
456 const char *src_data,
457 apr_size_t src_length,
458 svn_stringbuf_t **dest,
459 apr_pool_t *pool)
461 #ifdef WIN32
462 apr_status_t apr_err;
464 apr_err = svn_subr__win32_xlate_to_stringbuf((win32_xlate_t *) node->handle,
465 src_data, src_length,
466 dest, pool);
467 #else
468 apr_size_t buflen = src_length * 2;
469 apr_status_t apr_err;
470 apr_size_t srclen = src_length;
471 apr_size_t destlen = buflen;
472 char *destbuf;
474 /* Initialize *DEST to an empty stringbuf.
475 A 1:2 ratio of input bytes to output bytes (as assigned above)
476 should be enough for most translations, and if it turns out not
477 to be enough, we'll grow the buffer again, sizing it based on a
478 1:3 ratio of the remainder of the string. */
479 *dest = svn_stringbuf_create_ensure(buflen + 1, pool);
480 destbuf = (*dest)->data;
482 /* Not only does it not make sense to convert an empty string, but
483 apr-iconv is quite unreasonable about not allowing that. */
484 if (src_length == 0)
485 return SVN_NO_ERROR;
489 /* Set up state variables for xlate. */
490 destlen = buflen - (*dest)->len;
492 /* Attempt the conversion. */
493 apr_err = apr_xlate_conv_buffer(node->handle,
494 src_data + (src_length - srclen),
495 &srclen,
496 (*dest)->data + (*dest)->len,
497 &destlen);
499 /* Now, update the *DEST->len to track the amount of output data
500 churned out so far from this loop. */
501 (*dest)->len += ((buflen - (*dest)->len) - destlen);
502 buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough
503 for all characters in the buffer, 4 is
504 maximum character size (currently) */
507 } while (apr_err == APR_SUCCESS && srclen != 0);
508 #endif
510 /* If we exited the loop with an error, return the error. */
511 if (apr_err)
513 const char *errstr;
514 svn_error_t *err;
516 /* Can't use svn_error_wrap_apr here because it calls functions in
517 this file, leading to infinite recursion. */
518 #ifndef AS400
519 if (node->frompage == SVN_APR_LOCALE_CHARSET)
520 errstr = apr_psprintf
521 (pool, _("Can't convert string from native encoding to '%s':"),
522 node->topage);
523 else if (node->topage == SVN_APR_LOCALE_CHARSET)
524 errstr = apr_psprintf
525 (pool, _("Can't convert string from '%s' to native encoding:"),
526 node->frompage);
527 else
528 errstr = apr_psprintf
529 (pool, _("Can't convert string from '%s' to '%s':"),
530 node->frompage, node->topage);
531 #else
532 /* On OS400 V5R4 every possible node->topage and node->frompage
533 * *really* is an int. */
534 errstr = apr_psprintf
535 (pool, _("Can't convert string from CCSID '%i' to CCSID '%i'"),
536 node->frompage, node->topage);
537 #endif
538 err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data,
539 src_length, pool));
540 return svn_error_create(apr_err, err, errstr);
542 /* Else, exited due to success. Trim the result buffer down to the
543 right length. */
544 (*dest)->data[(*dest)->len] = '\0';
546 return SVN_NO_ERROR;
550 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
551 other than seven-bit, non-control (except for whitespace) ASCII
552 characters, finding the error pool from POOL. Otherwise, return
553 SVN_NO_ERROR. */
554 static svn_error_t *
555 check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool)
557 const char *data_start = data;
559 for (; len > 0; --len, data++)
561 if ((! apr_isascii(*data))
562 || ((! apr_isspace(*data))
563 && apr_iscntrl(*data)))
565 /* Show the printable part of the data, followed by the
566 decimal code of the questionable character. Because if a
567 user ever gets this error, she's going to have to spend
568 time tracking down the non-ASCII data, so we want to help
569 as much as possible. And yes, we just call the unsafe
570 data "non-ASCII", even though the actual constraint is
571 somewhat more complex than that. */
573 if (data - data_start)
575 const char *error_data
576 = apr_pstrndup(pool, data_start, (data - data_start));
578 return svn_error_createf
579 (APR_EINVAL, NULL,
580 _("Safe data '%s' was followed by non-ASCII byte %d: "
581 "unable to convert to/from UTF-8"),
582 error_data, *((const unsigned char *) data));
584 else
586 return svn_error_createf
587 (APR_EINVAL, NULL,
588 _("Non-ASCII character (code %d) detected, "
589 "and unable to convert to/from UTF-8"),
590 *((const unsigned char *) data));
595 return SVN_NO_ERROR;
598 /* Construct an error with a suitable message to describe the invalid UTF-8
599 * sequence DATA of length LEN (which may have embedded NULLs). We can't
600 * simply print the data, almost by definition we don't really know how it
601 * is encoded.
603 static svn_error_t *
604 invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
606 const char *last = svn_utf__last_valid(data, len);
607 const char *valid_txt = "", *invalid_txt = "";
608 int i, valid, invalid;
610 /* We will display at most 24 valid octets (this may split a leading
611 multi-byte character) as that should fit on one 80 character line. */
612 valid = last - data;
613 if (valid > 24)
614 valid = 24;
615 for (i = 0; i < valid; ++i)
616 valid_txt = apr_pstrcat(pool, valid_txt,
617 apr_psprintf(pool, " %02x",
618 (unsigned char)last[i-valid]), NULL);
620 /* 4 invalid octets will guarantee that the faulty octet is displayed */
621 invalid = data + len - last;
622 if (invalid > 4)
623 invalid = 4;
624 for (i = 0; i < invalid; ++i)
625 invalid_txt = apr_pstrcat(pool, invalid_txt,
626 apr_psprintf(pool, " %02x",
627 (unsigned char)last[i]), NULL);
629 return svn_error_createf(APR_EINVAL, NULL,
630 _("Valid UTF-8 data\n(hex:%s)\n"
631 "followed by invalid UTF-8 sequence\n(hex:%s)"),
632 valid_txt, invalid_txt);
635 /* Verify that the sequence DATA of length LEN is valid UTF-8 */
636 static svn_error_t *
637 check_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
639 if (! svn_utf__is_valid(data, len))
640 return invalid_utf8(data, len, pool);
641 return SVN_NO_ERROR;
644 /* Verify that the NULL terminated sequence DATA is valid UTF-8 */
645 static svn_error_t *
646 check_cstring_utf8(const char *data, apr_pool_t *pool)
649 if (! svn_utf__cstring_is_valid(data))
650 return invalid_utf8(data, strlen(data), pool);
651 return SVN_NO_ERROR;
655 svn_error_t *
656 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
657 const svn_stringbuf_t *src,
658 apr_pool_t *pool)
660 xlate_handle_node_t *node;
661 svn_error_t *err;
663 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
665 if (node->handle)
667 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
668 if (! err)
669 err = check_utf8((*dest)->data, (*dest)->len, pool);
671 else
673 err = check_non_ascii(src->data, src->len, pool);
674 if (! err)
675 *dest = svn_stringbuf_dup(src, pool);
678 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
680 return err;
684 svn_error_t *
685 svn_utf_string_to_utf8(const svn_string_t **dest,
686 const svn_string_t *src,
687 apr_pool_t *pool)
689 svn_stringbuf_t *destbuf;
690 xlate_handle_node_t *node;
691 svn_error_t *err;
693 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
695 if (node->handle)
697 err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool);
698 if (! err)
699 err = check_utf8(destbuf->data, destbuf->len, pool);
700 if (! err)
701 *dest = svn_string_create_from_buf(destbuf, pool);
703 else
705 err = check_non_ascii(src->data, src->len, pool);
706 if (! err)
707 *dest = svn_string_dup(src, pool);
710 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
712 return err;
716 /* Common implementation for svn_utf_cstring_to_utf8,
717 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
718 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
719 the translator and allocating from POOL. */
720 static svn_error_t *
721 convert_cstring(const char **dest,
722 const char *src,
723 xlate_handle_node_t *node,
724 apr_pool_t *pool)
726 if (node->handle)
728 svn_stringbuf_t *destbuf;
729 SVN_ERR(convert_to_stringbuf(node, src, strlen(src),
730 &destbuf, pool));
731 *dest = destbuf->data;
733 else
735 apr_size_t len = strlen(src);
736 SVN_ERR(check_non_ascii(src, len, pool));
737 *dest = apr_pstrmemdup(pool, src, len);
739 return SVN_NO_ERROR;
743 svn_error_t *
744 svn_utf_cstring_to_utf8(const char **dest,
745 const char *src,
746 apr_pool_t *pool)
748 xlate_handle_node_t *node;
749 svn_error_t *err;
751 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
752 err = convert_cstring(dest, src, node, pool);
753 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
754 SVN_ERR(err);
755 SVN_ERR(check_cstring_utf8(*dest, pool));
757 return SVN_NO_ERROR;
761 svn_error_t *
762 svn_utf_cstring_to_utf8_ex2(const char **dest,
763 const char *src,
764 const char *frompage,
765 apr_pool_t *pool)
767 xlate_handle_node_t *node;
768 svn_error_t *err;
769 const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage,
770 pool);
772 SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage,
773 convset_key, pool));
774 err = convert_cstring(dest, src, node, pool);
775 put_xlate_handle_node(node, convset_key, pool);
776 SVN_ERR(err);
777 SVN_ERR(check_cstring_utf8(*dest, pool));
779 return SVN_NO_ERROR;
783 svn_error_t *
784 svn_utf_cstring_to_utf8_ex(const char **dest,
785 const char *src,
786 const char *frompage,
787 const char *convset_key,
788 apr_pool_t *pool)
790 return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool);
794 svn_error_t *
795 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
796 const svn_stringbuf_t *src,
797 apr_pool_t *pool)
799 xlate_handle_node_t *node;
800 svn_error_t *err;
802 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
804 if (node->handle)
806 err = check_utf8(src->data, src->len, pool);
807 if (! err)
808 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
810 else
812 err = check_non_ascii(src->data, src->len, pool);
813 if (! err)
814 *dest = svn_stringbuf_dup(src, pool);
817 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
819 return err;
823 svn_error_t *
824 svn_utf_string_from_utf8(const svn_string_t **dest,
825 const svn_string_t *src,
826 apr_pool_t *pool)
828 svn_stringbuf_t *dbuf;
829 xlate_handle_node_t *node;
830 svn_error_t *err;
832 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
834 if (node->handle)
836 err = check_utf8(src->data, src->len, pool);
837 if (! err)
838 err = convert_to_stringbuf(node, src->data, src->len,
839 &dbuf, pool);
840 if (! err)
841 *dest = svn_string_create_from_buf(dbuf, pool);
843 else
845 err = check_non_ascii(src->data, src->len, pool);
846 if (! err)
847 *dest = svn_string_dup(src, pool);
850 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
852 return err;
856 svn_error_t *
857 svn_utf_cstring_from_utf8(const char **dest,
858 const char *src,
859 apr_pool_t *pool)
861 xlate_handle_node_t *node;
862 svn_error_t *err;
864 SVN_ERR(check_utf8(src, strlen(src), pool));
866 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
867 err = convert_cstring(dest, src, node, pool);
868 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
870 return err;
874 svn_error_t *
875 svn_utf_cstring_from_utf8_ex2(const char **dest,
876 const char *src,
877 const char *topage,
878 apr_pool_t *pool)
880 xlate_handle_node_t *node;
881 svn_error_t *err;
882 const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET,
883 pool);
885 SVN_ERR(check_utf8(src, strlen(src), pool));
887 SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET,
888 convset_key, pool));
889 err = convert_cstring(dest, src, node, pool);
890 put_xlate_handle_node(node, convset_key, pool);
892 return err;
896 svn_error_t *
897 svn_utf_cstring_from_utf8_ex(const char **dest,
898 const char *src,
899 const char *topage,
900 const char *convset_key,
901 apr_pool_t *pool)
903 return svn_utf_cstring_from_utf8_ex2(dest, src, topage, pool);
907 const char *
908 svn_utf__cstring_from_utf8_fuzzy(const char *src,
909 apr_pool_t *pool,
910 svn_error_t *(*convert_from_utf8)
911 (const char **, const char *, apr_pool_t *))
913 const char *escaped, *converted;
914 svn_error_t *err;
916 escaped = fuzzy_escape(src, strlen(src), pool);
918 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
919 contain only 7-bit bytes :-). Recode to native... */
920 err = convert_from_utf8(((const char **) &converted), escaped, pool);
922 if (err)
924 svn_error_clear(err);
925 return escaped;
927 else
928 return converted;
930 /* ### Check the client locale, maybe we can avoid that second
931 * conversion! See Ulrich Drepper's patch at
932 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
937 const char *
938 svn_utf_cstring_from_utf8_fuzzy(const char *src,
939 apr_pool_t *pool)
941 return svn_utf__cstring_from_utf8_fuzzy(src, pool,
942 svn_utf_cstring_from_utf8);
946 svn_error_t *
947 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
948 const svn_stringbuf_t *src,
949 apr_pool_t *pool)
951 svn_stringbuf_t *destbuf;
953 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool));
954 *dest = destbuf->data;
956 return SVN_NO_ERROR;
960 svn_error_t *
961 svn_utf_cstring_from_utf8_string(const char **dest,
962 const svn_string_t *src,
963 apr_pool_t *pool)
965 svn_stringbuf_t *dbuf;
966 xlate_handle_node_t *node;
967 svn_error_t *err;
969 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
971 if (node->handle)
973 err = check_utf8(src->data, src->len, pool);
974 if (! err)
975 err = convert_to_stringbuf(node, src->data, src->len,
976 &dbuf, pool);
977 if (! err)
978 *dest = dbuf->data;
980 else
982 err = check_non_ascii(src->data, src->len, pool);
983 if (! err)
984 *dest = apr_pstrmemdup(pool, src->data, src->len);
987 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
989 return err;