Change the format of the revprops block sent in svnserve for
[svn.git] / subversion / libsvn_subr / path.c
blobc5917a8075111d3c2816b8d8476d1948432d4485
1 /*
2 * paths.c: a path manipulation library using svn_stringbuf_t
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
24 #include <apr_file_info.h>
25 #include <apr_lib.h>
27 #include "svn_string.h"
28 #include "svn_path.h"
29 #include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
30 #include "svn_utf.h"
31 #include "svn_io.h" /* for svn_io_stat() */
32 #include "svn_ctype.h"
/* The canonical empty path.  The path library itself only depends on
   the emptiness test below; whether the fs/wc libraries would cope
   with a different value is uncertain. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path, FALSE otherwise. */
#define SVN_PATH_IS_EMPTY(s) (*(s) == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."),
   FALSE otherwise.  The path library would survive a change here,
   but the OS might not. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) (1 == (n) && '.' == *(s))
49 const char *
50 svn_path_internal_style(const char *path, apr_pool_t *pool)
52 if ('/' != SVN_PATH_LOCAL_SEPARATOR)
54 char *p = apr_pstrdup(pool, path);
55 path = p;
57 /* Convert all local-style separators to the canonical ones. */
58 for (; *p != '\0'; ++p)
59 if (*p == SVN_PATH_LOCAL_SEPARATOR)
60 *p = '/';
63 return svn_path_canonicalize(path, pool);
64 /* FIXME: Should also remove trailing /.'s, if the style says so. */
68 const char *
69 svn_path_local_style(const char *path, apr_pool_t *pool)
71 path = svn_path_canonicalize(path, pool);
72 /* FIXME: Should also remove trailing /.'s, if the style says so. */
74 /* Internally, Subversion represents the current directory with the
75 empty string. But users like to see "." . */
76 if (SVN_PATH_IS_EMPTY(path))
77 return ".";
79 /* If PATH is a URL, the "local style" is the same as the input. */
80 if (svn_path_is_url(path))
81 return apr_pstrdup(pool, path);
83 if ('/' != SVN_PATH_LOCAL_SEPARATOR)
85 char *p = apr_pstrdup(pool, path);
86 path = p;
88 /* Convert all canonical separators to the local-style ones. */
89 for (; *p != '\0'; ++p)
90 if (*p == '/')
91 *p = SVN_PATH_LOCAL_SEPARATOR;
94 return path;
99 #ifndef NDEBUG
100 static svn_boolean_t
101 is_canonical(const char *path,
102 apr_size_t len)
104 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
105 && (svn_dirent_is_root(path, len) ||
106 (len <= 1 || path[len-1] != '/')));
108 #endif
111 char *svn_path_join(const char *base,
112 const char *component,
113 apr_pool_t *pool)
115 apr_size_t blen = strlen(base);
116 apr_size_t clen = strlen(component);
117 char *path;
119 assert(is_canonical(base, blen));
120 assert(is_canonical(component, clen));
122 /* If the component is absolute, then return it. */
123 if (*component == '/')
124 return apr_pmemdup(pool, component, clen + 1);
126 /* If either is empty return the other */
127 if (SVN_PATH_IS_EMPTY(base))
128 return apr_pmemdup(pool, component, clen + 1);
129 if (SVN_PATH_IS_EMPTY(component))
130 return apr_pmemdup(pool, base, blen + 1);
132 if (blen == 1 && base[0] == '/')
133 blen = 0; /* Ignore base, just return separator + component */
135 /* Construct the new, combined path. */
136 path = apr_palloc(pool, blen + 1 + clen + 1);
137 memcpy(path, base, blen);
138 path[blen] = '/';
139 memcpy(path + blen + 1, component, clen + 1);
141 return path;
144 char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
146 #define MAX_SAVED_LENGTHS 10
147 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
148 apr_size_t total_len;
149 int nargs;
150 va_list va;
151 const char *s;
152 apr_size_t len;
153 char *path;
154 char *p;
155 svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
156 int base_arg = 0;
158 total_len = strlen(base);
160 assert(is_canonical(base, total_len));
162 if (total_len == 1 && *base == '/')
163 base_is_root = TRUE;
164 else if (SVN_PATH_IS_EMPTY(base))
166 total_len = sizeof(SVN_EMPTY_PATH) - 1;
167 base_is_empty = TRUE;
170 saved_lengths[0] = total_len;
172 /* Compute the length of the resulting string. */
174 nargs = 0;
175 va_start(va, base);
176 while ((s = va_arg(va, const char *)) != NULL)
178 len = strlen(s);
180 assert(is_canonical(s, len));
182 if (SVN_PATH_IS_EMPTY(s))
183 continue;
185 if (nargs++ < MAX_SAVED_LENGTHS)
186 saved_lengths[nargs] = len;
188 if (*s == '/')
190 /* an absolute path. skip all components to this point and reset
191 the total length. */
192 total_len = len;
193 base_arg = nargs;
194 base_is_root = len == 1;
195 base_is_empty = FALSE;
197 else if (nargs == base_arg
198 || (nargs == base_arg + 1 && base_is_root)
199 || base_is_empty)
201 /* if we have skipped everything up to this arg, then the base
202 and all prior components are empty. just set the length to
203 this component; do not add a separator. If the base is empty
204 we can now ignore it. */
205 if (base_is_empty)
207 base_is_empty = FALSE;
208 total_len = 0;
210 total_len += len;
212 else
214 total_len += 1 + len;
217 va_end(va);
219 /* base == "/" and no further components. just return that. */
220 if (base_is_root && total_len == 1)
221 return apr_pmemdup(pool, "/", 2);
223 /* we got the total size. allocate it, with room for a NULL character. */
224 path = p = apr_palloc(pool, total_len + 1);
226 /* if we aren't supposed to skip forward to an absolute component, and if
227 this is not an empty base that we are skipping, then copy the base
228 into the output. */
229 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
231 if (SVN_PATH_IS_EMPTY(base))
232 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
233 else
234 memcpy(p, base, len = saved_lengths[0]);
235 p += len;
238 nargs = 0;
239 va_start(va, base);
240 while ((s = va_arg(va, const char *)) != NULL)
242 if (SVN_PATH_IS_EMPTY(s))
243 continue;
245 if (++nargs < base_arg)
246 continue;
248 if (nargs < MAX_SAVED_LENGTHS)
249 len = saved_lengths[nargs];
250 else
251 len = strlen(s);
253 /* insert a separator if we aren't copying in the first component
254 (which can happen when base_arg is set). also, don't put in a slash
255 if the prior character is a slash (occurs when prior component
256 is "/"). */
257 if (p != path && p[-1] != '/')
258 *p++ = '/';
260 /* copy the new component and advance the pointer */
261 memcpy(p, s, len);
262 p += len;
264 va_end(va);
266 *p = '\0';
267 assert((apr_size_t)(p - path) == total_len);
269 return path;
274 apr_size_t
275 svn_path_component_count(const char *path)
277 apr_size_t count = 0;
279 assert(is_canonical(path, strlen(path)));
281 while (*path)
283 const char *start;
285 while (*path == '/')
286 ++path;
288 start = path;
290 while (*path && *path != '/')
291 ++path;
293 if (path != start)
294 ++count;
297 return count;
301 /* Return the length of substring necessary to encompass the entire
302 * previous path segment in PATH, which should be a LEN byte string.
304 * A trailing slash will not be included in the returned length except
305 * in the case in which PATH is absolute and there are no more
306 * previous segments.
308 static apr_size_t
309 previous_segment(const char *path,
310 apr_size_t len)
312 if (len == 0)
313 return 0;
315 while (len > 0 && path[--len] != '/')
318 if (len == 0 && path[0] == '/')
319 return 1;
320 else
321 return len;
325 void
326 svn_path_add_component(svn_stringbuf_t *path,
327 const char *component)
329 apr_size_t len = strlen(component);
331 assert(is_canonical(path->data, path->len));
332 assert(is_canonical(component, len));
334 /* Append a dir separator, but only if this path is neither empty
335 nor consists of a single dir separator already. */
336 if ((! SVN_PATH_IS_EMPTY(path->data))
337 && (! ((path->len == 1) && (*(path->data) == '/'))))
339 char dirsep = '/';
340 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
343 svn_stringbuf_appendbytes(path, component, len);
347 void
348 svn_path_remove_component(svn_stringbuf_t *path)
350 assert(is_canonical(path->data, path->len));
352 path->len = previous_segment(path->data, path->len);
353 path->data[path->len] = '\0';
357 void
358 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
360 while (n > 0)
362 svn_path_remove_component(path);
363 n--;
368 char *
369 svn_path_dirname(const char *path, apr_pool_t *pool)
371 apr_size_t len = strlen(path);
373 assert(is_canonical(path, len));
375 return apr_pstrmemdup(pool, path, previous_segment(path, len));
379 char *
380 svn_path_basename(const char *path, apr_pool_t *pool)
382 apr_size_t len = strlen(path);
383 apr_size_t start;
385 assert(is_canonical(path, len));
387 if (len == 1 && path[0] == '/')
388 start = 0;
389 else
391 start = len;
392 while (start > 0 && path[start - 1] != '/')
393 --start;
396 return apr_pstrmemdup(pool, path + start, len - start);
400 void
401 svn_path_split(const char *path,
402 const char **dirpath,
403 const char **base_name,
404 apr_pool_t *pool)
406 assert(dirpath != base_name);
408 if (dirpath)
409 *dirpath = svn_path_dirname(path, pool);
411 if (base_name)
412 *base_name = svn_path_basename(path, pool);
/* Return 1 if PATH is the canonical empty path, 0 otherwise. */
int
svn_path_is_empty(const char *path)
{
  /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
428 /* We decided against using apr_filepath_root here because of the negative
429 performance impact (creating a pool and converting strings ). */
430 svn_boolean_t
431 svn_dirent_is_root(const char *dirent, apr_size_t len)
433 /* directory is root if it's equal to '/' */
434 if (len == 1 && dirent[0] == '/')
435 return TRUE;
437 #if defined(WIN32) || defined(__CYGWIN__)
438 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
439 are also root directories */
440 if ((len == 2 || len == 3) &&
441 (dirent[1] == ':') &&
442 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
443 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
444 (len == 2 || (dirent[2] == '/' && len == 3)))
445 return TRUE;
447 /* On Windows and Cygwin, both //drive and //drive//share are root
448 directories */
449 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
450 && dirent[len - 1] != '/')
452 int segments = 0;
453 int i;
454 for (i = len; i >= 2; i--)
456 if (dirent[i] == '/')
458 segments ++;
459 if (segments > 1)
460 return FALSE;
463 return (segments <= 1);
465 #endif /* WIN32 or Cygwin */
467 return FALSE;
472 svn_path_compare_paths(const char *path1,
473 const char *path2)
475 apr_size_t path1_len = strlen(path1);
476 apr_size_t path2_len = strlen(path2);
477 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
478 apr_size_t i = 0;
480 assert(is_canonical(path1, path1_len));
481 assert(is_canonical(path2, path2_len));
483 /* Skip past common prefix. */
484 while (i < min_len && path1[i] == path2[i])
485 ++i;
487 /* Are the paths exactly the same? */
488 if ((path1_len == path2_len) && (i >= min_len))
489 return 0;
491 /* Children of paths are greater than their parents, but less than
492 greater siblings of their parents. */
493 if ((path1[i] == '/') && (path2[i] == 0))
494 return 1;
495 if ((path2[i] == '/') && (path1[i] == 0))
496 return -1;
497 if (path1[i] == '/')
498 return -1;
499 if (path2[i] == '/')
500 return 1;
502 /* Common prefix was skipped above, next character is compared to
503 determine order. We need to use an unsigned comparison, though,
504 so a "next character" of NULL (0x00) sorts numerically
505 smallest. */
506 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
510 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
512 * This function handles everything except the URL-handling logic
513 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
514 * PATH2 are *not* URLs.
516 * If the two paths do not share a common ancestor, return 0.
518 * New strings are allocated in POOL.
520 static apr_size_t
521 get_path_ancestor_length(const char *path1,
522 const char *path2,
523 apr_pool_t *pool)
525 apr_size_t path1_len, path2_len;
526 apr_size_t i = 0;
527 apr_size_t last_dirsep = 0;
529 path1_len = strlen(path1);
530 path2_len = strlen(path2);
532 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
533 return 0;
535 while (path1[i] == path2[i])
537 /* Keep track of the last directory separator we hit. */
538 if (path1[i] == '/')
539 last_dirsep = i;
541 i++;
543 /* If we get to the end of either path, break out. */
544 if ((i == path1_len) || (i == path2_len))
545 break;
548 /* two special cases:
549 1. '/' is the longest common ancestor of '/' and '/foo'
550 2. '/' is the longest common ancestor of '/rif' and '/raf' */
551 if (i == 1 && path1[0] == '/' && path2[0] == '/')
552 return 1;
554 /* last_dirsep is now the offset of the last directory separator we
555 crossed before reaching a non-matching byte. i is the offset of
556 that non-matching byte. */
557 if (((i == path1_len) && (path2[i] == '/'))
558 || ((i == path2_len) && (path1[i] == '/'))
559 || ((i == path1_len) && (i == path2_len)))
560 return i;
561 else
562 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
563 return 1;
564 return last_dirsep;
568 char *
569 svn_path_get_longest_ancestor(const char *path1,
570 const char *path2,
571 apr_pool_t *pool)
573 svn_boolean_t path1_is_url, path2_is_url;
574 path1_is_url = svn_path_is_url(path1);
575 path2_is_url = svn_path_is_url(path2);
577 if (path1_is_url && path2_is_url)
579 apr_size_t path_ancestor_len;
580 apr_size_t i = 0;
582 /* Find ':' */
583 while (1)
585 /* No shared protocol => no common prefix */
586 if (path1[i] != path2[i])
587 return apr_pmemdup(pool, SVN_EMPTY_PATH,
588 sizeof(SVN_EMPTY_PATH));
590 if (path1[i] == ':')
591 break;
593 /* They're both URLs, so EOS can't come before ':' */
594 assert((path1[i] != '\0') && (path2[i] != '\0'));
596 i++;
599 i += 3; /* Advance past '://' */
601 path_ancestor_len = get_path_ancestor_length(path1 + i, path2 + i,
602 pool);
604 if (path_ancestor_len == 0 ||
605 (path_ancestor_len == 1 && (path1 + i)[0] == '/'))
606 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
607 else
608 return apr_pstrndup(pool, path1, path_ancestor_len + i);
611 else if ((! path1_is_url) && (! path2_is_url))
613 return apr_pstrndup(pool, path1,
614 get_path_ancestor_length(path1, path2, pool));
617 else
619 /* A URL and a non-URL => no common prefix */
620 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
625 const char *
626 svn_path_is_child(const char *path1,
627 const char *path2,
628 apr_pool_t *pool)
630 apr_size_t i;
632 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
633 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
635 /* Allow "" and "foo" to be parent/child */
636 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
638 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */
639 || path2[0] == '/') /* "/foo" not a child */
640 return NULL;
641 else
642 /* everything else is child */
643 return pool ? apr_pstrdup(pool, path2) : path2;
646 /* Reach the end of at least one of the paths. How should we handle
647 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
648 appear to arise in the current Subversion code, it's not clear to me
649 if they should be parent/child or not. */
650 for (i = 0; path1[i] && path2[i]; i++)
651 if (path1[i] != path2[i])
652 return NULL;
654 /* There are two cases that are parent/child
655 ... path1[i] == '\0'
656 .../foo path2[i] == '/'
658 / path1[i] == '\0'
659 /foo path2[i] != '/'
661 if (path1[i] == '\0' && path2[i])
663 if (path2[i] == '/')
664 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
665 else if (i == 1 && path1[0] == '/')
666 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
669 /* Otherwise, path2 isn't a child. */
670 return NULL;
674 svn_boolean_t
675 svn_path_is_ancestor(const char *path1, const char *path2)
677 apr_size_t path1_len = strlen(path1);
679 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
680 if (SVN_PATH_IS_EMPTY(path1))
681 return *path2 != '/';
683 /* If path1 is a prefix of path2, then:
684 - If path1 ends in a path separator,
685 - If the paths are of the same length
687 - path2 starts a new path component after the common prefix,
688 then path1 is an ancestor. */
689 if (strncmp(path1, path2, path1_len) == 0)
690 return path1[path1_len - 1] == '/'
691 || (path2[path1_len] == '/' || path2[path1_len] == '\0');
693 return FALSE;
697 apr_array_header_t *
698 svn_path_decompose(const char *path,
699 apr_pool_t *pool)
701 apr_size_t i, oldi;
703 apr_array_header_t *components =
704 apr_array_make(pool, 1, sizeof(const char *));
706 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
708 if (SVN_PATH_IS_EMPTY(path))
709 return components; /* ### Should we return a "" component? */
711 /* If PATH is absolute, store the '/' as the first component. */
712 i = oldi = 0;
713 if (path[i] == '/')
715 char dirsep = '/';
717 APR_ARRAY_PUSH(components, const char *)
718 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
720 i++;
721 oldi++;
722 if (path[i] == '\0') /* path is a single '/' */
723 return components;
728 if ((path[i] == '/') || (path[i] == '\0'))
730 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
731 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
732 else
733 APR_ARRAY_PUSH(components, const char *)
734 = apr_pstrmemdup(pool, path + oldi, i - oldi);
736 i++;
737 oldi = i; /* skipping past the dirsep */
738 continue;
740 i++;
742 while (path[i-1]);
744 return components;
748 const char *
749 svn_path_compose(const apr_array_header_t *components,
750 apr_pool_t *pool)
752 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
753 apr_size_t max_length = components->nelts;
754 char *path;
755 char *p;
756 int i;
758 /* Get the length of each component so a total length can be
759 calculated. */
760 for (i = 0; i < components->nelts; ++i)
762 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
763 lengths[i] = l;
764 max_length += l;
767 path = apr_palloc(pool, max_length + 1);
768 p = path;
770 for (i = 0; i < components->nelts; ++i)
772 /* Append a '/' to the path. Handle the case with an absolute
773 path where a '/' appears in the first component. Only append
774 a '/' if the component is the second component that does not
775 follow a "/" first component; or it is the third or later
776 component. */
777 if (i > 1 ||
778 (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
780 const char *)) != 0))
782 *p++ = '/';
785 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
786 p += lengths[i];
789 *p = '\0';
791 return path;
795 svn_boolean_t
796 svn_path_is_single_path_component(const char *name)
798 /* assert (is_canonical (name, strlen (name))); ### Expensive strlen */
800 /* Can't be empty or `..' */
801 if (SVN_PATH_IS_EMPTY(name)
802 || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
803 return FALSE;
805 /* Slashes are bad, m'kay... */
806 if (strchr(name, '/') != NULL)
807 return FALSE;
809 /* It is valid. */
810 return TRUE;
814 svn_boolean_t
815 svn_path_is_backpath_present(const char *path)
817 int len = strlen(path);
819 if (! strcmp(path, ".."))
820 return TRUE;
822 if (! strncmp(path, "../", 3))
823 return TRUE;
825 if (strstr(path, "/../") != NULL)
826 return TRUE;
828 if (len >= 3
829 && (! strncmp(path + len - 3, "/..", 3)))
830 return TRUE;
832 return FALSE;
836 /*** URI Stuff ***/
/* Examine PATH as a potential URI.  If it starts with a non-empty
   scheme (containing no '/') followed by "://", return a pointer to
   the character right after the "://"; otherwise return NULL.  The
   returned pointer shares memory with PATH. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *colon = path;

  /* A scheme is terminated by a ':' and cannot contain any '/'. */
  while (*colon != '\0' && *colon != ':')
    {
      if (*colon == '/')
        return NULL;
      ++colon;
    }

  /* Require at least one scheme character and a literal "://". */
  if (colon != path
      && colon[0] == ':' && colon[1] == '/' && colon[2] == '/')
    return colon + 3;

  return NULL;
}
859 svn_boolean_t
860 svn_path_is_url(const char *path)
862 /* ### This function is reaaaaaaaaaaaaaally stupid right now.
863 We're just going to look for:
865 (scheme)://(optional_stuff)
867 Where (scheme) has no ':' or '/' characters.
869 Someday it might be nice to have an actual URI parser here.
871 return skip_uri_scheme(path) ? TRUE : FALSE;
/* Here is the BNF for path components in a URI. "pchar" is a
   character in a path component.

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   Note that "escaped" doesn't really apply to what users can put in
   their paths, so that really means the set of characters is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   The table is indexed by byte value; a non-zero entry marks a byte
   that may appear unescaped.  Note that '/' is marked as allowed too.
*/
static const char uri_char_validity[256] = {
  /* 0 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 0, 1, 0, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 1, 0, 0,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 0,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,

  /* 192 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
};
915 svn_boolean_t
916 svn_path_is_uri_safe(const char *path)
918 apr_size_t i;
920 /* Skip the URI scheme. */
921 path = skip_uri_scheme(path);
923 /* No scheme? Get outta here. */
924 if (! path)
925 return FALSE;
927 /* Skip to the first slash that's after the URI scheme. */
928 path = strchr(path, '/');
930 /* If there's no first slash, then there's only a host portion;
931 therefore there couldn't be any uri-unsafe characters after the
932 host... so return true. */
933 if (path == NULL)
934 return TRUE;
936 for (i = 0; path[i]; i++)
938 /* Allow '%XX' (where each X is a hex digit) */
939 if (path[i] == '%')
941 if (apr_isxdigit(path[i + 1]) && apr_isxdigit(path[i + 2]))
943 i += 2;
944 continue;
946 return FALSE;
948 else if (! uri_char_validity[((unsigned char)path[i])])
950 return FALSE;
954 return TRUE;
958 /* URI-encode each character c in PATH for which TABLE[c] is 0.
959 If no encoding was needed, return PATH, else return a new string allocated
960 in POOL. */
961 static const char *
962 uri_escape(const char *path, const char table[], apr_pool_t *pool)
964 svn_stringbuf_t *retstr;
965 apr_size_t i, copied = 0;
966 int c;
968 retstr = svn_stringbuf_create("", pool);
969 for (i = 0; path[i]; i++)
971 c = (unsigned char)path[i];
972 if (table[c])
973 continue;
975 /* If we got here, we're looking at a character that isn't
976 supported by the (or at least, our) URI encoding scheme. We
977 need to escape this character. */
979 /* First things first, copy all the good stuff that we haven't
980 yet copied into our output buffer. */
981 if (i - copied)
982 svn_stringbuf_appendbytes(retstr, path + copied,
983 i - copied);
985 /* Now, sprintf() in our escaped character, making sure our
986 buffer is big enough to hold the '%' and two digits. We cast
987 the C to unsigned char here because the 'X' format character
988 will be tempted to treat it as an unsigned int...which causes
989 problem when messing with 0x80-0xFF chars. We also need space
990 for a null as sprintf will write one. */
991 svn_stringbuf_ensure(retstr, retstr->len + 4);
992 sprintf(retstr->data + retstr->len, "%%%02X", (unsigned char)c);
993 retstr->len += 3;
995 /* Finally, update our copy counter. */
996 copied = i + 1;
999 /* If we didn't encode anything, we don't need to duplicate the string. */
1000 if (retstr->len == 0)
1001 return path;
1003 /* Anything left to copy? */
1004 if (i - copied)
1005 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1007 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1008 functions. */
1010 return retstr->data;
1014 const char *
1015 svn_path_uri_encode(const char *path, apr_pool_t *pool)
1017 const char *ret;
1019 ret = uri_escape(path, uri_char_validity, pool);
1021 /* Our interface guarantees a copy. */
1022 if (ret == path)
1023 return apr_pstrdup(pool, path);
1024 else
1025 return ret;
/* Lookup table for svn_path_uri_from_iri(): a non-zero entry marks a
   byte that is left alone.  Bytes 0-127 are kept, bytes 128-255 are
   escaped. */
static const char iri_escape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0
};
1049 const char *
1050 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
1052 return uri_escape(iri, iri_escape_chars, pool);
/* Lookup table for svn_path_uri_autoescape(): a non-zero entry marks a
   byte that is left alone.  The zero entries (bytes that get escaped)
   are ' ', '"', '<', '>', '\', '^', '`', '{', '|' and '}'. */
static const char uri_autoescape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 0, 1, 0, 1,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 0, 1, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 1,

  /* 128 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,

  /* 192 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
};
1080 const char *
1081 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
1083 return uri_escape(uri, uri_autoescape_chars, pool);
1086 const char *
1087 svn_path_uri_decode(const char *path, apr_pool_t *pool)
1089 svn_stringbuf_t *retstr;
1090 apr_size_t i;
1091 svn_boolean_t query_start = FALSE;
1093 retstr = svn_stringbuf_create("", pool);
1095 /* avoid repeated realloc */
1096 svn_stringbuf_ensure(retstr, strlen(path) + 1);
1098 retstr->len = 0;
1099 for (i = 0; path[i]; i++)
1101 char c = path[i];
1103 if (c == '?')
1105 /* Mark the start of the query string, if it exists. */
1106 query_start = TRUE;
1108 else if (c == '+' && query_start)
1110 /* Only do this if we are into the query string.
1111 * RFC 2396, section 3.3 */
1112 c = ' ';
1114 else if (c == '%' && apr_isxdigit(path[i + 1])
1115 && apr_isxdigit(path[i+2]))
1117 char digitz[3];
1118 digitz[0] = path[++i];
1119 digitz[1] = path[++i];
1120 digitz[2] = '\0';
1121 c = (char)(strtol(digitz, NULL, 16));
1124 retstr->data[retstr->len++] = c;
1127 /* Null-terminate this bad-boy. */
1128 retstr->data[retstr->len] = 0;
1130 return retstr->data;
1134 const char *
1135 svn_path_url_add_component(const char *url,
1136 const char *component,
1137 apr_pool_t *pool)
1139 /* URL can have trailing '/' */
1140 url = svn_path_canonicalize(url, pool);
1142 return svn_path_join(url, svn_path_uri_encode(component, pool), pool);
1145 svn_error_t *
1146 svn_path_get_absolute(const char **pabsolute,
1147 const char *relative,
1148 apr_pool_t *pool)
1150 char *buffer;
1151 apr_status_t apr_err;
1152 const char *path_apr;
1154 if (svn_path_is_url(relative))
1156 *pabsolute = apr_pstrdup(pool, relative);
1157 return SVN_NO_ERROR;
1160 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1162 apr_err = apr_filepath_merge(&buffer, NULL,
1163 path_apr,
1164 APR_FILEPATH_NOTRELATIVE
1165 | APR_FILEPATH_TRUENAME,
1166 pool);
1167 if (apr_err)
1168 return svn_error_createf(SVN_ERR_BAD_FILENAME, NULL,
1169 _("Couldn't determine absolute path of '%s'"),
1170 svn_path_local_style(relative, pool));
1172 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1173 *pabsolute = svn_path_canonicalize(*pabsolute, pool);
1174 return SVN_NO_ERROR;
1178 svn_error_t *
1179 svn_path_split_if_file(const char *path,
1180 const char **pdirectory,
1181 const char **pfile,
1182 apr_pool_t *pool)
1184 apr_finfo_t finfo;
1185 svn_error_t *err;
1187 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
1189 err = svn_io_stat(&finfo, path, APR_FINFO_TYPE, pool);
1190 if (err && ! APR_STATUS_IS_ENOENT(err->apr_err))
1191 return err;
1193 if (err || finfo.filetype == APR_REG)
1195 svn_error_clear(err);
1196 svn_path_split(path, pdirectory, pfile, pool);
1198 else if (finfo.filetype == APR_DIR)
1200 *pdirectory = path;
1201 *pfile = SVN_EMPTY_PATH;
1203 else
1205 return svn_error_createf(SVN_ERR_BAD_FILENAME, NULL,
1206 _("'%s' is neither a file nor a directory name"),
1207 svn_path_local_style(path, pool));
1210 return SVN_NO_ERROR;
1214 const char *
1215 svn_path_canonicalize(const char *path, apr_pool_t *pool)
1217 char *canon, *dst;
1218 const char *src;
1219 apr_size_t seglen;
1220 apr_size_t canon_segments = 0;
1221 svn_boolean_t uri;
1223 dst = canon = apr_pcalloc(pool, strlen(path) + 1);
1225 /* Copy over the URI scheme if present. */
1226 src = skip_uri_scheme(path);
1227 if (src)
1229 uri = TRUE;
1230 memcpy(dst, path, src - path);
1231 dst += (src - path);
1233 else
1235 uri = FALSE;
1236 src = path;
1239 /* If this is an absolute path, then just copy over the initial
1240 separator character. */
1241 if (*src == '/')
1243 *(dst++) = *(src++);
1245 #if defined(WIN32) || defined(__CYGWIN__)
1246 /* On Windows permit two leading separator characters which means an
1247 * UNC path. However, a double slash in a URI after the scheme is never
1248 * valid. */
1249 if (!uri && *src == '/')
1250 *(dst++) = *(src++);
1251 #endif /* WIN32 or Cygwin */
1255 while (*src)
1257 /* Parse each segment, find the closing '/' */
1258 const char *next = src;
1259 while (*next && (*next != '/'))
1260 ++next;
1262 seglen = next - src;
1264 if (seglen == 0 || (seglen == 1 && src[0] == '.'))
1266 /* Noop segment, so do nothing. */
1268 else
1270 /* An actual segment, append it to the destination path */
1271 if (*next)
1272 seglen++;
1273 memcpy(dst, src, seglen);
1274 dst += seglen;
1275 canon_segments++;
1278 /* Skip over trailing slash to the next segment. */
1279 src = next;
1280 if (*src)
1281 src++;
1284 /* Remove the trailing slash. */
1285 if ((canon_segments > 0 || uri) && *(dst - 1) == '/')
1286 dst--;
1288 *dst = '\0';
1290 #if defined(WIN32) || defined(__CYGWIN__)
1291 /* Skip leading double slashes when there are less than 2
1292 * canon segments. UNC paths *MUST* have two segments. */
1293 if (canon_segments < 2 && canon[0] == '/' && canon[1] == '/')
1294 return canon + 1;
1295 #endif /* WIN32 or Cygwin */
1297 return canon;
1301 svn_boolean_t
1302 svn_path_is_canonical(const char *path, apr_pool_t *pool)
1304 return (strcmp(path, svn_path_canonicalize(path, pool)) == 0);
1309 /** Get APR's internal path encoding. */
1310 static svn_error_t *
1311 get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1313 apr_status_t apr_err;
1314 int encoding_style;
1316 apr_err = apr_filepath_encoding(&encoding_style, pool);
1317 if (apr_err)
1318 return svn_error_wrap_apr(apr_err,
1319 _("Can't determine the native path encoding"));
1321 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1322 Well, for now we'll just punt to the svn_utf_ functions;
1323 those will at least do the ASCII-subset check. */
1324 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1325 return SVN_NO_ERROR;
1329 svn_error_t *
1330 svn_path_cstring_from_utf8(const char **path_apr,
1331 const char *path_utf8,
1332 apr_pool_t *pool)
1334 svn_boolean_t path_is_utf8;
1335 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1336 if (path_is_utf8)
1338 *path_apr = apr_pstrdup(pool, path_utf8);
1339 return SVN_NO_ERROR;
1341 else
1342 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1346 svn_error_t *
1347 svn_path_cstring_to_utf8(const char **path_utf8,
1348 const char *path_apr,
1349 apr_pool_t *pool)
1351 svn_boolean_t path_is_utf8;
1352 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1353 if (path_is_utf8)
1355 *path_utf8 = apr_pstrdup(pool, path_apr);
1356 return SVN_NO_ERROR;
1358 else
1359 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1363 /* Return a copy of PATH, allocated from POOL, for which control
1364 characters have been escaped using the form \NNN (where NNN is the
1365 octal representation of the byte's ordinal value). */
1366 static const char *
1367 illegal_path_escape(const char *path, apr_pool_t *pool)
1369 svn_stringbuf_t *retstr;
1370 apr_size_t i, copied = 0;
1371 int c;
1373 retstr = svn_stringbuf_create("", pool);
1374 for (i = 0; path[i]; i++)
1376 c = (unsigned char)path[i];
1377 if (! svn_ctype_iscntrl(c))
1378 continue;
1380 /* If we got here, we're looking at a character that isn't
1381 supported by the (or at least, our) URI encoding scheme. We
1382 need to escape this character. */
1384 /* First things first, copy all the good stuff that we haven't
1385 yet copied into our output buffer. */
1386 if (i - copied)
1387 svn_stringbuf_appendbytes(retstr, path + copied,
1388 i - copied);
1390 /* Now, sprintf() in our escaped character, making sure our
1391 buffer is big enough to hold the '%' and two digits. We cast
1392 the C to unsigned char here because the 'X' format character
1393 will be tempted to treat it as an unsigned int...which causes
1394 problem when messing with 0x80-0xFF chars. We also need space
1395 for a null as sprintf will write one. */
1396 /*### The backslash separator doesn't work too great with Windows,
1397 but it's what we'll use for consistency with invalid utf8
1398 formatting (until someone has a better idea) */
1399 svn_stringbuf_ensure(retstr, retstr->len + 4);
1400 sprintf(retstr->data + retstr->len, "\\%03o", (unsigned char)c);
1401 retstr->len += 4;
1403 /* Finally, update our copy counter. */
1404 copied = i + 1;
1407 /* If we didn't encode anything, we don't need to duplicate the string. */
1408 if (retstr->len == 0)
1409 return path;
1411 /* Anything left to copy? */
1412 if (i - copied)
1413 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1415 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1416 functions. */
1418 return retstr->data;
1421 svn_error_t *
1422 svn_path_check_valid(const char *path, apr_pool_t *pool)
1424 const char *c;
1426 for (c = path; *c; c++)
1428 if (svn_ctype_iscntrl(*c))
1430 return svn_error_createf
1431 (SVN_ERR_FS_PATH_SYNTAX, NULL,
1432 _("Invalid control character '0x%02x' in path '%s'"),
1434 illegal_path_escape(svn_path_local_style(path, pool), pool));
1438 return SVN_NO_ERROR;
1441 void
1442 svn_path_splitext(const char **path_root,
1443 const char **path_ext,
1444 const char *path,
1445 apr_pool_t *pool)
1447 const char *last_dot, *last_slash;
1449 /* Easy out -- why do all the work when there's no way to report it? */
1450 if (! (path_root || path_ext))
1451 return;
1453 /* Do we even have a period in this thing? And if so, is there
1454 anything after it? We look for the "rightmost" period in the
1455 string. */
1456 last_dot = strrchr(path, '.');
1457 if (last_dot && (last_dot + 1 != '\0'))
1459 /* If we have a period, we need to make sure it occurs in the
1460 final path component -- that there's no path separator
1461 between the last period and the end of the PATH -- otherwise,
1462 it doesn't count. Also, we want to make sure that our period
1463 isn't the first character of the last component. */
1464 last_slash = strrchr(path, '/');
1465 if ((last_slash && (last_dot > (last_slash + 1)))
1466 || ((! last_slash) && (last_dot > path + 1)))
1468 if (path_root)
1469 *path_root = apr_pstrmemdup(pool, path,
1470 (last_dot - path + 1) * sizeof(*path));
1471 if (path_ext)
1472 *path_ext = apr_pstrdup(pool, last_dot + 1);
1473 return;
1476 /* If we get here, we never found a suitable separator character, so
1477 there's no split. */
1478 if (path_root)
1479 *path_root = apr_pstrdup(pool, path);
1480 if (path_ext)
1481 *path_ext = "";