Skip a test when run against old servers.
[svn.git] / subversion / libsvn_subr / path.c
blob751c73906b78cad42fed19ba366584a774dbe29a
1 /*
2 * paths.c: a path manipulation library using svn_stringbuf_t
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
24 #include <apr_file_info.h>
25 #include <apr_lib.h>
27 #include "svn_string.h"
28 #include "svn_path.h"
29 #include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
30 #include "svn_utf.h"
31 #include "svn_io.h" /* for svn_io_stat() */
32 #include "svn_ctype.h"
/* Subversion's canonical spelling of the empty path.  The path library
   only depends on the emptiness test below, so changing this value (and
   that test) should be safe here; whether the fs/wc libraries would cope
   is not known. */
#define SVN_EMPTY_PATH ""

/* Evaluates to TRUE if S is the canonical empty path, FALSE otherwise. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* Evaluates to TRUE if the N-byte string S is the platform's spelling of
   the empty path ("."), FALSE otherwise.  The path library would work
   with a different spelling; the operating system might not. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
49 const char *
50 svn_path_internal_style(const char *path, apr_pool_t *pool)
52 if ('/' != SVN_PATH_LOCAL_SEPARATOR)
54 char *p = apr_pstrdup(pool, path);
55 path = p;
57 /* Convert all local-style separators to the canonical ones. */
58 for (; *p != '\0'; ++p)
59 if (*p == SVN_PATH_LOCAL_SEPARATOR)
60 *p = '/';
63 return svn_path_canonicalize(path, pool);
64 /* FIXME: Should also remove trailing /.'s, if the style says so. */
68 const char *
69 svn_path_local_style(const char *path, apr_pool_t *pool)
71 path = svn_path_canonicalize(path, pool);
72 /* FIXME: Should also remove trailing /.'s, if the style says so. */
74 /* Internally, Subversion represents the current directory with the
75 empty string. But users like to see "." . */
76 if (SVN_PATH_IS_EMPTY(path))
77 return ".";
79 /* If PATH is a URL, the "local style" is the same as the input. */
80 if (svn_path_is_url(path))
81 return apr_pstrdup(pool, path);
83 if ('/' != SVN_PATH_LOCAL_SEPARATOR)
85 char *p = apr_pstrdup(pool, path);
86 path = p;
88 /* Convert all canonical separators to the local-style ones. */
89 for (; *p != '\0'; ++p)
90 if (*p == '/')
91 *p = SVN_PATH_LOCAL_SEPARATOR;
94 return path;
99 #ifndef NDEBUG
100 static svn_boolean_t
101 is_canonical(const char *path,
102 apr_size_t len)
104 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
105 && (svn_dirent_is_root(path, len) ||
106 (len <= 1 || path[len-1] != '/')));
108 #endif
111 char *svn_path_join(const char *base,
112 const char *component,
113 apr_pool_t *pool)
115 apr_size_t blen = strlen(base);
116 apr_size_t clen = strlen(component);
117 char *path;
119 assert(is_canonical(base, blen));
120 assert(is_canonical(component, clen));
122 /* If the component is absolute, then return it. */
123 if (*component == '/')
124 return apr_pmemdup(pool, component, clen + 1);
126 /* If either is empty return the other */
127 if (SVN_PATH_IS_EMPTY(base))
128 return apr_pmemdup(pool, component, clen + 1);
129 if (SVN_PATH_IS_EMPTY(component))
130 return apr_pmemdup(pool, base, blen + 1);
132 if (blen == 1 && base[0] == '/')
133 blen = 0; /* Ignore base, just return separator + component */
135 /* Construct the new, combined path. */
136 path = apr_palloc(pool, blen + 1 + clen + 1);
137 memcpy(path, base, blen);
138 path[blen] = '/';
139 memcpy(path + blen + 1, component, clen + 1);
141 return path;
144 char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
146 #define MAX_SAVED_LENGTHS 10
147 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
148 apr_size_t total_len;
149 int nargs;
150 va_list va;
151 const char *s;
152 apr_size_t len;
153 char *path;
154 char *p;
155 svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
156 int base_arg = 0;
158 total_len = strlen(base);
160 assert(is_canonical(base, total_len));
162 if (total_len == 1 && *base == '/')
163 base_is_root = TRUE;
164 else if (SVN_PATH_IS_EMPTY(base))
166 total_len = sizeof(SVN_EMPTY_PATH) - 1;
167 base_is_empty = TRUE;
170 saved_lengths[0] = total_len;
172 /* Compute the length of the resulting string. */
174 nargs = 0;
175 va_start(va, base);
176 while ((s = va_arg(va, const char *)) != NULL)
178 len = strlen(s);
180 assert(is_canonical(s, len));
182 if (SVN_PATH_IS_EMPTY(s))
183 continue;
185 if (nargs++ < MAX_SAVED_LENGTHS)
186 saved_lengths[nargs] = len;
188 if (*s == '/')
190 /* an absolute path. skip all components to this point and reset
191 the total length. */
192 total_len = len;
193 base_arg = nargs;
194 base_is_root = len == 1;
195 base_is_empty = FALSE;
197 else if (nargs == base_arg
198 || (nargs == base_arg + 1 && base_is_root)
199 || base_is_empty)
201 /* if we have skipped everything up to this arg, then the base
202 and all prior components are empty. just set the length to
203 this component; do not add a separator. If the base is empty
204 we can now ignore it. */
205 if (base_is_empty)
207 base_is_empty = FALSE;
208 total_len = 0;
210 total_len += len;
212 else
214 total_len += 1 + len;
217 va_end(va);
219 /* base == "/" and no further components. just return that. */
220 if (base_is_root && total_len == 1)
221 return apr_pmemdup(pool, "/", 2);
223 /* we got the total size. allocate it, with room for a NULL character. */
224 path = p = apr_palloc(pool, total_len + 1);
226 /* if we aren't supposed to skip forward to an absolute component, and if
227 this is not an empty base that we are skipping, then copy the base
228 into the output. */
229 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
231 if (SVN_PATH_IS_EMPTY(base))
232 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
233 else
234 memcpy(p, base, len = saved_lengths[0]);
235 p += len;
238 nargs = 0;
239 va_start(va, base);
240 while ((s = va_arg(va, const char *)) != NULL)
242 if (SVN_PATH_IS_EMPTY(s))
243 continue;
245 if (++nargs < base_arg)
246 continue;
248 if (nargs < MAX_SAVED_LENGTHS)
249 len = saved_lengths[nargs];
250 else
251 len = strlen(s);
253 /* insert a separator if we aren't copying in the first component
254 (which can happen when base_arg is set). also, don't put in a slash
255 if the prior character is a slash (occurs when prior component
256 is "/"). */
257 if (p != path && p[-1] != '/')
258 *p++ = '/';
260 /* copy the new component and advance the pointer */
261 memcpy(p, s, len);
262 p += len;
264 va_end(va);
266 *p = '\0';
267 assert((apr_size_t)(p - path) == total_len);
269 return path;
274 apr_size_t
275 svn_path_component_count(const char *path)
277 apr_size_t count = 0;
279 assert(is_canonical(path, strlen(path)));
281 while (*path)
283 const char *start;
285 while (*path == '/')
286 ++path;
288 start = path;
290 while (*path && *path != '/')
291 ++path;
293 if (path != start)
294 ++count;
297 return count;
301 /* Return the length of substring necessary to encompass the entire
302 * previous path segment in PATH, which should be a LEN byte string.
304 * A trailing slash will not be included in the returned length except
305 * in the case in which PATH is absolute and there are no more
306 * previous segments.
308 static apr_size_t
309 previous_segment(const char *path,
310 apr_size_t len)
312 if (len == 0)
313 return 0;
315 while (len > 0 && path[--len] != '/')
318 if (len == 0 && path[0] == '/')
319 return 1;
320 else
321 return len;
325 void
326 svn_path_add_component(svn_stringbuf_t *path,
327 const char *component)
329 apr_size_t len = strlen(component);
331 assert(is_canonical(path->data, path->len));
332 assert(is_canonical(component, len));
334 /* Append a dir separator, but only if this path is neither empty
335 nor consists of a single dir separator already. */
336 if ((! SVN_PATH_IS_EMPTY(path->data))
337 && (! ((path->len == 1) && (*(path->data) == '/'))))
339 char dirsep = '/';
340 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
343 svn_stringbuf_appendbytes(path, component, len);
347 void
348 svn_path_remove_component(svn_stringbuf_t *path)
350 assert(is_canonical(path->data, path->len));
352 path->len = previous_segment(path->data, path->len);
353 path->data[path->len] = '\0';
357 void
358 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
360 while (n > 0)
362 svn_path_remove_component(path);
363 n--;
368 char *
369 svn_path_dirname(const char *path, apr_pool_t *pool)
371 apr_size_t len = strlen(path);
373 assert(is_canonical(path, len));
375 return apr_pstrmemdup(pool, path, previous_segment(path, len));
379 char *
380 svn_path_basename(const char *path, apr_pool_t *pool)
382 apr_size_t len = strlen(path);
383 apr_size_t start;
385 assert(is_canonical(path, len));
387 if (len == 1 && path[0] == '/')
388 start = 0;
389 else
391 start = len;
392 while (start > 0 && path[start - 1] != '/')
393 --start;
396 return apr_pstrmemdup(pool, path + start, len - start);
400 void
401 svn_path_split(const char *path,
402 const char **dirpath,
403 const char **base_name,
404 apr_pool_t *pool)
406 assert(dirpath != base_name);
408 if (dirpath)
409 *dirpath = svn_path_dirname(path, pool);
411 if (base_name)
412 *base_name = svn_path_basename(path, pool);
/* Return 1 if PATH is the canonical empty path, 0 otherwise. */
int
svn_path_is_empty(const char *path)
{
  /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
428 /* We decided against using apr_filepath_root here because of the negative
429 performance impact (creating a pool and converting strings ). */
430 svn_boolean_t
431 svn_dirent_is_root(const char *dirent, apr_size_t len)
433 /* directory is root if it's equal to '/' */
434 if (len == 1 && dirent[0] == '/')
435 return TRUE;
437 #if defined(WIN32) || defined(__CYGWIN__)
438 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
439 are also root directories */
440 if ((len == 2 || len == 3) &&
441 (dirent[1] == ':') &&
442 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
443 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
444 (len == 2 || (dirent[2] == '/' && len == 3)))
445 return TRUE;
447 /* On Windows and Cygwin, both //drive and //drive//share are root
448 directories */
449 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
450 && dirent[len - 1] != '/')
452 int segments = 0;
453 int i;
454 for (i = len; i >= 2; i--)
456 if (dirent[i] == '/')
458 segments ++;
459 if (segments > 1)
460 return FALSE;
463 return (segments <= 1);
465 #endif /* WIN32 or Cygwin */
467 return FALSE;
472 svn_path_compare_paths(const char *path1,
473 const char *path2)
475 apr_size_t path1_len = strlen(path1);
476 apr_size_t path2_len = strlen(path2);
477 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
478 apr_size_t i = 0;
480 assert(is_canonical(path1, path1_len));
481 assert(is_canonical(path2, path2_len));
483 /* Skip past common prefix. */
484 while (i < min_len && path1[i] == path2[i])
485 ++i;
487 /* Are the paths exactly the same? */
488 if ((path1_len == path2_len) && (i >= min_len))
489 return 0;
491 /* Children of paths are greater than their parents, but less than
492 greater siblings of their parents. */
493 if ((path1[i] == '/') && (path2[i] == 0))
494 return 1;
495 if ((path2[i] == '/') && (path1[i] == 0))
496 return -1;
497 if (path1[i] == '/')
498 return -1;
499 if (path2[i] == '/')
500 return 1;
502 /* Common prefix was skipped above, next character is compared to
503 determine order. We need to use an unsigned comparison, though,
504 so a "next character" of NULL (0x00) sorts numerically
505 smallest. */
506 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
510 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
512 * This function handles everything except the URL-handling logic
513 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
514 * PATH2 are *not* URLs.
516 * If the two paths do not share a common ancestor, return 0.
518 * New strings are allocated in POOL.
520 static apr_size_t
521 get_path_ancestor_length(const char *path1,
522 const char *path2,
523 apr_pool_t *pool)
525 apr_size_t path1_len, path2_len;
526 apr_size_t i = 0;
527 apr_size_t last_dirsep = 0;
529 path1_len = strlen(path1);
530 path2_len = strlen(path2);
532 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
533 return 0;
535 while (path1[i] == path2[i])
537 /* Keep track of the last directory separator we hit. */
538 if (path1[i] == '/')
539 last_dirsep = i;
541 i++;
543 /* If we get to the end of either path, break out. */
544 if ((i == path1_len) || (i == path2_len))
545 break;
548 /* two special cases:
549 1. '/' is the longest common ancestor of '/' and '/foo'
550 2. '/' is the longest common ancestor of '/rif' and '/raf' */
551 if (i == 1 && path1[0] == '/' && path2[0] == '/')
552 return 1;
554 /* last_dirsep is now the offset of the last directory separator we
555 crossed before reaching a non-matching byte. i is the offset of
556 that non-matching byte. */
557 if (((i == path1_len) && (path2[i] == '/'))
558 || ((i == path2_len) && (path1[i] == '/'))
559 || ((i == path1_len) && (i == path2_len)))
560 return i;
561 else
562 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
563 return 1;
564 return last_dirsep;
568 char *
569 svn_path_get_longest_ancestor(const char *path1,
570 const char *path2,
571 apr_pool_t *pool)
573 svn_boolean_t path1_is_url, path2_is_url;
574 path1_is_url = svn_path_is_url(path1);
575 path2_is_url = svn_path_is_url(path2);
577 if (path1_is_url && path2_is_url)
579 apr_size_t path_ancestor_len;
580 apr_size_t i = 0;
582 /* Find ':' */
583 while (1)
585 /* No shared protocol => no common prefix */
586 if (path1[i] != path2[i])
587 return apr_pmemdup(pool, SVN_EMPTY_PATH,
588 sizeof(SVN_EMPTY_PATH));
590 if (path1[i] == ':')
591 break;
593 /* They're both URLs, so EOS can't come before ':' */
594 assert((path1[i] != '\0') && (path2[i] != '\0'));
596 i++;
599 i += 3; /* Advance past '://' */
601 path_ancestor_len = get_path_ancestor_length(path1 + i, path2 + i,
602 pool);
604 if (path_ancestor_len == 0 ||
605 (path_ancestor_len == 1 && (path1 + i)[0] == '/'))
606 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
607 else
608 return apr_pstrndup(pool, path1, path_ancestor_len + i);
611 else if ((! path1_is_url) && (! path2_is_url))
613 return apr_pstrndup(pool, path1,
614 get_path_ancestor_length(path1, path2, pool));
617 else
619 /* A URL and a non-URL => no common prefix */
620 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
625 const char *
626 svn_path_is_child(const char *path1,
627 const char *path2,
628 apr_pool_t *pool)
630 apr_size_t i;
632 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
633 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
635 /* Allow "" and "foo" to be parent/child */
636 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
638 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */
639 || path2[0] == '/') /* "/foo" not a child */
640 return NULL;
641 else
642 /* everything else is child */
643 return pool ? apr_pstrdup(pool, path2) : path2;
646 /* Reach the end of at least one of the paths. How should we handle
647 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
648 appear to arise in the current Subversion code, it's not clear to me
649 if they should be parent/child or not. */
650 for (i = 0; path1[i] && path2[i]; i++)
651 if (path1[i] != path2[i])
652 return NULL;
654 /* There are two cases that are parent/child
655 ... path1[i] == '\0'
656 .../foo path2[i] == '/'
658 / path1[i] == '\0'
659 /foo path2[i] != '/'
661 if (path1[i] == '\0' && path2[i])
663 if (path2[i] == '/')
664 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
665 else if (i == 1 && path1[0] == '/')
666 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
669 /* Otherwise, path2 isn't a child. */
670 return NULL;
674 svn_boolean_t
675 svn_path_is_ancestor(const char *path1, const char *path2)
677 apr_size_t path1_len = strlen(path1);
679 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
680 if (SVN_PATH_IS_EMPTY(path1))
681 return *path2 != '/';
683 /* If path1 is a prefix of path2, then:
684 - If path1 ends in a path separator,
685 - If the paths are of the same length
687 - path2 starts a new path component after the common prefix,
688 then path1 is an ancestor. */
689 if (strncmp(path1, path2, path1_len) == 0)
690 return path1[path1_len - 1] == '/'
691 || (path2[path1_len] == '/' || path2[path1_len] == '\0');
693 return FALSE;
697 apr_array_header_t *
698 svn_path_decompose(const char *path,
699 apr_pool_t *pool)
701 apr_size_t i, oldi;
703 apr_array_header_t *components =
704 apr_array_make(pool, 1, sizeof(const char *));
706 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
708 if (SVN_PATH_IS_EMPTY(path))
709 return components; /* ### Should we return a "" component? */
711 /* If PATH is absolute, store the '/' as the first component. */
712 i = oldi = 0;
713 if (path[i] == '/')
715 char dirsep = '/';
717 APR_ARRAY_PUSH(components, const char *)
718 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
720 i++;
721 oldi++;
722 if (path[i] == '\0') /* path is a single '/' */
723 return components;
728 if ((path[i] == '/') || (path[i] == '\0'))
730 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
731 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
732 else
733 APR_ARRAY_PUSH(components, const char *)
734 = apr_pstrmemdup(pool, path + oldi, i - oldi);
736 i++;
737 oldi = i; /* skipping past the dirsep */
738 continue;
740 i++;
742 while (path[i-1]);
744 return components;
748 const char *
749 svn_path_compose(const apr_array_header_t *components,
750 apr_pool_t *pool)
752 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
753 apr_size_t max_length = components->nelts;
754 char *path;
755 char *p;
756 int i;
758 /* Get the length of each component so a total length can be
759 calculated. */
760 for (i = 0; i < components->nelts; ++i)
762 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
763 lengths[i] = l;
764 max_length += l;
767 path = apr_palloc(pool, max_length + 1);
768 p = path;
770 for (i = 0; i < components->nelts; ++i)
772 /* Append a '/' to the path. Handle the case with an absolute
773 path where a '/' appears in the first component. Only append
774 a '/' if the component is the second component that does not
775 follow a "/" first component; or it is the third or later
776 component. */
777 if (i > 1 ||
778 (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
780 const char *)) != 0))
782 *p++ = '/';
785 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
786 p += lengths[i];
789 *p = '\0';
791 return path;
795 svn_boolean_t
796 svn_path_is_single_path_component(const char *name)
798 /* assert (is_canonical (name, strlen (name))); ### Expensive strlen */
800 /* Can't be empty or `..' */
801 if (SVN_PATH_IS_EMPTY(name)
802 || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
803 return FALSE;
805 /* Slashes are bad, m'kay... */
806 if (strchr(name, '/') != NULL)
807 return FALSE;
809 /* It is valid. */
810 return TRUE;
814 svn_boolean_t
815 svn_path_is_backpath_present(const char *path)
817 int len = strlen(path);
819 if (! strcmp(path, ".."))
820 return TRUE;
822 if (! strncmp(path, "../", 3))
823 return TRUE;
825 if (strstr(path, "/../") != NULL)
826 return TRUE;
828 if (len >= 3
829 && (! strncmp(path + len - 3, "/..", 3)))
830 return TRUE;
832 return FALSE;
836 /*** URI Stuff ***/
/* Examine PATH as a potential URI.  If it begins with a non-empty
   scheme (containing no '/') followed by "://", return a pointer to the
   character right after "://"; otherwise return NULL.  The returned
   pointer is not allocated -- it points into PATH. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *c = path;

  /* A scheme is terminated by a : and cannot contain any /'s. */
  while (*c != '\0' && *c != ':')
    {
      if (*c == '/')
        return NULL;
      c++;
    }

  /* Require a non-empty scheme followed by exactly "://". */
  if (c == path || c[0] != ':' || c[1] != '/' || c[2] != '/')
    return NULL;

  return c + 3;
}
859 svn_boolean_t
860 svn_path_is_url(const char *path)
862 /* ### This function is reaaaaaaaaaaaaaally stupid right now.
863 We're just going to look for:
865 (scheme)://(optional_stuff)
867 Where (scheme) has no ':' or '/' characters.
869 Someday it might be nice to have an actual URI parser here.
871 return skip_uri_scheme(path) ? TRUE : FALSE;
/* Here is the BNF for path components in a URI. "pchar" is a
   character in a path component.

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   Note that "escaped" doesn't really apply to what users can put in
   their paths, so that really means the set of characters is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   The table is indexed by byte value; a non-zero entry means the byte
   may appear unescaped.  '/' is marked valid here as well.
*/
static const char uri_char_validity[256] = {
  /* 0 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 0, 1, 0, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 1, 0, 0,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 0,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,

  /* 192 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
};
915 svn_boolean_t
916 svn_path_is_uri_safe(const char *path)
918 apr_size_t i;
920 /* Skip the URI scheme. */
921 path = skip_uri_scheme(path);
923 /* No scheme? Get outta here. */
924 if (! path)
925 return FALSE;
927 /* Skip to the first slash that's after the URI scheme. */
928 path = strchr(path, '/');
930 /* If there's no first slash, then there's only a host portion;
931 therefore there couldn't be any uri-unsafe characters after the
932 host... so return true. */
933 if (path == NULL)
934 return TRUE;
936 for (i = 0; path[i]; i++)
938 /* Allow '%XX' (where each X is a hex digit) */
939 if (path[i] == '%')
941 if (apr_isxdigit(path[i + 1]) && apr_isxdigit(path[i + 2]))
943 i += 2;
944 continue;
946 return FALSE;
948 else if (! uri_char_validity[((unsigned char)path[i])])
950 return FALSE;
954 return TRUE;
958 /* URI-encode each character c in PATH for which TABLE[c] is 0.
959 If no encoding was needed, return PATH, else return a new string allocated
960 in POOL. */
961 static const char *
962 uri_escape(const char *path, const char table[], apr_pool_t *pool)
964 svn_stringbuf_t *retstr;
965 apr_size_t i, copied = 0;
966 int c;
968 retstr = svn_stringbuf_create_ensure(strlen(path), pool);
969 for (i = 0; path[i]; i++)
971 c = (unsigned char)path[i];
972 if (table[c])
973 continue;
975 /* If we got here, we're looking at a character that isn't
976 supported by the (or at least, our) URI encoding scheme. We
977 need to escape this character. */
979 /* First things first, copy all the good stuff that we haven't
980 yet copied into our output buffer. */
981 if (i - copied)
982 svn_stringbuf_appendbytes(retstr, path + copied,
983 i - copied);
985 /* Now, sprintf() in our escaped character, making sure our
986 buffer is big enough to hold the '%' and two digits. We cast
987 the C to unsigned char here because the 'X' format character
988 will be tempted to treat it as an unsigned int...which causes
989 problem when messing with 0x80-0xFF chars. We also need space
990 for a null as sprintf will write one. */
991 svn_stringbuf_ensure(retstr, retstr->len + 4);
992 sprintf(retstr->data + retstr->len, "%%%02X", (unsigned char)c);
993 retstr->len += 3;
995 /* Finally, update our copy counter. */
996 copied = i + 1;
999 /* If we didn't encode anything, we don't need to duplicate the string. */
1000 if (retstr->len == 0)
1001 return path;
1003 /* Anything left to copy? */
1004 if (i - copied)
1005 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1007 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1008 functions. */
1010 return retstr->data;
1014 const char *
1015 svn_path_uri_encode(const char *path, apr_pool_t *pool)
1017 const char *ret;
1019 ret = uri_escape(path, uri_char_validity, pool);
1021 /* Our interface guarantees a copy. */
1022 if (ret == path)
1023 return apr_pstrdup(pool, path);
1024 else
1025 return ret;
/* Escape table for svn_path_uri_from_iri(), indexed by byte value:
   bytes 0-127 are left alone (1), bytes 128-255 are escaped (0). */
static const char iri_escape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
1049 const char *
1050 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
1052 return uri_escape(iri, iri_escape_chars, pool);
/* Escape table for svn_path_uri_autoescape(), indexed by byte value.
   A 0 entry marks a byte that gets escaped: space, '"', '<', '>', '\\',
   '^', '`', '{', '|' and '}'.  Every other byte is left alone. */
static const char uri_autoescape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 0, 1, 0, 1,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 0, 1, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 1,

  /* 128 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,

  /* 192 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
};
1080 const char *
1081 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
1083 return uri_escape(uri, uri_autoescape_chars, pool);
1086 const char *
1087 svn_path_uri_decode(const char *path, apr_pool_t *pool)
1089 svn_stringbuf_t *retstr;
1090 apr_size_t i;
1091 svn_boolean_t query_start = FALSE;
1093 /* avoid repeated realloc */
1094 retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
1096 retstr->len = 0;
1097 for (i = 0; path[i]; i++)
1099 char c = path[i];
1101 if (c == '?')
1103 /* Mark the start of the query string, if it exists. */
1104 query_start = TRUE;
1106 else if (c == '+' && query_start)
1108 /* Only do this if we are into the query string.
1109 * RFC 2396, section 3.3 */
1110 c = ' ';
1112 else if (c == '%' && apr_isxdigit(path[i + 1])
1113 && apr_isxdigit(path[i+2]))
1115 char digitz[3];
1116 digitz[0] = path[++i];
1117 digitz[1] = path[++i];
1118 digitz[2] = '\0';
1119 c = (char)(strtol(digitz, NULL, 16));
1122 retstr->data[retstr->len++] = c;
1125 /* Null-terminate this bad-boy. */
1126 retstr->data[retstr->len] = 0;
1128 return retstr->data;
1132 const char *
1133 svn_path_url_add_component(const char *url,
1134 const char *component,
1135 apr_pool_t *pool)
1137 /* URL can have trailing '/' */
1138 url = svn_path_canonicalize(url, pool);
1140 return svn_path_join(url, svn_path_uri_encode(component, pool), pool);
1143 svn_error_t *
1144 svn_path_get_absolute(const char **pabsolute,
1145 const char *relative,
1146 apr_pool_t *pool)
1148 char *buffer;
1149 apr_status_t apr_err;
1150 const char *path_apr;
1152 if (svn_path_is_url(relative))
1154 *pabsolute = apr_pstrdup(pool, relative);
1155 return SVN_NO_ERROR;
1158 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1160 apr_err = apr_filepath_merge(&buffer, NULL,
1161 path_apr,
1162 APR_FILEPATH_NOTRELATIVE
1163 | APR_FILEPATH_TRUENAME,
1164 pool);
1165 if (apr_err)
1166 return svn_error_createf(SVN_ERR_BAD_FILENAME, NULL,
1167 _("Couldn't determine absolute path of '%s'"),
1168 svn_path_local_style(relative, pool));
1170 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1171 *pabsolute = svn_path_canonicalize(*pabsolute, pool);
1172 return SVN_NO_ERROR;
1176 svn_error_t *
1177 svn_path_split_if_file(const char *path,
1178 const char **pdirectory,
1179 const char **pfile,
1180 apr_pool_t *pool)
1182 apr_finfo_t finfo;
1183 svn_error_t *err;
1185 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
1187 err = svn_io_stat(&finfo, path, APR_FINFO_TYPE, pool);
1188 if (err && ! APR_STATUS_IS_ENOENT(err->apr_err))
1189 return err;
1191 if (err || finfo.filetype == APR_REG)
1193 svn_error_clear(err);
1194 svn_path_split(path, pdirectory, pfile, pool);
1196 else if (finfo.filetype == APR_DIR)
1198 *pdirectory = path;
1199 *pfile = SVN_EMPTY_PATH;
1201 else
1203 return svn_error_createf(SVN_ERR_BAD_FILENAME, NULL,
1204 _("'%s' is neither a file nor a directory name"),
1205 svn_path_local_style(path, pool));
1208 return SVN_NO_ERROR;
1212 const char *
1213 svn_path_canonicalize(const char *path, apr_pool_t *pool)
1215 char *canon, *dst;
1216 const char *src;
1217 apr_size_t seglen;
1218 apr_size_t canon_segments = 0;
1219 svn_boolean_t uri;
1221 dst = canon = apr_pcalloc(pool, strlen(path) + 1);
1223 /* Copy over the URI scheme if present. */
1224 src = skip_uri_scheme(path);
1225 if (src)
1227 uri = TRUE;
1228 memcpy(dst, path, src - path);
1229 dst += (src - path);
1231 else
1233 uri = FALSE;
1234 src = path;
1237 /* If this is an absolute path, then just copy over the initial
1238 separator character. */
1239 if (*src == '/')
1241 *(dst++) = *(src++);
1243 #if defined(WIN32) || defined(__CYGWIN__)
1244 /* On Windows permit two leading separator characters which means an
1245 * UNC path. However, a double slash in a URI after the scheme is never
1246 * valid. */
1247 if (!uri && *src == '/')
1248 *(dst++) = *(src++);
1249 #endif /* WIN32 or Cygwin */
1253 while (*src)
1255 /* Parse each segment, find the closing '/' */
1256 const char *next = src;
1257 while (*next && (*next != '/'))
1258 ++next;
1260 seglen = next - src;
1262 if (seglen == 0 || (seglen == 1 && src[0] == '.'))
1264 /* Noop segment, so do nothing. */
1266 else
1268 /* An actual segment, append it to the destination path */
1269 if (*next)
1270 seglen++;
1271 memcpy(dst, src, seglen);
1272 dst += seglen;
1273 canon_segments++;
1276 /* Skip over trailing slash to the next segment. */
1277 src = next;
1278 if (*src)
1279 src++;
1282 /* Remove the trailing slash. */
1283 if ((canon_segments > 0 || uri) && *(dst - 1) == '/')
1284 dst--;
1286 *dst = '\0';
1288 #if defined(WIN32) || defined(__CYGWIN__)
1289 /* Skip leading double slashes when there are less than 2
1290 * canon segments. UNC paths *MUST* have two segments. */
1291 if (canon_segments < 2 && canon[0] == '/' && canon[1] == '/')
1292 return canon + 1;
1293 #endif /* WIN32 or Cygwin */
1295 return canon;
1299 svn_boolean_t
1300 svn_path_is_canonical(const char *path, apr_pool_t *pool)
1302 return (strcmp(path, svn_path_canonicalize(path, pool)) == 0);
1307 /** Get APR's internal path encoding. */
1308 static svn_error_t *
1309 get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1311 apr_status_t apr_err;
1312 int encoding_style;
1314 apr_err = apr_filepath_encoding(&encoding_style, pool);
1315 if (apr_err)
1316 return svn_error_wrap_apr(apr_err,
1317 _("Can't determine the native path encoding"));
1319 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1320 Well, for now we'll just punt to the svn_utf_ functions;
1321 those will at least do the ASCII-subset check. */
1322 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1323 return SVN_NO_ERROR;
1327 svn_error_t *
1328 svn_path_cstring_from_utf8(const char **path_apr,
1329 const char *path_utf8,
1330 apr_pool_t *pool)
1332 svn_boolean_t path_is_utf8;
1333 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1334 if (path_is_utf8)
1336 *path_apr = apr_pstrdup(pool, path_utf8);
1337 return SVN_NO_ERROR;
1339 else
1340 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1344 svn_error_t *
1345 svn_path_cstring_to_utf8(const char **path_utf8,
1346 const char *path_apr,
1347 apr_pool_t *pool)
1349 svn_boolean_t path_is_utf8;
1350 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1351 if (path_is_utf8)
1353 *path_utf8 = apr_pstrdup(pool, path_apr);
1354 return SVN_NO_ERROR;
1356 else
1357 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1361 /* Return a copy of PATH, allocated from POOL, for which control
1362 characters have been escaped using the form \NNN (where NNN is the
1363 octal representation of the byte's ordinal value). */
1364 static const char *
1365 illegal_path_escape(const char *path, apr_pool_t *pool)
1367 svn_stringbuf_t *retstr;
1368 apr_size_t i, copied = 0;
1369 int c;
1371 /* Create stringbuf with estimated buffer size. */
1372 retstr = svn_stringbuf_create_ensure(strlen(path), pool);
1373 for (i = 0; path[i]; i++)
1375 c = (unsigned char)path[i];
1376 if (! svn_ctype_iscntrl(c))
1377 continue;
1379 /* If we got here, we're looking at a character that isn't
1380 supported by the (or at least, our) URI encoding scheme. We
1381 need to escape this character. */
1383 /* First things first, copy all the good stuff that we haven't
1384 yet copied into our output buffer. */
1385 if (i - copied)
1386 svn_stringbuf_appendbytes(retstr, path + copied,
1387 i - copied);
1389 /* Now, sprintf() in our escaped character, making sure our
1390 buffer is big enough to hold the '%' and two digits. We cast
1391 the C to unsigned char here because the 'X' format character
1392 will be tempted to treat it as an unsigned int...which causes
1393 problem when messing with 0x80-0xFF chars. We also need space
1394 for a null as sprintf will write one. */
1395 /*### The backslash separator doesn't work too great with Windows,
1396 but it's what we'll use for consistency with invalid utf8
1397 formatting (until someone has a better idea) */
1398 svn_stringbuf_ensure(retstr, retstr->len + 4);
1399 sprintf(retstr->data + retstr->len, "\\%03o", (unsigned char)c);
1400 retstr->len += 4;
1402 /* Finally, update our copy counter. */
1403 copied = i + 1;
1406 /* If we didn't encode anything, we don't need to duplicate the string. */
1407 if (retstr->len == 0)
1408 return path;
1410 /* Anything left to copy? */
1411 if (i - copied)
1412 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1414 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1415 functions. */
1417 return retstr->data;
1420 svn_error_t *
1421 svn_path_check_valid(const char *path, apr_pool_t *pool)
1423 const char *c;
1425 for (c = path; *c; c++)
1427 if (svn_ctype_iscntrl(*c))
1429 return svn_error_createf
1430 (SVN_ERR_FS_PATH_SYNTAX, NULL,
1431 _("Invalid control character '0x%02x' in path '%s'"),
1433 illegal_path_escape(svn_path_local_style(path, pool), pool));
1437 return SVN_NO_ERROR;
1440 void
1441 svn_path_splitext(const char **path_root,
1442 const char **path_ext,
1443 const char *path,
1444 apr_pool_t *pool)
1446 const char *last_dot, *last_slash;
1448 /* Easy out -- why do all the work when there's no way to report it? */
1449 if (! (path_root || path_ext))
1450 return;
1452 /* Do we even have a period in this thing? And if so, is there
1453 anything after it? We look for the "rightmost" period in the
1454 string. */
1455 last_dot = strrchr(path, '.');
1456 if (last_dot && (last_dot + 1 != '\0'))
1458 /* If we have a period, we need to make sure it occurs in the
1459 final path component -- that there's no path separator
1460 between the last period and the end of the PATH -- otherwise,
1461 it doesn't count. Also, we want to make sure that our period
1462 isn't the first character of the last component. */
1463 last_slash = strrchr(path, '/');
1464 if ((last_slash && (last_dot > (last_slash + 1)))
1465 || ((! last_slash) && (last_dot > path + 1)))
1467 if (path_root)
1468 *path_root = apr_pstrmemdup(pool, path,
1469 (last_dot - path + 1) * sizeof(*path));
1470 if (path_ext)
1471 *path_ext = apr_pstrdup(pool, last_dot + 1);
1472 return;
1475 /* If we get here, we never found a suitable separator character, so
1476 there's no split. */
1477 if (path_root)
1478 *path_root = apr_pstrdup(pool, path);
1479 if (path_ext)
1480 *path_ext = "";