2 * paths.c: a path manipulation library using svn_stringbuf_t
4 * ====================================================================
5 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
24 #include <apr_file_info.h>
27 #include "svn_string.h"
29 #include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
31 #include "svn_io.h" /* for svn_io_stat() */
32 #include "svn_ctype.h"
35 /* The canonical empty path. Can this be changed? Well, change the empty
36 test below and the path library will work, not so sure about the fs/wc
38 #define SVN_EMPTY_PATH ""
40 /* TRUE if s is the canonical empty path, FALSE otherwise */
41 #define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')
43 /* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
44 this be changed? Well, the path library will work, not so sure about
46 #define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
50 svn_path_internal_style(const char *path
, apr_pool_t
*pool
)
52 if ('/' != SVN_PATH_LOCAL_SEPARATOR
)
54 char *p
= apr_pstrdup(pool
, path
);
57 /* Convert all local-style separators to the canonical ones. */
58 for (; *p
!= '\0'; ++p
)
59 if (*p
== SVN_PATH_LOCAL_SEPARATOR
)
63 return svn_path_canonicalize(path
, pool
);
64 /* FIXME: Should also remove trailing /.'s, if the style says so. */
69 svn_path_local_style(const char *path
, apr_pool_t
*pool
)
71 path
= svn_path_canonicalize(path
, pool
);
72 /* FIXME: Should also remove trailing /.'s, if the style says so. */
74 /* Internally, Subversion represents the current directory with the
75 empty string. But users like to see "." . */
76 if (SVN_PATH_IS_EMPTY(path
))
79 /* If PATH is a URL, the "local style" is the same as the input. */
80 if (svn_path_is_url(path
))
81 return apr_pstrdup(pool
, path
);
83 if ('/' != SVN_PATH_LOCAL_SEPARATOR
)
85 char *p
= apr_pstrdup(pool
, path
);
88 /* Convert all canonical separators to the local-style ones. */
89 for (; *p
!= '\0'; ++p
)
91 *p
= SVN_PATH_LOCAL_SEPARATOR
;
101 is_canonical(const char *path
,
104 return (! SVN_PATH_IS_PLATFORM_EMPTY(path
, len
)
105 && (svn_dirent_is_root(path
, len
) ||
106 (len
<= 1 || path
[len
-1] != '/')));
111 char *svn_path_join(const char *base
,
112 const char *component
,
115 apr_size_t blen
= strlen(base
);
116 apr_size_t clen
= strlen(component
);
119 assert(is_canonical(base
, blen
));
120 assert(is_canonical(component
, clen
));
122 /* If the component is absolute, then return it. */
123 if (*component
== '/')
124 return apr_pmemdup(pool
, component
, clen
+ 1);
126 /* If either is empty return the other */
127 if (SVN_PATH_IS_EMPTY(base
))
128 return apr_pmemdup(pool
, component
, clen
+ 1);
129 if (SVN_PATH_IS_EMPTY(component
))
130 return apr_pmemdup(pool
, base
, blen
+ 1);
132 if (blen
== 1 && base
[0] == '/')
133 blen
= 0; /* Ignore base, just return separator + component */
135 /* Construct the new, combined path. */
136 path
= apr_palloc(pool
, blen
+ 1 + clen
+ 1);
137 memcpy(path
, base
, blen
);
139 memcpy(path
+ blen
+ 1, component
, clen
+ 1);
144 char *svn_path_join_many(apr_pool_t
*pool
, const char *base
, ...)
146 #define MAX_SAVED_LENGTHS 10
147 apr_size_t saved_lengths
[MAX_SAVED_LENGTHS
];
148 apr_size_t total_len
;
155 svn_boolean_t base_is_empty
= FALSE
, base_is_root
= FALSE
;
158 total_len
= strlen(base
);
160 assert(is_canonical(base
, total_len
));
162 if (total_len
== 1 && *base
== '/')
164 else if (SVN_PATH_IS_EMPTY(base
))
166 total_len
= sizeof(SVN_EMPTY_PATH
) - 1;
167 base_is_empty
= TRUE
;
170 saved_lengths
[0] = total_len
;
172 /* Compute the length of the resulting string. */
176 while ((s
= va_arg(va
, const char *)) != NULL
)
180 assert(is_canonical(s
, len
));
182 if (SVN_PATH_IS_EMPTY(s
))
185 if (nargs
++ < MAX_SAVED_LENGTHS
)
186 saved_lengths
[nargs
] = len
;
190 /* an absolute path. skip all components to this point and reset
194 base_is_root
= len
== 1;
195 base_is_empty
= FALSE
;
197 else if (nargs
== base_arg
198 || (nargs
== base_arg
+ 1 && base_is_root
)
201 /* if we have skipped everything up to this arg, then the base
202 and all prior components are empty. just set the length to
203 this component; do not add a separator. If the base is empty
204 we can now ignore it. */
207 base_is_empty
= FALSE
;
214 total_len
+= 1 + len
;
219 /* base == "/" and no further components. just return that. */
220 if (base_is_root
&& total_len
== 1)
221 return apr_pmemdup(pool
, "/", 2);
223 /* we got the total size. allocate it, with room for the terminating NUL character. */
224 path
= p
= apr_palloc(pool
, total_len
+ 1);
226 /* if we aren't supposed to skip forward to an absolute component, and if
227 this is not an empty base that we are skipping, then copy the base
229 if (base_arg
== 0 && ! (SVN_PATH_IS_EMPTY(base
) && ! base_is_empty
))
231 if (SVN_PATH_IS_EMPTY(base
))
232 memcpy(p
, SVN_EMPTY_PATH
, len
= saved_lengths
[0]);
234 memcpy(p
, base
, len
= saved_lengths
[0]);
240 while ((s
= va_arg(va
, const char *)) != NULL
)
242 if (SVN_PATH_IS_EMPTY(s
))
245 if (++nargs
< base_arg
)
248 if (nargs
< MAX_SAVED_LENGTHS
)
249 len
= saved_lengths
[nargs
];
253 /* insert a separator if we aren't copying in the first component
254 (which can happen when base_arg is set). also, don't put in a slash
255 if the prior character is a slash (occurs when prior component
257 if (p
!= path
&& p
[-1] != '/')
260 /* copy the new component and advance the pointer */
267 assert((apr_size_t
)(p
- path
) == total_len
);
275 svn_path_component_count(const char *path
)
277 apr_size_t count
= 0;
279 assert(is_canonical(path
, strlen(path
)));
290 while (*path
&& *path
!= '/')
301 /* Return the length of substring necessary to encompass the entire
302 * previous path segment in PATH, which should be a LEN byte string.
304 * A trailing slash will not be included in the returned length except
305 * in the case in which PATH is absolute and there are no more
309 previous_segment(const char *path
,
315 while (len
> 0 && path
[--len
] != '/')
318 if (len
== 0 && path
[0] == '/')
326 svn_path_add_component(svn_stringbuf_t
*path
,
327 const char *component
)
329 apr_size_t len
= strlen(component
);
331 assert(is_canonical(path
->data
, path
->len
));
332 assert(is_canonical(component
, len
));
334 /* Append a dir separator, but only if this path is neither empty
335 nor consists of a single dir separator already. */
336 if ((! SVN_PATH_IS_EMPTY(path
->data
))
337 && (! ((path
->len
== 1) && (*(path
->data
) == '/'))))
340 svn_stringbuf_appendbytes(path
, &dirsep
, sizeof(dirsep
));
343 svn_stringbuf_appendbytes(path
, component
, len
);
348 svn_path_remove_component(svn_stringbuf_t
*path
)
350 assert(is_canonical(path
->data
, path
->len
));
352 path
->len
= previous_segment(path
->data
, path
->len
);
353 path
->data
[path
->len
] = '\0';
358 svn_path_remove_components(svn_stringbuf_t
*path
, apr_size_t n
)
362 svn_path_remove_component(path
);
369 svn_path_dirname(const char *path
, apr_pool_t
*pool
)
371 apr_size_t len
= strlen(path
);
373 assert(is_canonical(path
, len
));
375 return apr_pstrmemdup(pool
, path
, previous_segment(path
, len
));
380 svn_path_basename(const char *path
, apr_pool_t
*pool
)
382 apr_size_t len
= strlen(path
);
385 assert(is_canonical(path
, len
));
387 if (len
== 1 && path
[0] == '/')
392 while (start
> 0 && path
[start
- 1] != '/')
396 return apr_pstrmemdup(pool
, path
+ start
, len
- start
);
401 svn_path_split(const char *path
,
402 const char **dirpath
,
403 const char **base_name
,
406 assert(dirpath
!= base_name
);
409 *dirpath
= svn_path_dirname(path
, pool
);
412 *base_name
= svn_path_basename(path
, pool
);
417 svn_path_is_empty(const char *path
)
419 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
421 if (SVN_PATH_IS_EMPTY(path
))
428 /* We decided against using apr_filepath_root here because of the negative
429 performance impact (creating a pool and converting strings). */
431 svn_dirent_is_root(const char *dirent
, apr_size_t len
)
433 /* directory is root if it's equal to '/' */
434 if (len
== 1 && dirent
[0] == '/')
437 #if defined(WIN32) || defined(__CYGWIN__)
438 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
439 are also root directories */
440 if ((len
== 2 || len
== 3) &&
441 (dirent
[1] == ':') &&
442 ((dirent
[0] >= 'A' && dirent
[0] <= 'Z') ||
443 (dirent
[0] >= 'a' && dirent
[0] <= 'z')) &&
444 (len
== 2 || (dirent
[2] == '/' && len
== 3)))
447 /* On Windows and Cygwin, both //drive and //drive//share are root
449 if (len
>= 2 && dirent
[0] == '/' && dirent
[1] == '/'
450 && dirent
[len
- 1] != '/')
454 for (i
= len
; i
>= 2; i
--)
456 if (dirent
[i
] == '/')
463 return (segments
<= 1);
465 #endif /* WIN32 or Cygwin */
472 svn_path_compare_paths(const char *path1
,
475 apr_size_t path1_len
= strlen(path1
);
476 apr_size_t path2_len
= strlen(path2
);
477 apr_size_t min_len
= ((path1_len
< path2_len
) ? path1_len
: path2_len
);
480 assert(is_canonical(path1
, path1_len
));
481 assert(is_canonical(path2
, path2_len
));
483 /* Skip past common prefix. */
484 while (i
< min_len
&& path1
[i
] == path2
[i
])
487 /* Are the paths exactly the same? */
488 if ((path1_len
== path2_len
) && (i
>= min_len
))
491 /* Children of paths are greater than their parents, but less than
492 greater siblings of their parents. */
493 if ((path1
[i
] == '/') && (path2
[i
] == 0))
495 if ((path2
[i
] == '/') && (path1
[i
] == 0))
502 /* Common prefix was skipped above, next character is compared to
503 determine order. We need to use an unsigned comparison, though,
504 so a "next character" of NUL (0x00) sorts numerically
506 return (unsigned char)(path1
[i
]) < (unsigned char)(path2
[i
]) ? -1 : 1;
510 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
512 * This function handles everything except the URL-handling logic
513 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
514 * PATH2 are *not* URLs.
516 * If the two paths do not share a common ancestor, return 0.
518 * New strings are allocated in POOL.
521 get_path_ancestor_length(const char *path1
,
525 apr_size_t path1_len
, path2_len
;
527 apr_size_t last_dirsep
= 0;
529 path1_len
= strlen(path1
);
530 path2_len
= strlen(path2
);
532 if (SVN_PATH_IS_EMPTY(path1
) || SVN_PATH_IS_EMPTY(path2
))
535 while (path1
[i
] == path2
[i
])
537 /* Keep track of the last directory separator we hit. */
543 /* If we get to the end of either path, break out. */
544 if ((i
== path1_len
) || (i
== path2_len
))
548 /* two special cases:
549 1. '/' is the longest common ancestor of '/' and '/foo'
550 2. '/' is the longest common ancestor of '/rif' and '/raf' */
551 if (i
== 1 && path1
[0] == '/' && path2
[0] == '/')
554 /* last_dirsep is now the offset of the last directory separator we
555 crossed before reaching a non-matching byte. i is the offset of
556 that non-matching byte. */
557 if (((i
== path1_len
) && (path2
[i
] == '/'))
558 || ((i
== path2_len
) && (path1
[i
] == '/'))
559 || ((i
== path1_len
) && (i
== path2_len
)))
562 if (last_dirsep
== 0 && path1
[0] == '/' && path2
[0] == '/')
569 svn_path_get_longest_ancestor(const char *path1
,
573 svn_boolean_t path1_is_url
, path2_is_url
;
574 path1_is_url
= svn_path_is_url(path1
);
575 path2_is_url
= svn_path_is_url(path2
);
577 if (path1_is_url
&& path2_is_url
)
579 apr_size_t path_ancestor_len
;
585 /* No shared protocol => no common prefix */
586 if (path1
[i
] != path2
[i
])
587 return apr_pmemdup(pool
, SVN_EMPTY_PATH
,
588 sizeof(SVN_EMPTY_PATH
));
593 /* They're both URLs, so EOS can't come before ':' */
594 assert((path1
[i
] != '\0') && (path2
[i
] != '\0'));
599 i
+= 3; /* Advance past '://' */
601 path_ancestor_len
= get_path_ancestor_length(path1
+ i
, path2
+ i
,
604 if (path_ancestor_len
== 0 ||
605 (path_ancestor_len
== 1 && (path1
+ i
)[0] == '/'))
606 return apr_pmemdup(pool
, SVN_EMPTY_PATH
, sizeof(SVN_EMPTY_PATH
));
608 return apr_pstrndup(pool
, path1
, path_ancestor_len
+ i
);
611 else if ((! path1_is_url
) && (! path2_is_url
))
613 return apr_pstrndup(pool
, path1
,
614 get_path_ancestor_length(path1
, path2
, pool
));
619 /* A URL and a non-URL => no common prefix */
620 return apr_pmemdup(pool
, SVN_EMPTY_PATH
, sizeof(SVN_EMPTY_PATH
));
626 svn_path_is_child(const char *path1
,
632 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
633 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
635 /* Allow "" and "foo" to be parent/child */
636 if (SVN_PATH_IS_EMPTY(path1
)) /* "" is the parent */
638 if (SVN_PATH_IS_EMPTY(path2
) /* "" not a child */
639 || path2
[0] == '/') /* "/foo" not a child */
642 /* everything else is child */
643 return pool
? apr_pstrdup(pool
, path2
) : path2
;
646 /* Reach the end of at least one of the paths. How should we handle
647 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
648 appear to arise in the current Subversion code, it's not clear to me
649 if they should be parent/child or not. */
650 for (i
= 0; path1
[i
] && path2
[i
]; i
++)
651 if (path1
[i
] != path2
[i
])
654 /* There are two cases that are parent/child
656 .../foo path2[i] == '/'
661 if (path1
[i
] == '\0' && path2
[i
])
664 return pool
? apr_pstrdup(pool
, path2
+ i
+ 1) : path2
+ i
+ 1;
665 else if (i
== 1 && path1
[0] == '/')
666 return pool
? apr_pstrdup(pool
, path2
+ 1) : path2
+ 1;
669 /* Otherwise, path2 isn't a child. */
675 svn_path_is_ancestor(const char *path1
, const char *path2
)
677 apr_size_t path1_len
= strlen(path1
);
679 /* If path1 is empty and path2 is not absolute, then path1 is an ancestor. */
680 if (SVN_PATH_IS_EMPTY(path1
))
681 return *path2
!= '/';
683 /* If path1 is a prefix of path2, then:
684 - If path1 ends in a path separator,
685 - If the paths are of the same length
687 - path2 starts a new path component after the common prefix,
688 then path1 is an ancestor. */
689 if (strncmp(path1
, path2
, path1_len
) == 0)
690 return path1
[path1_len
- 1] == '/'
691 || (path2
[path1_len
] == '/' || path2
[path1_len
] == '\0');
698 svn_path_decompose(const char *path
,
703 apr_array_header_t
*components
=
704 apr_array_make(pool
, 1, sizeof(const char *));
706 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
708 if (SVN_PATH_IS_EMPTY(path
))
709 return components
; /* ### Should we return a "" component? */
711 /* If PATH is absolute, store the '/' as the first component. */
717 APR_ARRAY_PUSH(components
, const char *)
718 = apr_pstrmemdup(pool
, &dirsep
, sizeof(dirsep
));
722 if (path
[i
] == '\0') /* path is a single '/' */
728 if ((path
[i
] == '/') || (path
[i
] == '\0'))
730 if (SVN_PATH_IS_PLATFORM_EMPTY(path
+ oldi
, i
- oldi
))
731 APR_ARRAY_PUSH(components
, const char *) = SVN_EMPTY_PATH
;
733 APR_ARRAY_PUSH(components
, const char *)
734 = apr_pstrmemdup(pool
, path
+ oldi
, i
- oldi
);
737 oldi
= i
; /* skipping past the dirsep */
749 svn_path_compose(const apr_array_header_t
*components
,
752 apr_size_t
*lengths
= apr_palloc(pool
, components
->nelts
*sizeof(*lengths
));
753 apr_size_t max_length
= components
->nelts
;
758 /* Get the length of each component so a total length can be
760 for (i
= 0; i
< components
->nelts
; ++i
)
762 apr_size_t l
= strlen(APR_ARRAY_IDX(components
, i
, const char *));
767 path
= apr_palloc(pool
, max_length
+ 1);
770 for (i
= 0; i
< components
->nelts
; ++i
)
772 /* Append a '/' to the path. Handle the case with an absolute
773 path where a '/' appears in the first component. Only append
774 a '/' if the component is the second component that does not
775 follow a "/" first component; or it is the third or later
778 (i
== 1 && strcmp("/", APR_ARRAY_IDX(components
,
780 const char *)) != 0))
785 memcpy(p
, APR_ARRAY_IDX(components
, i
, const char *), lengths
[i
]);
796 svn_path_is_single_path_component(const char *name
)
798 /* assert (is_canonical (name, strlen (name))); ### Expensive strlen */
800 /* Can't be empty or `..' */
801 if (SVN_PATH_IS_EMPTY(name
)
802 || (name
[0] == '.' && name
[1] == '.' && name
[2] == '\0'))
805 /* Slashes are bad, m'kay... */
806 if (strchr(name
, '/') != NULL
)
815 svn_path_is_backpath_present(const char *path
)
817 int len
= strlen(path
);
819 if (! strcmp(path
, ".."))
822 if (! strncmp(path
, "../", 3))
825 if (strstr(path
, "/../") != NULL
)
829 && (! strncmp(path
+ len
- 3, "/..", 3)))
838 /* Examine PATH as a potential URI, and return a substring of PATH
839 that immediately follows the (scheme):// portion of the URI, or
840 NULL if PATH doesn't appear to be a valid URI. The returned value
841 is not alloced -- it shares memory with PATH. */
843 skip_uri_scheme(const char *path
)
847 /* A scheme is terminated by a : and cannot contain any /'s. */
848 for (j
= 0; path
[j
] && path
[j
] != ':'; ++j
)
852 if (j
> 0 && path
[j
] == ':' && path
[j
+1] == '/' && path
[j
+2] == '/')
860 svn_path_is_url(const char *path
)
862 /* ### This function is reaaaaaaaaaaaaaally stupid right now.
863 We're just going to look for:
865 (scheme)://(optional_stuff)
867 Where (scheme) has no ':' or '/' characters.
869 Someday it might be nice to have an actual URI parser here.
871 return skip_uri_scheme(path
) ? TRUE
: FALSE
;
876 /* Here is the BNF for path components in a URI. "pchar" is a
877 character in a path component.
879 pchar = unreserved | escaped |
880 ":" | "@" | "&" | "=" | "+" | "$" | ","
881 unreserved = alphanum | mark
882 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
884 Note that "escaped" doesn't really apply to what users can put in
885 their paths, so that really means the set of characters is:
887 alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","
889 static const char uri_char_validity
[256] = {
890 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
891 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
892 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
893 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
896 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
897 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
898 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
899 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
903 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
904 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
905 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
908 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
909 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
910 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
911 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
916 svn_path_is_uri_safe(const char *path
)
920 /* Skip the URI scheme. */
921 path
= skip_uri_scheme(path
);
923 /* No scheme? Get outta here. */
927 /* Skip to the first slash that's after the URI scheme. */
928 path
= strchr(path
, '/');
930 /* If there's no first slash, then there's only a host portion;
931 therefore there couldn't be any uri-unsafe characters after the
932 host... so return true. */
936 for (i
= 0; path
[i
]; i
++)
938 /* Allow '%XX' (where each X is a hex digit) */
941 if (apr_isxdigit(path
[i
+ 1]) && apr_isxdigit(path
[i
+ 2]))
948 else if (! uri_char_validity
[((unsigned char)path
[i
])])
958 /* URI-encode each character c in PATH for which TABLE[c] is 0.
959 If no encoding was needed, return PATH, else return a new string allocated
962 uri_escape(const char *path
, const char table
[], apr_pool_t
*pool
)
964 svn_stringbuf_t
*retstr
;
965 apr_size_t i
, copied
= 0;
968 retstr
= svn_stringbuf_create("", pool
);
969 for (i
= 0; path
[i
]; i
++)
971 c
= (unsigned char)path
[i
];
975 /* If we got here, we're looking at a character that isn't
976 supported by the (or at least, our) URI encoding scheme. We
977 need to escape this character. */
979 /* First things first, copy all the good stuff that we haven't
980 yet copied into our output buffer. */
982 svn_stringbuf_appendbytes(retstr
, path
+ copied
,
985 /* Now, sprintf() in our escaped character, making sure our
986 buffer is big enough to hold the '%' and two digits. We cast
987 the C to unsigned char here because the 'X' format character
988 will be tempted to treat it as an unsigned int...which causes
989 problem when messing with 0x80-0xFF chars. We also need space
990 for a null as sprintf will write one. */
991 svn_stringbuf_ensure(retstr
, retstr
->len
+ 4);
992 sprintf(retstr
->data
+ retstr
->len
, "%%%02X", (unsigned char)c
);
995 /* Finally, update our copy counter. */
999 /* If we didn't encode anything, we don't need to duplicate the string. */
1000 if (retstr
->len
== 0)
1003 /* Anything left to copy? */
1005 svn_stringbuf_appendbytes(retstr
, path
+ copied
, i
- copied
);
1007 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1010 return retstr
->data
;
1015 svn_path_uri_encode(const char *path
, apr_pool_t
*pool
)
1019 ret
= uri_escape(path
, uri_char_validity
, pool
);
1021 /* Our interface guarantees a copy. */
1023 return apr_pstrdup(pool
, path
);
1028 static const char iri_escape_chars
[256] = {
1029 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1030 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1031 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1032 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1033 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1034 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1035 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1036 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1050 svn_path_uri_from_iri(const char *iri
, apr_pool_t
*pool
)
1052 return uri_escape(iri
, iri_escape_chars
, pool
);
1055 static const char uri_autoescape_chars
[256] = {
1056 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1057 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1058 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1059 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1062 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1063 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1064 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1065 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
1068 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1081 svn_path_uri_autoescape(const char *uri
, apr_pool_t
*pool
)
1083 return uri_escape(uri
, uri_autoescape_chars
, pool
);
1087 svn_path_uri_decode(const char *path
, apr_pool_t
*pool
)
1089 svn_stringbuf_t
*retstr
;
1091 svn_boolean_t query_start
= FALSE
;
1093 retstr
= svn_stringbuf_create("", pool
);
1095 /* avoid repeated realloc */
1096 svn_stringbuf_ensure(retstr
, strlen(path
) + 1);
1099 for (i
= 0; path
[i
]; i
++)
1105 /* Mark the start of the query string, if it exists. */
1108 else if (c
== '+' && query_start
)
1110 /* Only do this if we are into the query string.
1111 * RFC 2396, section 3.3 */
1114 else if (c
== '%' && apr_isxdigit(path
[i
+ 1])
1115 && apr_isxdigit(path
[i
+2]))
1118 digitz
[0] = path
[++i
];
1119 digitz
[1] = path
[++i
];
1121 c
= (char)(strtol(digitz
, NULL
, 16));
1124 retstr
->data
[retstr
->len
++] = c
;
1127 /* Null-terminate this bad-boy. */
1128 retstr
->data
[retstr
->len
] = 0;
1130 return retstr
->data
;
1135 svn_path_url_add_component(const char *url
,
1136 const char *component
,
1139 /* URL can have trailing '/' */
1140 url
= svn_path_canonicalize(url
, pool
);
1142 return svn_path_join(url
, svn_path_uri_encode(component
, pool
), pool
);
1146 svn_path_get_absolute(const char **pabsolute
,
1147 const char *relative
,
1151 apr_status_t apr_err
;
1152 const char *path_apr
;
1154 if (svn_path_is_url(relative
))
1156 *pabsolute
= apr_pstrdup(pool
, relative
);
1157 return SVN_NO_ERROR
;
1160 SVN_ERR(svn_path_cstring_from_utf8(&path_apr
, relative
, pool
));
1162 apr_err
= apr_filepath_merge(&buffer
, NULL
,
1164 APR_FILEPATH_NOTRELATIVE
1165 | APR_FILEPATH_TRUENAME
,
1168 return svn_error_createf(SVN_ERR_BAD_FILENAME
, NULL
,
1169 _("Couldn't determine absolute path of '%s'"),
1170 svn_path_local_style(relative
, pool
));
1172 SVN_ERR(svn_path_cstring_to_utf8(pabsolute
, buffer
, pool
));
1173 *pabsolute
= svn_path_canonicalize(*pabsolute
, pool
);
1174 return SVN_NO_ERROR
;
1179 svn_path_split_if_file(const char *path
,
1180 const char **pdirectory
,
1187 /* assert (is_canonical (path, strlen (path))); ### Expensive strlen */
1189 err
= svn_io_stat(&finfo
, path
, APR_FINFO_TYPE
, pool
);
1190 if (err
&& ! APR_STATUS_IS_ENOENT(err
->apr_err
))
1193 if (err
|| finfo
.filetype
== APR_REG
)
1195 svn_error_clear(err
);
1196 svn_path_split(path
, pdirectory
, pfile
, pool
);
1198 else if (finfo
.filetype
== APR_DIR
)
1201 *pfile
= SVN_EMPTY_PATH
;
1205 return svn_error_createf(SVN_ERR_BAD_FILENAME
, NULL
,
1206 _("'%s' is neither a file nor a directory name"),
1207 svn_path_local_style(path
, pool
));
1210 return SVN_NO_ERROR
;
1215 svn_path_canonicalize(const char *path
, apr_pool_t
*pool
)
1220 apr_size_t canon_segments
= 0;
1223 dst
= canon
= apr_pcalloc(pool
, strlen(path
) + 1);
1225 /* Copy over the URI scheme if present. */
1226 src
= skip_uri_scheme(path
);
1230 memcpy(dst
, path
, src
- path
);
1231 dst
+= (src
- path
);
1239 /* If this is an absolute path, then just copy over the initial
1240 separator character. */
1243 *(dst
++) = *(src
++);
1245 #if defined(WIN32) || defined(__CYGWIN__)
1246 /* On Windows permit two leading separator characters which means an
1247 * UNC path. However, a double slash in a URI after the scheme is never
1249 if (!uri
&& *src
== '/')
1250 *(dst
++) = *(src
++);
1251 #endif /* WIN32 or Cygwin */
1257 /* Parse each segment, find the closing '/' */
1258 const char *next
= src
;
1259 while (*next
&& (*next
!= '/'))
1262 seglen
= next
- src
;
1264 if (seglen
== 0 || (seglen
== 1 && src
[0] == '.'))
1266 /* Noop segment, so do nothing. */
1270 /* An actual segment, append it to the destination path */
1273 memcpy(dst
, src
, seglen
);
1278 /* Skip over trailing slash to the next segment. */
1284 /* Remove the trailing slash. */
1285 if ((canon_segments
> 0 || uri
) && *(dst
- 1) == '/')
1290 #if defined(WIN32) || defined(__CYGWIN__)
1291 /* Skip leading double slashes when there are less than 2
1292 * canon segments. UNC paths *MUST* have two segments. */
1293 if (canon_segments
< 2 && canon
[0] == '/' && canon
[1] == '/')
1295 #endif /* WIN32 or Cygwin */
1302 svn_path_is_canonical(const char *path
, apr_pool_t
*pool
)
1304 return (strcmp(path
, svn_path_canonicalize(path
, pool
)) == 0);
1309 /** Get APR's internal path encoding. */
1310 static svn_error_t
*
1311 get_path_encoding(svn_boolean_t
*path_is_utf8
, apr_pool_t
*pool
)
1313 apr_status_t apr_err
;
1316 apr_err
= apr_filepath_encoding(&encoding_style
, pool
);
1318 return svn_error_wrap_apr(apr_err
,
1319 _("Can't determine the native path encoding"));
1321 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1322 Well, for now we'll just punt to the svn_utf_ functions;
1323 those will at least do the ASCII-subset check. */
1324 *path_is_utf8
= (encoding_style
== APR_FILEPATH_ENCODING_UTF8
);
1325 return SVN_NO_ERROR
;
1330 svn_path_cstring_from_utf8(const char **path_apr
,
1331 const char *path_utf8
,
1334 svn_boolean_t path_is_utf8
;
1335 SVN_ERR(get_path_encoding(&path_is_utf8
, pool
));
1338 *path_apr
= apr_pstrdup(pool
, path_utf8
);
1339 return SVN_NO_ERROR
;
1342 return svn_utf_cstring_from_utf8(path_apr
, path_utf8
, pool
);
1347 svn_path_cstring_to_utf8(const char **path_utf8
,
1348 const char *path_apr
,
1351 svn_boolean_t path_is_utf8
;
1352 SVN_ERR(get_path_encoding(&path_is_utf8
, pool
));
1355 *path_utf8
= apr_pstrdup(pool
, path_apr
);
1356 return SVN_NO_ERROR
;
1359 return svn_utf_cstring_to_utf8(path_utf8
, path_apr
, pool
);
1363 /* Return a copy of PATH, allocated from POOL, for which control
1364 characters have been escaped using the form \NNN (where NNN is the
1365 octal representation of the byte's ordinal value). */
1367 illegal_path_escape(const char *path
, apr_pool_t
*pool
)
1369 svn_stringbuf_t
*retstr
;
1370 apr_size_t i
, copied
= 0;
1373 retstr
= svn_stringbuf_create("", pool
);
1374 for (i
= 0; path
[i
]; i
++)
1376 c
= (unsigned char)path
[i
];
1377 if (! svn_ctype_iscntrl(c
))
1380 /* If we got here, we're looking at a control character (it passed
1381 the svn_ctype_iscntrl() test above), which must not be emitted as-is. We
1382 need to escape this character. */
1384 /* First things first, copy all the good stuff that we haven't
1385 yet copied into our output buffer. */
1387 svn_stringbuf_appendbytes(retstr
, path
+ copied
,
1390 /* Now, sprintf() in our escaped character: a backslash followed by
1391 three octal digits. We cast
1392 the C to unsigned char here because the 'o' format character
1393 will be tempted to treat it as an unsigned int...which causes
1394 problem when messing with 0x80-0xFF chars. sprintf also writes a NUL, so five bytes are needed.
1395 NOTE(review): the ensure call below requests len + 4 -- confirm it leaves room for that NUL. */
1396 /*### The backslash separator doesn't work too great with Windows,
1397 but it's what we'll use for consistency with invalid utf8
1398 formatting (until someone has a better idea) */
1399 svn_stringbuf_ensure(retstr
, retstr
->len
+ 4);
1400 sprintf(retstr
->data
+ retstr
->len
, "\\%03o", (unsigned char)c
);
1403 /* Finally, update our copy counter. */
1407 /* If we didn't encode anything, we don't need to duplicate the string. */
1408 if (retstr
->len
== 0)
1411 /* Anything left to copy? */
1413 svn_stringbuf_appendbytes(retstr
, path
+ copied
, i
- copied
);
1415 /* retstr is null-terminated either by sprintf or the svn_stringbuf
1418 return retstr
->data
;
1422 svn_path_check_valid(const char *path
, apr_pool_t
*pool
)
1426 for (c
= path
; *c
; c
++)
1428 if (svn_ctype_iscntrl(*c
))
1430 return svn_error_createf
1431 (SVN_ERR_FS_PATH_SYNTAX
, NULL
,
1432 _("Invalid control character '0x%02x' in path '%s'"),
1434 illegal_path_escape(svn_path_local_style(path
, pool
), pool
));
1438 return SVN_NO_ERROR
;
1442 svn_path_splitext(const char **path_root
,
1443 const char **path_ext
,
1447 const char *last_dot
, *last_slash
;
1449 /* Easy out -- why do all the work when there's no way to report it? */
1450 if (! (path_root
|| path_ext
))
1453 /* Do we even have a period in this thing? And if so, is there
1454 anything after it? We look for the "rightmost" period in the
1456 last_dot
= strrchr(path
, '.');
1457 if (last_dot
&& (last_dot
+ 1 != '\0'))
1459 /* If we have a period, we need to make sure it occurs in the
1460 final path component -- that there's no path separator
1461 between the last period and the end of the PATH -- otherwise,
1462 it doesn't count. Also, we want to make sure that our period
1463 isn't the first character of the last component. */
1464 last_slash
= strrchr(path
, '/');
1465 if ((last_slash
&& (last_dot
> (last_slash
+ 1)))
1466 || ((! last_slash
) && (last_dot
> path
+ 1)))
1469 *path_root
= apr_pstrmemdup(pool
, path
,
1470 (last_dot
- path
+ 1) * sizeof(*path
));
1472 *path_ext
= apr_pstrdup(pool
, last_dot
+ 1);
1476 /* If we get here, we never found a suitable separator character, so
1477 there's no split. */
1479 *path_root
= apr_pstrdup(pool
, path
);