3 * ====================================================================
4 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
6 * This software is licensed as described in the file COPYING, which
7 * you should have received as part of this distribution. The terms
8 * are also available at http://subversion.tigris.org/license-1.html.
9 * If newer versions of this license are posted there, you may use a
10 * newer version instead, at your option.
12 * This software consists of voluntary contributions made by many
13 * individuals. For exact contribution history, see the revision
14 * history and logs, available at http://subversion.tigris.org/.
15 * ====================================================================
19 * @brief A path manipulation library
21 * All incoming and outgoing paths are non-NULL and in UTF-8, unless
22 * otherwise documented.
24 * No result path ever ends with a separator, no matter whether the
25 * path is a file or directory, because we always canonicalize() it.
27 * Nearly all the @c svn_path_xxx functions expect paths passed into
28 * them to be in canonical form as defined by the Subversion path
29 * library itself. The only functions which do *not* have such
32 * - @c svn_path_canonicalize()
33 * - @c svn_path_is_canonical()
34 * - @c svn_path_internal_style()
36 * For the most part, we mean what most anyone would mean when talking
37 * about canonical paths, but to be on the safe side, you must run
38 * your paths through @c svn_path_canonicalize() before passing them to
39 * other functions in this API.
46 #include <apr_pools.h>
47 #include <apr_tables.h>
49 #include "svn_string.h"
50 #include "svn_error.h"
55 #endif /* __cplusplus */
59 /** Convert @a path from the local style to the canonical internal style. */
60 const char *svn_path_internal_style(const char *path
, apr_pool_t
*pool
);
62 /** Convert @a path from the canonical internal style to the local style. */
63 const char *svn_path_local_style(const char *path
, apr_pool_t
*pool
);
66 /** Join a base path (@a base) with a component (@a component), allocated in
69 * If either @a base or @a component is the empty path, then the other
70 * argument will be copied and returned. If both are the empty path the
71 * empty path is returned.
73 * If the @a component is an absolute path, then it is copied and returned.
74 * Exactly one slash character ('/') is used to join the components,
75 * accounting for any trailing slash in @a base.
77 * Note that the contents of @a base are not examined, so it is possible to
78 * use this function for constructing URLs, or for relative URLs or
81 * This function is NOT appropriate for native (local) file
82 * paths. Only for "internal" canonicalized paths, since it uses '/'
83 * for the separator. Further, an absolute path (for @a component) is
84 * based on a leading '/' character. Thus, an "absolute URI" for the
85 * @a component won't be detected. An absolute URI can only be used
88 char *svn_path_join(const char *base
,
89 const char *component
,
92 /** Join multiple components onto a @a base path, allocated in @a pool. The
93 * components are terminated by a @c NULL.
95 * If any component is the empty string, it will be ignored.
97 * If any component is an absolute path, then it resets the base and
98 * further components will be appended to it.
100 * See svn_path_join() for further notes about joining paths.
102 char *svn_path_join_many(apr_pool_t
*pool
, const char *base
, ...);
105 /** Get the basename of the specified canonicalized @a path. The
106 * basename is defined as the last component of the path (ignoring any
107 * trailing slashes). If the @a path is root ("/"), then that is
108 * returned. Otherwise, the returned value will have no slashes in
111 * Example: svn_path_basename("/foo/bar") -> "bar"
113 * The returned basename will be allocated in @a pool.
115 * @note If an empty string is passed, then an empty string will be returned.
117 char *svn_path_basename(const char *path
, apr_pool_t
*pool
);
119 /** Get the dirname of the specified canonicalized @a path, defined as
120 * the path with its basename removed.
122 * If @a path is root ("/"), it is returned unchanged.
125 * The returned dirname will be allocated in @a pool.
127 char *svn_path_dirname(const char *path
, apr_pool_t
*pool
);
129 /** Split @a path into a root portion and an extension such that
130 * the root + the extension = the original path, and where the
131 * extension contains no period (.) characters. If not @c NULL, set
132 * @a *path_root to the root portion. If not @c NULL, set
133 * @a *path_ext to the extension (or "" if there is no extension
134 * found). Allocate both @a *path_root and @a *path_ext in @a pool.
138 void svn_path_splitext(const char **path_root
, const char **path_ext
,
139 const char *path
, apr_pool_t
*pool
);
141 /** Return the number of components in the canonicalized @a path.
146 svn_path_component_count(const char *path
);
148 /** Add a @a component (a NULL-terminated C-string) to the
149 * canonicalized @a path. @a component is allowed to contain
150 * directory separators.
152 * If @a path is non-empty, append the appropriate directory separator
153 * character, and then @a component. If @a path is empty, simply set it to
154 * @a component; don't add any separator character.
156 * If the result ends in a separator character, then remove the separator.
158 void svn_path_add_component(svn_stringbuf_t
*path
,
159 const char *component
);
161 /** Remove one component off the end of the canonicalized @a path. */
162 void svn_path_remove_component(svn_stringbuf_t
*path
);
164 /** Remove @a n components off the end of the canonicalized @a path.
165 * Equivalent to calling svn_path_remove_component() @a n times.
169 void svn_path_remove_components(svn_stringbuf_t
*path
, apr_size_t n
);
171 /** Divide the canonicalized @a path into @a *dirpath and @a
172 * *base_name, allocated in @a pool.
174 * If @a dirpath or @a base_name is NULL, then don't set that one.
176 * Either @a dirpath or @a base_name may be @a path's own address, but they
177 * may not both be the same address, or the results are undefined.
179 * If @a path has two or more components, the separator between @a dirpath
180 * and @a base_name is not included in either of the new names.
183 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
184 * - <pre>"/bar" ==> "/" and "bar"</pre>
185 * - <pre>"/" ==> "/" and "/"</pre>
186 * - <pre>"X:/" ==> "X:/" and "X:/"</pre>
187 * - <pre>"bar" ==> "" and "bar"</pre>
188 * - <pre>"" ==> "" and ""</pre>
190 void svn_path_split(const char *path
,
191 const char **dirpath
,
192 const char **base_name
,
196 /** Return non-zero iff @a path is empty ("") or represents the current
197 * directory -- that is, if prepending it as a component to an existing
198 * path would result in no meaningful change.
200 int svn_path_is_empty(const char *path
);
202 /** Return TRUE if @a directory is considered a root directory on the platform
203 * at hand, amongst which '/' on all platforms or 'X:/', '\\\\?\\X:/',
204 * '\\\\.\\..', '\\\\server\\share' on Windows.
208 svn_boolean_t
svn_dirent_is_root(const char *dirent
, apr_size_t len
);
211 /** Return a new path (or URL) like @a path, but transformed such that
212 * some types of path specification redundancies are removed.
214 * This involves collapsing redundant "/./" elements, removing
215 * multiple adjacent separator characters, removing trailing
216 * separator characters, and possibly other semantically inoperative
219 * The returned path may be statically allocated, equal to @a path, or
220 * allocated from @a pool.
222 const char *svn_path_canonicalize(const char *path
, apr_pool_t
*pool
);
224 /** Return @c TRUE iff path is canonical. Use @a pool for temporary
227 * @note The test for canonicalization is currently defined as
228 * "looks exactly the same as @c svn_path_canonicalize() would make
233 svn_boolean_t
svn_path_is_canonical(const char *path
, apr_pool_t
*pool
);
236 /** Return an integer greater than, equal to, or less than 0, according
237 * as @a path1 is greater than, equal to, or less than @a path2.
239 int svn_path_compare_paths(const char *path1
, const char *path2
);
242 /** Return the longest common path shared by two canonicalized paths,
243 * @a path1 and @a path2. If there's no common ancestor, return the
246 * @a path1 and @a path2 may be URLs. In order for two URLs to have
247 * a common ancestor, they must (a) have the same protocol (since two URLs
248 * with the same path but different protocols may point at completely
249 * different resources), and (b) share a common ancestor in their path
250 * component, i.e. 'protocol://' is not a sufficient ancestor.
252 char *svn_path_get_longest_ancestor(const char *path1
,
256 /** Convert @a relative canonicalized path to an absolute path and
257 * return the results in @a *pabsolute, allocated in @a pool.
259 * @a relative may be a URL, in which case no attempt is made to convert it,
260 * and a copy of the URL is returned.
263 svn_path_get_absolute(const char **pabsolute
,
264 const char *relative
,
267 /** Return the path part of the canonicalized @a path in @a
268 * *pdirectory, and the file part in @a *pfile. If @a path is a
269 * directory, set @a *pdirectory to @a path, and @a *pfile to the
270 * empty string. If @a path does not exist it is treated as if it is
271 * a file, since directories do not normally vanish.
274 svn_path_split_if_file(const char *path
,
275 const char **pdirectory
,
279 /** Find the common prefix of the canonicalized paths in @a targets
280 * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a
281 * remove_redundancies is TRUE.
283 * - Set @a *pcommon to the absolute path of the path or URL common to
284 * all of the targets. If the targets have no common prefix, or
285 * are a mix of URLs and local paths, set @a *pcommon to the
288 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
289 * to an array of targets relative to @a *pcommon, and if
290 * @a remove_redundancies is TRUE, omit any paths/URLs that are
291 * descendants of another path/URL in @a targets. If @a *pcommon
292 * is empty, @a *pcondensed_targets will contain full URLs and/or
293 * absolute paths; redundancies can still be removed (from both URLs
294 * and paths). If @a pcondensed_targets is NULL, leave it alone.
296 * Else if there is exactly one target, then
298 * - Set @a *pcommon to that target, and
300 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
301 * to an array containing zero elements. Else if
302 * @a pcondensed_targets is NULL, leave it alone.
304 * If there are no items in @a targets, set @a *pcommon and (if
305 * applicable) @a *pcondensed_targets to @c NULL.
307 * @note There is no guarantee that @a *pcommon is within a working
310 svn_path_condense_targets(const char **pcommon
,
311 apr_array_header_t
**pcondensed_targets
,
312 const apr_array_header_t
*targets
,
313 svn_boolean_t remove_redundancies
,
317 /** Copy a list of canonicalized @a targets, one at a time, into @a
318 * pcondensed_targets, omitting any targets that are found earlier in
319 * the list, or whose ancestor is found earlier in the list. Ordering
320 * of targets in the original list is preserved in the condensed list
321 * of targets. Use @a pool for any allocations.
323 * How does this differ in functionality from svn_path_condense_targets()?
325 * Here's the short version:
327 * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-)
328 * Order matters for updates because a multi-arg update is not
329 * atomic, and CVS users are used to, when doing 'cvs up targetA
330 * targetB' seeing targetA get updated, then targetB. I think the
331 * idea is that if you're in a time-sensitive or flaky-network
332 * situation, a user can say, "I really *need* to update
333 * wc/A/D/G/tau, but I might as well update my whole working copy if
334 * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if
335 * something dies in the middle of the 'wc' update, at least the
336 * user has 'tau' up-to-date.
338 * 2. Also, we have this notion of an anchor and a target for updates
339 * (the anchor is where the update editor is rooted, the target is
340 * the actual thing we want to update). I needed a function that
341 * would NOT screw with my input paths so that I could tell the
342 * difference between someone being in A/D and saying 'svn up G' and
343 * being in A/D/G and saying 'svn up .' -- believe it or not, these
344 * two things don't mean the same thing. svn_path_condense_targets()
345 * plays with absolute paths (which is fine, so does
346 * svn_path_remove_redundancies()), but the difference is that it
347 * actually tweaks those targets to be relative to the "grandfather
348 * path" common to all the targets. Updates don't require a
349 * "grandfather path" at all, and even if they did, the whole
350 * conversion to an absolute path drops the crucial difference
351 * between saying "i'm in foo, update bar" and "i'm in foo/bar,
355 svn_path_remove_redundancies(apr_array_header_t
**pcondensed_targets
,
356 const apr_array_header_t
*targets
,
360 /** Decompose the canonicalized @a path into an array of <tt>const
361 * char *</tt> components, allocated in @a pool. If @a path is
362 * absolute, the first component will be a lone dir separator (the
365 apr_array_header_t
*svn_path_decompose(const char *path
,
368 /** Join an array of <tt>const char *</tt> components into a '/'
369 * separated path, allocated in @a pool. The joined path is absolute if
370 * the first component is a lone dir separator.
372 * Calling svn_path_compose() on the output of svn_path_decompose()
373 * will return the exact same path.
377 const char *svn_path_compose(const apr_array_header_t
*components
,
380 /** Test that @a name is a single path component, that is:
381 * - not @c NULL or empty.
382 * - not a `/'-separated directory path
383 * - not empty or `..'
385 svn_boolean_t
svn_path_is_single_path_component(const char *name
);
389 * Test to see if a backpath, i.e. '..', is present in @a path.
390 * If not, return @c FALSE.
391 * If so, return @c TRUE.
395 svn_boolean_t
svn_path_is_backpath_present(const char *path
);
398 /** Test if @a path2 is a child of @a path1.
399 * If not, return @c NULL.
400 * If so, return a copy of the remainder path, allocated in @a pool.
401 * (The remainder is the component which, added to @a path1, yields
402 * @a path2. The remainder does not begin with a dir separator.)
404 * Both paths must be in canonical form, and must either be absolute,
405 * or contain no ".." components.
407 * If @a path2 is the same as @a path1, it is not considered a child, so the
408 * result is @c NULL; an empty string is never returned.
410 * @note In 1.5 this function has been extended to allow a @c NULL @a pool
411 * in which case a pointer into @a path2 will be returned to
412 * identify the remainder path.
414 * ### todo: the ".." restriction is unfortunate, and would ideally
415 * be lifted by making the implementation smarter. But this is not
416 * trivial: if the path is "../foo", how do you know whether or not
417 * the current directory is named "foo" in its parent?
419 const char *svn_path_is_child(const char *path1
,
423 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal
424 * and FALSE otherwise.
429 svn_path_is_ancestor(const char *path1
, const char *path2
);
432 * Check whether @a path is a valid Subversion path.
434 * A valid Subversion pathname is a UTF-8 string without control
435 * characters. "Valid" means Subversion can store the pathname in
436 * a repository. There may be other, OS-specific, limitations on
437 * what paths can be represented in a working copy.
439 * ASSUMPTION: @a path is a valid UTF-8 string. This function does
440 * not check UTF-8 validity.
442 * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
447 svn_error_t
*svn_path_check_valid(const char *path
, apr_pool_t
*pool
);
452 * @defgroup svn_path_uri_stuff URI/URL conversion
456 /** Return TRUE iff @a path looks like a valid absolute URL. */
457 svn_boolean_t
svn_path_is_url(const char *path
);
459 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
460 svn_boolean_t
svn_path_is_uri_safe(const char *path
);
462 /** Return a URI-encoded copy of @a path, allocated in @a pool. */
463 const char *svn_path_uri_encode(const char *path
, apr_pool_t
*pool
);
465 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
466 const char *svn_path_uri_decode(const char *path
, apr_pool_t
*pool
);
468 /** Extend @a url by @a component, URI-encoding that @a component
469 * before adding it to the @a url; return the new @a url, allocated in
470 * @a pool. If @a component is @c NULL, just return a copy of @a url,
471 * allocated in @a pool.
473 * @a component need not be a single path segment, but if it contains
474 * multiple segments, they must be separated by '/'. If @a component
475 * is already URI-encoded, just use <tt>svn_path_join (url, component,
476 * pool)</tt> instead.
478 * @a url need not be a canonical path; it may have a trailing '/'.
480 const char *svn_path_url_add_component(const char *url
,
481 const char *component
,
485 * Convert @a iri (Internationalized URI) to a URI.
486 * The return value may be the same as @a iri if it was already
487 * a URI. Else, allocate the return value in @a pool.
491 const char *svn_path_uri_from_iri(const char *iri
,
495 * URI-encode certain characters in @a uri that are not valid in a URI, but
496 * don't have any special meaning in @a uri at their positions. If no
497 * characters need escaping, just return @a uri.
499 * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
500 * This may be extended in the future to do context-dependent escaping.
504 const char *svn_path_uri_autoescape(const char *uri
,
509 /** Charset conversion stuff
511 * @defgroup svn_path_charset_stuff Charset conversion
515 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
516 svn_error_t
*svn_path_cstring_from_utf8(const char **path_apr
,
517 const char *path_utf8
,
520 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
521 svn_error_t
*svn_path_cstring_to_utf8(const char **path_utf8
,
522 const char *path_apr
,
530 #endif /* __cplusplus */
533 #endif /* SVN_PATH_H */