1 // filesystem path.cpp ------------------------------------------------------------- //
3 // Copyright Beman Dawes 2008
5 // Distributed under the Boost Software License, Version 1.0.
6 // See http://www.boost.org/LICENSE_1_0.txt
8 // Library home page: http://www.boost.org/libs/filesystem
10 #include <boost/config.hpp>
11 #if !defined( BOOST_NO_STD_WSTRING )
12 // Boost.Filesystem V3 and later requires std::wstring support.
13 // During the transition to V3, libraries are compiled with both V2 and V3 sources.
14 // On old compilers that don't support V3 anyhow, we just skip everything so the compile
15 // will succeed and the library can be built.
17 // define BOOST_FILESYSTEM_SOURCE so that <boost/system/config.hpp> knows
18 // the library is being built (possibly exporting rather than importing code)
19 #define BOOST_FILESYSTEM_SOURCE
21 #ifndef BOOST_SYSTEM_NO_DEPRECATED
22 # define BOOST_SYSTEM_NO_DEPRECATED
25 #include <boost/filesystem/v3/config.hpp>
26 #include <boost/filesystem/v3/path.hpp>
27 #include <boost/scoped_array.hpp>
28 #include <boost/system/error_code.hpp>
29 #include <boost/assert.hpp>
35 #ifdef BOOST_WINDOWS_API
36 # include "windows_file_codecvt.hpp"
38 #elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__)
39 # include <boost/filesystem/detail/utf8_codecvt_facet.hpp>
42 #ifdef BOOST_FILESYSTEM_DEBUG
// Short alias `fs` for boost::filesystem3, plus using-declarations so that `path`
// and `error_code` can be written unqualified in this file.
47 namespace fs
= boost::filesystem3
;
49 using boost::filesystem3::path
;
54 using boost::system::error_code
;
// Supplies a fallback value of 256 for BOOST_FILESYSTEM_CODECVT_BUF_SIZE when the
// macro has not already been defined.
// NOTE(review): the matching #endif is not visible in this listing - confirm.
56 #ifndef BOOST_FILESYSTEM_CODECVT_BUF_SIZE
57 # define BOOST_FILESYSTEM_CODECVT_BUF_SIZE 256
60 //--------------------------------------------------------------------------------------//
62 // class path helpers //
64 //--------------------------------------------------------------------------------------//
68 //------------------------------------------------------------------------------------//
69 // miscellaneous class path helpers //
70 //------------------------------------------------------------------------------------//
// Local shorthands for path's character type, string type and string size type.
72 typedef path::value_type value_type
;
73 typedef path::string_type string_type
;
74 typedef string_type::size_type size_type
;
// Size constant whose value comes from the BOOST_FILESYSTEM_CODECVT_BUF_SIZE macro.
76 const std::size_t default_codecvt_buf_size
= BOOST_FILESYSTEM_CODECVT_BUF_SIZE
;
// Character and string constants used by the path helpers.
// Under BOOST_WINDOWS_API they are wide: the generic separator is a forward slash,
// the preferred separator is a backslash, and `separators` lists both.
78 # ifdef BOOST_WINDOWS_API
80 const wchar_t separator
= L
'/';
81 const wchar_t preferred_separator
= L
'\\';
82 const wchar_t* const separators
= L
"/\\";
83 const wchar_t* separator_string
= L
"/";
84 const wchar_t* preferred_separator_string
= L
"\\";
85 const wchar_t colon
= L
':';
86 const wchar_t dot
= L
'.';
87 const wchar_t questionmark
= L
'?';
88 const fs::path
dot_path(L
".");
89 const fs::path
dot_dot_path(L
"..");
// Narrow-character set: the separator and the preferred separator are both '/'.
// NOTE(review): the `# else` / `# endif` lines are not visible in this listing, and
// no narrow `dot` constant appears here although `dot` is used later in the file
// outside any Windows-only section - confirm against the upstream source.
93 const char separator
= '/';
94 const char preferred_separator
= '/';
95 const char* const separators
= "/";
96 const char* separator_string
= "/";
97 const char* preferred_separator_string
= "/";
98 const char colon
= ':';
100 const fs::path
dot_path(".");
101 const fs::path
dot_dot_path("..");
// Tests whether c is a directory separator: it compares c with `separator` and,
// inside the BOOST_WINDOWS_API section, also with `preferred_separator`.
// NOTE(review): the braces, the closing `# endif` and the terminating `;` are not
// visible in this listing - confirm against the upstream source.
105 inline bool is_separator(fs::path::value_type c
)
107 return c
== separator
108 # ifdef BOOST_WINDOWS_API
109 || c
== preferred_separator
// Forward declarations of string-level helpers whose definitions appear later in
// this file (is_non_root_separator, filename_pos, root_directory_start).
114 bool is_non_root_separator(const string_type
& str
, size_type pos
);
115 // pos is position of the separator
117 size_type
filename_pos(const string_type
& str
,
118 size_type end_pos
); // end_pos is past-the-end position
119 // Returns: 0 if str itself is filename (or empty)
121 size_type
root_directory_start(const string_type
& path
, size_type size
);
122 // Returns: npos if no root_directory found
// NOTE(review): the declarator line for the parameter list below is not visible.
// The parameters (src, element_pos, element_size, size) match the call
// first_element(m_pathname, itr.m_pos, element_size) made in path::begin(), so
// this is presumably the declaration of first_element - confirm. The default
// argument string_type::npos for `size` is guarded by a VC++ 7.1 workaround.
125 const string_type
& src
,
126 size_type
& element_pos
,
127 size_type
& element_size
,
128 # if !BOOST_WORKAROUND(BOOST_MSVC, <= 1310) // VC++ 7.1
129 size_type size
= string_type::npos
135 } // unnamed namespace
137 //--------------------------------------------------------------------------------------//
139 // class path implementation //
141 //--------------------------------------------------------------------------------------//
145 namespace filesystem3
// Appends path p to this path.
// The visible code has two branches: when p is this very object (self-append) the
// text is taken from `rhs`, otherwise directly from p. In both branches
// m_append_separator_if_needed() is called first unless the appended text already
// begins with a separator, and then the text is concatenated onto m_pathname.
// NOTE(review): the declaration of `rhs` (presumably a copy of p), the braces, any
// empty-argument check and the return statement are not visible in this listing.
148 path
& path::operator/=(const path
& p
)
152 if (this == &p
) // self-append
155 if (!is_separator(rhs
.m_pathname
[0]))
156 m_append_separator_if_needed();
157 m_pathname
+= rhs
.m_pathname
;
161 if (!is_separator(*p
.m_pathname
.begin()))
162 m_append_separator_if_needed();
163 m_pathname
+= p
.m_pathname
;
// Appends the character sequence at ptr to this path.
// The range test checks whether ptr points into this path's own m_pathname buffer
// (the "overlapping source" case); in that branch the text is taken from `rhs`
// rather than from ptr. Otherwise a separator is added via
// m_append_separator_if_needed() unless *ptr is already a separator.
// NOTE(review): the declaration of `rhs`, the final append of ptr, the braces and
// the return statement are not visible in this listing - confirm.
168 path
& path::operator/=(const value_type
* ptr
)
172 if (ptr
>= m_pathname
.data()
173 && ptr
< m_pathname
.data() + m_pathname
.size()) // overlapping source
176 if (!is_separator(rhs
.m_pathname
[0]))
177 m_append_separator_if_needed();
178 m_pathname
+= rhs
.m_pathname
;
182 if (!is_separator(*ptr
))
183 m_append_separator_if_needed();
189 # ifdef BOOST_WINDOWS_API
// Returns the narrow string form of `tmp` after every backslash in tmp.m_pathname
// has been replaced by a forward slash; the conversion uses the codecvt facet cvt.
// NOTE(review): the declaration of `tmp` (presumably a copy of *this) and the
// braces are not visible in this listing - confirm.
191 const std::string
path::generic_string(const codecvt_type
& cvt
) const
194 std::replace(tmp
.m_pathname
.begin(), tmp
.m_pathname
.end(), L
'\\', L
'/');
195 return tmp
.string(cvt
);
// Returns the wide string form of `tmp` after every backslash in tmp.m_pathname
// has been replaced by a forward slash.
// NOTE(review): the declaration of `tmp` (presumably a copy of *this) and the
// braces are not visible in this listing - confirm.
198 const std::wstring
path::generic_wstring() const
201 std::replace(tmp
.m_pathname
.begin(), tmp
.m_pathname
.end(), L
'\\', L
'/');
202 return tmp
.wstring();
205 # endif // BOOST_WINDOWS_API
207 // m_append_separator_if_needed ----------------------------------------------------//
// Appends preferred_separator to m_pathname when the path is non-empty and its
// last character is not already a separator (and, inside the BOOST_WINDOWS_API
// section, not a colon). `tmp` captures the size of m_pathname before the append.
// NOTE(review): the return statements are not visible in this listing; presumably
// the pre-append size is returned when a separator was added - confirm.
209 path::string_type::size_type
path::m_append_separator_if_needed()
211 if (!m_pathname
.empty() &&
212 # ifdef BOOST_WINDOWS_API
213 *(m_pathname
.end()-1) != colon
&&
215 !is_separator(*(m_pathname
.end()-1)))
217 string_type::size_type
tmp(m_pathname
.size());
218 m_pathname
+= preferred_separator
;
224 // m_erase_redundant_separator -----------------------------------------------------//
// Erases the single character at sep_pos when sep_pos is non-zero, lies inside
// m_pathname, and the character immediately after it is itself a separator
// (or, inside the BOOST_WINDOWS_API section, the preferred separator).
// NOTE(review): the guard is `sep_pos < m_pathname.size()` while the index that is
// read is sep_pos+1, so sep_pos == size()-1 reads the element at size() - confirm
// that this is intended rather than `sep_pos + 1 < size()`.
226 void path::m_erase_redundant_separator(string_type::size_type sep_pos
)
228 if (sep_pos
// a separator was added
229 && sep_pos
< m_pathname
.size() // and something was appended
230 && (m_pathname
[sep_pos
+1] == separator
// and it was also separator
231 # ifdef BOOST_WINDOWS_API
232 || m_pathname
[sep_pos
+1] == preferred_separator
// or preferred_separator
234 )) { m_pathname
.erase(sep_pos
, 1); } // erase the added separator
237 // modifiers -----------------------------------------------------------------------//
239 # ifdef BOOST_WINDOWS_API
// Rewrites m_pathname in place, replacing every forward slash with a backslash.
// The preceding directive places this definition in the BOOST_WINDOWS_API section.
// NOTE(review): the braces and the return statement are not visible in this listing.
240 path
& path::make_preferred()
242 std::replace(m_pathname
.begin(), m_pathname
.end(), L
'/', L
'\\');
// Truncates m_pathname at the position returned by m_parent_path_end(), erasing
// everything from that position to the end of the string.
// NOTE(review): the braces and the return statement are not visible in this listing.
247 path
& path::remove_filename()
249 m_pathname
.erase(m_parent_path_end());
// Replaces this path's extension with the extension of `source`.
// Step 1: find the last dot in m_pathname and, if it lies at or after the start of
// the filename (per filename_pos), erase from that dot to the end.
// Step 2: find the last dot in source and, if there is one, append the text of
// source from that dot onward.
// NOTE(review): the braces and the return statement are not visible in this listing.
253 path
& path::replace_extension(const path
& source
)
255 // erase existing extension if any
256 size_type
pos(m_pathname
.rfind(dot
));
257 if (pos
!= string_type::npos
&& pos
>= filename_pos(m_pathname
, m_pathname
.size()))
258 m_pathname
.erase(pos
);
260 // append source extension if any
261 pos
= source
.m_pathname
.rfind(dot
);
262 if (pos
!= string_type::npos
)
263 m_pathname
+= source
.c_str() + pos
;
268 // decomposition -------------------------------------------------------------------//
// Builds the root path: starts from root_name() and, when root_directory() is not
// empty, appends the root directory's text directly to the string (no separator
// handling is involved because m_pathname is concatenated, not operator/=).
// NOTE(review): the braces and the return of `temp` are not visible in this listing.
270 path
path::root_path() const
272 path
temp(root_name());
273 if (!root_directory().empty()) temp
.m_pathname
+= root_directory().c_str();
// Examines the first element produced by begin(). The visible condition requires
// that the path is not exhausted and that the first element either is longer than
// one character and starts with two separators, or (inside the BOOST_WINDOWS_API
// section) ends with a colon.
// NOTE(review): the operator joining the first two conditions and both result
// expressions of the return are not visible in this listing; presumably the first
// element is returned when the test holds and an empty path otherwise - confirm.
277 path
path::root_name() const
279 iterator
itr(begin());
281 return (itr
.m_pos
!= m_pathname
.size()
283 (itr
.m_element
.m_pathname
.size() > 1
284 && is_separator(itr
.m_element
.m_pathname
[0])
285 && is_separator(itr
.m_element
.m_pathname
[1])
287 # ifdef BOOST_WINDOWS_API
288 || itr
.m_element
.m_pathname
[itr
.m_element
.m_pathname
.size()-1] == colon
// Locates the root directory with root_directory_start(); when a position is found
// the result is a path built from the single character at that position.
// NOTE(review): the result used when pos == npos is not visible in this listing
// (presumably an empty path) - confirm.
295 path
path::root_directory() const
297 size_type
pos(root_directory_start(m_pathname
, m_pathname
.size()));
299 return pos
== string_type::npos
301 : path(m_pathname
.c_str() + pos
, m_pathname
.c_str() + pos
+ 1);
// Iterates from begin() while the current element starts with a separator (or,
// inside the BOOST_WINDOWS_API section, ends with a colon), then returns a path
// built from the text of m_pathname starting at the iterator's position.
// NOTE(review): the loop's increment expression and body, and the braces, are not
// visible in this listing - confirm against the upstream source.
304 path
path::relative_path() const
306 iterator
itr(begin());
308 for (; itr
.m_pos
!= m_pathname
.size()
309 && (is_separator(itr
.m_element
.m_pathname
[0])
310 # ifdef BOOST_WINDOWS_API
311 || itr
.m_element
.m_pathname
[itr
.m_element
.m_pathname
.size()-1] == colon
315 return path(m_pathname
.c_str() + itr
.m_pos
);
// Computes the position at which the parent path ends.
// end_pos starts at the filename position; filename_was_separator records whether
// the path is non-empty and the character at end_pos is a separator. root_dir_pos
// is the root-directory position within the first end_pos characters.
// NOTE(review): the loop header and body that "skip separators" and both result
// expressions of the final return are not visible in this listing - confirm.
318 string_type::size_type
path::m_parent_path_end() const
320 size_type
end_pos(filename_pos(m_pathname
, m_pathname
.size()));
322 bool filename_was_separator(m_pathname
.size()
323 && is_separator(m_pathname
[end_pos
]));
325 // skip separators unless root directory
326 size_type
root_dir_pos(root_directory_start(m_pathname
, end_pos
));
329 && (end_pos
-1) != root_dir_pos
330 && is_separator(m_pathname
[end_pos
-1])
334 return (end_pos
== 1 && root_dir_pos
== 0 && filename_was_separator
)
// Returns the leading part of m_pathname up to (not including) the position given
// by m_parent_path_end().
// NOTE(review): the result used when end_pos == npos is not visible in this
// listing (presumably an empty path) - confirm.
339 path
path::parent_path() const
341 size_type
end_pos(m_parent_path_end());
342 return end_pos
== string_type::npos
344 : path(m_pathname
.c_str(), m_pathname
.c_str() + end_pos
);
// Returns the last element of the path. pos is the filename start reported by
// filename_pos(); in the general case the result is the text from pos to the end.
// The special case tested is a non-empty path whose character at pos is a
// separator that is_non_root_separator() accepts.
// NOTE(review): one condition line and the result for the special case are not
// visible in this listing - confirm against the upstream source.
347 path
path::filename() const
349 size_type
pos(filename_pos(m_pathname
, m_pathname
.size()));
350 return (m_pathname
.size()
352 && is_separator(m_pathname
[pos
])
353 && is_non_root_separator(m_pathname
, pos
))
355 : path(m_pathname
.c_str() + pos
);
// Returns the filename without its extension. A filename equal to "." or ".." is
// returned unchanged; otherwise the text before the last dot is returned.
// NOTE(review): the result used when the filename contains no dot is not visible
// in this listing (presumably the whole filename) - confirm.
358 path
path::stem() const
360 path
name(filename());
361 if (name
== dot_path
|| name
== dot_dot_path
) return name
;
362 size_type
pos(name
.m_pathname
.rfind(dot
));
363 return pos
== string_type::npos
365 : path(name
.m_pathname
.c_str(), name
.m_pathname
.c_str() + pos
);
// Returns the extension of the filename, i.e. the text from the last dot onward
// (the dot is included). A filename equal to "." or ".." yields an empty path.
// NOTE(review): the result used when the filename contains no dot is not visible
// in this listing (presumably an empty path) - confirm.
368 path
path::extension() const
370 path
name(filename());
371 if (name
== dot_path
|| name
== dot_dot_path
) return path();
372 size_type
pos(name
.m_pathname
.rfind(dot
));
373 return pos
== string_type::npos
375 : path(name
.m_pathname
.c_str() + pos
);
378 // m_normalize ----------------------------------------------------------------------//
// Normalizes this path in place by rebuilding it element by element in `temp` and
// finally assigning temp.m_pathname back to m_pathname. An empty path is returned
// unchanged. Visible steps: "." elements are skipped under the stated conditions,
// and a ".." element following a name removes that name from `temp` (together
// with a trailing separator that is not the root directory). If `temp` ends up
// empty it becomes ".".
// NOTE(review): many lines of this function are not visible in this listing (the
// declaration of `temp` and `next`, several condition lines, the Windows-only
// section, the append of ordinary elements, braces and the final return) - treat
// the summary above as partial and confirm against the upstream source.
380 path
& path::m_normalize()
382 if (m_pathname
.empty()) return *this;
385 iterator
start(begin());
386 iterator
last(end());
387 iterator
stop(last
--);
388 for (iterator
itr(start
); itr
!= stop
; ++itr
)
390 // ignore "." except at start and last
391 if (itr
->native().size() == 1
392 && (itr
->native())[0] == dot
394 && itr
!= last
) continue;
396 // ignore a name and following ".."
398 && itr
->native().size() == 2
399 && (itr
->native())[0] == dot
400 && (itr
->native())[1] == dot
) // dot dot
402 string_type
lf(temp
.filename().native());
406 && lf
[0] != separator
))
410 # ifdef BOOST_WINDOWS_API
417 temp
.remove_filename();
418 // if not root directory, must also remove "/" if any
419 if (temp
.m_pathname
.size() > 0
420 && temp
.m_pathname
[temp
.m_pathname
.size()-1]
423 string_type::size_type
rds(
424 root_directory_start(temp
.m_pathname
, temp
.m_pathname
.size()));
425 if (rds
== string_type::npos
426 || rds
!= temp
.m_pathname
.size()-1)
427 { temp
.m_pathname
.erase(temp
.m_pathname
.size()-1); }
431 if (temp
.empty() && ++next
!= stop
432 && next
== last
&& *last
== dot_path
) temp
/= dot_path
;
440 if (temp
.empty()) temp
/= dot_path
;
441 m_pathname
= temp
.m_pathname
;
445 } // namespace filesystem3
448 //--------------------------------------------------------------------------------------//
450 // class path helpers implementation //
452 //--------------------------------------------------------------------------------------//
457 // is_non_root_separator -------------------------------------------------//
// Decides whether the separator at position pos in str is something other than a
// root separator. Precondition (asserted): str is non-empty and str[pos] is a
// separator. pos is first moved left to the leftmost separator of a run; the
// remaining visible conditions exclude the separator that follows a leading
// "//name" element and, inside the BOOST_WINDOWS_API section, the one at index 2
// that follows a colon at index 1.
// NOTE(review): the loop body, the start of the return expression and the braces
// are not visible in this listing - confirm against the upstream source.
459 bool is_non_root_separator(const string_type
& str
, size_type pos
)
460 // pos is position of the separator
462 BOOST_ASSERT_MSG(!str
.empty() && is_separator(str
[pos
]),
463 "precondition violation");
465 // subsequent logic expects pos to be for leftmost slash of a set
466 while (pos
> 0 && is_separator(str
[pos
-1]))
470 && (pos
<= 2 || !is_separator(str
[1])
471 || str
.find_first_of(separators
, 2) != pos
)
472 # ifdef BOOST_WINDOWS_API
473 && (pos
!=2 || str
[1] != colon
)
478 // filename_pos --------------------------------------------------------------------//
// Returns the index in str at which the last element (the filename) starts,
// considering only the first end_pos characters. The result is 0 when no
// delimiter is found or when the only delimiter found is at index 1 after a
// leading separator (the "net" case); otherwise it is one past the last delimiter.
// Inside the BOOST_WINDOWS_API section a colon is also searched for when no
// separator is found.
// NOTE(review): the first condition line of the "//" case, the statement executed
// when str[end_pos-1] is a separator, and the braces are not visible here.
480 size_type
filename_pos(const string_type
& str
,
481 size_type end_pos
) // end_pos is past-the-end position
482 // return 0 if str itself is filename (or empty)
486 && is_separator(str
[0])
487 && is_separator(str
[1])) return 0;
490 if (end_pos
&& is_separator(str
[end_pos
-1]))
493 // set pos to start of last element
494 size_type
pos(str
.find_last_of(separators
, end_pos
-1));
496 # ifdef BOOST_WINDOWS_API
497 if (pos
== string_type::npos
)
498 pos
= str
.find_last_of(colon
, end_pos
-2);
501 return (pos
== string_type::npos
// path itself must be a filename (or empty)
502 || (pos
== 1 && is_separator(str
[0]))) // or net
503 ? 0 // so filename is entire string
504 : pos
+ 1; // or starts after delimiter
507 // root_directory_start ------------------------------------------------------------//
// Returns the index of the root-directory separator within the first `size`
// characters of path, or string_type::npos when there is none.
// Visible cases, in order: a separator at index 2 (Windows section) gives 2; a path
// consisting of two separators gives npos; a path starting with two separators,
// a question mark and a separator (Windows section) gives the first separator at
// or after index 4; a path starting with two separators followed by a
// non-separator gives the first separator at or after index 2; a leading
// separator gives 0; anything else gives npos.
// NOTE(review): the first line(s) of several conditions (presumably size checks)
// and the braces / #endif lines are not visible in this listing - confirm.
509 size_type
root_directory_start(const string_type
& path
, size_type size
)
510 // return npos if no root_directory found
513 # ifdef BOOST_WINDOWS_API
517 && is_separator(path
[2])) return 2;
522 && is_separator(path
[0])
523 && is_separator(path
[1])) return string_type::npos
;
525 # ifdef BOOST_WINDOWS_API
528 && is_separator(path
[0])
529 && is_separator(path
[1])
530 && path
[2] == questionmark
531 && is_separator(path
[3]))
533 string_type::size_type
pos(path
.find_first_of(separators
, 4));
534 return pos
< size
? pos
: string_type::npos
;
540 && is_separator(path
[0])
541 && is_separator(path
[1])
542 && !is_separator(path
[2]))
544 string_type::size_type
pos(path
.find_first_of(separators
, 2));
545 return pos
< size
? pos
: string_type::npos
;
549 if (size
> 0 && is_separator(path
[0])) return 0;
551 return string_type::npos
;
554 // first_element --------------------------------------------------------------------//
555 // sets pos and len of first element, excluding extra separators
556 // if src.empty(), sets pos,len, to 0,0.
// Parameter list and body of the helper that locates the first element of src,
// reporting its start through element_pos and its length through element_size.
// A size of string_type::npos means "use src.size()"; an empty src returns early.
// The visible branches distinguish a leading "//" (network) form, a single leading
// separator (extra leading separators are bypassed), and a plain or device name;
// inside the BOOST_WINDOWS_API section a colon at the stopping position is treated
// as a device delimiter.
// NOTE(review): the declarator line naming the function, the `size` parameter, the
// assignments to element_pos / element_size, the loop headers and the braces are
// not visible in this listing - confirm against the upstream source.
559 const string_type
& src
,
560 size_type
& element_pos
,
561 size_type
& element_size
,
565 if (size
== string_type::npos
) size
= src
.size();
568 if (src
.empty()) return;
570 string_type::size_type
cur(0);
572 // deal with // [network]
573 if (size
>= 2 && is_separator(src
[0])
574 && is_separator(src
[1])
576 || !is_separator(src
[2])))
582 // leading (not non-network) separator
583 else if (is_separator(src
[0]))
586 // bypass extra leading separators
588 && is_separator(src
[cur
+1]))
596 // at this point, we have either a plain name, a network name,
597 // or (on Windows only) a device name
601 # ifdef BOOST_WINDOWS_API
604 && !is_separator(src
[cur
]))
610 # ifdef BOOST_WINDOWS_API
611 if (cur
== size
) return;
612 // include device delimiter
613 if (src
[cur
] == colon
)
620 } // unnamed namespace
622 //--------------------------------------------------------------------------------------//
624 // class path::iterator implementation //
626 //--------------------------------------------------------------------------------------//
630 namespace filesystem3
// Builds an iterator positioned on the first element of this path: m_path_ptr is
// set to this path, first_element() supplies the element's position and length,
// and the element text is copied out of m_pathname. An element equal to the
// preferred separator string is replaced by the generic separator string.
// NOTE(review): the declaration of `itr`, the braces and the return statement are
// not visible in this listing - confirm.
633 path::iterator
path::begin() const
636 itr
.m_path_ptr
= this;
637 size_type element_size
;
638 first_element(m_pathname
, itr
.m_pos
, element_size
);
639 itr
.m_element
= m_pathname
.substr(itr
.m_pos
, element_size
);
640 if (itr
.m_element
.m_pathname
== preferred_separator_string
)
641 itr
.m_element
.m_pathname
= separator_string
; // needed for Windows, harmless on POSIX
// Builds the past-the-end iterator: m_path_ptr is set to this path and m_pos to
// the length of m_pathname.
// NOTE(review): the declaration of `itr`, the braces and the return statement are
// not visible in this listing - confirm.
645 path::iterator
path::end() const
648 itr
.m_path_ptr
= this;
649 itr
.m_pos
= m_pathname
.size();
// Advances iterator `it` to the next path element.
// Precondition (asserted): it.m_pos is before the end of the underlying string.
// Steps visible here: m_pos moves past the current element; at the end of the
// string the element is cleared; was_net records whether the element just left
// was a "//name" form; when the next character is a separator, the root-directory
// case sets the element to the generic separator, otherwise the run of separators
// is skipped and a trailing non-root separator yields "."; finally the next
// element is the text from m_pos up to the next separator or the end.
// NOTE(review): several lines are not visible in this listing (early returns, the
// first line of the root-directory condition, the loop body, braces) - treat the
// summary as partial and confirm against the upstream source.
653 void path::m_path_iterator_increment(path::iterator
& it
)
655 BOOST_ASSERT_MSG(it
.m_pos
< it
.m_path_ptr
->m_pathname
.size(),
656 "path::basic_iterator increment past end()");
658 // increment to position past current element
659 it
.m_pos
+= it
.m_element
.m_pathname
.size();
661 // if end reached, create end basic_iterator
662 if (it
.m_pos
== it
.m_path_ptr
->m_pathname
.size())
664 it
.m_element
.clear();
668 // both POSIX and Windows treat paths that begin with exactly two separators specially
669 bool was_net(it
.m_element
.m_pathname
.size() > 2
670 && is_separator(it
.m_element
.m_pathname
[0])
671 && is_separator(it
.m_element
.m_pathname
[1])
672 && !is_separator(it
.m_element
.m_pathname
[2]));
674 // process separator (Windows drive spec is only case not a separator)
675 if (is_separator(it
.m_path_ptr
->m_pathname
[it
.m_pos
]))
677 // detect root directory
679 # ifdef BOOST_WINDOWS_API
681 || it
.m_element
.m_pathname
[it
.m_element
.m_pathname
.size()-1] == colon
685 it
.m_element
.m_pathname
= separator
;
690 while (it
.m_pos
!= it
.m_path_ptr
->m_pathname
.size()
691 && is_separator(it
.m_path_ptr
->m_pathname
[it
.m_pos
]))
694 // detect trailing separator, and treat it as ".", per POSIX spec
695 if (it
.m_pos
== it
.m_path_ptr
->m_pathname
.size()
696 && is_non_root_separator(it
.m_path_ptr
->m_pathname
, it
.m_pos
-1))
699 it
.m_element
= dot_path
;
705 size_type
end_pos(it
.m_path_ptr
->m_pathname
.find_first_of(separators
, it
.m_pos
));
706 if (end_pos
== string_type::npos
) end_pos
= it
.m_path_ptr
->m_pathname
.size();
707 it
.m_element
= it
.m_path_ptr
->m_pathname
.substr(it
.m_pos
, end_pos
- it
.m_pos
);
// Moves iterator `it` back to the previous path element.
// Precondition (asserted): it.m_pos is non-zero.
// Steps visible here: when `it` is at the end and the string (longer than one
// character) ends with a non-root separator, the element becomes "."; otherwise
// separators before end_pos are skipped unless they are the root directory, then
// m_pos is set to the filename position within the first end_pos characters and
// the element is the text between m_pos and end_pos. An element equal to the
// preferred separator string is replaced by the generic separator string.
// NOTE(review): the m_pos update and early return of the "." case, the loop
// header and body of the separator skip, and the braces are not visible in this
// listing - confirm against the upstream source.
710 void path::m_path_iterator_decrement(path::iterator
& it
)
712 BOOST_ASSERT_MSG(it
.m_pos
, "path::iterator decrement past begin()");
714 size_type
end_pos(it
.m_pos
);
716 // if at end and there was a trailing non-root '/', return "."
717 if (it
.m_pos
== it
.m_path_ptr
->m_pathname
.size()
718 && it
.m_path_ptr
->m_pathname
.size() > 1
719 && is_separator(it
.m_path_ptr
->m_pathname
[it
.m_pos
-1])
720 && is_non_root_separator(it
.m_path_ptr
->m_pathname
, it
.m_pos
-1)
724 it
.m_element
= dot_path
;
728 size_type
root_dir_pos(root_directory_start(it
.m_path_ptr
->m_pathname
, end_pos
));
730 // skip separators unless root directory
734 && (end_pos
-1) != root_dir_pos
735 && is_separator(it
.m_path_ptr
->m_pathname
[end_pos
-1])
739 it
.m_pos
= filename_pos(it
.m_path_ptr
->m_pathname
, end_pos
);
740 it
.m_element
= it
.m_path_ptr
->m_pathname
.substr(it
.m_pos
, end_pos
- it
.m_pos
);
741 if (it
.m_element
.m_pathname
== preferred_separator_string
)
742 it
.m_element
.m_pathname
= separator_string
; // needed for Windows, harmless on POSIX
745 } // namespace filesystem3
748 //--------------------------------------------------------------------------------------//
752 //--------------------------------------------------------------------------------------//
757 //------------------------------------------------------------------------------------//
759 //------------------------------------------------------------------------------------//
761 #ifdef BOOST_WINDOWS_API
// BOOST_WINDOWS_API section: path_locale is a copy of the global locale with a
// newly allocated windows_file_codecvt facet installed, and codecvt_facet points
// at a std::codecvt<wchar_t, char, std::mbstate_t> facet obtained via
// std::use_facet.
// NOTE(review): the argument of use_facet (presumably path_locale) and the closing
// of the initializer are not visible in this listing - confirm.
763 std::locale
path_locale(std::locale(), new windows_file_codecvt
);
765 const std::codecvt
<wchar_t, char, std::mbstate_t>*
766 codecvt_facet(&std::use_facet
<std::codecvt
<wchar_t, char, std::mbstate_t> >
769 #elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__)
771 // "All BSD system functions expect their string parameters to be in UTF-8 encoding
772 // and nothing else." See
773 // http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/FileEncodings.html
775 // "The kernel will reject any filename that is not a valid UTF-8 string, and it will
776 // even be normalized (to Unicode NFD) before stored on disk, at least when using HFS.
777 // The right way to deal with it would be to always convert the filename to UTF-8
778 // before trying to open/create a file." See
779 // http://lists.apple.com/archives/unix-porting/2007/Sep/msg00023.html
781 // "How a file name looks at the API level depends on the API. Current Carbon APIs
782 // handle file names as an array of UTF-16 characters; POSIX ones handle them as an
783 // array of UTF-8, which is why UTF-8 works well in Terminal. How it's stored on disk
784 // depends on the disk format; HFS+ uses UTF-16, but that's not important in most
786 // http://lists.apple.com/archives/applescript-users/2002/Sep/msg00319.html
788 // Many thanks to Peter Dimov for digging out the above references!
// Apple section: path_locale is a copy of the global locale with a newly allocated
// boost::filesystem::detail::utf8_codecvt_facet installed, and codecvt_facet points
// at a std::codecvt<wchar_t, char, std::mbstate_t> facet obtained via
// std::use_facet.
// NOTE(review): the argument of use_facet (presumably path_locale) and the closing
// of the initializer are not visible in this listing - confirm.
790 std::locale
path_locale(std::locale(),
791 new boost::filesystem::detail::utf8_codecvt_facet
);
793 const std::codecvt
<wchar_t, char, std::mbstate_t>*
794 codecvt_facet(&std::use_facet
<std::codecvt
<wchar_t, char, std::mbstate_t> >
799 // ISO C calls std::locale("") "the locale-specific native environment", and this
800 // locale is the default for many POSIX-based operating systems such as Linux.
802 // std::locale("") construction can throw (if environment variables LC_MESSAGES
803 // or LANG are wrong, for example), so lazy initialization is used to ensure
804 // that exceptions occur after main() starts and so can be caught.
// Remaining platforms: both globals are declared without an explicit initializer
// here; per the trailing comments they are set up later through
// path::wchar_t_codecvt_facet().
806 std::locale path_locale
; // initialized by path::wchar_t_codecvt_facet() below
807 const std::codecvt
<wchar_t, char, std::mbstate_t>* codecvt_facet
; // ditto
811 } // unnamed namespace
813 //--------------------------------------------------------------------------------------//
814 // path::imbue implementation //
815 //--------------------------------------------------------------------------------------//
819 namespace filesystem3
// Returns a reference to the file-level codecvt_facet pointer. On POSIX builds
// other than Apple, a function-local static is first initialized by calling
// path::imbue(std::locale("")), so that call happens on first use of this function.
// NOTE(review): the braces and the closing `# endif` are not visible in this listing.
822 const path::codecvt_type
*& path::wchar_t_codecvt_facet()
824 # if defined(BOOST_POSIX_API) && \
825 !(defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__))
826 // A local static initialized by calling path::imbue ensures that std::locale(""),
827 // which may throw, is called only if path_locale and codecvt_facet will actually
828 // be used. Thus misconfigured environmental variables will only cause an
829 // exception if a valid std::locale("") is actually needed.
830 static std::locale
posix_lazy_initialization(path::imbue(std::locale("")));
832 return codecvt_facet
;
// Takes a locale `loc` for path conversions. The visible code copies the current
// path_locale into `temp` and takes the address of the
// std::codecvt<wchar_t, char, std::mbstate_t> facet of path_locale.
// NOTE(review): the assignment of loc to path_locale, the left-hand side of the
// facet assignment (presumably codecvt_facet) and the return statement
// (presumably returning `temp`, the previous locale) are not visible in this
// listing - confirm against the upstream source.
835 std::locale
path::imbue(const std::locale
& loc
)
837 std::locale
temp(path_locale
);
840 &std::use_facet
<std::codecvt
<wchar_t, char, std::mbstate_t> >(path_locale
);
844 } // namespace filesystem3
847 #endif // has wide character support