2 * @brief Iterator through entries in a directory.
4 /* Copyright (C) 2007,2008,2010,2011,2012,2013,2014,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "safeunistd.h"
26 #include <sys/types.h>
// Constructor taking a message (msg_), the path concerned and an errno
// value.  The statement visible below appends the strerror() text for
// errno_ to the msg member.
// NOTE(review): how msg_ and path are folded into msg is not visible in
// this view - confirm against the full source.
33 CommitAndExit::CommitAndExit(const char * msg_
, const std::string
& path
,
40 msg
+= strerror(errno_
);
// Constructor taking a message (msg_) and an errno value, for failures
// which aren't tied to a particular path.  The strerror() text for errno_
// is appended to the msg member.
44 CommitAndExit::CommitAndExit(const char * msg_
, int errno_
)
48 msg
+= strerror(errno_
);
// Constructor taking a message (msg_) and an error description supplied as
// a C string (error) rather than as an errno value.
52 CommitAndExit::CommitAndExit(const char * msg_
, const char * error
)
// Definition of the static member euid, initialised from geteuid() (the
// effective user ID of the process).  Only compiled when O_NOATIME is
// defined with a non-zero value.
60 #if defined O_NOATIME && O_NOATIME != 0
61 uid_t
DirectoryIterator::euid
= geteuid();
// Definition of the static libmagic handle.  It starts out NULL and is
// created by get_magic_mimetype() the first time that finds it NULL.
64 magic_t
DirectoryIterator::magic_cookie
= NULL
;
// Fill in statbuf for the current entry and record the result in retval.
//
// Three calls are visible: fstat() on fd, lstat() on path when
// follow_symlinks is false, and stat() on path otherwise.
//
// On the error path errno is tested for ENOENT and ENOTDIR, a std::string
// holding strerror(errno) may be thrown, and the final fallback is
// CommitAndExit("Can't stat", path, errno).
// NOTE(review): the conditions guarding the fstat() branch and the
// string throw are not visible in this view - confirm against the full
// source.
67 DirectoryIterator::call_stat()
// Stat via the file descriptor.
72 retval
= fstat(fd
, &statbuf
);
// Not following symlinks, so lstat() the path.
74 } else if (!follow_symlinks
) {
75 retval
= lstat(path
.c_str(), &statbuf
);
// Otherwise stat() the path.
78 retval
= stat(path
.c_str(), &statbuf
);
81 if (errno
== ENOENT
|| errno
== ENOTDIR
)
// Report the strerror() text as a plain string exception.
84 throw string(strerror(errno
));
85 // Commit changes to files processed so far.
86 throw CommitAndExit("Can't stat", path
, errno
);
// Acts only while path is still exactly path_len characters long.  start()
// sets path_len to the length of the directory path, so presumably this
// means the leaf name has not been appended yet - TODO confirm against the
// full source (the body of the if is not visible here).
91 DirectoryIterator::build_path()
93 if (path
.length() == path_len
) {
// Open path for reading and store the descriptor in fd.
//
// The open flags start as O_BINARY | O_RDONLY.  Where O_NOATIME is defined
// and non-zero it is added when try_noatime() returns true, and open() is
// called a second time if that attempt fails.
//
// Failure handling visible below: FileNotFound is thrown, a message string
// "Failed to open file: " + strerror(errno) is built, and the fallback is
// CommitAndExit("Can't open file", path, errno).
// NOTE(review): the errno tests selecting between these are not visible in
// this view.
//
// With HAVE_POSIX_FADVISE, POSIX_FADV_NOREUSE is advised on the new
// descriptor (offset 0, length 0).
100 DirectoryIterator::open_fd()
// NOTE(review): these are open() flags, which open() takes as an int;
// mode_t is the type of the permission-bits argument - consider int here.
103 mode_t mode
= O_BINARY
| O_RDONLY
;
104 # if defined O_NOATIME && O_NOATIME != 0
105 if (try_noatime()) mode
|= O_NOATIME
;
107 fd
= open(path
.c_str(), mode
);
108 # if defined O_NOATIME && O_NOATIME != 0
// Retry when an open which requested O_NOATIME failed.
109 if (fd
< 0 && (mode
& O_NOATIME
)) {
111 fd
= open(path
.c_str(), mode
);
119 throw FileNotFound();
121 string
m("Failed to open file: ");
122 m
+= strerror(errno
);
126 // Commit changes to files processed so far.
127 throw CommitAndExit("Can't open file", path
, errno
);
130 #ifdef HAVE_POSIX_FADVISE
131 // On Linux, POSIX_FADV_NOREUSE has been a no-op since 2.6.18 (released
132 // 2006) and before that it was incorrectly implemented as an alias for
133 // POSIX_FADV_WILLNEED. There have been a few attempts to make
134 // POSIX_FADV_NOREUSE actually work on Linux but nothing has been merged so
135 // for now let's not waste effort making a syscall we know to currently be
136 // a no-op. We can revise this conditional if it gets usefully
// implemented.
139 posix_fadvise(fd
, 0, 0, POSIX_FADV_NOREUSE
);
// With HAVE_POSIX_FADVISE, advise the OS that the data read through fd is
// no longer needed (POSIX_FADV_DONTNEED with offset 0 and length 0).  Per
// the comment below this is done just before the descriptor is closed; the
// close itself is not visible in this view.
145 DirectoryIterator::close_fd()
147 #ifdef HAVE_POSIX_FADVISE
149 // Linux doesn't implement POSIX_FADV_NOREUSE so instead we use
150 // POSIX_FADV_DONTNEED just before closing the fd. This is a bit more
151 // aggressive than we ideally want - really we just want to stop our
152 // reads from pushing other pages out of the OS cache, but if the
153 // pages we read are already cached it would probably be better to leave
154 // them cached after the read.
155 posix_fadvise(fd
, 0, 0, POSIX_FADV_DONTNEED
);
// Start iterating over a directory.
//
// Any directory stream already held in dir is closed first.  path_len is
// then set to the length of path, and dir is set from opendir(path).
//
// Error handling visible below: FileNotFound is thrown when errno is ENOENT
// or ENOTDIR, a std::string holding strerror(errno) may be thrown, and the
// fallback is CommitAndExit("Can't open directory", path, errno).
// NOTE(review): the assignment of path_ to path and the condition on the
// string throw are not visible in this view.
163 DirectoryIterator::start(const std::string
& path_
)
// Release the previous directory stream, if any.
165 if (dir
) closedir(dir
);
167 path_len
= path
.length();
168 dir
= opendir(path
.c_str());
170 if (errno
== ENOENT
|| errno
== ENOTDIR
)
171 throw FileNotFound();
173 throw string(strerror(errno
));
174 // Commit changes to files processed so far.
175 throw CommitAndExit("Can't open directory", path
, errno
);
// Turn the errno left by a failed attempt to read the next directory entry
// into an exception.
//
// FileNotFound is thrown for ENOENT or ENOTDIR.  A std::string holding
// strerror(errno) may be thrown (presumably for the EACCES case mentioned
// in the comment below - TODO confirm, the condition is not visible here).
// The fallback is CommitAndExit("Can't read next entry from directory",
// path, errno).
180 DirectoryIterator::next_failed() const
182 // The Linux getdents() syscall (which readdir uses internally) is
183 // documented as being able to return ENOENT and ENOTDIR. Also,
184 // EACCES has been observed here on CIFS mounts.
185 if (errno
== ENOENT
|| errno
== ENOTDIR
)
186 throw FileNotFound();
188 throw string(strerror(errno
));
189 throw CommitAndExit("Can't read next entry from directory", path
, errno
);
// Determine the MIME content-type of the current file using libmagic.
//
// Steps visible below:
//  * If magic_cookie is NULL, create it with magic_open() and load the
//    magic database with magic_load().
//  * Identify the file with magic_descriptor() on fd (only with
//    MAGIC_VERSION >= 515, after seeking fd back to offset 0) or with
//    magic_file() on path.
//  * If libmagic reports a "Composite Document File V2 Document" or
//    "application/CDFV2-corrupt", choose a MIME type from the leaf name's
//    3-character extension instead.
//  * Without MAGIC_MIME_TYPE, look for a space in the result and return
//    only the text before it.
//
// Throws CommitAndExit if the library can't be initialised or the database
// can't be loaded, and FileNotFound if magic_errno() reports ENOENT or
// ENOTDIR.
// NOTE(review): several lines of this function (including the selection on
// the extension's first letter and the other return paths) are not visible
// in this view.
193 DirectoryIterator::get_magic_mimetype()
// The cookie is created on the first call which finds it NULL.
195 if (rare(magic_cookie
== NULL
)) {
196 #ifdef MAGIC_MIME_TYPE
197 magic_cookie
= magic_open(MAGIC_SYMLINK
|MAGIC_MIME_TYPE
|MAGIC_ERROR
);
199 // MAGIC_MIME_TYPE was added in 4.22, released 2007-12-27. If we don't
200 // have it then use MAGIC_MIME instead and trim any encoding off below.
201 magic_cookie
= magic_open(MAGIC_SYMLINK
|MAGIC_MIME
|MAGIC_ERROR
);
203 if (magic_cookie
== NULL
) {
204 // Commit changes to files processed so far.
205 throw CommitAndExit("Failed to initialise the file magic library",
// Passing NULL to magic_load() selects the default magic database.
208 if (magic_load(magic_cookie
, NULL
) == -1) {
209 // Commit changes to files processed so far.
210 const char * err
= magic_error(magic_cookie
);
211 throw CommitAndExit("Failed to load the file magic database", err
);
215 const char * res
= NULL
;
216 // Prior to 5.15, magic_descriptor() closed the fd passed, so avoid it.
217 #if defined MAGIC_VERSION && MAGIC_VERSION - 0 >= 515
// Only use the descriptor if it could be positioned at the start of the
// file.
219 if (lseek(fd
, 0, SEEK_SET
) == 0)
220 res
= magic_descriptor(magic_cookie
, fd
);
// Identify the file by path.
225 res
= magic_file(magic_cookie
, path
.c_str());
// Fetch libmagic's error text and errno for the failure.
228 const char * err
= magic_error(magic_cookie
);
230 int eno
= magic_errno(magic_cookie
);
231 if (eno
== ENOENT
|| eno
== ENOTDIR
)
232 throw FileNotFound();
233 string
m("Failed to use magic on file: ");
240 // Sometimes libmagic returns this string instead of a mime-type for some
241 // Microsoft documents, so pick a suitable MIME content-type based on the
242 // extension. Newer versions seem to return "application/CDFV2-corrupt"
243 // instead for this case (on Debian, file 5.11 gives the former and file
245 #define COMPOSITE_DOC "Composite Document File V2 Document"
// Prefix match against COMPOSITE_DOC (sizeof() - 1 leaves out the
// terminating nul) or exact match against the newer string.
246 if (strncmp(res
, COMPOSITE_DOC
, sizeof(COMPOSITE_DOC
) - 1) == 0 ||
247 strcmp(res
, "application/CDFV2-corrupt") == 0) {
248 // Default to something self-explanatory.
249 res
= "application/x-compound-document-file";
250 const char * leaf
= leafname();
251 const char * ext
= strrchr(leaf
, '.');
// ++ext steps past the '.'; only 3-character extensions are considered.
252 if (ext
&& strlen(++ext
) == 3) {
// Convert ASCII upper case letters in the extension to lower case in e.
254 for (int i
= 0; i
!= 3; ++i
) {
255 if (ext
[i
] <= 'Z' && ext
[i
] >= 'A')
256 e
[i
] = ext
[i
] + ('a' - 'A');
263 res
= "application/msword";
266 if (e
[1] == 's' && e
[2] == 'g')
267 res
= "application/vnd.ms-outlook";
270 if (e
[1] == 'p' || e
[1] == 'o')
271 res
= "application/vnd.ms-powerpoint";
272 else if (e
[1] == 'u' && e
[2] == 'b')
273 res
= "application/x-mspublisher";
277 res
= "application/vnd.ms-excel";
280 if (e
[1] == 'p' && e
[2] != 'd')
281 res
= "application/vnd.ms-works";
286 #ifndef MAGIC_MIME_TYPE
287 // Discard any encoding from mime type value. Prior to version 5.0 the
288 // return value just had a space separator, e.g.:
290 // text/plain charset=us-ascii
292 // 5.0 changed that (but version 4.22 and later have MAGIC_MIME_TYPE
293 // so we don't need to handle this variant here):
295 // text/plain; charset=us-ascii
296 const char* spc
= strchr(res
, ' ');
// Return just the part of res before the space.
298 return string(res
, spc
- res
);