2 * @brief Wrappers for low-level POSIX I/O routines.
4 /* Copyright (C) 2004,2006,2007,2008,2009,2011,2012,2014,2015,2016,2018 Olly Betts
5 * Copyright (C) 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "posixy_wrapper.h"
27 #include "safeunistd.h"
33 #include <xapian/error.h>
38 // Trying to include the correct headers with the correct defines set to
39 // get pread() and pwrite() prototyped on every platform without breaking any
40 // other platform is a real can of worms. So instead we probe for what
41 // prototypes (if any) are required in configure and put them into
42 // PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
43 #if defined HAVE_PREAD && defined PREAD_PROTOTYPE
46 #if defined HAVE_PWRITE && defined PWRITE_PROTOTYPE
/** Remove the file @a filename by calling posixy_unlink().
 *
 *  @param filename  Path of the file to delete.
 *
 *  @exception Xapian::DatabaseError  Thrown when the delete fails and errno
 *                                    is anything other than ENOENT.
 *
 *  NOTE(review): the return statements are not visible in this copy of the
 *  file, so how "deleted" and "was already missing" are reported to the
 *  caller should be confirmed against the declaration.
 */
51 io_unlink(const std::string
& filename
)
// A zero result from posixy_unlink() is the success case.
53 if (posixy_unlink(filename
.c_str()) == 0) {
// ENOENT (no such file) is not treated as an error; any other errno is
// reported with the filename in the exception message.
56 if (errno
!= ENOENT
) {
57 throw Xapian::DatabaseError(filename
+ ": delete failed", errno
);
// Lowest file descriptor number we are willing to hand out for a handle that
// will be written to.
constexpr int MIN_WRITE_FD = 3;
/** Open @a fname for reading and writing, avoiding low descriptor numbers.
 *
 *  @param fname  Path of the file to open.
 *  @param anew   If true, O_CREAT | O_TRUNC are added to the open flags.
 *
 *  The file is opened with O_RDWR | O_BINARY | O_CLOEXEC and mode 0666.  If
 *  open() fails (negative result) or already yields a descriptor which is at
 *  least MIN_WRITE_FD, that value is returned as-is.  Otherwise the code
 *  below duplicates the descriptor onto one which is >= MIN_WRITE_FD.
 *
 *  NOTE(review): several lines of this function (the declaration of badfd,
 *  the loop bodies and the conditional-compilation structure) are not
 *  visible in this copy of the file, so the comments below only describe the
 *  statements which can be seen.
 */
66 io_open_block_wr(const char * fname
, bool anew
)
68 // Use auto because on AIX O_CLOEXEC may be a 64-bit integer constant.
69 auto flags
= O_RDWR
| O_BINARY
| O_CLOEXEC
;
// Creating a fresh file also truncates any existing one.
70 if (anew
) flags
|= O_CREAT
| O_TRUNC
;
71 int fd
= ::open(fname
, flags
, 0666);
// Fast path: open() failed, or the descriptor is already high enough.
72 if (fd
>= MIN_WRITE_FD
|| fd
< 0) return fd
;
74 // We want to avoid using fd < MIN_WRITE_FD, in case some other code in
75 // the same process tries to write to stdout or stderr, which would end up
76 // corrupting our database.
// NOTE(review): badfd is presumably the too-low descriptor which open()
// returned - its declaration is not visible here.
78 #ifdef F_DUPFD_CLOEXEC
79 // dup to the first unused fd >= MIN_WRITE_FD.
80 fd
= fcntl(badfd
, F_DUPFD_CLOEXEC
, MIN_WRITE_FD
);
81 // F_DUPFD_CLOEXEC may not be supported.
82 if (fd
< 0 && errno
== EINVAL
)
// Fall back to plain F_DUPFD, again asking for a descriptor which is
// >= MIN_WRITE_FD.
86 fd
= fcntl(badfd
, F_DUPFD
, MIN_WRITE_FD
);
// Request close-on-exec on the duplicate; the result of this call is
// explicitly discarded.
89 (void)fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
// Capture errno at this point.  NOTE(review): presumably so that cleanup
// which follows can't clobber it - the code which uses save_errno is not
// visible here.
92 int save_errno
= errno
;
// A zero-initialised array with one entry for each descriptor number below
// MIN_WRITE_FD.  NOTE(review): presumably records which low descriptors need
// closing afterwards - confirm against the full source.
97 char toclose
[MIN_WRITE_FD
];
98 memset(toclose
, 0, sizeof(toclose
));
// Keep looping while the descriptor we have is valid but still below
// MIN_WRITE_FD (the body of this do-while is not visible here).
103 } while (fd
>= 0 && fd
< MIN_WRITE_FD
);
104 int save_errno
= errno
;
// Step through every descriptor number below MIN_WRITE_FD (the loop body is
// not visible here).
105 for (badfd
= 0; badfd
!= MIN_WRITE_FD
; ++badfd
)
// Request close-on-exec on the resulting descriptor; the result of this call
// is explicitly discarded.
112 (void)fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
// Whichever path was taken, we must now have either a failure (negative fd)
// or a descriptor which is >= MIN_WRITE_FD.
117 Assert(fd
>= MIN_WRITE_FD
|| fd
< 0);
/** Read from file descriptor @a fd into the buffer @a p using read().
 *
 *  @param fd   File descriptor to read from.
 *  @param p    Destination buffer.
 *  @param n    Number of bytes passed to read() as the request size.
 *  @param min  Threshold compared against the running total; once the total
 *              is at least this, the "not enough data" exception is skipped.
 *
 *  @exception Xapian::DatabaseCorruptError  "Couldn't read enough (EOF)" -
 *             raised on the path where the total is still below @a min.
 *  @exception Xapian::DatabaseError  read() failed with an errno other than
 *             EINTR.
 *
 *  NOTE(review): the loop header, the tests on the result of read() and the
 *  return statement are not visible in this copy of the file.
 */
122 io_read(int fd
, char * p
, size_t n
, size_t min
)
126 ssize_t c
= read(fd
, p
, n
);
// Having at least min bytes is good enough; otherwise running out of data
// is reported as database corruption.
129 if (total
>= min
) break;
130 throw Xapian::DatabaseCorruptError("Couldn't read enough (EOF)");
// An interrupted call (EINTR) is simply retried; any other errno is fatal.
132 if (errno
== EINTR
) continue;
133 throw Xapian::DatabaseError("Error reading from file", errno
);
142 /** Write n bytes from block pointed to by p to file descriptor fd. */
/*  @param fd  File descriptor to write to.
 *  @param p   Start of the data to write.
 *  @param n   Number of bytes passed to write() as the request size.
 *
 *  @exception Xapian::DatabaseError  write() failed with an errno other than
 *             EINTR ("Error writing to file").
 *
 *  NOTE(review): the surrounding loop and the handling of the value returned
 *  by write() are not visible in this copy of the file.
 */
144 io_write(int fd
, const char * p
, size_t n
)
147 ssize_t c
= write(fd
, p
, n
);
// An interrupted call (EINTR) is simply retried; any other errno is fatal.
149 if (errno
== EINTR
) continue;
150 throw Xapian::DatabaseError("Error writing to file", errno
);
/** Read from file descriptor @a fd at file offset @a o into @a p.
 *
 *  @param fd   File descriptor to read from.
 *  @param p    Destination buffer.
 *  @param n    Number of bytes requested from pread()/read().
 *  @param o    File offset to read at.
 *  @param min  NOTE(review): not referenced in the lines visible here;
 *              presumably a minimum acceptable byte count as in io_read() -
 *              confirm against the full source.
 *
 *  @exception Xapian::DatabaseError  On EOF ("EOF reading database"), on a
 *             read failure other than EINTR ("Error reading database"), or
 *             on an lseek() failure ("Error seeking database").
 *
 *  Two variants are visible below: one built on pread(), and one which calls
 *  lseek() and then read().  NOTE(review): which one is compiled is
 *  presumably decided by preprocessor conditionals which are not visible in
 *  this copy of the file.
 *
 *  NOTE(review): EOF raises DatabaseError here, whereas io_read() raises
 *  DatabaseCorruptError for its EOF case - check that the difference is
 *  intentional.
 */
158 io_pread(int fd
, char * p
, size_t n
, off_t o
, size_t min
)
// Variant 1: positioned read with pread().
163 ssize_t c
= pread(fd
, p
, n
, o
);
164 // We should get a full read most of the time, so streamline that case.
165 if (usual(c
== ssize_t(n
)))
167 // -1 is error, 0 is EOF
172 throw Xapian::DatabaseError("EOF reading database");
174 // We get EINTR if the syscall was interrupted by a signal.
175 // In this case we should retry the read.
176 if (errno
== EINTR
) continue;
177 throw Xapian::DatabaseError("Error reading database", errno
);
// Variant 2: seek to the requested offset first, then use plain read().
187 if (rare(lseek(fd
, o
, SEEK_SET
) < 0))
188 throw Xapian::DatabaseError("Error seeking database", errno
);
190 ssize_t c
= read(fd
, p
, n
);
191 // We should get a full read most of the time, so streamline that case.
192 if (usual(c
== ssize_t(n
)))
198 throw Xapian::DatabaseError("EOF reading database");
200 // We get EINTR if the syscall was interrupted by a signal.
201 // In this case we should retry the read.
202 if (errno
== EINTR
) continue;
203 throw Xapian::DatabaseError("Error reading database", errno
);
/** Write data from @a p to file descriptor @a fd at file offset @a o.
 *
 *  @param fd  File descriptor to write to.
 *  @param p   Start of the data to write.
 *  @param n   Number of bytes requested from pwrite().
 *  @param o   File offset to write at.
 *
 *  @exception Xapian::DatabaseError  pwrite() failed with an errno other
 *             than EINTR ("Error writing to file"), or lseek() failed
 *             ("Error seeking database").
 *
 *  NOTE(review): the loop structure and whatever follows the lseek() call
 *  are not visible in this copy of the file; the lseek() path is presumably
 *  the fallback used when pwrite() is unavailable.
 */
215 io_pwrite(int fd
, const char * p
, size_t n
, off_t o
)
219 ssize_t c
= pwrite(fd
, p
, n
, o
);
220 // We should get a full write most of the time, so streamline that case.
222 if (usual(c
== ssize_t(n
)))
// An interrupted call (EINTR) is simply retried; any other errno is fatal.
225 if (errno
== EINTR
) continue;
226 throw Xapian::DatabaseError("Error writing to file", errno
);
// Position the file at the requested offset; a failed seek is fatal.
233 if (rare(lseek(fd
, o
, SEEK_SET
) < 0))
234 throw Xapian::DatabaseError("Error seeking database", errno
);
/** Throw a Xapian::DatabaseError for a failed block operation.
 *
 *  @param s  Message text supplied by the caller (callers in this file pass
 *            strings such as "Error reading block ").
 *  @param b  Block number supplied by the caller.
 *  @param e  Error number handed to the exception; defaults to 0.
 *
 *  NOTE(review): the construction of the message m is not visible in this
 *  copy of the file; presumably it is built from @a s and @a b.
 */
241 throw_block_error(const char * s
, off_t b
, int e
= 0)
245 throw Xapian::DatabaseError(m
, e
);
248 #ifdef HAVE_POSIX_FADVISE
/** Advise the OS that data from @a fd will be needed soon.
 *
 *  @param fd  File descriptor the advice applies to.
 *  @param n   Length in bytes passed to posix_fadvise().
 *  @param b   NOTE(review): not referenced in the lines visible here;
 *             presumably a block number used to adjust the offset - confirm
 *             against the full source.
 *  @param o   Offset passed to posix_fadvise().
 *
 *  @return true if posix_fadvise() with POSIX_FADV_WILLNEED returned 0,
 *          false otherwise.
 */
250 io_readahead_block(int fd
, size_t n
, off_t b
, off_t o
)
253 // Assume that any failure is likely to also happen for another call with
// NOTE(review): the end of the sentence above is missing from this copy of
// the file and should be restored.
255 return posix_fadvise(fd
, o
, n
, POSIX_FADV_WILLNEED
) == 0;
/** Read a block from file descriptor @a fd into @a p.
 *
 *  @param fd  File descriptor to read from.
 *  @param p   Destination buffer.
 *  @param n   Number of bytes requested from pread()/read().
 *  @param b   Block number; in the visible lines it is only used when
 *             reporting errors via throw_block_error().
 *  @param o   File offset passed to pread()/lseek().
 *
 *  Failures are reported through throw_block_error(): "EOF reading block ",
 *  "Error reading block " (errno other than EINTR) and
 *  "Error seeking to block ".
 *
 *  Two variants are visible below: one built on pread(), and one which calls
 *  lseek() and then read().  NOTE(review): which one is compiled is
 *  presumably decided by preprocessor conditionals which are not visible in
 *  this copy of the file, along with the loop structure and any adjustment
 *  of @a o.
 */
260 io_read_block(int fd
, char * p
, size_t n
, off_t b
, off_t o
)
263 // Prefer pread if available since it's typically implemented as a
264 // separate syscall, and that eliminates the overhead of an extra syscall
// (the lseek() which the read() path further down has to make first).
268 ssize_t c
= pread(fd
, p
, n
, o
);
269 // We should get a full read most of the time, so streamline that case.
270 if (usual(c
== ssize_t(n
)))
272 // -1 is error, 0 is EOF
275 throw_block_error("EOF reading block ", b
);
276 // We get EINTR if the syscall was interrupted by a signal.
277 // In this case we should retry the read.
278 if (errno
== EINTR
) continue;
279 throw_block_error("Error reading block ", b
, errno
);
// Alternative path: seek to the requested offset, then use plain read().
286 if (rare(lseek(fd
, o
, SEEK_SET
) < 0))
287 throw_block_error("Error seeking to block ", b
, errno
);
289 ssize_t c
= read(fd
, p
, n
);
290 // We should get a full read most of the time, so streamline that case.
291 if (usual(c
== ssize_t(n
)))
295 throw_block_error("EOF reading block ", b
);
296 // We get EINTR if the syscall was interrupted by a signal.
297 // In this case we should retry the read.
298 if (errno
== EINTR
) continue;
299 throw_block_error("Error reading block ", b
, errno
);
/** Write a block from @a p to file descriptor @a fd.
 *
 *  @param fd  File descriptor to write to.
 *  @param p   Start of the data to write.
 *  @param n   Number of bytes requested from pwrite()/write().
 *  @param b   Block number; in the visible lines it is only used when
 *             reporting errors via throw_block_error().
 *  @param o   File offset passed to pwrite()/lseek().
 *
 *  Failures are reported through throw_block_error(): "Error writing block "
 *  (errno other than EINTR) and "Error seeking to block ".
 *
 *  Two variants are visible below: one built on pwrite(), and one which
 *  calls lseek() and then write().  NOTE(review): which one is compiled is
 *  presumably decided by preprocessor conditionals which are not visible in
 *  this copy of the file, along with the loop structure and any adjustment
 *  of @a o.
 */
308 io_write_block(int fd
, const char * p
, size_t n
, off_t b
, off_t o
)
311 // Prefer pwrite if available since it's typically implemented as a
312 // separate syscall, and that eliminates the overhead of an extra syscall
// (the lseek() which the write() path further down has to make first).
316 ssize_t c
= pwrite(fd
, p
, n
, o
);
317 // We should get a full write most of the time, so streamline that case.
318 if (usual(c
== ssize_t(n
)))
321 // We get EINTR if the syscall was interrupted by a signal.
322 // In this case we should retry the write.
323 if (errno
== EINTR
) continue;
324 throw_block_error("Error writing block ", b
, errno
);
// Alternative path: seek to the requested offset, then use plain write().
331 if (rare(lseek(fd
, o
, SEEK_SET
) < 0))
332 throw_block_error("Error seeking to block ", b
, errno
);
334 ssize_t c
= write(fd
, p
, n
);
335 // We should get a full write most of the time, so streamline that case.
336 if (usual(c
== ssize_t(n
)))
339 // We get EINTR if the syscall was interrupted by a signal.
340 // In this case we should retry the write.
341 if (errno
== EINTR
) continue;
342 throw_block_error("Error writing block ", b
, errno
);
351 io_tmp_rename(const std::string
& tmp_file
, const std::string
& real_file
)
354 // We retry on EXDEV a few times as some older Linux kernels are buggy and
355 // fail with EXDEV when the two files are on the same device (as they
356 // always ought to be when this function is used). Don't retry forever in
357 // case someone calls this with files on different devices.
359 // We're not sure exactly which kernels are buggy in this way, but there's
360 // discussion here: https://www.spinics.net/lists/linux-nfs/msg17306.html
362 // Reported at: https://trac.xapian.org/ticket/698
366 if (posixy_rename(tmp_file
.c_str(), real_file
.c_str()) < 0) {
368 if (errno
== EXDEV
&& --retries
> 0) goto retry
;
370 // With NFS, rename() failing may just mean that the server crashed
371 // after successfully renaming, but before reporting this, and then
372 // the retried operation fails. So we need to check if the source
373 // file still exists, which we do by calling unlink(), since we want
374 // to remove the temporary file anyway.
375 int saved_errno
= errno
;
376 if (unlink(tmp_file
.c_str()) == 0 || errno
!= ENOENT
) {