Reimplement Language Modelling weights
[xapian.git] / xapian-core / common / io_utils.h
blob7cecb7edfdb621dbae51f2ee737f2939b51944fc
1 /** @file
2 * @brief Wrappers for low-level POSIX I/O routines.
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2011,2014,2015,2016,2018,2024 Olly Betts
5 * Copyright (C) 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_IO_UTILS_H
23 #define XAPIAN_INCLUDED_IO_UTILS_H
25 #ifndef PACKAGE
26 # error config.h must be included first in each C++ source file
27 #endif
29 #include <sys/types.h>
30 #include "safefcntl.h"
31 #include "safeunistd.h"
32 #include <limits>
33 #include <string>
35 /** Open a block-based file for reading.
37 * @param fname The path of the file to open.
39 inline int io_open_block_rd(const char * fname) {
40 return ::open(fname, O_RDONLY | O_BINARY | O_CLOEXEC);
/** Open a block-based file for reading.
 *
 *  Convenience overload taking the path as a std::string; it forwards to
 *  the overload taking a C string.
 *
 *  @param fname  The path of the file to open.
 */
inline int io_open_block_rd(const std::string& fname)
{
    const char* path = fname.c_str();
    return io_open_block_rd(path);
}
/** Open a block-based file for writing.
 *
 *  @param fname  The path of the file to open.
 *  @param anew   If true, open the file anew (create or truncate it).
 */
int io_open_block_wr(const char* fname, bool anew);
/** Open a block-based file for writing.
 *
 *  Convenience overload taking the path as a std::string; it forwards to
 *  the overload taking a C string.
 *
 *  @param fname  The path of the file to open.
 *  @param anew   If true, open the file anew (create or truncate it).
 */
inline int io_open_block_wr(const std::string& fname, bool anew)
{
    const char* path = fname.c_str();
    return io_open_block_wr(path, anew);
}
69 /** Ensure all data previously written to file descriptor fd has been written to
70 * disk.
72 * Returns false if this could not be done.
74 inline bool io_sync(int fd)
76 #if defined HAVE_FDATASYNC
77 // If we have it, prefer fdatasync() over fsync() as the former avoids
78 // updating the access time so is probably a little more efficient.
79 return fdatasync(fd) == 0;
80 #elif defined HAVE_FSYNC
81 return fsync(fd) == 0;
82 #elif defined __WIN32__
83 return _commit(fd) == 0;
84 #else
85 # error Cannot implement io_sync() without fdatasync(), fsync(), or _commit()
86 #endif
89 inline bool io_full_sync(int fd)
91 #ifdef F_FULLFSYNC
92 /* Only supported on macOS (at the time of writing at least).
94 * This call ensures that data has actually been written to disk, not just
95 * to the drive's write cache, so it provides better protection from power
96 * failures, etc. It does take longer though.
98 * According to the sqlite sources, this shouldn't fail on a local FS so
99 * a failure means that the file system doesn't support this operation and
100 * therefore it's best to fallback to fdatasync()/fsync().
102 if (fcntl(fd, F_FULLFSYNC, 0) == 0)
103 return true;
104 #endif
105 return io_sync(fd);
/** Read n bytes (or until EOF) from file descriptor fd into the block
 *  pointed to by p.
 *
 *  @param fd   The file descriptor to read from.
 *  @param p    Where to store the data read.
 *  @param n    The maximum number of bytes to read.
 *  @param min  If specified, the minimum number of bytes which must be read
 *		before EOF is acceptable (default: 0).
 *
 *  @return  The number of bytes actually read.
 *
 *  If a read error occurs, throws DatabaseError.
 *
 *  If @a min is specified and EOF is reached after less than @a min bytes,
 *  throws DatabaseCorruptError.
 */
size_t io_read(int fd, char* p, size_t n, size_t min = 0);
/** Write n bytes to file descriptor fd from the block pointed to by p.
 *
 *  @param fd  The file descriptor to write to.
 *  @param p   The start of the data to write.
 *  @param n   The number of bytes to write.
 */
void io_write(int fd, const char* p, size_t n);
123 inline void io_write(int fd, const unsigned char * p, size_t n) {
124 io_write(fd, reinterpret_cast<const char *>(p), n);
/** Read n bytes (or until EOF) from file descriptor fd, starting at
 *  position o, into the block pointed to by p.
 *
 *  @param fd   The file descriptor to read from.
 *  @param p    Where to store the data read.
 *  @param n    The maximum number of bytes to read.
 *  @param o    The file position to start reading at.
 *  @param min  If specified, the minimum number of bytes which must be read
 *		before EOF is acceptable (default: 0).
 *
 *  @return  The number of bytes actually read.
 *
 *  If a read error occurs, throws DatabaseError.
 *
 *  If @a min is specified and EOF is reached after less than @a min bytes,
 *  throws DatabaseCorruptError.
 *
 *  The current file position may or may not be updated.
 */
size_t io_pread(int fd, char* p, size_t n, off_t o, size_t min = 0);
/** Write n bytes from block pointed to by p to file descriptor fd starting at
 *  position o.
 *
 *  @param fd  The file descriptor to write to.
 *  @param p   The start of the data to write.
 *  @param n   The number of bytes to write.
 *  @param o   The file position to start writing at.
 *
 *  If a write error occurs, throws DatabaseError.
 *
 *  The current file position may or may not be updated.
 */
void io_pwrite(int fd, const char* p, size_t n, off_t o);
/** Readahead block b size n bytes from file descriptor fd, offset o.
 *
 *  @param fd  The file descriptor to readahead on.
 *  @param n   The block size in bytes.
 *  @param b   The number of the block to readahead.
 *  @param o   Offset (default: 0).
 *
 *  @return  false if we can't readahead on this fd.
 */
#ifdef HAVE_POSIX_FADVISE
bool io_readahead_block(int fd, size_t n, off_t b, off_t o = 0);
#else
inline bool io_readahead_block(int, size_t, off_t, off_t = 0)
{
    // Built without HAVE_POSIX_FADVISE, so readahead is never possible.
    return false;
}
#endif
/** Read block b size n bytes into buffer p from file descriptor fd, offset o.
 *
 *  @param fd  The file descriptor to read from.
 *  @param p   The buffer to read into.
 *  @param n   The block size in bytes.
 *  @param b   The number of the block to read.
 *  @param o   Offset (default: 0).
 */
void io_read_block(int fd, char* p, size_t n, off_t b, off_t o = 0);
/** Write block b size n bytes from buffer p to file descriptor fd, offset o.
 *
 *  @param fd  The file descriptor to write to.
 *  @param p   The buffer holding the block's data.
 *  @param n   The block size in bytes.
 *  @param b   The number of the block to write.
 *  @param o   Offset (default: 0).
 */
void io_write_block(int fd, const char* p, size_t n, off_t b, off_t o = 0);
166 inline void io_write_block(int fd, const unsigned char * p, size_t n, off_t b) {
167 io_write_block(fd, reinterpret_cast<const char *>(p), n, b);
/** Delete a file.
 *
 *  @param filename  The file to delete.
 *
 *  @exception Xapian::DatabaseError is thrown if @a filename existed but
 *	       couldn't be unlinked.
 *
 *  @return  true if @a filename was successfully removed; false if it
 *	     didn't exist.  If the file is on NFS, false may be returned
 *	     even if the file was removed (if the server fails after
 *	     removing the file but before telling the client, and the
 *	     client then retries).
 */
bool io_unlink(const std::string& filename);
/** Rename a temporary file to its final position.
 *
 *  Attempts to deal with NFS infelicities.  If the rename fails, the
 *  temporary file is removed.
 *
 *  @param tmp_file   The temporary file to rename.
 *  @param real_file  The final name to give it.
 *
 *  @return  true if the rename succeeded; false if it failed (and errno will
 *	     be set appropriately).
 */
bool io_tmp_rename(const std::string& tmp_file, const std::string& real_file);
/** Protect against stray writes to fds we use pwrite() on.
 *
 *  User code or other libraries might accidentally try to write to our fd;
 *  setting the file position high guards against that.  To avoid problems
 *  this is being rolled out gradually, only on platforms it has been tested
 *  on - elsewhere this function does nothing.
 *
 *  Failure to seek is deliberately ignored.
 *
 *  @param fd  The file descriptor to protect.
 */
static inline void io_protect_from_write(int fd) {
#ifdef __linux__
    // The maximum off_t value works for at least btrfs.
    const off_t highest = std::numeric_limits<off_t>::max();
    if (lseek(fd, highest, SEEK_SET) >= 0)
	return;
    // Otherwise try the actual maximum for ext4 (which matches the documented
    // maximum filesize) since ext4 is very widely used.  The value is
    // 2^44 - 4096.
    const off_t ext4_highest = 0xffffffff000;
    (void)lseek(fd, ext4_highest, SEEK_SET);
#elif defined __FreeBSD__ || \
      defined __APPLE__ || \
      defined __NetBSD__ || \
      defined __OpenBSD__
    // The maximum off_t value worked in testing on:
    // * FreeBSD 14.0
    // * macOS 10.10 and 12.6
    // * NetBSD 10.0
    // * OpenBSD 7.5
    const off_t highest = std::numeric_limits<off_t>::max();
    (void)lseek(fd, highest, SEEK_SET);
#else
    // Untested platform: leave the file position alone.
    (void)fd;
#endif
}
223 #endif // XAPIAN_INCLUDED_IO_UTILS_H