/* Test 74 - mmap functionality & regression test.
 *
 * This test tests some basic functionality of mmap, and also some
 * cases that are quite complex for the system to handle.
 *
 * Memory pages are generally made available on demand. Memory copying
 * is done by the kernel. As the kernel may encounter pagefaults in
 * legitimate memory ranges (e.g. pages that aren't mapped; pages that
 * are mapped RO as they are COW), it cooperates with VM to make the
 * mappings and let the copy succeed transparently.
 *
 * With file-mapped ranges this can result in a deadlock, if care is
 * not taken, as the copy might be requested by VFS or an FS. This test
 * triggers as many of these states as possible to ensure they are
 * successful or (where appropriate) fail gracefully, i.e. without
 * deadlock.
 *
 * To do this, system calls are done with source or target buffers with
 * missing or readonly mappings, both anonymous and file-mapped. The
 * cache is flushed before mmap() so that we know the mappings should
 * not be present at mmap() time. Then e.g. a read() or write() is
 * executed with that buffer as target. This triggers an FS copy
 * to or from a missing range that the FS itself is needed to map in
 * first. VFS detects this, requests VM to map in the pages, which does
 * so with the help of another VFS thread and the FS, and then re-issues
 * the request to the FS.
 *
 * Another case is when VFS itself does such a copy. This is actually
 * unusual as filenames are already faulted in by the requesting process
 * in libc by strlen(). select() allows such a case, however, so this
 * is tested too. We are satisfied if the call completes.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/ioc_memory.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/select.h>
#include <sys/time.h>
#include <minix/paths.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <errno.h>

#include "common.h"
#include "testcache.h"

int max_error = 0;	/* make all e()'s fatal */

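/*
 * dowriteblock(), readblock() and testend() are the hooks that the shared
 * block cache test code (included via testcache.h and driven from main()
 * through makefiles()/cachequiet()/dotest()) expects this test to provide.
 * readblock() checks each block both through pread() and through a private
 * file mapping of the same range, in random order, so that both paths are
 * exercised against the cache.
 */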
int
dowriteblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pwrite(fd, data, blocksize, offset) < blocksize) {
		perror("pwrite");
		return -1;
	}

	return blocksize;
}

int
readblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;
	char *mmapdata;
	int pread_first = random() % 2;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if((mmapdata = mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
		fd, offset)) == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	if(!pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if(memcmp(mmapdata, data, blocksize)) {
		fprintf(stderr, "readblock: mmap, pread mismatch\n");
		return -1;
	}

	if(munmap(mmapdata, blocksize) < 0) {
		perror("munmap");
		return -1;
	}

	return blocksize;
}

void testend(void) { }

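/*
 * The do_*() helpers below are the individual experiments run by
 * test_memory_types_vs_operations(). Each one issues a system call with
 * "buf" as its source or target buffer; buf points to freshly mmap()ed
 * memory whose pages have not yet been faulted in. If "writable" is set,
 * the operation must fully succeed; if not, calls that store into buf must
 * fail with EFAULT, while calls that only read from buf merely have to
 * complete without deadlocking.
 */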
static void do_read(void *buf, int fd, int writable)
{
	ssize_t ret;
	size_t n = PAGE_SIZE;
	struct stat sb;
	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;
	ret = read(fd, buf, n);

	/* if the buffer is writable, it should succeed */
	if(writable) { if(ret != n) e(3); return; }

	/* if the buffer is not writable, it should fail with EFAULT */
	if(ret >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_write(void *buf, int fd, int writable)
{
	size_t n = PAGE_SIZE;
	struct stat sb;
	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;
	/* write() only reads from buf, so this must succeed either way */
	if(write(fd, buf, n) != n) e(3);
}

static void do_stat(void *buf, int fd, int writable)
{
	int r;
	r = fstat(fd, (struct stat *) buf);

	/* should succeed if buf is writable */
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_getdents(void *buf, int fd, int writable)
{
	struct stat sb;
	int r;
	if(fstat(fd, &sb) < 0) e(1);
	if(!S_ISDIR(sb.st_mode)) return;	/* getdents only applies to directories */
	r = getdents(fd, buf, PAGE_SIZE);
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_readlink1(void *buf, int fd, int writable)
{
	char target[200];
	/* the system call just has to fail gracefully */
	readlink(buf, target, sizeof(target));
}

#define NODENAME	"a"
#define TARGETNAME	"b"

static void do_readlink2(void *buf, int fd, int writable)
{
	ssize_t rl;
	unlink(NODENAME);
	if(symlink(TARGETNAME, NODENAME) < 0) e(1);
	/* note: buf is a void *, so sizeof(buf) is just the size of a pointer;
	 * that is enough for the one-character link target used here.
	 */
	rl = readlink(NODENAME, buf, sizeof(buf));

	/* if buf is writable, it should succeed, with a certain result */
	if(writable) {
		if(rl < 0) e(2);
		((char *) buf)[rl] = '\0';
		if(strcmp(buf, TARGETNAME)) {
			fprintf(stderr, "readlink: expected %s, got %s\n",
				TARGETNAME, (char *)buf);
			e(3);
		}
		return;
	}

	/* if buf is not writable, it should fail with EFAULT */
	if(rl >= 0) e(4);

	if(errno != EFAULT) e(5);
}

static void do_symlink1(void *buf, int fd, int writable)
{
	/* the system call just has to fail gracefully */
	(void)symlink(buf, NODENAME);
}

static void do_symlink2(void *buf, int fd, int writable)
{
	/* the system call just has to fail gracefully */
	(void)symlink(NODENAME, buf);
}

static void do_open(void *buf, int fd, int writable)
{
	int r;
	/* the system call just has to fail gracefully */
	r = open(buf, O_RDONLY);
	if(r >= 0) close(r);
}

static void do_select1(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	(void)select(1, buf, NULL, NULL, &timeout);
}

static void do_select2(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	(void)select(1, NULL, buf, NULL, &timeout);
}

static void do_select3(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	(void)select(1, NULL, NULL, buf, &timeout);
}

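/*
 * fillfile() pads the given descriptor with 'A' bytes up to the requested
 * (page-multiple) size and rewinds it. A NUL is placed at offset 50 so a
 * buffer built from this content can double as a (bogus) path argument, and
 * another at the end so it stays a terminated string.
 */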
static void fillfile(int fd, int size)
{
	char *buf = malloc(size);

	if(size < 1 || size % PAGE_SIZE || !buf) { e(1); }
	memset(buf, 'A', size);
	buf[50] = '\0';	/* so it can be used as a filename arg */
	buf[size-1] = '\0';
	if(write(fd, buf, size) != size) { e(2); }
	if(lseek(fd, 0, SEEK_SET) < 0) { e(3); }
	free(buf);
}

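/*
 * make_buffers() sets up the three buffer flavours that every experiment is
 * run against: a writable private file mapping, a read-only private file
 * mapping, and a writable anonymous mapping. The backing files are filled
 * and their FS cache is flushed first, so none of the mapped pages are
 * resident when the experiment touches them.
 */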
static void make_buffers(int size,
	int *ret_fd_rw, int *ret_fd_ro,
	void **filebuf_rw, void **filebuf_ro, void **anonbuf)
{
	char fn_rw[] = "testfile_rw.XXXXXX", fn_ro[] = "testfile_ro.XXXXXX";
	*ret_fd_rw = mkstemp(fn_rw);
	*ret_fd_ro = mkstemp(fn_ro);

	if(size < 1 || size % PAGE_SIZE) { e(2); }
	if(*ret_fd_rw < 0) { e(1); }
	if(*ret_fd_ro < 0) { e(1); }
	fillfile(*ret_fd_rw, size);
	fillfile(*ret_fd_ro, size);
	if(fcntl(*ret_fd_rw, F_FLUSH_FS_CACHE) < 0) { e(4); }
	if(fcntl(*ret_fd_ro, F_FLUSH_FS_CACHE) < 0) { e(4); }

	if((*filebuf_rw = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_FILE, *ret_fd_rw, 0)) == MAP_FAILED) {
		e(5);
		quit();
	}

	if((*filebuf_ro = mmap(0, size, PROT_READ,
		MAP_PRIVATE | MAP_FILE, *ret_fd_ro, 0)) == MAP_FAILED) {
		e(5);
		quit();
	}

	if((*anonbuf = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
		e(6);
		quit();
	}

	if(unlink(fn_rw) < 0) { e(12); }
	if(unlink(fn_ro) < 0) { e(12); }
}

static void forget_buffers(void *buf1, void *buf2, void *buf3, int fd1, int fd2, int size)
{
	if(munmap(buf1, size) < 0) { e(1); }
	if(munmap(buf2, size) < 0) { e(2); }
	if(munmap(buf3, size) < 0) { e(2); }
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(close(fd1) < 0) { e(4); }
	if(close(fd2) < 0) { e(4); }
}

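/*
 * Table of experiments: each entry wraps one system call that copies data
 * into or out of a user buffer (or takes a user-supplied path or fd_set),
 * so that each of these copy paths gets exercised against every buffer type
 * and descriptor type below.
 */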
#define NEXPERIMENTS 12
struct {
	void (*do_operation)(void * buf, int fd, int writable);
} experiments[NEXPERIMENTS] = {
	{ do_read },
	{ do_write },
	{ do_stat },
	{ do_getdents },
	{ do_readlink1 },
	{ do_readlink2 },
	{ do_symlink1 },
	{ do_symlink2 },
	{ do_open },
	{ do_select1 },
	{ do_select2 },
	{ do_select3 },
};

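/*
 * Run the full cross product: every experiment above, against every open
 * descriptor type (regular file, directory, ram disk, /dev/zero), with each
 * of the three buffer types produced by make_buffers(). Fresh buffers are
 * created and forgotten around every run so no pages are accidentally
 * resident from a previous run.
 */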
static void test_memory_types_vs_operations(void)
{
#define NFDS 4
#define BUFSIZE (10 * PAGE_SIZE)
	int exp, fds[NFDS];
	int f = 0, size = BUFSIZE;

	/* open some test fd's */
#define OPEN(fn, mode) { assert(f >= 0 && f < NFDS); \
	fds[f] = open(fn, mode); if(fds[f] < 0) { e(2); } f++; }
	OPEN("regular", O_RDWR | O_CREAT);
	OPEN(".", O_RDONLY);
	OPEN("/dev/ram", O_RDWR);
	OPEN("/dev/zero", O_RDWR);

	/* make sure the regular file has plenty of size to play with */
	fillfile(fds[0], BUFSIZE);

	/* and the ramdisk too */
	if(ioctl(fds[2], MIOCRAMSIZE, &size) < 0) { e(3); }

	for(exp = 0; exp < NEXPERIMENTS; exp++) {
		for(f = 0; f < NFDS; f++) {
			void *anonmem, *filemem_rw, *filemem_ro;
			int buffd_rw, buffd_ro;

			make_buffers(BUFSIZE, &buffd_rw, &buffd_ro,
				&filemem_rw, &filemem_ro, &anonmem);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(10); }
			experiments[exp].do_operation(anonmem, fds[f], 1);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(11); }
			experiments[exp].do_operation(filemem_rw, fds[f], 1);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(12); }
			experiments[exp].do_operation(filemem_ro, fds[f], 0);

			forget_buffers(filemem_rw, filemem_ro, anonmem,
				buffd_rw, buffd_ro, BUFSIZE);
		}
	}
}

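/*
 * basic_regression() covers assorted plain mmap cases: anonymous mappings
 * (including a partial unmap at the bottom of a range), the restriction that
 * files cannot be mapped MAP_SHARED writable, and a write() whose source
 * buffer spans two private file mappings backed by two different files.
 */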
static void basic_regression(void)
{
	int fd, fd1, fd2;
	ssize_t rb, wr;
	char buf[PAGE_SIZE*2];
	void *block, *block1, *block2;
#define BLOCKSIZE (PAGE_SIZE*10)
	block = mmap(0, BLOCKSIZE, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0);

	if(block == MAP_FAILED) { e(1); }

	memset(block, 0, BLOCKSIZE);

	/* shrink from bottom */
	munmap(block, PAGE_SIZE);

	/* Next test: use a system call write() to access a block of
	 * unavailable file-mapped memory.
	 *
	 * This is a thorny corner case to make succeed transparently
	 * because
	 * (1) it is a filesystem that is doing the memory access
	 *     (copy from the file-mapped block1/block2 range in this process
	 *     to the FS), but it is also the FS needed to satisfy the range
	 *     if it isn't in the cache.
	 * (2) there are two separate memory regions involved, requiring
	 *     separate VFS requests from VM to properly satisfy, requiring
	 *     some complex state to be kept.
	 */

	fd1 = open("../testsh1", O_RDONLY);
	if (fd1 < 0) fd1 = open("../testsh1.sh", O_RDONLY);
	fd2 = open("../testsh2", O_RDONLY);
	if (fd2 < 0) fd2 = open("../testsh2.sh", O_RDONLY);
	if(fd1 < 0 || fd2 < 0) { e(2); }

	/* just check that we can't mmap() a file MAP_SHARED writable */
	if(mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FILE, fd1, 0) != MAP_FAILED) {
		e(1);
	}

	/* check that we can mmap() a file MAP_SHARED readonly */
	if(mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE, fd1, 0) == MAP_FAILED) {
		e(1);
	}

	/* clear cache of files before mmap so pages won't be present already */
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(1); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(1); }

#define LOCATION1 (void *) 0x90000000
#define LOCATION2 ((void *)((char *)LOCATION1 + PAGE_SIZE))
	block1 = mmap(LOCATION1, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd1, 0);
	if(block1 == MAP_FAILED) { e(4); }
	if(block1 != LOCATION1) { e(5); }

	block2 = mmap(LOCATION2, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd2, 0);
	if(block2 == MAP_FAILED) { e(10); }
	if(block2 != LOCATION2) { e(11); }

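	/*
	 * The write() below uses LOCATION1 as its buffer with a length of two
	 * pages (sizeof(buf)), so it deliberately spans both file mappings
	 * (block1 from fd1 and block2 from fd2), forcing the two-region case
	 * described above to be resolved within a single copy.
	 */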
	unlink("testfile");
	fd = open("testfile", O_CREAT | O_RDWR, 0644);
	if(fd < 0) { e(15); }

	/* write() using the mmap()ped memory as buffer */

	if((wr = write(fd, LOCATION1, sizeof(buf))) != sizeof(buf)) {
		fprintf(stderr, "wrote %zd bytes instead of %zu\n",
			wr, sizeof(buf));
		e(20);
		quit();
	}

	/* verify written contents */

	if((rb = pread(fd, buf, sizeof(buf), 0)) != sizeof(buf)) {
		if(rb < 0) perror("pread");
		fprintf(stderr, "wrote %zd bytes\n", wr);
		fprintf(stderr, "read %zd bytes instead of %zu\n",
			rb, sizeof(buf));
		e(21);
		quit();
	}

	if(memcmp(buf, LOCATION1, sizeof(buf))) {
		e(22);
		quit();
	}

	close(fd);
	close(fd1);
	close(fd2);
}

/*
 * Test mmap on non-device-backed file systems - file systems that do not have
 * a buffer cache and therefore have to fake mmap support. We use procfs as
 * target. The idea is that while we succeed in mapping in /proc/uptime, we
 * also get a new uptime value every time we map in the page -- VM must not
 * cache it.
 */
static void
nonedev_regression(void)
{
	int fd, fd2;
	char *buf;
	unsigned long uptime1, uptime2, uptime3;

	subtest++;

	if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1);

	buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(2);

	if (buf[4095] != 0) e(3);

	if ((uptime1 = atoi(buf)) == 0) e(4);

	if (munmap(buf, 4096) != 0) e(5);

	sleep(2);

	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE,
	    fd, 0);
	if (buf == MAP_FAILED) e(6);

	if (buf[4095] != 0) e(7);

	if ((uptime2 = atoi(buf)) == 0) e(8);

	if (uptime1 == uptime2) e(9);

	if (munmap(buf, 4096) != 0) e(10);

	sleep(2);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(11);

	if (buf[4095] != 0) e(12);

	if ((uptime3 = atoi(buf)) == 0) e(13);

	if (uptime1 == uptime3) e(14);
	if (uptime2 == uptime3) e(15);

	if (munmap(buf, 4096) != 0) e(16);

	/* Also test page faults not incurred by the process itself. */
	if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0) e(17);

	if (unlink("testfile") != 0) e(18);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(19);

	if (write(fd2, buf, 10) != 10) e(20);

	if (munmap(buf, 4096) != 0) e(21);

	close(fd2);
	close(fd);
}

/*
 * Regression test for a nasty memory-mapped file corruption bug, which is not
 * easy to reproduce but, before being solved, did occur in practice every once
 * in a while. The executive summary is that through stale inode associations,
 * VM could end up using an old block to satisfy a memory mapping.
 *
 * This subtest relies on a number of assumptions regarding allocation and
 * reuse of inode numbers and blocks. These assumptions hold for MFS but
 * possibly no other file system. However, if the subtest's assumptions are
 * not met, it will simply succeed.
 */
static void
corruption_regression(void)
{
	char *ptr, *buf;
	struct statvfs sf;
	struct stat st;
	size_t block_size;
	off_t size;
	int fd, fd2;

	subtest = 1;

	if (statvfs(".", &sf) != 0) e(0);
	block_size = sf.f_bsize;

	if ((buf = malloc(block_size * 2)) == NULL) e(0);

	/*
	 * We first need a file that is just large enough that it requires the
	 * allocation of a metadata block - an indirect block - when more data
	 * is written to it. This is fileA. We keep it open throughout the
	 * test so we can unlink it immediately.
	 */
	if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileA") != 0) e(0);

	/*
	 * Write to fileA until its next block requires the allocation of an
	 * additional metadata block - an indirect block.
	 */
	size = 0;
	memset(buf, 'A', block_size);
	do {
		/*
		 * Repeatedly write an extra block, until the file consists of
		 * more blocks than just the file data.
		 */
		if (write(fd, buf, block_size) != block_size) e(0);
		size += block_size;
		if (size >= block_size * 64) {
			/*
			 * It doesn't look like this is going to work.
			 * Skip this subtest altogether.
			 */
			if (close(fd) != 0) e(0);
			free(buf);

			return;
		}
		if (fstat(fd, &st) != 0) e(0);
	} while (st.st_blocks * 512 == size);

	/* Once we get there, go one step back by truncating by one block. */
	size -= block_size; /* for MFS, size will end up being 7*block_size */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Create a first file, fileB, and write two blocks to it. FileB's
	 * blocks are going to end up in the secondary VM cache, associated to
	 * fileB's inode number (and two different offsets within the file).
	 * The block cache does not know about files getting deleted, so we can
	 * unlink fileB immediately after creating it. So far so good.
	 */
	if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileB") != 0) e(0);
	memset(buf, 'B', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Write one extra block to fileA, hoping that this causes allocation
	 * of a metadata block as well. This is why we tried to get fileA to
	 * the point that one more block would also require the allocation of a
	 * metadata block. Our intent is to recycle the blocks that we just
	 * allocated and freed for fileB. As of writing, for the metadata
	 * block, this will *not* break the association with fileB's inode,
	 * which by itself is not a problem, yet crucial to reproducing
	 * the actual problem a bit later. Note that the test does not rely on
	 * whether the file system allocates the data block or the metadata
	 * block first, although it does need reverse deallocation (see below).
	 */
	memset(buf, 'A', block_size);
	if (write(fd, buf, block_size) != block_size) e(0);

	/*
	 * Create a new file, fileC, which recycles the inode number of fileB,
	 * but uses two new blocks to store its data. These new blocks will
	 * get associated to the fileB inode number, and one of them will
	 * thereby eclipse (but not remove) the association of fileA's metadata
	 * block to the inode of fileB.
	 */
	if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileC") != 0) e(0);
	memset(buf, 'C', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Free up the extra fileA blocks for reallocation, in particular
	 * including the metadata block. Again, this will not affect the
	 * contents of the VM cache in any way. FileA's metadata block remains
	 * cached in VM, with the inode association for fileB's block.
	 */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Now create yet one more file, fileD, which also recycles the inode
	 * number of fileB and fileC. Write two blocks to it; these blocks
	 * should recycle the blocks we just freed. One of these is fileA's
	 * just-freed metadata block, for which the new inode association will
	 * be equal to the inode association it had already (as long as blocks
	 * are freed in reverse order of their allocation, which happens to be
	 * the case for MFS). As a result, the block is not updated in the VM
	 * cache, and VM will therefore continue to see the inode association
	 * for the corresponding block of fileC which is still in the VM cache.
	 */
	if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1)
		e(0);
	memset(buf, 'D', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);

	ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_FILE, fd2, 0);
	if (ptr == MAP_FAILED) e(0);

	/*
	 * Finally, we can test the issue. Since fileC's block is still the
	 * block for which VM has the corresponding inode association, an
	 * affected VM would now find and map in fileC's block instead of
	 * fileD's block, giving us a memory-mapped area with stale contents,
	 * different from those of the underlying file. Verify that the
	 * mapping matches what we just wrote to fileD.
	 */
	if (memcmp(buf, ptr, block_size * 2)) e(0);

	/* Clean up. */
	if (munmap(ptr, block_size * 2) != 0) e(0);

	if (close(fd2) != 0) e(0);
	if (unlink("fileD") != 0) e(0);

	if (close(fd) != 0) e(0);

	free(buf);
}

/*
 * Test mmap on file holes. Holes are a tricky case with the current VM
 * implementation. There are two main issues. First, whenever a file data
 * block is freed, VM has to know about this, or it will later blindly map in
 * the old data. Thus, file systems explicitly tell VM (through libminixfs)
 * whenever a block is freed, upon which the VM cache forgets the block.
 * Second, blocks are accessed primarily by a <dev,dev_off> pair and only
 * additionally by a <ino,ino_off> pair. Holes have no meaningful value for
 * the first pair, but do need to be registered in VM with the second pair,
 * or accessing them will generate a segmentation fault. Thus, file systems
 * explicitly tell VM (through libminixfs) when a hole is being peeked;
 * libminixfs currently fakes a device offset to make this work.
 */
static void
hole_regression(void)
{
	struct statvfs st;
	size_t block_size;
	char *buf;
	int fd;

	if (statvfs(".", &st) < 0) e(1);

	block_size = st.f_bsize;

	if ((buf = malloc(block_size)) == NULL) e(2);

	if ((fd = open("testfile", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(3);

	if (unlink("testfile") != 0) e(4);

	/*
	 * We perform the test twice, in a not-so-perfect attempt to test the
	 * two aspects independently. The first part immediately creates a
	 * hole, and is supposed to fail only if reporting holes to VM does not
	 * work. However, it may also fail if a page for a previous file with
	 * the same inode number as "testfile" is still in the VM cache.
	 */
	memset(buf, 12, block_size);

	if (write(fd, buf, block_size) != block_size) e(5);

	if (lseek(fd, block_size * 2, SEEK_CUR) != block_size * 3) e(6);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(7);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(8);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(9);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(10);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(11);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(12);

	if (munmap(buf, 4 * block_size) != 0) e(13);

	/*
	 * The second part first creates file content and only turns part of it
	 * into a file hole, thus ensuring that VM has previously cached pages
	 * for the blocks that are freed. The test will fail if VM keeps the
	 * pages around in its cache.
	 */
	if ((buf = malloc(block_size)) == NULL) e(14);

	if (lseek(fd, block_size, SEEK_SET) != block_size) e(15);

	memset(buf, 34, block_size);

	if (write(fd, buf, block_size) != block_size) e(16);

	memset(buf, 56, block_size);

	if (write(fd, buf, block_size) != block_size) e(17);

	if (ftruncate(fd, block_size) != 0) e(18);

	if (lseek(fd, block_size * 3, SEEK_SET) != block_size * 3) e(19);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(20);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(21);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(22);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(23);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(24);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(25);

	if (munmap(buf, 4 * block_size) != 0) e(26);

	close(fd);
}

/*
 * Test that soft faults during file system I/O do not cause functions to
 * return partial I/O results.
 *
 * We refer to the faults that are caused internally within the operating
 * system as a result of the deadlock mitigation described at the top of this
 * file as a particular class of "soft faults". Such soft faults may occur in
 * the middle of an I/O operation, and general I/O semantics dictate that upon
 * partial success, the partial success is returned (and *not* an error). As a
 * result, these soft faults, if not handled as special cases, may cause even
 * common file system operations such as read(2) on a regular file to return
 * fewer bytes than requested. Such unexpected short reads are typically not
 * handled well by userland, and the OS must prevent them from occurring if it
 * can. Note that read(2) is the most problematic, but certainly not the only,
 * case where this problem can occur.
 *
 * Unfortunately, several file system services are not following the proper
 * general I/O semantics - and this includes MFS. Therefore, for now, we have
 * to test this case using block device I/O, which does do the right thing.
 * In this test we hope that the root file system is mounted on a block device
 * usable for (read-only!) testing purposes.
 */
static void
softfault_partial(void)
{
	struct statvfs stf;
	struct stat st;
	char *buf, *buf2;
	ssize_t size;
	int fd;

	if (statvfs("/", &stf) != 0) e(0);

	/*
	 * If the root file system is not mounted off a block device, or if we
	 * cannot open that device ourselves, simply skip this subtest.
	 */
	if (stat(stf.f_mntfromname, &st) != 0 || !S_ISBLK(st.st_mode))
		return; /* skip subtest */

	if ((fd = open(stf.f_mntfromname, O_RDONLY)) == -1)
		return; /* skip subtest */

	/*
	 * See if we can read in the first two full blocks, or two pages worth
	 * of data, whichever is larger. If that fails, there is no point in
	 * continuing the test.
	 */
	size = MAX(stf.f_bsize, PAGE_SIZE) * 2;

	if ((buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED) e(0);

	if (read(fd, buf, size) != size) {
		munmap(buf, size);
		close(fd);
		return; /* skip subtest */
	}

	lseek(fd, 0, SEEK_SET);

	/*
	 * Now attempt a read to a partially faulted-in buffer. The first time
	 * around, the I/O transfer will generate a fault and return partial
	 * success. In that case, the entire I/O transfer should be retried
	 * after faulting in the missing page(s), thus resulting in the read
	 * succeeding in full.
	 */
	if ((buf2 = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0)) == MAP_FAILED) e(0);
	buf2[0] = '\0'; /* fault in the first page */

	if (read(fd, buf2, size) != size) e(0);

	/* The result should be correct, too. */
	if (memcmp(buf, buf2, size)) e(0);

	/* Clean up. */
	munmap(buf2, size);
	munmap(buf, size);

	close(fd);
}

int
main(int argc, char *argv[])
{
	int i, iter = 2;

	start(74);

	basic_regression();

	nonedev_regression();

	/*
	 * Any inode or block allocation happening concurrently with this
	 * subtest will make the subtest succeed without testing the actual
	 * issue. Thus, repeat the subtest a fair number of times.
	 */
	for (i = 0; i < 10; i++)
		corruption_regression();

	hole_regression();

	test_memory_types_vs_operations();

	softfault_partial();

	makefiles(MAXFILES);

	cachequiet(!bigflag);
	if(bigflag) iter = 3;

	/* Try various combinations of working set sizes
	 * and block sizes in order to specifically
	 * target the primary cache, then primary+secondary
	 * cache, then primary+secondary cache+secondary
	 * cache eviction.
	 */

	if(dotest(PAGE_SIZE,    100, iter)) e(5);
	if(dotest(PAGE_SIZE*2,  100, iter)) e(2);
	if(dotest(PAGE_SIZE*3,  100, iter)) e(3);
	if(dotest(PAGE_SIZE,  20000, iter)) e(5);

	if(bigflag) {
		u32_t totalmem, freemem, cachedmem;
		if(dotest(PAGE_SIZE, 150000, iter)) e(5);
		getmem(&totalmem, &freemem, &cachedmem);
		if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6);
	}

	quit();

	return 0;
}