/* Test 74 - mmap functionality & regression test.
 *
 * This test tests some basic functionality of mmap, and also some
 * cases that are quite complex for the system to handle.
 *
 * Memory pages are generally made available on demand. Memory copying
 * is done by the kernel. As the kernel may encounter pagefaults in
 * legitimate memory ranges (e.g. pages that aren't mapped; pages that
 * are mapped RO as they are COW), it cooperates with VM to make the
 * mappings and let the copy succeed transparently.
 *
 * With file-mapped ranges this can result in a deadlock, if care is
 * not taken, as the copy might be requested by VFS or an FS. This test
 * triggers as many of these states as possible to ensure they are
 * successful or (where appropriate) fail gracefully, i.e. without
 * deadlocking.
 *
 * To do this, system calls are done with source or target buffers with
 * missing or readonly mappings, both anonymous and file-mapped. The
 * cache is flushed before mmap() so that we know the mappings should
 * not be present at mmap() time. Then e.g. a read() or write() is
 * executed with that buffer as target. This triggers an FS copy
 * to or from a missing range that the FS itself is needed to map in
 * first. VFS detects this, requests VM to map in the pages, which does
 * so with the help of another VFS thread and the FS, and then re-issues
 * the request to the FS.
 *
 * Another case is where VFS itself does such a copy. This is actually
 * unusual, as filenames are already faulted in by the requesting process
 * in libc by strlen(). select() allows such a case, however, so this
 * is tested too. We are satisfied if the call completes.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/ioc_memory.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/select.h>
#include <minix/paths.h>
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "common.h"
#include "testcache.h"
int max_error = 0;	/* make all e()'s fatal */
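/*
 * Illustrative sketch only (not invoked by the test): the core pattern the
 * header comment describes, under the assumption that "somefile" names a
 * regular file of at least one page. We flush the FS cache so the mapping's
 * pages are known-absent, then pass the mapping to write() (the FS must copy
 * *from* the missing range, which has to succeed transparently) and to
 * read() (the FS would copy *to* a read-only range, which must fail cleanly
 * with EFAULT).
 */
static void example_soft_fault_pattern(void)
{
	void *map;
	int fd;

	if((fd = open("somefile", O_RDWR)) < 0) { e(1); }

	/* Flush the cache so the pages are absent at mmap() time. */
	if(fcntl(fd, F_FLUSH_FS_CACHE) < 0) { e(2); }

	if((map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE,
		fd, 0)) == MAP_FAILED) { e(3); }

	/* Copy from the absent range: must succeed transparently. */
	if(write(fd, map, PAGE_SIZE) != PAGE_SIZE) { e(4); }

	/* Copy to the read-only range: must fail gracefully with EFAULT. */
	if(read(fd, map, PAGE_SIZE) >= 0 || errno != EFAULT) { e(5); }

	if(munmap(map, PAGE_SIZE) < 0) { e(6); }
	if(close(fd) < 0) { e(7); }
}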
int
dowriteblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pwrite(fd, data, blocksize, offset) < blocksize) {
		perror("pwrite");
		return -1;
	}

	return blocksize;
}
int
readblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;
	char *mmapdata;
	int pread_first = random() % 2;

	get_fd_offset(b, blocksize, &offset, &fd);

	/* Randomly pread() either before or after the mmap(), to exercise
	 * both orders of cache population.
	 */
	if(pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if((mmapdata = mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
		fd, offset)) == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	if(!pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if(memcmp(mmapdata, data, blocksize)) {
		fprintf(stderr, "readblock: mmap, pread mismatch\n");
		return -1;
	}

	if(munmap(mmapdata, blocksize) < 0) {
		perror("munmap");
		return -1;
	}

	return blocksize;
}
void testend(void) { }
static void do_read(void *buf, int fd, int writable)
{
	ssize_t ret;
	size_t n = PAGE_SIZE;
	struct stat sb;

	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;
	ret = read(fd, buf, n);

	/* if the buffer is writable, it should succeed */
	if(writable) { if(ret != n) e(3); return; }

	/* if the buffer is not writable, it should fail with EFAULT */
	if(ret >= 0) e(4);
	if(errno != EFAULT) e(5);
}
static void do_write(void *buf, int fd, int writable)
{
	size_t n = PAGE_SIZE;
	struct stat sb;

	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;

	/* write() only reads from the buffer, so it should succeed
	 * whether the buffer is writable or not.
	 */
	if(write(fd, buf, n) != n) e(3);
}
static void do_stat(void *buf, int fd, int writable)
{
	int r;

	r = fstat(fd, (struct stat *) buf);

	/* should succeed if buf is writable */
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}
static void do_getdents(void *buf, int fd, int writable)
{
	struct stat sb;
	ssize_t r;

	if(fstat(fd, &sb) < 0) e(1);
	if(!S_ISDIR(sb.st_mode)) return; /* getdents() applies to directories only */
	r = getdents(fd, buf, PAGE_SIZE);
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}
static void do_readlink1(void *buf, int fd, int writable)
{
	char target[PAGE_SIZE];

	/* the system call just has to fail gracefully */
	(void)readlink(buf, target, sizeof(target));
}
#define NODENAME   "a"	/* symlink node name; original value elided, "a" assumed */
#define TARGETNAME "b"
static void do_readlink2(void *buf, int fd, int writable)
{
	ssize_t rl;

	if(symlink(TARGETNAME, NODENAME) < 0) e(1);
	/* buf is a void *, so sizeof(buf) would only be the pointer size;
	 * pass the page size that the buffers are known to have instead.
	 */
	rl = readlink(NODENAME, buf, PAGE_SIZE);

	/* if buf is writable, it should succeed, with a certain result */
	if(writable) {
		if(rl < 0) e(2);
		((char *) buf)[rl] = '\0';
		if(strcmp(buf, TARGETNAME)) {
			fprintf(stderr, "readlink: expected %s, got %s\n",
				TARGETNAME, (char *)buf);
			e(3);
		}
	} else {
		/* if buf is not writable, it should fail with EFAULT */
		if(rl >= 0) e(4);
		if(errno != EFAULT) e(5);
	}

	if(unlink(NODENAME) < 0) e(6);
}
static void do_symlink1(void *buf, int fd, int writable)
{
	/* the system call just has to fail gracefully */
	(void)symlink(buf, NODENAME);
}
static void do_symlink2(void *buf, int fd, int writable)
{
	/* the system call just has to fail gracefully */
	(void)symlink(NODENAME, buf);
}
static void do_open(void *buf, int fd, int writable)
{
	int r;

	/* the system call just has to fail gracefully */
	r = open(buf, O_RDONLY);
	if(r >= 0) close(r);
}
static void do_select1(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */

	/* the system call just has to fail gracefully */
	(void)select(1, buf, NULL, NULL, &timeout);
}
static void do_select2(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */

	/* the system call just has to fail gracefully */
	(void)select(1, NULL, buf, NULL, &timeout);
}
static void do_select3(void *buf, int fd, int writable)
{
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */

	/* the system call just has to fail gracefully */
	(void)select(1, NULL, NULL, buf, &timeout);
}
static void fillfile(int fd, int size)
{
	char *buf = malloc(size);

	if(size < 1 || size % PAGE_SIZE || !buf) { e(1); }
	memset(buf, 'A', size);
	buf[50] = '\0';	/* so it can be used as a filename arg */
	if(write(fd, buf, size) != size) { e(2); }
	if(lseek(fd, 0, SEEK_SET) < 0) { e(3); }
	free(buf);
}
static void make_buffers(int size,
	int *ret_fd_rw, int *ret_fd_ro,
	void **filebuf_rw, void **filebuf_ro, void **anonbuf)
{
	char fn_rw[] = "testfile_rw.XXXXXX", fn_ro[] = "testfile_ro.XXXXXX";
	*ret_fd_rw = mkstemp(fn_rw);
	*ret_fd_ro = mkstemp(fn_ro);

	if(size < 1 || size % PAGE_SIZE) { e(2); }
	if(*ret_fd_rw < 0) { e(1); }
	if(*ret_fd_ro < 0) { e(1); }
	fillfile(*ret_fd_rw, size);
	fillfile(*ret_fd_ro, size);
	if(fcntl(*ret_fd_rw, F_FLUSH_FS_CACHE) < 0) { e(4); }
	if(fcntl(*ret_fd_ro, F_FLUSH_FS_CACHE) < 0) { e(4); }

	if((*filebuf_rw = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_FILE, *ret_fd_rw, 0)) == MAP_FAILED) {
		e(5);
	}

	if((*filebuf_ro = mmap(0, size, PROT_READ,
		MAP_PRIVATE | MAP_FILE, *ret_fd_ro, 0)) == MAP_FAILED) {
		e(6);
	}

	if((*anonbuf = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
		e(7);
	}

	/* the fd's stay open, so the files may be unlinked right away */
	if(unlink(fn_rw) < 0) { e(12); }
	if(unlink(fn_ro) < 0) { e(12); }
}
static void forget_buffers(void *buf1, void *buf2, void *buf3, int fd1, int fd2, int size)
{
	if(munmap(buf1, size) < 0) { e(1); }
	if(munmap(buf2, size) < 0) { e(2); }
	if(munmap(buf3, size) < 0) { e(2); }
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(close(fd1) < 0) { e(4); }
	if(close(fd2) < 0) { e(4); }
}
#define NEXPERIMENTS 12
struct {
	void (*do_operation)(void * buf, int fd, int writable);
} experiments[NEXPERIMENTS] = {
	{ do_read },
	{ do_write },
	{ do_stat },
	{ do_getdents },
	{ do_readlink1 },
	{ do_readlink2 },
	{ do_symlink1 },
	{ do_symlink2 },
	{ do_open },
	{ do_select1 },
	{ do_select2 },
	{ do_select3 },
};
static void test_memory_types_vs_operations(void)
{
#define NFDS 4
#define BUFSIZE (10 * PAGE_SIZE)
	int exp, fds[NFDS];
	int f = 0, size = BUFSIZE;

	/* open some test fd's */
#define OPEN(fn, mode) { assert(f >= 0 && f < NFDS); \
	fds[f] = open(fn, mode, 0644); if(fds[f] < 0) { e(2); } f++; }
	OPEN("regular", O_RDWR | O_CREAT);
	OPEN(".", O_RDONLY);
	OPEN("/dev/ram", O_RDWR);
	OPEN("/dev/zero", O_RDWR);

	/* make sure the regular file has plenty of size to play with */
	fillfile(fds[0], BUFSIZE);

	/* and the ramdisk too */
	if(ioctl(fds[2], MIOCRAMSIZE, &size) < 0) { e(3); }

	for(exp = 0; exp < NEXPERIMENTS; exp++) {
		for(f = 0; f < NFDS; f++) {
			void *anonmem, *filemem_rw, *filemem_ro;
			int buffd_rw, buffd_ro;

			make_buffers(BUFSIZE, &buffd_rw, &buffd_ro,
				&filemem_rw, &filemem_ro, &anonmem);

			/* writable anonymous memory */
			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(10); }
			experiments[exp].do_operation(anonmem, fds[f], 1);

			/* writable file-mapped memory */
			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(11); }
			experiments[exp].do_operation(filemem_rw, fds[f], 1);

			/* read-only file-mapped memory */
			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(12); }
			experiments[exp].do_operation(filemem_ro, fds[f], 0);

			forget_buffers(filemem_rw, filemem_ro, anonmem,
				buffd_rw, buffd_ro, BUFSIZE);
		}
	}
}
static void basic_regression(void)
{
	int fd, fd1, fd2;
	ssize_t rb, wr;
	char buf[PAGE_SIZE*2];
	void *block, *block1, *block2;
#define BLOCKSIZE (PAGE_SIZE*10)
	block = mmap(0, BLOCKSIZE, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0);

	if(block == MAP_FAILED) { e(1); }

	memset(block, 0, BLOCKSIZE);

	/* shrink the mapping from the bottom */
	munmap(block, PAGE_SIZE);

	/* Next test: use a system call write() to access a block of
	 * unavailable file-mapped memory.
	 *
	 * This is a thorny corner case to make succeed transparently,
	 * because
	 *  (1) it is a filesystem that is doing the memory access
	 *      (copy from the block1 range in this process to the
	 *      FS), but it is also the FS needed to satisfy the range if it
	 *      isn't in the cache;
	 *  (2) there are two separate memory regions involved, requiring
	 *      separate VFS requests from VM to properly satisfy, requiring
	 *      some complex state to be kept.
	 */
	fd1 = open("../testsh1", O_RDONLY);
	fd2 = open("../testsh2", O_RDONLY);
	if(fd1 < 0 || fd2 < 0) { e(2); }

	/* just check that we can't mmap() a file writable */
	if(mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FILE, fd1, 0) != MAP_FAILED) {
		e(3);
	}

	/* check that we can mmap() a file MAP_SHARED readonly */
	if(mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE, fd1, 0) == MAP_FAILED) {
		e(3);
	}

	/* clear cache of files before mmap so pages won't be present already */
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(1); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(1); }

#define LOCATION1 (void *) 0x90000000
#define LOCATION2 ((void *)((char *)LOCATION1 + PAGE_SIZE))
	block1 = mmap(LOCATION1, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd1, 0);
	if(block1 == MAP_FAILED) { e(4); }
	if(block1 != LOCATION1) { e(5); }

	block2 = mmap(LOCATION2, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd2, 0);
	if(block2 == MAP_FAILED) { e(10); }
	if(block2 != LOCATION2) { e(11); }

	fd = open("testfile", O_CREAT | O_RDWR, 0644);
	if(fd < 0) { e(15); }

	/* write() using the mmap()ped memory as buffer */

	if((wr = write(fd, LOCATION1, sizeof(buf))) != sizeof(buf)) {
		fprintf(stderr, "wrote %zd bytes instead of %zu\n",
			wr, sizeof(buf));
		e(16);
	}

	/* verify written contents */

	if((rb = pread(fd, buf, sizeof(buf), 0)) != sizeof(buf)) {
		if(rb < 0) perror("pread");
		fprintf(stderr, "wrote %zd bytes\n", wr);
		fprintf(stderr, "read %zd bytes instead of %zu\n",
			rb, sizeof(buf));
		e(17);
	}

	if(memcmp(buf, LOCATION1, sizeof(buf))) {
		e(18);
	}

	close(fd);
	close(fd1);
	close(fd2);
}
/*
 * Test mmap on non-device-backed file systems - file systems that do not have
 * a buffer cache and therefore have to fake mmap support. We use procfs as
 * the target. The idea is that while we succeed in mapping in /proc/uptime,
 * we also get a new uptime value every time we map in the page -- VM must not
 * cache it.
 */
static void
nonedev_regression(void)
{
	int fd, fd2;
	char *buf;
	unsigned long uptime1, uptime2, uptime3;

	if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1);

	buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(2);

	/* the page tail, beyond the file contents, must be zero-filled */
	if (buf[4095] != 0) e(3);

	if ((uptime1 = atoi(buf)) == 0) e(4);

	if (munmap(buf, 4096) != 0) e(5);

	sleep(2);	/* let the uptime value advance */

	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE,
	    fd, 0);
	if (buf == MAP_FAILED) e(6);

	if (buf[4095] != 0) e(7);

	if ((uptime2 = atoi(buf)) == 0) e(8);

	if (uptime1 == uptime2) e(9);

	if (munmap(buf, 4096) != 0) e(10);

	sleep(2);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(11);

	if (buf[4095] != 0) e(12);

	if ((uptime3 = atoi(buf)) == 0) e(13);

	if (uptime1 == uptime3) e(14);
	if (uptime2 == uptime3) e(15);

	if (munmap(buf, 4096) != 0) e(16);

	/* Also test page faults not incurred by the process itself. */
	if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0)
		e(17);

	if (unlink("testfile") != 0) e(18);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(19);

	/* this write() makes VFS fault in the mapped page on our behalf */
	if (write(fd2, buf, 10) != 10) e(20);

	if (munmap(buf, 4096) != 0) e(21);

	close(fd2);
	close(fd);
}
/*
 * Regression test for a nasty memory-mapped file corruption bug, which is not
 * easy to reproduce but, before being solved, did occur in practice every once
 * in a while. The executive summary is that through stale inode associations,
 * VM could end up using an old block to satisfy a memory mapping.
 *
 * This subtest relies on a number of assumptions regarding allocation and
 * reuse of inode numbers and blocks. These assumptions hold for MFS but
 * possibly no other file system. However, if the subtest's assumptions are
 * not met, it will simply succeed. (A small sketch of the inode-recycling
 * assumption follows this function.)
 */
static void
corruption_regression(void)
{
	char *buf, *ptr;
	ssize_t block_size, size;
	int fd, fd2;
	struct statvfs sf;
	struct stat st;

	if (statvfs(".", &sf) != 0) e(0);
	block_size = sf.f_bsize;

	if ((buf = malloc(block_size * 2)) == NULL) e(0);

	/*
	 * We first need a file that is just large enough that it requires the
	 * allocation of a metadata block - an indirect block - when more data
	 * is written to it. This is fileA. We keep it open throughout the
	 * test so we can unlink it immediately.
	 */
	if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileA") != 0) e(0);

	/*
	 * Write to fileA until its next block requires the allocation of an
	 * additional metadata block - an indirect block.
	 */
	size = 0;
	memset(buf, 'A', block_size);
	do {
		/*
		 * Repeatedly write an extra block, until the file consists of
		 * more blocks than just the file data.
		 */
		if (write(fd, buf, block_size) != block_size) e(0);
		size += block_size;
		if (size >= block_size * 64) {
			/*
			 * It doesn't look like this is going to work.
			 * Skip this subtest altogether.
			 */
			if (close(fd) != 0) e(0);
			free(buf);
			return;
		}
		if (fstat(fd, &st) != 0) e(0);
	} while (st.st_blocks * 512 == size);

	/* Once we get there, go one step back by truncating by one block. */
	size -= block_size;	/* for MFS, size will end up being 7*block_size */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Create a first file, fileB, and write two blocks to it. FileB's
	 * blocks are going to end up in the secondary VM cache, associated to
	 * fileB's inode number (and two different offsets within the file).
	 * The block cache does not know about files getting deleted, so we can
	 * unlink fileB immediately after creating it. So far so good.
	 */
	if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileB") != 0) e(0);
	memset(buf, 'B', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Write one extra block to fileA, hoping that this causes allocation
	 * of a metadata block as well. This is why we tried to get fileA to
	 * the point that one more block would also require the allocation of a
	 * metadata block. Our intent is to recycle the blocks that we just
	 * allocated and freed for fileB. As of writing, for the metadata
	 * block, this will *not* break the association with fileB's inode,
	 * which by itself is not a problem, yet crucial to reproducing
	 * the actual problem a bit later. Note that the test does not rely on
	 * whether the file system allocates the data block or the metadata
	 * block first, although it does need reverse deallocation (see below).
	 */
	memset(buf, 'A', block_size);
	if (write(fd, buf, block_size) != block_size) e(0);

	/*
	 * Create a new file, fileC, which recycles the inode number of fileB,
	 * but uses two new blocks to store its data. These new blocks will
	 * get associated to the fileB inode number, and one of them will
	 * thereby eclipse (but not remove) the association of fileA's metadata
	 * block to the inode of fileB.
	 */
	if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileC") != 0) e(0);
	memset(buf, 'C', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Free up the extra fileA blocks for reallocation, in particular
	 * including the metadata block. Again, this will not affect the
	 * contents of the VM cache in any way. FileA's metadata block remains
	 * cached in VM, with the inode association for fileB's block.
	 */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Now create yet one more file, fileD, which also recycles the inode
	 * number of fileB and fileC. Write two blocks to it; these blocks
	 * should recycle the blocks we just freed. One of these is fileA's
	 * just-freed metadata block, for which the new inode association will
	 * be equal to the inode association it had already (as long as blocks
	 * are freed in reverse order of their allocation, which happens to be
	 * the case for MFS). As a result, the block is not updated in the VM
	 * cache, and VM will therefore continue to see the inode association
	 * for the corresponding block of fileC which is still in the VM cache.
	 */
	if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1)
		e(0);
	memset(buf, 'D', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);

	ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_PRIVATE | MAP_FILE,
	    fd2, 0);
	if (ptr == MAP_FAILED) e(0);

	/*
	 * Finally, we can test the issue. Since fileC's block is still the
	 * block for which VM has the corresponding inode association, VM will
	 * now find and map in fileC's block, instead of fileD's block. The
	 * result is that we get a memory-mapped area with stale contents,
	 * different from those of the underlying file.
	 */
	if (memcmp(buf, ptr, block_size * 2)) e(0);

	/* Clean up. */
	if (munmap(ptr, block_size * 2) != 0) e(0);
	if (close(fd2) != 0) e(0);
	if (unlink("fileD") != 0) e(0);
	if (close(fd) != 0) e(0);
	free(buf);
}
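/*
 * Small sketch (not invoked by the test) of the inode-recycling assumption
 * the subtest above depends on: on MFS, creating a file right after another
 * was deleted tends to reuse the freed inode number. POSIX does not
 * guarantee this; the helper merely reports whether it holds here. The
 * file names are illustrative.
 */
static int
example_inode_recycled(void)
{
	struct stat st1, st2;
	int fd;

	if ((fd = open("inoA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) < 0) e(0);
	if (fstat(fd, &st1) != 0) e(0);
	if (close(fd) != 0) e(0);
	if (unlink("inoA") != 0) e(0);

	if ((fd = open("inoB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) < 0) e(0);
	if (fstat(fd, &st2) != 0) e(0);
	if (close(fd) != 0) e(0);
	if (unlink("inoB") != 0) e(0);

	return st1.st_ino == st2.st_ino;
}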
/*
 * Test mmap on file holes. Holes are a tricky case with the current VM
 * implementation. There are two main issues. First, whenever a file data
 * block is freed, VM has to know about this, or it will later blindly map in
 * the old data. Thus, file systems explicitly tell VM (through libminixfs)
 * whenever a block is freed, upon which the VM cache forgets the block.
 * Second, blocks are accessed primarily by a <dev,dev_off> pair and only
 * additionally by a <ino,ino_off> pair. Holes have no meaningful value for
 * the first pair, but do need to be registered in VM with the second pair, or
 * accessing them will generate a segmentation fault. Thus, file systems
 * explicitly tell VM (through libminixfs) when a hole is being peeked;
 * libminixfs currently fakes a device offset to make this work. (A minimal
 * sketch of the hole-mapping idiom follows this function.)
 */
static void
hole_regression(void)
{
	struct statvfs st;
	size_t block_size;
	char *buf;
	int fd;

	if (statvfs(".", &st) < 0) e(1);

	block_size = st.f_bsize;

	if ((buf = malloc(block_size)) == NULL) e(2);

	if ((fd = open("testfile", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0)
		e(3);

	if (unlink("testfile") != 0) e(4);

	/*
	 * We perform the test twice, in a not-so-perfect attempt to test the
	 * two aspects independently. The first part immediately creates a
	 * hole, and is supposed to fail only if reporting holes to VM does not
	 * work. However, it may also fail if a page for a previous file with
	 * the same inode number as "testfile" is still in the VM cache.
	 */

	memset(buf, 12, block_size);

	if (write(fd, buf, block_size) != block_size) e(5);

	/* skipping two blocks creates the hole */
	if (lseek(fd, block_size * 2, SEEK_CUR) != block_size * 3) e(6);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(7);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(8);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(9);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(10);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(11);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(12);

	if (munmap(buf, 4 * block_size) != 0) e(13);

	/*
	 * The second part first creates file content and only then turns part
	 * of it into a file hole, thus ensuring that VM has previously cached
	 * pages for the blocks that are freed. The test will fail if VM keeps
	 * the pages around in its cache.
	 */
	if ((buf = malloc(block_size)) == NULL) e(14);

	if (lseek(fd, block_size, SEEK_SET) != block_size) e(15);

	memset(buf, 34, block_size);

	if (write(fd, buf, block_size) != block_size) e(16);

	memset(buf, 56, block_size);

	if (write(fd, buf, block_size) != block_size) e(17);

	/* truncation frees the just-written blocks, recreating the hole */
	if (ftruncate(fd, block_size) != 0) e(18);

	if (lseek(fd, block_size * 3, SEEK_SET) != block_size * 3) e(19);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(20);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(21);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(22);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(23);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(24);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(25);

	if (munmap(buf, 4 * block_size) != 0) e(26);

	close(fd);
}
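/*
 * A minimal sketch of the hole-mapping idiom exercised above (not invoked by
 * the test, and assuming the block size does not exceed the page size):
 * writing past EOF leaves a gap that must map in as zeroes, not as stale
 * block contents. The file name and error numbers are illustrative.
 */
static void
example_hole_reads_as_zero(void)
{
	char c = 'x', *p;
	int fd;

	if ((fd = open("holefile", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0)
		e(1);
	if (unlink("holefile") != 0) e(2);

	/* Write one byte at offset PAGE_SIZE; offsets 0..PAGE_SIZE-1 are
	 * now a hole.
	 */
	if (pwrite(fd, &c, 1, PAGE_SIZE) != 1) e(3);

	if ((p = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(4);

	/* The hole must read back as zero-filled memory. */
	if (p[0] != 0 || p[PAGE_SIZE - 1] != 0) e(5);

	if (munmap(p, PAGE_SIZE) != 0) e(6);
	close(fd);
}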
/*
 * Test that soft faults during file system I/O do not cause functions to
 * return partial I/O results.
 *
 * We refer to the faults that are caused internally within the operating
 * system as a result of the deadlock mitigation described at the top of this
 * file, as a particular class of "soft faults". Such soft faults may occur in
 * the middle of an I/O operation, and general I/O semantics dictate that upon
 * partial success, the partial success is returned (and *not* an error). As a
 * result, these soft faults, if not handled as special cases, may cause even
 * common file system operations such as read(2) on a regular file to return
 * fewer bytes than requested. Such unexpected short reads are typically not
 * handled well by userland, and the OS must prevent them from occurring if it
 * can. Note that read(2) is the most problematic, but certainly not the only,
 * case where this problem can occur.
 *
 * Unfortunately, several file system services are not following the proper
 * general I/O semantics - and this includes MFS. Therefore, for now, we have
 * to test this case using block device I/O, which does do the right thing.
 * In this test we hope that the root file system is mounted on a block device
 * usable for (read-only!) testing purposes. (For contrast, a sketch of the
 * classic userland short-read retry loop follows this function.)
 */
static void
softfault_partial(void)
{
	struct statvfs stf;
	struct stat st;
	char *buf, *buf2;
	size_t size;
	int fd;

	if (statvfs("/", &stf) != 0) e(0);

	/*
	 * If the root file system is not mounted off a block device, or if we
	 * cannot open that device ourselves, simply skip this subtest.
	 */
	if (stat(stf.f_mntfromname, &st) != 0 || !S_ISBLK(st.st_mode))
		return; /* skip subtest */

	if ((fd = open(stf.f_mntfromname, O_RDONLY)) == -1)
		return; /* skip subtest */

	/*
	 * See if we can read in the first two full blocks, or two pages worth
	 * of data, whichever is larger. If that fails, there is no point in
	 * continuing the test.
	 */
	size = MAX(stf.f_bsize, PAGE_SIZE) * 2;

	if ((buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED) e(0);

	if (read(fd, buf, size) != size) {
		if (munmap(buf, size) != 0) e(0);
		close(fd);
		return; /* skip subtest */
	}

	lseek(fd, 0, SEEK_SET);

	/*
	 * Now attempt a read to a partially faulted-in buffer. The first time
	 * around, the I/O transfer will generate a fault and return partial
	 * success. In that case, the entire I/O transfer should be retried
	 * after faulting in the missing page(s), thus resulting in the read
	 * succeeding in full.
	 */
	if ((buf2 = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0)) == MAP_FAILED) e(0);
	buf2[0] = '\0'; /* fault in the first page */

	if (read(fd, buf2, size) != size) e(0);

	/* The result should be correct, too. */
	if (memcmp(buf, buf2, size)) e(0);

	if (munmap(buf, size) != 0) e(0);
	if (munmap(buf2, size) != 0) e(0);
	close(fd);
}
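/*
 * For contrast, the classic userland workaround (not used by this test): a
 * retry loop that tolerates short reads. The guarantee verified above means
 * userland should NOT need such a loop for regular files and block devices
 * merely because the kernel hit an internal soft fault mid-transfer.
 * Sketch only.
 */
static ssize_t
read_full(int fd, void *buf, size_t size)
{
	size_t off = 0;
	ssize_t r;

	while (off < size) {
		r = read(fd, (char *)buf + off, size - off);
		if (r < 0) return -1;	/* hard error */
		if (r == 0) break;	/* EOF */
		off += r;
	}

	return off;
}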
int
main(int argc, char *argv[])
{
	int i, iter = 2;
	int bigflag = 0;	/* how this flag gets set was elided in the original */

	start(74);

	basic_regression();

	nonedev_regression();

	/*
	 * Any inode or block allocation happening concurrently with this
	 * subtest will make the subtest succeed without testing the actual
	 * issue. Thus, repeat the subtest a fair number of times.
	 */
	for (i = 0; i < 10; i++)
		corruption_regression();

	hole_regression();

	test_memory_types_vs_operations();

	softfault_partial();

	makefiles(MAXFILES);

	cachequiet(!bigflag);
	if(bigflag) iter = 3;

	/* Try various combinations of working set sizes
	 * and block sizes in order to specifically
	 * target the primary cache, then the primary+secondary
	 * caches, then primary+secondary cache eviction.
	 */

	if(dotest(PAGE_SIZE,    100, iter)) e(5);
	if(dotest(PAGE_SIZE*2,  100, iter)) e(2);
	if(dotest(PAGE_SIZE*3,  100, iter)) e(3);
	if(dotest(PAGE_SIZE,  20000, iter)) e(5);

	if(bigflag) {
		u32_t totalmem, freemem, cachedmem;
		if(dotest(PAGE_SIZE, 150000, iter)) e(5);
		getmem(&totalmem, &freemem, &cachedmem);
		if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6);
	}

	quit();

	return 0;
}