4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
35 #include <sys/crypto/icp.h>
36 #include <sys/processor.h>
37 #include <sys/rrwlock.h>
40 #include <sys/systeminfo.h>
42 #include <sys/utsname.h>
43 #include <sys/zfs_context.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zfs_vfsops.h>
46 #include <sys/zstd/zstd.h>
48 #include <zfs_fletcher.h>
52 * Emulation of kernel services in userland.
56 char hw_serial
[HW_HOSTID_LEN
];
57 struct utsname hw_utsname
;
59 /* If set, all blocks read will be copied to the specified directory. */
60 char *vn_dumpdir
= NULL
;
62 /* this only exists to have its address taken */
66 * =========================================================================
68 * =========================================================================
70 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While
71 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
72 * the expected stack depth while small enough to avoid exhausting address
73 * space with high thread counts.
75 #define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)
76 #define TS_STACK_MAX (256 * 1024)
78 struct zk_thread_wrapper
{
84 zk_thread_wrapper(void *arg
)
86 struct zk_thread_wrapper ztw
;
87 memcpy(&ztw
, arg
, sizeof (ztw
));
94 zk_thread_create(void (*func
)(void *), void *arg
, size_t stksize
, int state
)
99 struct zk_thread_wrapper
*ztw
;
100 int detachstate
= PTHREAD_CREATE_DETACHED
;
102 VERIFY0(pthread_attr_init(&attr
));
104 if (state
& TS_JOINABLE
)
105 detachstate
= PTHREAD_CREATE_JOINABLE
;
107 VERIFY0(pthread_attr_setdetachstate(&attr
, detachstate
));
110 * We allow the default stack size in user space to be specified by
111 * setting the ZFS_STACK_SIZE environment variable. This allows us
112 * the convenience of observing and debugging stack overruns in
113 * user space. Explicitly specified stack sizes will be honored.
114 * The usage of ZFS_STACK_SIZE is discussed further in the
115 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
118 stkstr
= getenv("ZFS_STACK_SIZE");
121 stksize
= TS_STACK_MAX
;
123 stksize
= MAX(atoi(stkstr
), TS_STACK_MIN
);
126 VERIFY3S(stksize
, >, 0);
127 stksize
= P2ROUNDUP(MAX(stksize
, TS_STACK_MIN
), PAGESIZE
);
130 * If this ever fails, it may be because the stack size is not a
131 * multiple of system page size.
133 VERIFY0(pthread_attr_setstacksize(&attr
, stksize
));
134 VERIFY0(pthread_attr_setguardsize(&attr
, PAGESIZE
));
136 VERIFY(ztw
= malloc(sizeof (*ztw
)));
139 VERIFY0(pthread_create(&tid
, &attr
, zk_thread_wrapper
, ztw
));
140 VERIFY0(pthread_attr_destroy(&attr
));
142 return ((void *)(uintptr_t)tid
);
146 * =========================================================================
148 * =========================================================================
151 kstat_create(const char *module
, int instance
, const char *name
,
152 const char *class, uchar_t type
, ulong_t ndata
, uchar_t ks_flag
)
154 (void) module
, (void) instance
, (void) name
, (void) class, (void) type
,
155 (void) ndata
, (void) ks_flag
;
160 kstat_install(kstat_t
*ksp
)
166 kstat_delete(kstat_t
*ksp
)
172 kstat_set_raw_ops(kstat_t
*ksp
,
173 int (*headers
)(char *buf
, size_t size
),
174 int (*data
)(char *buf
, size_t size
, void *data
),
175 void *(*addr
)(kstat_t
*ksp
, loff_t index
))
177 (void) ksp
, (void) headers
, (void) data
, (void) addr
;
181 * =========================================================================
183 * =========================================================================
187 mutex_init(kmutex_t
*mp
, char *name
, int type
, void *cookie
)
189 (void) name
, (void) type
, (void) cookie
;
190 VERIFY0(pthread_mutex_init(&mp
->m_lock
, NULL
));
191 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
195 mutex_destroy(kmutex_t
*mp
)
197 VERIFY0(pthread_mutex_destroy(&mp
->m_lock
));
201 mutex_enter(kmutex_t
*mp
)
203 VERIFY0(pthread_mutex_lock(&mp
->m_lock
));
204 mp
->m_owner
= pthread_self();
208 mutex_tryenter(kmutex_t
*mp
)
210 int error
= pthread_mutex_trylock(&mp
->m_lock
);
212 mp
->m_owner
= pthread_self();
215 VERIFY3S(error
, ==, EBUSY
);
221 mutex_exit(kmutex_t
*mp
)
223 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
224 VERIFY0(pthread_mutex_unlock(&mp
->m_lock
));
228 * =========================================================================
230 * =========================================================================
234 rw_init(krwlock_t
*rwlp
, char *name
, int type
, void *arg
)
236 (void) name
, (void) type
, (void) arg
;
237 VERIFY0(pthread_rwlock_init(&rwlp
->rw_lock
, NULL
));
238 rwlp
->rw_readers
= 0;
243 rw_destroy(krwlock_t
*rwlp
)
245 VERIFY0(pthread_rwlock_destroy(&rwlp
->rw_lock
));
249 rw_enter(krwlock_t
*rwlp
, krw_t rw
)
251 if (rw
== RW_READER
) {
252 VERIFY0(pthread_rwlock_rdlock(&rwlp
->rw_lock
));
253 atomic_inc_uint(&rwlp
->rw_readers
);
255 VERIFY0(pthread_rwlock_wrlock(&rwlp
->rw_lock
));
256 rwlp
->rw_owner
= pthread_self();
261 rw_exit(krwlock_t
*rwlp
)
263 if (RW_READ_HELD(rwlp
))
264 atomic_dec_uint(&rwlp
->rw_readers
);
268 VERIFY0(pthread_rwlock_unlock(&rwlp
->rw_lock
));
272 rw_tryenter(krwlock_t
*rwlp
, krw_t rw
)
277 error
= pthread_rwlock_tryrdlock(&rwlp
->rw_lock
);
279 error
= pthread_rwlock_trywrlock(&rwlp
->rw_lock
);
283 atomic_inc_uint(&rwlp
->rw_readers
);
285 rwlp
->rw_owner
= pthread_self();
290 VERIFY3S(error
, ==, EBUSY
);
296 zone_get_hostid(void *zonep
)
299 * We're emulating the system's hostid in userland.
302 return (strtoul(hw_serial
, NULL
, 10));
306 rw_tryupgrade(krwlock_t
*rwlp
)
313 * =========================================================================
314 * condition variables
315 * =========================================================================
319 cv_init(kcondvar_t
*cv
, char *name
, int type
, void *arg
)
321 (void) name
, (void) type
, (void) arg
;
322 VERIFY0(pthread_cond_init(cv
, NULL
));
326 cv_destroy(kcondvar_t
*cv
)
328 VERIFY0(pthread_cond_destroy(cv
));
332 cv_wait(kcondvar_t
*cv
, kmutex_t
*mp
)
334 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
335 VERIFY0(pthread_cond_wait(cv
, &mp
->m_lock
));
336 mp
->m_owner
= pthread_self();
340 cv_wait_sig(kcondvar_t
*cv
, kmutex_t
*mp
)
347 cv_timedwait(kcondvar_t
*cv
, kmutex_t
*mp
, clock_t abstime
)
354 delta
= abstime
- ddi_get_lbolt();
358 VERIFY(gettimeofday(&tv
, NULL
) == 0);
360 ts
.tv_sec
= tv
.tv_sec
+ delta
/ hz
;
361 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% hz
) * (NANOSEC
/ hz
);
362 if (ts
.tv_nsec
>= NANOSEC
) {
364 ts
.tv_nsec
-= NANOSEC
;
367 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
368 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
369 mp
->m_owner
= pthread_self();
371 if (error
== ETIMEDOUT
)
380 cv_timedwait_hires(kcondvar_t
*cv
, kmutex_t
*mp
, hrtime_t tim
, hrtime_t res
,
389 ASSERT(flag
== 0 || flag
== CALLOUT_FLAG_ABSOLUTE
);
392 if (flag
& CALLOUT_FLAG_ABSOLUTE
)
393 delta
-= gethrtime();
398 VERIFY0(gettimeofday(&tv
, NULL
));
400 ts
.tv_sec
= tv
.tv_sec
+ delta
/ NANOSEC
;
401 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% NANOSEC
);
402 if (ts
.tv_nsec
>= NANOSEC
) {
404 ts
.tv_nsec
-= NANOSEC
;
407 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
408 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
409 mp
->m_owner
= pthread_self();
411 if (error
== ETIMEDOUT
)
420 cv_signal(kcondvar_t
*cv
)
422 VERIFY0(pthread_cond_signal(cv
));
426 cv_broadcast(kcondvar_t
*cv
)
428 VERIFY0(pthread_cond_broadcast(cv
));
432 * =========================================================================
434 * =========================================================================
/*
 * Userland stub for seq_printf(): nothing is formatted or written.
 *
 * m   - sequence file (ignored)
 * fmt - format string (ignored, as are any variadic arguments)
 */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{
	(void) m;
	(void) fmt;
}
444 procfs_list_install(const char *module
,
445 const char *submodule
,
448 procfs_list_t
*procfs_list
,
449 int (*show
)(struct seq_file
*f
, void *p
),
450 int (*show_header
)(struct seq_file
*f
),
451 int (*clear
)(procfs_list_t
*procfs_list
),
452 size_t procfs_list_node_off
)
454 (void) module
, (void) submodule
, (void) name
, (void) mode
, (void) show
,
455 (void) show_header
, (void) clear
;
456 mutex_init(&procfs_list
->pl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
457 list_create(&procfs_list
->pl_list
,
458 procfs_list_node_off
+ sizeof (procfs_list_node_t
),
459 procfs_list_node_off
+ offsetof(procfs_list_node_t
, pln_link
));
460 procfs_list
->pl_next_id
= 1;
461 procfs_list
->pl_node_offset
= procfs_list_node_off
;
465 procfs_list_uninstall(procfs_list_t
*procfs_list
)
471 procfs_list_destroy(procfs_list_t
*procfs_list
)
473 ASSERT(list_is_empty(&procfs_list
->pl_list
));
474 list_destroy(&procfs_list
->pl_list
);
475 mutex_destroy(&procfs_list
->pl_lock
);
478 #define NODE_ID(procfs_list, obj) \
479 (((procfs_list_node_t *)(((char *)obj) + \
480 (procfs_list)->pl_node_offset))->pln_id)
483 procfs_list_add(procfs_list_t
*procfs_list
, void *p
)
485 ASSERT(MUTEX_HELD(&procfs_list
->pl_lock
));
486 NODE_ID(procfs_list
, p
) = procfs_list
->pl_next_id
++;
487 list_insert_tail(&procfs_list
->pl_list
, p
);
491 * =========================================================================
493 * =========================================================================
497 * =========================================================================
498 * Figure out which debugging statements to print
499 * =========================================================================
502 static char *dprintf_string
;
503 static int dprintf_print_all
;
506 dprintf_find_string(const char *string
)
508 char *tmp_str
= dprintf_string
;
509 int len
= strlen(string
);
512 * Find out if this is a string we want to print.
513 * String format: file1.c,function_name1,file2.c,file3.c
516 while (tmp_str
!= NULL
) {
517 if (strncmp(tmp_str
, string
, len
) == 0 &&
518 (tmp_str
[len
] == ',' || tmp_str
[len
] == '\0'))
520 tmp_str
= strchr(tmp_str
, ',');
522 tmp_str
++; /* Get rid of , */
528 dprintf_setup(int *argc
, char **argv
)
533 * Debugging can be specified two ways: by setting the
534 * environment variable ZFS_DEBUG, or by including a
535 * "debug=..." argument on the command line. The command
536 * line setting overrides the environment variable.
539 for (i
= 1; i
< *argc
; i
++) {
540 int len
= strlen("debug=");
541 /* First look for a command line argument */
542 if (strncmp("debug=", argv
[i
], len
) == 0) {
543 dprintf_string
= argv
[i
] + len
;
544 /* Remove from args */
545 for (j
= i
; j
< *argc
; j
++)
552 if (dprintf_string
== NULL
) {
553 /* Look for ZFS_DEBUG environment variable */
554 dprintf_string
= getenv("ZFS_DEBUG");
558 * Are we just turning on all debugging?
560 if (dprintf_find_string("on"))
561 dprintf_print_all
= 1;
563 if (dprintf_string
!= NULL
)
564 zfs_flags
|= ZFS_DEBUG_DPRINTF
;
568 * =========================================================================
570 * =========================================================================
573 __dprintf(boolean_t dprint
, const char *file
, const char *func
,
574 int line
, const char *fmt
, ...)
576 /* Get rid of annoying "../common/" prefix to filename. */
577 const char *newfile
= zfs_basename(file
);
581 /* dprintf messages are printed immediately */
583 if (!dprintf_print_all
&&
584 !dprintf_find_string(newfile
) &&
585 !dprintf_find_string(func
))
588 /* Print out just the function name if requested */
590 if (dprintf_find_string("pid"))
591 (void) printf("%d ", getpid());
592 if (dprintf_find_string("tid"))
593 (void) printf("%ju ",
594 (uintmax_t)(uintptr_t)pthread_self());
595 if (dprintf_find_string("cpu"))
596 (void) printf("%u ", getcpuid());
597 if (dprintf_find_string("time"))
598 (void) printf("%llu ", gethrtime());
599 if (dprintf_find_string("long"))
600 (void) printf("%s, line %d: ", newfile
, line
);
601 (void) printf("dprintf: %s: ", func
);
603 (void) vprintf(fmt
, adx
);
607 /* zfs_dbgmsg is logged for dumping later */
613 buf
= umem_alloc(size
, UMEM_NOFAIL
);
614 i
= snprintf(buf
, size
, "%s:%d:%s(): ", newfile
, line
, func
);
618 (void) vsnprintf(buf
+ i
, size
- i
, fmt
, adx
);
624 umem_free(buf
, size
);
629 * =========================================================================
630 * cmn_err() and panic()
631 * =========================================================================
633 static char ce_prefix
[CE_IGNORE
][10] = { "", "NOTICE: ", "WARNING: ", "" };
634 static char ce_suffix
[CE_IGNORE
][2] = { "", "\n", "\n", "" };
/*
 * Report a fatal error and terminate the process.
 *
 * fmt - printf-style format string
 * adx - arguments for fmt
 *
 * Writes "error: ", the formatted message and a newline to stderr, then
 * calls abort().  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	/* The userland analogue of a crash dump. */
	abort();
}
647 panic(const char *fmt
, ...)
657 vcmn_err(int ce
, const char *fmt
, va_list adx
)
661 if (ce
!= CE_NOTE
) { /* suppress noise in userland stress testing */
662 (void) fprintf(stderr
, "%s", ce_prefix
[ce
]);
663 (void) vfprintf(stderr
, fmt
, adx
);
664 (void) fprintf(stderr
, "%s", ce_suffix
[ce
]);
669 cmn_err(int ce
, const char *fmt
, ...)
674 vcmn_err(ce
, fmt
, adx
);
679 * =========================================================================
681 * =========================================================================
687 (void) poll(0, 0, ticks
* (1000 / hz
));
/*
 * Find the highest set bit of a 64-bit value.
 *
 * i - value to inspect
 *
 * Returns the bit number + 1 of the most significant set bit, or 0 when no
 * bit is set.  Relies on __builtin_clzll(), which both GCC and Clang provide.
 */
int
highbit64(uint64_t i)
{
	const int width = NBBY * sizeof (uint64_t);

	/* __builtin_clzll() is undefined for 0, so answer that case directly. */
	return (i == 0 ? 0 : width - __builtin_clzll(i));
}
705 * Find lowest one bit set.
706 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
707 * The __builtin_ffsll() function is supported by both GCC and Clang.
715 return (__builtin_ffsll(i
));
718 const char *random_path
= "/dev/random";
719 const char *urandom_path
= "/dev/urandom";
720 static int random_fd
= -1, urandom_fd
= -1;
725 VERIFY((random_fd
= open(random_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
726 VERIFY((urandom_fd
= open(urandom_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
740 random_get_bytes_common(uint8_t *ptr
, size_t len
, int fd
)
748 bytes
= read(fd
, ptr
, resid
);
749 ASSERT3S(bytes
, >=, 0);
758 random_get_bytes(uint8_t *ptr
, size_t len
)
760 return (random_get_bytes_common(ptr
, len
, random_fd
));
764 random_get_pseudo_bytes(uint8_t *ptr
, size_t len
)
766 return (random_get_bytes_common(ptr
, len
, urandom_fd
));
770 ddi_strtoul(const char *hw_serial
, char **nptr
, int base
, unsigned long *result
)
775 *result
= strtoul(hw_serial
, &end
, base
);
782 ddi_strtoull(const char *str
, char **nptr
, int base
, u_longlong_t
*result
)
787 *result
= strtoull(str
, &end
, base
);
796 return (&hw_utsname
);
800 * =========================================================================
801 * kernel emulation setup & teardown
802 * =========================================================================
805 umem_out_of_memory(void)
807 char errmsg
[] = "out of memory -- generating core dump\n";
809 (void) fprintf(stderr
, "%s", errmsg
);
815 kernel_init(int mode
)
817 extern uint_t rrw_tsd_key
;
819 umem_nofail_callback(umem_out_of_memory
);
821 physmem
= sysconf(_SC_PHYS_PAGES
);
823 dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t
)physmem
,
824 (double)physmem
* sysconf(_SC_PAGE_SIZE
) / (1ULL << 30));
826 (void) snprintf(hw_serial
, sizeof (hw_serial
), "%ld",
827 (mode
& SPA_MODE_WRITE
) ? get_system_hostid() : 0);
831 VERIFY0(uname(&hw_utsname
));
838 spa_init((spa_mode_t
)mode
);
842 tsd_create(&rrw_tsd_key
, rrw_tsd_destroy
);
867 crgetruid(cred_t
*cr
)
881 crgetngroups(cred_t
*cr
)
888 crgetgroups(cred_t
*cr
)
895 zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
)
897 (void) name
, (void) cr
;
902 zfs_secpolicy_rename_perms(const char *from
, const char *to
, cred_t
*cr
)
904 (void) from
, (void) to
, (void) cr
;
909 zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
)
911 (void) name
, (void) cr
;
916 secpolicy_zfs(const cred_t
*cr
)
923 secpolicy_zfs_proc(const cred_t
*cr
, proc_t
*proc
)
925 (void) cr
, (void) proc
;
930 ksid_lookupdomain(const char *dom
)
934 kd
= umem_zalloc(sizeof (ksiddomain_t
), UMEM_NOFAIL
);
935 kd
->kd_name
= spa_strdup(dom
);
940 ksiddomain_rele(ksiddomain_t
*ksid
)
942 spa_strfree(ksid
->kd_name
);
943 umem_free(ksid
, sizeof (ksiddomain_t
));
947 kmem_vasprintf(const char *fmt
, va_list adx
)
952 va_copy(adx_copy
, adx
);
953 VERIFY(vasprintf(&buf
, fmt
, adx_copy
) != -1);
960 kmem_asprintf(const char *fmt
, ...)
966 VERIFY(vasprintf(&buf
, fmt
, adx
) != -1);
973 zfs_onexit_fd_hold(int fd
, minor_t
*minorp
)
981 zfs_onexit_fd_rele(zfs_file_t
*fp
)
987 zfs_onexit_add_cb(minor_t minor
, void (*func
)(void *), void *data
,
988 uint64_t *action_handle
)
990 (void) minor
, (void) func
, (void) data
, (void) action_handle
;
995 spl_fstrans_mark(void)
997 return ((fstrans_cookie_t
)0);
1001 spl_fstrans_unmark(fstrans_cookie_t cookie
)
1007 __spl_pf_fstrans_check(void)
1013 kmem_cache_reap_active(void)
1018 void *zvol_tag
= "zvol_tag";
1021 zvol_create_minor(const char *name
)
1027 zvol_create_minors_recursive(const char *name
)
1033 zvol_remove_minors(spa_t
*spa
, const char *name
, boolean_t async
)
1035 (void) spa
, (void) name
, (void) async
;
1039 zvol_rename_minors(spa_t
*spa
, const char *oldname
, const char *newname
,
1042 (void) spa
, (void) oldname
, (void) newname
, (void) async
;
1048 * path - fully qualified path to file
1049 * flags - file attributes O_READ / O_WRITE / O_EXCL
1050 * fpp - pointer to return file pointer
1052 * Returns 0 on success underlying error on failure.
1055 zfs_file_open(const char *path
, int flags
, int mode
, zfs_file_t
**fpp
)
1064 if (!(flags
& O_CREAT
) && stat64(path
, &st
) == -1)
1067 if (!(flags
& O_CREAT
) && S_ISBLK(st
.st_mode
))
1070 if (flags
& O_CREAT
)
1071 old_umask
= umask(0);
1073 fd
= open64(path
, flags
, mode
);
1077 if (flags
& O_CREAT
)
1078 (void) umask(old_umask
);
1080 if (vn_dumpdir
!= NULL
) {
1081 char *dumppath
= umem_zalloc(MAXPATHLEN
, UMEM_NOFAIL
);
1082 const char *inpath
= zfs_basename(path
);
1084 (void) snprintf(dumppath
, MAXPATHLEN
,
1085 "%s/%s", vn_dumpdir
, inpath
);
1086 dump_fd
= open64(dumppath
, O_CREAT
| O_WRONLY
, 0666);
1087 umem_free(dumppath
, MAXPATHLEN
);
1088 if (dump_fd
== -1) {
1097 (void) fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
1099 fp
= umem_zalloc(sizeof (zfs_file_t
), UMEM_NOFAIL
);
1101 fp
->f_dump_fd
= dump_fd
;
1108 zfs_file_close(zfs_file_t
*fp
)
1111 if (fp
->f_dump_fd
!= -1)
1112 close(fp
->f_dump_fd
);
1114 umem_free(fp
, sizeof (zfs_file_t
));
1118 * Stateful write - use os internal file pointer to determine where to
1119 * write and update on successful completion.
1121 * fp - pointer to file (pipe, socket, etc) to write to
1122 * buf - buffer to write
1123 * count - # of bytes to write
1124 * resid - pointer to count of unwritten bytes (if short write)
1126 * Returns 0 on success errno on failure.
1129 zfs_file_write(zfs_file_t
*fp
, const void *buf
, size_t count
, ssize_t
*resid
)
1133 rc
= write(fp
->f_fd
, buf
, count
);
1138 *resid
= count
- rc
;
1139 } else if (rc
!= count
) {
1147 * Stateless write - os internal file pointer is not updated.
1149 * fp - pointer to file (pipe, socket, etc) to write to
1150 * buf - buffer to write
1151 * count - # of bytes to write
1152 * off - file offset to write to (only valid for seekable types)
1153 * resid - pointer to count of unwritten bytes
1155 * Returns 0 on success errno on failure.
1158 zfs_file_pwrite(zfs_file_t
*fp
, const void *buf
,
1159 size_t count
, loff_t pos
, ssize_t
*resid
)
1161 ssize_t rc
, split
, done
;
1165 * To simulate partial disk writes, we split writes into two
1166 * system calls so that the process can be killed in between.
1167 * This is used by ztest to simulate realistic failure modes.
1169 sectors
= count
>> SPA_MINBLOCKSHIFT
;
1170 split
= (sectors
> 0 ? rand() % sectors
: 0) << SPA_MINBLOCKSHIFT
;
1171 rc
= pwrite64(fp
->f_fd
, buf
, split
, pos
);
1174 rc
= pwrite64(fp
->f_fd
, (char *)buf
+ split
,
1175 count
- split
, pos
+ split
);
1178 if (rc
== -1 && errno
== EINVAL
) {
1180 * Under Linux, this most likely means an alignment issue
1181 * (memory or disk) due to O_DIRECT, so we abort() in order
1182 * to catch the offender.
1194 *resid
= count
- done
;
1195 } else if (done
!= count
) {
1203 * Stateful read - use os internal file pointer to determine where to
1204 * read and update on successful completion.
1206 * fp - pointer to file (pipe, socket, etc) to read from
1207 * buf - buffer to write
1208 * count - # of bytes to read
1209 * resid - pointer to count of unread bytes (if short read)
1211 * Returns 0 on success errno on failure.
1214 zfs_file_read(zfs_file_t
*fp
, void *buf
, size_t count
, ssize_t
*resid
)
1218 rc
= read(fp
->f_fd
, buf
, count
);
1223 *resid
= count
- rc
;
1224 } else if (rc
!= count
) {
1232 * Stateless read - os internal file pointer is not updated.
1234 * fp - pointer to file (pipe, socket, etc) to read from
1235 * buf - buffer to write
1236 * count - # of bytes to write
1237 * off - file offset to read from (only valid for seekable types)
1238 * resid - pointer to count of unwritten bytes (if short write)
1240 * Returns 0 on success errno on failure.
1243 zfs_file_pread(zfs_file_t
*fp
, void *buf
, size_t count
, loff_t off
,
1248 rc
= pread64(fp
->f_fd
, buf
, count
, off
);
1252 * Under Linux, this most likely means an alignment issue
1253 * (memory or disk) due to O_DIRECT, so we abort() in order to
1254 * catch the offender.
1256 if (errno
== EINVAL
)
1262 if (fp
->f_dump_fd
!= -1) {
1265 status
= pwrite64(fp
->f_dump_fd
, buf
, rc
, off
);
1266 ASSERT(status
!= -1);
1270 *resid
= count
- rc
;
1271 } else if (rc
!= count
) {
1279 * lseek - set / get file pointer
1281 * fp - pointer to file (pipe, socket, etc) to read from
1282 * offp - value to seek to, returns current value plus passed offset
1283 * whence - see man pages for standard lseek whence values
1285 * Returns 0 on success errno on failure (ESPIPE for non seekable types)
1288 zfs_file_seek(zfs_file_t
*fp
, loff_t
*offp
, int whence
)
1292 rc
= lseek(fp
->f_fd
, *offp
, whence
);
1302 * Get file attributes
1304 * filp - file pointer
1305 * zfattr - pointer to file attr structure
1307 * Currently only used for fetching size and file mode
1309 * Returns 0 on success or error code of underlying getattr call on failure.
1312 zfs_file_getattr(zfs_file_t
*fp
, zfs_file_attr_t
*zfattr
)
1316 if (fstat64_blk(fp
->f_fd
, &st
) == -1)
1319 zfattr
->zfa_size
= st
.st_size
;
1320 zfattr
->zfa_mode
= st
.st_mode
;
1328 * filp - file pointer
1329 * flags - O_SYNC and or O_DSYNC
1331 * Returns 0 on success or error code of underlying sync call on failure.
1334 zfs_file_fsync(zfs_file_t
*fp
, int flags
)
1338 if (fsync(fp
->f_fd
) < 0)
1345 * fallocate - allocate or free space on disk
1348 * mode (non-standard options for hole punching etc)
1349 * offset - offset to start allocating or freeing from
1350 * len - length to free / allocate
1355 zfs_file_fallocate(zfs_file_t
*fp
, int mode
, loff_t offset
, loff_t len
)
1358 return (fallocate(fp
->f_fd
, mode
, offset
, len
));
1360 (void) fp
, (void) mode
, (void) offset
, (void) len
;
1361 return (EOPNOTSUPP
);
1366 * Request current file pointer offset
1368 * fp - pointer to file
1370 * Returns current file offset.
1373 zfs_file_off(zfs_file_t
*fp
)
1375 return (lseek(fp
->f_fd
, SEEK_CUR
, 0));
/*
 * Remove a file by name.
 *
 * path - fully qualified file path
 *
 * Returns 0 on success.  On failure the value returned by remove() is
 * passed through unchanged; it is not converted to an errno value.
 */
int
zfs_file_unlink(const char *path)
{
	const int rc = remove(path);

	return (rc);
}
1394 * Get reference to file pointer
1396 * fd - input file descriptor
1398 * Returns pointer to file struct or NULL.
1399 * Unsupported in user space.
1402 zfs_file_get(int fd
)
1409 * Drop reference to file pointer
1411 * fp - pointer to file struct
1413 * Unsupported in user space.
1416 zfs_file_put(zfs_file_t
*fp
)
/*
 * Userland stub: takes no action.
 *
 * oldname - previous name (ignored)
 * newname - new name (ignored)
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	(void) oldname;
	(void) newname;
}
1429 spa_import_os(spa_t
*spa
)
1435 spa_export_os(spa_t
*spa
)
1441 spa_activate_os(spa_t
*spa
)
1447 spa_deactivate_os(spa_t
*spa
)