1 /* $NetBSD: nfs_iod.c,v 1.3 2009/03/14 21:04:25 dsl Exp $ */
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: nfs_iod.c,v 1.3 2009/03/14 21:04:25 dsl Exp $");
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <sys/mount.h>
49 #include <sys/malloc.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/signalvar.h>
56 #include <sys/domain.h>
57 #include <sys/protosw.h>
58 #include <sys/namei.h>
59 #include <sys/syslog.h>
60 #include <sys/filedesc.h>
61 #include <sys/kthread.h>
62 #include <sys/kauth.h>
63 #include <sys/syscallargs.h>
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67 #include <nfs/xdr_subs.h>
68 #include <nfs/rpcv2.h>
69 #include <nfs/nfsproto.h>
71 #include <nfs/nfsm_subs.h>
72 #include <nfs/nfsrvcache.h>
73 #include <nfs/nfsmount.h>
74 #include <nfs/nfsnode.h>
75 #include <nfs/nfsrtt.h>
76 #include <nfs/nfs_var.h>
/*
 * Limit that nfs_savenickauth compares a mount's nm_numuids against: a new
 * nfsuid entry is allocated with kmem_alloc only while the count is below
 * this value.  Declared extern here; the definition is not in this file.
 */
78 extern int nuidhash_max
;
82 * lock order: nfs_iodlist_lock -> nid_lock -> nm_lock
/*
 * Mutex held around each insertion into and removal from the two iod lists
 * below that is visible in this file.  It is set up with
 * MUTEX_DEFAULT/IPL_NONE next to the LIST_INIT of both lists.
 */
84 kmutex_t nfs_iodlist_lock
;
/* iod threads that have put themselves up for work, linked through nid_idle. */
85 struct nfs_iodlist nfs_iodlist_idle
;
/* Every iod thread that has registered itself, linked through nid_all. */
86 struct nfs_iodlist nfs_iodlist_all
;
/*
 * Requested number of iod threads.  nfs_set_niothreads clamps the new value
 * to 0..NFS_MAXASYNCDAEMON and then loops while nfs_numasync differs from it.
 */
87 int nfs_niothreads
= -1; /* == "0, and has never been set" */
91 * Asynchronous I/O threads for client nfs.
92 * They do read-ahead and write-behind operations on the block I/O cache.
93 * Never returns unless it fails or gets killed.
100 struct nfs_iod
*myiod
;
101 struct nfsmount
*nmp
;
103 myiod
= kmem_alloc(sizeof(*myiod
), KM_SLEEP
);
104 mutex_init(&myiod
->nid_lock
, MUTEX_DEFAULT
, IPL_NONE
);
105 cv_init(&myiod
->nid_cv
, "nfsiod");
106 myiod
->nid_exiting
= false;
107 myiod
->nid_mount
= NULL
;
108 mutex_enter(&nfs_iodlist_lock
);
109 LIST_INSERT_HEAD(&nfs_iodlist_all
, myiod
, nid_all
);
110 mutex_exit(&nfs_iodlist_lock
);
113 mutex_enter(&nfs_iodlist_lock
);
114 LIST_INSERT_HEAD(&nfs_iodlist_idle
, myiod
, nid_idle
);
115 mutex_exit(&nfs_iodlist_lock
);
117 mutex_enter(&myiod
->nid_lock
);
118 while (/*CONSTCOND*/ true) {
119 nmp
= myiod
->nid_mount
;
121 myiod
->nid_mount
= NULL
;
124 if (__predict_false(myiod
->nid_exiting
)) {
126 * drop nid_lock to preserve locking order.
128 mutex_exit(&myiod
->nid_lock
);
129 mutex_enter(&nfs_iodlist_lock
);
130 mutex_enter(&myiod
->nid_lock
);
132 * recheck nid_mount because nfs_asyncio can
133 * pick us in the meantime as we are still on the idle list.
136 if (myiod
->nid_mount
!= NULL
) {
137 mutex_exit(&nfs_iodlist_lock
);
140 LIST_REMOVE(myiod
, nid_idle
);
141 mutex_exit(&nfs_iodlist_lock
);
144 cv_wait(&myiod
->nid_cv
, &myiod
->nid_lock
);
146 mutex_exit(&myiod
->nid_lock
);
148 mutex_enter(&nmp
->nm_lock
);
149 while ((bp
= TAILQ_FIRST(&nmp
->nm_bufq
)) != NULL
) {
150 /* Take one off the front of the list */
151 TAILQ_REMOVE(&nmp
->nm_bufq
, bp
, b_freelist
);
153 if (nmp
->nm_bufqlen
< 2 * nmp
->nm_bufqiods
) {
154 cv_broadcast(&nmp
->nm_aiocv
);
156 mutex_exit(&nmp
->nm_lock
);
157 KERNEL_LOCK(1, curlwp
);
159 KERNEL_UNLOCK_LAST(curlwp
);
160 mutex_enter(&nmp
->nm_lock
);
162 * If there is more than one iod on this mount,
163 * then defect so that the iods can be shared out
164 * fairly between the mounts.
166 if (nfs_defect
&& nmp
->nm_bufqiods
> 1) {
170 KASSERT(nmp
->nm_bufqiods
> 0);
172 mutex_exit(&nmp
->nm_lock
);
175 KASSERT(myiod
->nid_mount
== NULL
);
176 mutex_exit(&myiod
->nid_lock
);
178 cv_destroy(&myiod
->nid_cv
);
179 mutex_destroy(&myiod
->nid_lock
);
180 kmem_free(myiod
, sizeof(*myiod
));
189 mutex_init(&nfs_iodlist_lock
, MUTEX_DEFAULT
, IPL_NONE
);
190 LIST_INIT(&nfs_iodlist_all
);
191 LIST_INIT(&nfs_iodlist_idle
);
199 error
= nfs_set_niothreads(0);
201 mutex_destroy(&nfs_iodlist_lock
);
205 nfs_set_niothreads(int newval
)
211 KERNEL_UNLOCK_ALL(curlwp
, &hold_count
);
213 mutex_enter(&nfs_iodlist_lock
);
214 /* clamp to sane range */
215 nfs_niothreads
= max(0, min(newval
, NFS_MAXASYNCDAEMON
));
217 while (nfs_numasync
!= nfs_niothreads
&& error
== 0) {
218 while (nfs_numasync
< nfs_niothreads
) {
221 * kthread_create can wait for pagedaemon, and pagedaemon can wait
222 * for nfsiod, which needs to acquire nfs_iodlist_lock; so drop the lock around the call.
226 mutex_exit(&nfs_iodlist_lock
);
227 error
= kthread_create(PRI_NONE
, KTHREAD_MPSAFE
, NULL
,
228 nfssvc_iod
, NULL
, NULL
, "nfsio");
229 mutex_enter(&nfs_iodlist_lock
);
232 nfs_niothreads
= nfs_numasync
;
237 while (nfs_numasync
> nfs_niothreads
) {
238 nid
= LIST_FIRST(&nfs_iodlist_all
);
240 /* iod has not started yet. */
241 kpause("nfsiorm", false, hz
, &nfs_iodlist_lock
);
244 LIST_REMOVE(nid
, nid_all
);
245 mutex_enter(&nid
->nid_lock
);
246 KASSERT(!nid
->nid_exiting
);
247 nid
->nid_exiting
= true;
248 cv_signal(&nid
->nid_cv
);
249 mutex_exit(&nid
->nid_lock
);
253 mutex_exit(&nfs_iodlist_lock
);
255 KERNEL_LOCK(hold_count
, curlwp
);
260 * Get an authorization string for the uid by having the mount_nfs sitting
261 * on this mount point pop out of the kernel and do it.
264 nfs_getauth(struct nfsmount
*nmp
, struct nfsreq
*rep
, kauth_cred_t cred
, char **auth_str
, int *auth_len
, char *verf_str
, int *verf_len
, NFSKERBKEY_T key
)
265 /* key: return session key */
269 while ((nmp
->nm_iflag
& NFSMNT_WAITAUTH
) == 0) {
270 nmp
->nm_iflag
|= NFSMNT_WANTAUTH
;
271 (void) tsleep((void *)&nmp
->nm_authtype
, PSOCK
,
273 error
= nfs_sigintr(nmp
, rep
, rep
->r_lwp
);
275 nmp
->nm_iflag
&= ~NFSMNT_WANTAUTH
;
279 nmp
->nm_iflag
&= ~(NFSMNT_WAITAUTH
| NFSMNT_WANTAUTH
);
280 nmp
->nm_authstr
= *auth_str
= (char *)malloc(RPCAUTH_MAXSIZ
, M_TEMP
, M_WAITOK
);
281 nmp
->nm_authlen
= RPCAUTH_MAXSIZ
;
282 nmp
->nm_verfstr
= verf_str
;
283 nmp
->nm_verflen
= *verf_len
;
284 nmp
->nm_authuid
= kauth_cred_geteuid(cred
);
285 wakeup((void *)&nmp
->nm_authstr
);
288 * And wait for mount_nfs to do its stuff.
290 while ((nmp
->nm_iflag
& NFSMNT_HASAUTH
) == 0 && error
== 0) {
291 (void) tsleep((void *)&nmp
->nm_authlen
, PSOCK
,
293 error
= nfs_sigintr(nmp
, rep
, rep
->r_lwp
);
295 if (nmp
->nm_iflag
& NFSMNT_AUTHERR
) {
296 nmp
->nm_iflag
&= ~NFSMNT_AUTHERR
;
300 free((void *)*auth_str
, M_TEMP
);
302 *auth_len
= nmp
->nm_authlen
;
303 *verf_len
= nmp
->nm_verflen
;
304 memcpy(key
, nmp
->nm_key
, sizeof (NFSKERBKEY_T
));
306 nmp
->nm_iflag
&= ~NFSMNT_HASAUTH
;
307 nmp
->nm_iflag
|= NFSMNT_WAITAUTH
;
308 if (nmp
->nm_iflag
& NFSMNT_WANTAUTH
) {
309 nmp
->nm_iflag
&= ~NFSMNT_WANTAUTH
;
310 wakeup((void *)&nmp
->nm_authtype
);
316 * Get a nickname authenticator and verifier.
319 nfs_getnickauth(struct nfsmount
*nmp
, kauth_cred_t cred
, char **auth_str
,
320 int *auth_len
, char *verf_str
, int verf_len
)
322 struct timeval ktvin
, ktvout
, tv
;
323 struct nfsuid
*nuidp
;
324 u_int32_t
*nickp
, *verfp
;
326 memset(&ktvout
, 0, sizeof ktvout
); /* XXX gcc */
329 if (verf_len
< (4 * NFSX_UNSIGNED
))
330 panic("nfs_getnickauth verf too small");
332 LIST_FOREACH(nuidp
, NMUIDHASH(nmp
, kauth_cred_geteuid(cred
)), nu_hash
) {
333 if (kauth_cred_geteuid(nuidp
->nu_cr
) == kauth_cred_geteuid(cred
))
336 if (!nuidp
|| nuidp
->nu_expire
< time_second
)
340 * Move to the end of the lru list (end of lru == most recently used).
342 TAILQ_REMOVE(&nmp
->nm_uidlruhead
, nuidp
, nu_lru
);
343 TAILQ_INSERT_TAIL(&nmp
->nm_uidlruhead
, nuidp
, nu_lru
);
345 nickp
= (u_int32_t
*)malloc(2 * NFSX_UNSIGNED
, M_TEMP
, M_WAITOK
);
346 *nickp
++ = txdr_unsigned(RPCAKN_NICKNAME
);
347 *nickp
= txdr_unsigned(nuidp
->nu_nickname
);
348 *auth_str
= (char *)nickp
;
349 *auth_len
= 2 * NFSX_UNSIGNED
;
352 * Now we must encrypt the verifier and package it up.
354 verfp
= (u_int32_t
*)verf_str
;
355 *verfp
++ = txdr_unsigned(RPCAKN_NICKNAME
);
357 if (tv
.tv_sec
> nuidp
->nu_timestamp
.tv_sec
||
358 (tv
.tv_sec
== nuidp
->nu_timestamp
.tv_sec
&&
359 tv
.tv_usec
> nuidp
->nu_timestamp
.tv_usec
))
360 nuidp
->nu_timestamp
= tv
;
362 nuidp
->nu_timestamp
.tv_usec
++;
363 ktvin
.tv_sec
= txdr_unsigned(nuidp
->nu_timestamp
.tv_sec
);
364 ktvin
.tv_usec
= txdr_unsigned(nuidp
->nu_timestamp
.tv_usec
);
367 * Now encrypt the timestamp verifier in ecb mode using the session key.
374 *verfp
++ = ktvout
.tv_sec
;
375 *verfp
++ = ktvout
.tv_usec
;
381 * Save the current nickname in a hash list entry on the mount point.
384 nfs_savenickauth(struct nfsmount
*nmp
, kauth_cred_t cred
, int len
, NFSKERBKEY_T key
, struct mbuf
**mdp
, char **dposp
, struct mbuf
*mrep
)
386 struct nfsuid
*nuidp
;
389 struct mbuf
*md
= *mdp
;
390 struct timeval ktvin
, ktvout
;
392 char *dpos
= *dposp
, *cp2
;
393 int deltasec
, error
= 0;
395 memset(&ktvout
, 0, sizeof ktvout
); /* XXX gcc */
397 if (len
== (3 * NFSX_UNSIGNED
)) {
398 nfsm_dissect(tl
, u_int32_t
*, 3 * NFSX_UNSIGNED
);
399 ktvin
.tv_sec
= *tl
++;
400 ktvin
.tv_usec
= *tl
++;
401 nick
= fxdr_unsigned(u_int32_t
, *tl
);
404 * Decrypt the timestamp in ecb mode.
409 ktvout
.tv_sec
= fxdr_unsigned(long, ktvout
.tv_sec
);
410 ktvout
.tv_usec
= fxdr_unsigned(long, ktvout
.tv_usec
);
411 deltasec
= time_second
- ktvout
.tv_sec
;
413 deltasec
= -deltasec
;
415 * If ok, add it to the hash list for the mount point.
417 if (deltasec
<= NFS_KERBCLOCKSKEW
) {
418 if (nmp
->nm_numuids
< nuidhash_max
) {
420 nuidp
= kmem_alloc(sizeof(*nuidp
), KM_SLEEP
);
422 nuidp
= TAILQ_FIRST(&nmp
->nm_uidlruhead
);
423 LIST_REMOVE(nuidp
, nu_hash
);
424 TAILQ_REMOVE(&nmp
->nm_uidlruhead
, nuidp
,
428 kauth_cred_seteuid(nuidp
->nu_cr
, kauth_cred_geteuid(cred
));
429 nuidp
->nu_expire
= time_second
+ NFS_KERBTTL
;
430 nuidp
->nu_timestamp
= ktvout
;
431 nuidp
->nu_nickname
= nick
;
432 memcpy(nuidp
->nu_key
, key
, sizeof (NFSKERBKEY_T
));
433 TAILQ_INSERT_TAIL(&nmp
->nm_uidlruhead
, nuidp
,
435 LIST_INSERT_HEAD(NMUIDHASH(nmp
, kauth_cred_geteuid(cred
)),
439 nfsm_adv(nfsm_rndup(len
));