]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_syscalls.c
xnu-1486.2.11.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
79 #include <sys/stat.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
85 #include <sys/ubc.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
103
104 #include <security/audit/audit.h>
105
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
118 #if CONFIG_MACF
119 #include <security/mac_framework.h>
120 #endif
121
/* local prototype for the routine the nfsiod threads call on themselves to exit */
kern_return_t thread_terminate(thread_t); /* XXX */

#if NFSSERVER

/* array of NFS_NPROCS NFS server procedure handlers (declared extern here) */
extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
					struct nfsrv_sock *slp,
					vfs_context_t ctx,
					mbuf_t *mrepp);
/* exported below as the wg_delay and wg_delay_v3 server sysctls */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* non-zero: nfssvc_nfsd() checks whether a request's source port is a reserved port */
static int nfsrv_require_resv_port = 0;
/* NOTE(review): presumably tracks whether the dead-socket timer is armed - no use is visible here, confirm */
static int nfsrv_deadsock_timer_on = 0;

/* prototypes for server routines referenced in this file */
int	nfssvc_export(user_addr_t argp);
int	nfssvc_nfsd(void);
int	nfssvc_addsock(socket_t, mbuf_t);
void	nfsrv_zapsock(struct nfsrv_sock *);
void	nfsrv_slpderef(struct nfsrv_sock *);
void	nfsrv_slpfree(struct nfsrv_sock *);

#endif /* NFSSERVER */
144
/*
 * sysctl stuff
 *
 * Declares an "nfs" node under _vfs_generic with "client" and "server"
 * child nodes (each only when NFSCLIENT / NFSSERVER is configured).
 * Entries flagged CTLFLAG_RW are writable tunables; entries flagged
 * CTLFLAG_RD are read-only.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");

#if NFSCLIENT
/* NFS client variables */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
/* read-only: count maintained by nfsiod_start()/nfsiod_thread()/nfsiod_terminate() */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW, &nfs_single_des, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW, &nfs_access_delete, 0, "");
#endif /* NFSCLIENT */

#if NFSSERVER
/* NFS server variables */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
#endif
/* nfssvc_nfsd() treats nfsd_thread_max <= 0 as "server shutting down" */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
#endif /* NFSSERVER */
184
185
186 #if NFSCLIENT
187
188 int
189 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
190 {
191 struct lockd_ans la;
192 int error;
193
194 if (uap->flag == NFSCLNT_LOCKDANS) {
195 error = copyin(uap->argp, &la, sizeof(la));
196 return (error != 0 ? error : nfslockdans(p, &la));
197 }
198 return EINVAL;
199 }
200
201 /*
202 * Asynchronous I/O threads for client NFS.
203 * They do read-ahead and write-behind operations on the block I/O cache.
204 *
 * A pool of up to nfsiod_thread_max threads is launched on demand; threads exit
 * when unused for a while.  There are as many nfsiod structs as there are
 * nfsiod threads; however, there's no strict tie between a thread and a struct.
 * Each thread puts an nfsiod on the free list and sleeps on it.  When it wakes
 * up, it removes the next struct nfsiod from the work queue and services it.
 * Then it puts the struct at the head of the free list and sleeps on it.
211 * Async requests will pull the next struct nfsiod from the head of the free list,
212 * put it on the work queue, and wake whatever thread is waiting on that struct.
213 */
214
215 /*
216 * nfsiod thread exit routine
217 *
218 * Must be called with nfsiod_mutex held so that the
219 * decision to terminate is atomic with the termination.
220 */
221 void
222 nfsiod_terminate(struct nfsiod *niod)
223 {
224 nfsiod_thread_count--;
225 lck_mtx_unlock(nfsiod_mutex);
226 if (niod)
227 FREE(niod, M_TEMP);
228 else
229 printf("nfsiod: terminating without niod\n");
230 thread_terminate(current_thread());
231 /*NOTREACHED*/
232 }
233
234 /* nfsiod thread startup routine */
235 void
236 nfsiod_thread(void)
237 {
238 struct nfsiod *niod;
239 int error;
240
241 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
242 if (!niod) {
243 lck_mtx_lock(nfsiod_mutex);
244 nfsiod_thread_count--;
245 wakeup(current_thread());
246 lck_mtx_unlock(nfsiod_mutex);
247 thread_terminate(current_thread());
248 /*NOTREACHED*/
249 }
250 bzero(niod, sizeof(*niod));
251 lck_mtx_lock(nfsiod_mutex);
252 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
253 wakeup(current_thread());
254 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
255 /* shouldn't return... so we have an error */
256 /* remove an old nfsiod struct and terminate */
257 lck_mtx_lock(nfsiod_mutex);
258 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
259 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
260 nfsiod_terminate(niod);
261 /*NOTREACHED*/
262 }
263
264 /*
265 * Start up another nfsiod thread.
266 * (unless we're already maxed out and there are nfsiods running)
267 */
268 int
269 nfsiod_start(void)
270 {
271 thread_t thd = THREAD_NULL;
272
273 lck_mtx_lock(nfsiod_mutex);
274 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
275 lck_mtx_unlock(nfsiod_mutex);
276 return (EBUSY);
277 }
278 nfsiod_thread_count++;
279 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
280 lck_mtx_unlock(nfsiod_mutex);
281 return (EBUSY);
282 }
283 /* wait for the thread to complete startup */
284 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
285 thread_deallocate(thd);
286 return (0);
287 }
288
/*
 * Continuation for Asynchronous I/O threads for NFS client.
 *
 * Grab an nfsiod struct to work on, do some work, then drop it
 *
 * This is passed to msleep0() as the continuation both by nfsiod_thread()
 * and by this function itself.  The incoming "error" value is not examined;
 * the parameter is only reused to hold the msleep0() result.
 *
 * The function takes nfsiod_mutex itself, drops it while running request
 * callbacks, and ends either asleep in msleep0() or in nfsiod_terminate();
 * the trailing return statement follows a NOTREACHED call.
 */
int
nfsiod_continue(int error)
{
	struct nfsiod *niod;
	struct nfsmount *nmp;
	struct nfsreq *req, *treq;
	struct nfs_reqqhead iodq;
	int morework;

	lck_mtx_lock(nfsiod_mutex);
	/* take the first nfsiod struct off the work queue (not necessarily the one we slept on) */
	niod = TAILQ_FIRST(&nfsiodwork);
	if (!niod) {
		/* there's no work queued up */
		/* remove an old nfsiod struct and terminate */
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
		nfsiod_terminate(niod);
		/*NOTREACHED*/
	}
	TAILQ_REMOVE(&nfsiodwork, niod, niod_link);

worktodo:
	/* nfsiod_mutex is held at the top of every iteration */
	while ((nmp = niod->niod_nmp)) {
		/*
		 * Service this mount's async I/O queue.
		 *
		 * In order to ensure some level of fairness between mounts,
		 * we grab all the work up front before processing it so any
		 * new work that arrives will be serviced on a subsequent
		 * iteration - and we have a chance to see if other work needs
		 * to be done (e.g. the delayed write queue needs to be pushed
		 * or other mounts are waiting for an nfsiod).
		 */
		/* grab the current contents of the queue */
		TAILQ_INIT(&iodq);
		TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
		lck_mtx_unlock(nfsiod_mutex);

		/* process the queue */
		/* (mutex is dropped here; iodq is a private list local to this thread) */
		TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
			TAILQ_REMOVE(&iodq, req, r_achain);
			/* mark the request as no longer being on any async list */
			req->r_achain.tqe_next = NFSREQNOLIST;
			req->r_callback.rcb_func(req);
		}

		/* now check if there's more/other work to be done */
		lck_mtx_lock(nfsiod_mutex);
		morework = !TAILQ_EMPTY(&nmp->nm_iodq);
		if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
			/* we're going to stop working on this mount */
			if (morework) /* mount still needs more work so queue it up */
				TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
			/* break the mount <-> nfsiod association; this also ends the while loop */
			nmp->nm_niod = NULL;
			niod->niod_nmp = NULL;
		}
	}

	/* loop if there's still a mount to work on */
	if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
		niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
		TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
	}
	if (niod->niod_nmp)
		goto worktodo;

	/* queue ourselves back up - if there aren't too many threads running */
	if (nfsiod_thread_count <= NFSIOD_MAX) {
		TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
		error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
		/* shouldn't return... so we have an error */
		/* remove an old nfsiod struct and terminate */
		lck_mtx_lock(nfsiod_mutex);
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	}
	/* too many threads (niod is the struct we just serviced) or the sleep returned: exit */
	nfsiod_terminate(niod);
	/*NOTREACHED*/
	return (0);
}
373
374 #endif /* NFSCLIENT */
375
376
377 #if NFSSERVER
378
379 /*
380 * NFS server system calls
381 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
382 */
383
384 /*
385 * Get file handle system call
386 */
387 int
388 getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
389 {
390 vnode_t vp;
391 struct nfs_filehandle nfh;
392 int error;
393 struct nameidata nd;
394 char path[MAXPATHLEN], *ptr;
395 u_int pathlen;
396 struct nfs_exportfs *nxfs;
397 struct nfs_export *nx;
398
399 /*
400 * Must be super user
401 */
402 error = proc_suser(p);
403 if (error)
404 return (error);
405
406 error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
407 if (error)
408 return (error);
409
410 if (!nfsrv_is_initialized())
411 return (EINVAL);
412
413 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
414 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
415 error = namei(&nd);
416 if (error)
417 return (error);
418 nameidone(&nd);
419
420 vp = nd.ni_vp;
421
422 // find exportfs that matches f_mntonname
423 lck_rw_lock_shared(&nfsrv_export_rwlock);
424 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
425 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
426 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
427 break;
428 }
429 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
430 error = EINVAL;
431 goto out;
432 }
433 // find export that best matches remainder of path
434 ptr = path + strlen(nxfs->nxfs_path);
435 while (*ptr && (*ptr == '/'))
436 ptr++;
437 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
438 int len = strlen(nx->nx_path);
439 if (len == 0) // we've hit the export entry for the root directory
440 break;
441 if (!strncmp(nx->nx_path, ptr, len))
442 break;
443 }
444 if (!nx) {
445 error = EINVAL;
446 goto out;
447 }
448
449 bzero(&nfh, sizeof(nfh));
450 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
451 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
452 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
453 nfh.nfh_xh.nxh_flags = 0;
454 nfh.nfh_xh.nxh_reserved = 0;
455 nfh.nfh_len = NFSV3_MAX_FID_SIZE;
456 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
457 if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
458 error = EOVERFLOW;
459 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
460 nfh.nfh_len += sizeof(nfh.nfh_xh);
461 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
462
463 out:
464 lck_rw_done(&nfsrv_export_rwlock);
465 vnode_put(vp);
466 if (error)
467 return (error);
468 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
469 return (error);
470 }
471
472 extern struct fileops vnops;
473
474 /*
475 * syscall for the rpc.lockd to use to translate a NFS file handle into
476 * an open descriptor.
477 *
478 * warning: do not remove the suser() call or this becomes one giant
479 * security hole.
480 */
481 int
482 fhopen( proc_t p,
483 struct fhopen_args *uap,
484 int32_t *retval)
485 {
486 vnode_t vp;
487 struct nfs_filehandle nfh;
488 struct nfs_export *nx;
489 struct nfs_export_options *nxo;
490 struct flock lf;
491 struct fileproc *fp, *nfp;
492 int fmode, error, type;
493 int indx;
494 vfs_context_t ctx = vfs_context_current();
495 kauth_action_t action;
496
497 /*
498 * Must be super user
499 */
500 error = suser(vfs_context_ucred(ctx), 0);
501 if (error) {
502 return (error);
503 }
504
505 if (!nfsrv_is_initialized()) {
506 return (EINVAL);
507 }
508
509 fmode = FFLAGS(uap->flags);
510 /* why not allow a non-read/write open for our lockd? */
511 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
512 return (EINVAL);
513
514 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
515 if (error)
516 return (error);
517 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
518 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
519 return (EINVAL);
520 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
521 if (error)
522 return (error);
523 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
524
525 lck_rw_lock_shared(&nfsrv_export_rwlock);
526 /* now give me my vnode, it gets returned to me with a reference */
527 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
528 lck_rw_done(&nfsrv_export_rwlock);
529 if (error) {
530 if (error == NFSERR_TRYLATER)
531 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
532 return (error);
533 }
534
535 /*
536 * From now on we have to make sure not
537 * to forget about the vnode.
538 * Any error that causes an abort must vnode_put(vp).
539 * Just set error = err and 'goto bad;'.
540 */
541
542 /*
543 * from vn_open
544 */
545 if (vnode_vtype(vp) == VSOCK) {
546 error = EOPNOTSUPP;
547 goto bad;
548 }
549
550 /* disallow write operations on directories */
551 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
552 error = EISDIR;
553 goto bad;
554 }
555
556 /* compute action to be authorized */
557 action = 0;
558 if (fmode & FREAD)
559 action |= KAUTH_VNODE_READ_DATA;
560 if (fmode & (FWRITE | O_TRUNC))
561 action |= KAUTH_VNODE_WRITE_DATA;
562 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
563 goto bad;
564
565 if ((error = VNOP_OPEN(vp, fmode, ctx)))
566 goto bad;
567 if ((error = vnode_ref_ext(vp, fmode)))
568 goto bad;
569
570 /*
571 * end of vn_open code
572 */
573
574 // starting here... error paths should call vn_close/vnode_put
575 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
576 vn_close(vp, fmode & FMASK, ctx);
577 goto bad;
578 }
579 fp = nfp;
580
581 fp->f_fglob->fg_flag = fmode & FMASK;
582 fp->f_fglob->fg_type = DTYPE_VNODE;
583 fp->f_fglob->fg_ops = &vnops;
584 fp->f_fglob->fg_data = (caddr_t)vp;
585
586 // XXX do we really need to support this with fhopen()?
587 if (fmode & (O_EXLOCK | O_SHLOCK)) {
588 lf.l_whence = SEEK_SET;
589 lf.l_start = 0;
590 lf.l_len = 0;
591 if (fmode & O_EXLOCK)
592 lf.l_type = F_WRLCK;
593 else
594 lf.l_type = F_RDLCK;
595 type = F_FLOCK;
596 if ((fmode & FNONBLOCK) == 0)
597 type |= F_WAIT;
598 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
599 struct vfs_context context = *vfs_context_current();
600 /* Modify local copy (to not damage thread copy) */
601 context.vc_ucred = fp->f_fglob->fg_cred;
602
603 vn_close(vp, fp->f_fglob->fg_flag, &context);
604 fp_free(p, indx, fp);
605 return (error);
606 }
607 fp->f_fglob->fg_flag |= FHASLOCK;
608 }
609
610 vnode_put(vp);
611
612 proc_fdlock(p);
613 procfdtbl_releasefd(p, indx, NULL);
614 fp_drop(p, indx, fp, 1);
615 proc_fdunlock(p);
616
617 *retval = indx;
618 return (0);
619
620 bad:
621 vnode_put(vp);
622 return (error);
623 }
624
625 /*
626 * NFS server pseudo system call
627 */
628 int
629 nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
630 {
631 mbuf_t nam;
632 struct user_nfsd_args user_nfsdarg;
633 socket_t so;
634 int error;
635
636 AUDIT_ARG(cmd, uap->flag);
637
638 /*
639 * Must be super user for most operations (export ops checked later).
640 */
641 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
642 return (error);
643 #if CONFIG_MACF
644 error = mac_system_check_nfsd(kauth_cred_get());
645 if (error)
646 return (error);
647 #endif
648
649 /* make sure NFS server data structures have been initialized */
650 nfsrv_init();
651
652 if (uap->flag & NFSSVC_ADDSOCK) {
653 if (IS_64BIT_PROCESS(p)) {
654 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
655 } else {
656 struct nfsd_args tmp_args;
657 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
658 if (error == 0) {
659 user_nfsdarg.sock = tmp_args.sock;
660 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
661 user_nfsdarg.namelen = tmp_args.namelen;
662 }
663 }
664 if (error)
665 return (error);
666 /* get the socket */
667 error = file_socket(user_nfsdarg.sock, &so);
668 if (error)
669 return (error);
670 /* Get the client address for connected sockets. */
671 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
672 nam = NULL;
673 } else {
674 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
675 if (error) {
676 /* drop the iocount file_socket() grabbed on the file descriptor */
677 file_drop(user_nfsdarg.sock);
678 return (error);
679 }
680 }
681 /*
682 * nfssvc_addsock() will grab a retain count on the socket
683 * to keep the socket from being closed when nfsd closes its
684 * file descriptor for it.
685 */
686 error = nfssvc_addsock(so, nam);
687 /* drop the iocount file_socket() grabbed on the file descriptor */
688 file_drop(user_nfsdarg.sock);
689 } else if (uap->flag & NFSSVC_NFSD) {
690 error = nfssvc_nfsd();
691 } else if (uap->flag & NFSSVC_EXPORT) {
692 error = nfssvc_export(uap->argp);
693 } else {
694 error = EINVAL;
695 }
696 if (error == EINTR || error == ERESTART)
697 error = 0;
698 return (error);
699 }
700
/*
 * Adds a socket to the list for servicing by nfsds.
 *
 * so:    the socket to add; a retain count is taken on it.
 * mynam: mbuf holding the client address, or NULL.  It is stored in the
 *        new nfsrv_sock on success and freed here on every failure path.
 *
 * Returns 0 on success, EEXIST if a UDP socket is already registered, or
 * ENOMEM if the nfsrv_sock cannot be allocated.  Failures to set socket
 * options are logged (where checked) but are not fatal.
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	struct timeval timeo;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen)
		nfs_mbuf_init();

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket */
	/* (early check made without nfsd_mutex; it is repeated under the mutex below) */
	if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM)
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		/* results are OR-ed together, so the logged value is only a "something failed" indicator */
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
			error = 0;
		}
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	timeo.tv_sec = 30;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
		error = 0;
	}

	/* allocate and initialize the per-socket server state */
	MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
	if (!slp) {
		mbuf_freem(mynam);
		return (ENOMEM);
	}
	bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
	lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		/* There should be only one UDP socket */
		if (nfsrv_udpsock) {
			lck_mtx_unlock(nfsd_mutex);
			nfsrv_slpfree(slp);
			mbuf_freem(mynam);
			return (EEXIST);
		}
		nfsrv_udpsock = slp;
	}

	/* add the socket to the list */
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;

	/* set up the socket upcall */
	/* (nfsrv_rcv is installed as the upcall, with slp as its argument) */
	socket_lock(so, 1);
	so->so_upcallarg = (caddr_t)slp;
	so->so_upcall = nfsrv_rcv;
	so->so_rcv.sb_flags |= SB_UPCALL;
	socket_unlock(so, 1);
	/* just playin' it safe */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	/* valid, and flagged SLP_NEEDQ so an nfsd will call nfsrv_rcv_locked() on it */
	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(nfsd_mutex);

	return (0);
}
803
804 /*
805 * nfssvc_nfsd()
806 *
807 * nfsd theory of operation:
808 *
809 * The first nfsd thread stays in user mode accepting new TCP connections
810 * which are then added via the "addsock" call. The rest of the nfsd threads
811 * simply call into the kernel and remain there in a loop handling NFS
812 * requests until killed by a signal.
813 *
814 * There's a list of nfsd threads (nfsd_head).
815 * There's an nfsd queue that contains only those nfsds that are
816 * waiting for work to do (nfsd_queue).
817 *
818 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
819 * managing the work on the sockets:
820 * nfsrv_sockwait - sockets w/new data waiting to be worked on
821 * nfsrv_sockwork - sockets being worked on which may have more work to do
822 * nfsrv_sockwg -- sockets which have pending write gather data
823 * When a socket receives data, if it is not currently queued, it
824 * will be placed at the end of the "wait" queue.
825 * Whenever a socket needs servicing we make sure it is queued and
826 * wake up a waiting nfsd (if there is one).
827 *
828 * nfsds will service at most 8 requests from the same socket before
829 * defecting to work on another socket.
830 * nfsds will defect immediately if there are any sockets in the "wait" queue
831 * nfsds looking for a socket to work on check the "wait" queue first and
832 * then check the "work" queue.
833 * When an nfsd starts working on a socket, it removes it from the head of
834 * the queue it's currently on and moves it to the end of the "work" queue.
835 * When nfsds are checking the queues for work, any sockets found not to
836 * have any work are simply dropped from the queue.
837 *
838 */
839 int
840 nfssvc_nfsd(void)
841 {
842 mbuf_t m, mrep;
843 struct nfsrv_sock *slp;
844 struct nfsd *nfsd;
845 struct nfsrv_descript *nd = NULL;
846 int error = 0, cacherep, writes_todo;
847 int siz, procrastinate, opcnt = 0;
848 u_quad_t cur_usec;
849 struct timeval now;
850 struct vfs_context context;
851
852 #ifndef nolint
853 cacherep = RC_DOIT;
854 writes_todo = 0;
855 #endif
856
857 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
858 if (!nfsd)
859 return (ENOMEM);
860 bzero(nfsd, sizeof(struct nfsd));
861 lck_mtx_lock(nfsd_mutex);
862 if (nfsd_thread_count++ == 0)
863 nfsrv_initcache(); /* Init the server request cache */
864 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
865 lck_mtx_unlock(nfsd_mutex);
866
867 context.vc_thread = current_thread();
868
869 /*
870 * Loop getting rpc requests until SIGKILL.
871 */
872 for (;;) {
873 if (nfsd_thread_max <= 0) {
874 /* NFS server shutting down, get out ASAP */
875 error = EINTR;
876 slp = nfsd->nfsd_slp;
877 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
878 /* already have some work to do */
879 error = 0;
880 slp = nfsd->nfsd_slp;
881 } else {
882 /* need to find work to do */
883 error = 0;
884 lck_mtx_lock(nfsd_mutex);
885 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
886 if (nfsd_thread_count > nfsd_thread_max) {
887 /*
888 * If we have no socket and there are more
889 * nfsd threads than configured, let's exit.
890 */
891 error = 0;
892 goto done;
893 }
894 nfsd->nfsd_flag |= NFSD_WAITING;
895 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
896 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
897 if (error) {
898 if (nfsd->nfsd_flag & NFSD_WAITING) {
899 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
900 nfsd->nfsd_flag &= ~NFSD_WAITING;
901 }
902 goto done;
903 }
904 }
905 slp = nfsd->nfsd_slp;
906 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
907 /* look for a socket to work on in the wait queue */
908 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
909 lck_rw_lock_exclusive(&slp->ns_rwlock);
910 /* remove from the head of the queue */
911 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
912 slp->ns_flag &= ~SLP_WAITQ;
913 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
914 break;
915 /* nothing to do, so skip this socket */
916 lck_rw_done(&slp->ns_rwlock);
917 }
918 }
919 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
920 /* look for a socket to work on in the work queue */
921 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
922 lck_rw_lock_exclusive(&slp->ns_rwlock);
923 /* remove from the head of the queue */
924 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
925 slp->ns_flag &= ~SLP_WORKQ;
926 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
927 break;
928 /* nothing to do, so skip this socket */
929 lck_rw_done(&slp->ns_rwlock);
930 }
931 }
932 if (!nfsd->nfsd_slp && slp) {
933 /* we found a socket to work on, grab a reference */
934 slp->ns_sref++;
935 nfsd->nfsd_slp = slp;
936 opcnt = 0;
937 /* and put it at the back of the work queue */
938 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
939 slp->ns_flag |= SLP_WORKQ;
940 lck_rw_done(&slp->ns_rwlock);
941 }
942 lck_mtx_unlock(nfsd_mutex);
943 if (!slp)
944 continue;
945 lck_rw_lock_exclusive(&slp->ns_rwlock);
946 if (slp->ns_flag & SLP_VALID) {
947 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
948 slp->ns_flag &= ~SLP_NEEDQ;
949 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
950 }
951 if (slp->ns_flag & SLP_DISCONN)
952 nfsrv_zapsock(slp);
953 error = nfsrv_dorec(slp, nfsd, &nd);
954 if (error == EINVAL) { // RPCSEC_GSS drop
955 if (slp->ns_sotype == SOCK_STREAM)
956 nfsrv_zapsock(slp); // drop connection
957 }
958 writes_todo = 0;
959 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
960 microuptime(&now);
961 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
962 (u_quad_t)now.tv_usec;
963 if (slp->ns_wgtime <= cur_usec) {
964 error = 0;
965 cacherep = RC_DOIT;
966 writes_todo = 1;
967 }
968 slp->ns_flag &= ~SLP_DOWRITES;
969 }
970 nfsd->nfsd_flag |= NFSD_REQINPROG;
971 }
972 lck_rw_done(&slp->ns_rwlock);
973 }
974 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
975 if (nd) {
976 nfsm_chain_cleanup(&nd->nd_nmreq);
977 if (nd->nd_nam2)
978 mbuf_freem(nd->nd_nam2);
979 if (IS_VALID_CRED(nd->nd_cr))
980 kauth_cred_unref(&nd->nd_cr);
981 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
982 nd = NULL;
983 }
984 nfsd->nfsd_slp = NULL;
985 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
986 if (slp)
987 nfsrv_slpderef(slp);
988 if (nfsd_thread_max <= 0)
989 break;
990 continue;
991 }
992 if (nd) {
993 microuptime(&nd->nd_starttime);
994 if (nd->nd_nam2)
995 nd->nd_nam = nd->nd_nam2;
996 else
997 nd->nd_nam = slp->ns_nam;
998
999 cacherep = nfsrv_getcache(nd, slp, &mrep);
1000
1001 if (nfsrv_require_resv_port) {
1002 /* Check if source port is a reserved port */
1003 u_short port;
1004 struct sockaddr *nam = mbuf_data(nd->nd_nam);
1005 struct sockaddr_in *sin;
1006
1007 sin = (struct sockaddr_in *)nam;
1008 port = ntohs(sin->sin_port);
1009 if (port >= IPPORT_RESERVED &&
1010 nd->nd_procnum != NFSPROC_NULL) {
1011 char strbuf[MAX_IPv4_STR_LEN];
1012 nd->nd_procnum = NFSPROC_NOOP;
1013 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1014 cacherep = RC_DOIT;
1015 printf("NFS request from unprivileged port (%s:%d)\n",
1016 inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
1017 port);
1018 }
1019 }
1020
1021 }
1022
1023 /*
1024 * Loop to get all the write RPC replies that have been
1025 * gathered together.
1026 */
1027 do {
1028 switch (cacherep) {
1029 case RC_DOIT:
1030 if (nd && (nd->nd_vers == NFS_VER3))
1031 procrastinate = nfsrv_wg_delay_v3;
1032 else
1033 procrastinate = nfsrv_wg_delay;
1034 lck_rw_lock_shared(&nfsrv_export_rwlock);
1035 context.vc_ucred = NULL;
1036 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
1037 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1038 else
1039 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1040 lck_rw_done(&nfsrv_export_rwlock);
1041 if (mrep == NULL) {
1042 /*
1043 * If this is a stream socket and we are not going
1044 * to send a reply we better close the connection
1045 * so the client doesn't hang.
1046 */
1047 if (error && slp->ns_sotype == SOCK_STREAM) {
1048 lck_rw_lock_exclusive(&slp->ns_rwlock);
1049 nfsrv_zapsock(slp);
1050 lck_rw_done(&slp->ns_rwlock);
1051 printf("NFS server: NULL reply from proc = %d error = %d\n",
1052 nd->nd_procnum, error);
1053 }
1054 break;
1055
1056 }
1057 if (error) {
1058 OSAddAtomic(1, &nfsstats.srv_errs);
1059 nfsrv_updatecache(nd, FALSE, mrep);
1060 if (nd->nd_nam2) {
1061 mbuf_freem(nd->nd_nam2);
1062 nd->nd_nam2 = NULL;
1063 }
1064 break;
1065 }
1066 OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1067 nfsrv_updatecache(nd, TRUE, mrep);
1068 /* FALLTHRU */
1069
1070 case RC_REPLY:
1071 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1072 /*
1073 * Need to checksum or encrypt the reply
1074 */
1075 error = nfs_gss_svc_protect_reply(nd, mrep);
1076 if (error) {
1077 mbuf_freem(mrep);
1078 break;
1079 }
1080 }
1081
1082 /*
1083 * Get the total size of the reply
1084 */
1085 m = mrep;
1086 siz = 0;
1087 while (m) {
1088 siz += mbuf_len(m);
1089 m = mbuf_next(m);
1090 }
1091 if (siz <= 0 || siz > NFS_MAXPACKET) {
1092 printf("mbuf siz=%d\n",siz);
1093 panic("Bad nfs svc reply");
1094 }
1095 m = mrep;
1096 mbuf_pkthdr_setlen(m, siz);
1097 error = mbuf_pkthdr_setrcvif(m, NULL);
1098 if (error)
1099 panic("nfsd setrcvif failed: %d", error);
1100 /*
1101 * For stream protocols, prepend a Sun RPC
1102 * Record Mark.
1103 */
1104 if (slp->ns_sotype == SOCK_STREAM) {
1105 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1106 if (!error)
1107 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1108 }
1109 if (!error) {
1110 if (slp->ns_flag & SLP_VALID) {
1111 error = nfsrv_send(slp, nd->nd_nam2, m);
1112 } else {
1113 error = EPIPE;
1114 mbuf_freem(m);
1115 }
1116 } else {
1117 mbuf_freem(m);
1118 }
1119 mrep = NULL;
1120 if (nd->nd_nam2) {
1121 mbuf_freem(nd->nd_nam2);
1122 nd->nd_nam2 = NULL;
1123 }
1124 if (error == EPIPE) {
1125 lck_rw_lock_exclusive(&slp->ns_rwlock);
1126 nfsrv_zapsock(slp);
1127 lck_rw_done(&slp->ns_rwlock);
1128 }
1129 if (error == EINTR || error == ERESTART) {
1130 nfsm_chain_cleanup(&nd->nd_nmreq);
1131 if (IS_VALID_CRED(nd->nd_cr))
1132 kauth_cred_unref(&nd->nd_cr);
1133 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1134 nfsrv_slpderef(slp);
1135 lck_mtx_lock(nfsd_mutex);
1136 goto done;
1137 }
1138 break;
1139 case RC_DROPIT:
1140 mbuf_freem(nd->nd_nam2);
1141 nd->nd_nam2 = NULL;
1142 break;
1143 };
1144 opcnt++;
1145 if (nd) {
1146 nfsm_chain_cleanup(&nd->nd_nmreq);
1147 if (nd->nd_nam2)
1148 mbuf_freem(nd->nd_nam2);
1149 if (IS_VALID_CRED(nd->nd_cr))
1150 kauth_cred_unref(&nd->nd_cr);
1151 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1152 nd = NULL;
1153 }
1154
1155 /*
1156 * Check to see if there are outstanding writes that
1157 * need to be serviced.
1158 */
1159 writes_todo = 0;
1160 if (slp->ns_wgtime) {
1161 microuptime(&now);
1162 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1163 (u_quad_t)now.tv_usec;
1164 if (slp->ns_wgtime <= cur_usec) {
1165 cacherep = RC_DOIT;
1166 writes_todo = 1;
1167 }
1168 }
1169 } while (writes_todo);
1170
1171 nd = NULL;
1172 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1173 lck_rw_lock_exclusive(&slp->ns_rwlock);
1174 error = nfsrv_dorec(slp, nfsd, &nd);
1175 if (error == EINVAL) { // RPCSEC_GSS drop
1176 if (slp->ns_sotype == SOCK_STREAM)
1177 nfsrv_zapsock(slp); // drop connection
1178 }
1179 lck_rw_done(&slp->ns_rwlock);
1180 }
1181 if (!nd) {
1182 /* drop our reference on the socket */
1183 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1184 nfsd->nfsd_slp = NULL;
1185 nfsrv_slpderef(slp);
1186 }
1187 }
1188 lck_mtx_lock(nfsd_mutex);
1189 done:
1190 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1191 FREE(nfsd, M_NFSD);
1192 if (--nfsd_thread_count == 0)
1193 nfsrv_cleanup();
1194 lck_mtx_unlock(nfsd_mutex);
1195 return (error);
1196 }
1197
1198 int
1199 nfssvc_export(user_addr_t argp)
1200 {
1201 int error = 0, is_64bit;
1202 struct user_nfs_export_args unxa;
1203 vfs_context_t ctx = vfs_context_current();
1204
1205 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1206
1207 /* copy in pointers to path and export args */
1208 if (is_64bit) {
1209 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1210 } else {
1211 struct nfs_export_args tnxa;
1212 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1213 if (error == 0) {
1214 /* munge into LP64 version of nfs_export_args structure */
1215 unxa.nxa_fsid = tnxa.nxa_fsid;
1216 unxa.nxa_expid = tnxa.nxa_expid;
1217 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1218 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1219 unxa.nxa_flags = tnxa.nxa_flags;
1220 unxa.nxa_netcount = tnxa.nxa_netcount;
1221 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1222 }
1223 }
1224 if (error)
1225 return (error);
1226
1227 error = nfsrv_export(&unxa, ctx);
1228
1229 return (error);
1230 }
1231
1232 /*
1233 * Shut down a socket associated with an nfsrv_sock structure.
1234 * Should be called with the send lock set, if required.
1235 * The trick here is to increment the sref at the start, so that the nfsds
1236 * will stop using it and clear ns_flag at the end so that it will not be
1237 * reassigned during cleanup.
1238 */
1239 void
1240 nfsrv_zapsock(struct nfsrv_sock *slp)
1241 {
1242 socket_t so;
1243
1244 if ((slp->ns_flag & SLP_VALID) == 0)
1245 return;
1246 slp->ns_flag &= ~SLP_ALLFLAGS;
1247
1248 so = slp->ns_so;
1249 if (so == NULL)
1250 return;
1251
1252 /*
1253 * Attempt to deter future upcalls, but leave the
1254 * upcall info in place to avoid a race with the
1255 * networking code.
1256 */
1257 socket_lock(so, 1);
1258 so->so_rcv.sb_flags &= ~SB_UPCALL;
1259 socket_unlock(so, 1);
1260
1261 sock_shutdown(so, SHUT_RDWR);
1262 }
1263
1264 /*
1265 * cleanup and release a server socket structure.
1266 */
1267 void
1268 nfsrv_slpfree(struct nfsrv_sock *slp)
1269 {
1270 struct nfsrv_descript *nwp, *nnwp;
1271
1272 if (slp->ns_so) {
1273 sock_release(slp->ns_so);
1274 slp->ns_so = NULL;
1275 }
1276 if (slp->ns_nam)
1277 mbuf_free(slp->ns_nam);
1278 if (slp->ns_raw)
1279 mbuf_freem(slp->ns_raw);
1280 if (slp->ns_rec)
1281 mbuf_freem(slp->ns_rec);
1282 if (slp->ns_frag)
1283 mbuf_freem(slp->ns_frag);
1284 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1285 slp->ns_reccnt = 0;
1286
1287 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1288 nnwp = nwp->nd_tq.le_next;
1289 LIST_REMOVE(nwp, nd_tq);
1290 nfsm_chain_cleanup(&nwp->nd_nmreq);
1291 if (nwp->nd_mrep)
1292 mbuf_freem(nwp->nd_mrep);
1293 if (nwp->nd_nam2)
1294 mbuf_freem(nwp->nd_nam2);
1295 if (IS_VALID_CRED(nwp->nd_cr))
1296 kauth_cred_unref(&nwp->nd_cr);
1297 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1298 }
1299 LIST_INIT(&slp->ns_tq);
1300
1301 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1302 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1303 FREE(slp, M_NFSSVC);
1304 }
1305
1306 /*
1307 * Derefence a server socket structure. If it has no more references and
1308 * is no longer valid, you can throw it away.
1309 */
1310 void
1311 nfsrv_slpderef(struct nfsrv_sock *slp)
1312 {
1313 struct timeval now;
1314
1315 lck_mtx_lock(nfsd_mutex);
1316 lck_rw_lock_exclusive(&slp->ns_rwlock);
1317 slp->ns_sref--;
1318
1319 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1320 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1321 /* remove socket from queue since there's no work */
1322 if (slp->ns_flag & SLP_WAITQ)
1323 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1324 else
1325 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1326 slp->ns_flag &= ~SLP_QUEUED;
1327 }
1328 lck_rw_done(&slp->ns_rwlock);
1329 lck_mtx_unlock(nfsd_mutex);
1330 return;
1331 }
1332
1333 /* This socket is no longer valid, so we'll get rid of it */
1334
1335 if (slp->ns_flag & SLP_QUEUED) {
1336 if (slp->ns_flag & SLP_WAITQ)
1337 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1338 else
1339 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1340 slp->ns_flag &= ~SLP_QUEUED;
1341 }
1342
1343 /*
1344 * Queue the socket up for deletion
1345 * and start the timer to delete it
1346 * after it has been in limbo for
1347 * a while.
1348 */
1349 microuptime(&now);
1350 slp->ns_timestamp = now.tv_sec;
1351 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1352 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1353 if (!nfsrv_deadsock_timer_on) {
1354 nfsrv_deadsock_timer_on = 1;
1355 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1356 NFSRV_DEADSOCKDELAY * 1000);
1357 }
1358
1359 lck_rw_done(&slp->ns_rwlock);
1360 /* now remove from the write gather socket list */
1361 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1362 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1363 slp->ns_wgq.tqe_next = SLPNOLIST;
1364 }
1365 lck_mtx_unlock(nfsd_mutex);
1366 }
1367
1368 /*
1369 * Check periodically for dead sockets pending delete.
1370 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
1371 * seconds then we assume it's safe to free.
1372 */
1373 void
1374 nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
1375 {
1376 struct nfsrv_sock *slp;
1377 struct timeval now;
1378 time_t time_to_wait;
1379
1380 microuptime(&now);
1381 lck_mtx_lock(nfsd_mutex);
1382
1383 while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
1384 if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
1385 break;
1386 TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
1387 nfsrv_slpfree(slp);
1388 }
1389 if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
1390 nfsrv_deadsock_timer_on = 0;
1391 lck_mtx_unlock(nfsd_mutex);
1392 return;
1393 }
1394 time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
1395 if (time_to_wait < 1)
1396 time_to_wait = 1;
1397
1398 lck_mtx_unlock(nfsd_mutex);
1399
1400 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1401 time_to_wait * 1000);
1402 }
1403
1404 /*
1405 * Clean up the data structures for the server.
1406 */
1407 void
1408 nfsrv_cleanup(void)
1409 {
1410 struct nfsrv_sock *slp, *nslp;
1411 struct timeval now;
1412 #if CONFIG_FSE
1413 struct nfsrv_fmod *fp, *nfp;
1414 int i;
1415 #endif
1416
1417 microuptime(&now);
1418 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1419 nslp = TAILQ_NEXT(slp, ns_chain);
1420 if (slp->ns_flag & SLP_VALID) {
1421 lck_rw_lock_exclusive(&slp->ns_rwlock);
1422 nfsrv_zapsock(slp);
1423 lck_rw_done(&slp->ns_rwlock);
1424 }
1425 if (slp->ns_flag & SLP_QUEUED) {
1426 if (slp->ns_flag & SLP_WAITQ)
1427 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1428 else
1429 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1430 slp->ns_flag &= ~SLP_QUEUED;
1431 }
1432 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1433 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1434 slp->ns_wgq.tqe_next = SLPNOLIST;
1435 }
1436 /* queue the socket up for deletion */
1437 slp->ns_timestamp = now.tv_sec;
1438 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1439 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1440 if (!nfsrv_deadsock_timer_on) {
1441 nfsrv_deadsock_timer_on = 1;
1442 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1443 NFSRV_DEADSOCKDELAY * 1000);
1444 }
1445 }
1446
1447 #if CONFIG_FSE
1448 /*
1449 * Flush pending file write fsevents
1450 */
1451 lck_mtx_lock(nfsrv_fmod_mutex);
1452 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1453 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1454 /*
1455 * Fire off the content modified fsevent for each
1456 * entry, remove it from the list, and free it.
1457 */
1458 if (nfsrv_fsevents_enabled)
1459 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1460 FSE_ARG_VNODE, fp->fm_vp,
1461 FSE_ARG_DONE);
1462 vnode_put(fp->fm_vp);
1463 kauth_cred_unref(&fp->fm_context.vc_ucred);
1464 nfp = LIST_NEXT(fp, fm_link);
1465 LIST_REMOVE(fp, fm_link);
1466 FREE(fp, M_TEMP);
1467 }
1468 }
1469 nfsrv_fmod_pending = 0;
1470 lck_mtx_unlock(nfsrv_fmod_mutex);
1471 #endif
1472
1473 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1474
1475 nfsrv_cleancache(); /* And clear out server cache */
1476
1477 nfsrv_udpsock = NULL;
1478 }
1479
1480 #endif /* NFS_NOSERVER */