]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_syscalls.c
xnu-1228.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
1c79356b 76#include <sys/kernel.h>
91447636 77#include <sys/file_internal.h>
1c79356b
A
78#include <sys/filedesc.h>
79#include <sys/stat.h>
91447636
A
80#include <sys/vnode_internal.h>
81#include <sys/mount_internal.h>
82#include <sys/proc_internal.h> /* for fdflags */
83#include <sys/kauth.h>
1c79356b 84#include <sys/sysctl.h>
55e303ae 85#include <sys/ubc.h>
1c79356b
A
86#include <sys/uio.h>
87#include <sys/malloc.h>
91447636 88#include <sys/kpi_mbuf.h>
1c79356b
A
89#include <sys/socket.h>
90#include <sys/socketvar.h>
91#include <sys/domain.h>
92#include <sys/protosw.h>
55e303ae
A
93#include <sys/fcntl.h>
94#include <sys/lockf.h>
1c79356b
A
95#include <sys/syslog.h>
96#include <sys/user.h>
91447636
A
97#include <sys/sysproto.h>
98#include <sys/kpi_socket.h>
2d21ac55 99#include <sys/fsevents.h>
91447636 100#include <libkern/OSAtomic.h>
2d21ac55
A
101#include <kern/thread_call.h>
102#include <kern/task.h>
1c79356b 103
ccc36f2f
A
104#include <bsm/audit_kernel.h>
105
1c79356b
A
106#include <netinet/in.h>
107#include <netinet/tcp.h>
1c79356b
A
108#include <nfs/xdr_subs.h>
109#include <nfs/rpcv2.h>
110#include <nfs/nfsproto.h>
111#include <nfs/nfs.h>
112#include <nfs/nfsm_subs.h>
113#include <nfs/nfsrvcache.h>
2d21ac55 114#include <nfs/nfs_gss.h>
1c79356b
A
115#include <nfs/nfsmount.h>
116#include <nfs/nfsnode.h>
55e303ae 117#include <nfs/nfs_lock.h>
2d21ac55
A
118#if CONFIG_MACF
119#include <security/mac_framework.h>
1c79356b
A
120#endif
121
2d21ac55
A
122kern_return_t thread_terminate(thread_t); /* XXX */
123
124#if NFSSERVER
125
126extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
127 struct nfsrv_sock *slp,
128 vfs_context_t ctx,
129 mbuf_t *mrepp);
130extern int nfsrv_wg_delay;
131extern int nfsrv_wg_delay_v3;
132
133static int nfsrv_require_resv_port = 0;
134static int nfsrv_deadsock_timer_on = 0;
135
136static int nfssvc_addsock(socket_t, mbuf_t);
137static int nfssvc_nfsd(void);
138static int nfssvc_export(user_addr_t);
139
140static void nfsrv_zapsock(struct nfsrv_sock *slp);
141static void nfsrv_slpderef(struct nfsrv_sock *);
142static void nfsrv_slpfree(struct nfsrv_sock *);
143
144#endif /* NFSSERVER */
145
146/*
147 * sysctl stuff
148 */
149SYSCTL_DECL(_vfs_generic);
150SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
151
152#if NFSCLIENT
153SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
154SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
155SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
156SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
157SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
158SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
159SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
160SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
161SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
162SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
163SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
164#endif /* NFSCLIENT */
165
166#if NFSSERVER
167SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
168SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
169SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
170SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
171SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
172SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
173SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
174SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
175SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
176SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
177SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
178#endif /* NFSSERVER */
179
180
181#if NFSCLIENT
182
183int
184nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
185{
186 struct lockd_ans la;
187 int error;
188
189 if (uap->flag == NFSCLNT_LOCKDANS) {
190 error = copyin(uap->argp, &la, sizeof(la));
191 return (error != 0 ? error : nfslockdans(p, &la));
192 }
193 return EINVAL;
194}
195
196/*
197 * Asynchronous I/O threads for client NFS.
198 * They do read-ahead and write-behind operations on the block I/O cache.
199 *
200 * A pool of up to nfsiod_thread_max threads is launched on demand; threads exit
201 * when unused for a while. There are as many nfsiod structs as there are
202 * nfsiod threads; however there's no strict tie between a thread and a struct.
203 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
204 * up, it removes the next struct nfsiod from the queue and services it. Then
205 * it will put the struct at the head of free list and sleep on it.
206 * Async requests will pull the next struct nfsiod from the head of the free list,
207 * put it on the work queue, and wake whatever thread is waiting on that struct.
208 */
209static int nfsiod_continue(int);
210
211/*
212 * nfsiod thread exit routine
213 *
214 * Must be called with nfsiod_mutex held so that the
215 * decision to terminate is atomic with the termination.
216 */
217static void
218nfsiod_terminate(struct nfsiod *niod)
219{
220 nfsiod_thread_count--;
221 lck_mtx_unlock(nfsiod_mutex);
222 if (niod)
223 FREE(niod, M_TEMP);
224 else
225 printf("nfsiod: terminating without niod\n");
226 thread_terminate(current_thread());
227 /*NOTREACHED*/
228}
229
230/* nfsiod thread startup routine */
231static void
232nfsiod_thread(void)
233{
234 struct nfsiod *niod;
235 int error;
236
237 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
238 if (!niod) {
239 lck_mtx_lock(nfsiod_mutex);
240 nfsiod_thread_count--;
241 lck_mtx_unlock(nfsiod_mutex);
242 thread_terminate(current_thread());
243 /*NOTREACHED*/
244 }
245 bzero(niod, sizeof(*niod));
246 lck_mtx_lock(nfsiod_mutex);
247 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
248 wakeup(current_thread());
249 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
250 /* shouldn't return... so we have an error */
251 /* remove an old nfsiod struct and terminate */
252 lck_mtx_lock(nfsiod_mutex);
253 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
254 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
255 nfsiod_terminate(niod);
256 /*NOTREACHED*/
257}
258
259/*
260 * Start up another nfsiod thread.
261 * (unless we're already maxed out and there are nfsiods running)
262 */
263int
264nfsiod_start(void)
265{
266 thread_t thd;
267
268 lck_mtx_lock(nfsiod_mutex);
269 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
270 lck_mtx_unlock(nfsiod_mutex);
271 return (EBUSY);
272 }
273 nfsiod_thread_count++;
274 thd = kernel_thread(kernel_task, nfsiod_thread);
275 /* wait for the thread to complete startup */
276 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
277 return (0);
278}
279
280/*
281 * Continuation for Asynchronous I/O threads for NFS client.
282 *
283 * Grab an nfsiod struct to work on, do some work, then drop it
284 */
285static int
286nfsiod_continue(int error)
287{
288 struct nfsiod *niod;
289 struct nfsmount *nmp;
290 struct nfsreq *req, *treq;
291 struct nfs_reqqhead iodq;
292 int morework;
293
294 lck_mtx_lock(nfsiod_mutex);
295 niod = TAILQ_FIRST(&nfsiodwork);
296 if (!niod) {
297 /* there's no work queued up */
298 if (error != EWOULDBLOCK)
299 printf("nfsiod: error %d work %p\n", error, niod);
300 /* remove an old nfsiod struct and terminate */
301 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
302 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
303 nfsiod_terminate(niod);
304 /*NOTREACHED*/
305 }
306 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
307
308worktodo:
309 while ((nmp = niod->niod_nmp)) {
310 /*
311 * Service this mount's async I/O queue.
312 *
313 * In order to ensure some level of fairness between mounts,
314 * we grab all the work up front before processing it so any
315 * new work that arrives will be serviced on a subsequent
316 * iteration - and we have a chance to see if other work needs
317 * to be done (e.g. the delayed write queue needs to be pushed
318 * or other mounts are waiting for an nfsiod).
319 */
320 /* grab the current contents of the queue */
321 TAILQ_INIT(&iodq);
322 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
323 lck_mtx_unlock(nfsiod_mutex);
324
325 /* process the queue */
326 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
327 TAILQ_REMOVE(&iodq, req, r_achain);
328 req->r_achain.tqe_next = NFSREQNOLIST;
329 req->r_callback.rcb_func(req);
330 }
331
332 /* now check if there's more/other work to be done */
333 lck_mtx_lock(nfsiod_mutex);
334 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
335 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
336 /* we're going to stop working on this mount */
337 if (morework) /* mount still needs more work so queue it up */
338 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
339 nmp->nm_niod = NULL;
340 niod->niod_nmp = NULL;
341 }
342 }
343
344 /* loop if there's still a mount to work on */
345 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
346 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
347 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
348 }
349 if (niod->niod_nmp)
350 goto worktodo;
351
352 /* queue ourselves back up - if there aren't too many threads running */
353 if (nfsiod_thread_count <= NFSIOD_MAX) {
354 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
355 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
356 /* shouldn't return... so we have an error */
357 /* remove an old nfsiod struct and terminate */
358 lck_mtx_lock(nfsiod_mutex);
359 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
360 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
361 }
362 nfsiod_terminate(niod);
363 /*NOTREACHED*/
364 return (0);
365}
366
367#endif /* NFSCLIENT */
368
369
370#if NFSSERVER
371
1c79356b
A
372/*
373 * NFS server system calls
374 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
375 */
376
377/*
378 * Get file handle system call
379 */
1c79356b 380int
91447636 381getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
1c79356b 382{
91447636
A
383 vnode_t vp;
384 struct nfs_filehandle nfh;
1c79356b
A
385 int error;
386 struct nameidata nd;
91447636
A
387 char path[MAXPATHLEN], *ptr;
388 u_int pathlen;
389 struct nfs_exportfs *nxfs;
390 struct nfs_export *nx;
391
1c79356b
A
392 /*
393 * Must be super user
394 */
91447636
A
395 error = proc_suser(p);
396 if (error)
397 return (error);
398
399 error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
400 if (error)
1c79356b 401 return (error);
91447636 402
2d21ac55
A
403 if (!nfsrv_is_initialized())
404 return (EINVAL);
405
91447636 406 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
2d21ac55 407 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
1c79356b
A
408 error = namei(&nd);
409 if (error)
410 return (error);
91447636
A
411 nameidone(&nd);
412
1c79356b 413 vp = nd.ni_vp;
91447636
A
414
415 // find exportfs that matches f_mntonname
2d21ac55 416 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 417 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
2d21ac55
A
418 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
419 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
91447636
A
420 break;
421 }
422 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
423 error = EINVAL;
424 goto out;
425 }
426 // find export that best matches remainder of path
427 ptr = path + strlen(nxfs->nxfs_path);
428 while (*ptr && (*ptr == '/'))
429 ptr++;
430 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
431 int len = strlen(nx->nx_path);
432 if (len == 0) // we've hit the export entry for the root directory
433 break;
434 if (!strncmp(nx->nx_path, ptr, len))
435 break;
436 }
437 if (!nx) {
438 error = EINVAL;
439 goto out;
440 }
441
442 bzero(&nfh, sizeof(nfh));
0c530ab8
A
443 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
444 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
445 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
91447636
A
446 nfh.nfh_xh.nxh_flags = 0;
447 nfh.nfh_xh.nxh_reserved = 0;
2d21ac55
A
448 nfh.nfh_len = NFSV3_MAX_FID_SIZE;
449 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
450 if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
91447636
A
451 error = EOVERFLOW;
452 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
453 nfh.nfh_len += sizeof(nfh.nfh_xh);
2d21ac55 454 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636
A
455
456out:
2d21ac55 457 lck_rw_done(&nfsrv_export_rwlock);
91447636 458 vnode_put(vp);
1c79356b
A
459 if (error)
460 return (error);
91447636 461 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
1c79356b
A
462 return (error);
463}
464
91447636
A
465extern struct fileops vnops;
466
55e303ae
A
467/*
468 * syscall for the rpc.lockd to use to translate a NFS file handle into
469 * an open descriptor.
470 *
471 * warning: do not remove the suser() call or this becomes one giant
472 * security hole.
473 */
55e303ae 474int
91447636
A
475fhopen( proc_t p,
476 struct fhopen_args *uap,
477 register_t *retval)
55e303ae 478{
91447636
A
479 vnode_t vp;
480 struct nfs_filehandle nfh;
481 struct nfs_export *nx;
482 struct nfs_export_options *nxo;
55e303ae 483 struct flock lf;
91447636
A
484 struct fileproc *fp, *nfp;
485 int fmode, error, type;
55e303ae 486 int indx;
2d21ac55 487 vfs_context_t ctx = vfs_context_current();
91447636
A
488 kauth_action_t action;
489
55e303ae
A
490 /*
491 * Must be super user
492 */
2d21ac55 493 error = suser(vfs_context_ucred(ctx), 0);
0c530ab8 494 if (error) {
55e303ae 495 return (error);
0c530ab8 496 }
55e303ae 497
2d21ac55
A
498 if (!nfsrv_is_initialized()) {
499 return (EINVAL);
500 }
501
55e303ae
A
502 fmode = FFLAGS(uap->flags);
503 /* why not allow a non-read/write open for our lockd? */
2d21ac55 504 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
55e303ae 505 return (EINVAL);
91447636
A
506
507 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
2d21ac55 508 if (error)
91447636
A
509 return (error);
510 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
2d21ac55 511 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
91447636
A
512 return (EINVAL);
513 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
2d21ac55 514 if (error)
55e303ae 515 return (error);
2d21ac55 516 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636 517
2d21ac55 518 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 519 /* now give me my vnode, it gets returned to me with a reference */
2d21ac55
A
520 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
521 lck_rw_done(&nfsrv_export_rwlock);
0c530ab8 522 if (error) {
2d21ac55
A
523 if (error == NFSERR_TRYLATER)
524 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
55e303ae 525 return (error);
0c530ab8 526 }
91447636 527
55e303ae 528 /*
91447636
A
529 * From now on we have to make sure not
530 * to forget about the vnode.
531 * Any error that causes an abort must vnode_put(vp).
532 * Just set error = err and 'goto bad;'.
55e303ae
A
533 */
534
535 /*
536 * from vn_open
537 */
91447636 538 if (vnode_vtype(vp) == VSOCK) {
55e303ae
A
539 error = EOPNOTSUPP;
540 goto bad;
541 }
542
91447636
A
543 /* disallow write operations on directories */
544 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
545 error = EISDIR;
55e303ae
A
546 goto bad;
547 }
548
91447636
A
549 /* compute action to be authorized */
550 action = 0;
551 if (fmode & FREAD)
552 action |= KAUTH_VNODE_READ_DATA;
553 if (fmode & (FWRITE | O_TRUNC))
554 action |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 555 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
91447636 556 goto bad;
55e303ae 557
2d21ac55 558 if ((error = VNOP_OPEN(vp, fmode, ctx)))
91447636
A
559 goto bad;
560 if ((error = vnode_ref_ext(vp, fmode)))
55e303ae
A
561 goto bad;
562
55e303ae
A
563 /*
564 * end of vn_open code
565 */
566
91447636 567 // starting here... error paths should call vn_close/vnode_put
2d21ac55
A
568 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
569 vn_close(vp, fmode & FMASK, ctx);
55e303ae
A
570 goto bad;
571 }
572 fp = nfp;
573
91447636
A
574 fp->f_fglob->fg_flag = fmode & FMASK;
575 fp->f_fglob->fg_type = DTYPE_VNODE;
576 fp->f_fglob->fg_ops = &vnops;
577 fp->f_fglob->fg_data = (caddr_t)vp;
578
579 // XXX do we really need to support this with fhopen()?
55e303ae
A
580 if (fmode & (O_EXLOCK | O_SHLOCK)) {
581 lf.l_whence = SEEK_SET;
582 lf.l_start = 0;
583 lf.l_len = 0;
584 if (fmode & O_EXLOCK)
585 lf.l_type = F_WRLCK;
586 else
587 lf.l_type = F_RDLCK;
588 type = F_FLOCK;
589 if ((fmode & FNONBLOCK) == 0)
590 type |= F_WAIT;
2d21ac55
A
591 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
592 struct vfs_context context = *vfs_context_current();
593 /* Modify local copy (to not damage thread copy) */
594 context.vc_ucred = fp->f_fglob->fg_cred;
595
596 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636 597 fp_free(p, indx, fp);
55e303ae
A
598 return (error);
599 }
91447636 600 fp->f_fglob->fg_flag |= FHASLOCK;
55e303ae
A
601 }
602
91447636
A
603 vnode_put(vp);
604
605 proc_fdlock(p);
6601e61a 606 procfdtbl_releasefd(p, indx, NULL);
91447636
A
607 fp_drop(p, indx, fp, 1);
608 proc_fdunlock(p);
609
55e303ae
A
610 *retval = indx;
611 return (0);
612
613bad:
91447636 614 vnode_put(vp);
55e303ae
A
615 return (error);
616}
617
1c79356b 618/*
2d21ac55 619 * NFS server pseudo system call
1c79356b 620 */
1c79356b 621int
91447636 622nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
1c79356b 623{
91447636
A
624 mbuf_t nam;
625 struct user_nfsd_args user_nfsdarg;
91447636 626 socket_t so;
1c79356b
A
627 int error;
628
ccc36f2f
A
629 AUDIT_ARG(cmd, uap->flag);
630
1c79356b
A
631 /*
632 * Must be super user
633 */
91447636 634 error = proc_suser(p);
2d21ac55 635 if (error)
1c79356b 636 return (error);
2d21ac55
A
637#if CONFIG_MACF
638 error = mac_system_check_nfsd(kauth_cred_get());
639 if (error)
640 return (error);
641#endif
91447636 642
2d21ac55
A
643 /* make sure NFS server data structures have been initialized */
644 nfsrv_init();
1c79356b 645
2d21ac55 646 if (uap->flag & NFSSVC_ADDSOCK) {
91447636
A
647 if (IS_64BIT_PROCESS(p)) {
648 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
649 } else {
650 struct nfsd_args tmp_args;
651 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
652 if (error == 0) {
653 user_nfsdarg.sock = tmp_args.sock;
654 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
655 user_nfsdarg.namelen = tmp_args.namelen;
656 }
657 }
1c79356b
A
658 if (error)
659 return (error);
91447636
A
660 /* get the socket */
661 error = file_socket(user_nfsdarg.sock, &so);
1c79356b
A
662 if (error)
663 return (error);
91447636
A
664 /* Get the client address for connected sockets. */
665 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
666 nam = NULL;
667 } else {
668 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
669 if (error) {
670 /* drop the iocount file_socket() grabbed on the file descriptor */
671 file_drop(user_nfsdarg.sock);
1c79356b 672 return (error);
91447636 673 }
1c79356b 674 }
91447636
A
675 /*
676 * nfssvc_addsock() will grab a retain count on the socket
677 * to keep the socket from being closed when nfsd closes its
678 * file descriptor for it.
679 */
2d21ac55 680 error = nfssvc_addsock(so, nam);
91447636
A
681 /* drop the iocount file_socket() grabbed on the file descriptor */
682 file_drop(user_nfsdarg.sock);
683 } else if (uap->flag & NFSSVC_NFSD) {
2d21ac55 684 error = nfssvc_nfsd();
91447636 685 } else if (uap->flag & NFSSVC_EXPORT) {
2d21ac55 686 error = nfssvc_export(uap->argp);
91447636
A
687 } else {
688 error = EINVAL;
1c79356b 689 }
1c79356b
A
690 if (error == EINTR || error == ERESTART)
691 error = 0;
692 return (error);
693}
694
1c79356b
A
695/*
696 * Adds a socket to the list for servicing by nfsds.
697 */
698static int
2d21ac55 699nfssvc_addsock(socket_t so, mbuf_t mynam)
1c79356b 700{
2d21ac55
A
701 struct nfsrv_sock *slp;
702 int error = 0, sodomain, sotype, soprotocol, on = 1;
91447636
A
703 struct timeval timeo;
704
705 /* make sure mbuf constants are set up */
2d21ac55 706 if (!nfs_mbuf_mhlen)
91447636
A
707 nfs_mbuf_init();
708
709 sock_gettype(so, &sodomain, &sotype, &soprotocol);
710
2d21ac55
A
711 /* There should be only one UDP socket */
712 if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
91447636 713 mbuf_freem(mynam);
2d21ac55 714 return (EEXIST);
1c79356b
A
715 }
716
2d21ac55
A
717 /* Set protocol options and reserve some space (for UDP). */
718 if (sotype == SOCK_STREAM)
91447636 719 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
2d21ac55 720 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
91447636 721 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2d21ac55
A
722 if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
723 int reserve = NFS_UDPSOCKBUF;
724 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
725 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
726 if (error) {
727 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
728 error = 0;
729 }
1c79356b 730 }
91447636
A
731 sock_nointerrupt(so, 0);
732
2d21ac55
A
733 /*
734 * Set socket send/receive timeouts.
735 * Receive timeout shouldn't matter, but setting the send timeout
736 * will make sure that an unresponsive client can't hang the server.
737 */
91447636 738 timeo.tv_usec = 0;
2d21ac55
A
739 timeo.tv_sec = 1;
740 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
741 timeo.tv_sec = 30;
742 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
743 if (error) {
744 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
745 error = 0;
746 }
91447636 747
2d21ac55
A
748 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
749 if (!slp) {
750 mbuf_freem(mynam);
751 return (ENOMEM);
752 }
753 bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
754 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
755 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
756
757 lck_mtx_lock(nfsd_mutex);
758
759 if (soprotocol == IPPROTO_UDP) {
760 /* There should be only one UDP socket */
761 if (nfsrv_udpsock) {
762 lck_mtx_unlock(nfsd_mutex);
763 nfsrv_slpfree(slp);
91447636 764 mbuf_freem(mynam);
2d21ac55 765 return (EEXIST);
91447636 766 }
2d21ac55 767 nfsrv_udpsock = slp;
1c79356b 768 }
91447636 769
2d21ac55
A
770 /* add the socket to the list */
771 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
772
91447636 773 sock_retain(so); /* grab a retain count on the socket */
1c79356b 774 slp->ns_so = so;
91447636 775 slp->ns_sotype = sotype;
1c79356b 776 slp->ns_nam = mynam;
91447636 777
2d21ac55 778 /* set up the socket upcall */
91447636 779 socket_lock(so, 1);
1c79356b
A
780 so->so_upcallarg = (caddr_t)slp;
781 so->so_upcall = nfsrv_rcv;
2d21ac55 782 so->so_rcv.sb_flags |= SB_UPCALL;
91447636
A
783 socket_unlock(so, 1);
784
2d21ac55
A
785 /* mark that the socket is not in the nfsrv_sockwg list */
786 slp->ns_wgq.tqe_next = SLPNOLIST;
787
91447636
A
788 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
789
1c79356b 790 nfsrv_wakenfsd(slp);
91447636
A
791 lck_mtx_unlock(nfsd_mutex);
792
1c79356b
A
793 return (0);
794}
795
796/*
2d21ac55
A
797 * nfssvc_nfsd()
798 *
799 * nfsd theory of operation:
800 *
801 * The first nfsd thread stays in user mode accepting new TCP connections
802 * which are then added via the "addsock" call. The rest of the nfsd threads
803 * simply call into the kernel and remain there in a loop handling NFS
804 * requests until killed by a signal.
805 *
806 * There's a list of nfsd threads (nfsd_head).
807 * There's an nfsd queue that contains only those nfsds that are
808 * waiting for work to do (nfsd_queue).
809 *
810 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
811 * managing the work on the sockets:
812 * nfsrv_sockwait - sockets w/new data waiting to be worked on
813 * nfsrv_sockwork - sockets being worked on which may have more work to do
814 * nfsrv_sockwg -- sockets which have pending write gather data
815 * When a socket receives data, if it is not currently queued, it
816 * will be placed at the end of the "wait" queue.
817 * Whenever a socket needs servicing we make sure it is queued and
818 * wake up a waiting nfsd (if there is one).
819 *
820 * nfsds will service at most 8 requests from the same socket before
821 * defecting to work on another socket.
822 * nfsds will defect immediately if there are any sockets in the "wait" queue
823 * nfsds looking for a socket to work on check the "wait" queue first and
824 * then check the "work" queue.
825 * When an nfsd starts working on a socket, it removes it from the head of
826 * the queue it's currently on and moves it to the end of the "work" queue.
827 * When nfsds are checking the queues for work, any sockets found not to
828 * have any work are simply dropped from the queue.
829 *
1c79356b
A
830 */
831static int
2d21ac55 832nfssvc_nfsd(void)
1c79356b 833{
2d21ac55
A
834 mbuf_t m, mrep;
835 struct nfsrv_sock *slp;
836 struct nfsd *nfsd;
1c79356b 837 struct nfsrv_descript *nd = NULL;
91447636 838 int error = 0, cacherep, writes_todo;
2d21ac55 839 int siz, procrastinate, opcnt = 0;
1c79356b 840 u_quad_t cur_usec;
55e303ae 841 struct timeval now;
2d21ac55 842 struct vfs_context context;
1c79356b
A
843
844#ifndef nolint
845 cacherep = RC_DOIT;
846 writes_todo = 0;
847#endif
91447636 848
2d21ac55
A
849 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
850 if (!nfsd)
851 return (ENOMEM);
852 bzero(nfsd, sizeof(struct nfsd));
853 lck_mtx_lock(nfsd_mutex);
854 if (nfsd_thread_count++ == 0)
855 nfsrv_initcache(); /* Init the server request cache */
856 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
857 lck_mtx_unlock(nfsd_mutex);
858
859 context.vc_thread = current_thread();
91447636 860
1c79356b
A
861 /*
862 * Loop getting rpc requests until SIGKILL.
863 */
864 for (;;) {
2d21ac55
A
865 if (nfsd_thread_max <= 0) {
866 /* NFS server shutting down, get out ASAP */
867 error = EINTR;
868 slp = nfsd->nfsd_slp;
869 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
870 /* already have some work to do */
871 error = 0;
872 slp = nfsd->nfsd_slp;
873 } else {
874 /* need to find work to do */
875 error = 0;
91447636 876 lck_mtx_lock(nfsd_mutex);
2d21ac55
A
877 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
878 if (nfsd_thread_count > nfsd_thread_max) {
879 /*
880 * If we have no socket and there are more
881 * nfsd threads than configured, let's exit.
882 */
883 error = 0;
884 goto done;
885 }
1c79356b 886 nfsd->nfsd_flag |= NFSD_WAITING;
2d21ac55
A
887 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
888 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
91447636 889 if (error) {
2d21ac55
A
890 if (nfsd->nfsd_flag & NFSD_WAITING) {
891 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
892 nfsd->nfsd_flag &= ~NFSD_WAITING;
893 }
1c79356b 894 goto done;
91447636 895 }
1c79356b 896 }
2d21ac55
A
897 slp = nfsd->nfsd_slp;
898 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
899 /* look for a socket to work on in the wait queue */
900 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
901 lck_rw_lock_exclusive(&slp->ns_rwlock);
902 /* remove from the head of the queue */
903 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
904 slp->ns_flag &= ~SLP_WAITQ;
905 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
906 break;
907 /* nothing to do, so skip this socket */
908 lck_rw_done(&slp->ns_rwlock);
1c79356b 909 }
2d21ac55
A
910 }
911 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
912 /* look for a socket to work on in the work queue */
913 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
914 lck_rw_lock_exclusive(&slp->ns_rwlock);
915 /* remove from the head of the queue */
916 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
917 slp->ns_flag &= ~SLP_WORKQ;
918 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
919 break;
920 /* nothing to do, so skip this socket */
921 lck_rw_done(&slp->ns_rwlock);
922 }
923 }
924 if (!nfsd->nfsd_slp && slp) {
925 /* we found a socket to work on, grab a reference */
926 slp->ns_sref++;
927 nfsd->nfsd_slp = slp;
928 opcnt = 0;
929 /* and put it at the back of the work queue */
930 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
931 slp->ns_flag |= SLP_WORKQ;
932 lck_rw_done(&slp->ns_rwlock);
1c79356b 933 }
91447636 934 lck_mtx_unlock(nfsd_mutex);
2d21ac55 935 if (!slp)
1c79356b 936 continue;
91447636 937 lck_rw_lock_exclusive(&slp->ns_rwlock);
1c79356b 938 if (slp->ns_flag & SLP_VALID) {
743b1565 939 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
91447636
A
940 slp->ns_flag &= ~SLP_NEEDQ;
941 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
942 }
743b1565
A
943 if (slp->ns_flag & SLP_DISCONN)
944 nfsrv_zapsock(slp);
1c79356b 945 error = nfsrv_dorec(slp, nfsd, &nd);
2d21ac55
A
946 if (error == EINVAL) { // RPCSEC_GSS drop
947 if (slp->ns_sotype == SOCK_STREAM)
948 nfsrv_zapsock(slp); // drop connection
949 }
950 writes_todo = 0;
951 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
952 microuptime(&now);
953 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
954 (u_quad_t)now.tv_usec;
955 if (slp->ns_wgtime <= cur_usec) {
956 error = 0;
957 cacherep = RC_DOIT;
958 writes_todo = 1;
959 }
960 slp->ns_flag &= ~SLP_DOWRITES;
961 }
1c79356b
A
962 nfsd->nfsd_flag |= NFSD_REQINPROG;
963 }
91447636 964 lck_rw_done(&slp->ns_rwlock);
1c79356b 965 }
2d21ac55 966 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1c79356b 967 if (nd) {
2d21ac55 968 nfsm_chain_cleanup(&nd->nd_nmreq);
55e303ae 969 if (nd->nd_nam2)
91447636 970 mbuf_freem(nd->nd_nam2);
0c530ab8
A
971 if (IS_VALID_CRED(nd->nd_cr))
972 kauth_cred_unref(&nd->nd_cr);
2d21ac55 973 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b
A
974 nd = NULL;
975 }
91447636 976 nfsd->nfsd_slp = NULL;
1c79356b 977 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
2d21ac55
A
978 if (slp)
979 nfsrv_slpderef(slp);
980 if (nfsd_thread_max <= 0)
981 break;
1c79356b
A
982 continue;
983 }
1c79356b 984 if (nd) {
55e303ae 985 microuptime(&nd->nd_starttime);
1c79356b
A
986 if (nd->nd_nam2)
987 nd->nd_nam = nd->nd_nam2;
988 else
989 nd->nd_nam = slp->ns_nam;
990
2d21ac55
A
991 cacherep = nfsrv_getcache(nd, slp, &mrep);
992
993 if (nfsrv_require_resv_port) {
994 /* Check if source port is a reserved port */
1c79356b 995 u_short port;
91447636 996 struct sockaddr *nam = mbuf_data(nd->nd_nam);
1c79356b
A
997 struct sockaddr_in *sin;
998
999 sin = (struct sockaddr_in *)nam;
1000 port = ntohs(sin->sin_port);
1001 if (port >= IPPORT_RESERVED &&
1002 nd->nd_procnum != NFSPROC_NULL) {
91447636 1003 char strbuf[MAX_IPv4_STR_LEN];
1c79356b
A
1004 nd->nd_procnum = NFSPROC_NOOP;
1005 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1006 cacherep = RC_DOIT;
1007 printf("NFS request from unprivileged port (%s:%d)\n",
91447636
A
1008 inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
1009 port);
1c79356b
A
1010 }
1011 }
1012
1013 }
1014
1015 /*
2d21ac55 1016 * Loop to get all the write RPC replies that have been
1c79356b
A
1017 * gathered together.
1018 */
1019 do {
1020 switch (cacherep) {
1021 case RC_DOIT:
2d21ac55
A
1022 if (nd && (nd->nd_vers == NFS_VER3))
1023 procrastinate = nfsrv_wg_delay_v3;
1c79356b 1024 else
2d21ac55
A
1025 procrastinate = nfsrv_wg_delay;
1026 lck_rw_lock_shared(&nfsrv_export_rwlock);
1027 context.vc_ucred = NULL;
91447636 1028 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
2d21ac55
A
1029 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1030 else
1031 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1032 lck_rw_done(&nfsrv_export_rwlock);
1033 if (mrep == NULL) {
1034 /*
1035 * If this is a stream socket and we are not going
1036 * to send a reply we better close the connection
1037 * so the client doesn't hang.
1038 */
1039 if (error && slp->ns_sotype == SOCK_STREAM) {
1040 lck_rw_lock_exclusive(&slp->ns_rwlock);
1041 nfsrv_zapsock(slp);
1042 lck_rw_done(&slp->ns_rwlock);
1043 printf("NFS server: NULL reply from proc = %d error = %d\n",
1044 nd->nd_procnum, error);
1045 }
1c79356b 1046 break;
2d21ac55
A
1047
1048 }
1c79356b 1049 if (error) {
91447636 1050 OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs);
2d21ac55 1051 nfsrv_updatecache(nd, FALSE, mrep);
55e303ae 1052 if (nd->nd_nam2) {
91447636 1053 mbuf_freem(nd->nd_nam2);
55e303ae
A
1054 nd->nd_nam2 = NULL;
1055 }
1c79356b
A
1056 break;
1057 }
91447636 1058 OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]);
2d21ac55
A
1059 nfsrv_updatecache(nd, TRUE, mrep);
1060 /* FALLTHRU */
1061
1c79356b 1062 case RC_REPLY:
2d21ac55
A
1063 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1064 /*
1065 * Need to checksum or encrypt the reply
1066 */
1067 error = nfs_gss_svc_protect_reply(nd, mrep);
1068 if (error) {
1069 mbuf_freem(mrep);
1070 break;
1071 }
1072 }
1073
1074 /*
1075 * Get the total size of the reply
1076 */
1077 m = mrep;
1c79356b
A
1078 siz = 0;
1079 while (m) {
91447636
A
1080 siz += mbuf_len(m);
1081 m = mbuf_next(m);
1c79356b
A
1082 }
1083 if (siz <= 0 || siz > NFS_MAXPACKET) {
1084 printf("mbuf siz=%d\n",siz);
1085 panic("Bad nfs svc reply");
1086 }
2d21ac55 1087 m = mrep;
91447636
A
1088 mbuf_pkthdr_setlen(m, siz);
1089 error = mbuf_pkthdr_setrcvif(m, NULL);
1090 if (error)
1091 panic("nfsd setrcvif failed: %d", error);
1c79356b
A
1092 /*
1093 * For stream protocols, prepend a Sun RPC
1094 * Record Mark.
1095 */
91447636
A
1096 if (slp->ns_sotype == SOCK_STREAM) {
1097 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1098 if (!error)
1099 *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz);
1c79356b 1100 }
91447636
A
1101 if (!error) {
1102 if (slp->ns_flag & SLP_VALID) {
2d21ac55 1103 error = nfsrv_send(slp, nd->nd_nam2, m);
91447636
A
1104 } else {
1105 error = EPIPE;
1106 mbuf_freem(m);
1107 }
1108 } else {
1109 mbuf_freem(m);
1c79356b 1110 }
2d21ac55 1111 mrep = NULL;
55e303ae 1112 if (nd->nd_nam2) {
91447636 1113 mbuf_freem(nd->nd_nam2);
55e303ae
A
1114 nd->nd_nam2 = NULL;
1115 }
91447636
A
1116 if (error == EPIPE) {
1117 lck_rw_lock_exclusive(&slp->ns_rwlock);
1c79356b 1118 nfsrv_zapsock(slp);
91447636
A
1119 lck_rw_done(&slp->ns_rwlock);
1120 }
1c79356b 1121 if (error == EINTR || error == ERESTART) {
2d21ac55 1122 nfsm_chain_cleanup(&nd->nd_nmreq);
0c530ab8
A
1123 if (IS_VALID_CRED(nd->nd_cr))
1124 kauth_cred_unref(&nd->nd_cr);
2d21ac55 1125 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b 1126 nfsrv_slpderef(slp);
2d21ac55 1127 lck_mtx_lock(nfsd_mutex);
1c79356b
A
1128 goto done;
1129 }
1130 break;
1131 case RC_DROPIT:
91447636 1132 mbuf_freem(nd->nd_nam2);
2d21ac55 1133 nd->nd_nam2 = NULL;
1c79356b
A
1134 break;
1135 };
2d21ac55 1136 opcnt++;
1c79356b 1137 if (nd) {
2d21ac55 1138 nfsm_chain_cleanup(&nd->nd_nmreq);
55e303ae 1139 if (nd->nd_nam2)
91447636 1140 mbuf_freem(nd->nd_nam2);
0c530ab8
A
1141 if (IS_VALID_CRED(nd->nd_cr))
1142 kauth_cred_unref(&nd->nd_cr);
2d21ac55 1143 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b
A
1144 nd = NULL;
1145 }
1146
1147 /*
1148 * Check to see if there are outstanding writes that
1149 * need to be serviced.
1150 */
2d21ac55
A
1151 writes_todo = 0;
1152 if (slp->ns_wgtime) {
1153 microuptime(&now);
1154 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1155 (u_quad_t)now.tv_usec;
1156 if (slp->ns_wgtime <= cur_usec) {
1157 cacherep = RC_DOIT;
1158 writes_todo = 1;
1159 }
91447636 1160 }
1c79356b 1161 } while (writes_todo);
2d21ac55
A
1162
1163 nd = NULL;
1164 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1165 lck_rw_lock_exclusive(&slp->ns_rwlock);
1166 error = nfsrv_dorec(slp, nfsd, &nd);
1167 if (error == EINVAL) { // RPCSEC_GSS drop
1168 if (slp->ns_sotype == SOCK_STREAM)
1169 nfsrv_zapsock(slp); // drop connection
1170 }
91447636 1171 lck_rw_done(&slp->ns_rwlock);
2d21ac55
A
1172 }
1173 if (!nd) {
1174 /* drop our reference on the socket */
1c79356b
A
1175 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1176 nfsd->nfsd_slp = NULL;
1177 nfsrv_slpderef(slp);
91447636 1178 }
1c79356b 1179 }
91447636 1180 lck_mtx_lock(nfsd_mutex);
2d21ac55 1181done:
1c79356b 1182 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
91447636 1183 FREE(nfsd, M_NFSD);
2d21ac55
A
1184 if (--nfsd_thread_count == 0)
1185 nfsrv_cleanup();
91447636 1186 lck_mtx_unlock(nfsd_mutex);
1c79356b
A
1187 return (error);
1188}
91447636
A
1189
1190static int
2d21ac55 1191nfssvc_export(user_addr_t argp)
91447636
A
1192{
1193 int error = 0, is_64bit;
1194 struct user_nfs_export_args unxa;
2d21ac55 1195 vfs_context_t ctx = vfs_context_current();
91447636 1196
2d21ac55 1197 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
91447636
A
1198
1199 /* copy in pointers to path and export args */
1200 if (is_64bit) {
1201 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1202 } else {
1203 struct nfs_export_args tnxa;
1204 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1205 if (error == 0) {
1206 /* munge into LP64 version of nfs_export_args structure */
1207 unxa.nxa_fsid = tnxa.nxa_fsid;
1208 unxa.nxa_expid = tnxa.nxa_expid;
1209 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1210 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1211 unxa.nxa_flags = tnxa.nxa_flags;
1212 unxa.nxa_netcount = tnxa.nxa_netcount;
1213 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1214 }
1215 }
1216 if (error)
1217 return (error);
1218
2d21ac55 1219 error = nfsrv_export(&unxa, ctx);
91447636
A
1220
1221 return (error);
1222}
1223
1c79356b 1224/*
2d21ac55 1225 * Shut down a socket associated with an nfsrv_sock structure.
1c79356b
A
1226 * Should be called with the send lock set, if required.
1227 * The trick here is to increment the sref at the start, so that the nfsds
1228 * will stop using it and clear ns_flag at the end so that it will not be
1229 * reassigned during cleanup.
1230 */
1231static void
2d21ac55 1232nfsrv_zapsock(struct nfsrv_sock *slp)
1c79356b 1233{
91447636 1234 socket_t so;
1c79356b 1235
91447636
A
1236 if ((slp->ns_flag & SLP_VALID) == 0)
1237 return;
1c79356b 1238 slp->ns_flag &= ~SLP_ALLFLAGS;
91447636
A
1239
1240 so = slp->ns_so;
1241 if (so == NULL)
1242 return;
1243
743b1565
A
1244 /*
1245 * Attempt to deter future upcalls, but leave the
1246 * upcall info in place to avoid a race with the
1247 * networking code.
1248 */
91447636 1249 socket_lock(so, 1);
91447636
A
1250 so->so_rcv.sb_flags &= ~SB_UPCALL;
1251 socket_unlock(so, 1);
743b1565 1252
91447636 1253 sock_shutdown(so, SHUT_RDWR);
1c79356b
A
1254}
1255
1c79356b 1256/*
91447636 1257 * cleanup and release a server socket structure.
1c79356b 1258 */
2d21ac55
A
1259static void
1260nfsrv_slpfree(struct nfsrv_sock *slp)
1c79356b 1261{
91447636 1262 struct nfsrv_descript *nwp, *nnwp;
1c79356b 1263
91447636
A
1264 if (slp->ns_so) {
1265 sock_release(slp->ns_so);
1266 slp->ns_so = NULL;
1267 }
1268 if (slp->ns_nam)
1269 mbuf_free(slp->ns_nam);
1270 if (slp->ns_raw)
1271 mbuf_freem(slp->ns_raw);
1272 if (slp->ns_rec)
1273 mbuf_freem(slp->ns_rec);
2d21ac55
A
1274 if (slp->ns_frag)
1275 mbuf_freem(slp->ns_frag);
1276 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1277 slp->ns_reccnt = 0;
55e303ae 1278
91447636
A
1279 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1280 nnwp = nwp->nd_tq.le_next;
1281 LIST_REMOVE(nwp, nd_tq);
2d21ac55
A
1282 nfsm_chain_cleanup(&nwp->nd_nmreq);
1283 if (nwp->nd_mrep)
1284 mbuf_freem(nwp->nd_mrep);
1285 if (nwp->nd_nam2)
1286 mbuf_freem(nwp->nd_nam2);
0c530ab8
A
1287 if (IS_VALID_CRED(nwp->nd_cr))
1288 kauth_cred_unref(&nwp->nd_cr);
2d21ac55 1289 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
55e303ae 1290 }
91447636
A
1291 LIST_INIT(&slp->ns_tq);
1292
2d21ac55
A
1293 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1294 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
91447636 1295 FREE(slp, M_NFSSVC);
55e303ae
A
1296}
1297
1298/*
91447636
A
1299 * Derefence a server socket structure. If it has no more references and
1300 * is no longer valid, you can throw it away.
55e303ae
A
1301 */
1302void
2d21ac55 1303nfsrv_slpderef(struct nfsrv_sock *slp)
55e303ae 1304{
743b1565
A
1305 struct timeval now;
1306
91447636
A
1307 lck_mtx_lock(nfsd_mutex);
1308 lck_rw_lock_exclusive(&slp->ns_rwlock);
1309 slp->ns_sref--;
2d21ac55 1310
91447636 1311 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
2d21ac55
A
1312 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1313 /* remove socket from queue since there's no work */
1314 if (slp->ns_flag & SLP_WAITQ)
1315 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1316 else
1317 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1318 slp->ns_flag &= ~SLP_QUEUED;
1319 }
91447636
A
1320 lck_rw_done(&slp->ns_rwlock);
1321 lck_mtx_unlock(nfsd_mutex);
1322 return;
55e303ae 1323 }
91447636 1324
2d21ac55
A
1325 /* This socket is no longer valid, so we'll get rid of it */
1326
1327 if (slp->ns_flag & SLP_QUEUED) {
1328 if (slp->ns_flag & SLP_WAITQ)
1329 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1330 else
1331 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1332 slp->ns_flag &= ~SLP_QUEUED;
1333 }
1334
1335 /*
1336 * Queue the socket up for deletion
1337 * and start the timer to delete it
1338 * after it has been in limbo for
1339 * a while.
1340 */
743b1565
A
1341 microuptime(&now);
1342 slp->ns_timestamp = now.tv_sec;
2d21ac55
A
1343 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1344 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1345 if (!nfsrv_deadsock_timer_on) {
1346 nfsrv_deadsock_timer_on = 1;
1347 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1348 NFSRV_DEADSOCKDELAY * 1000);
1349 }
1350
743b1565 1351 lck_rw_done(&slp->ns_rwlock);
2d21ac55
A
1352 /* now remove from the write gather socket list */
1353 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1354 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1355 slp->ns_wgq.tqe_next = SLPNOLIST;
1356 }
91447636 1357 lck_mtx_unlock(nfsd_mutex);
55e303ae
A
1358}
1359
1c79356b 1360/*
2d21ac55
A
1361 * Check periodically for dead sockets pending delete.
1362 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
1363 * seconds then we assume it's safe to free.
1c79356b
A
1364 */
1365void
2d21ac55 1366nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
1c79356b 1367{
2d21ac55 1368 struct nfsrv_sock *slp;
743b1565 1369 struct timeval now;
2d21ac55 1370 time_t time_to_wait;
1c79356b 1371
2d21ac55
A
1372 microuptime(&now);
1373 lck_mtx_lock(nfsd_mutex);
1c79356b 1374
2d21ac55
A
1375 while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
1376 if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
1377 break;
1378 TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
1379 nfsrv_slpfree(slp);
8ad349bb 1380 }
2d21ac55
A
1381 if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
1382 nfsrv_deadsock_timer_on = 0;
1383 lck_mtx_unlock(nfsd_mutex);
1384 return;
91447636 1385 }
2d21ac55
A
1386 time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
1387 if (time_to_wait < 1)
1388 time_to_wait = 1;
1c79356b 1389
2d21ac55
A
1390 lck_mtx_unlock(nfsd_mutex);
1391
1392 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1393 time_to_wait * 1000);
1c79356b
A
1394}
1395
1396/*
2d21ac55 1397 * Clean up the data structures for the server.
1c79356b 1398 */
2d21ac55
A
1399void
1400nfsrv_cleanup(void)
1c79356b 1401{
2d21ac55 1402 struct nfsrv_sock *slp, *nslp;
55e303ae 1403 struct timeval now;
2d21ac55
A
1404 struct nfsrv_fmod *fp, *nfp;
1405 int i;
1c79356b 1406
55e303ae 1407 microuptime(&now);
2d21ac55
A
1408 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1409 nslp = TAILQ_NEXT(slp, ns_chain);
1410 if (slp->ns_flag & SLP_VALID) {
1411 lck_rw_lock_exclusive(&slp->ns_rwlock);
1412 nfsrv_zapsock(slp);
1413 lck_rw_done(&slp->ns_rwlock);
1414 }
1415 if (slp->ns_flag & SLP_QUEUED) {
1416 if (slp->ns_flag & SLP_WAITQ)
1417 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1418 else
1419 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1420 slp->ns_flag &= ~SLP_QUEUED;
1421 }
1422 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1423 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1424 slp->ns_wgq.tqe_next = SLPNOLIST;
1425 }
1426 /* queue the socket up for deletion */
1427 slp->ns_timestamp = now.tv_sec;
1428 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1429 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1430 if (!nfsrv_deadsock_timer_on) {
1431 nfsrv_deadsock_timer_on = 1;
1432 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1433 NFSRV_DEADSOCKDELAY * 1000);
1434 }
1435 }
1436
1437 /*
1438 * Flush pending file write fsevents
1439 */
1440 lck_mtx_lock(nfsrv_fmod_mutex);
1441 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1442 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1443 /*
1444 * Fire off the content modified fsevent for each
1445 * entry, remove it from the list, and free it.
1446 */
1447#if CONFIG_FSE
1448 if (nfsrv_fsevents_enabled)
1449 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1450 FSE_ARG_VNODE, fp->fm_vp,
1451 FSE_ARG_DONE);
1452#endif
1453 vnode_put(fp->fm_vp);
1454 kauth_cred_unref(&fp->fm_context.vc_ucred);
1455 nfp = LIST_NEXT(fp, fm_link);
1456 LIST_REMOVE(fp, fm_link);
1457 FREE(fp, M_TEMP);
1458 }
1459 }
1460 nfsrv_fmod_pending = 0;
1461 lck_mtx_unlock(nfsrv_fmod_mutex);
1462
1463 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1464
1465 nfsrv_cleancache(); /* And clear out server cache */
1466
1467 nfsrv_udpsock = NULL;
1c79356b 1468}
2d21ac55 1469
1c79356b 1470#endif /* NFS_NOSERVER */