]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_syscalls.c
xnu-4570.20.62.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
CommitLineData
1c79356b 1/*
5ba3f43e 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
1c79356b 76#include <sys/kernel.h>
91447636 77#include <sys/file_internal.h>
1c79356b
A
78#include <sys/filedesc.h>
79#include <sys/stat.h>
91447636
A
80#include <sys/vnode_internal.h>
81#include <sys/mount_internal.h>
82#include <sys/proc_internal.h> /* for fdflags */
83#include <sys/kauth.h>
1c79356b 84#include <sys/sysctl.h>
55e303ae 85#include <sys/ubc.h>
1c79356b
A
86#include <sys/uio.h>
87#include <sys/malloc.h>
91447636 88#include <sys/kpi_mbuf.h>
1c79356b
A
89#include <sys/socket.h>
90#include <sys/socketvar.h>
91#include <sys/domain.h>
92#include <sys/protosw.h>
55e303ae
A
93#include <sys/fcntl.h>
94#include <sys/lockf.h>
1c79356b
A
95#include <sys/syslog.h>
96#include <sys/user.h>
91447636
A
97#include <sys/sysproto.h>
98#include <sys/kpi_socket.h>
2d21ac55 99#include <sys/fsevents.h>
91447636 100#include <libkern/OSAtomic.h>
2d21ac55
A
101#include <kern/thread_call.h>
102#include <kern/task.h>
1c79356b 103
b0d623f7 104#include <security/audit/audit.h>
ccc36f2f 105
1c79356b
A
106#include <netinet/in.h>
107#include <netinet/tcp.h>
1c79356b
A
108#include <nfs/xdr_subs.h>
109#include <nfs/rpcv2.h>
110#include <nfs/nfsproto.h>
111#include <nfs/nfs.h>
112#include <nfs/nfsm_subs.h>
113#include <nfs/nfsrvcache.h>
2d21ac55 114#include <nfs/nfs_gss.h>
1c79356b
A
115#include <nfs/nfsmount.h>
116#include <nfs/nfsnode.h>
55e303ae 117#include <nfs/nfs_lock.h>
2d21ac55
A
118#if CONFIG_MACF
119#include <security/mac_framework.h>
1c79356b
A
120#endif
121
2d21ac55
A
122kern_return_t thread_terminate(thread_t); /* XXX */
123
124#if NFSSERVER
125
126extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
127 struct nfsrv_sock *slp,
128 vfs_context_t ctx,
129 mbuf_t *mrepp);
130extern int nfsrv_wg_delay;
131extern int nfsrv_wg_delay_v3;
132
133static int nfsrv_require_resv_port = 0;
3e170ce0
A
134static time_t nfsrv_idlesock_timer_on = 0;
135static int nfsrv_sock_tcp_cnt = 0;
136#define NFSD_MIN_IDLE_TIMEOUT 30
137static int nfsrv_sock_idle_timeout = 3600; /* One hour */
2d21ac55 138
b0d623f7
A
139int nfssvc_export(user_addr_t argp);
140int nfssvc_nfsd(void);
141int nfssvc_addsock(socket_t, mbuf_t);
142void nfsrv_zapsock(struct nfsrv_sock *);
143void nfsrv_slpderef(struct nfsrv_sock *);
144void nfsrv_slpfree(struct nfsrv_sock *);
2d21ac55
A
145
146#endif /* NFSSERVER */
147
148/*
149 * sysctl stuff
150 */
151SYSCTL_DECL(_vfs_generic);
152SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
153
154#if NFSCLIENT
155SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
6d2010ae
A
156SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
157SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
158SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
159SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
160SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
161SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
162SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
163SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
164SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
165SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
6d2010ae
A
166SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
167SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
168SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
169SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
170SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
316670eb
A
171SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
172SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
39236c6e 173SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
fe8ab488 174SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
3e170ce0 175SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
5ba3f43e 176SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
2d21ac55
A
177#endif /* NFSCLIENT */
178
179#if NFSSERVER
180SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
6d2010ae
A
181SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
182SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
183SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
184SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
185SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
186SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
187SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
188SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
189SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
b0d623f7 190#if CONFIG_FSE
6d2010ae 191SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
b0d623f7 192#endif
6d2010ae
A
193SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
194SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
3e170ce0
A
195SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
196SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
316670eb
A
197#ifdef NFS_UC_Q_DEBUG
198SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
199SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
200SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
3e170ce0 201SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
316670eb 202#endif
2d21ac55
A
203#endif /* NFSSERVER */
204
205
206#if NFSCLIENT
207
fe8ab488
A
208static int
209mapname2id(struct nfs_testmapid *map)
210{
211 int error;
212
213 error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
214 if (error)
215 return (error);
216
217 if (map->ntm_grpflag)
218 error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
219 else
220 error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
221
222 return (error);
223}
224
225static int
226mapid2name(struct nfs_testmapid *map)
227{
228 int error;
5ba3f43e 229 size_t len = sizeof(map->ntm_name);
fe8ab488
A
230
231 if (map->ntm_grpflag)
232 error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
233 else
234 error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
235
236 if (error)
237 return (error);
238
239 error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
240
241 return (error);
242
243}
244
fe8ab488
A
245static int
246nfsclnt_testidmap(proc_t p, user_addr_t argp)
247{
248 struct nfs_testmapid mapid;
249 int error, coerror;
5ba3f43e 250 size_t len = sizeof(mapid.ntm_name);
fe8ab488
A
251
252 /* Let root make this call. */
253 error = proc_suser(p);
254 if (error)
255 return (error);
256
257 error = copyin(argp, &mapid, sizeof(mapid));
258 if (error)
259 return (error);
5ba3f43e
A
260 switch (mapid.ntm_lookup) {
261 case NTM_NAME2ID:
fe8ab488 262 error = mapname2id(&mapid);
5ba3f43e
A
263 break;
264 case NTM_ID2NAME:
fe8ab488 265 error = mapid2name(&mapid);
5ba3f43e
A
266 break;
267 case NTM_NAME2GUID:
268 error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag);
269 break;
270 case NTM_GUID2NAME:
271 error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag);
272 break;
273 default:
274 return (EINVAL);
275 }
fe8ab488
A
276
277 coerror = copyout(&mapid, argp, sizeof(mapid));
278
279 return (error ? error : coerror);
280}
281
2d21ac55
A
282int
283nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
284{
285 struct lockd_ans la;
286 int error;
287
6d2010ae
A
288 switch (uap->flag) {
289 case NFSCLNT_LOCKDANS:
2d21ac55 290 error = copyin(uap->argp, &la, sizeof(la));
6d2010ae
A
291 if (!error)
292 error = nfslockdans(p, &la);
293 break;
294 case NFSCLNT_LOCKDNOTIFY:
295 error = nfslockdnotify(p, uap->argp);
296 break;
fe8ab488
A
297 case NFSCLNT_TESTIDMAP:
298 error = nfsclnt_testidmap(p, uap->argp);
299 break;
6d2010ae
A
300 default:
301 error = EINVAL;
2d21ac55 302 }
6d2010ae 303 return (error);
2d21ac55
A
304}
305
fe8ab488 306
2d21ac55
A
307/*
308 * Asynchronous I/O threads for client NFS.
309 * They do read-ahead and write-behind operations on the block I/O cache.
310 *
311 * The pool of up to nfsiod_thread_max threads is launched on demand and each thread exits
312 * when unused for a while. There are as many nfsiod structs as there are
313 * nfsiod threads; however there's no strict tie between a thread and a struct.
314 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
315 * up, it removes the next struct nfsiod from the queue and services it. Then
316 * it will put the struct at the head of free list and sleep on it.
317 * Async requests will pull the next struct nfsiod from the head of the free list,
318 * put it on the work queue, and wake whatever thread is waiting on that struct.
319 */
2d21ac55
A
320
321/*
322 * nfsiod thread exit routine
323 *
324 * Must be called with nfsiod_mutex held so that the
325 * decision to terminate is atomic with the termination.
326 */
b0d623f7 327void
2d21ac55
A
328nfsiod_terminate(struct nfsiod *niod)
329{
330 nfsiod_thread_count--;
331 lck_mtx_unlock(nfsiod_mutex);
332 if (niod)
333 FREE(niod, M_TEMP);
334 else
335 printf("nfsiod: terminating without niod\n");
336 thread_terminate(current_thread());
337 /*NOTREACHED*/
338}
339
340/* nfsiod thread startup routine */
b0d623f7 341void
2d21ac55
A
342nfsiod_thread(void)
343{
344 struct nfsiod *niod;
345 int error;
346
347 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
348 if (!niod) {
349 lck_mtx_lock(nfsiod_mutex);
350 nfsiod_thread_count--;
b0d623f7 351 wakeup(current_thread());
2d21ac55
A
352 lck_mtx_unlock(nfsiod_mutex);
353 thread_terminate(current_thread());
354 /*NOTREACHED*/
355 }
356 bzero(niod, sizeof(*niod));
357 lck_mtx_lock(nfsiod_mutex);
358 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
359 wakeup(current_thread());
360 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
361 /* shouldn't return... so we have an error */
362 /* remove an old nfsiod struct and terminate */
363 lck_mtx_lock(nfsiod_mutex);
364 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
365 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
366 nfsiod_terminate(niod);
367 /*NOTREACHED*/
368}
369
370/*
371 * Start up another nfsiod thread.
372 * (unless we're already maxed out and there are nfsiods running)
373 */
374int
375nfsiod_start(void)
376{
b0d623f7 377 thread_t thd = THREAD_NULL;
2d21ac55
A
378
379 lck_mtx_lock(nfsiod_mutex);
380 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
381 lck_mtx_unlock(nfsiod_mutex);
382 return (EBUSY);
383 }
384 nfsiod_thread_count++;
b0d623f7
A
385 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
386 lck_mtx_unlock(nfsiod_mutex);
387 return (EBUSY);
388 }
2d21ac55
A
389 /* wait for the thread to complete startup */
390 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
b0d623f7 391 thread_deallocate(thd);
2d21ac55
A
392 return (0);
393}
394
395/*
396 * Continuation for Asynchronous I/O threads for NFS client.
397 *
398 * Grab an nfsiod struct to work on, do some work, then drop it
399 */
b0d623f7 400int
2d21ac55
A
401nfsiod_continue(int error)
402{
403 struct nfsiod *niod;
404 struct nfsmount *nmp;
405 struct nfsreq *req, *treq;
406 struct nfs_reqqhead iodq;
407 int morework;
408
409 lck_mtx_lock(nfsiod_mutex);
410 niod = TAILQ_FIRST(&nfsiodwork);
411 if (!niod) {
412 /* there's no work queued up */
2d21ac55
A
413 /* remove an old nfsiod struct and terminate */
414 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
415 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
416 nfsiod_terminate(niod);
417 /*NOTREACHED*/
418 }
419 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
420
421worktodo:
422 while ((nmp = niod->niod_nmp)) {
fe8ab488
A
423 if (nmp == NULL){
424 niod->niod_nmp = NULL;
425 break;
426 }
427
2d21ac55
A
428 /*
429 * Service this mount's async I/O queue.
430 *
431 * In order to ensure some level of fairness between mounts,
432 * we grab all the work up front before processing it so any
433 * new work that arrives will be serviced on a subsequent
434 * iteration - and we have a chance to see if other work needs
435 * to be done (e.g. the delayed write queue needs to be pushed
436 * or other mounts are waiting for an nfsiod).
437 */
438 /* grab the current contents of the queue */
439 TAILQ_INIT(&iodq);
440 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
3e170ce0
A
441 /* Mark each iod request as being managed by an iod */
442 TAILQ_FOREACH(req, &iodq, r_achain) {
443 lck_mtx_lock(&req->r_mtx);
444 assert(!(req->r_flags & R_IOD));
445 req->r_flags |= R_IOD;
446 lck_mtx_unlock(&req->r_mtx);
447 }
2d21ac55
A
448 lck_mtx_unlock(nfsiod_mutex);
449
450 /* process the queue */
451 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
452 TAILQ_REMOVE(&iodq, req, r_achain);
3e170ce0 453 req->r_achain.tqe_next = NFSREQNOLIST;
2d21ac55
A
454 req->r_callback.rcb_func(req);
455 }
456
457 /* now check if there's more/other work to be done */
458 lck_mtx_lock(nfsiod_mutex);
459 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
460 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
fe8ab488
A
461 /*
462 * we're going to stop working on this mount but if the
463 * mount still needs more work so queue it up
464 */
465 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
2d21ac55
A
466 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
467 nmp->nm_niod = NULL;
468 niod->niod_nmp = NULL;
469 }
470 }
471
472 /* loop if there's still a mount to work on */
473 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
474 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
475 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
fe8ab488 476 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
2d21ac55
A
477 }
478 if (niod->niod_nmp)
479 goto worktodo;
480
481 /* queue ourselves back up - if there aren't too many threads running */
482 if (nfsiod_thread_count <= NFSIOD_MAX) {
483 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
484 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
485 /* shouldn't return... so we have an error */
486 /* remove an old nfsiod struct and terminate */
487 lck_mtx_lock(nfsiod_mutex);
488 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
489 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
490 }
491 nfsiod_terminate(niod);
492 /*NOTREACHED*/
493 return (0);
494}
495
496#endif /* NFSCLIENT */
497
498
499#if NFSSERVER
500
1c79356b
A
501/*
502 * NFS server system calls
503 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
504 */
505
506/*
507 * Get file handle system call
508 */
1c79356b 509int
91447636 510getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
1c79356b 511{
91447636
A
512 vnode_t vp;
513 struct nfs_filehandle nfh;
6d2010ae 514 int error, fhlen, fidlen;
1c79356b 515 struct nameidata nd;
91447636 516 char path[MAXPATHLEN], *ptr;
6d2010ae 517 size_t pathlen;
91447636
A
518 struct nfs_exportfs *nxfs;
519 struct nfs_export *nx;
520
1c79356b
A
521 /*
522 * Must be super user
523 */
91447636
A
524 error = proc_suser(p);
525 if (error)
526 return (error);
527
6d2010ae
A
528 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
529 if (!error)
530 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
91447636 531 if (error)
1c79356b 532 return (error);
6d2010ae
A
533 /* limit fh size to length specified (or v3 size by default) */
534 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
535 fhlen = NFSV3_MAX_FH_SIZE;
536 fidlen = fhlen - sizeof(struct nfs_exphandle);
91447636 537
2d21ac55
A
538 if (!nfsrv_is_initialized())
539 return (EINVAL);
540
6d2010ae 541 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
2d21ac55 542 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
1c79356b
A
543 error = namei(&nd);
544 if (error)
545 return (error);
91447636
A
546 nameidone(&nd);
547
1c79356b 548 vp = nd.ni_vp;
91447636
A
549
550 // find exportfs that matches f_mntonname
2d21ac55 551 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 552 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
2d21ac55
A
553 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
554 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
91447636
A
555 break;
556 }
557 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
558 error = EINVAL;
559 goto out;
560 }
561 // find export that best matches remainder of path
562 ptr = path + strlen(nxfs->nxfs_path);
563 while (*ptr && (*ptr == '/'))
564 ptr++;
565 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
566 int len = strlen(nx->nx_path);
567 if (len == 0) // we've hit the export entry for the root directory
568 break;
569 if (!strncmp(nx->nx_path, ptr, len))
570 break;
571 }
572 if (!nx) {
573 error = EINVAL;
574 goto out;
575 }
576
577 bzero(&nfh, sizeof(nfh));
0c530ab8
A
578 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
579 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
580 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
91447636
A
581 nfh.nfh_xh.nxh_flags = 0;
582 nfh.nfh_xh.nxh_reserved = 0;
6d2010ae 583 nfh.nfh_len = fidlen;
2d21ac55 584 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
6d2010ae 585 if (nfh.nfh_len > (uint32_t)fidlen)
91447636
A
586 error = EOVERFLOW;
587 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
588 nfh.nfh_len += sizeof(nfh.nfh_xh);
2d21ac55 589 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636
A
590
591out:
2d21ac55 592 lck_rw_done(&nfsrv_export_rwlock);
91447636 593 vnode_put(vp);
1c79356b
A
594 if (error)
595 return (error);
5ba3f43e
A
596 /*
597 * At first blush, this may appear to leak a kernel stack
598 * address, but the copyout() never reaches &nfh.nfh_fhp
599 * (sizeof(fhandle_t) < sizeof(nfh)).
600 */
6d2010ae 601 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
1c79356b
A
602 return (error);
603}
604
39236c6e 605extern const struct fileops vnops;
91447636 606
55e303ae
A
607/*
608 * syscall for the rpc.lockd to use to translate a NFS file handle into
609 * an open descriptor.
610 *
611 * warning: do not remove the suser() call or this becomes one giant
612 * security hole.
613 */
55e303ae 614int
91447636
A
615fhopen( proc_t p,
616 struct fhopen_args *uap,
b0d623f7 617 int32_t *retval)
55e303ae 618{
91447636
A
619 vnode_t vp;
620 struct nfs_filehandle nfh;
621 struct nfs_export *nx;
622 struct nfs_export_options *nxo;
55e303ae 623 struct flock lf;
91447636
A
624 struct fileproc *fp, *nfp;
625 int fmode, error, type;
55e303ae 626 int indx;
2d21ac55 627 vfs_context_t ctx = vfs_context_current();
91447636
A
628 kauth_action_t action;
629
55e303ae
A
630 /*
631 * Must be super user
632 */
2d21ac55 633 error = suser(vfs_context_ucred(ctx), 0);
0c530ab8 634 if (error) {
55e303ae 635 return (error);
0c530ab8 636 }
55e303ae 637
2d21ac55
A
638 if (!nfsrv_is_initialized()) {
639 return (EINVAL);
640 }
641
55e303ae
A
642 fmode = FFLAGS(uap->flags);
643 /* why not allow a non-read/write open for our lockd? */
2d21ac55 644 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
55e303ae 645 return (EINVAL);
91447636
A
646
647 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
2d21ac55 648 if (error)
91447636
A
649 return (error);
650 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
2d21ac55 651 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
91447636
A
652 return (EINVAL);
653 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
2d21ac55 654 if (error)
55e303ae 655 return (error);
2d21ac55 656 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636 657
2d21ac55 658 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 659 /* now give me my vnode, it gets returned to me with a reference */
2d21ac55
A
660 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
661 lck_rw_done(&nfsrv_export_rwlock);
0c530ab8 662 if (error) {
2d21ac55
A
663 if (error == NFSERR_TRYLATER)
664 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
55e303ae 665 return (error);
0c530ab8 666 }
91447636 667
55e303ae 668 /*
91447636
A
669 * From now on we have to make sure not
670 * to forget about the vnode.
671 * Any error that causes an abort must vnode_put(vp).
672 * Just set error = err and 'goto bad;'.
55e303ae
A
673 */
674
675 /*
676 * from vn_open
677 */
91447636 678 if (vnode_vtype(vp) == VSOCK) {
55e303ae
A
679 error = EOPNOTSUPP;
680 goto bad;
681 }
682
91447636
A
683 /* disallow write operations on directories */
684 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
685 error = EISDIR;
55e303ae
A
686 goto bad;
687 }
688
4bd07ac2
A
689#if CONFIG_MACF
690 if ((error = mac_vnode_check_open(ctx, vp, fmode)))
691 goto bad;
692#endif
693
91447636
A
694 /* compute action to be authorized */
695 action = 0;
696 if (fmode & FREAD)
697 action |= KAUTH_VNODE_READ_DATA;
698 if (fmode & (FWRITE | O_TRUNC))
699 action |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 700 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
91447636 701 goto bad;
55e303ae 702
2d21ac55 703 if ((error = VNOP_OPEN(vp, fmode, ctx)))
91447636 704 goto bad;
6d2010ae 705 if ((error = vnode_ref_ext(vp, fmode, 0)))
55e303ae
A
706 goto bad;
707
55e303ae
A
708 /*
709 * end of vn_open code
710 */
711
91447636 712 // starting here... error paths should call vn_close/vnode_put
2d21ac55
A
713 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
714 vn_close(vp, fmode & FMASK, ctx);
55e303ae
A
715 goto bad;
716 }
717 fp = nfp;
718
91447636 719 fp->f_fglob->fg_flag = fmode & FMASK;
91447636
A
720 fp->f_fglob->fg_ops = &vnops;
721 fp->f_fglob->fg_data = (caddr_t)vp;
722
723 // XXX do we really need to support this with fhopen()?
55e303ae
A
724 if (fmode & (O_EXLOCK | O_SHLOCK)) {
725 lf.l_whence = SEEK_SET;
726 lf.l_start = 0;
727 lf.l_len = 0;
728 if (fmode & O_EXLOCK)
729 lf.l_type = F_WRLCK;
730 else
731 lf.l_type = F_RDLCK;
732 type = F_FLOCK;
733 if ((fmode & FNONBLOCK) == 0)
734 type |= F_WAIT;
39236c6e 735 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
2d21ac55
A
736 struct vfs_context context = *vfs_context_current();
737 /* Modify local copy (to not damage thread copy) */
738 context.vc_ucred = fp->f_fglob->fg_cred;
739
740 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636 741 fp_free(p, indx, fp);
55e303ae
A
742 return (error);
743 }
91447636 744 fp->f_fglob->fg_flag |= FHASLOCK;
55e303ae
A
745 }
746
91447636
A
747 vnode_put(vp);
748
749 proc_fdlock(p);
6601e61a 750 procfdtbl_releasefd(p, indx, NULL);
91447636
A
751 fp_drop(p, indx, fp, 1);
752 proc_fdunlock(p);
753
55e303ae
A
754 *retval = indx;
755 return (0);
756
757bad:
91447636 758 vnode_put(vp);
55e303ae
A
759 return (error);
760}
761
1c79356b 762/*
2d21ac55 763 * NFS server pseudo system call
1c79356b 764 */
1c79356b 765int
91447636 766nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
1c79356b 767{
91447636
A
768 mbuf_t nam;
769 struct user_nfsd_args user_nfsdarg;
91447636 770 socket_t so;
1c79356b
A
771 int error;
772
ccc36f2f
A
773 AUDIT_ARG(cmd, uap->flag);
774
1c79356b 775 /*
b0d623f7 776 * Must be super user for most operations (export ops checked later).
1c79356b 777 */
b0d623f7 778 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
1c79356b 779 return (error);
2d21ac55
A
780#if CONFIG_MACF
781 error = mac_system_check_nfsd(kauth_cred_get());
782 if (error)
783 return (error);
784#endif
91447636 785
2d21ac55
A
786 /* make sure NFS server data structures have been initialized */
787 nfsrv_init();
1c79356b 788
2d21ac55 789 if (uap->flag & NFSSVC_ADDSOCK) {
91447636
A
790 if (IS_64BIT_PROCESS(p)) {
791 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
792 } else {
793 struct nfsd_args tmp_args;
794 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
795 if (error == 0) {
796 user_nfsdarg.sock = tmp_args.sock;
797 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
798 user_nfsdarg.namelen = tmp_args.namelen;
799 }
800 }
1c79356b
A
801 if (error)
802 return (error);
91447636
A
803 /* get the socket */
804 error = file_socket(user_nfsdarg.sock, &so);
1c79356b
A
805 if (error)
806 return (error);
91447636
A
807 /* Get the client address for connected sockets. */
808 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
809 nam = NULL;
810 } else {
811 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
812 if (error) {
813 /* drop the iocount file_socket() grabbed on the file descriptor */
814 file_drop(user_nfsdarg.sock);
1c79356b 815 return (error);
91447636 816 }
1c79356b 817 }
91447636
A
818 /*
819 * nfssvc_addsock() will grab a retain count on the socket
820 * to keep the socket from being closed when nfsd closes its
821 * file descriptor for it.
822 */
2d21ac55 823 error = nfssvc_addsock(so, nam);
91447636
A
824 /* drop the iocount file_socket() grabbed on the file descriptor */
825 file_drop(user_nfsdarg.sock);
826 } else if (uap->flag & NFSSVC_NFSD) {
2d21ac55 827 error = nfssvc_nfsd();
91447636 828 } else if (uap->flag & NFSSVC_EXPORT) {
2d21ac55 829 error = nfssvc_export(uap->argp);
91447636
A
830 } else {
831 error = EINVAL;
1c79356b 832 }
1c79356b
A
833 if (error == EINTR || error == ERESTART)
834 error = 0;
835 return (error);
836}
837
1c79356b
A
838/*
839 * Adds a socket to the list for servicing by nfsds.
840 */
b0d623f7 841int
2d21ac55 842nfssvc_addsock(socket_t so, mbuf_t mynam)
1c79356b 843{
2d21ac55
A
844 struct nfsrv_sock *slp;
845 int error = 0, sodomain, sotype, soprotocol, on = 1;
316670eb 846 int first;
91447636
A
847 struct timeval timeo;
848
849 /* make sure mbuf constants are set up */
2d21ac55 850 if (!nfs_mbuf_mhlen)
91447636
A
851 nfs_mbuf_init();
852
853 sock_gettype(so, &sodomain, &sotype, &soprotocol);
854
6d2010ae
A
855 /* There should be only one UDP socket for each of IPv4 and IPv6 */
856 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
857 mbuf_freem(mynam);
858 return (EEXIST);
859 }
860 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
91447636 861 mbuf_freem(mynam);
2d21ac55 862 return (EEXIST);
1c79356b
A
863 }
864
2d21ac55 865 /* Set protocol options and reserve some space (for UDP). */
3e170ce0
A
866 if (sotype == SOCK_STREAM) {
867 error = nfsrv_check_exports_allow_address(mynam);
868 if (error)
869 return (error);
91447636 870 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
3e170ce0 871 }
2d21ac55 872 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
91447636 873 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
2d21ac55
A
874 if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
875 int reserve = NFS_UDPSOCKBUF;
876 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
877 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
878 if (error) {
879 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
880 error = 0;
881 }
1c79356b 882 }
91447636
A
883 sock_nointerrupt(so, 0);
884
2d21ac55
A
885 /*
886 * Set socket send/receive timeouts.
887 * Receive timeout shouldn't matter, but setting the send timeout
888 * will make sure that an unresponsive client can't hang the server.
889 */
91447636 890 timeo.tv_usec = 0;
2d21ac55
A
891 timeo.tv_sec = 1;
892 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
893 timeo.tv_sec = 30;
894 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
895 if (error) {
896 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
897 error = 0;
898 }
91447636 899
2d21ac55
A
900 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
901 if (!slp) {
902 mbuf_freem(mynam);
903 return (ENOMEM);
904 }
905 bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
906 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
907 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
908
909 lck_mtx_lock(nfsd_mutex);
910
911 if (soprotocol == IPPROTO_UDP) {
6d2010ae
A
912 if (sodomain == AF_INET) {
913 /* There should be only one UDP/IPv4 socket */
914 if (nfsrv_udpsock) {
915 lck_mtx_unlock(nfsd_mutex);
916 nfsrv_slpfree(slp);
917 mbuf_freem(mynam);
918 return (EEXIST);
919 }
920 nfsrv_udpsock = slp;
921 }
922 if (sodomain == AF_INET6) {
923 /* There should be only one UDP/IPv6 socket */
924 if (nfsrv_udp6sock) {
925 lck_mtx_unlock(nfsd_mutex);
926 nfsrv_slpfree(slp);
927 mbuf_freem(mynam);
928 return (EEXIST);
929 }
930 nfsrv_udp6sock = slp;
91447636 931 }
1c79356b 932 }
91447636 933
2d21ac55 934 /* add the socket to the list */
316670eb 935 first = TAILQ_EMPTY(&nfsrv_socklist);
2d21ac55 936 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
3e170ce0
A
937 if (soprotocol == IPPROTO_TCP) {
938 nfsrv_sock_tcp_cnt++;
939 if (nfsrv_sock_idle_timeout < 0)
940 nfsrv_sock_idle_timeout = 0;
941 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
942 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
943 /*
944 * Possibly start or stop the idle timer. We only start the idle timer when
945 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
946 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
947 * the number of connections.
948 */
949 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
950 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
951 if (nfsrv_idlesock_timer_on) {
952 thread_call_cancel(nfsrv_idlesock_timer_call);
953 nfsrv_idlesock_timer_on = 0;
954 }
955 } else {
956 struct nfsrv_sock *old_slp;
957 struct timeval now;
958 time_t time_to_wait = nfsrv_sock_idle_timeout;
959 /*
960 * Get the oldest tcp socket and calculate the
961 * earliest time for the next idle timer to fire
962 * based on the possibly updated nfsrv_sock_idle_timeout
963 */
964 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
965 if (old_slp->ns_sotype == SOCK_STREAM) {
966 microuptime(&now);
967 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
968 if (time_to_wait < 1)
969 time_to_wait = 1;
970 break;
971 }
972 }
973 /*
974 * If we have a timer scheduled, but if its going to fire too late,
975 * turn it off.
976 */
977 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
978 thread_call_cancel(nfsrv_idlesock_timer_call);
979 nfsrv_idlesock_timer_on = 0;
980 }
981 /* Schedule the idle thread if it isn't already */
982 if (!nfsrv_idlesock_timer_on) {
983 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
984 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
985 }
986 }
987 }
988 }
2d21ac55 989
91447636 990 sock_retain(so); /* grab a retain count on the socket */
1c79356b 991 slp->ns_so = so;
91447636 992 slp->ns_sotype = sotype;
1c79356b 993 slp->ns_nam = mynam;
91447636 994
316670eb
A
995 /* set up the socket up-call */
996 nfsrv_uc_addsock(slp, first);
91447636 997
2d21ac55
A
998 /* mark that the socket is not in the nfsrv_sockwg list */
999 slp->ns_wgq.tqe_next = SLPNOLIST;
3e170ce0 1000
91447636
A
1001 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1002
1c79356b 1003 nfsrv_wakenfsd(slp);
91447636
A
1004 lck_mtx_unlock(nfsd_mutex);
1005
1c79356b
A
1006 return (0);
1007}
1008
1009/*
2d21ac55
A
1010 * nfssvc_nfsd()
1011 *
1012 * nfsd theory of operation:
1013 *
1014 * The first nfsd thread stays in user mode accepting new TCP connections
1015 * which are then added via the "addsock" call. The rest of the nfsd threads
1016 * simply call into the kernel and remain there in a loop handling NFS
1017 * requests until killed by a signal.
1018 *
1019 * There's a list of nfsd threads (nfsd_head).
1020 * There's an nfsd queue that contains only those nfsds that are
1021 * waiting for work to do (nfsd_queue).
1022 *
1023 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
1024 * managing the work on the sockets:
1025 * nfsrv_sockwait - sockets w/new data waiting to be worked on
1026 * nfsrv_sockwork - sockets being worked on which may have more work to do
1027 * nfsrv_sockwg -- sockets which have pending write gather data
1028 * When a socket receives data, if it is not currently queued, it
1029 * will be placed at the end of the "wait" queue.
1030 * Whenever a socket needs servicing we make sure it is queued and
1031 * wake up a waiting nfsd (if there is one).
1032 *
1033 * nfsds will service at most 8 requests from the same socket before
1034 * defecting to work on another socket.
1035 * nfsds will defect immediately if there are any sockets in the "wait" queue
1036 * nfsds looking for a socket to work on check the "wait" queue first and
1037 * then check the "work" queue.
1038 * When an nfsd starts working on a socket, it removes it from the head of
1039 * the queue it's currently on and moves it to the end of the "work" queue.
1040 * When nfsds are checking the queues for work, any sockets found not to
1041 * have any work are simply dropped from the queue.
1042 *
1c79356b 1043 */
/*
 * Body of one nfsd service thread.  Allocates an nfsd structure, links it
 * on nfsd_head, then loops: find a socket with work, pull a request off it
 * with nfsrv_dorec(), dispatch it through nfsrv_procs[] (or
 * nfsrv_writegather()), and send the reply with nfsrv_send().
 *
 * The loop ends when nfsd_thread_max <= 0, when this thread is idle and
 * nfsd_thread_count exceeds nfsd_thread_max, or when msleep() or the reply
 * send path reports an error that ends the loop.  Returns ENOMEM if the
 * nfsd structure cannot be allocated, otherwise the value of "error" at exit.
 */
b0d623f7 1044int
2d21ac55 1045nfssvc_nfsd(void)
1c79356b 1046{
2d21ac55
A
1047 mbuf_t m, mrep;
1048 struct nfsrv_sock *slp;
1049 struct nfsd *nfsd;
1c79356b 1050 struct nfsrv_descript *nd = NULL;
91447636 1051 int error = 0, cacherep, writes_todo;
2d21ac55 1052 int siz, procrastinate, opcnt = 0;
1c79356b 1053 u_quad_t cur_usec;
55e303ae 1054 struct timeval now;
2d21ac55 1055 struct vfs_context context;
316670eb 1056 struct timespec to;
1c79356b
A
1057
1058#ifndef nolint
1059 cacherep = RC_DOIT;
1060 writes_todo = 0;
1061#endif
91447636 1062
2d21ac55
A
1063 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
1064 if (!nfsd)
1065 return (ENOMEM);
1066 bzero(nfsd, sizeof(struct nfsd));
1067 lck_mtx_lock(nfsd_mutex);
1068 if (nfsd_thread_count++ == 0)
1069 nfsrv_initcache(); /* Init the server request cache */
316670eb 1070
2d21ac55
A
1071 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
1072 lck_mtx_unlock(nfsd_mutex);
1073
1074 context.vc_thread = current_thread();
91447636 1075
316670eb
A
1076 /* Set time out so that nfsd threads can wake up and see if they are still needed. */
1077 to.tv_sec = 5;
1078 to.tv_nsec = 0;
1079
1c79356b
A
1080 /*
1081 * Loop getting rpc requests until SIGKILL.
1082 */
1083 for (;;) {
2d21ac55
A
1084 if (nfsd_thread_max <= 0) {
1085 /* NFS server shutting down, get out ASAP */
1086 error = EINTR;
1087 slp = nfsd->nfsd_slp;
1088 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
1089 /* already have some work to do */
1090 error = 0;
1091 slp = nfsd->nfsd_slp;
1092 } else {
1093 /* need to find work to do */
1094 error = 0;
91447636 1095 lck_mtx_lock(nfsd_mutex);
2d21ac55
A
1096 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
1097 if (nfsd_thread_count > nfsd_thread_max) {
1098 /*
1099 * If we have no socket and there are more
1100 * nfsd threads than configured, let's exit.
1101 */
1102 error = 0;
1103 goto done;
1104 }
1c79356b 1105 nfsd->nfsd_flag |= NFSD_WAITING;
2d21ac55 1106 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
316670eb 1107 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
91447636 1108 if (error) {
2d21ac55
A
1109 if (nfsd->nfsd_flag & NFSD_WAITING) {
1110 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
1111 nfsd->nfsd_flag &= ~NFSD_WAITING;
1112 }
/* sleep timed out: go around again so the thread-count check above is re-evaluated */
316670eb
A
1113 if (error == EWOULDBLOCK)
1114 continue;
1c79356b 1115 goto done;
91447636 1116 }
1c79356b 1117 }
2d21ac55
A
1118 slp = nfsd->nfsd_slp;
1119 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
1120 /* look for a socket to work on in the wait queue */
1121 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
1122 lck_rw_lock_exclusive(&slp->ns_rwlock);
1123 /* remove from the head of the queue */
1124 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1125 slp->ns_flag &= ~SLP_WAITQ;
1126 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
1127 break;
1128 /* nothing to do, so skip this socket */
1129 lck_rw_done(&slp->ns_rwlock);
1c79356b 1130 }
2d21ac55
A
1131 }
1132 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
1133 /* look for a socket to work on in the work queue */
1134 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
1135 lck_rw_lock_exclusive(&slp->ns_rwlock);
1136 /* remove from the head of the queue */
1137 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1138 slp->ns_flag &= ~SLP_WORKQ;
1139 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
1140 break;
1141 /* nothing to do, so skip this socket */
1142 lck_rw_done(&slp->ns_rwlock);
1143 }
1144 }
1145 if (!nfsd->nfsd_slp && slp) {
1146 /* we found a socket to work on, grab a reference */
1147 slp->ns_sref++;
3e170ce0
A
1148 microuptime(&now);
1149 slp->ns_timestamp = now.tv_sec;
1150 /* We keep the socket list in least recently used order for reaping idle sockets */
1151 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1152 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
2d21ac55
A
1153 nfsd->nfsd_slp = slp;
1154 opcnt = 0;
1155 /* and put it at the back of the work queue */
1156 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1157 slp->ns_flag |= SLP_WORKQ;
1158 lck_rw_done(&slp->ns_rwlock);
1c79356b 1159 }
91447636 1160 lck_mtx_unlock(nfsd_mutex);
2d21ac55 1161 if (!slp)
1c79356b 1162 continue;
91447636 1163 lck_rw_lock_exclusive(&slp->ns_rwlock);
1c79356b 1164 if (slp->ns_flag & SLP_VALID) {
/* only pull more data off the socket when NEEDQ is set and DISCONN is not */
743b1565 1165 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
91447636
A
1166 slp->ns_flag &= ~SLP_NEEDQ;
1167 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1168 }
743b1565
A
1169 if (slp->ns_flag & SLP_DISCONN)
1170 nfsrv_zapsock(slp);
1c79356b 1171 error = nfsrv_dorec(slp, nfsd, &nd);
2d21ac55
A
1172 if (error == EINVAL) { // RPCSEC_GSS drop
1173 if (slp->ns_sotype == SOCK_STREAM)
1174 nfsrv_zapsock(slp); // drop connection
1175 }
1176 writes_todo = 0;
1177 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1178 microuptime(&now);
1179 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1180 (u_quad_t)now.tv_usec;
1181 if (slp->ns_wgtime <= cur_usec) {
1182 error = 0;
1183 cacherep = RC_DOIT;
1184 writes_todo = 1;
1185 }
1186 slp->ns_flag &= ~SLP_DOWRITES;
1187 }
1c79356b
A
1188 nfsd->nfsd_flag |= NFSD_REQINPROG;
1189 }
91447636 1190 lck_rw_done(&slp->ns_rwlock);
1c79356b 1191 }
2d21ac55 1192 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1c79356b 1193 if (nd) {
2d21ac55 1194 nfsm_chain_cleanup(&nd->nd_nmreq);
55e303ae 1195 if (nd->nd_nam2)
91447636 1196 mbuf_freem(nd->nd_nam2);
0c530ab8
A
1197 if (IS_VALID_CRED(nd->nd_cr))
1198 kauth_cred_unref(&nd->nd_cr);
6d2010ae
A
1199 if (nd->nd_gss_context)
1200 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
2d21ac55 1201 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b
A
1202 nd = NULL;
1203 }
91447636 1204 nfsd->nfsd_slp = NULL;
1c79356b 1205 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
2d21ac55
A
1206 if (slp)
1207 nfsrv_slpderef(slp);
1208 if (nfsd_thread_max <= 0)
1209 break;
1c79356b
A
1210 continue;
1211 }
1c79356b 1212 if (nd) {
55e303ae 1213 microuptime(&nd->nd_starttime);
1c79356b
A
1214 if (nd->nd_nam2)
1215 nd->nd_nam = nd->nd_nam2;
1216 else
1217 nd->nd_nam = slp->ns_nam;
1218
2d21ac55
A
1219 cacherep = nfsrv_getcache(nd, slp, &mrep);
1220
1221 if (nfsrv_require_resv_port) {
1222 /* Check if source port is a reserved port */
6d2010ae
A
1223 in_port_t port = 0;
1224 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1225
1226 if (saddr->sa_family == AF_INET)
1227 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1228 else if (saddr->sa_family == AF_INET6)
1229 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1230 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1c79356b
A
1231 nd->nd_procnum = NFSPROC_NOOP;
1232 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1233 cacherep = RC_DOIT;
1c79356b
A
1234 }
1235 }
1236
1237 }
1238
1239 /*
2d21ac55 1240 * Loop to get all the write RPC replies that have been
1c79356b
A
1241 * gathered together.
1242 */
1243 do {
1244 switch (cacherep) {
1245 case RC_DOIT:
2d21ac55
A
1246 if (nd && (nd->nd_vers == NFS_VER3))
1247 procrastinate = nfsrv_wg_delay_v3;
1c79356b 1248 else
2d21ac55
A
1249 procrastinate = nfsrv_wg_delay;
1250 lck_rw_lock_shared(&nfsrv_export_rwlock);
1251 context.vc_ucred = NULL;
91447636 1252 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
2d21ac55
A
1253 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1254 else
1255 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1256 lck_rw_done(&nfsrv_export_rwlock);
1257 if (mrep == NULL) {
1258 /*
1259 * If this is a stream socket and we are not going
1260 * to send a reply we better close the connection
1261 * so the client doesn't hang.
1262 */
1263 if (error && slp->ns_sotype == SOCK_STREAM) {
1264 lck_rw_lock_exclusive(&slp->ns_rwlock);
1265 nfsrv_zapsock(slp);
1266 lck_rw_done(&slp->ns_rwlock);
1267 printf("NFS server: NULL reply from proc = %d error = %d\n",
1268 nd->nd_procnum, error);
1269 }
1c79356b 1270 break;
2d21ac55
A
1271
1272 }
1c79356b 1273 if (error) {
316670eb 1274 OSAddAtomic64(1, &nfsstats.srv_errs);
2d21ac55 1275 nfsrv_updatecache(nd, FALSE, mrep);
55e303ae 1276 if (nd->nd_nam2) {
91447636 1277 mbuf_freem(nd->nd_nam2);
55e303ae
A
1278 nd->nd_nam2 = NULL;
1279 }
1c79356b
A
1280 break;
1281 }
316670eb 1282 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
2d21ac55
A
1283 nfsrv_updatecache(nd, TRUE, mrep);
1284 /* FALLTHRU */
1285
1c79356b 1286 case RC_REPLY:
2d21ac55
A
1287 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1288 /*
1289 * Need to checksum or encrypt the reply
1290 */
1291 error = nfs_gss_svc_protect_reply(nd, mrep);
1292 if (error) {
1293 mbuf_freem(mrep);
1294 break;
1295 }
1296 }
1297
1298 /*
1299 * Get the total size of the reply
1300 */
1301 m = mrep;
1c79356b
A
1302 siz = 0;
1303 while (m) {
91447636
A
1304 siz += mbuf_len(m);
1305 m = mbuf_next(m);
1c79356b
A
1306 }
1307 if (siz <= 0 || siz > NFS_MAXPACKET) {
1308 printf("mbuf siz=%d\n",siz);
1309 panic("Bad nfs svc reply");
1310 }
2d21ac55 1311 m = mrep;
91447636
A
1312 mbuf_pkthdr_setlen(m, siz);
1313 error = mbuf_pkthdr_setrcvif(m, NULL);
1314 if (error)
1315 panic("nfsd setrcvif failed: %d", error);
1c79356b
A
1316 /*
1317 * For stream protocols, prepend a Sun RPC
1318 * Record Mark.
1319 */
91447636
A
1320 if (slp->ns_sotype == SOCK_STREAM) {
1321 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1322 if (!error)
/* the mark word is the reply length with the top bit set (last-fragment flag per the ONC RPC record marking standard) */
b0d623f7 1323 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1c79356b 1324 }
91447636
A
1325 if (!error) {
1326 if (slp->ns_flag & SLP_VALID) {
2d21ac55 1327 error = nfsrv_send(slp, nd->nd_nam2, m);
91447636
A
1328 } else {
1329 error = EPIPE;
1330 mbuf_freem(m);
1331 }
1332 } else {
1333 mbuf_freem(m);
1c79356b 1334 }
2d21ac55 1335 mrep = NULL;
55e303ae 1336 if (nd->nd_nam2) {
91447636 1337 mbuf_freem(nd->nd_nam2);
55e303ae
A
1338 nd->nd_nam2 = NULL;
1339 }
91447636
A
1340 if (error == EPIPE) {
1341 lck_rw_lock_exclusive(&slp->ns_rwlock);
1c79356b 1342 nfsrv_zapsock(slp);
91447636
A
1343 lck_rw_done(&slp->ns_rwlock);
1344 }
1c79356b 1345 if (error == EINTR || error == ERESTART) {
2d21ac55 1346 nfsm_chain_cleanup(&nd->nd_nmreq);
0c530ab8
A
1347 if (IS_VALID_CRED(nd->nd_cr))
1348 kauth_cred_unref(&nd->nd_cr);
6d2010ae
A
1349 if (nd->nd_gss_context)
1350 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
2d21ac55 1351 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b 1352 nfsrv_slpderef(slp);
2d21ac55 1353 lck_mtx_lock(nfsd_mutex);
1c79356b
A
1354 goto done;
1355 }
1356 break;
1357 case RC_DROPIT:
91447636 1358 mbuf_freem(nd->nd_nam2);
2d21ac55 1359 nd->nd_nam2 = NULL;
1c79356b
A
1360 break;
1361 };
2d21ac55 1362 opcnt++;
1c79356b 1363 if (nd) {
2d21ac55 1364 nfsm_chain_cleanup(&nd->nd_nmreq);
55e303ae 1365 if (nd->nd_nam2)
91447636 1366 mbuf_freem(nd->nd_nam2);
0c530ab8
A
1367 if (IS_VALID_CRED(nd->nd_cr))
1368 kauth_cred_unref(&nd->nd_cr);
6d2010ae
A
1369 if (nd->nd_gss_context)
1370 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
2d21ac55 1371 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1c79356b
A
1372 nd = NULL;
1373 }
1374
1375 /*
1376 * Check to see if there are outstanding writes that
1377 * need to be serviced.
1378 */
2d21ac55
A
1379 writes_todo = 0;
1380 if (slp->ns_wgtime) {
1381 microuptime(&now);
1382 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1383 (u_quad_t)now.tv_usec;
1384 if (slp->ns_wgtime <= cur_usec) {
1385 cacherep = RC_DOIT;
1386 writes_todo = 1;
1387 }
91447636 1388 }
1c79356b 1389 } while (writes_todo);
2d21ac55
A
1390
1391 nd = NULL;
/* stay on this socket for up to 8 requests, but only while no other socket is on the wait queue */
1392 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1393 lck_rw_lock_exclusive(&slp->ns_rwlock);
1394 error = nfsrv_dorec(slp, nfsd, &nd);
1395 if (error == EINVAL) { // RPCSEC_GSS drop
1396 if (slp->ns_sotype == SOCK_STREAM)
1397 nfsrv_zapsock(slp); // drop connection
1398 }
91447636 1399 lck_rw_done(&slp->ns_rwlock);
2d21ac55
A
1400 }
1401 if (!nd) {
1402 /* drop our reference on the socket */
1c79356b
A
1403 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1404 nfsd->nfsd_slp = NULL;
1405 nfsrv_slpderef(slp);
91447636 1406 }
1c79356b 1407 }
91447636 1408 lck_mtx_lock(nfsd_mutex);
/* every path that reaches "done" arrives with nfsd_mutex held */
2d21ac55 1409done:
1c79356b 1410 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
91447636 1411 FREE(nfsd, M_NFSD);
2d21ac55
A
/* the last nfsd thread to leave tears down the server state */
1412 if (--nfsd_thread_count == 0)
1413 nfsrv_cleanup();
91447636 1414 lck_mtx_unlock(nfsd_mutex);
1c79356b
A
1415 return (error);
1416}
91447636 1417
b0d623f7 1418int
2d21ac55 1419nfssvc_export(user_addr_t argp)
91447636
A
1420{
1421 int error = 0, is_64bit;
1422 struct user_nfs_export_args unxa;
2d21ac55 1423 vfs_context_t ctx = vfs_context_current();
91447636 1424
2d21ac55 1425 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
91447636
A
1426
1427 /* copy in pointers to path and export args */
1428 if (is_64bit) {
1429 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1430 } else {
1431 struct nfs_export_args tnxa;
1432 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1433 if (error == 0) {
1434 /* munge into LP64 version of nfs_export_args structure */
1435 unxa.nxa_fsid = tnxa.nxa_fsid;
1436 unxa.nxa_expid = tnxa.nxa_expid;
1437 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1438 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1439 unxa.nxa_flags = tnxa.nxa_flags;
1440 unxa.nxa_netcount = tnxa.nxa_netcount;
1441 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1442 }
1443 }
1444 if (error)
1445 return (error);
1446
2d21ac55 1447 error = nfsrv_export(&unxa, ctx);
91447636
A
1448
1449 return (error);
1450}
1451
1c79356b 1452/*
2d21ac55 1453 * Shut down a socket associated with an nfsrv_sock structure.
1c79356b
A
1454 * Should be called with the send lock set, if required.
1455 * The trick here is to increment the sref at the start, so that the nfsds
1456 * will stop using it and clear ns_flag at the end so that it will not be
1457 * reassigned during cleanup.
1458 */
b0d623f7 1459void
2d21ac55 1460nfsrv_zapsock(struct nfsrv_sock *slp)
1c79356b 1461{
91447636 1462 socket_t so;
1c79356b 1463
91447636
A
1464 if ((slp->ns_flag & SLP_VALID) == 0)
1465 return;
1c79356b 1466 slp->ns_flag &= ~SLP_ALLFLAGS;
91447636
A
1467
1468 so = slp->ns_so;
1469 if (so == NULL)
1470 return;
1471
3e170ce0 1472 sock_setupcall(so, NULL, NULL);
91447636 1473 sock_shutdown(so, SHUT_RDWR);
316670eb
A
1474
1475 /*
1476 * Remove from the up-call queue
1477 */
1478 nfsrv_uc_dequeue(slp);
1c79356b
A
1479}
1480
1c79356b 1481/*
91447636 1482 * cleanup and release a server socket structure.
1c79356b 1483 */
b0d623f7 1484void
2d21ac55 1485nfsrv_slpfree(struct nfsrv_sock *slp)
1c79356b 1486{
91447636 1487 struct nfsrv_descript *nwp, *nnwp;
1c79356b 1488
91447636
A
1489 if (slp->ns_so) {
1490 sock_release(slp->ns_so);
1491 slp->ns_so = NULL;
1492 }
1493 if (slp->ns_nam)
1494 mbuf_free(slp->ns_nam);
1495 if (slp->ns_raw)
1496 mbuf_freem(slp->ns_raw);
1497 if (slp->ns_rec)
1498 mbuf_freem(slp->ns_rec);
2d21ac55
A
1499 if (slp->ns_frag)
1500 mbuf_freem(slp->ns_frag);
1501 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1502 slp->ns_reccnt = 0;
55e303ae 1503
91447636
A
1504 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1505 nnwp = nwp->nd_tq.le_next;
1506 LIST_REMOVE(nwp, nd_tq);
2d21ac55
A
1507 nfsm_chain_cleanup(&nwp->nd_nmreq);
1508 if (nwp->nd_mrep)
1509 mbuf_freem(nwp->nd_mrep);
1510 if (nwp->nd_nam2)
1511 mbuf_freem(nwp->nd_nam2);
0c530ab8
A
1512 if (IS_VALID_CRED(nwp->nd_cr))
1513 kauth_cred_unref(&nwp->nd_cr);
6d2010ae
A
1514 if (nwp->nd_gss_context)
1515 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
2d21ac55 1516 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
55e303ae 1517 }
91447636
A
1518 LIST_INIT(&slp->ns_tq);
1519
2d21ac55
A
1520 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1521 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
91447636 1522 FREE(slp, M_NFSSVC);
55e303ae
A
1523}
1524
1525/*
91447636
A
1526 * Derefence a server socket structure. If it has no more references and
1527 * is no longer valid, you can throw it away.
55e303ae 1528 */
3e170ce0
A
1529static void
1530nfsrv_slpderef_locked(struct nfsrv_sock *slp)
55e303ae 1531{
91447636
A
1532 lck_rw_lock_exclusive(&slp->ns_rwlock);
1533 slp->ns_sref--;
2d21ac55 1534
91447636 1535 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
2d21ac55
A
1536 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1537 /* remove socket from queue since there's no work */
1538 if (slp->ns_flag & SLP_WAITQ)
1539 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1540 else
1541 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1542 slp->ns_flag &= ~SLP_QUEUED;
1543 }
91447636 1544 lck_rw_done(&slp->ns_rwlock);
91447636 1545 return;
55e303ae 1546 }
91447636 1547
2d21ac55
A
1548 /* This socket is no longer valid, so we'll get rid of it */
1549
1550 if (slp->ns_flag & SLP_QUEUED) {
1551 if (slp->ns_flag & SLP_WAITQ)
1552 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1553 else
1554 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1555 slp->ns_flag &= ~SLP_QUEUED;
1556 }
3e170ce0 1557 lck_rw_done(&slp->ns_rwlock);
2d21ac55 1558
2d21ac55 1559 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
3e170ce0
A
1560 if (slp->ns_sotype == SOCK_STREAM)
1561 nfsrv_sock_tcp_cnt--;
2d21ac55 1562
2d21ac55
A
1563 /* now remove from the write gather socket list */
1564 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1565 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1566 slp->ns_wgq.tqe_next = SLPNOLIST;
1567 }
3e170ce0
A
1568 nfsrv_slpfree(slp);
1569}
1570
1571void
1572nfsrv_slpderef(struct nfsrv_sock *slp)
1573{
1574 lck_mtx_lock(nfsd_mutex);
1575 nfsrv_slpderef_locked(slp);
91447636 1576 lck_mtx_unlock(nfsd_mutex);
55e303ae
A
1577}
1578
1c79356b 1579/*
3e170ce0
A
1580 * Check periodically for idle sockest if needed and
1581 * zap them.
1c79356b
A
1582 */
1583void
3e170ce0 1584nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
1c79356b 1585{
3e170ce0 1586 struct nfsrv_sock *slp, *tslp;
743b1565 1587 struct timeval now;
3e170ce0 1588 time_t time_to_wait = nfsrv_sock_idle_timeout;
1c79356b 1589
2d21ac55
A
1590 microuptime(&now);
1591 lck_mtx_lock(nfsd_mutex);
1c79356b 1592
3e170ce0
A
1593 /* Turn off the timer if we're suppose to and get out */
1594 if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
1595 nfsrv_sock_idle_timeout = 0;
1596 if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
1597 nfsrv_idlesock_timer_on = 0;
2d21ac55
A
1598 lck_mtx_unlock(nfsd_mutex);
1599 return;
91447636 1600 }
1c79356b 1601
3e170ce0
A
1602 TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
1603 lck_rw_lock_exclusive(&slp->ns_rwlock);
1604 /* Skip udp and referenced sockets */
1605 if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
1606 lck_rw_done(&slp->ns_rwlock);
1607 continue;
1608 }
1609 /*
1610 * If this is the first non-referenced socket that hasn't idle out,
1611 * use its time stamp to calculate the earlist time in the future
1612 * to start the next invocation of the timer. Since the nfsrv_socklist
1613 * is sorted oldest access to newest. Once we find the first one,
1614 * we're done and break out of the loop.
1615 */
1616 if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
1617 nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1618 time_to_wait -= now.tv_sec - slp->ns_timestamp;
1619 if (time_to_wait < 1)
1620 time_to_wait = 1;
1621 lck_rw_done(&slp->ns_rwlock);
1622 break;
1623 }
1624 /*
1625 * Bump the ref count. nfsrv_slpderef below will destroy
1626 * the socket, since nfsrv_zapsock has closed it.
1627 */
1628 slp->ns_sref++;
1629 nfsrv_zapsock(slp);
1630 lck_rw_done(&slp->ns_rwlock);
1631 nfsrv_slpderef_locked(slp);
1632 }
2d21ac55 1633
3e170ce0
A
1634 /* Start ourself back up */
1635 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1636 /* Remember when the next timer will fire for nfssvc_addsock. */
1637 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1638 lck_mtx_unlock(nfsd_mutex);
1c79356b
A
1639}
1640
1641/*
2d21ac55 1642 * Clean up the data structures for the server.
1c79356b 1643 */
2d21ac55
A
1644void
1645nfsrv_cleanup(void)
1c79356b 1646{
2d21ac55 1647 struct nfsrv_sock *slp, *nslp;
55e303ae 1648 struct timeval now;
b0d623f7 1649#if CONFIG_FSE
2d21ac55
A
1650 struct nfsrv_fmod *fp, *nfp;
1651 int i;
b0d623f7 1652#endif
1c79356b 1653
55e303ae 1654 microuptime(&now);
2d21ac55
A
1655 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1656 nslp = TAILQ_NEXT(slp, ns_chain);
3e170ce0
A
1657 lck_rw_lock_exclusive(&slp->ns_rwlock);
1658 slp->ns_sref++;
1659 if (slp->ns_flag & SLP_VALID)
2d21ac55 1660 nfsrv_zapsock(slp);
3e170ce0
A
1661 lck_rw_done(&slp->ns_rwlock);
1662 nfsrv_slpderef_locked(slp);
2d21ac55 1663 }
3e170ce0 1664#
b0d623f7 1665#if CONFIG_FSE
2d21ac55
A
1666 /*
1667 * Flush pending file write fsevents
1668 */
1669 lck_mtx_lock(nfsrv_fmod_mutex);
1670 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1671 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1672 /*
1673 * Fire off the content modified fsevent for each
1674 * entry, remove it from the list, and free it.
1675 */
6d2010ae
A
1676 if (nfsrv_fsevents_enabled) {
1677 fp->fm_context.vc_thread = current_thread();
2d21ac55
A
1678 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1679 FSE_ARG_VNODE, fp->fm_vp,
1680 FSE_ARG_DONE);
6d2010ae 1681 }
2d21ac55
A
1682 vnode_put(fp->fm_vp);
1683 kauth_cred_unref(&fp->fm_context.vc_ucred);
1684 nfp = LIST_NEXT(fp, fm_link);
1685 LIST_REMOVE(fp, fm_link);
1686 FREE(fp, M_TEMP);
1687 }
1688 }
1689 nfsrv_fmod_pending = 0;
1690 lck_mtx_unlock(nfsrv_fmod_mutex);
b0d623f7 1691#endif
2d21ac55 1692
316670eb
A
1693 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1694
2d21ac55
A
1695 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1696
1697 nfsrv_cleancache(); /* And clear out server cache */
1698
1699 nfsrv_udpsock = NULL;
6d2010ae 1700 nfsrv_udp6sock = NULL;
1c79356b 1701}
2d21ac55 1702
1c79356b 1703#endif /* NFS_NOSERVER */