]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_syscalls.c
xnu-4903.270.47.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
79 #include <sys/stat.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
85 #include <sys/ubc.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
103
104 #include <security/audit/audit.h>
105
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
118 #if CONFIG_MACF
119 #include <security/mac_framework.h>
120 #endif
121
122 kern_return_t thread_terminate(thread_t); /* XXX */
123
#if NFSSERVER

/* Objects defined in other NFS server source files. */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/*
 * A non-zero nfsrv_sock_idle_timeout smaller than this is raised to this
 * value when a TCP socket is added (see nfssvc_addsock()).
 */
#define NFSD_MIN_IDLE_TIMEOUT 30

/* Server tunables and counters private to this file (exported via sysctl below). */
static int nfsrv_require_resv_port = 0;
static time_t nfsrv_idlesock_timer_on = 0;
static int nfsrv_sock_tcp_cnt = 0;              /* bumped for every TCP socket added */
static int nfsrv_sock_idle_timeout = 3600;      /* One hour */

/* Entry points implemented in this file or elsewhere in the NFS server. */
int nfssvc_export(user_addr_t argp);
int nfssvc_nfsd(void);
int nfssvc_addsock(socket_t, mbuf_t);
void nfsrv_zapsock(struct nfsrv_sock *);
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);

#endif /* NFSSERVER */
145
146 /*
147 * sysctl stuff
148 */
149 SYSCTL_DECL(_vfs_generic);
150 SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
151
152 #if NFSCLIENT
153 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
154 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
158 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
159 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
160 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
161 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
162 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
163 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
164 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
165 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
166 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
167 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
168 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
169 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
170 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
171 SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
172 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
173 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
174 SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
175 #endif /* NFSCLIENT */
176
177 #if NFSSERVER
178 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
179 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
180 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
181 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
182 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
183 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
184 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
185 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
186 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
187 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
188 #if CONFIG_FSE
189 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
190 #endif
191 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
192 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
193 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
194 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
195 #ifdef NFS_UC_Q_DEBUG
196 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
197 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
198 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
199 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
200 #endif
201 #endif /* NFSSERVER */
202
203
204 #if NFSCLIENT
205
206 static int
207 mapname2id(struct nfs_testmapid *map)
208 {
209 int error;
210
211 error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
212 if (error) {
213 return error;
214 }
215
216 if (map->ntm_grpflag) {
217 error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
218 } else {
219 error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
220 }
221
222 return error;
223 }
224
225 static int
226 mapid2name(struct nfs_testmapid *map)
227 {
228 int error;
229 size_t len = sizeof(map->ntm_name);
230
231 if (map->ntm_grpflag) {
232 error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
233 } else {
234 error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
235 }
236
237 if (error) {
238 return error;
239 }
240
241 error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
242
243 return error;
244 }
245
246 static int
247 nfsclnt_testidmap(proc_t p, user_addr_t argp)
248 {
249 struct nfs_testmapid mapid;
250 int error, coerror;
251 size_t len = sizeof(mapid.ntm_name);
252
253 /* Let root make this call. */
254 error = proc_suser(p);
255 if (error) {
256 return error;
257 }
258
259 error = copyin(argp, &mapid, sizeof(mapid));
260 if (error) {
261 return error;
262 }
263 switch (mapid.ntm_lookup) {
264 case NTM_NAME2ID:
265 error = mapname2id(&mapid);
266 break;
267 case NTM_ID2NAME:
268 error = mapid2name(&mapid);
269 break;
270 case NTM_NAME2GUID:
271 error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag);
272 break;
273 case NTM_GUID2NAME:
274 error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag);
275 break;
276 default:
277 return EINVAL;
278 }
279
280 coerror = copyout(&mapid, argp, sizeof(mapid));
281
282 return error ? error : coerror;
283 }
284
285 int
286 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
287 {
288 struct lockd_ans la;
289 int error;
290
291 switch (uap->flag) {
292 case NFSCLNT_LOCKDANS:
293 error = copyin(uap->argp, &la, sizeof(la));
294 if (!error) {
295 error = nfslockdans(p, &la);
296 }
297 break;
298 case NFSCLNT_LOCKDNOTIFY:
299 error = nfslockdnotify(p, uap->argp);
300 break;
301 case NFSCLNT_TESTIDMAP:
302 error = nfsclnt_testidmap(p, uap->argp);
303 break;
304 default:
305 error = EINVAL;
306 }
307 return error;
308 }
309
310
/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand, and the
 * threads exit when unused for a while.  There are as many nfsiod structs as
 * there are nfsiod threads; however, there's no strict tie between a thread
 * and a struct.  Each thread puts an nfsiod on the free list and sleeps on it.
 * When it wakes up, it removes the next struct nfsiod from the queue and
 * services it.  Then it will put the struct at the head of the free list and
 * sleep on it.  Async requests will pull the next struct nfsiod from the head
 * of the free list, put it on the work queue, and wake whatever thread is
 * waiting on that struct.
 */
324
325 /*
326 * nfsiod thread exit routine
327 *
328 * Must be called with nfsiod_mutex held so that the
329 * decision to terminate is atomic with the termination.
330 */
331 void
332 nfsiod_terminate(struct nfsiod *niod)
333 {
334 nfsiod_thread_count--;
335 lck_mtx_unlock(nfsiod_mutex);
336 if (niod) {
337 FREE(niod, M_TEMP);
338 } else {
339 printf("nfsiod: terminating without niod\n");
340 }
341 thread_terminate(current_thread());
342 /*NOTREACHED*/
343 }
344
345 /* nfsiod thread startup routine */
346 void
347 nfsiod_thread(void)
348 {
349 struct nfsiod *niod;
350 int error;
351
352 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
353 if (!niod) {
354 lck_mtx_lock(nfsiod_mutex);
355 nfsiod_thread_count--;
356 wakeup(current_thread());
357 lck_mtx_unlock(nfsiod_mutex);
358 thread_terminate(current_thread());
359 /*NOTREACHED*/
360 }
361 bzero(niod, sizeof(*niod));
362 lck_mtx_lock(nfsiod_mutex);
363 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
364 wakeup(current_thread());
365 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
366 /* shouldn't return... so we have an error */
367 /* remove an old nfsiod struct and terminate */
368 lck_mtx_lock(nfsiod_mutex);
369 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
370 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
371 }
372 nfsiod_terminate(niod);
373 /*NOTREACHED*/
374 }
375
376 /*
377 * Start up another nfsiod thread.
378 * (unless we're already maxed out and there are nfsiods running)
379 */
380 int
381 nfsiod_start(void)
382 {
383 thread_t thd = THREAD_NULL;
384
385 lck_mtx_lock(nfsiod_mutex);
386 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
387 lck_mtx_unlock(nfsiod_mutex);
388 return EBUSY;
389 }
390 nfsiod_thread_count++;
391 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
392 lck_mtx_unlock(nfsiod_mutex);
393 return EBUSY;
394 }
395 /* wait for the thread to complete startup */
396 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
397 thread_deallocate(thd);
398 return 0;
399 }
400
401 /*
402 * Continuation for Asynchronous I/O threads for NFS client.
403 *
404 * Grab an nfsiod struct to work on, do some work, then drop it
405 */
406 int
407 nfsiod_continue(int error)
408 {
409 struct nfsiod *niod;
410 struct nfsmount *nmp;
411 struct nfsreq *req, *treq;
412 struct nfs_reqqhead iodq;
413 int morework;
414
415 lck_mtx_lock(nfsiod_mutex);
416 niod = TAILQ_FIRST(&nfsiodwork);
417 if (!niod) {
418 /* there's no work queued up */
419 /* remove an old nfsiod struct and terminate */
420 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
421 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
422 }
423 nfsiod_terminate(niod);
424 /*NOTREACHED*/
425 }
426 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
427
428 worktodo:
429 while ((nmp = niod->niod_nmp)) {
430 if (nmp == NULL) {
431 niod->niod_nmp = NULL;
432 break;
433 }
434
435 /*
436 * Service this mount's async I/O queue.
437 *
438 * In order to ensure some level of fairness between mounts,
439 * we grab all the work up front before processing it so any
440 * new work that arrives will be serviced on a subsequent
441 * iteration - and we have a chance to see if other work needs
442 * to be done (e.g. the delayed write queue needs to be pushed
443 * or other mounts are waiting for an nfsiod).
444 */
445 /* grab the current contents of the queue */
446 TAILQ_INIT(&iodq);
447 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
448 /* Mark each iod request as being managed by an iod */
449 TAILQ_FOREACH(req, &iodq, r_achain) {
450 lck_mtx_lock(&req->r_mtx);
451 assert(!(req->r_flags & R_IOD));
452 req->r_flags |= R_IOD;
453 lck_mtx_unlock(&req->r_mtx);
454 }
455 lck_mtx_unlock(nfsiod_mutex);
456
457 /* process the queue */
458 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
459 TAILQ_REMOVE(&iodq, req, r_achain);
460 req->r_achain.tqe_next = NFSREQNOLIST;
461 req->r_callback.rcb_func(req);
462 }
463
464 /* now check if there's more/other work to be done */
465 lck_mtx_lock(nfsiod_mutex);
466 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
467 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
468 /*
469 * we're going to stop working on this mount but if the
470 * mount still needs more work so queue it up
471 */
472 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) {
473 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
474 }
475 nmp->nm_niod = NULL;
476 niod->niod_nmp = NULL;
477 }
478 }
479
480 /* loop if there's still a mount to work on */
481 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
482 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
483 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
484 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
485 }
486 if (niod->niod_nmp) {
487 goto worktodo;
488 }
489
490 /* queue ourselves back up - if there aren't too many threads running */
491 if (nfsiod_thread_count <= NFSIOD_MAX) {
492 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
493 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
494 /* shouldn't return... so we have an error */
495 /* remove an old nfsiod struct and terminate */
496 lck_mtx_lock(nfsiod_mutex);
497 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
498 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
499 }
500 }
501 nfsiod_terminate(niod);
502 /*NOTREACHED*/
503 return 0;
504 }
505
506 #endif /* NFSCLIENT */
507
508
509 #if NFSSERVER
510
511 /*
512 * NFS server system calls
513 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
514 */
515
516 /*
517 * Get file handle system call
518 */
519 int
520 getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
521 {
522 vnode_t vp;
523 struct nfs_filehandle nfh;
524 int error, fhlen, fidlen;
525 struct nameidata nd;
526 char path[MAXPATHLEN], *ptr;
527 size_t pathlen;
528 struct nfs_exportfs *nxfs;
529 struct nfs_export *nx;
530
531 /*
532 * Must be super user
533 */
534 error = proc_suser(p);
535 if (error) {
536 return error;
537 }
538
539 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
540 if (!error) {
541 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
542 }
543 if (error) {
544 return error;
545 }
546 /* limit fh size to length specified (or v3 size by default) */
547 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
548 fhlen = NFSV3_MAX_FH_SIZE;
549 }
550 fidlen = fhlen - sizeof(struct nfs_exphandle);
551
552 if (!nfsrv_is_initialized()) {
553 return EINVAL;
554 }
555
556 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
557 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
558 error = namei(&nd);
559 if (error) {
560 return error;
561 }
562 nameidone(&nd);
563
564 vp = nd.ni_vp;
565
566 // find exportfs that matches f_mntonname
567 lck_rw_lock_shared(&nfsrv_export_rwlock);
568 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
569 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
570 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
571 break;
572 }
573 }
574 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
575 error = EINVAL;
576 goto out;
577 }
578 // find export that best matches remainder of path
579 ptr = path + strlen(nxfs->nxfs_path);
580 while (*ptr && (*ptr == '/')) {
581 ptr++;
582 }
583 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
584 int len = strlen(nx->nx_path);
585 if (len == 0) { // we've hit the export entry for the root directory
586 break;
587 }
588 if (!strncmp(nx->nx_path, ptr, len)) {
589 break;
590 }
591 }
592 if (!nx) {
593 error = EINVAL;
594 goto out;
595 }
596
597 bzero(&nfh, sizeof(nfh));
598 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
599 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
600 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
601 nfh.nfh_xh.nxh_flags = 0;
602 nfh.nfh_xh.nxh_reserved = 0;
603 nfh.nfh_len = fidlen;
604 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
605 if (nfh.nfh_len > (uint32_t)fidlen) {
606 error = EOVERFLOW;
607 }
608 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
609 nfh.nfh_len += sizeof(nfh.nfh_xh);
610 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
611
612 out:
613 lck_rw_done(&nfsrv_export_rwlock);
614 vnode_put(vp);
615 if (error) {
616 return error;
617 }
618 /*
619 * At first blush, this may appear to leak a kernel stack
620 * address, but the copyout() never reaches &nfh.nfh_fhp
621 * (sizeof(fhandle_t) < sizeof(nfh)).
622 */
623 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
624 return error;
625 }
626
627 extern const struct fileops vnops;
628
629 /*
630 * syscall for the rpc.lockd to use to translate a NFS file handle into
631 * an open descriptor.
632 *
633 * warning: do not remove the suser() call or this becomes one giant
634 * security hole.
635 */
636 int
637 fhopen( proc_t p,
638 struct fhopen_args *uap,
639 int32_t *retval)
640 {
641 vnode_t vp;
642 struct nfs_filehandle nfh;
643 struct nfs_export *nx;
644 struct nfs_export_options *nxo;
645 struct flock lf;
646 struct fileproc *fp, *nfp;
647 int fmode, error, type;
648 int indx;
649 vfs_context_t ctx = vfs_context_current();
650 kauth_action_t action;
651
652 /*
653 * Must be super user
654 */
655 error = suser(vfs_context_ucred(ctx), 0);
656 if (error) {
657 return error;
658 }
659
660 if (!nfsrv_is_initialized()) {
661 return EINVAL;
662 }
663
664 fmode = FFLAGS(uap->flags);
665 /* why not allow a non-read/write open for our lockd? */
666 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
667 return EINVAL;
668 }
669
670 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
671 if (error) {
672 return error;
673 }
674 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
675 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
676 return EINVAL;
677 }
678 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
679 if (error) {
680 return error;
681 }
682 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
683
684 lck_rw_lock_shared(&nfsrv_export_rwlock);
685 /* now give me my vnode, it gets returned to me with a reference */
686 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
687 lck_rw_done(&nfsrv_export_rwlock);
688 if (error) {
689 if (error == NFSERR_TRYLATER) {
690 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
691 }
692 return error;
693 }
694
695 /*
696 * From now on we have to make sure not
697 * to forget about the vnode.
698 * Any error that causes an abort must vnode_put(vp).
699 * Just set error = err and 'goto bad;'.
700 */
701
702 /*
703 * from vn_open
704 */
705 if (vnode_vtype(vp) == VSOCK) {
706 error = EOPNOTSUPP;
707 goto bad;
708 }
709
710 /* disallow write operations on directories */
711 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
712 error = EISDIR;
713 goto bad;
714 }
715
716 #if CONFIG_MACF
717 if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
718 goto bad;
719 }
720 #endif
721
722 /* compute action to be authorized */
723 action = 0;
724 if (fmode & FREAD) {
725 action |= KAUTH_VNODE_READ_DATA;
726 }
727 if (fmode & (FWRITE | O_TRUNC)) {
728 action |= KAUTH_VNODE_WRITE_DATA;
729 }
730 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
731 goto bad;
732 }
733
734 if ((error = VNOP_OPEN(vp, fmode, ctx))) {
735 goto bad;
736 }
737 if ((error = vnode_ref_ext(vp, fmode, 0))) {
738 goto bad;
739 }
740
741 /*
742 * end of vn_open code
743 */
744
745 // starting here... error paths should call vn_close/vnode_put
746 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
747 vn_close(vp, fmode & FMASK, ctx);
748 goto bad;
749 }
750 fp = nfp;
751
752 fp->f_fglob->fg_flag = fmode & FMASK;
753 fp->f_fglob->fg_ops = &vnops;
754 fp->f_fglob->fg_data = (caddr_t)vp;
755
756 // XXX do we really need to support this with fhopen()?
757 if (fmode & (O_EXLOCK | O_SHLOCK)) {
758 lf.l_whence = SEEK_SET;
759 lf.l_start = 0;
760 lf.l_len = 0;
761 if (fmode & O_EXLOCK) {
762 lf.l_type = F_WRLCK;
763 } else {
764 lf.l_type = F_RDLCK;
765 }
766 type = F_FLOCK;
767 if ((fmode & FNONBLOCK) == 0) {
768 type |= F_WAIT;
769 }
770 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
771 struct vfs_context context = *vfs_context_current();
772 /* Modify local copy (to not damage thread copy) */
773 context.vc_ucred = fp->f_fglob->fg_cred;
774
775 vn_close(vp, fp->f_fglob->fg_flag, &context);
776 fp_free(p, indx, fp);
777 return error;
778 }
779 fp->f_fglob->fg_flag |= FHASLOCK;
780 }
781
782 vnode_put(vp);
783
784 proc_fdlock(p);
785 procfdtbl_releasefd(p, indx, NULL);
786 fp_drop(p, indx, fp, 1);
787 proc_fdunlock(p);
788
789 *retval = indx;
790 return 0;
791
792 bad:
793 vnode_put(vp);
794 return error;
795 }
796
797 /*
798 * NFS server pseudo system call
799 */
800 int
801 nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
802 {
803 mbuf_t nam;
804 struct user_nfsd_args user_nfsdarg;
805 socket_t so;
806 int error;
807
808 AUDIT_ARG(cmd, uap->flag);
809
810 /*
811 * Must be super user for most operations (export ops checked later).
812 */
813 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) {
814 return error;
815 }
816 #if CONFIG_MACF
817 error = mac_system_check_nfsd(kauth_cred_get());
818 if (error) {
819 return error;
820 }
821 #endif
822
823 /* make sure NFS server data structures have been initialized */
824 nfsrv_init();
825
826 if (uap->flag & NFSSVC_ADDSOCK) {
827 if (IS_64BIT_PROCESS(p)) {
828 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
829 } else {
830 struct nfsd_args tmp_args;
831 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
832 if (error == 0) {
833 user_nfsdarg.sock = tmp_args.sock;
834 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
835 user_nfsdarg.namelen = tmp_args.namelen;
836 }
837 }
838 if (error) {
839 return error;
840 }
841 /* get the socket */
842 error = file_socket(user_nfsdarg.sock, &so);
843 if (error) {
844 return error;
845 }
846 /* Get the client address for connected sockets. */
847 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
848 nam = NULL;
849 } else {
850 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
851 if (error) {
852 /* drop the iocount file_socket() grabbed on the file descriptor */
853 file_drop(user_nfsdarg.sock);
854 return error;
855 }
856 }
857 /*
858 * nfssvc_addsock() will grab a retain count on the socket
859 * to keep the socket from being closed when nfsd closes its
860 * file descriptor for it.
861 */
862 error = nfssvc_addsock(so, nam);
863 /* drop the iocount file_socket() grabbed on the file descriptor */
864 file_drop(user_nfsdarg.sock);
865 } else if (uap->flag & NFSSVC_NFSD) {
866 error = nfssvc_nfsd();
867 } else if (uap->flag & NFSSVC_EXPORT) {
868 error = nfssvc_export(uap->argp);
869 } else {
870 error = EINVAL;
871 }
872 if (error == EINTR || error == ERESTART) {
873 error = 0;
874 }
875 return error;
876 }
877
878 /*
879 * Adds a socket to the list for servicing by nfsds.
880 */
881 int
882 nfssvc_addsock(socket_t so, mbuf_t mynam)
883 {
884 struct nfsrv_sock *slp;
885 int error = 0, sodomain, sotype, soprotocol, on = 1;
886 int first;
887 struct timeval timeo;
888
889 /* make sure mbuf constants are set up */
890 if (!nfs_mbuf_mhlen) {
891 nfs_mbuf_init();
892 }
893
894 sock_gettype(so, &sodomain, &sotype, &soprotocol);
895
896 /* There should be only one UDP socket for each of IPv4 and IPv6 */
897 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
898 mbuf_freem(mynam);
899 return EEXIST;
900 }
901 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
902 mbuf_freem(mynam);
903 return EEXIST;
904 }
905
906 /* Set protocol options and reserve some space (for UDP). */
907 if (sotype == SOCK_STREAM) {
908 error = nfsrv_check_exports_allow_address(mynam);
909 if (error) {
910 return error;
911 }
912 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
913 }
914 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
915 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
916 }
917 if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
918 int reserve = NFS_UDPSOCKBUF;
919 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
920 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
921 if (error) {
922 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
923 error = 0;
924 }
925 }
926 sock_nointerrupt(so, 0);
927
928 /*
929 * Set socket send/receive timeouts.
930 * Receive timeout shouldn't matter, but setting the send timeout
931 * will make sure that an unresponsive client can't hang the server.
932 */
933 timeo.tv_usec = 0;
934 timeo.tv_sec = 1;
935 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
936 timeo.tv_sec = 30;
937 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
938 if (error) {
939 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
940 error = 0;
941 }
942
943 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
944 if (!slp) {
945 mbuf_freem(mynam);
946 return ENOMEM;
947 }
948 bzero((caddr_t)slp, sizeof(struct nfsrv_sock));
949 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
950 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
951
952 lck_mtx_lock(nfsd_mutex);
953
954 if (soprotocol == IPPROTO_UDP) {
955 if (sodomain == AF_INET) {
956 /* There should be only one UDP/IPv4 socket */
957 if (nfsrv_udpsock) {
958 lck_mtx_unlock(nfsd_mutex);
959 nfsrv_slpfree(slp);
960 mbuf_freem(mynam);
961 return EEXIST;
962 }
963 nfsrv_udpsock = slp;
964 }
965 if (sodomain == AF_INET6) {
966 /* There should be only one UDP/IPv6 socket */
967 if (nfsrv_udp6sock) {
968 lck_mtx_unlock(nfsd_mutex);
969 nfsrv_slpfree(slp);
970 mbuf_freem(mynam);
971 return EEXIST;
972 }
973 nfsrv_udp6sock = slp;
974 }
975 }
976
977 /* add the socket to the list */
978 first = TAILQ_EMPTY(&nfsrv_socklist);
979 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
980 if (soprotocol == IPPROTO_TCP) {
981 nfsrv_sock_tcp_cnt++;
982 if (nfsrv_sock_idle_timeout < 0) {
983 nfsrv_sock_idle_timeout = 0;
984 }
985 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
986 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
987 }
988 /*
989 * Possibly start or stop the idle timer. We only start the idle timer when
990 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
991 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
992 * the number of connections.
993 */
994 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
995 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
996 if (nfsrv_idlesock_timer_on) {
997 thread_call_cancel(nfsrv_idlesock_timer_call);
998 nfsrv_idlesock_timer_on = 0;
999 }
1000 } else {
1001 struct nfsrv_sock *old_slp;
1002 struct timeval now;
1003 time_t time_to_wait = nfsrv_sock_idle_timeout;
1004 /*
1005 * Get the oldest tcp socket and calculate the
1006 * earliest time for the next idle timer to fire
1007 * based on the possibly updated nfsrv_sock_idle_timeout
1008 */
1009 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
1010 if (old_slp->ns_sotype == SOCK_STREAM) {
1011 microuptime(&now);
1012 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
1013 if (time_to_wait < 1) {
1014 time_to_wait = 1;
1015 }
1016 break;
1017 }
1018 }
1019 /*
1020 * If we have a timer scheduled, but if its going to fire too late,
1021 * turn it off.
1022 */
1023 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
1024 thread_call_cancel(nfsrv_idlesock_timer_call);
1025 nfsrv_idlesock_timer_on = 0;
1026 }
1027 /* Schedule the idle thread if it isn't already */
1028 if (!nfsrv_idlesock_timer_on) {
1029 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1030 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1031 }
1032 }
1033 }
1034 }
1035
1036 sock_retain(so); /* grab a retain count on the socket */
1037 slp->ns_so = so;
1038 slp->ns_sotype = sotype;
1039 slp->ns_nam = mynam;
1040
1041 /* set up the socket up-call */
1042 nfsrv_uc_addsock(slp, first);
1043
1044 /* mark that the socket is not in the nfsrv_sockwg list */
1045 slp->ns_wgq.tqe_next = SLPNOLIST;
1046
1047 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1048
1049 nfsrv_wakenfsd(slp);
1050 lck_mtx_unlock(nfsd_mutex);
1051
1052 return 0;
1053 }
1054
1055 /*
1056 * nfssvc_nfsd()
1057 *
1058 * nfsd theory of operation:
1059 *
1060 * The first nfsd thread stays in user mode accepting new TCP connections
1061 * which are then added via the "addsock" call. The rest of the nfsd threads
1062 * simply call into the kernel and remain there in a loop handling NFS
1063 * requests until killed by a signal.
1064 *
1065 * There's a list of nfsd threads (nfsd_head).
1066 * There's an nfsd queue that contains only those nfsds that are
1067 * waiting for work to do (nfsd_queue).
1068 *
1069 * There's a list of all NFS sockets (nfsrv_socklist) and three queues for
1070 * managing the work on the sockets:
1071 * nfsrv_sockwait - sockets w/new data waiting to be worked on
1072 * nfsrv_sockwork - sockets being worked on which may have more work to do
1073 * nfsrv_sockwg -- sockets which have pending write gather data
1074 * When a socket receives data, if it is not currently queued, it
1075 * will be placed at the end of the "wait" queue.
1076 * Whenever a socket needs servicing we make sure it is queued and
1077 * wake up a waiting nfsd (if there is one).
1078 *
1079 * nfsds will service at most 8 requests from the same socket before
1080 * defecting to work on another socket.
1081 * nfsds will defect immediately if there are any sockets in the "wait" queue
1082 * nfsds looking for a socket to work on check the "wait" queue first and
1083 * then check the "work" queue.
1084 * When an nfsd starts working on a socket, it removes it from the head of
1085 * the queue it's currently on and moves it to the end of the "work" queue.
1086 * When nfsds are checking the queues for work, any sockets found not to
1087 * have any work are simply dropped from the queue.
1088 *
1089 */
/*
 * Service loop run by each nfsd thread.
 *
 * Allocates a struct nfsd, links it on nfsd_head, and then loops: find a
 * socket with work, pull a request off it with nfsrv_dorec(), consult the
 * reply cache with nfsrv_getcache(), run the request through nfsrv_procs[]
 * or nfsrv_writegather(), and send the reply with nfsrv_send().
 *
 * Returns ENOMEM if the nfsd structure cannot be allocated.  Otherwise
 * returns the value of `error` when the loop is left: 0 when the thread
 * exits because nfsd_thread_count exceeds nfsd_thread_max, EINTR when
 * nfsd_thread_max <= 0, a non-EWOULDBLOCK msleep() error, or the
 * EINTR/ERESTART seen on the reply path.
 *
 * On the way out the nfsd is unlinked and freed, and the last thread to
 * leave calls nfsrv_cleanup().
 */
1090 int
1091 nfssvc_nfsd(void)
1092 {
1093 mbuf_t m, mrep;
1094 struct nfsrv_sock *slp;
1095 struct nfsd *nfsd;
1096 struct nfsrv_descript *nd = NULL;
1097 int error = 0, cacherep, writes_todo;
1098 int siz, procrastinate, opcnt = 0; /* opcnt: passes made on the current socket */
1099 u_quad_t cur_usec;
1100 struct timeval now;
1101 struct vfs_context context;
1102 struct timespec to;
1103
/* NOTE(review): presumably only here to silence uninitialized-variable warnings */
1104 #ifndef nolint
1105 cacherep = RC_DOIT;
1106 writes_todo = 0;
1107 #endif
1108
1109 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
1110 if (!nfsd) {
1111 return ENOMEM;
1112 }
1113 bzero(nfsd, sizeof(struct nfsd));
1114 lck_mtx_lock(nfsd_mutex);
1115 if (nfsd_thread_count++ == 0) {
1116 nfsrv_initcache(); /* Init the server request cache */
1117 }
1118 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
1119 lck_mtx_unlock(nfsd_mutex);
1120
1121 context.vc_thread = current_thread();
1122
1123 /* Set time out so that nfsd threads can wake up and see if they are still needed. */
1124 to.tv_sec = 5;
1125 to.tv_nsec = 0;
1126
1127 /*
1128 * Loop getting rpc requests until SIGKILL.
1129 */
1130 for (;;) {
1131 if (nfsd_thread_max <= 0) {
1132 /* NFS server shutting down, get out ASAP */
1133 error = EINTR;
1134 slp = nfsd->nfsd_slp;
1135 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
1136 /* already have some work to do */
1137 error = 0;
1138 slp = nfsd->nfsd_slp;
1139 } else {
1140 /* need to find work to do */
1141 error = 0;
1142 lck_mtx_lock(nfsd_mutex);
/* wait until this nfsd has a socket or either service queue is non-empty */
1143 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
1144 if (nfsd_thread_count > nfsd_thread_max) {
1145 /*
1146 * If we have no socket and there are more
1147 * nfsd threads than configured, let's exit.
1148 */
1149 error = 0;
1150 goto done; /* nfsd_mutex is held, as the code at `done` expects */
1151 }
1152 nfsd->nfsd_flag |= NFSD_WAITING;
1153 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
/* sleep on this nfsd, passing nfsd_mutex and the timeout `to` set above */
1154 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
1155 if (error) {
/* still marked waiting: take ourselves back off nfsd_queue */
1156 if (nfsd->nfsd_flag & NFSD_WAITING) {
1157 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
1158 nfsd->nfsd_flag &= ~NFSD_WAITING;
1159 }
1160 if (error == EWOULDBLOCK) {
/* re-evaluate the while condition (and the thread-count check) */
1161 continue;
1162 }
1163 goto done;
1164 }
1165 }
1166 slp = nfsd->nfsd_slp;
1167 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
1168 /* look for a socket to work on in the wait queue */
1169 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
1170 lck_rw_lock_exclusive(&slp->ns_rwlock);
1171 /* remove from the head of the queue */
1172 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1173 slp->ns_flag &= ~SLP_WAITQ;
1174 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
/* leave the loop with slp->ns_rwlock still held */
1175 break;
1176 }
1177 /* nothing to do, so skip this socket */
1178 lck_rw_done(&slp->ns_rwlock);
1179 }
1180 }
1181 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
1182 /* look for a socket to work on in the work queue */
1183 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
1184 lck_rw_lock_exclusive(&slp->ns_rwlock);
1185 /* remove from the head of the queue */
1186 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1187 slp->ns_flag &= ~SLP_WORKQ;
1188 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
/* leave the loop with slp->ns_rwlock still held */
1189 break;
1190 }
1191 /* nothing to do, so skip this socket */
1192 lck_rw_done(&slp->ns_rwlock);
1193 }
1194 }
1195 if (!nfsd->nfsd_slp && slp) {
1196 /* we found a socket to work on, grab a reference */
1197 slp->ns_sref++;
1198 microuptime(&now);
1199 slp->ns_timestamp = now.tv_sec;
1200 /* We keep the socket list in least recently used order for reaping idle sockets */
1201 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1202 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1203 nfsd->nfsd_slp = slp;
1204 opcnt = 0;
1205 /* and put it at the back of the work queue */
1206 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1207 slp->ns_flag |= SLP_WORKQ;
/* release the ns_rwlock taken in the queue scan above */
1208 lck_rw_done(&slp->ns_rwlock);
1209 }
1210 lck_mtx_unlock(nfsd_mutex);
1211 if (!slp) {
1212 continue;
1213 }
1214 lck_rw_lock_exclusive(&slp->ns_rwlock);
1215 if (slp->ns_flag & SLP_VALID) {
/* SLP_NEEDQ set and SLP_DISCONN clear: pull pending data off the socket */
1216 if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
1217 slp->ns_flag &= ~SLP_NEEDQ;
1218 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1219 }
1220 if (slp->ns_flag & SLP_DISCONN) {
1221 nfsrv_zapsock(slp);
1222 }
1223 error = nfsrv_dorec(slp, nfsd, &nd);
1224 if (error == EINVAL) { // RPCSEC_GSS drop
1225 if (slp->ns_sotype == SOCK_STREAM) {
1226 nfsrv_zapsock(slp); // drop connection
1227 }
1228 }
1229 writes_todo = 0;
/*
 * nfsrv_dorec() failed, but gathered writes may be due: if ns_wgtime
 * has passed, clear the error and let the loop below service them.
 */
1230 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1231 microuptime(&now);
1232 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1233 (u_quad_t)now.tv_usec;
1234 if (slp->ns_wgtime <= cur_usec) {
1235 error = 0;
1236 cacherep = RC_DOIT;
1237 writes_todo = 1;
1238 }
1239 slp->ns_flag &= ~SLP_DOWRITES;
1240 }
1241 nfsd->nfsd_flag |= NFSD_REQINPROG;
1242 }
1243 lck_rw_done(&slp->ns_rwlock);
1244 }
/*
 * Nothing usable (an error, or the socket is no longer valid): release
 * any request descriptor and our socket reference, then either leave
 * the loop (server shutting down) or go look for other work.
 */
1245 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1246 if (nd) {
1247 nfsm_chain_cleanup(&nd->nd_nmreq);
1248 if (nd->nd_nam2) {
1249 mbuf_freem(nd->nd_nam2);
1250 }
1251 if (IS_VALID_CRED(nd->nd_cr)) {
1252 kauth_cred_unref(&nd->nd_cr);
1253 }
1254 if (nd->nd_gss_context) {
1255 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1256 }
1257 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1258 nd = NULL;
1259 }
1260 nfsd->nfsd_slp = NULL;
1261 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1262 if (slp) {
1263 nfsrv_slpderef(slp);
1264 }
1265 if (nfsd_thread_max <= 0) {
1266 break;
1267 }
1268 continue;
1269 }
1270 if (nd) {
1271 microuptime(&nd->nd_starttime);
/* use the per-request address if there is one, else the socket's address */
1272 if (nd->nd_nam2) {
1273 nd->nd_nam = nd->nd_nam2;
1274 } else {
1275 nd->nd_nam = slp->ns_nam;
1276 }
1277
1278 cacherep = nfsrv_getcache(nd, slp, &mrep);
1279
1280 if (nfsrv_require_resv_port) {
1281 /* Check if source port is a reserved port */
1282 in_port_t port = 0;
1283 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1284
1285 if (saddr->sa_family == AF_INET) {
1286 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1287 } else if (saddr->sa_family == AF_INET6) {
1288 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1289 }
/*
 * Source port is not below IPPORT_RESERVED: turn everything except
 * the NULL procedure into a no-op that replies AUTH_TOOWEAK.
 */
1290 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1291 nd->nd_procnum = NFSPROC_NOOP;
1292 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1293 cacherep = RC_DOIT;
1294 }
1295 }
1296 }
1297
1298 /*
1299 * Loop to get all the write RPC replies that have been
1300 * gathered together.
1301 */
1302 do {
1303 switch (cacherep) {
1304 case RC_DOIT:
1305 if (nd && (nd->nd_vers == NFS_VER3)) {
1306 procrastinate = nfsrv_wg_delay_v3;
1307 } else {
1308 procrastinate = nfsrv_wg_delay;
1309 }
/* the request runs with nfsrv_export_rwlock held shared */
1310 lck_rw_lock_shared(&nfsrv_export_rwlock);
1311 context.vc_ucred = NULL;
/* writes_todo is tested first, so nd is not dereferenced when it is set */
1312 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
1313 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1314 } else {
1315 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1316 }
1317 lck_rw_done(&nfsrv_export_rwlock);
1318 if (mrep == NULL) {
1319 /*
1320 * If this is a stream socket and we are not going
1321 * to send a reply we better close the connection
1322 * so the client doesn't hang.
1323 */
1324 if (error && slp->ns_sotype == SOCK_STREAM) {
1325 lck_rw_lock_exclusive(&slp->ns_rwlock);
1326 nfsrv_zapsock(slp);
1327 lck_rw_done(&slp->ns_rwlock);
1328 printf("NFS server: NULL reply from proc = %d error = %d\n",
1329 nd->nd_procnum, error);
1330 }
1331 break;
1332 }
1333 if (error) {
1334 OSAddAtomic64(1, &nfsstats.srv_errs);
1335 nfsrv_updatecache(nd, FALSE, mrep);
1336 if (nd->nd_nam2) {
1337 mbuf_freem(nd->nd_nam2);
1338 nd->nd_nam2 = NULL;
1339 }
1340 break;
1341 }
1342 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1343 nfsrv_updatecache(nd, TRUE, mrep);
1344 /* FALLTHRU */
1345
1346 case RC_REPLY:
1347 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1348 /*
1349 * Need to checksum or encrypt the reply
1350 */
1351 error = nfs_gss_svc_protect_reply(nd, mrep);
1352 if (error) {
1353 mbuf_freem(mrep);
1354 break;
1355 }
1356 }
1357
1358 /*
1359 * Get the total size of the reply
1360 */
1361 m = mrep;
1362 siz = 0;
1363 while (m) {
1364 siz += mbuf_len(m);
1365 m = mbuf_next(m);
1366 }
/* an empty or oversized reply is treated as fatal */
1367 if (siz <= 0 || siz > NFS_MAXPACKET) {
1368 printf("mbuf siz=%d\n", siz);
1369 panic("Bad nfs svc reply");
1370 }
1371 m = mrep;
1372 mbuf_pkthdr_setlen(m, siz);
1373 error = mbuf_pkthdr_setrcvif(m, NULL);
1374 if (error) {
1375 panic("nfsd setrcvif failed: %d", error);
1376 }
1377 /*
1378 * For stream protocols, prepend a Sun RPC
1379 * Record Mark.
1380 */
1381 if (slp->ns_sotype == SOCK_STREAM) {
1382 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1383 if (!error) {
/* mark word: top bit set, OR'd with the reply length, in network byte order */
1384 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1385 }
1386 }
1387 if (!error) {
1388 if (slp->ns_flag & SLP_VALID) {
1389 error = nfsrv_send(slp, nd->nd_nam2, m);
1390 } else {
1391 error = EPIPE;
1392 mbuf_freem(m);
1393 }
1394 } else {
1395 mbuf_freem(m);
1396 }
/* m was either passed to nfsrv_send() or freed above */
1397 mrep = NULL;
1398 if (nd->nd_nam2) {
1399 mbuf_freem(nd->nd_nam2);
1400 nd->nd_nam2 = NULL;
1401 }
1402 if (error == EPIPE) {
1403 lck_rw_lock_exclusive(&slp->ns_rwlock);
1404 nfsrv_zapsock(slp);
1405 lck_rw_done(&slp->ns_rwlock);
1406 }
/* interrupted: release the request and socket reference and end this thread */
1407 if (error == EINTR || error == ERESTART) {
1408 nfsm_chain_cleanup(&nd->nd_nmreq);
1409 if (IS_VALID_CRED(nd->nd_cr)) {
1410 kauth_cred_unref(&nd->nd_cr);
1411 }
1412 if (nd->nd_gss_context) {
1413 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1414 }
1415 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1416 nfsrv_slpderef(slp);
/* `done` expects nfsd_mutex to be held */
1417 lck_mtx_lock(nfsd_mutex);
1418 goto done;
1419 }
1420 break;
1421 case RC_DROPIT:
1422 mbuf_freem(nd->nd_nam2);
1423 nd->nd_nam2 = NULL;
1424 break;
1425 }
1426 ;
1427 opcnt++;
/* finished with this request descriptor */
1428 if (nd) {
1429 nfsm_chain_cleanup(&nd->nd_nmreq);
1430 if (nd->nd_nam2) {
1431 mbuf_freem(nd->nd_nam2);
1432 }
1433 if (IS_VALID_CRED(nd->nd_cr)) {
1434 kauth_cred_unref(&nd->nd_cr);
1435 }
1436 if (nd->nd_gss_context) {
1437 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1438 }
1439 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1440 nd = NULL;
1441 }
1442
1443 /*
1444 * Check to see if there are outstanding writes that
1445 * need to be serviced.
1446 */
1447 writes_todo = 0;
1448 if (slp->ns_wgtime) {
1449 microuptime(&now);
1450 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1451 (u_quad_t)now.tv_usec;
1452 if (slp->ns_wgtime <= cur_usec) {
1453 cacherep = RC_DOIT;
1454 writes_todo = 1;
1455 }
1456 }
1457 } while (writes_todo);
1458
1459 nd = NULL;
/*
 * Stay on this socket only while the wait queue is empty and fewer
 * than 8 passes have been made on it; otherwise nd stays NULL and the
 * socket reference is dropped below.
 */
1460 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1461 lck_rw_lock_exclusive(&slp->ns_rwlock);
1462 error = nfsrv_dorec(slp, nfsd, &nd);
1463 if (error == EINVAL) { // RPCSEC_GSS drop
1464 if (slp->ns_sotype == SOCK_STREAM) {
1465 nfsrv_zapsock(slp); // drop connection
1466 }
1467 }
1468 lck_rw_done(&slp->ns_rwlock);
1469 }
1470 if (!nd) {
1471 /* drop our reference on the socket */
1472 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1473 nfsd->nfsd_slp = NULL;
1474 nfsrv_slpderef(slp);
1475 }
1476 }
1477 lck_mtx_lock(nfsd_mutex);
/* reached with nfsd_mutex held: unlink and free this nfsd */
1478 done:
1479 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1480 FREE(nfsd, M_NFSD);
/* the last nfsd thread to exit tears down the server state */
1481 if (--nfsd_thread_count == 0) {
1482 nfsrv_cleanup();
1483 }
1484 lck_mtx_unlock(nfsd_mutex);
1485 return error;
1486 }
1487
1488 int
1489 nfssvc_export(user_addr_t argp)
1490 {
1491 int error = 0, is_64bit;
1492 struct user_nfs_export_args unxa;
1493 vfs_context_t ctx = vfs_context_current();
1494
1495 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1496
1497 /* copy in pointers to path and export args */
1498 if (is_64bit) {
1499 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1500 } else {
1501 struct nfs_export_args tnxa;
1502 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1503 if (error == 0) {
1504 /* munge into LP64 version of nfs_export_args structure */
1505 unxa.nxa_fsid = tnxa.nxa_fsid;
1506 unxa.nxa_expid = tnxa.nxa_expid;
1507 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1508 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1509 unxa.nxa_flags = tnxa.nxa_flags;
1510 unxa.nxa_netcount = tnxa.nxa_netcount;
1511 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1512 }
1513 }
1514 if (error) {
1515 return error;
1516 }
1517
1518 error = nfsrv_export(&unxa, ctx);
1519
1520 return error;
1521 }
1522
1523 /*
1524 * Shut down a socket associated with an nfsrv_sock structure.
1525 * Should be called with the send lock set, if required.
1526 * The trick here is to increment the sref at the start, so that the nfsds
1527 * will stop using it and clear ns_flag at the end so that it will not be
1528 * reassigned during cleanup.
1529 */
1530 void
1531 nfsrv_zapsock(struct nfsrv_sock *slp)
1532 {
1533 socket_t so;
1534
1535 if ((slp->ns_flag & SLP_VALID) == 0) {
1536 return;
1537 }
1538 slp->ns_flag &= ~SLP_ALLFLAGS;
1539
1540 so = slp->ns_so;
1541 if (so == NULL) {
1542 return;
1543 }
1544
1545 sock_setupcall(so, NULL, NULL);
1546 sock_shutdown(so, SHUT_RDWR);
1547
1548 /*
1549 * Remove from the up-call queue
1550 */
1551 nfsrv_uc_dequeue(slp);
1552 }
1553
1554 /*
1555 * cleanup and release a server socket structure.
1556 */
1557 void
1558 nfsrv_slpfree(struct nfsrv_sock *slp)
1559 {
1560 struct nfsrv_descript *nwp, *nnwp;
1561
1562 if (slp->ns_so) {
1563 sock_release(slp->ns_so);
1564 slp->ns_so = NULL;
1565 }
1566 if (slp->ns_nam) {
1567 mbuf_free(slp->ns_nam);
1568 }
1569 if (slp->ns_raw) {
1570 mbuf_freem(slp->ns_raw);
1571 }
1572 if (slp->ns_rec) {
1573 mbuf_freem(slp->ns_rec);
1574 }
1575 if (slp->ns_frag) {
1576 mbuf_freem(slp->ns_frag);
1577 }
1578 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1579 slp->ns_reccnt = 0;
1580
1581 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1582 nnwp = nwp->nd_tq.le_next;
1583 LIST_REMOVE(nwp, nd_tq);
1584 nfsm_chain_cleanup(&nwp->nd_nmreq);
1585 if (nwp->nd_mrep) {
1586 mbuf_freem(nwp->nd_mrep);
1587 }
1588 if (nwp->nd_nam2) {
1589 mbuf_freem(nwp->nd_nam2);
1590 }
1591 if (IS_VALID_CRED(nwp->nd_cr)) {
1592 kauth_cred_unref(&nwp->nd_cr);
1593 }
1594 if (nwp->nd_gss_context) {
1595 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1596 }
1597 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1598 }
1599 LIST_INIT(&slp->ns_tq);
1600
1601 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1602 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1603 FREE(slp, M_NFSSVC);
1604 }
1605
1606 /*
1607 * Derefence a server socket structure. If it has no more references and
1608 * is no longer valid, you can throw it away.
1609 */
1610 static void
1611 nfsrv_slpderef_locked(struct nfsrv_sock *slp)
1612 {
1613 lck_rw_lock_exclusive(&slp->ns_rwlock);
1614 slp->ns_sref--;
1615
1616 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1617 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1618 /* remove socket from queue since there's no work */
1619 if (slp->ns_flag & SLP_WAITQ) {
1620 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1621 } else {
1622 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1623 }
1624 slp->ns_flag &= ~SLP_QUEUED;
1625 }
1626 lck_rw_done(&slp->ns_rwlock);
1627 return;
1628 }
1629
1630 /* This socket is no longer valid, so we'll get rid of it */
1631
1632 if (slp->ns_flag & SLP_QUEUED) {
1633 if (slp->ns_flag & SLP_WAITQ) {
1634 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1635 } else {
1636 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1637 }
1638 slp->ns_flag &= ~SLP_QUEUED;
1639 }
1640 lck_rw_done(&slp->ns_rwlock);
1641
1642 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1643 if (slp->ns_sotype == SOCK_STREAM) {
1644 nfsrv_sock_tcp_cnt--;
1645 }
1646
1647 /* now remove from the write gather socket list */
1648 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1649 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1650 slp->ns_wgq.tqe_next = SLPNOLIST;
1651 }
1652 nfsrv_slpfree(slp);
1653 }
1654
1655 void
1656 nfsrv_slpderef(struct nfsrv_sock *slp)
1657 {
1658 lck_mtx_lock(nfsd_mutex);
1659 nfsrv_slpderef_locked(slp);
1660 lck_mtx_unlock(nfsd_mutex);
1661 }
1662
1663 /*
1664 * Check periodically for idle sockest if needed and
1665 * zap them.
1666 */
1667 void
1668 nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
1669 {
1670 struct nfsrv_sock *slp, *tslp;
1671 struct timeval now;
1672 time_t time_to_wait = nfsrv_sock_idle_timeout;
1673
1674 microuptime(&now);
1675 lck_mtx_lock(nfsd_mutex);
1676
1677 /* Turn off the timer if we're suppose to and get out */
1678 if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
1679 nfsrv_sock_idle_timeout = 0;
1680 }
1681 if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
1682 nfsrv_idlesock_timer_on = 0;
1683 lck_mtx_unlock(nfsd_mutex);
1684 return;
1685 }
1686
1687 TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
1688 lck_rw_lock_exclusive(&slp->ns_rwlock);
1689 /* Skip udp and referenced sockets */
1690 if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
1691 lck_rw_done(&slp->ns_rwlock);
1692 continue;
1693 }
1694 /*
1695 * If this is the first non-referenced socket that hasn't idle out,
1696 * use its time stamp to calculate the earlist time in the future
1697 * to start the next invocation of the timer. Since the nfsrv_socklist
1698 * is sorted oldest access to newest. Once we find the first one,
1699 * we're done and break out of the loop.
1700 */
1701 if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
1702 nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1703 time_to_wait -= now.tv_sec - slp->ns_timestamp;
1704 if (time_to_wait < 1) {
1705 time_to_wait = 1;
1706 }
1707 lck_rw_done(&slp->ns_rwlock);
1708 break;
1709 }
1710 /*
1711 * Bump the ref count. nfsrv_slpderef below will destroy
1712 * the socket, since nfsrv_zapsock has closed it.
1713 */
1714 slp->ns_sref++;
1715 nfsrv_zapsock(slp);
1716 lck_rw_done(&slp->ns_rwlock);
1717 nfsrv_slpderef_locked(slp);
1718 }
1719
1720 /* Start ourself back up */
1721 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1722 /* Remember when the next timer will fire for nfssvc_addsock. */
1723 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1724 lck_mtx_unlock(nfsd_mutex);
1725 }
1726
1727 /*
1728 * Clean up the data structures for the server.
1729 */
1730 void
1731 nfsrv_cleanup(void)
1732 {
1733 struct nfsrv_sock *slp, *nslp;
1734 struct timeval now;
1735 #if CONFIG_FSE
1736 struct nfsrv_fmod *fp, *nfp;
1737 int i;
1738 #endif
1739
1740 microuptime(&now);
1741 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1742 nslp = TAILQ_NEXT(slp, ns_chain);
1743 lck_rw_lock_exclusive(&slp->ns_rwlock);
1744 slp->ns_sref++;
1745 if (slp->ns_flag & SLP_VALID) {
1746 nfsrv_zapsock(slp);
1747 }
1748 lck_rw_done(&slp->ns_rwlock);
1749 nfsrv_slpderef_locked(slp);
1750 }
1751 #
1752 #if CONFIG_FSE
1753 /*
1754 * Flush pending file write fsevents
1755 */
1756 lck_mtx_lock(nfsrv_fmod_mutex);
1757 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1758 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1759 /*
1760 * Fire off the content modified fsevent for each
1761 * entry, remove it from the list, and free it.
1762 */
1763 if (nfsrv_fsevents_enabled) {
1764 fp->fm_context.vc_thread = current_thread();
1765 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1766 FSE_ARG_VNODE, fp->fm_vp,
1767 FSE_ARG_DONE);
1768 }
1769 vnode_put(fp->fm_vp);
1770 kauth_cred_unref(&fp->fm_context.vc_ucred);
1771 nfp = LIST_NEXT(fp, fm_link);
1772 LIST_REMOVE(fp, fm_link);
1773 FREE(fp, M_TEMP);
1774 }
1775 }
1776 nfsrv_fmod_pending = 0;
1777 lck_mtx_unlock(nfsrv_fmod_mutex);
1778 #endif
1779
1780 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1781
1782 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1783
1784 nfsrv_cleancache(); /* And clear out server cache */
1785
1786 nfsrv_udpsock = NULL;
1787 nfsrv_udp6sock = NULL;
1788 }
1789
1790 #endif /* NFS_NOSERVER */