/*
 * Source: apple/xnu — bsd/nfs/nfs_syscalls.c
 * (gitweb blob fe4bb37cf7de586449dbef76ac7339cac72765d1)
 */
1 /*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
79 #include <sys/stat.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
85 #include <sys/ubc.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
103
104 #include <security/audit/audit.h>
105
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
118 #if CONFIG_MACF
119 #include <security/mac_framework.h>
120 #endif
121
122 kern_return_t thread_terminate(thread_t); /* XXX */
123
124 #if NFSSERVER
125
126 extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
127
128 extern int nfsrv_wg_delay;
129 extern int nfsrv_wg_delay_v3;
130
131 static int nfsrv_require_resv_port = 0;
132 static time_t nfsrv_idlesock_timer_on = 0;
133 static int nfsrv_sock_tcp_cnt = 0;
134 #define NFSD_MIN_IDLE_TIMEOUT 30
135 static int nfsrv_sock_idle_timeout = 3600; /* One hour */
136
137 int nfssvc_export(user_addr_t argp);
138 int nfssvc_nfsd(void);
139 int nfssvc_addsock(socket_t, mbuf_t);
140 void nfsrv_zapsock(struct nfsrv_sock *);
141 void nfsrv_slpderef(struct nfsrv_sock *);
142 void nfsrv_slpfree(struct nfsrv_sock *);
143
144 #endif /* NFSSERVER */
145
146 /*
147 * sysctl stuff
148 */
149 SYSCTL_DECL(_vfs_generic);
150 SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
151
152 #if NFSCLIENT
153 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
154 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
158 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
159 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
160 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
161 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
162 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
163 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
164 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
165 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
166 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
167 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
168 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
169 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
170 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
171 SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
172 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
173 #if CONFIG_NFS_GSS
174 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
175 #endif
176 #if CONFIG_NFS4
177 SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
178 #endif
179 #endif /* NFSCLIENT */
180
181 #if NFSSERVER
182 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
183 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
184 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
185 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
186 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
187 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
188 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
189 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
190 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
191 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
192 #if CONFIG_FSE
193 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
194 #endif
195 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
196 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
197 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
198 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
199 #ifdef NFS_UC_Q_DEBUG
200 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
201 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
202 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
203 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
204 #endif
205 #endif /* NFSSERVER */
206
207
208 #if NFSCLIENT
209
210 #if CONFIG_NFS4
211 static int
212 mapname2id(struct nfs_testmapid *map)
213 {
214 int error;
215 error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
216 if (error) {
217 return error;
218 }
219
220 if (map->ntm_grpflag) {
221 error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
222 } else {
223 error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
224 }
225
226 return error;
227 }
228
229 static int
230 mapid2name(struct nfs_testmapid *map)
231 {
232 int error;
233 size_t len = sizeof(map->ntm_name);
234
235 if (map->ntm_grpflag) {
236 error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
237 } else {
238 error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
239 }
240
241 if (error) {
242 return error;
243 }
244
245 error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
246
247 return error;
248 }
249
250 static int
251 nfsclnt_testidmap(proc_t p, user_addr_t argp)
252 {
253 struct nfs_testmapid mapid;
254 int error, coerror;
255 size_t len = sizeof(mapid.ntm_name);
256
257 /* Let root make this call. */
258 error = proc_suser(p);
259 if (error) {
260 return error;
261 }
262
263 error = copyin(argp, &mapid, sizeof(mapid));
264 mapid.ntm_name[MAXIDNAMELEN - 1] = '\0';
265
266 if (error) {
267 return error;
268 }
269 switch (mapid.ntm_lookup) {
270 case NTM_NAME2ID:
271 error = mapname2id(&mapid);
272 break;
273 case NTM_ID2NAME:
274 error = mapid2name(&mapid);
275 break;
276 case NTM_NAME2GUID:
277 error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag);
278 break;
279 case NTM_GUID2NAME:
280 error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag);
281 break;
282 default:
283 return EINVAL;
284 }
285
286 coerror = copyout(&mapid, argp, sizeof(mapid));
287
288 return error ? error : coerror;
289 }
290 #endif
291
292 int
293 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
294 {
295 struct lockd_ans la;
296 int error;
297
298 switch (uap->flag) {
299 case NFSCLNT_LOCKDANS:
300 error = copyin(uap->argp, &la, sizeof(la));
301 if (!error) {
302 error = nfslockdans(p, &la);
303 }
304 break;
305 case NFSCLNT_LOCKDNOTIFY:
306 error = nfslockdnotify(p, uap->argp);
307 break;
308 #if CONFIG_NFS4
309 case NFSCLNT_TESTIDMAP:
310 error = nfsclnt_testidmap(p, uap->argp);
311 break;
312 #endif
313 default:
314 error = EINVAL;
315 }
316 return error;
317 }
318
319
320 /*
321 * Asynchronous I/O threads for client NFS.
322 * They do read-ahead and write-behind operations on the block I/O cache.
323 *
324 * The pool of up to nfsiod_thread_max threads is launched on demand and exit
325 * when unused for a while. There are as many nfsiod structs as there are
326 * nfsiod threads; however there's no strict tie between a thread and a struct.
327 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
328 * up, it removes the next struct nfsiod from the queue and services it. Then
329 * it will put the struct at the head of free list and sleep on it.
330 * Async requests will pull the next struct nfsiod from the head of the free list,
331 * put it on the work queue, and wake whatever thread is waiting on that struct.
332 */
333
334 /*
335 * nfsiod thread exit routine
336 *
337 * Must be called with nfsiod_mutex held so that the
338 * decision to terminate is atomic with the termination.
339 */
340 void
341 nfsiod_terminate(struct nfsiod *niod)
342 {
343 nfsiod_thread_count--;
344 lck_mtx_unlock(nfsiod_mutex);
345 if (niod) {
346 FREE(niod, M_TEMP);
347 } else {
348 printf("nfsiod: terminating without niod\n");
349 }
350 thread_terminate(current_thread());
351 /*NOTREACHED*/
352 }
353
/*
 * nfsiod thread startup routine.
 *
 * Allocates this thread's struct nfsiod, parks it on the free list, and
 * sleeps on it via msleep0() with nfsiod_continue as the continuation.
 * msleep0() is not expected to return; if it does (e.g. the idle timeout
 * fired without work), the thread reclaims an nfsiod struct and exits.
 * The wakeup(current_thread()) calls unblock nfsiod_start(), which is
 * sleeping on our thread pointer waiting for startup to complete.
 */
void
nfsiod_thread(void)
{
	struct nfsiod *niod;
	int error;

	MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
	if (!niod) {
		/* allocation failed: undo the count nfsiod_start() bumped,
		 * release our starter, and exit */
		lck_mtx_lock(nfsiod_mutex);
		nfsiod_thread_count--;
		wakeup(current_thread());
		lck_mtx_unlock(nfsiod_mutex);
		thread_terminate(current_thread());
		/*NOTREACHED*/
	}
	bzero(niod, sizeof(*niod));
	lck_mtx_lock(nfsiod_mutex);
	TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
	/* tell nfsiod_start() we're up, then sleep until work arrives */
	wakeup(current_thread());
	error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
	/* shouldn't return... so we have an error */
	/* remove an old nfsiod struct and terminate */
	lck_mtx_lock(nfsiod_mutex);
	if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
		TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	}
	nfsiod_terminate(niod);
	/*NOTREACHED*/
}
384
385 /*
386 * Start up another nfsiod thread.
387 * (unless we're already maxed out and there are nfsiods running)
388 */
389 int
390 nfsiod_start(void)
391 {
392 thread_t thd = THREAD_NULL;
393
394 lck_mtx_lock(nfsiod_mutex);
395 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
396 lck_mtx_unlock(nfsiod_mutex);
397 return EBUSY;
398 }
399 nfsiod_thread_count++;
400 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
401 lck_mtx_unlock(nfsiod_mutex);
402 return EBUSY;
403 }
404 /* wait for the thread to complete startup */
405 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
406 thread_deallocate(thd);
407 return 0;
408 }
409
410 /*
411 * Continuation for Asynchronous I/O threads for NFS client.
412 *
413 * Grab an nfsiod struct to work on, do some work, then drop it
414 */
415 int
416 nfsiod_continue(int error)
417 {
418 struct nfsiod *niod;
419 struct nfsmount *nmp;
420 struct nfsreq *req, *treq;
421 struct nfs_reqqhead iodq;
422 int morework;
423
424 lck_mtx_lock(nfsiod_mutex);
425 niod = TAILQ_FIRST(&nfsiodwork);
426 if (!niod) {
427 /* there's no work queued up */
428 /* remove an old nfsiod struct and terminate */
429 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
430 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
431 }
432 nfsiod_terminate(niod);
433 /*NOTREACHED*/
434 }
435 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
436
437 worktodo:
438 while ((nmp = niod->niod_nmp)) {
439 if (nmp == NULL) {
440 niod->niod_nmp = NULL;
441 break;
442 }
443
444 /*
445 * Service this mount's async I/O queue.
446 *
447 * In order to ensure some level of fairness between mounts,
448 * we grab all the work up front before processing it so any
449 * new work that arrives will be serviced on a subsequent
450 * iteration - and we have a chance to see if other work needs
451 * to be done (e.g. the delayed write queue needs to be pushed
452 * or other mounts are waiting for an nfsiod).
453 */
454 /* grab the current contents of the queue */
455 TAILQ_INIT(&iodq);
456 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
457 /* Mark each iod request as being managed by an iod */
458 TAILQ_FOREACH(req, &iodq, r_achain) {
459 lck_mtx_lock(&req->r_mtx);
460 assert(!(req->r_flags & R_IOD));
461 req->r_flags |= R_IOD;
462 lck_mtx_unlock(&req->r_mtx);
463 }
464 lck_mtx_unlock(nfsiod_mutex);
465
466 /* process the queue */
467 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
468 TAILQ_REMOVE(&iodq, req, r_achain);
469 req->r_achain.tqe_next = NFSREQNOLIST;
470 req->r_callback.rcb_func(req);
471 }
472
473 /* now check if there's more/other work to be done */
474 lck_mtx_lock(nfsiod_mutex);
475 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
476 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
477 /*
478 * we're going to stop working on this mount but if the
479 * mount still needs more work so queue it up
480 */
481 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) {
482 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
483 }
484 nmp->nm_niod = NULL;
485 niod->niod_nmp = NULL;
486 }
487 }
488
489 /* loop if there's still a mount to work on */
490 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
491 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
492 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
493 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
494 }
495 if (niod->niod_nmp) {
496 goto worktodo;
497 }
498
499 /* queue ourselves back up - if there aren't too many threads running */
500 if (nfsiod_thread_count <= NFSIOD_MAX) {
501 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
502 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
503 /* shouldn't return... so we have an error */
504 /* remove an old nfsiod struct and terminate */
505 lck_mtx_lock(nfsiod_mutex);
506 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
507 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
508 }
509 }
510 nfsiod_terminate(niod);
511 /*NOTREACHED*/
512 return 0;
513 }
514
515 #endif /* NFSCLIENT */
516
517
518 #if NFSSERVER
519
520 /*
521 * NFS server system calls
522 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
523 */
524
525 static struct nfs_exportfs *
526 nfsrv_find_exportfs(const char *ptr)
527 {
528 struct nfs_exportfs *nxfs;
529
530 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
531 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
532 break;
533 }
534 }
535 if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
536 nxfs = NULL;
537 }
538
539 return nxfs;
540 }
541
542 /*
543 * Get file handle system call
544 */
/*
 * Get file handle system call.
 *
 * Root-only: looks up uap->fname, finds the matching export, and builds
 * an NFS file handle (export header + filesystem fid) which is copied
 * out to uap->fhp.  The caller pre-seeds *uap->fhp with the desired
 * handle size (v2 or v3; anything else defaults to v3).
 */
int
getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error, fhlen, fidlen;
	struct nameidata nd;
	char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/*
	 * Must be super user
	 */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (!error) {
		/* caller passes the desired handle size in *fhp */
		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
	}
	if (error) {
		return error;
	}
	/* limit fh size to length specified (or v3 size by default) */
	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
		fhlen = NFSV3_MAX_FH_SIZE;
	}
	/* room left for the filesystem's fid after the export header */
	fidlen = fhlen - sizeof(struct nfs_exphandle);

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error) {
		return error;
	}
	nameidone(&nd);

	/* namei() returned the vnode with an iocount; released below */
	vp = nd.ni_vp;

	// find exportfs that matches f_mntonname
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
	if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
		/*
		 * The f_mntonname might be a firmlink path. Resolve
		 * it into a physical path and try again.
		 */
		int pathbuflen = MAXPATHLEN;
		vnode_t rvp;

		error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
		if (error) {
			goto out;
		}
		error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
		    VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
		vnode_put(rvp);
		if (error) {
			goto out;
		}
		ptr = real_mntonname;
		nxfs = nfsrv_find_exportfs(ptr);
	}
	if (nxfs == NULL) {
		error = EINVAL;
		goto out;
	}
	// find export that best matches remainder of path
	ptr = path + strlen(nxfs->nxfs_path);
	while (*ptr && (*ptr == '/')) {
		ptr++;
	}
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		int len = strlen(nx->nx_path);
		if (len == 0) { // we've hit the export entry for the root directory
			break;
		}
		if (!strncmp(nx->nx_path, ptr, len)) {
			break;
		}
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* assemble the handle: export header first, then the fs fid */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = fidlen;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	/* the fs may report a fid larger than the space we offered */
	if (nfh.nfh_len > (uint32_t)fidlen) {
		error = EOVERFLOW;
	}
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error) {
		return error;
	}
	/*
	 * At first blush, this may appear to leak a kernel stack
	 * address, but the copyout() never reaches &nfh.nfh_fhp
	 * (sizeof(fhandle_t) < sizeof(nfh)).
	 */
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
	return error;
}
668
669 extern const struct fileops vnops;
670
671 /*
672 * syscall for the rpc.lockd to use to translate a NFS file handle into
673 * an open descriptor.
674 *
675 * warning: do not remove the suser() call or this becomes one giant
676 * security hole.
677 */
/*
 * Open a file by NFS file handle (root only; used by rpc.lockd).
 *
 * Copies in the handle, resolves it to a vnode via nfsrv_fhtovp(),
 * performs the vn_open-equivalent checks/authorization, allocates a
 * file descriptor, and optionally takes an advisory flock.  On success
 * *retval is the new descriptor.
 */
int
fhopen( proc_t p,
	struct fhopen_args *uap,
	int32_t *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return error;
	}

	if (!nfsrv_is_initialized()) {
		return EINVAL;
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
		return EINVAL;
	}

	/* copy in just the length first so we can bound the full copy */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error) {
		return error;
	}
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
		return EINVAL;
	}
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error) {
		return error;
	}
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER) {
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		}
		return error;
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

#if CONFIG_MACF
	if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
		goto bad;
	}
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		action |= KAUTH_VNODE_WRITE_DATA;
	}
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
		goto bad;
	}

	if ((error = VNOP_OPEN(vp, fmode, ctx))) {
		goto bad;
	}
	/* take a usecount so the open survives the vnode_put() below */
	if ((error = vnode_ref_ext(vp, fmode, 0))) {
		goto bad;
	}

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	fp->f_fglob->fg_flag = fmode & FMASK;
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK) {
			lf.l_type = F_WRLCK;
		} else {
			lf.l_type = F_RDLCK;
		}
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0) {
			type |= F_WAIT;
		}
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->f_fglob->fg_cred;

			/* undo the open and the descriptor allocation */
			vn_close(vp, fp->f_fglob->fg_flag, &context);
			fp_free(p, indx, fp);
			return error;
		}
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	vnode_put(vp);

	/* publish the descriptor to the process's fd table */
	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return 0;

bad:
	vnode_put(vp);
	return error;
}
838
839 /*
840 * NFS server pseudo system call
841 */
/*
 * NFS server pseudo system call.
 *
 * Dispatches on uap->flag: NFSSVC_ADDSOCK hands a socket to the server,
 * NFSSVC_NFSD turns the calling thread into an nfsd service loop, and
 * NFSSVC_EXPORT updates the export list (which does its own permission
 * checks).  All other operations require super-user.
 */
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for most operations (export ops checked later).
	 */
	if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) {
		return error;
	}
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error) {
		return error;
	}
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			/* 32-bit caller: widen the args into the user_* form */
			struct nfsd_args tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error) {
			return error;
		}
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error) {
			return error;
		}
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return error;
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else {
		error = EINVAL;
	}
	/* an interrupted nfsd loop is a normal shutdown, not an error */
	if (error == EINTR || error == ERESTART) {
		error = 0;
	}
	return error;
}
919
920 /*
921 * Adds a socket to the list for servicing by nfsds.
922 */
923 int
924 nfssvc_addsock(socket_t so, mbuf_t mynam)
925 {
926 struct nfsrv_sock *slp;
927 int error = 0, sodomain, sotype, soprotocol, on = 1;
928 int first;
929 struct timeval timeo;
930
931 /* make sure mbuf constants are set up */
932 if (!nfs_mbuf_mhlen) {
933 nfs_mbuf_init();
934 }
935
936 sock_gettype(so, &sodomain, &sotype, &soprotocol);
937
938 /* There should be only one UDP socket for each of IPv4 and IPv6 */
939 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
940 mbuf_freem(mynam);
941 return EEXIST;
942 }
943 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
944 mbuf_freem(mynam);
945 return EEXIST;
946 }
947
948 /* Set protocol options and reserve some space (for UDP). */
949 if (sotype == SOCK_STREAM) {
950 error = nfsrv_check_exports_allow_address(mynam);
951 if (error) {
952 log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
953 mbuf_freem(mynam);
954 return error;
955 }
956 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
957 }
958 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
959 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
960 }
961 if (sotype == SOCK_DGRAM || sodomain == AF_LOCAL) { /* set socket buffer sizes for UDP */
962 int reserve = (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : (2 * 1024 * 1024);
963 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
964 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
965 if (error) {
966 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
967 error = 0;
968 }
969 }
970 sock_nointerrupt(so, 0);
971
972 /*
973 * Set socket send/receive timeouts.
974 * Receive timeout shouldn't matter, but setting the send timeout
975 * will make sure that an unresponsive client can't hang the server.
976 */
977 timeo.tv_usec = 0;
978 timeo.tv_sec = 1;
979 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
980 timeo.tv_sec = 30;
981 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
982 if (error) {
983 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
984 error = 0;
985 }
986
987 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
988 if (!slp) {
989 mbuf_freem(mynam);
990 return ENOMEM;
991 }
992 bzero((caddr_t)slp, sizeof(struct nfsrv_sock));
993 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
994 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
995
996 lck_mtx_lock(nfsd_mutex);
997
998 if (soprotocol == IPPROTO_UDP) {
999 if (sodomain == AF_INET) {
1000 /* There should be only one UDP/IPv4 socket */
1001 if (nfsrv_udpsock) {
1002 lck_mtx_unlock(nfsd_mutex);
1003 nfsrv_slpfree(slp);
1004 mbuf_freem(mynam);
1005 return EEXIST;
1006 }
1007 nfsrv_udpsock = slp;
1008 }
1009 if (sodomain == AF_INET6) {
1010 /* There should be only one UDP/IPv6 socket */
1011 if (nfsrv_udp6sock) {
1012 lck_mtx_unlock(nfsd_mutex);
1013 nfsrv_slpfree(slp);
1014 mbuf_freem(mynam);
1015 return EEXIST;
1016 }
1017 nfsrv_udp6sock = slp;
1018 }
1019 }
1020
1021 /* add the socket to the list */
1022 first = TAILQ_EMPTY(&nfsrv_socklist);
1023 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1024 if (sotype == SOCK_STREAM) {
1025 nfsrv_sock_tcp_cnt++;
1026 if (nfsrv_sock_idle_timeout < 0) {
1027 nfsrv_sock_idle_timeout = 0;
1028 }
1029 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
1030 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
1031 }
1032 /*
1033 * Possibly start or stop the idle timer. We only start the idle timer when
1034 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
1035 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
1036 * the number of connections.
1037 */
1038 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
1039 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1040 if (nfsrv_idlesock_timer_on) {
1041 thread_call_cancel(nfsrv_idlesock_timer_call);
1042 nfsrv_idlesock_timer_on = 0;
1043 }
1044 } else {
1045 struct nfsrv_sock *old_slp;
1046 struct timeval now;
1047 time_t time_to_wait = nfsrv_sock_idle_timeout;
1048 /*
1049 * Get the oldest tcp socket and calculate the
1050 * earliest time for the next idle timer to fire
1051 * based on the possibly updated nfsrv_sock_idle_timeout
1052 */
1053 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
1054 if (old_slp->ns_sotype == SOCK_STREAM) {
1055 microuptime(&now);
1056 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
1057 if (time_to_wait < 1) {
1058 time_to_wait = 1;
1059 }
1060 break;
1061 }
1062 }
1063 /*
1064 * If we have a timer scheduled, but if its going to fire too late,
1065 * turn it off.
1066 */
1067 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
1068 thread_call_cancel(nfsrv_idlesock_timer_call);
1069 nfsrv_idlesock_timer_on = 0;
1070 }
1071 /* Schedule the idle thread if it isn't already */
1072 if (!nfsrv_idlesock_timer_on) {
1073 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1074 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1075 }
1076 }
1077 }
1078 }
1079
1080 sock_retain(so); /* grab a retain count on the socket */
1081 slp->ns_so = so;
1082 slp->ns_sotype = sotype;
1083 slp->ns_nam = mynam;
1084
1085 /* set up the socket up-call */
1086 nfsrv_uc_addsock(slp, first);
1087
1088 /* mark that the socket is not in the nfsrv_sockwg list */
1089 slp->ns_wgq.tqe_next = SLPNOLIST;
1090
1091 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1092
1093 nfsrv_wakenfsd(slp);
1094 lck_mtx_unlock(nfsd_mutex);
1095
1096 return 0;
1097 }
1098
1099 /*
1100 * nfssvc_nfsd()
1101 *
1102 * nfsd theory of operation:
1103 *
1104 * The first nfsd thread stays in user mode accepting new TCP connections
1105 * which are then added via the "addsock" call. The rest of the nfsd threads
1106 * simply call into the kernel and remain there in a loop handling NFS
1107 * requests until killed by a signal.
1108 *
1109 * There's a list of nfsd threads (nfsd_head).
1110 * There's an nfsd queue that contains only those nfsds that are
1111 * waiting for work to do (nfsd_queue).
1112 *
 * There's a list of all NFS sockets (nfsrv_socklist) and three queues for
 * managing the work on the sockets:
 * nfsrv_sockwait - sockets w/new data waiting to be worked on
 * nfsrv_sockwork - sockets being worked on which may have more work to do
 * nfsrv_sockwg   - sockets which have pending write gather data
1118 * When a socket receives data, if it is not currently queued, it
1119 * will be placed at the end of the "wait" queue.
1120 * Whenever a socket needs servicing we make sure it is queued and
1121 * wake up a waiting nfsd (if there is one).
1122 *
1123 * nfsds will service at most 8 requests from the same socket before
1124 * defecting to work on another socket.
1125 * nfsds will defect immediately if there are any sockets in the "wait" queue
1126 * nfsds looking for a socket to work on check the "wait" queue first and
1127 * then check the "work" queue.
1128 * When an nfsd starts working on a socket, it removes it from the head of
1129 * the queue it's currently on and moves it to the end of the "work" queue.
1130 * When nfsds are checking the queues for work, any sockets found not to
1131 * have any work are simply dropped from the queue.
1132 *
1133 */
/*
 * Body of a kernel nfsd service thread (see the theory-of-operation
 * comment above).  Registers itself on nfsd_head, then loops: find a
 * socket with work, dequeue a request with nfsrv_dorec(), dispatch it
 * through nfsrv_procs[] (or the v3 write gatherer), and send the reply.
 * The first thread in initializes the request cache; the last thread
 * out runs nfsrv_cleanup().  Returns 0 or the error that ended the loop.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	u_quad_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
	if (!nfsd) {
		return ENOMEM;
	}
	bzero(nfsd, sizeof(struct nfsd));
	lck_mtx_lock(nfsd_mutex);
	if (nfsd_thread_count++ == 0) {
		nfsrv_initcache(); /* Init the server request cache */
	}
	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up a see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(nfsd_mutex);
			/* sleep (with periodic timeout) until a socket has work queued */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					/* may have been dequeued by nfsrv_wakenfsd() already */
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK) {
						continue; /* just the 5s timeout; re-check */
					}
					goto done; /* signalled: exit */
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break; /* keep ns_rwlock held */
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
						break; /* keep ns_rwlock held */
					}
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(nfsd_mutex);
			if (!slp) {
				continue;
			}
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
					/* pull any pending data off the socket first */
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN) {
					nfsrv_zapsock(slp);
				}
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) { // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM) {
						nfsrv_zapsock(slp); // drop connection
					}
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no new request, but gathered writes may be due */
					microuptime(&now);
					cur_usec = (u_quad_t)now.tv_sec * 1000000 +
					    (u_quad_t)now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* error path: tear down any request descriptor and drop the socket */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp) {
				nfsrv_slpderef(slp);
			}
			if (nfsd_thread_max <= 0) {
				break; /* server shutting down */
			}
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			if (nd->nd_nam2) {
				nd->nd_nam = nd->nd_nam2; /* UDP: per-request source address */
			} else {
				nd->nd_nam = slp->ns_nam; /* TCP: peer address from the socket */
			}

			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET) {
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				} else if (saddr->sa_family == AF_INET6) {
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				}
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					/* non-reserved port: reply with an auth error instead of servicing */
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}
		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3)) {
					procrastinate = nfsrv_wg_delay_v3;
				} else {
					procrastinate = nfsrv_wg_delay;
				}
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				} else {
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				}
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
						    nd->nd_procnum, error);
					}
					break;
				}
				if (error) {
					OSAddAtomic64(1, &nfsstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
			/* FALLTHRU */

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n", siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error) {
					panic("nfsd setrcvif failed: %d", error);
				}
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error) {
						/* high bit marks the final fragment of the record */
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
					}
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* thread was signalled mid-send: clean up and exit */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr)) {
						kauth_cred_unref(&nd->nd_cr);
					}
					if (nd->nd_gss_context) {
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					}
					FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
					nfsrv_slpderef(slp);
					lck_mtx_lock(nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* duplicate in-progress request: silently drop it */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			}
			;
			opcnt++;
			if (nd) {
				/* release the request descriptor for this iteration */
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
				}
				if (IS_VALID_CRED(nd->nd_cr)) {
					kauth_cred_unref(&nd->nd_cr);
				}
				if (nd->nd_gss_context) {
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				}
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (u_quad_t)now.tv_sec * 1000000 +
				    (u_quad_t)now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* service at most 8 requests per socket unless others are waiting */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) { // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM) {
					nfsrv_zapsock(slp); // drop connection
				}
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(nfsd_mutex);
done:
	/* exit path: nfsd_mutex is held here on every route to this label */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	FREE(nfsd, M_NFSD);
	if (--nfsd_thread_count == 0) {
		nfsrv_cleanup();
	}
	lck_mtx_unlock(nfsd_mutex);
	return error;
}
1531
1532 int
1533 nfssvc_export(user_addr_t argp)
1534 {
1535 int error = 0, is_64bit;
1536 struct user_nfs_export_args unxa;
1537 vfs_context_t ctx = vfs_context_current();
1538
1539 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1540
1541 /* copy in pointers to path and export args */
1542 if (is_64bit) {
1543 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1544 } else {
1545 struct nfs_export_args tnxa;
1546 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1547 if (error == 0) {
1548 /* munge into LP64 version of nfs_export_args structure */
1549 unxa.nxa_fsid = tnxa.nxa_fsid;
1550 unxa.nxa_expid = tnxa.nxa_expid;
1551 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1552 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1553 unxa.nxa_flags = tnxa.nxa_flags;
1554 unxa.nxa_netcount = tnxa.nxa_netcount;
1555 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1556 }
1557 }
1558 if (error) {
1559 return error;
1560 }
1561
1562 error = nfsrv_export(&unxa, ctx);
1563
1564 return error;
1565 }
1566
1567 /*
1568 * Shut down a socket associated with an nfsrv_sock structure.
1569 * Should be called with the send lock set, if required.
1570 * The trick here is to increment the sref at the start, so that the nfsds
1571 * will stop using it and clear ns_flag at the end so that it will not be
1572 * reassigned during cleanup.
1573 */
1574 void
1575 nfsrv_zapsock(struct nfsrv_sock *slp)
1576 {
1577 socket_t so;
1578
1579 if ((slp->ns_flag & SLP_VALID) == 0) {
1580 return;
1581 }
1582 slp->ns_flag &= ~SLP_ALLFLAGS;
1583
1584 so = slp->ns_so;
1585 if (so == NULL) {
1586 return;
1587 }
1588
1589 sock_setupcall(so, NULL, NULL);
1590 sock_shutdown(so, SHUT_RDWR);
1591
1592 /*
1593 * Remove from the up-call queue
1594 */
1595 nfsrv_uc_dequeue(slp);
1596 }
1597
1598 /*
1599 * cleanup and release a server socket structure.
1600 */
1601 void
1602 nfsrv_slpfree(struct nfsrv_sock *slp)
1603 {
1604 struct nfsrv_descript *nwp, *nnwp;
1605
1606 if (slp->ns_so) {
1607 sock_release(slp->ns_so);
1608 slp->ns_so = NULL;
1609 }
1610 if (slp->ns_nam) {
1611 mbuf_free(slp->ns_nam);
1612 }
1613 if (slp->ns_raw) {
1614 mbuf_freem(slp->ns_raw);
1615 }
1616 if (slp->ns_rec) {
1617 mbuf_freem(slp->ns_rec);
1618 }
1619 if (slp->ns_frag) {
1620 mbuf_freem(slp->ns_frag);
1621 }
1622 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1623 slp->ns_reccnt = 0;
1624
1625 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1626 nnwp = nwp->nd_tq.le_next;
1627 LIST_REMOVE(nwp, nd_tq);
1628 nfsm_chain_cleanup(&nwp->nd_nmreq);
1629 if (nwp->nd_mrep) {
1630 mbuf_freem(nwp->nd_mrep);
1631 }
1632 if (nwp->nd_nam2) {
1633 mbuf_freem(nwp->nd_nam2);
1634 }
1635 if (IS_VALID_CRED(nwp->nd_cr)) {
1636 kauth_cred_unref(&nwp->nd_cr);
1637 }
1638 if (nwp->nd_gss_context) {
1639 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1640 }
1641 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1642 }
1643 LIST_INIT(&slp->ns_tq);
1644
1645 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1646 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1647 FREE(slp, M_NFSSVC);
1648 }
1649
1650 /*
1651 * Derefence a server socket structure. If it has no more references and
1652 * is no longer valid, you can throw it away.
1653 */
1654 static void
1655 nfsrv_slpderef_locked(struct nfsrv_sock *slp)
1656 {
1657 lck_rw_lock_exclusive(&slp->ns_rwlock);
1658 slp->ns_sref--;
1659
1660 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1661 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1662 /* remove socket from queue since there's no work */
1663 if (slp->ns_flag & SLP_WAITQ) {
1664 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1665 } else {
1666 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1667 }
1668 slp->ns_flag &= ~SLP_QUEUED;
1669 }
1670 lck_rw_done(&slp->ns_rwlock);
1671 return;
1672 }
1673
1674 /* This socket is no longer valid, so we'll get rid of it */
1675
1676 if (slp->ns_flag & SLP_QUEUED) {
1677 if (slp->ns_flag & SLP_WAITQ) {
1678 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1679 } else {
1680 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1681 }
1682 slp->ns_flag &= ~SLP_QUEUED;
1683 }
1684 lck_rw_done(&slp->ns_rwlock);
1685
1686 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1687 if (slp->ns_sotype == SOCK_STREAM) {
1688 nfsrv_sock_tcp_cnt--;
1689 }
1690
1691 /* now remove from the write gather socket list */
1692 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1693 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1694 slp->ns_wgq.tqe_next = SLPNOLIST;
1695 }
1696 nfsrv_slpfree(slp);
1697 }
1698
/*
 * Drop a reference on a server socket structure (freeing it when the
 * last reference goes and it is no longer valid).  Takes nfsd_mutex,
 * which nfsrv_slpderef_locked() requires the caller to hold.
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(nfsd_mutex);
}
1706
/*
 * Check periodically for idle sockets if needed and
 * zap them.  Runs from nfsrv_idlesock_timer_call; reschedules itself
 * unless the timer is being turned off.
 */
void
nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp, *tslp;
	struct timeval now;
	time_t time_to_wait = nfsrv_sock_idle_timeout;

	microuptime(&now);
	lck_mtx_lock(nfsd_mutex);

	/* Turn off the timer if we're supposed to and get out */
	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
		nfsrv_sock_idle_timeout = 0;
	}
	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
		/* few enough connections (or timeout disabled): stop the timer */
		nfsrv_idlesock_timer_on = 0;
		lck_mtx_unlock(nfsd_mutex);
		return;
	}

	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
		lck_rw_lock_exclusive(&slp->ns_rwlock);
		/* Skip udp and referenced sockets */
		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
			lck_rw_done(&slp->ns_rwlock);
			continue;
		}
		/*
		 * If this is the first non-referenced socket that hasn't idled out,
		 * use its time stamp to calculate the earliest time in the future
		 * to start the next invocation of the timer. Since the nfsrv_socklist
		 * is sorted oldest access to newest. Once we find the first one,
		 * we're done and break out of the loop.
		 */
		if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
		    nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
			time_to_wait -= now.tv_sec - slp->ns_timestamp;
			if (time_to_wait < 1) {
				time_to_wait = 1;
			}
			lck_rw_done(&slp->ns_rwlock);
			break;
		}
		/*
		 * Bump the ref count. nfsrv_slpderef below will destroy
		 * the socket, since nfsrv_zapsock has closed it.
		 */
		slp->ns_sref++;
		nfsrv_zapsock(slp);
		lck_rw_done(&slp->ns_rwlock);
		/* nfsd_mutex is held, so use the _locked variant */
		nfsrv_slpderef_locked(slp);
	}

	/* Start ourself back up */
	nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
	/* Remember when the next timer will fire for nfssvc_addsock. */
	nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
	lck_mtx_unlock(nfsd_mutex);
}
1770
1771 /*
1772 * Clean up the data structures for the server.
1773 */
1774 void
1775 nfsrv_cleanup(void)
1776 {
1777 struct nfsrv_sock *slp, *nslp;
1778 struct timeval now;
1779 #if CONFIG_FSE
1780 struct nfsrv_fmod *fp, *nfp;
1781 int i;
1782 #endif
1783
1784 microuptime(&now);
1785 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1786 nslp = TAILQ_NEXT(slp, ns_chain);
1787 lck_rw_lock_exclusive(&slp->ns_rwlock);
1788 slp->ns_sref++;
1789 if (slp->ns_flag & SLP_VALID) {
1790 nfsrv_zapsock(slp);
1791 }
1792 lck_rw_done(&slp->ns_rwlock);
1793 nfsrv_slpderef_locked(slp);
1794 }
1795 #
1796 #if CONFIG_FSE
1797 /*
1798 * Flush pending file write fsevents
1799 */
1800 lck_mtx_lock(nfsrv_fmod_mutex);
1801 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1802 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1803 /*
1804 * Fire off the content modified fsevent for each
1805 * entry, remove it from the list, and free it.
1806 */
1807 if (nfsrv_fsevents_enabled) {
1808 fp->fm_context.vc_thread = current_thread();
1809 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1810 FSE_ARG_VNODE, fp->fm_vp,
1811 FSE_ARG_DONE);
1812 }
1813 vnode_put(fp->fm_vp);
1814 kauth_cred_unref(&fp->fm_context.vc_ucred);
1815 nfp = LIST_NEXT(fp, fm_link);
1816 LIST_REMOVE(fp, fm_link);
1817 FREE(fp, M_TEMP);
1818 }
1819 }
1820 nfsrv_fmod_pending = 0;
1821 lck_mtx_unlock(nfsrv_fmod_mutex);
1822 #endif
1823
1824 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1825
1826 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1827
1828 nfsrv_cleancache(); /* And clear out server cache */
1829
1830 nfsrv_udpsock = NULL;
1831 nfsrv_udp6sock = NULL;
1832 }
1833
1834 #endif /* NFS_NOSERVER */