]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_syscalls.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
1 /*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67
68 #include <nfs/nfs_conf.h>
69
70 /*
71 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72 * support for mandatory and extensible security protections. This notice
73 * is included in support of clause 2.2 (b) of the Apple Public License,
74 * Version 2.0.
75 */
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/filedesc.h>
82 #include <sys/stat.h>
83 #include <sys/vnode_internal.h>
84 #include <sys/mount_internal.h>
85 #include <sys/proc_internal.h> /* for fdflags */
86 #include <sys/kauth.h>
87 #include <sys/sysctl.h>
88 #include <sys/ubc.h>
89 #include <sys/uio.h>
90 #include <sys/malloc.h>
91 #include <sys/kpi_mbuf.h>
92 #include <sys/socket.h>
93 #include <sys/socketvar.h>
94 #include <sys/domain.h>
95 #include <sys/protosw.h>
96 #include <sys/fcntl.h>
97 #include <sys/lockf.h>
98 #include <sys/syslog.h>
99 #include <sys/user.h>
100 #include <sys/sysproto.h>
101 #include <sys/kpi_socket.h>
102 #include <sys/fsevents.h>
103 #include <libkern/OSAtomic.h>
104 #include <kern/thread_call.h>
105 #include <kern/task.h>
106
107 #include <security/audit/audit.h>
108
109 #include <netinet/in.h>
110 #include <netinet/tcp.h>
111 #include <nfs/xdr_subs.h>
112 #include <nfs/rpcv2.h>
113 #include <nfs/nfsproto.h>
114 #include <nfs/nfs.h>
115 #include <nfs/nfsm_subs.h>
116 #include <nfs/nfsrvcache.h>
117 #include <nfs/nfs_gss.h>
118 #include <nfs/nfsmount.h>
119 #include <nfs/nfsnode.h>
120 #include <nfs/nfs_lock.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124
125 kern_return_t thread_terminate(thread_t); /* XXX */
126
127 #if CONFIG_NFS_SERVER
128
129 extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
130
131 extern int nfsrv_wg_delay;
132 extern int nfsrv_wg_delay_v3;
133
134 static int nfsrv_require_resv_port = 0;
135 static time_t nfsrv_idlesock_timer_on = 0;
136 static int nfsrv_sock_tcp_cnt = 0;
137 #define NFSD_MIN_IDLE_TIMEOUT 30
138 static int nfsrv_sock_idle_timeout = 3600; /* One hour */
139
140 int nfssvc_export(user_addr_t argp);
141 int nfssvc_nfsd(void);
142 int nfssvc_addsock(socket_t, mbuf_t);
143 void nfsrv_zapsock(struct nfsrv_sock *);
144 void nfsrv_slpderef(struct nfsrv_sock *);
145 void nfsrv_slpfree(struct nfsrv_sock *);
146
147 #endif /* CONFIG_NFS_SERVER */
148
#if CONFIG_NFS
/*
 * sysctl stuff
 *
 * Root node shared by the client and server subtrees: vfs.generic.nfs
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
#endif /* CONFIG_NFS */

#if CONFIG_NFS_CLIENT
/*
 * vfs.generic.nfs.client.* -- each entry exposes one client variable.
 * Entries flagged CTLFLAG_RD are read-only; the rest are read/write.
 */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
#if CONFIG_NFS_GSS
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
#endif
#if CONFIG_NFS4
SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain,
    CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain,
    sizeof(nfs4_default_domain), "");
#endif
#endif /* CONFIG_NFS_CLIENT */
185
#if CONFIG_NFS_SERVER
/*
 * vfs.generic.nfs.server.* -- each entry exposes one server variable.
 * Entries flagged CTLFLAG_RD are read-only; the rest are read/write.
 */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* CONFIG_NFS_SERVER */
211
212 #if CONFIG_NFS_CLIENT && CONFIG_NFS4
213 static int
214 mapname2id(struct nfs_testmapid *map)
215 {
216 int error;
217 error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
218 if (error) {
219 return error;
220 }
221
222 if (map->ntm_grpflag) {
223 error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
224 } else {
225 error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
226 }
227
228 return error;
229 }
230
231 static int
232 mapid2name(struct nfs_testmapid *map)
233 {
234 int error;
235 size_t len = sizeof(map->ntm_name);
236
237 if (map->ntm_grpflag) {
238 error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
239 } else {
240 error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
241 }
242
243 if (error) {
244 return error;
245 }
246
247 error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
248
249 return error;
250 }
251
252 static int
253 nfsclnt_testidmap(proc_t p, user_addr_t argp)
254 {
255 struct nfs_testmapid mapid;
256 int error, coerror;
257 size_t len = sizeof(mapid.ntm_name);
258
259 /* Let root make this call. */
260 error = proc_suser(p);
261 if (error) {
262 return error;
263 }
264
265 error = copyin(argp, &mapid, sizeof(mapid));
266 mapid.ntm_name[MAXIDNAMELEN - 1] = '\0';
267
268 if (error) {
269 return error;
270 }
271 switch (mapid.ntm_lookup) {
272 case NTM_NAME2ID:
273 error = mapname2id(&mapid);
274 break;
275 case NTM_ID2NAME:
276 error = mapid2name(&mapid);
277 break;
278 case NTM_NAME2GUID:
279 error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag);
280 break;
281 case NTM_GUID2NAME:
282 error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag);
283 break;
284 default:
285 return EINVAL;
286 }
287
288 coerror = copyout(&mapid, argp, sizeof(mapid));
289
290 return error ? error : coerror;
291 }
292 #endif /* CONFIG_NFS_CLIENT && CONFIG_NFS4 */
293
/*
 * Parameter annotation for nfsclnt(): expands to __unused only when the
 * NFS client is configured out and the parameters are never referenced.
 */
#if CONFIG_NFS_CLIENT
#define __no_nfs_client_unused      /* nothing */
#else
#define __no_nfs_client_unused      __unused
#endif
299
300 int
301 nfsclnt(
302 proc_t p __no_nfs_client_unused,
303 struct nfsclnt_args *uap __no_nfs_client_unused,
304 __unused int *retval)
305 {
306 #if CONFIG_NFS_CLIENT
307 struct lockd_ans la;
308 int error;
309
310 switch (uap->flag) {
311 case NFSCLNT_LOCKDANS:
312 error = copyin(uap->argp, &la, sizeof(la));
313 if (!error) {
314 error = nfslockdans(p, &la);
315 }
316 break;
317 case NFSCLNT_LOCKDNOTIFY:
318 error = nfslockdnotify(p, uap->argp);
319 break;
320 #if CONFIG_NFS4
321 case NFSCLNT_TESTIDMAP:
322 error = nfsclnt_testidmap(p, uap->argp);
323 break;
324 #endif
325 default:
326 error = EINVAL;
327 }
328 return error;
329 #else
330 return ENOSYS;
331 #endif /* CONFIG_NFS_CLIENT */
332 }
333
334 #if CONFIG_NFS_CLIENT
335
/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand, and
 * each thread exits after it has been unused for a while.  There are as many
 * nfsiod structs as there are nfsiod threads; however, there's no strict tie
 * between a thread and a struct.  Each thread puts an nfsiod on the free list
 * and sleeps on it.  When it wakes up, it removes the next struct nfsiod from
 * the work queue and services it.  Then it puts the struct at the head of the
 * free list and sleeps on it.  Async requests pull the next struct nfsiod
 * from the head of the free list, put it on the work queue, and wake whatever
 * thread is waiting on that struct.
 */
349
350 /*
351 * nfsiod thread exit routine
352 *
353 * Must be called with nfsiod_mutex held so that the
354 * decision to terminate is atomic with the termination.
355 */
356 void
357 nfsiod_terminate(struct nfsiod *niod)
358 {
359 nfsiod_thread_count--;
360 lck_mtx_unlock(nfsiod_mutex);
361 if (niod) {
362 FREE(niod, M_TEMP);
363 } else {
364 printf("nfsiod: terminating without niod\n");
365 }
366 thread_terminate(current_thread());
367 /*NOTREACHED*/
368 }
369
370 /* nfsiod thread startup routine */
371 void
372 nfsiod_thread(void)
373 {
374 struct nfsiod *niod;
375 int error;
376
377 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
378 if (!niod) {
379 lck_mtx_lock(nfsiod_mutex);
380 nfsiod_thread_count--;
381 wakeup(current_thread());
382 lck_mtx_unlock(nfsiod_mutex);
383 thread_terminate(current_thread());
384 /*NOTREACHED*/
385 }
386 bzero(niod, sizeof(*niod));
387 lck_mtx_lock(nfsiod_mutex);
388 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
389 wakeup(current_thread());
390 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
391 /* shouldn't return... so we have an error */
392 /* remove an old nfsiod struct and terminate */
393 lck_mtx_lock(nfsiod_mutex);
394 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
395 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
396 }
397 nfsiod_terminate(niod);
398 /*NOTREACHED*/
399 }
400
401 /*
402 * Start up another nfsiod thread.
403 * (unless we're already maxed out and there are nfsiods running)
404 */
405 int
406 nfsiod_start(void)
407 {
408 thread_t thd = THREAD_NULL;
409
410 lck_mtx_lock(nfsiod_mutex);
411 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
412 lck_mtx_unlock(nfsiod_mutex);
413 return EBUSY;
414 }
415 nfsiod_thread_count++;
416 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
417 lck_mtx_unlock(nfsiod_mutex);
418 return EBUSY;
419 }
420 /* wait for the thread to complete startup */
421 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
422 thread_deallocate(thd);
423 return 0;
424 }
425
426 /*
427 * Continuation for Asynchronous I/O threads for NFS client.
428 *
429 * Grab an nfsiod struct to work on, do some work, then drop it
430 */
431 int
432 nfsiod_continue(int error)
433 {
434 struct nfsiod *niod;
435 struct nfsmount *nmp;
436 struct nfsreq *req, *treq;
437 struct nfs_reqqhead iodq;
438 int morework;
439
440 lck_mtx_lock(nfsiod_mutex);
441 niod = TAILQ_FIRST(&nfsiodwork);
442 if (!niod) {
443 /* there's no work queued up */
444 /* remove an old nfsiod struct and terminate */
445 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
446 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
447 }
448 nfsiod_terminate(niod);
449 /*NOTREACHED*/
450 }
451 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
452
453 worktodo:
454 while ((nmp = niod->niod_nmp)) {
455 if (nmp == NULL) {
456 niod->niod_nmp = NULL;
457 break;
458 }
459
460 /*
461 * Service this mount's async I/O queue.
462 *
463 * In order to ensure some level of fairness between mounts,
464 * we grab all the work up front before processing it so any
465 * new work that arrives will be serviced on a subsequent
466 * iteration - and we have a chance to see if other work needs
467 * to be done (e.g. the delayed write queue needs to be pushed
468 * or other mounts are waiting for an nfsiod).
469 */
470 /* grab the current contents of the queue */
471 TAILQ_INIT(&iodq);
472 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
473 /* Mark each iod request as being managed by an iod */
474 TAILQ_FOREACH(req, &iodq, r_achain) {
475 lck_mtx_lock(&req->r_mtx);
476 assert(!(req->r_flags & R_IOD));
477 req->r_flags |= R_IOD;
478 lck_mtx_unlock(&req->r_mtx);
479 }
480 lck_mtx_unlock(nfsiod_mutex);
481
482 /* process the queue */
483 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
484 TAILQ_REMOVE(&iodq, req, r_achain);
485 req->r_achain.tqe_next = NFSREQNOLIST;
486 req->r_callback.rcb_func(req);
487 }
488
489 /* now check if there's more/other work to be done */
490 lck_mtx_lock(nfsiod_mutex);
491 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
492 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
493 /*
494 * we're going to stop working on this mount but if the
495 * mount still needs more work so queue it up
496 */
497 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) {
498 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
499 }
500 nmp->nm_niod = NULL;
501 niod->niod_nmp = NULL;
502 }
503 }
504
505 /* loop if there's still a mount to work on */
506 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
507 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
508 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
509 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
510 }
511 if (niod->niod_nmp) {
512 goto worktodo;
513 }
514
515 /* queue ourselves back up - if there aren't too many threads running */
516 if (nfsiod_thread_count <= NFSIOD_MAX) {
517 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
518 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
519 /* shouldn't return... so we have an error */
520 /* remove an old nfsiod struct and terminate */
521 lck_mtx_lock(nfsiod_mutex);
522 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
523 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
524 }
525 }
526 nfsiod_terminate(niod);
527 /*NOTREACHED*/
528 return 0;
529 }
530
531 #endif /* CONFIG_NFS_CLIENT */
532
/*
 * Parameter annotation for the NFS server system calls: expands to
 * __unused only when the NFS server is configured out.
 */
#if CONFIG_NFS_SERVER
#define __no_nfs_server_unused      /* nothing */
#else
#define __no_nfs_server_unused      __unused
#endif
538
539 /*
540 * NFS server system calls
541 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
542 */
543
544 #if CONFIG_NFS_SERVER
545 static struct nfs_exportfs *
546 nfsrv_find_exportfs(const char *ptr)
547 {
548 struct nfs_exportfs *nxfs;
549
550 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
551 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
552 break;
553 }
554 }
555 if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
556 nxfs = NULL;
557 }
558
559 return nxfs;
560 }
561
562 /*
563 * Get file handle system call
564 */
565 int
566 getfh(
567 proc_t p __no_nfs_server_unused,
568 struct getfh_args *uap __no_nfs_server_unused,
569 __unused int *retval)
570 {
571 vnode_t vp;
572 struct nfs_filehandle nfh;
573 int error, fhlen, fidlen;
574 struct nameidata nd;
575 char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
576 size_t pathlen;
577 struct nfs_exportfs *nxfs;
578 struct nfs_export *nx;
579
580 /*
581 * Must be super user
582 */
583 error = proc_suser(p);
584 if (error) {
585 return error;
586 }
587
588 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
589 if (!error) {
590 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
591 }
592 if (error) {
593 return error;
594 }
595 /* limit fh size to length specified (or v3 size by default) */
596 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
597 fhlen = NFSV3_MAX_FH_SIZE;
598 }
599 fidlen = fhlen - sizeof(struct nfs_exphandle);
600
601 if (!nfsrv_is_initialized()) {
602 return EINVAL;
603 }
604
605 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
606 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
607 error = namei(&nd);
608 if (error) {
609 return error;
610 }
611 nameidone(&nd);
612
613 vp = nd.ni_vp;
614
615 // find exportfs that matches f_mntonname
616 lck_rw_lock_shared(&nfsrv_export_rwlock);
617 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
618 if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
619 /*
620 * The f_mntonname might be a firmlink path. Resolve
621 * it into a physical path and try again.
622 */
623 int pathbuflen = MAXPATHLEN;
624 vnode_t rvp;
625
626 error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
627 if (error) {
628 goto out;
629 }
630 error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
631 VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
632 vnode_put(rvp);
633 if (error) {
634 goto out;
635 }
636 ptr = real_mntonname;
637 nxfs = nfsrv_find_exportfs(ptr);
638 }
639 if (nxfs == NULL) {
640 error = EINVAL;
641 goto out;
642 }
643 // find export that best matches remainder of path
644 ptr = path + strlen(nxfs->nxfs_path);
645 while (*ptr && (*ptr == '/')) {
646 ptr++;
647 }
648 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
649 int len = strlen(nx->nx_path);
650 if (len == 0) { // we've hit the export entry for the root directory
651 break;
652 }
653 if (!strncmp(nx->nx_path, ptr, len)) {
654 break;
655 }
656 }
657 if (!nx) {
658 error = EINVAL;
659 goto out;
660 }
661
662 bzero(&nfh, sizeof(nfh));
663 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
664 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
665 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
666 nfh.nfh_xh.nxh_flags = 0;
667 nfh.nfh_xh.nxh_reserved = 0;
668 nfh.nfh_len = fidlen;
669 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
670 if (nfh.nfh_len > (uint32_t)fidlen) {
671 error = EOVERFLOW;
672 }
673 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
674 nfh.nfh_len += sizeof(nfh.nfh_xh);
675 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
676
677 out:
678 lck_rw_done(&nfsrv_export_rwlock);
679 vnode_put(vp);
680 if (error) {
681 return error;
682 }
683 /*
684 * At first blush, this may appear to leak a kernel stack
685 * address, but the copyout() never reaches &nfh.nfh_fhp
686 * (sizeof(fhandle_t) < sizeof(nfh)).
687 */
688 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
689 return error;
690 }
691 #endif /* CONFIG_NFS_SERVER */
692
693 #if CONFIG_NFS_SERVER
694 extern const struct fileops vnops;
695
696 /*
697 * syscall for the rpc.lockd to use to translate a NFS file handle into
698 * an open descriptor.
699 *
700 * warning: do not remove the suser() call or this becomes one giant
701 * security hole.
702 */
703 int
704 fhopen(proc_t p __no_nfs_server_unused,
705 struct fhopen_args *uap __no_nfs_server_unused,
706 int32_t *retval __no_nfs_server_unused)
707 {
708 vnode_t vp;
709 struct nfs_filehandle nfh;
710 struct nfs_export *nx;
711 struct nfs_export_options *nxo;
712 struct flock lf;
713 struct fileproc *fp, *nfp;
714 int fmode, error, type;
715 int indx;
716 vfs_context_t ctx = vfs_context_current();
717 kauth_action_t action;
718
719 /*
720 * Must be super user
721 */
722 error = suser(vfs_context_ucred(ctx), 0);
723 if (error) {
724 return error;
725 }
726
727 if (!nfsrv_is_initialized()) {
728 return EINVAL;
729 }
730
731 fmode = FFLAGS(uap->flags);
732 /* why not allow a non-read/write open for our lockd? */
733 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
734 return EINVAL;
735 }
736
737 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
738 if (error) {
739 return error;
740 }
741 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
742 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
743 return EINVAL;
744 }
745 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
746 if (error) {
747 return error;
748 }
749 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
750
751 lck_rw_lock_shared(&nfsrv_export_rwlock);
752 /* now give me my vnode, it gets returned to me with a reference */
753 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
754 lck_rw_done(&nfsrv_export_rwlock);
755 if (error) {
756 if (error == NFSERR_TRYLATER) {
757 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
758 }
759 return error;
760 }
761
762 /*
763 * From now on we have to make sure not
764 * to forget about the vnode.
765 * Any error that causes an abort must vnode_put(vp).
766 * Just set error = err and 'goto bad;'.
767 */
768
769 /*
770 * from vn_open
771 */
772 if (vnode_vtype(vp) == VSOCK) {
773 error = EOPNOTSUPP;
774 goto bad;
775 }
776
777 /* disallow write operations on directories */
778 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
779 error = EISDIR;
780 goto bad;
781 }
782
783 #if CONFIG_MACF
784 if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
785 goto bad;
786 }
787 #endif
788
789 /* compute action to be authorized */
790 action = 0;
791 if (fmode & FREAD) {
792 action |= KAUTH_VNODE_READ_DATA;
793 }
794 if (fmode & (FWRITE | O_TRUNC)) {
795 action |= KAUTH_VNODE_WRITE_DATA;
796 }
797 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
798 goto bad;
799 }
800
801 if ((error = VNOP_OPEN(vp, fmode, ctx))) {
802 goto bad;
803 }
804 if ((error = vnode_ref_ext(vp, fmode, 0))) {
805 goto bad;
806 }
807
808 /*
809 * end of vn_open code
810 */
811
812 // starting here... error paths should call vn_close/vnode_put
813 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
814 vn_close(vp, fmode & FMASK, ctx);
815 goto bad;
816 }
817 fp = nfp;
818
819 fp->f_fglob->fg_flag = fmode & FMASK;
820 fp->f_fglob->fg_ops = &vnops;
821 fp->f_fglob->fg_data = (caddr_t)vp;
822
823 // XXX do we really need to support this with fhopen()?
824 if (fmode & (O_EXLOCK | O_SHLOCK)) {
825 lf.l_whence = SEEK_SET;
826 lf.l_start = 0;
827 lf.l_len = 0;
828 if (fmode & O_EXLOCK) {
829 lf.l_type = F_WRLCK;
830 } else {
831 lf.l_type = F_RDLCK;
832 }
833 type = F_FLOCK;
834 if ((fmode & FNONBLOCK) == 0) {
835 type |= F_WAIT;
836 }
837 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
838 struct vfs_context context = *vfs_context_current();
839 /* Modify local copy (to not damage thread copy) */
840 context.vc_ucred = fp->f_fglob->fg_cred;
841
842 vn_close(vp, fp->f_fglob->fg_flag, &context);
843 fp_free(p, indx, fp);
844 return error;
845 }
846 fp->f_fglob->fg_flag |= FHASLOCK;
847 }
848
849 vnode_put(vp);
850
851 proc_fdlock(p);
852 procfdtbl_releasefd(p, indx, NULL);
853 fp_drop(p, indx, fp, 1);
854 proc_fdunlock(p);
855
856 *retval = indx;
857 return 0;
858
859 bad:
860 vnode_put(vp);
861 return error;
862 }
863 #endif /* CONFIG_NFS_SERVER */
864
865 #if CONFIG_NFS_SERVER
866 /*
867 * NFS server pseudo system call
868 */
869 int
870 nfssvc(proc_t p __no_nfs_server_unused,
871 struct nfssvc_args *uap __no_nfs_server_unused,
872 __unused int *retval)
873 {
874 mbuf_t nam;
875 struct user_nfsd_args user_nfsdarg;
876 socket_t so;
877 int error;
878
879 AUDIT_ARG(cmd, uap->flag);
880
881 /*
882 * Must be super user for most operations (export ops checked later).
883 */
884 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) {
885 return error;
886 }
887 #if CONFIG_MACF
888 error = mac_system_check_nfsd(kauth_cred_get());
889 if (error) {
890 return error;
891 }
892 #endif
893
894 /* make sure NFS server data structures have been initialized */
895 nfsrv_init();
896
897 if (uap->flag & NFSSVC_ADDSOCK) {
898 if (IS_64BIT_PROCESS(p)) {
899 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
900 } else {
901 struct nfsd_args tmp_args;
902 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
903 if (error == 0) {
904 user_nfsdarg.sock = tmp_args.sock;
905 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
906 user_nfsdarg.namelen = tmp_args.namelen;
907 }
908 }
909 if (error) {
910 return error;
911 }
912 /* get the socket */
913 error = file_socket(user_nfsdarg.sock, &so);
914 if (error) {
915 return error;
916 }
917 /* Get the client address for connected sockets. */
918 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
919 nam = NULL;
920 } else {
921 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
922 if (error) {
923 /* drop the iocount file_socket() grabbed on the file descriptor */
924 file_drop(user_nfsdarg.sock);
925 return error;
926 }
927 }
928 /*
929 * nfssvc_addsock() will grab a retain count on the socket
930 * to keep the socket from being closed when nfsd closes its
931 * file descriptor for it.
932 */
933 error = nfssvc_addsock(so, nam);
934 /* drop the iocount file_socket() grabbed on the file descriptor */
935 file_drop(user_nfsdarg.sock);
936 } else if (uap->flag & NFSSVC_NFSD) {
937 error = nfssvc_nfsd();
938 } else if (uap->flag & NFSSVC_EXPORT) {
939 error = nfssvc_export(uap->argp);
940 } else {
941 error = EINVAL;
942 }
943 if (error == EINTR || error == ERESTART) {
944 error = 0;
945 }
946 return error;
947 }
948 #endif /* CONFIG_NFS_SERVER */
949
950 #if CONFIG_NFS_SERVER
951
952 /*
953 * Adds a socket to the list for servicing by nfsds.
954 */
955 int
956 nfssvc_addsock(socket_t so, mbuf_t mynam)
957 {
958 struct nfsrv_sock *slp;
959 int error = 0, sodomain, sotype, soprotocol, on = 1;
960 int first;
961 struct timeval timeo;
962
963 /* make sure mbuf constants are set up */
964 if (!nfs_mbuf_mhlen) {
965 nfs_mbuf_init();
966 }
967
968 sock_gettype(so, &sodomain, &sotype, &soprotocol);
969
970 /* There should be only one UDP socket for each of IPv4 and IPv6 */
971 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
972 mbuf_freem(mynam);
973 return EEXIST;
974 }
975 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
976 mbuf_freem(mynam);
977 return EEXIST;
978 }
979
980 /* Set protocol options and reserve some space (for UDP). */
981 if (sotype == SOCK_STREAM) {
982 error = nfsrv_check_exports_allow_address(mynam);
983 if (error) {
984 log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
985 mbuf_freem(mynam);
986 return error;
987 }
988 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
989 }
990 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
991 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
992 }
993 if (sotype == SOCK_DGRAM || sodomain == AF_LOCAL) { /* set socket buffer sizes for UDP */
994 int reserve = (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : (2 * 1024 * 1024);
995 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
996 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
997 if (error) {
998 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
999 error = 0;
1000 }
1001 }
1002 sock_nointerrupt(so, 0);
1003
1004 /*
1005 * Set socket send/receive timeouts.
1006 * Receive timeout shouldn't matter, but setting the send timeout
1007 * will make sure that an unresponsive client can't hang the server.
1008 */
1009 timeo.tv_usec = 0;
1010 timeo.tv_sec = 1;
1011 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
1012 timeo.tv_sec = 30;
1013 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1014 if (error) {
1015 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
1016 error = 0;
1017 }
1018
1019 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
1020 if (!slp) {
1021 mbuf_freem(mynam);
1022 return ENOMEM;
1023 }
1024 bzero((caddr_t)slp, sizeof(struct nfsrv_sock));
1025 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
1026 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
1027
1028 lck_mtx_lock(nfsd_mutex);
1029
1030 if (soprotocol == IPPROTO_UDP) {
1031 if (sodomain == AF_INET) {
1032 /* There should be only one UDP/IPv4 socket */
1033 if (nfsrv_udpsock) {
1034 lck_mtx_unlock(nfsd_mutex);
1035 nfsrv_slpfree(slp);
1036 mbuf_freem(mynam);
1037 return EEXIST;
1038 }
1039 nfsrv_udpsock = slp;
1040 }
1041 if (sodomain == AF_INET6) {
1042 /* There should be only one UDP/IPv6 socket */
1043 if (nfsrv_udp6sock) {
1044 lck_mtx_unlock(nfsd_mutex);
1045 nfsrv_slpfree(slp);
1046 mbuf_freem(mynam);
1047 return EEXIST;
1048 }
1049 nfsrv_udp6sock = slp;
1050 }
1051 }
1052
1053 /* add the socket to the list */
1054 first = TAILQ_EMPTY(&nfsrv_socklist);
1055 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1056 if (sotype == SOCK_STREAM) {
1057 nfsrv_sock_tcp_cnt++;
1058 if (nfsrv_sock_idle_timeout < 0) {
1059 nfsrv_sock_idle_timeout = 0;
1060 }
1061 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
1062 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
1063 }
1064 /*
1065 * Possibly start or stop the idle timer. We only start the idle timer when
1066 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
1067 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
1068 * the number of connections.
1069 */
1070 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
1071 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1072 if (nfsrv_idlesock_timer_on) {
1073 thread_call_cancel(nfsrv_idlesock_timer_call);
1074 nfsrv_idlesock_timer_on = 0;
1075 }
1076 } else {
1077 struct nfsrv_sock *old_slp;
1078 struct timeval now;
1079 time_t time_to_wait = nfsrv_sock_idle_timeout;
1080 /*
1081 * Get the oldest tcp socket and calculate the
1082 * earliest time for the next idle timer to fire
1083 * based on the possibly updated nfsrv_sock_idle_timeout
1084 */
1085 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
1086 if (old_slp->ns_sotype == SOCK_STREAM) {
1087 microuptime(&now);
1088 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
1089 if (time_to_wait < 1) {
1090 time_to_wait = 1;
1091 }
1092 break;
1093 }
1094 }
1095 /*
1096 * If we have a timer scheduled, but if its going to fire too late,
1097 * turn it off.
1098 */
1099 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
1100 thread_call_cancel(nfsrv_idlesock_timer_call);
1101 nfsrv_idlesock_timer_on = 0;
1102 }
1103 /* Schedule the idle thread if it isn't already */
1104 if (!nfsrv_idlesock_timer_on) {
1105 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1106 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1107 }
1108 }
1109 }
1110 }
1111
1112 sock_retain(so); /* grab a retain count on the socket */
1113 slp->ns_so = so;
1114 slp->ns_sotype = sotype;
1115 slp->ns_nam = mynam;
1116
1117 /* set up the socket up-call */
1118 nfsrv_uc_addsock(slp, first);
1119
1120 /* mark that the socket is not in the nfsrv_sockwg list */
1121 slp->ns_wgq.tqe_next = SLPNOLIST;
1122
1123 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1124
1125 nfsrv_wakenfsd(slp);
1126 lck_mtx_unlock(nfsd_mutex);
1127
1128 return 0;
1129 }
1130
1131 /*
1132 * nfssvc_nfsd()
1133 *
1134 * nfsd theory of operation:
1135 *
1136 * The first nfsd thread stays in user mode accepting new TCP connections
1137 * which are then added via the "addsock" call. The rest of the nfsd threads
1138 * simply call into the kernel and remain there in a loop handling NFS
1139 * requests until killed by a signal.
1140 *
1141 * There's a list of nfsd threads (nfsd_head).
1142 * There's an nfsd queue that contains only those nfsds that are
1143 * waiting for work to do (nfsd_queue).
1144 *
1145 * There's a list of all NFS sockets (nfsrv_socklist) and three queues for
1146 * managing the work on the sockets:
1147 * nfsrv_sockwait - sockets w/new data waiting to be worked on
1148 * nfsrv_sockwork - sockets being worked on which may have more work to do
1149 * nfsrv_sockwg -- sockets which have pending write gather data
1150 * When a socket receives data, if it is not currently queued, it
1151 * will be placed at the end of the "wait" queue.
1152 * Whenever a socket needs servicing we make sure it is queued and
1153 * wake up a waiting nfsd (if there is one).
1154 *
1155 * nfsds will service at most 8 requests from the same socket before
1156 * defecting to work on another socket.
1157 * nfsds will defect immediately if there are any sockets in the "wait" queue
1158 * nfsds looking for a socket to work on check the "wait" queue first and
1159 * then check the "work" queue.
1160 * When an nfsd starts working on a socket, it removes it from the head of
1161 * the queue it's currently on and moves it to the end of the "work" queue.
1162 * When nfsds are checking the queues for work, any sockets found not to
1163 * have any work are simply dropped from the queue.
1164 *
1165 */
/*
 * Body of an nfsd service thread.  Allocates a per-thread nfsd structure,
 * links it on nfsd_head, then loops: pick a socket with work, pull a request
 * off it with nfsrv_dorec(), run it (or the pending gathered writes) and send
 * the reply.  The loop ends when nfsd_thread_max drops to zero or below, when
 * there are more threads than nfsd_thread_max and nothing to do, when the
 * idle sleep fails with anything other than EWOULDBLOCK, or when a send is
 * interrupted.  Returns ENOMEM if the nfsd structure cannot be allocated,
 * otherwise the error that ended the loop.
 */
1166 int
1167 nfssvc_nfsd(void)
1168 {
1169 mbuf_t m, mrep;
1170 struct nfsrv_sock *slp;
1171 struct nfsd *nfsd;
1172 struct nfsrv_descript *nd = NULL;
1173 int error = 0, cacherep, writes_todo;
1174 int siz, procrastinate, opcnt = 0;
1175 u_quad_t cur_usec;
1176 struct timeval now;
1177 struct vfs_context context;
1178 struct timespec to;
1179
1180 #ifndef nolint
1181 cacherep = RC_DOIT;
1182 writes_todo = 0;
1183 #endif
1184
1185 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
1186 if (!nfsd) {
1187 return ENOMEM;
1188 }
1189 bzero(nfsd, sizeof(struct nfsd));
/* Register this thread; the first nfsd to arrive sets up the request cache. */
1190 lck_mtx_lock(nfsd_mutex);
1191 if (nfsd_thread_count++ == 0) {
1192 nfsrv_initcache(); /* Init the server request cache */
1193 }
1194 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
1195 lck_mtx_unlock(nfsd_mutex);
1196
1197 context.vc_thread = current_thread();
1198
1199 /* Set time out so that nfsd threads can wake up and see if they are still needed. */
1200 to.tv_sec = 5;
1201 to.tv_nsec = 0;
1202
1203 /*
1204 * Loop getting rpc requests until SIGKILL.
1205 */
1206 for (;;) {
1207 if (nfsd_thread_max <= 0) {
1208 /* NFS server shutting down, get out ASAP */
1209 error = EINTR;
1210 slp = nfsd->nfsd_slp;
1211 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
1212 /* already have some work to do */
1213 error = 0;
1214 slp = nfsd->nfsd_slp;
1215 } else {
1216 /* need to find work to do */
1217 error = 0;
1218 lck_mtx_lock(nfsd_mutex);
1219 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
1220 if (nfsd_thread_count > nfsd_thread_max) {
1221 /*
1222 * If we have no socket and there are more
1223 * nfsd threads than configured, let's exit.
1224 */
1225 error = 0;
1226 goto done;
1227 }
/*
 * Put ourselves on the idle queue and sleep on nfsd_mutex with the
 * timeout set up above.
 */
1228 nfsd->nfsd_flag |= NFSD_WAITING;
1229 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
1230 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
1231 if (error) {
/* If NFSD_WAITING is still set we are still on nfsd_queue: take ourselves off. */
1232 if (nfsd->nfsd_flag & NFSD_WAITING) {
1233 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
1234 nfsd->nfsd_flag &= ~NFSD_WAITING;
1235 }
/*
 * EWOULDBLOCK (presumably the sleep timing out) just re-evaluates the
 * wait condition; any other error leaves via done with nfsd_mutex held.
 */
1236 if (error == EWOULDBLOCK) {
1237 continue;
1238 }
1239 goto done;
1240 }
1241 }
1242 slp = nfsd->nfsd_slp;
1243 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
1244 /* look for a socket to work on in the wait queue */
1245 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
1246 lck_rw_lock_exclusive(&slp->ns_rwlock);
1247 /* remove from the head of the queue */
1248 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1249 slp->ns_flag &= ~SLP_WAITQ;
1250 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
1251 break;
1252 }
1253 /* nothing to do, so skip this socket */
1254 lck_rw_done(&slp->ns_rwlock);
1255 }
1256 }
1257 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
1258 /* look for a socket to work on in the work queue */
1259 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
1260 lck_rw_lock_exclusive(&slp->ns_rwlock);
1261 /* remove from the head of the queue */
1262 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1263 slp->ns_flag &= ~SLP_WORKQ;
1264 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
1265 break;
1266 }
1267 /* nothing to do, so skip this socket */
1268 lck_rw_done(&slp->ns_rwlock);
1269 }
1270 }
/*
 * A socket picked by one of the two scans above is still write-locked
 * here; the lock is released at the end of this block.
 */
1271 if (!nfsd->nfsd_slp && slp) {
1272 /* we found a socket to work on, grab a reference */
1273 slp->ns_sref++;
1274 microuptime(&now);
1275 slp->ns_timestamp = now.tv_sec;
1276 /* We keep the socket list in least recently used order for reaping idle sockets */
1277 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1278 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1279 nfsd->nfsd_slp = slp;
1280 opcnt = 0;
1281 /* and put it at the back of the work queue */
1282 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1283 slp->ns_flag |= SLP_WORKQ;
1284 lck_rw_done(&slp->ns_rwlock);
1285 }
1286 lck_mtx_unlock(nfsd_mutex);
1287 if (!slp) {
1288 continue;
1289 }
/* Re-take the socket lock (nfsd_mutex is no longer held) to receive and dequeue a request. */
1290 lck_rw_lock_exclusive(&slp->ns_rwlock);
1291 if (slp->ns_flag & SLP_VALID) {
1292 if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
1293 slp->ns_flag &= ~SLP_NEEDQ;
1294 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1295 }
1296 if (slp->ns_flag & SLP_DISCONN) {
1297 nfsrv_zapsock(slp);
1298 }
1299 error = nfsrv_dorec(slp, nfsd, &nd);
1300 if (error == EINVAL) { // RPCSEC_GSS drop
1301 if (slp->ns_sotype == SOCK_STREAM) {
1302 nfsrv_zapsock(slp); // drop connection
1303 }
1304 }
1305 writes_todo = 0;
/*
 * nfsrv_dorec() reported an error, but write-gather work may be pending:
 * if ns_wgtime has passed, clear the error and go service it with
 * writes_todo set.
 */
1306 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1307 microuptime(&now);
1308 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1309 (u_quad_t)now.tv_usec;
1310 if (slp->ns_wgtime <= cur_usec) {
1311 error = 0;
1312 cacherep = RC_DOIT;
1313 writes_todo = 1;
1314 }
1315 slp->ns_flag &= ~SLP_DOWRITES;
1316 }
1317 nfsd->nfsd_flag |= NFSD_REQINPROG;
1318 }
1319 lck_rw_done(&slp->ns_rwlock);
1320 }
/*
 * Nothing usable on this socket (or it is no longer valid): free any
 * descriptor, give up the socket reference, and either look for other
 * work or leave the loop if the server is shutting down.
 */
1321 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1322 if (nd) {
1323 nfsm_chain_cleanup(&nd->nd_nmreq);
1324 if (nd->nd_nam2) {
1325 mbuf_freem(nd->nd_nam2);
1326 }
1327 if (IS_VALID_CRED(nd->nd_cr)) {
1328 kauth_cred_unref(&nd->nd_cr);
1329 }
1330 if (nd->nd_gss_context) {
1331 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1332 }
1333 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1334 nd = NULL;
1335 }
1336 nfsd->nfsd_slp = NULL;
1337 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1338 if (slp) {
1339 nfsrv_slpderef(slp);
1340 }
1341 if (nfsd_thread_max <= 0) {
1342 break;
1343 }
1344 continue;
1345 }
1346 if (nd) {
1347 microuptime(&nd->nd_starttime);
/* Use the per-request address if the descriptor carries one, else the socket's. */
1348 if (nd->nd_nam2) {
1349 nd->nd_nam = nd->nd_nam2;
1350 } else {
1351 nd->nd_nam = slp->ns_nam;
1352 }
1353
1354 cacherep = nfsrv_getcache(nd, slp, &mrep);
1355
1356 if (nfsrv_require_resv_port) {
1357 /* Check if source port is a reserved port */
1358 in_port_t port = 0;
1359 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1360
1361 if (saddr->sa_family == AF_INET) {
1362 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1363 } else if (saddr->sa_family == AF_INET6) {
1364 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1365 }
/*
 * Anything but the NULL procedure arriving from a non-reserved port is
 * turned into a NOOP carrying an AUTH_TOOWEAK status.
 */
1366 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1367 nd->nd_procnum = NFSPROC_NOOP;
1368 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1369 cacherep = RC_DOIT;
1370 }
1371 }
1372 }
1373
1374 /*
1375 * Loop to get all the write RPC replies that have been
1376 * gathered together.
1377 */
1378 do {
1379 switch (cacherep) {
1380 case RC_DOIT:
1381 if (nd && (nd->nd_vers == NFS_VER3)) {
1382 procrastinate = nfsrv_wg_delay_v3;
1383 } else {
1384 procrastinate = nfsrv_wg_delay;
1385 }
1386 lck_rw_lock_shared(&nfsrv_export_rwlock);
1387 context.vc_ucred = NULL;
/*
 * writes_todo is tested first, so nd is only dereferenced here when
 * writes_todo is 0.  Writes with a positive gather delay go through
 * nfsrv_writegather(); everything else is dispatched via nfsrv_procs[].
 */
1388 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
1389 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1390 } else {
1391 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1392 }
1393 lck_rw_done(&nfsrv_export_rwlock);
1394 if (mrep == NULL) {
1395 /*
1396 * If this is a stream socket and we are not going
1397 * to send a reply we better close the connection
1398 * so the client doesn't hang.
1399 */
1400 if (error && slp->ns_sotype == SOCK_STREAM) {
1401 lck_rw_lock_exclusive(&slp->ns_rwlock);
1402 nfsrv_zapsock(slp);
1403 lck_rw_done(&slp->ns_rwlock);
1404 printf("NFS server: NULL reply from proc = %d error = %d\n",
1405 nd->nd_procnum, error);
1406 }
1407 break;
1408 }
1409 if (error) {
1410 OSAddAtomic64(1, &nfsstats.srv_errs);
1411 nfsrv_updatecache(nd, FALSE, mrep);
1412 if (nd->nd_nam2) {
1413 mbuf_freem(nd->nd_nam2);
1414 nd->nd_nam2 = NULL;
1415 }
1416 break;
1417 }
1418 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1419 nfsrv_updatecache(nd, TRUE, mrep);
1420 /* FALLTHRU */
1421
1422 case RC_REPLY:
1423 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1424 /*
1425 * Need to checksum or encrypt the reply
1426 */
1427 error = nfs_gss_svc_protect_reply(nd, mrep);
1428 if (error) {
1429 mbuf_freem(mrep);
1430 break;
1431 }
1432 }
1433
1434 /*
1435 * Get the total size of the reply
1436 */
1437 m = mrep;
1438 siz = 0;
1439 while (m) {
1440 siz += mbuf_len(m);
1441 m = mbuf_next(m);
1442 }
1443 if (siz <= 0 || siz > NFS_MAXPACKET) {
1444 printf("mbuf siz=%d\n", siz);
1445 panic("Bad nfs svc reply");
1446 }
1447 m = mrep;
1448 mbuf_pkthdr_setlen(m, siz);
1449 error = mbuf_pkthdr_setrcvif(m, NULL);
1450 if (error) {
1451 panic("nfsd setrcvif failed: %d", error);
1452 }
1453 /*
1454 * For stream protocols, prepend a Sun RPC
1455 * Record Mark.
1456 */
1457 if (slp->ns_sotype == SOCK_STREAM) {
1458 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1459 if (!error) {
1460 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1461 }
1462 }
/*
 * Send only if the prepend succeeded and the socket is still valid;
 * on the other two paths the reply chain is freed here instead.
 */
1463 if (!error) {
1464 if (slp->ns_flag & SLP_VALID) {
1465 error = nfsrv_send(slp, nd->nd_nam2, m);
1466 } else {
1467 error = EPIPE;
1468 mbuf_freem(m);
1469 }
1470 } else {
1471 mbuf_freem(m);
1472 }
1473 mrep = NULL;
1474 if (nd->nd_nam2) {
1475 mbuf_freem(nd->nd_nam2);
1476 nd->nd_nam2 = NULL;
1477 }
1478 if (error == EPIPE) {
1479 lck_rw_lock_exclusive(&slp->ns_rwlock);
1480 nfsrv_zapsock(slp);
1481 lck_rw_done(&slp->ns_rwlock);
1482 }
/*
 * Interrupted send: release the descriptor and the socket reference and
 * exit the thread.  nfsd_mutex is taken first because the code at the
 * done label unlocks it.
 */
1483 if (error == EINTR || error == ERESTART) {
1484 nfsm_chain_cleanup(&nd->nd_nmreq);
1485 if (IS_VALID_CRED(nd->nd_cr)) {
1486 kauth_cred_unref(&nd->nd_cr);
1487 }
1488 if (nd->nd_gss_context) {
1489 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1490 }
1491 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1492 nfsrv_slpderef(slp);
1493 lck_mtx_lock(nfsd_mutex);
1494 goto done;
1495 }
1496 break;
1497 case RC_DROPIT:
1498 mbuf_freem(nd->nd_nam2);
1499 nd->nd_nam2 = NULL;
1500 break;
1501 }
1502 ;
/* One more request handled on this socket; release its descriptor. */
1503 opcnt++;
1504 if (nd) {
1505 nfsm_chain_cleanup(&nd->nd_nmreq);
1506 if (nd->nd_nam2) {
1507 mbuf_freem(nd->nd_nam2);
1508 }
1509 if (IS_VALID_CRED(nd->nd_cr)) {
1510 kauth_cred_unref(&nd->nd_cr);
1511 }
1512 if (nd->nd_gss_context) {
1513 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1514 }
1515 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1516 nd = NULL;
1517 }
1518
1519 /*
1520 * Check to see if there are outstanding writes that
1521 * need to be serviced.
1522 */
1523 writes_todo = 0;
1524 if (slp->ns_wgtime) {
1525 microuptime(&now);
1526 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1527 (u_quad_t)now.tv_usec;
1528 if (slp->ns_wgtime <= cur_usec) {
1529 cacherep = RC_DOIT;
1530 writes_todo = 1;
1531 }
1532 }
1533 } while (writes_todo);
1534
1535 nd = NULL;
/*
 * Stay on this socket only while no other socket is waiting and fewer
 * than 8 requests have been handled since it was picked up.
 */
1536 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1537 lck_rw_lock_exclusive(&slp->ns_rwlock);
1538 error = nfsrv_dorec(slp, nfsd, &nd);
1539 if (error == EINVAL) { // RPCSEC_GSS drop
1540 if (slp->ns_sotype == SOCK_STREAM) {
1541 nfsrv_zapsock(slp); // drop connection
1542 }
1543 }
1544 lck_rw_done(&slp->ns_rwlock);
1545 }
1546 if (!nd) {
1547 /* drop our reference on the socket */
1548 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1549 nfsd->nfsd_slp = NULL;
1550 nfsrv_slpderef(slp);
1551 }
1552 }
1553 lck_mtx_lock(nfsd_mutex);
/*
 * Every path to this label arrives with nfsd_mutex held.  Unregister the
 * thread; the last one out runs nfsrv_cleanup().
 */
1554 done:
1555 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1556 FREE(nfsd, M_NFSD);
1557 if (--nfsd_thread_count == 0) {
1558 nfsrv_cleanup();
1559 }
1560 lck_mtx_unlock(nfsd_mutex);
1561 return error;
1562 }
1563
1564 int
1565 nfssvc_export(user_addr_t argp)
1566 {
1567 int error = 0, is_64bit;
1568 struct user_nfs_export_args unxa;
1569 vfs_context_t ctx = vfs_context_current();
1570
1571 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1572
1573 /* copy in pointers to path and export args */
1574 if (is_64bit) {
1575 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1576 } else {
1577 struct nfs_export_args tnxa;
1578 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1579 if (error == 0) {
1580 /* munge into LP64 version of nfs_export_args structure */
1581 unxa.nxa_fsid = tnxa.nxa_fsid;
1582 unxa.nxa_expid = tnxa.nxa_expid;
1583 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1584 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1585 unxa.nxa_flags = tnxa.nxa_flags;
1586 unxa.nxa_netcount = tnxa.nxa_netcount;
1587 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1588 }
1589 }
1590 if (error) {
1591 return error;
1592 }
1593
1594 error = nfsrv_export(&unxa, ctx);
1595
1596 return error;
1597 }
1598
1599 /*
1600 * Shut down a socket associated with an nfsrv_sock structure.
1601 * Should be called with the send lock set, if required.
1602 * The trick here is to increment the sref at the start, so that the nfsds
1603 * will stop using it and clear ns_flag at the end so that it will not be
1604 * reassigned during cleanup.
1605 */
1606 void
1607 nfsrv_zapsock(struct nfsrv_sock *slp)
1608 {
1609 socket_t so;
1610
1611 if ((slp->ns_flag & SLP_VALID) == 0) {
1612 return;
1613 }
1614 slp->ns_flag &= ~SLP_ALLFLAGS;
1615
1616 so = slp->ns_so;
1617 if (so == NULL) {
1618 return;
1619 }
1620
1621 sock_setupcall(so, NULL, NULL);
1622 sock_shutdown(so, SHUT_RDWR);
1623
1624 /*
1625 * Remove from the up-call queue
1626 */
1627 nfsrv_uc_dequeue(slp);
1628 }
1629
1630 /*
1631 * cleanup and release a server socket structure.
1632 */
1633 void
1634 nfsrv_slpfree(struct nfsrv_sock *slp)
1635 {
1636 struct nfsrv_descript *nwp, *nnwp;
1637
1638 if (slp->ns_so) {
1639 sock_release(slp->ns_so);
1640 slp->ns_so = NULL;
1641 }
1642 if (slp->ns_nam) {
1643 mbuf_free(slp->ns_nam);
1644 }
1645 if (slp->ns_raw) {
1646 mbuf_freem(slp->ns_raw);
1647 }
1648 if (slp->ns_rec) {
1649 mbuf_freem(slp->ns_rec);
1650 }
1651 if (slp->ns_frag) {
1652 mbuf_freem(slp->ns_frag);
1653 }
1654 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1655 slp->ns_reccnt = 0;
1656
1657 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1658 nnwp = nwp->nd_tq.le_next;
1659 LIST_REMOVE(nwp, nd_tq);
1660 nfsm_chain_cleanup(&nwp->nd_nmreq);
1661 if (nwp->nd_mrep) {
1662 mbuf_freem(nwp->nd_mrep);
1663 }
1664 if (nwp->nd_nam2) {
1665 mbuf_freem(nwp->nd_nam2);
1666 }
1667 if (IS_VALID_CRED(nwp->nd_cr)) {
1668 kauth_cred_unref(&nwp->nd_cr);
1669 }
1670 if (nwp->nd_gss_context) {
1671 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1672 }
1673 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1674 }
1675 LIST_INIT(&slp->ns_tq);
1676
1677 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1678 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1679 FREE(slp, M_NFSSVC);
1680 }
1681
1682 /*
1683 * Derefence a server socket structure. If it has no more references and
1684 * is no longer valid, you can throw it away.
1685 */
1686 static void
1687 nfsrv_slpderef_locked(struct nfsrv_sock *slp)
1688 {
1689 lck_rw_lock_exclusive(&slp->ns_rwlock);
1690 slp->ns_sref--;
1691
1692 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1693 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1694 /* remove socket from queue since there's no work */
1695 if (slp->ns_flag & SLP_WAITQ) {
1696 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1697 } else {
1698 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1699 }
1700 slp->ns_flag &= ~SLP_QUEUED;
1701 }
1702 lck_rw_done(&slp->ns_rwlock);
1703 return;
1704 }
1705
1706 /* This socket is no longer valid, so we'll get rid of it */
1707
1708 if (slp->ns_flag & SLP_QUEUED) {
1709 if (slp->ns_flag & SLP_WAITQ) {
1710 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1711 } else {
1712 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1713 }
1714 slp->ns_flag &= ~SLP_QUEUED;
1715 }
1716 lck_rw_done(&slp->ns_rwlock);
1717
1718 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1719 if (slp->ns_sotype == SOCK_STREAM) {
1720 nfsrv_sock_tcp_cnt--;
1721 }
1722
1723 /* now remove from the write gather socket list */
1724 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1725 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1726 slp->ns_wgq.tqe_next = SLPNOLIST;
1727 }
1728 nfsrv_slpfree(slp);
1729 }
1730
1731 void
1732 nfsrv_slpderef(struct nfsrv_sock *slp)
1733 {
1734 lck_mtx_lock(nfsd_mutex);
1735 nfsrv_slpderef_locked(slp);
1736 lck_mtx_unlock(nfsd_mutex);
1737 }
1738
1739 /*
1740 * Check periodically for idle sockest if needed and
1741 * zap them.
1742 */
1743 void
1744 nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
1745 {
1746 struct nfsrv_sock *slp, *tslp;
1747 struct timeval now;
1748 time_t time_to_wait = nfsrv_sock_idle_timeout;
1749
1750 microuptime(&now);
1751 lck_mtx_lock(nfsd_mutex);
1752
1753 /* Turn off the timer if we're suppose to and get out */
1754 if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
1755 nfsrv_sock_idle_timeout = 0;
1756 }
1757 if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
1758 nfsrv_idlesock_timer_on = 0;
1759 lck_mtx_unlock(nfsd_mutex);
1760 return;
1761 }
1762
1763 TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
1764 lck_rw_lock_exclusive(&slp->ns_rwlock);
1765 /* Skip udp and referenced sockets */
1766 if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
1767 lck_rw_done(&slp->ns_rwlock);
1768 continue;
1769 }
1770 /*
1771 * If this is the first non-referenced socket that hasn't idle out,
1772 * use its time stamp to calculate the earlist time in the future
1773 * to start the next invocation of the timer. Since the nfsrv_socklist
1774 * is sorted oldest access to newest. Once we find the first one,
1775 * we're done and break out of the loop.
1776 */
1777 if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
1778 nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1779 time_to_wait -= now.tv_sec - slp->ns_timestamp;
1780 if (time_to_wait < 1) {
1781 time_to_wait = 1;
1782 }
1783 lck_rw_done(&slp->ns_rwlock);
1784 break;
1785 }
1786 /*
1787 * Bump the ref count. nfsrv_slpderef below will destroy
1788 * the socket, since nfsrv_zapsock has closed it.
1789 */
1790 slp->ns_sref++;
1791 nfsrv_zapsock(slp);
1792 lck_rw_done(&slp->ns_rwlock);
1793 nfsrv_slpderef_locked(slp);
1794 }
1795
1796 /* Start ourself back up */
1797 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1798 /* Remember when the next timer will fire for nfssvc_addsock. */
1799 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1800 lck_mtx_unlock(nfsd_mutex);
1801 }
1802
1803 /*
1804 * Clean up the data structures for the server.
1805 */
1806 void
1807 nfsrv_cleanup(void)
1808 {
1809 struct nfsrv_sock *slp, *nslp;
1810 struct timeval now;
1811 #if CONFIG_FSE
1812 struct nfsrv_fmod *fp, *nfp;
1813 int i;
1814 #endif
1815
1816 microuptime(&now);
1817 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1818 nslp = TAILQ_NEXT(slp, ns_chain);
1819 lck_rw_lock_exclusive(&slp->ns_rwlock);
1820 slp->ns_sref++;
1821 if (slp->ns_flag & SLP_VALID) {
1822 nfsrv_zapsock(slp);
1823 }
1824 lck_rw_done(&slp->ns_rwlock);
1825 nfsrv_slpderef_locked(slp);
1826 }
1827 #
1828 #if CONFIG_FSE
1829 /*
1830 * Flush pending file write fsevents
1831 */
1832 lck_mtx_lock(nfsrv_fmod_mutex);
1833 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1834 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1835 /*
1836 * Fire off the content modified fsevent for each
1837 * entry, remove it from the list, and free it.
1838 */
1839 if (nfsrv_fsevents_enabled) {
1840 fp->fm_context.vc_thread = current_thread();
1841 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1842 FSE_ARG_VNODE, fp->fm_vp,
1843 FSE_ARG_DONE);
1844 }
1845 vnode_put(fp->fm_vp);
1846 kauth_cred_unref(&fp->fm_context.vc_ucred);
1847 nfp = LIST_NEXT(fp, fm_link);
1848 LIST_REMOVE(fp, fm_link);
1849 FREE(fp, M_TEMP);
1850 }
1851 }
1852 nfsrv_fmod_pending = 0;
1853 lck_mtx_unlock(nfsrv_fmod_mutex);
1854 #endif
1855
1856 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1857
1858 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1859
1860 nfsrv_cleancache(); /* And clear out server cache */
1861
1862 nfsrv_udpsock = NULL;
1863 nfsrv_udp6sock = NULL;
1864 }
1865
1866 #endif /* CONFIG_NFS_SERVER */