]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_syscalls.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
CommitLineData
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
ea3f0419
A
67
68#include <nfs/nfs_conf.h>
69
2d21ac55
A
70/*
71 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72 * support for mandatory and extensible security protections. This notice
73 * is included in support of clause 2.2 (b) of the Apple Public License,
74 * Version 2.0.
75 */
1c79356b
A
76
77#include <sys/param.h>
78#include <sys/systm.h>
1c79356b 79#include <sys/kernel.h>
91447636 80#include <sys/file_internal.h>
1c79356b
A
81#include <sys/filedesc.h>
82#include <sys/stat.h>
91447636
A
83#include <sys/vnode_internal.h>
84#include <sys/mount_internal.h>
85#include <sys/proc_internal.h> /* for fdflags */
86#include <sys/kauth.h>
1c79356b 87#include <sys/sysctl.h>
55e303ae 88#include <sys/ubc.h>
1c79356b
A
89#include <sys/uio.h>
90#include <sys/malloc.h>
91447636 91#include <sys/kpi_mbuf.h>
1c79356b
A
92#include <sys/socket.h>
93#include <sys/socketvar.h>
94#include <sys/domain.h>
95#include <sys/protosw.h>
55e303ae
A
96#include <sys/fcntl.h>
97#include <sys/lockf.h>
1c79356b
A
98#include <sys/syslog.h>
99#include <sys/user.h>
91447636
A
100#include <sys/sysproto.h>
101#include <sys/kpi_socket.h>
2d21ac55 102#include <sys/fsevents.h>
91447636 103#include <libkern/OSAtomic.h>
2d21ac55
A
104#include <kern/thread_call.h>
105#include <kern/task.h>
1c79356b 106
b0d623f7 107#include <security/audit/audit.h>
ccc36f2f 108
1c79356b
A
109#include <netinet/in.h>
110#include <netinet/tcp.h>
1c79356b
A
111#include <nfs/xdr_subs.h>
112#include <nfs/rpcv2.h>
113#include <nfs/nfsproto.h>
114#include <nfs/nfs.h>
115#include <nfs/nfsm_subs.h>
116#include <nfs/nfsrvcache.h>
2d21ac55 117#include <nfs/nfs_gss.h>
1c79356b
A
118#include <nfs/nfsmount.h>
119#include <nfs/nfsnode.h>
55e303ae 120#include <nfs/nfs_lock.h>
2d21ac55
A
121#if CONFIG_MACF
122#include <security/mac_framework.h>
1c79356b
A
123#endif
124
0a7de745 125kern_return_t thread_terminate(thread_t); /* XXX */
2d21ac55 126
#if CONFIG_NFS_SERVER

/* Defined outside this file. */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* File-local NFS server state; several of these are exposed via sysctl below. */
static int nfsrv_require_resv_port = 0;
static time_t nfsrv_idlesock_timer_on = 0;
static int nfsrv_sock_tcp_cnt = 0;
#define NFSD_MIN_IDLE_TIMEOUT 30
static int nfsrv_sock_idle_timeout = (60 * 60); /* One hour */

/* Prototypes for NFS server routines referenced in this file. */
int  nfssvc_export(user_addr_t argp);
int  nfssvc_nfsd(void);
int  nfssvc_addsock(socket_t, mbuf_t);
void nfsrv_zapsock(struct nfsrv_sock *);
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);

#endif /* CONFIG_NFS_SERVER */
2d21ac55 148
#if CONFIG_NFS
/*
 * sysctl stuff: root "nfs" node under vfs.generic.  The client and server
 * nodes declared later in this file hang off of it.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
#endif /* CONFIG_NFS */
2d21ac55 156
#if CONFIG_NFS_CLIENT
/* vfs.generic.nfs.client.* -- NFS client tunables and read-only counters. */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
/* read-only counters */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, tcp_sockbuf,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tcp_sockbuf, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
#if CONFIG_NFS_GSS
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
#endif /* CONFIG_NFS_GSS */
#if CONFIG_NFS4
SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain,
    CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
#endif /* CONFIG_NFS4 */
#endif /* CONFIG_NFS_CLIENT */
2d21ac55 186
#if CONFIG_NFS_SERVER
/* vfs.generic.nfs.server.* -- NFS server tunables and read-only counters. */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif /* CONFIG_FSE */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* upcall queue knobs, only present in NFS_UC_Q_DEBUG builds */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif /* NFS_UC_Q_DEBUG */
#endif /* CONFIG_NFS_SERVER */
2d21ac55 212
ea3f0419 213#if CONFIG_NFS_CLIENT && CONFIG_NFS4
fe8ab488
A
214static int
215mapname2id(struct nfs_testmapid *map)
216{
217 int error;
fe8ab488 218 error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
0a7de745
A
219 if (error) {
220 return error;
221 }
fe8ab488 222
0a7de745 223 if (map->ntm_grpflag) {
fe8ab488 224 error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
0a7de745 225 } else {
fe8ab488 226 error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
0a7de745 227 }
fe8ab488 228
0a7de745 229 return error;
fe8ab488
A
230}
231
232static int
233mapid2name(struct nfs_testmapid *map)
234{
235 int error;
5ba3f43e 236 size_t len = sizeof(map->ntm_name);
0a7de745
A
237
238 if (map->ntm_grpflag) {
fe8ab488 239 error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
0a7de745 240 } else {
fe8ab488 241 error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
0a7de745
A
242 }
243
244 if (error) {
245 return error;
246 }
fe8ab488 247
fe8ab488
A
248 error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
249
0a7de745 250 return error;
fe8ab488
A
251}
252
fe8ab488
A
253static int
254nfsclnt_testidmap(proc_t p, user_addr_t argp)
255{
256 struct nfs_testmapid mapid;
257 int error, coerror;
5ba3f43e 258 size_t len = sizeof(mapid.ntm_name);
0a7de745
A
259
260 /* Let root make this call. */
fe8ab488 261 error = proc_suser(p);
0a7de745
A
262 if (error) {
263 return error;
264 }
fe8ab488
A
265
266 error = copyin(argp, &mapid, sizeof(mapid));
cb323159
A
267 mapid.ntm_name[MAXIDNAMELEN - 1] = '\0';
268
0a7de745
A
269 if (error) {
270 return error;
271 }
5ba3f43e
A
272 switch (mapid.ntm_lookup) {
273 case NTM_NAME2ID:
fe8ab488 274 error = mapname2id(&mapid);
5ba3f43e
A
275 break;
276 case NTM_ID2NAME:
fe8ab488 277 error = mapid2name(&mapid);
5ba3f43e
A
278 break;
279 case NTM_NAME2GUID:
280 error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag);
281 break;
282 case NTM_GUID2NAME:
283 error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag);
284 break;
285 default:
0a7de745 286 return EINVAL;
5ba3f43e 287 }
fe8ab488
A
288
289 coerror = copyout(&mapid, argp, sizeof(mapid));
290
0a7de745 291 return error ? error : coerror;
fe8ab488 292}
ea3f0419
A
293#endif /* CONFIG_NFS_CLIENT && CONFIG_NFS4 */
294
/*
 * Marks parameters that are only referenced when the NFS client is
 * compiled in, so builds without it do not warn about unused parameters.
 */
#if CONFIG_NFS_CLIENT
#define __no_nfs_client_unused      /* nothing */
#else
#define __no_nfs_client_unused      __unused
#endif
fe8ab488 300
2d21ac55 301int
ea3f0419
A
302nfsclnt(
303 proc_t p __no_nfs_client_unused,
304 struct nfsclnt_args *uap __no_nfs_client_unused,
305 __unused int *retval)
2d21ac55 306{
ea3f0419 307#if CONFIG_NFS_CLIENT
2d21ac55
A
308 struct lockd_ans la;
309 int error;
310
6d2010ae
A
311 switch (uap->flag) {
312 case NFSCLNT_LOCKDANS:
2d21ac55 313 error = copyin(uap->argp, &la, sizeof(la));
0a7de745 314 if (!error) {
6d2010ae 315 error = nfslockdans(p, &la);
0a7de745 316 }
6d2010ae
A
317 break;
318 case NFSCLNT_LOCKDNOTIFY:
319 error = nfslockdnotify(p, uap->argp);
320 break;
cb323159 321#if CONFIG_NFS4
fe8ab488
A
322 case NFSCLNT_TESTIDMAP:
323 error = nfsclnt_testidmap(p, uap->argp);
324 break;
cb323159 325#endif
6d2010ae
A
326 default:
327 error = EINVAL;
2d21ac55 328 }
0a7de745 329 return error;
ea3f0419
A
330#else
331 return ENOSYS;
332#endif /* CONFIG_NFS_CLIENT */
2d21ac55
A
333}
334
ea3f0419 335#if CONFIG_NFS_CLIENT
fe8ab488 336
2d21ac55
A
337/*
338 * Asynchronous I/O threads for client NFS.
339 * They do read-ahead and write-behind operations on the block I/O cache.
340 *
341 * The pool of up to nfsiod_thread_max threads is launched on demand, and threads
342 * exit when unused for a while. There are as many nfsiod structs as there are
343 * nfsiod threads; however there's no strict tie between a thread and a struct.
344 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
345 * up, it removes the next struct nfsiod from the queue and services it. Then
346 * it will put the struct at the head of free list and sleep on it.
347 * Async requests will pull the next struct nfsiod from the head of the free list,
348 * put it on the work queue, and wake whatever thread is waiting on that struct.
349 */
2d21ac55
A
350
351/*
352 * nfsiod thread exit routine
353 *
354 * Must be called with nfsiod_mutex held so that the
355 * decision to terminate is atomic with the termination.
356 */
b0d623f7 357void
2d21ac55
A
358nfsiod_terminate(struct nfsiod *niod)
359{
360 nfsiod_thread_count--;
c3c9b80d 361 lck_mtx_unlock(&nfsiod_mutex);
0a7de745 362 if (niod) {
2d21ac55 363 FREE(niod, M_TEMP);
0a7de745 364 } else {
2d21ac55 365 printf("nfsiod: terminating without niod\n");
0a7de745 366 }
2d21ac55
A
367 thread_terminate(current_thread());
368 /*NOTREACHED*/
369}
370
371/* nfsiod thread startup routine */
b0d623f7 372void
2d21ac55
A
373nfsiod_thread(void)
374{
375 struct nfsiod *niod;
376 int error;
377
378 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
379 if (!niod) {
c3c9b80d 380 lck_mtx_lock(&nfsiod_mutex);
2d21ac55 381 nfsiod_thread_count--;
b0d623f7 382 wakeup(current_thread());
c3c9b80d 383 lck_mtx_unlock(&nfsiod_mutex);
2d21ac55
A
384 thread_terminate(current_thread());
385 /*NOTREACHED*/
386 }
387 bzero(niod, sizeof(*niod));
c3c9b80d 388 lck_mtx_lock(&nfsiod_mutex);
2d21ac55
A
389 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
390 wakeup(current_thread());
c3c9b80d 391 error = msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
2d21ac55
A
392 /* shouldn't return... so we have an error */
393 /* remove an old nfsiod struct and terminate */
c3c9b80d 394 lck_mtx_lock(&nfsiod_mutex);
0a7de745 395 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
2d21ac55 396 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
0a7de745 397 }
2d21ac55
A
398 nfsiod_terminate(niod);
399 /*NOTREACHED*/
400}
401
402/*
403 * Start up another nfsiod thread.
404 * (unless we're already maxed out and there are nfsiods running)
405 */
406int
407nfsiod_start(void)
408{
b0d623f7 409 thread_t thd = THREAD_NULL;
2d21ac55 410
c3c9b80d 411 lck_mtx_lock(&nfsiod_mutex);
2d21ac55 412 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
c3c9b80d 413 lck_mtx_unlock(&nfsiod_mutex);
0a7de745 414 return EBUSY;
2d21ac55
A
415 }
416 nfsiod_thread_count++;
b0d623f7 417 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
c3c9b80d 418 lck_mtx_unlock(&nfsiod_mutex);
0a7de745 419 return EBUSY;
b0d623f7 420 }
2d21ac55 421 /* wait for the thread to complete startup */
c3c9b80d 422 msleep(thd, &nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
b0d623f7 423 thread_deallocate(thd);
0a7de745 424 return 0;
2d21ac55
A
425}
426
427/*
428 * Continuation for Asynchronous I/O threads for NFS client.
429 *
430 * Grab an nfsiod struct to work on, do some work, then drop it
431 */
b0d623f7 432int
2d21ac55
A
433nfsiod_continue(int error)
434{
435 struct nfsiod *niod;
436 struct nfsmount *nmp;
437 struct nfsreq *req, *treq;
438 struct nfs_reqqhead iodq;
439 int morework;
440
c3c9b80d 441 lck_mtx_lock(&nfsiod_mutex);
2d21ac55
A
442 niod = TAILQ_FIRST(&nfsiodwork);
443 if (!niod) {
444 /* there's no work queued up */
2d21ac55 445 /* remove an old nfsiod struct and terminate */
0a7de745 446 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
2d21ac55 447 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
0a7de745 448 }
2d21ac55
A
449 nfsiod_terminate(niod);
450 /*NOTREACHED*/
451 }
452 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
453
454worktodo:
455 while ((nmp = niod->niod_nmp)) {
0a7de745 456 if (nmp == NULL) {
fe8ab488
A
457 niod->niod_nmp = NULL;
458 break;
459 }
460
0a7de745 461 /*
2d21ac55
A
462 * Service this mount's async I/O queue.
463 *
464 * In order to ensure some level of fairness between mounts,
465 * we grab all the work up front before processing it so any
466 * new work that arrives will be serviced on a subsequent
467 * iteration - and we have a chance to see if other work needs
468 * to be done (e.g. the delayed write queue needs to be pushed
469 * or other mounts are waiting for an nfsiod).
470 */
471 /* grab the current contents of the queue */
472 TAILQ_INIT(&iodq);
473 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
3e170ce0
A
474 /* Mark each iod request as being managed by an iod */
475 TAILQ_FOREACH(req, &iodq, r_achain) {
476 lck_mtx_lock(&req->r_mtx);
477 assert(!(req->r_flags & R_IOD));
478 req->r_flags |= R_IOD;
479 lck_mtx_unlock(&req->r_mtx);
480 }
c3c9b80d 481 lck_mtx_unlock(&nfsiod_mutex);
2d21ac55
A
482
483 /* process the queue */
484 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
485 TAILQ_REMOVE(&iodq, req, r_achain);
3e170ce0 486 req->r_achain.tqe_next = NFSREQNOLIST;
2d21ac55
A
487 req->r_callback.rcb_func(req);
488 }
489
490 /* now check if there's more/other work to be done */
c3c9b80d 491 lck_mtx_lock(&nfsiod_mutex);
2d21ac55
A
492 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
493 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
0a7de745
A
494 /*
495 * we're going to stop working on this mount but if the
fe8ab488
A
496 * mount still needs more work so queue it up
497 */
0a7de745 498 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) {
2d21ac55 499 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
0a7de745 500 }
2d21ac55
A
501 nmp->nm_niod = NULL;
502 niod->niod_nmp = NULL;
503 }
504 }
505
506 /* loop if there's still a mount to work on */
507 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
508 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
509 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
fe8ab488 510 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
2d21ac55 511 }
0a7de745 512 if (niod->niod_nmp) {
2d21ac55 513 goto worktodo;
0a7de745 514 }
2d21ac55
A
515
516 /* queue ourselves back up - if there aren't too many threads running */
517 if (nfsiod_thread_count <= NFSIOD_MAX) {
518 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
c3c9b80d 519 error = msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
2d21ac55
A
520 /* shouldn't return... so we have an error */
521 /* remove an old nfsiod struct and terminate */
c3c9b80d 522 lck_mtx_lock(&nfsiod_mutex);
0a7de745 523 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
2d21ac55 524 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
0a7de745 525 }
2d21ac55
A
526 }
527 nfsiod_terminate(niod);
528 /*NOTREACHED*/
0a7de745 529 return 0;
2d21ac55
A
530}
531
ea3f0419 532#endif /* CONFIG_NFS_CLIENT */
2d21ac55 533
ea3f0419
A
/*
 * Marks parameters that are only referenced when the NFS server is
 * compiled in, so builds without it do not warn about unused parameters.
 */
#if CONFIG_NFS_SERVER
#define __no_nfs_server_unused      /* nothing */
#else
#define __no_nfs_server_unused      __unused
#endif
2d21ac55 539
1c79356b
A
540/*
541 * NFS server system calls
542 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
543 */
544
ea3f0419 545#if CONFIG_NFS_SERVER
94ff46dc
A
546static struct nfs_exportfs *
547nfsrv_find_exportfs(const char *ptr)
548{
549 struct nfs_exportfs *nxfs;
550
551 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
552 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
553 break;
554 }
555 }
556 if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
557 nxfs = NULL;
558 }
559
560 return nxfs;
561}
562
1c79356b
A
563/*
564 * Get file handle system call
565 */
1c79356b 566int
ea3f0419
A
567getfh(
568 proc_t p __no_nfs_server_unused,
569 struct getfh_args *uap __no_nfs_server_unused,
570 __unused int *retval)
1c79356b 571{
91447636
A
572 vnode_t vp;
573 struct nfs_filehandle nfh;
f427ee49 574 int error, fhlen = 0, fidlen;
1c79356b 575 struct nameidata nd;
94ff46dc 576 char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
6d2010ae 577 size_t pathlen;
91447636
A
578 struct nfs_exportfs *nxfs;
579 struct nfs_export *nx;
580
1c79356b
A
581 /*
582 * Must be super user
583 */
91447636 584 error = proc_suser(p);
0a7de745
A
585 if (error) {
586 return error;
587 }
91447636 588
6d2010ae 589 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
0a7de745 590 if (!error) {
6d2010ae 591 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
0a7de745
A
592 }
593 if (error) {
594 return error;
595 }
6d2010ae 596 /* limit fh size to length specified (or v3 size by default) */
0a7de745 597 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
6d2010ae 598 fhlen = NFSV3_MAX_FH_SIZE;
0a7de745 599 }
6d2010ae 600 fidlen = fhlen - sizeof(struct nfs_exphandle);
91447636 601
0a7de745
A
602 if (!nfsrv_is_initialized()) {
603 return EINVAL;
604 }
2d21ac55 605
0a7de745
A
606 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
607 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
1c79356b 608 error = namei(&nd);
0a7de745
A
609 if (error) {
610 return error;
611 }
91447636
A
612 nameidone(&nd);
613
1c79356b 614 vp = nd.ni_vp;
91447636
A
615
616 // find exportfs that matches f_mntonname
2d21ac55 617 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 618 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
94ff46dc
A
619 if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
620 /*
621 * The f_mntonname might be a firmlink path. Resolve
622 * it into a physical path and try again.
623 */
624 int pathbuflen = MAXPATHLEN;
625 vnode_t rvp;
626
627 error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
628 if (error) {
629 goto out;
630 }
631 error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
632 VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
633 vnode_put(rvp);
634 if (error) {
635 goto out;
0a7de745 636 }
94ff46dc
A
637 ptr = real_mntonname;
638 nxfs = nfsrv_find_exportfs(ptr);
91447636 639 }
94ff46dc 640 if (nxfs == NULL) {
91447636
A
641 error = EINVAL;
642 goto out;
643 }
644 // find export that best matches remainder of path
645 ptr = path + strlen(nxfs->nxfs_path);
0a7de745 646 while (*ptr && (*ptr == '/')) {
91447636 647 ptr++;
0a7de745 648 }
91447636 649 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
f427ee49 650 size_t len = strlen(nx->nx_path);
0a7de745 651 if (len == 0) { // we've hit the export entry for the root directory
91447636 652 break;
0a7de745
A
653 }
654 if (!strncmp(nx->nx_path, ptr, len)) {
91447636 655 break;
0a7de745 656 }
91447636
A
657 }
658 if (!nx) {
659 error = EINVAL;
660 goto out;
661 }
662
663 bzero(&nfh, sizeof(nfh));
0c530ab8
A
664 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
665 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
666 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
91447636
A
667 nfh.nfh_xh.nxh_flags = 0;
668 nfh.nfh_xh.nxh_reserved = 0;
6d2010ae 669 nfh.nfh_len = fidlen;
2d21ac55 670 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
0a7de745 671 if (nfh.nfh_len > (uint32_t)fidlen) {
91447636 672 error = EOVERFLOW;
0a7de745 673 }
91447636
A
674 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
675 nfh.nfh_len += sizeof(nfh.nfh_xh);
2d21ac55 676 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636
A
677
678out:
2d21ac55 679 lck_rw_done(&nfsrv_export_rwlock);
91447636 680 vnode_put(vp);
0a7de745
A
681 if (error) {
682 return error;
683 }
5ba3f43e
A
684 /*
685 * At first blush, this may appear to leak a kernel stack
686 * address, but the copyout() never reaches &nfh.nfh_fhp
687 * (sizeof(fhandle_t) < sizeof(nfh)).
688 */
6d2010ae 689 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
0a7de745 690 return error;
1c79356b 691}
ea3f0419 692#endif /* CONFIG_NFS_SERVER */
1c79356b 693
ea3f0419 694#if CONFIG_NFS_SERVER
39236c6e 695extern const struct fileops vnops;
91447636 696
55e303ae
A
697/*
698 * syscall for the rpc.lockd to use to translate a NFS file handle into
699 * an open descriptor.
700 *
701 * warning: do not remove the suser() call or this becomes one giant
702 * security hole.
703 */
55e303ae 704int
ea3f0419
A
705fhopen(proc_t p __no_nfs_server_unused,
706 struct fhopen_args *uap __no_nfs_server_unused,
707 int32_t *retval __no_nfs_server_unused)
55e303ae 708{
91447636
A
709 vnode_t vp;
710 struct nfs_filehandle nfh;
711 struct nfs_export *nx;
712 struct nfs_export_options *nxo;
55e303ae 713 struct flock lf;
91447636
A
714 struct fileproc *fp, *nfp;
715 int fmode, error, type;
55e303ae 716 int indx;
2d21ac55 717 vfs_context_t ctx = vfs_context_current();
91447636
A
718 kauth_action_t action;
719
55e303ae
A
720 /*
721 * Must be super user
722 */
2d21ac55 723 error = suser(vfs_context_ucred(ctx), 0);
0c530ab8 724 if (error) {
0a7de745 725 return error;
0c530ab8 726 }
55e303ae 727
2d21ac55 728 if (!nfsrv_is_initialized()) {
0a7de745 729 return EINVAL;
2d21ac55
A
730 }
731
55e303ae
A
732 fmode = FFLAGS(uap->flags);
733 /* why not allow a non-read/write open for our lockd? */
0a7de745
A
734 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
735 return EINVAL;
736 }
91447636
A
737
738 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
0a7de745
A
739 if (error) {
740 return error;
741 }
91447636 742 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
0a7de745
A
743 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
744 return EINVAL;
745 }
91447636 746 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
0a7de745
A
747 if (error) {
748 return error;
749 }
2d21ac55 750 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
91447636 751
2d21ac55 752 lck_rw_lock_shared(&nfsrv_export_rwlock);
91447636 753 /* now give me my vnode, it gets returned to me with a reference */
2d21ac55
A
754 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
755 lck_rw_done(&nfsrv_export_rwlock);
0c530ab8 756 if (error) {
0a7de745 757 if (error == NFSERR_TRYLATER) {
2d21ac55 758 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
0a7de745
A
759 }
760 return error;
0c530ab8 761 }
91447636 762
55e303ae 763 /*
91447636
A
764 * From now on we have to make sure not
765 * to forget about the vnode.
766 * Any error that causes an abort must vnode_put(vp).
767 * Just set error = err and 'goto bad;'.
55e303ae
A
768 */
769
770 /*
0a7de745
A
771 * from vn_open
772 */
91447636 773 if (vnode_vtype(vp) == VSOCK) {
55e303ae 774 error = EOPNOTSUPP;
0a7de745 775 goto bad;
55e303ae
A
776 }
777
91447636
A
778 /* disallow write operations on directories */
779 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
780 error = EISDIR;
55e303ae
A
781 goto bad;
782 }
783
4bd07ac2 784#if CONFIG_MACF
0a7de745 785 if ((error = mac_vnode_check_open(ctx, vp, fmode))) {
4bd07ac2 786 goto bad;
0a7de745 787 }
4bd07ac2
A
788#endif
789
91447636
A
790 /* compute action to be authorized */
791 action = 0;
0a7de745 792 if (fmode & FREAD) {
91447636 793 action |= KAUTH_VNODE_READ_DATA;
0a7de745
A
794 }
795 if (fmode & (FWRITE | O_TRUNC)) {
91447636 796 action |= KAUTH_VNODE_WRITE_DATA;
0a7de745
A
797 }
798 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
91447636 799 goto bad;
0a7de745 800 }
55e303ae 801
0a7de745 802 if ((error = VNOP_OPEN(vp, fmode, ctx))) {
91447636 803 goto bad;
0a7de745
A
804 }
805 if ((error = vnode_ref_ext(vp, fmode, 0))) {
55e303ae 806 goto bad;
0a7de745 807 }
55e303ae 808
55e303ae
A
809 /*
810 * end of vn_open code
811 */
812
91447636 813 // starting here... error paths should call vn_close/vnode_put
2d21ac55
A
814 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
815 vn_close(vp, fmode & FMASK, ctx);
55e303ae
A
816 goto bad;
817 }
818 fp = nfp;
819
f427ee49
A
820 fp->fp_glob->fg_flag = fmode & FMASK;
821 fp->fp_glob->fg_ops = &vnops;
822 fp->fp_glob->fg_data = (caddr_t)vp;
91447636
A
823
824 // XXX do we really need to support this with fhopen()?
55e303ae
A
825 if (fmode & (O_EXLOCK | O_SHLOCK)) {
826 lf.l_whence = SEEK_SET;
827 lf.l_start = 0;
828 lf.l_len = 0;
0a7de745 829 if (fmode & O_EXLOCK) {
55e303ae 830 lf.l_type = F_WRLCK;
0a7de745 831 } else {
55e303ae 832 lf.l_type = F_RDLCK;
0a7de745 833 }
55e303ae 834 type = F_FLOCK;
0a7de745 835 if ((fmode & FNONBLOCK) == 0) {
55e303ae 836 type |= F_WAIT;
0a7de745 837 }
f427ee49 838 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
2d21ac55
A
839 struct vfs_context context = *vfs_context_current();
840 /* Modify local copy (to not damage thread copy) */
f427ee49 841 context.vc_ucred = fp->fp_glob->fg_cred;
2d21ac55 842
f427ee49 843 vn_close(vp, fp->fp_glob->fg_flag, &context);
91447636 844 fp_free(p, indx, fp);
f427ee49 845 goto bad;
55e303ae 846 }
f427ee49 847 fp->fp_glob->fg_flag |= FWASLOCKED;
55e303ae
A
848 }
849
91447636
A
850 vnode_put(vp);
851
852 proc_fdlock(p);
6601e61a 853 procfdtbl_releasefd(p, indx, NULL);
91447636
A
854 fp_drop(p, indx, fp, 1);
855 proc_fdunlock(p);
856
55e303ae 857 *retval = indx;
0a7de745 858 return 0;
55e303ae
A
859
860bad:
91447636 861 vnode_put(vp);
0a7de745 862 return error;
55e303ae 863}
ea3f0419 864#endif /* CONFIG_NFS_SERVER */
55e303ae 865
ea3f0419 866#if CONFIG_NFS_SERVER
1c79356b 867/*
2d21ac55 868 * NFS server pseudo system call
1c79356b 869 */
1c79356b 870int
ea3f0419
A
871nfssvc(proc_t p __no_nfs_server_unused,
872 struct nfssvc_args *uap __no_nfs_server_unused,
873 __unused int *retval)
1c79356b 874{
91447636
A
875 mbuf_t nam;
876 struct user_nfsd_args user_nfsdarg;
91447636 877 socket_t so;
1c79356b
A
878 int error;
879
ccc36f2f
A
880 AUDIT_ARG(cmd, uap->flag);
881
1c79356b 882 /*
b0d623f7 883 * Must be super user for most operations (export ops checked later).
1c79356b 884 */
0a7de745
A
885 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) {
886 return error;
887 }
2d21ac55
A
888#if CONFIG_MACF
889 error = mac_system_check_nfsd(kauth_cred_get());
0a7de745
A
890 if (error) {
891 return error;
892 }
2d21ac55 893#endif
91447636 894
2d21ac55
A
895 /* make sure NFS server data structures have been initialized */
896 nfsrv_init();
1c79356b 897
2d21ac55 898 if (uap->flag & NFSSVC_ADDSOCK) {
91447636
A
899 if (IS_64BIT_PROCESS(p)) {
900 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
901 } else {
902 struct nfsd_args tmp_args;
903 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
904 if (error == 0) {
905 user_nfsdarg.sock = tmp_args.sock;
906 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
907 user_nfsdarg.namelen = tmp_args.namelen;
908 }
909 }
0a7de745
A
910 if (error) {
911 return error;
912 }
91447636
A
913 /* get the socket */
914 error = file_socket(user_nfsdarg.sock, &so);
0a7de745
A
915 if (error) {
916 return error;
917 }
91447636
A
918 /* Get the client address for connected sockets. */
919 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
920 nam = NULL;
921 } else {
922 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
923 if (error) {
924 /* drop the iocount file_socket() grabbed on the file descriptor */
925 file_drop(user_nfsdarg.sock);
0a7de745 926 return error;
91447636 927 }
1c79356b 928 }
91447636
A
929 /*
930 * nfssvc_addsock() will grab a retain count on the socket
931 * to keep the socket from being closed when nfsd closes its
932 * file descriptor for it.
933 */
2d21ac55 934 error = nfssvc_addsock(so, nam);
91447636
A
935 /* drop the iocount file_socket() grabbed on the file descriptor */
936 file_drop(user_nfsdarg.sock);
937 } else if (uap->flag & NFSSVC_NFSD) {
2d21ac55 938 error = nfssvc_nfsd();
91447636 939 } else if (uap->flag & NFSSVC_EXPORT) {
2d21ac55 940 error = nfssvc_export(uap->argp);
91447636
A
941 } else {
942 error = EINVAL;
1c79356b 943 }
0a7de745 944 if (error == EINTR || error == ERESTART) {
1c79356b 945 error = 0;
0a7de745
A
946 }
947 return error;
1c79356b 948}
ea3f0419
A
949#endif /* CONFIG_NFS_SERVER */
950
951#if CONFIG_NFS_SERVER
1c79356b 952
1c79356b
A
953/*
954 * Adds a socket to the list for servicing by nfsds.
955 */
b0d623f7 956int
2d21ac55 957nfssvc_addsock(socket_t so, mbuf_t mynam)
1c79356b 958{
2d21ac55
A
959 struct nfsrv_sock *slp;
960 int error = 0, sodomain, sotype, soprotocol, on = 1;
f427ee49 961 int first, sobufsize;
91447636 962 struct timeval timeo;
f427ee49 963 u_quad_t sbmaxsize;
91447636
A
964
965 /* make sure mbuf constants are set up */
0a7de745 966 if (!nfs_mbuf_mhlen) {
91447636 967 nfs_mbuf_init();
0a7de745 968 }
91447636
A
969
970 sock_gettype(so, &sodomain, &sotype, &soprotocol);
971
6d2010ae
A
972 /* There should be only one UDP socket for each of IPv4 and IPv6 */
973 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
974 mbuf_freem(mynam);
0a7de745 975 return EEXIST;
6d2010ae
A
976 }
977 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
91447636 978 mbuf_freem(mynam);
0a7de745 979 return EEXIST;
1c79356b
A
980 }
981
2d21ac55 982 /* Set protocol options and reserve some space (for UDP). */
3e170ce0
A
983 if (sotype == SOCK_STREAM) {
984 error = nfsrv_check_exports_allow_address(mynam);
0a7de745 985 if (error) {
cb323159
A
986 log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
987 mbuf_freem(mynam);
0a7de745
A
988 return error;
989 }
91447636 990 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
3e170ce0 991 }
0a7de745 992 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
91447636 993 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
0a7de745 994 }
f427ee49
A
995
996 /* Calculate maximum supported socket buffers sizes */
997 sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
998
999 /* Set socket buffer sizes for UDP/TCP */
1000 sobufsize = min(sbmaxsize, (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF);
1001 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sobufsize, sizeof(sobufsize));
1002 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &sobufsize, sizeof(sobufsize));
1003
1004 if (error) {
1005 log(LOG_INFO, "nfssvc_addsock: socket buffer setting error(s) %d\n", error);
1006 error = 0;
1c79356b 1007 }
91447636
A
1008 sock_nointerrupt(so, 0);
1009
2d21ac55
A
1010 /*
1011 * Set socket send/receive timeouts.
1012 * Receive timeout shouldn't matter, but setting the send timeout
1013 * will make sure that an unresponsive client can't hang the server.
1014 */
91447636 1015 timeo.tv_usec = 0;
2d21ac55
A
1016 timeo.tv_sec = 1;
1017 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
1018 timeo.tv_sec = 30;
1019 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
1020 if (error) {
1021 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
1022 error = 0;
1023 }
91447636 1024
2d21ac55
A
1025 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
1026 if (!slp) {
1027 mbuf_freem(mynam);
0a7de745 1028 return ENOMEM;
2d21ac55 1029 }
0a7de745 1030 bzero((caddr_t)slp, sizeof(struct nfsrv_sock));
c3c9b80d
A
1031 lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
1032 lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);
2d21ac55 1033
c3c9b80d 1034 lck_mtx_lock(&nfsd_mutex);
2d21ac55
A
1035
1036 if (soprotocol == IPPROTO_UDP) {
6d2010ae
A
1037 if (sodomain == AF_INET) {
1038 /* There should be only one UDP/IPv4 socket */
1039 if (nfsrv_udpsock) {
c3c9b80d 1040 lck_mtx_unlock(&nfsd_mutex);
6d2010ae
A
1041 nfsrv_slpfree(slp);
1042 mbuf_freem(mynam);
0a7de745 1043 return EEXIST;
6d2010ae
A
1044 }
1045 nfsrv_udpsock = slp;
1046 }
1047 if (sodomain == AF_INET6) {
1048 /* There should be only one UDP/IPv6 socket */
1049 if (nfsrv_udp6sock) {
c3c9b80d 1050 lck_mtx_unlock(&nfsd_mutex);
6d2010ae
A
1051 nfsrv_slpfree(slp);
1052 mbuf_freem(mynam);
0a7de745 1053 return EEXIST;
6d2010ae
A
1054 }
1055 nfsrv_udp6sock = slp;
91447636 1056 }
1c79356b 1057 }
91447636 1058
2d21ac55 1059 /* add the socket to the list */
316670eb 1060 first = TAILQ_EMPTY(&nfsrv_socklist);
2d21ac55 1061 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
cb323159 1062 if (sotype == SOCK_STREAM) {
3e170ce0 1063 nfsrv_sock_tcp_cnt++;
0a7de745 1064 if (nfsrv_sock_idle_timeout < 0) {
3e170ce0 1065 nfsrv_sock_idle_timeout = 0;
0a7de745
A
1066 }
1067 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
3e170ce0 1068 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
0a7de745 1069 }
3e170ce0
A
1070 /*
1071 * Possibly start or stop the idle timer. We only start the idle timer when
1072 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
1073 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
1074 * the number of connections.
1075 */
1076 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
1077 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1078 if (nfsrv_idlesock_timer_on) {
1079 thread_call_cancel(nfsrv_idlesock_timer_call);
1080 nfsrv_idlesock_timer_on = 0;
1081 }
1082 } else {
1083 struct nfsrv_sock *old_slp;
1084 struct timeval now;
1085 time_t time_to_wait = nfsrv_sock_idle_timeout;
1086 /*
1087 * Get the oldest tcp socket and calculate the
1088 * earliest time for the next idle timer to fire
1089 * based on the possibly updated nfsrv_sock_idle_timeout
1090 */
1091 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
1092 if (old_slp->ns_sotype == SOCK_STREAM) {
1093 microuptime(&now);
1094 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
0a7de745 1095 if (time_to_wait < 1) {
3e170ce0 1096 time_to_wait = 1;
0a7de745 1097 }
3e170ce0
A
1098 break;
1099 }
1100 }
1101 /*
1102 * If we have a timer scheduled, but if its going to fire too late,
1103 * turn it off.
1104 */
1105 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
1106 thread_call_cancel(nfsrv_idlesock_timer_call);
1107 nfsrv_idlesock_timer_on = 0;
1108 }
1109 /* Schedule the idle thread if it isn't already */
1110 if (!nfsrv_idlesock_timer_on) {
1111 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1112 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1113 }
1114 }
1115 }
1116 }
2d21ac55 1117
91447636 1118 sock_retain(so); /* grab a retain count on the socket */
1c79356b 1119 slp->ns_so = so;
91447636 1120 slp->ns_sotype = sotype;
1c79356b 1121 slp->ns_nam = mynam;
f427ee49 1122 slp->ns_sobufsize = sobufsize;
91447636 1123
316670eb
A
1124 /* set up the socket up-call */
1125 nfsrv_uc_addsock(slp, first);
91447636 1126
2d21ac55
A
1127 /* mark that the socket is not in the nfsrv_sockwg list */
1128 slp->ns_wgq.tqe_next = SLPNOLIST;
3e170ce0 1129
91447636
A
1130 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
1131
1c79356b 1132 nfsrv_wakenfsd(slp);
c3c9b80d 1133 lck_mtx_unlock(&nfsd_mutex);
91447636 1134
0a7de745 1135 return 0;
1c79356b
A
1136}
1137
1138/*
2d21ac55
A
1139 * nfssvc_nfsd()
1140 *
1141 * nfsd theory of operation:
1142 *
1143 * The first nfsd thread stays in user mode accepting new TCP connections
1144 * which are then added via the "addsock" call. The rest of the nfsd threads
1145 * simply call into the kernel and remain there in a loop handling NFS
1146 * requests until killed by a signal.
0a7de745 1147 *
2d21ac55
A
1148 * There's a list of nfsd threads (nfsd_head).
1149 * There's an nfsd queue that contains only those nfsds that are
1150 * waiting for work to do (nfsd_queue).
1151 *
1152 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
1153 * managing the work on the sockets:
1154 * nfsrv_sockwait - sockets w/new data waiting to be worked on
1155 * nfsrv_sockwork - sockets being worked on which may have more work to do
1156 * nfsrv_sockwg -- sockets which have pending write gather data
1157 * When a socket receives data, if it is not currently queued, it
1158 * will be placed at the end of the "wait" queue.
1159 * Whenever a socket needs servicing we make sure it is queued and
1160 * wake up a waiting nfsd (if there is one).
1161 *
1162 * nfsds will service at most 8 requests from the same socket before
1163 * defecting to work on another socket.
1164 * nfsds will defect immediately if there are any sockets in the "wait" queue
1165 * nfsds looking for a socket to work on check the "wait" queue first and
1166 * then check the "work" queue.
1167 * When an nfsd starts working on a socket, it removes it from the head of
1168 * the queue it's currently on and moves it to the end of the "work" queue.
0a7de745 1169 * When nfsds are checking the queues for work, any sockets found not to
2d21ac55
A
1170 * have any work are simply dropped from the queue.
1171 *
1c79356b 1172 */
b0d623f7 1173int
2d21ac55 1174nfssvc_nfsd(void)
1c79356b 1175{
f427ee49 1176 mbuf_t m, mrep = NULL;
2d21ac55
A
1177 struct nfsrv_sock *slp;
1178 struct nfsd *nfsd;
1c79356b 1179 struct nfsrv_descript *nd = NULL;
91447636 1180 int error = 0, cacherep, writes_todo;
2d21ac55 1181 int siz, procrastinate, opcnt = 0;
f427ee49 1182 time_t cur_usec;
55e303ae 1183 struct timeval now;
2d21ac55 1184 struct vfs_context context;
316670eb 1185 struct timespec to;
1c79356b
A
1186
1187#ifndef nolint
1188 cacherep = RC_DOIT;
1189 writes_todo = 0;
1190#endif
91447636 1191
2d21ac55 1192 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
0a7de745
A
1193 if (!nfsd) {
1194 return ENOMEM;
1195 }
2d21ac55 1196 bzero(nfsd, sizeof(struct nfsd));
c3c9b80d 1197 lck_mtx_lock(&nfsd_mutex);
0a7de745
A
1198 if (nfsd_thread_count++ == 0) {
1199 nfsrv_initcache(); /* Init the server request cache */
1200 }
2d21ac55 1201 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
c3c9b80d 1202 lck_mtx_unlock(&nfsd_mutex);
2d21ac55
A
1203
1204 context.vc_thread = current_thread();
91447636 1205
316670eb
A
1206 /* Set time out so that nfsd threads can wake up a see if they are still needed. */
1207 to.tv_sec = 5;
1208 to.tv_nsec = 0;
1209
1c79356b
A
1210 /*
1211 * Loop getting rpc requests until SIGKILL.
1212 */
1213 for (;;) {
2d21ac55
A
1214 if (nfsd_thread_max <= 0) {
1215 /* NFS server shutting down, get out ASAP */
1216 error = EINTR;
1217 slp = nfsd->nfsd_slp;
1218 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
1219 /* already have some work to do */
1220 error = 0;
1221 slp = nfsd->nfsd_slp;
1222 } else {
1223 /* need to find work to do */
1224 error = 0;
c3c9b80d 1225 lck_mtx_lock(&nfsd_mutex);
2d21ac55
A
1226 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
1227 if (nfsd_thread_count > nfsd_thread_max) {
1228 /*
1229 * If we have no socket and there are more
1230 * nfsd threads than configured, let's exit.
1231 */
1232 error = 0;
1233 goto done;
1234 }
1c79356b 1235 nfsd->nfsd_flag |= NFSD_WAITING;
2d21ac55 1236 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
c3c9b80d 1237 error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
91447636 1238 if (error) {
2d21ac55
A
1239 if (nfsd->nfsd_flag & NFSD_WAITING) {
1240 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
1241 nfsd->nfsd_flag &= ~NFSD_WAITING;
1242 }
0a7de745 1243 if (error == EWOULDBLOCK) {
316670eb 1244 continue;
0a7de745 1245 }
1c79356b 1246 goto done;
91447636 1247 }
1c79356b 1248 }
2d21ac55
A
1249 slp = nfsd->nfsd_slp;
1250 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
1251 /* look for a socket to work on in the wait queue */
1252 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
1253 lck_rw_lock_exclusive(&slp->ns_rwlock);
1254 /* remove from the head of the queue */
1255 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1256 slp->ns_flag &= ~SLP_WAITQ;
0a7de745 1257 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
2d21ac55 1258 break;
0a7de745 1259 }
2d21ac55
A
1260 /* nothing to do, so skip this socket */
1261 lck_rw_done(&slp->ns_rwlock);
1c79356b 1262 }
2d21ac55
A
1263 }
1264 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
1265 /* look for a socket to work on in the work queue */
1266 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
1267 lck_rw_lock_exclusive(&slp->ns_rwlock);
1268 /* remove from the head of the queue */
1269 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1270 slp->ns_flag &= ~SLP_WORKQ;
0a7de745 1271 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
2d21ac55 1272 break;
0a7de745 1273 }
2d21ac55
A
1274 /* nothing to do, so skip this socket */
1275 lck_rw_done(&slp->ns_rwlock);
1276 }
1277 }
1278 if (!nfsd->nfsd_slp && slp) {
1279 /* we found a socket to work on, grab a reference */
1280 slp->ns_sref++;
3e170ce0
A
1281 microuptime(&now);
1282 slp->ns_timestamp = now.tv_sec;
1283 /* We keep the socket list in least recently used order for reaping idle sockets */
1284 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1285 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
2d21ac55
A
1286 nfsd->nfsd_slp = slp;
1287 opcnt = 0;
1288 /* and put it at the back of the work queue */
1289 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1290 slp->ns_flag |= SLP_WORKQ;
1291 lck_rw_done(&slp->ns_rwlock);
1c79356b 1292 }
c3c9b80d 1293 lck_mtx_unlock(&nfsd_mutex);
0a7de745 1294 if (!slp) {
1c79356b 1295 continue;
0a7de745 1296 }
91447636 1297 lck_rw_lock_exclusive(&slp->ns_rwlock);
1c79356b 1298 if (slp->ns_flag & SLP_VALID) {
0a7de745 1299 if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
91447636
A
1300 slp->ns_flag &= ~SLP_NEEDQ;
1301 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1302 }
0a7de745 1303 if (slp->ns_flag & SLP_DISCONN) {
743b1565 1304 nfsrv_zapsock(slp);
0a7de745 1305 }
1c79356b 1306 error = nfsrv_dorec(slp, nfsd, &nd);
0a7de745
A
1307 if (error == EINVAL) { // RPCSEC_GSS drop
1308 if (slp->ns_sotype == SOCK_STREAM) {
2d21ac55 1309 nfsrv_zapsock(slp); // drop connection
0a7de745 1310 }
2d21ac55
A
1311 }
1312 writes_todo = 0;
1313 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1314 microuptime(&now);
f427ee49 1315 cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
2d21ac55
A
1316 if (slp->ns_wgtime <= cur_usec) {
1317 error = 0;
1318 cacherep = RC_DOIT;
1319 writes_todo = 1;
1320 }
1321 slp->ns_flag &= ~SLP_DOWRITES;
1322 }
1c79356b
A
1323 nfsd->nfsd_flag |= NFSD_REQINPROG;
1324 }
91447636 1325 lck_rw_done(&slp->ns_rwlock);
1c79356b 1326 }
2d21ac55 1327 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1c79356b 1328 if (nd) {
2d21ac55 1329 nfsm_chain_cleanup(&nd->nd_nmreq);
0a7de745 1330 if (nd->nd_nam2) {
91447636 1331 mbuf_freem(nd->nd_nam2);
0a7de745
A
1332 }
1333 if (IS_VALID_CRED(nd->nd_cr)) {
0c530ab8 1334 kauth_cred_unref(&nd->nd_cr);
0a7de745
A
1335 }
1336 if (nd->nd_gss_context) {
6d2010ae 1337 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
0a7de745 1338 }
f427ee49 1339 NFS_ZFREE(nfsrv_descript_zone, nd);
1c79356b 1340 }
91447636 1341 nfsd->nfsd_slp = NULL;
1c79356b 1342 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
0a7de745 1343 if (slp) {
2d21ac55 1344 nfsrv_slpderef(slp);
0a7de745
A
1345 }
1346 if (nfsd_thread_max <= 0) {
2d21ac55 1347 break;
0a7de745 1348 }
1c79356b
A
1349 continue;
1350 }
1c79356b 1351 if (nd) {
0a7de745
A
1352 microuptime(&nd->nd_starttime);
1353 if (nd->nd_nam2) {
1354 nd->nd_nam = nd->nd_nam2;
1355 } else {
1356 nd->nd_nam = slp->ns_nam;
1c79356b 1357 }
1c79356b 1358
0a7de745
A
1359 cacherep = nfsrv_getcache(nd, slp, &mrep);
1360
1361 if (nfsrv_require_resv_port) {
1362 /* Check if source port is a reserved port */
1363 in_port_t port = 0;
1364 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1365
1366 if (saddr->sa_family == AF_INET) {
1367 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1368 } else if (saddr->sa_family == AF_INET6) {
1369 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1370 }
1371 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1372 nd->nd_procnum = NFSPROC_NOOP;
1373 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1374 cacherep = RC_DOIT;
1375 }
1376 }
1c79356b
A
1377 }
1378
1379 /*
2d21ac55 1380 * Loop to get all the write RPC replies that have been
1c79356b
A
1381 * gathered together.
1382 */
1383 do {
0a7de745
A
1384 switch (cacherep) {
1385 case RC_DOIT:
1386 if (nd && (nd->nd_vers == NFS_VER3)) {
1387 procrastinate = nfsrv_wg_delay_v3;
1388 } else {
1389 procrastinate = nfsrv_wg_delay;
2d21ac55 1390 }
0a7de745
A
1391 lck_rw_lock_shared(&nfsrv_export_rwlock);
1392 context.vc_ucred = NULL;
1393 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
1394 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1395 } else {
1396 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
55e303ae 1397 }
0a7de745
A
1398 lck_rw_done(&nfsrv_export_rwlock);
1399 if (mrep == NULL) {
1400 /*
1401 * If this is a stream socket and we are not going
1402 * to send a reply we better close the connection
1403 * so the client doesn't hang.
1404 */
1405 if (error && slp->ns_sotype == SOCK_STREAM) {
1406 lck_rw_lock_exclusive(&slp->ns_rwlock);
1407 nfsrv_zapsock(slp);
1408 lck_rw_done(&slp->ns_rwlock);
1409 printf("NFS server: NULL reply from proc = %d error = %d\n",
1410 nd->nd_procnum, error);
1411 }
1412 break;
1413 }
1414 if (error) {
1415 OSAddAtomic64(1, &nfsstats.srv_errs);
1416 nfsrv_updatecache(nd, FALSE, mrep);
1417 if (nd->nd_nam2) {
1418 mbuf_freem(nd->nd_nam2);
1419 nd->nd_nam2 = NULL;
1420 }
1421 break;
1422 }
1423 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1424 nfsrv_updatecache(nd, TRUE, mrep);
f427ee49 1425 OS_FALLTHROUGH;
2d21ac55 1426
0a7de745
A
1427 case RC_REPLY:
1428 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1429 /*
1430 * Need to checksum or encrypt the reply
1431 */
1432 error = nfs_gss_svc_protect_reply(nd, mrep);
1433 if (error) {
1434 mbuf_freem(mrep);
1435 break;
1436 }
1437 }
1438
2d21ac55 1439 /*
0a7de745 1440 * Get the total size of the reply
2d21ac55 1441 */
0a7de745
A
1442 m = mrep;
1443 siz = 0;
1444 while (m) {
1445 siz += mbuf_len(m);
1446 m = mbuf_next(m);
1447 }
1448 if (siz <= 0 || siz > NFS_MAXPACKET) {
1449 printf("mbuf siz=%d\n", siz);
1450 panic("Bad nfs svc reply");
1451 }
1452 m = mrep;
1453 mbuf_pkthdr_setlen(m, siz);
1454 error = mbuf_pkthdr_setrcvif(m, NULL);
2d21ac55 1455 if (error) {
0a7de745 1456 panic("nfsd setrcvif failed: %d", error);
2d21ac55 1457 }
0a7de745
A
1458 /*
1459 * For stream protocols, prepend a Sun RPC
1460 * Record Mark.
1461 */
1462 if (slp->ns_sotype == SOCK_STREAM) {
1463 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1464 if (!error) {
1465 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1466 }
1467 }
1468 if (!error) {
1469 if (slp->ns_flag & SLP_VALID) {
1470 error = nfsrv_send(slp, nd->nd_nam2, m);
1471 } else {
1472 error = EPIPE;
1473 mbuf_freem(m);
1474 }
91447636 1475 } else {
0a7de745 1476 mbuf_freem(m);
91447636 1477 }
0a7de745
A
1478 mrep = NULL;
1479 if (nd->nd_nam2) {
1480 mbuf_freem(nd->nd_nam2);
1481 nd->nd_nam2 = NULL;
1482 }
1483 if (error == EPIPE) {
1484 lck_rw_lock_exclusive(&slp->ns_rwlock);
1485 nfsrv_zapsock(slp);
1486 lck_rw_done(&slp->ns_rwlock);
1487 }
1488 if (error == EINTR || error == ERESTART) {
1489 nfsm_chain_cleanup(&nd->nd_nmreq);
1490 if (IS_VALID_CRED(nd->nd_cr)) {
1491 kauth_cred_unref(&nd->nd_cr);
1492 }
1493 if (nd->nd_gss_context) {
1494 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1495 }
f427ee49 1496 NFS_ZFREE(nfsrv_descript_zone, nd);
0a7de745 1497 nfsrv_slpderef(slp);
c3c9b80d 1498 lck_mtx_lock(&nfsd_mutex);
0a7de745
A
1499 goto done;
1500 }
1501 break;
1502 case RC_DROPIT:
91447636 1503 mbuf_freem(nd->nd_nam2);
55e303ae 1504 nd->nd_nam2 = NULL;
0a7de745 1505 break;
55e303ae 1506 }
0a7de745
A
1507 ;
1508 opcnt++;
1509 if (nd) {
2d21ac55 1510 nfsm_chain_cleanup(&nd->nd_nmreq);
0a7de745
A
1511 if (nd->nd_nam2) {
1512 mbuf_freem(nd->nd_nam2);
1513 }
1514 if (IS_VALID_CRED(nd->nd_cr)) {
0c530ab8 1515 kauth_cred_unref(&nd->nd_cr);
0a7de745
A
1516 }
1517 if (nd->nd_gss_context) {
6d2010ae 1518 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
0a7de745 1519 }
f427ee49 1520 NFS_ZFREE(nfsrv_descript_zone, nd);
1c79356b 1521 }
0a7de745
A
1522
1523 /*
1524 * Check to see if there are outstanding writes that
1525 * need to be serviced.
1526 */
1527 writes_todo = 0;
1528 if (slp->ns_wgtime) {
1529 microuptime(&now);
f427ee49 1530 cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
0a7de745
A
1531 if (slp->ns_wgtime <= cur_usec) {
1532 cacherep = RC_DOIT;
1533 writes_todo = 1;
1534 }
2d21ac55 1535 }
1c79356b 1536 } while (writes_todo);
2d21ac55
A
1537
1538 nd = NULL;
1539 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1540 lck_rw_lock_exclusive(&slp->ns_rwlock);
1541 error = nfsrv_dorec(slp, nfsd, &nd);
0a7de745
A
1542 if (error == EINVAL) { // RPCSEC_GSS drop
1543 if (slp->ns_sotype == SOCK_STREAM) {
2d21ac55 1544 nfsrv_zapsock(slp); // drop connection
0a7de745 1545 }
2d21ac55 1546 }
91447636 1547 lck_rw_done(&slp->ns_rwlock);
2d21ac55
A
1548 }
1549 if (!nd) {
1550 /* drop our reference on the socket */
1c79356b
A
1551 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1552 nfsd->nfsd_slp = NULL;
1553 nfsrv_slpderef(slp);
91447636 1554 }
1c79356b 1555 }
c3c9b80d 1556 lck_mtx_lock(&nfsd_mutex);
2d21ac55 1557done:
1c79356b 1558 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
91447636 1559 FREE(nfsd, M_NFSD);
0a7de745 1560 if (--nfsd_thread_count == 0) {
2d21ac55 1561 nfsrv_cleanup();
0a7de745 1562 }
c3c9b80d 1563 lck_mtx_unlock(&nfsd_mutex);
0a7de745 1564 return error;
1c79356b 1565}
91447636 1566
b0d623f7 1567int
2d21ac55 1568nfssvc_export(user_addr_t argp)
91447636
A
1569{
1570 int error = 0, is_64bit;
1571 struct user_nfs_export_args unxa;
2d21ac55 1572 vfs_context_t ctx = vfs_context_current();
91447636 1573
2d21ac55 1574 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
91447636
A
1575
1576 /* copy in pointers to path and export args */
1577 if (is_64bit) {
1578 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1579 } else {
1580 struct nfs_export_args tnxa;
1581 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1582 if (error == 0) {
1583 /* munge into LP64 version of nfs_export_args structure */
1584 unxa.nxa_fsid = tnxa.nxa_fsid;
1585 unxa.nxa_expid = tnxa.nxa_expid;
1586 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1587 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1588 unxa.nxa_flags = tnxa.nxa_flags;
1589 unxa.nxa_netcount = tnxa.nxa_netcount;
1590 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1591 }
1592 }
0a7de745
A
1593 if (error) {
1594 return error;
1595 }
91447636 1596
2d21ac55 1597 error = nfsrv_export(&unxa, ctx);
91447636 1598
0a7de745 1599 return error;
91447636
A
1600}
1601
1c79356b 1602/*
2d21ac55 1603 * Shut down a socket associated with an nfsrv_sock structure.
1c79356b
A
1604 * Should be called with the send lock set, if required.
1605 * The trick here is to increment the sref at the start, so that the nfsds
1606 * will stop using it and clear ns_flag at the end so that it will not be
1607 * reassigned during cleanup.
1608 */
b0d623f7 1609void
2d21ac55 1610nfsrv_zapsock(struct nfsrv_sock *slp)
1c79356b 1611{
91447636 1612 socket_t so;
1c79356b 1613
0a7de745 1614 if ((slp->ns_flag & SLP_VALID) == 0) {
91447636 1615 return;
0a7de745 1616 }
1c79356b 1617 slp->ns_flag &= ~SLP_ALLFLAGS;
91447636
A
1618
1619 so = slp->ns_so;
0a7de745 1620 if (so == NULL) {
91447636 1621 return;
0a7de745 1622 }
91447636 1623
3e170ce0 1624 sock_setupcall(so, NULL, NULL);
91447636 1625 sock_shutdown(so, SHUT_RDWR);
316670eb
A
1626
1627 /*
1628 * Remove from the up-call queue
1629 */
1630 nfsrv_uc_dequeue(slp);
1c79356b
A
1631}
1632
1c79356b 1633/*
91447636 1634 * cleanup and release a server socket structure.
1c79356b 1635 */
b0d623f7 1636void
2d21ac55 1637nfsrv_slpfree(struct nfsrv_sock *slp)
1c79356b 1638{
91447636 1639 struct nfsrv_descript *nwp, *nnwp;
1c79356b 1640
91447636
A
1641 if (slp->ns_so) {
1642 sock_release(slp->ns_so);
1643 slp->ns_so = NULL;
1644 }
0a7de745 1645 if (slp->ns_nam) {
91447636 1646 mbuf_free(slp->ns_nam);
0a7de745
A
1647 }
1648 if (slp->ns_raw) {
91447636 1649 mbuf_freem(slp->ns_raw);
0a7de745
A
1650 }
1651 if (slp->ns_rec) {
91447636 1652 mbuf_freem(slp->ns_rec);
0a7de745
A
1653 }
1654 if (slp->ns_frag) {
2d21ac55 1655 mbuf_freem(slp->ns_frag);
0a7de745 1656 }
2d21ac55
A
1657 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1658 slp->ns_reccnt = 0;
55e303ae 1659
91447636
A
1660 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1661 nnwp = nwp->nd_tq.le_next;
1662 LIST_REMOVE(nwp, nd_tq);
2d21ac55 1663 nfsm_chain_cleanup(&nwp->nd_nmreq);
0a7de745 1664 if (nwp->nd_mrep) {
2d21ac55 1665 mbuf_freem(nwp->nd_mrep);
0a7de745
A
1666 }
1667 if (nwp->nd_nam2) {
2d21ac55 1668 mbuf_freem(nwp->nd_nam2);
0a7de745
A
1669 }
1670 if (IS_VALID_CRED(nwp->nd_cr)) {
0c530ab8 1671 kauth_cred_unref(&nwp->nd_cr);
0a7de745
A
1672 }
1673 if (nwp->nd_gss_context) {
6d2010ae 1674 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
0a7de745 1675 }
f427ee49 1676 NFS_ZFREE(nfsrv_descript_zone, nwp);
55e303ae 1677 }
91447636
A
1678 LIST_INIT(&slp->ns_tq);
1679
c3c9b80d
A
1680 lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
1681 lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
91447636 1682 FREE(slp, M_NFSSVC);
55e303ae
A
1683}
1684
1685/*
91447636
A
1686 * Derefence a server socket structure. If it has no more references and
1687 * is no longer valid, you can throw it away.
55e303ae 1688 */
3e170ce0
A
1689static void
1690nfsrv_slpderef_locked(struct nfsrv_sock *slp)
55e303ae 1691{
91447636
A
1692 lck_rw_lock_exclusive(&slp->ns_rwlock);
1693 slp->ns_sref--;
2d21ac55 1694
91447636 1695 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
2d21ac55
A
1696 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1697 /* remove socket from queue since there's no work */
0a7de745 1698 if (slp->ns_flag & SLP_WAITQ) {
2d21ac55 1699 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
0a7de745 1700 } else {
2d21ac55 1701 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
0a7de745 1702 }
2d21ac55
A
1703 slp->ns_flag &= ~SLP_QUEUED;
1704 }
91447636 1705 lck_rw_done(&slp->ns_rwlock);
91447636 1706 return;
55e303ae 1707 }
91447636 1708
2d21ac55
A
1709 /* This socket is no longer valid, so we'll get rid of it */
1710
1711 if (slp->ns_flag & SLP_QUEUED) {
0a7de745 1712 if (slp->ns_flag & SLP_WAITQ) {
2d21ac55 1713 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
0a7de745 1714 } else {
2d21ac55 1715 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
0a7de745 1716 }
2d21ac55
A
1717 slp->ns_flag &= ~SLP_QUEUED;
1718 }
3e170ce0 1719 lck_rw_done(&slp->ns_rwlock);
2d21ac55 1720
2d21ac55 1721 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
0a7de745 1722 if (slp->ns_sotype == SOCK_STREAM) {
3e170ce0 1723 nfsrv_sock_tcp_cnt--;
0a7de745 1724 }
2d21ac55 1725
0a7de745 1726 /* now remove from the write gather socket list */
2d21ac55
A
1727 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1728 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1729 slp->ns_wgq.tqe_next = SLPNOLIST;
1730 }
3e170ce0
A
1731 nfsrv_slpfree(slp);
1732}
1733
1734void
1735nfsrv_slpderef(struct nfsrv_sock *slp)
1736{
c3c9b80d 1737 lck_mtx_lock(&nfsd_mutex);
3e170ce0 1738 nfsrv_slpderef_locked(slp);
c3c9b80d 1739 lck_mtx_unlock(&nfsd_mutex);
55e303ae
A
1740}
1741
1c79356b 1742/*
3e170ce0
A
1743 * Check periodically for idle sockest if needed and
1744 * zap them.
1c79356b
A
1745 */
1746void
3e170ce0 1747nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
1c79356b 1748{
3e170ce0 1749 struct nfsrv_sock *slp, *tslp;
743b1565 1750 struct timeval now;
3e170ce0 1751 time_t time_to_wait = nfsrv_sock_idle_timeout;
1c79356b 1752
2d21ac55 1753 microuptime(&now);
c3c9b80d 1754 lck_mtx_lock(&nfsd_mutex);
1c79356b 1755
3e170ce0 1756 /* Turn off the timer if we're suppose to and get out */
0a7de745
A
1757 if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
1758 nfsrv_sock_idle_timeout = 0;
1759 }
3e170ce0
A
1760 if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
1761 nfsrv_idlesock_timer_on = 0;
c3c9b80d 1762 lck_mtx_unlock(&nfsd_mutex);
2d21ac55 1763 return;
91447636 1764 }
1c79356b 1765
3e170ce0
A
1766 TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
1767 lck_rw_lock_exclusive(&slp->ns_rwlock);
1768 /* Skip udp and referenced sockets */
1769 if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
1770 lck_rw_done(&slp->ns_rwlock);
1771 continue;
1772 }
1773 /*
1774 * If this is the first non-referenced socket that hasn't idle out,
1775 * use its time stamp to calculate the earlist time in the future
1776 * to start the next invocation of the timer. Since the nfsrv_socklist
1777 * is sorted oldest access to newest. Once we find the first one,
1778 * we're done and break out of the loop.
1779 */
0a7de745
A
1780 if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
1781 nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
3e170ce0 1782 time_to_wait -= now.tv_sec - slp->ns_timestamp;
0a7de745 1783 if (time_to_wait < 1) {
3e170ce0 1784 time_to_wait = 1;
0a7de745 1785 }
3e170ce0
A
1786 lck_rw_done(&slp->ns_rwlock);
1787 break;
1788 }
1789 /*
1790 * Bump the ref count. nfsrv_slpderef below will destroy
1791 * the socket, since nfsrv_zapsock has closed it.
1792 */
1793 slp->ns_sref++;
1794 nfsrv_zapsock(slp);
1795 lck_rw_done(&slp->ns_rwlock);
1796 nfsrv_slpderef_locked(slp);
1797 }
2d21ac55 1798
3e170ce0
A
1799 /* Start ourself back up */
1800 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1801 /* Remember when the next timer will fire for nfssvc_addsock. */
1802 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
c3c9b80d 1803 lck_mtx_unlock(&nfsd_mutex);
1c79356b
A
1804}
1805
1806/*
2d21ac55 1807 * Clean up the data structures for the server.
1c79356b 1808 */
2d21ac55
A
1809void
1810nfsrv_cleanup(void)
1c79356b 1811{
2d21ac55 1812 struct nfsrv_sock *slp, *nslp;
55e303ae 1813 struct timeval now;
b0d623f7 1814#if CONFIG_FSE
2d21ac55
A
1815 struct nfsrv_fmod *fp, *nfp;
1816 int i;
b0d623f7 1817#endif
1c79356b 1818
55e303ae 1819 microuptime(&now);
2d21ac55
A
1820 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1821 nslp = TAILQ_NEXT(slp, ns_chain);
3e170ce0
A
1822 lck_rw_lock_exclusive(&slp->ns_rwlock);
1823 slp->ns_sref++;
0a7de745 1824 if (slp->ns_flag & SLP_VALID) {
2d21ac55 1825 nfsrv_zapsock(slp);
0a7de745 1826 }
3e170ce0
A
1827 lck_rw_done(&slp->ns_rwlock);
1828 nfsrv_slpderef_locked(slp);
2d21ac55 1829 }
3e170ce0 1830#
b0d623f7 1831#if CONFIG_FSE
2d21ac55
A
1832 /*
1833 * Flush pending file write fsevents
1834 */
c3c9b80d 1835 lck_mtx_lock(&nfsrv_fmod_mutex);
2d21ac55
A
1836 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1837 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1838 /*
1839 * Fire off the content modified fsevent for each
1840 * entry, remove it from the list, and free it.
1841 */
6d2010ae
A
1842 if (nfsrv_fsevents_enabled) {
1843 fp->fm_context.vc_thread = current_thread();
2d21ac55 1844 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
0a7de745
A
1845 FSE_ARG_VNODE, fp->fm_vp,
1846 FSE_ARG_DONE);
6d2010ae 1847 }
2d21ac55
A
1848 vnode_put(fp->fm_vp);
1849 kauth_cred_unref(&fp->fm_context.vc_ucred);
1850 nfp = LIST_NEXT(fp, fm_link);
1851 LIST_REMOVE(fp, fm_link);
1852 FREE(fp, M_TEMP);
1853 }
1854 }
1855 nfsrv_fmod_pending = 0;
c3c9b80d 1856 lck_mtx_unlock(&nfsrv_fmod_mutex);
b0d623f7 1857#endif
2d21ac55 1858
316670eb 1859 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
2d21ac55 1860
0a7de745
A
1861 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1862
1863 nfsrv_cleancache(); /* And clear out server cache */
2d21ac55
A
1864
1865 nfsrv_udpsock = NULL;
6d2010ae 1866 nfsrv_udp6sock = NULL;
1c79356b 1867}
2d21ac55 1868
ea3f0419 1869#endif /* CONFIG_NFS_SERVER */