2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
68 #include <nfs/nfs_conf.h>
71 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72 * support for mandatory and extensible security protections. This notice
73 * is included in support of clause 2.2 (b) of the Apple Public License,
74 * Version 2.0.
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/filedesc.h>
83 #include <sys/vnode_internal.h>
84 #include <sys/mount_internal.h>
85 #include <sys/proc_internal.h> /* for fdflags */
86 #include <sys/kauth.h>
87 #include <sys/sysctl.h>
90 #include <sys/malloc.h>
91 #include <sys/kpi_mbuf.h>
92 #include <sys/socket.h>
93 #include <sys/socketvar.h>
94 #include <sys/domain.h>
95 #include <sys/protosw.h>
96 #include <sys/fcntl.h>
97 #include <sys/lockf.h>
98 #include <sys/syslog.h>
100 #include <sys/sysproto.h>
101 #include <sys/kpi_socket.h>
102 #include <sys/fsevents.h>
103 #include <libkern/OSAtomic.h>
104 #include <kern/thread_call.h>
105 #include <kern/task.h>
107 #include <security/audit/audit.h>
109 #include <netinet/in.h>
110 #include <netinet/tcp.h>
111 #include <nfs/xdr_subs.h>
112 #include <nfs/rpcv2.h>
113 #include <nfs/nfsproto.h>
115 #include <nfs/nfsm_subs.h>
116 #include <nfs/nfsrvcache.h>
117 #include <nfs/nfs_gss.h>
118 #include <nfs/nfsmount.h>
119 #include <nfs/nfsnode.h>
120 #include <nfs/nfs_lock.h>
122 #include <security/mac_framework.h>
/*
 * Local prototype for thread_terminate().  The nfsiod code further down
 * calls it on current_thread() to make an I/O thread exit.  The XXX tag
 * flags this hand-written declaration as a stopgap.
 */
125 kern_return_t
thread_terminate(thread_t
); /* XXX */
127 #if CONFIG_NFS_SERVER
/*
 * NFS server declarations used by this file.
 *
 * nfsrv_procs, nfsrv_wg_delay and nfsrv_wg_delay_v3 are defined in other
 * files (extern).  The static variables are file-local settings and
 * counters; the server sysctl table later in this file publishes
 * nfsrv_require_resv_port, nfsrv_sock_idle_timeout and nfsrv_sock_tcp_cnt.
 */
129 extern const nfsrv_proc_t nfsrv_procs
[NFS_NPROCS
];
131 extern int nfsrv_wg_delay
;
132 extern int nfsrv_wg_delay_v3
;
/* File-local server state; all start at zero. */
134 static int nfsrv_require_resv_port
= 0;
135 static time_t nfsrv_idlesock_timer_on
= 0;
136 static int nfsrv_sock_tcp_cnt
= 0;
/*
 * Idle socket timeout.  The default of 3600 is annotated as one hour, so
 * the unit is seconds.  NOTE(review): NFSD_MIN_IDLE_TIMEOUT is presumably
 * the lower bound in the same unit; its use is not visible here - confirm.
 */
137 #define NFSD_MIN_IDLE_TIMEOUT 30
138 static int nfsrv_sock_idle_timeout
= 3600; /* One hour */
/* Prototypes for the nfssvc() sub-operations and socket helpers. */
140 int nfssvc_export(user_addr_t argp
);
141 int nfssvc_nfsd(void);
142 int nfssvc_addsock(socket_t
, mbuf_t
);
143 void nfsrv_zapsock(struct nfsrv_sock
*);
144 void nfsrv_slpderef(struct nfsrv_sock
*);
145 void nfsrv_slpfree(struct nfsrv_sock
*);
147 #endif /* CONFIG_NFS_SERVER */
/*
 * Declare the parent node _vfs_generic and create the nfs node beneath
 * it.  The client and server sysctl nodes in this file are registered
 * under the resulting _vfs_generic_nfs parent.
 */
153 SYSCTL_DECL(_vfs_generic
);
154 SYSCTL_NODE(_vfs_generic
, OID_AUTO
, nfs
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "nfs hinge");
155 #endif /* CONFIG_NFS */
157 #if CONFIG_NFS_CLIENT
/*
 * NFS client sysctl table.
 *
 * Creates the client node under _vfs_generic_nfs and binds each entry
 * directly to a global variable.  Every entry uses OID_AUTO and
 * CTLFLAG_LOCKED.  Entries are read-write (CTLFLAG_RW) except
 * nfsiod_thread_count and lockd_mounts, which are read-only (CTLFLAG_RD).
 * All description strings are empty.
 */
158 SYSCTL_NODE(_vfs_generic_nfs
, OID_AUTO
, client
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "nfs client hinge");
159 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, initialdowndelay
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_tprintf_initial_delay
, 0, "");
160 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nextdowndelay
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_tprintf_delay
, 0, "");
161 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, iosize
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_iosize
, 0, "");
162 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_cache_timeout
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_access_cache_timeout
, 0, "");
163 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, allow_async
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_allow_async
, 0, "");
164 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, statfs_rate_limit
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_statfs_rate_limit
, 0, "");
165 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nfsiod_thread_max
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsiod_thread_max
, 0, "");
/* The next two entries are read-only (CTLFLAG_RD). */
166 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nfsiod_thread_count
, CTLFLAG_RD
| CTLFLAG_LOCKED
, &nfsiod_thread_count
, 0, "");
167 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, lockd_mounts
, CTLFLAG_RD
| CTLFLAG_LOCKED
, &nfs_lockd_mounts
, 0, "");
168 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, max_async_writes
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_max_async_writes
, 0, "");
169 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_delete
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_access_delete
, 0, "");
170 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_dotzfs
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_access_dotzfs
, 0, "");
171 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_for_getattr
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_access_for_getattr
, 0, "");
172 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, idmap_ctrl
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_idmap_ctrl
, 0, "");
173 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, callback_port
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_callback_port
, 0, "");
174 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, is_mobile
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_is_mobile
, 0, "");
175 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, squishy_flags
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_squishy_flags
, 0, "");
/* debug_ctl is the only unsigned (SYSCTL_UINT) entry in the client table. */
176 SYSCTL_UINT(_vfs_generic_nfs_client
, OID_AUTO
, debug_ctl
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_debug_ctl
, 0, "");
177 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, readlink_nocache
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_readlink_nocache
, 0, "");
/*
 * NOTE(review): the listing skips a line number before each of the next
 * two entries, so they may sit inside feature conditionals that are not
 * shown here - confirm against the full source.
 */
179 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, root_steals_gss_context
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfs_root_steals_ctx
, 0, "");
/* String entry: bound to the nfs4_default_domain buffer and its full size. */
182 SYSCTL_STRING(_vfs_generic_nfs_client
, OID_AUTO
, default_nfs4domain
, CTLFLAG_RW
| CTLFLAG_LOCKED
, nfs4_default_domain
, sizeof(nfs4_default_domain
), "");
184 #endif /* CONFIG_NFS_CLIENT */
186 #if CONFIG_NFS_SERVER
/*
 * NFS server sysctl table.
 *
 * Creates the server node under _vfs_generic_nfs and binds each entry
 * directly to a global or file-local variable.  Every entry uses OID_AUTO
 * and CTLFLAG_LOCKED.  Entries are read-write (CTLFLAG_RW) except
 * nfsd_thread_count, nfsd_tcp_connections and upcall_queue_count, which
 * are read-only (CTLFLAG_RD).  All description strings are empty.
 */
187 SYSCTL_NODE(_vfs_generic_nfs
, OID_AUTO
, server
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "nfs server hinge");
188 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, wg_delay
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_wg_delay
, 0, "");
189 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, wg_delay_v3
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_wg_delay_v3
, 0, "");
190 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, require_resv_port
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_require_resv_port
, 0, "");
191 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, async
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_async
, 0, "");
192 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, export_hash_size
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_export_hash_size
, 0, "");
193 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, reqcache_size
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_reqcache_size
, 0, "");
194 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, request_queue_length
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_sock_max_rec_queue_length
, 0, "");
195 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, user_stats
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_user_stat_enabled
, 0, "");
196 SYSCTL_UINT(_vfs_generic_nfs_server
, OID_AUTO
, gss_context_ttl
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_gss_context_ttl
, 0, "");
/*
 * NOTE(review): the listing skips a line number on either side of the
 * fsevents entry, so it may be wrapped in a conditional that is not
 * shown here - confirm against the full source.
 */
198 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, fsevents
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_fsevents_enabled
, 0, "");
200 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_thread_max
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsd_thread_max
, 0, "");
/* Read-only thread counter. */
201 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_thread_count
, CTLFLAG_RD
| CTLFLAG_LOCKED
, &nfsd_thread_count
, 0, "");
202 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_sock_idle_timeout
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_sock_idle_timeout
, 0, "");
/* Read-only TCP connection counter. */
203 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_tcp_connections
, CTLFLAG_RD
| CTLFLAG_LOCKED
, &nfsrv_sock_tcp_cnt
, 0, "");
/*
 * Upcall queue knobs, compiled only when NFS_UC_Q_DEBUG is defined.
 * NOTE(review): no #endif matching this #ifdef is visible before the
 * CONFIG_NFS_SERVER #endif that follows this table - confirm.
 */
204 #ifdef NFS_UC_Q_DEBUG
205 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, use_upcall_svc
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_uc_use_proxy
, 0, "");
206 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, upcall_queue_limit
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_uc_queue_limit
, 0, "");
207 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, upcall_queue_max_seen
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &nfsrv_uc_queue_max_seen
, 0, "");
/* Read-only; __DECONST casts the address of the counter to int *. */
208 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, upcall_queue_count
, CTLFLAG_RD
| CTLFLAG_LOCKED
, __DECONST(int *, &nfsrv_uc_queue_count
), 0, "");
210 #endif /* CONFIG_NFS_SERVER */
212 #if CONFIG_NFS_CLIENT && CONFIG_NFS4
/*
 * mapname2id: translate the name in map->ntm_name into a numeric id.
 *
 * First nfs4_id2guid() converts the name into map->ntm_guid, with
 * ntm_grpflag passed through.  The GUID is then converted into a gid
 * (kauth_cred_guid2gid) when ntm_grpflag is set, or into a uid
 * (kauth_cred_guid2uid); either result is stored in map->ntm_id.
 *
 * NOTE(review): this listing omits the lines between the statements
 * shown (presumably the error check after the first call, the else arm
 * and the final return) - confirm against the full source.
 */
214 mapname2id(struct nfs_testmapid
*map
)
217 error
= nfs4_id2guid(map
->ntm_name
, &map
->ntm_guid
, map
->ntm_grpflag
);
/* Group mapping when ntm_grpflag is set. */
222 if (map
->ntm_grpflag
) {
223 error
= kauth_cred_guid2gid(&map
->ntm_guid
, (gid_t
*)&map
->ntm_id
);
/* User mapping (presumably the else arm; its line is not in the listing). */
225 error
= kauth_cred_guid2uid(&map
->ntm_guid
, (uid_t
*)&map
->ntm_id
);
/*
 * mapid2name: translate the numeric id in map->ntm_id into a name.
 *
 * The id is converted to map->ntm_guid with kauth_cred_gid2guid() when
 * ntm_grpflag is set, or with kauth_cred_uid2guid().  nfs4_guid2id()
 * then receives the GUID, the ntm_name buffer and a length that starts
 * out as sizeof(map->ntm_name).
 *
 * NOTE(review): this listing omits the lines between the statements
 * shown (presumably the else arm, an error check and the return) -
 * confirm against the full source.
 */
232 mapid2name(struct nfs_testmapid
*map
)
/* len starts as the full capacity of the name buffer. */
235 size_t len
= sizeof(map
->ntm_name
);
237 if (map
->ntm_grpflag
) {
238 error
= kauth_cred_gid2guid((gid_t
)map
->ntm_id
, &map
->ntm_guid
);
240 error
= kauth_cred_uid2guid((uid_t
)map
->ntm_id
, &map
->ntm_guid
);
247 error
= nfs4_guid2id(&map
->ntm_guid
, map
->ntm_name
, &len
, map
->ntm_grpflag
);
/*
 * nfsclnt_testidmap: run one id-mapping test on behalf of user space.
 *
 * p is checked with proc_suser().  A struct nfs_testmapid is copied in
 * from argp and the last byte of its ntm_name is forced to NUL.  The
 * switch on ntm_lookup selects one of four calls: mapname2id(),
 * mapid2name(), nfs4_id2guid() or nfs4_guid2id().  The structure is
 * then copied back out to argp.
 *
 * Returns the mapping error if there is one, otherwise the copyout()
 * result.
 *
 * NOTE(review): the case labels, the error checks after proc_suser()
 * and copyin(), and the local declarations of error and coerror are not
 * part of this listing - confirm against the full source.
 */
253 nfsclnt_testidmap(proc_t p
, user_addr_t argp
)
255 struct nfs_testmapid mapid
;
257 size_t len
= sizeof(mapid
.ntm_name
);
259 /* Let root make this call. */
260 error
= proc_suser(p
);
265 error
= copyin(argp
, &mapid
, sizeof(mapid
));
/* Guarantee termination of the user-supplied name. */
266 mapid
.ntm_name
[MAXIDNAMELEN
- 1] = '\0';
271 switch (mapid
.ntm_lookup
) {
273 error
= mapname2id(&mapid
);
276 error
= mapid2name(&mapid
);
279 error
= nfs4_id2guid(mapid
.ntm_name
, &mapid
.ntm_guid
, mapid
.ntm_grpflag
);
282 error
= nfs4_guid2id(&mapid
.ntm_guid
, mapid
.ntm_name
, &len
, mapid
.ntm_grpflag
);
/* Hand the (possibly updated) structure back to the caller. */
288 coerror
= copyout(&mapid
, argp
, sizeof(mapid
));
/* A mapping error takes precedence over a copyout error. */
290 return error
? error
: coerror
;
292 #endif /* CONFIG_NFS_CLIENT && CONFIG_NFS4 */
/*
 * __no_nfs_client_unused expands to __unused when the NFS client is not
 * configured, and to nothing in the other definition.  It annotates
 * parameters of the client syscall entry that follows.
 * NOTE(review): the #else and #endif lines separating the two
 * definitions are not part of this listing.
 */
294 #if !CONFIG_NFS_CLIENT
295 #define __no_nfs_client_unused __unused
297 #define __no_nfs_client_unused /* nothing */
/*
 * NFS client system call entry.  NOTE(review): the line carrying the
 * function name is not part of this listing; the argument type is
 * struct nfsclnt_args, so this is presumably nfsclnt() - confirm.
 *
 * Three commands are visible when CONFIG_NFS_CLIENT is set:
 *   NFSCLNT_LOCKDANS    - copy la in from uap->argp, then nfslockdans()
 *   NFSCLNT_LOCKDNOTIFY - nfslockdnotify() on uap->argp
 *   NFSCLNT_TESTIDMAP   - nfsclnt_testidmap() on uap->argp
 *
 * retval is marked __unused.  NOTE(review): the switch expression, the
 * declaration of la, any default case and the return are not shown here.
 */
302 proc_t p __no_nfs_client_unused
,
303 struct nfsclnt_args
*uap __no_nfs_client_unused
,
304 __unused
int *retval
)
306 #if CONFIG_NFS_CLIENT
311 case NFSCLNT_LOCKDANS
:
312 error
= copyin(uap
->argp
, &la
, sizeof(la
));
314 error
= nfslockdans(p
, &la
);
317 case NFSCLNT_LOCKDNOTIFY
:
318 error
= nfslockdnotify(p
, uap
->argp
);
321 case NFSCLNT_TESTIDMAP
:
322 error
= nfsclnt_testidmap(p
, uap
->argp
);
331 #endif /* CONFIG_NFS_CLIENT */
334 #if CONFIG_NFS_CLIENT
337 * Asynchronous I/O threads for client NFS.
338 * They do read-ahead and write-behind operations on the block I/O cache.
340 * A pool of up to nfsiod_thread_max threads is launched on demand; threads exit
341 * when unused for a while. There are as many nfsiod structs as there are
342 * nfsiod threads; however there's no strict tie between a thread and a struct.
343 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
344 * up, it removes the next struct nfsiod from the queue and services it. Then
345 * it will put the struct at the head of the free list and sleep on it.
346 * Async requests will pull the next struct nfsiod from the head of the free list,
347 * put it on the work queue, and wake whatever thread is waiting on that struct.
/*
 * nfsiod_terminate: final step of an nfsiod thread.
 *
 * Decrements nfsiod_thread_count, releases nfsiod_mutex and ends the
 * calling thread with thread_terminate(current_thread()).  A diagnostic
 * is printed for the case where no niod was supplied.
 *
 * NOTE(review): the test guarding the printf and whatever is done with
 * a non-NULL niod (presumably freeing it) are not part of this listing -
 * confirm against the full source.
 */
351 * nfsiod thread exit routine
353 * Must be called with nfsiod_mutex held so that the
354 * decision to terminate is atomic with the termination.
357 nfsiod_terminate(struct nfsiod
*niod
)
359 nfsiod_thread_count
--;
360 lck_mtx_unlock(nfsiod_mutex
);
364 printf("nfsiod: terminating without niod\n");
366 thread_terminate(current_thread());
/*
 * Entry point of a new nfsiod thread.  NOTE(review): the function name
 * line is not part of this listing; the starter passes nfsiod_thread to
 * kernel_thread_start(), so this is presumably that function - confirm.
 *
 * Allocates a struct nfsiod (M_TEMP, M_WAITOK).  One path, presumably
 * the allocation-failure path (its test is not shown), drops the thread
 * count, wakes the sleeper on current_thread() and terminates.
 * Otherwise the struct is zeroed and put at the head of nfsiodfree, the
 * sleeper on current_thread() is woken, and the thread sleeps on the
 * struct with msleep0().  That sleep uses nfsiod_continue as its
 * continuation and a timeout of NFS_ASYNCTHREADMAXIDLE * hz.  If
 * msleep0() does return, the last struct on nfsiodfree is removed and
 * the thread terminates through nfsiod_terminate().
 */
370 /* nfsiod thread startup routine */
377 MALLOC(niod
, struct nfsiod
*, sizeof(struct nfsiod
), M_TEMP
, M_WAITOK
);
379 lck_mtx_lock(nfsiod_mutex
);
380 nfsiod_thread_count
--;
381 wakeup(current_thread());
382 lck_mtx_unlock(nfsiod_mutex
);
383 thread_terminate(current_thread());
386 bzero(niod
, sizeof(*niod
));
387 lck_mtx_lock(nfsiod_mutex
);
388 TAILQ_INSERT_HEAD(&nfsiodfree
, niod
, niod_link
);
/* Signals the starter, which sleeps on this thread as its wait channel. */
389 wakeup(current_thread());
/* PDROP: nfsiod_mutex is not held again if msleep0() returns. */
390 error
= msleep0(niod
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiod", NFS_ASYNCTHREADMAXIDLE
* hz
, nfsiod_continue
);
391 /* shouldn't return... so we have an error */
392 /* remove an old nfsiod struct and terminate */
393 lck_mtx_lock(nfsiod_mutex
);
394 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
))) {
395 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
397 nfsiod_terminate(niod
);
/*
 * Starter for one more nfsiod thread.  NOTE(review): the function name
 * line is not part of this listing (presumably nfsiod_start) - confirm.
 *
 * Under nfsiod_mutex: gives up when nfsiod_thread_count is both at or
 * above NFSIOD_MAX and above zero.  Otherwise it bumps the count and
 * calls kernel_thread_start() with nfsiod_thread as the entry point.
 * On success it sleeps on the new thread as the wait channel, with
 * PDROP releasing the mutex, until the new thread issues its wakeup.
 * It then drops the thread reference with thread_deallocate().
 *
 * NOTE(review): on kernel_thread_start() failure the visible code only
 * unlocks; no matching decrement of nfsiod_thread_count is shown.
 * Return statements are not part of this listing either - confirm.
 */
402 * Start up another nfsiod thread.
403 * (unless we're already maxed out and there are nfsiods running)
408 thread_t thd
= THREAD_NULL
;
410 lck_mtx_lock(nfsiod_mutex
);
411 if ((nfsiod_thread_count
>= NFSIOD_MAX
) && (nfsiod_thread_count
> 0)) {
412 lck_mtx_unlock(nfsiod_mutex
);
415 nfsiod_thread_count
++;
416 if (kernel_thread_start((thread_continue_t
)nfsiod_thread
, NULL
, &thd
) != KERN_SUCCESS
) {
417 lck_mtx_unlock(nfsiod_mutex
);
420 /* wait for the thread to complete startup */
421 msleep(thd
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiodw", NULL
);
422 thread_deallocate(thd
);
/*
 * nfsiod_continue: body of an nfsiod thread after it is woken.
 *
 * With nfsiod_mutex held it takes the first struct nfsiod from
 * nfsiodwork.  If there is none, it removes the last struct from
 * nfsiodfree and terminates.  Otherwise it services niod->niod_nmp in a
 * loop:
 *   - move the whole nm_iodq of the mount onto a local queue and flag
 *     every request R_IOD under its own r_mtx;
 *   - drop nfsiod_mutex and invoke r_callback.rcb_func on each request
 *     after unlinking it and setting its link to NFSREQNOLIST;
 *   - retake nfsiod_mutex; if the mount has no more work, or other
 *     mounts are waiting, stop working on this mount (re-queueing it
 *     on nfsiodmounts when it still has work and is not already
 *     linked) and pick up the first waiting mount, if any.
 * When no mount is left and nfsiod_thread_count <= NFSIOD_MAX, the
 * struct goes back to the head of nfsiodfree and the thread sleeps in
 * msleep0() with this function as the continuation.  If that sleep
 * returns, or there are too many threads, the last free struct is
 * removed and the thread terminates through nfsiod_terminate().
 *
 * The error parameter is the value handed to the continuation; the
 * only visible use is as the target of the msleep0() assignment.
 *
 * NOTE(review): several lines (tests, closing braces, goto or label
 * lines, TAILQ_INIT of the local queue) are not part of this listing -
 * confirm details against the full source.
 */
427 * Continuation for Asynchronous I/O threads for NFS client.
429 * Grab an nfsiod struct to work on, do some work, then drop it
432 nfsiod_continue(int error
)
435 struct nfsmount
*nmp
;
436 struct nfsreq
*req
, *treq
;
437 struct nfs_reqqhead iodq
;
440 lck_mtx_lock(nfsiod_mutex
);
441 niod
= TAILQ_FIRST(&nfsiodwork
);
443 /* there's no work queued up */
444 /* remove an old nfsiod struct and terminate */
445 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
))) {
446 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
448 nfsiod_terminate(niod
);
451 TAILQ_REMOVE(&nfsiodwork
, niod
, niod_link
);
454 while ((nmp
= niod
->niod_nmp
)) {
456 niod
->niod_nmp
= NULL
;
461 * Service this mount's async I/O queue.
463 * In order to ensure some level of fairness between mounts,
464 * we grab all the work up front before processing it so any
465 * new work that arrives will be serviced on a subsequent
466 * iteration - and we have a chance to see if other work needs
467 * to be done (e.g. the delayed write queue needs to be pushed
468 * or other mounts are waiting for an nfsiod).
470 /* grab the current contents of the queue */
472 TAILQ_CONCAT(&iodq
, &nmp
->nm_iodq
, r_achain
);
473 /* Mark each iod request as being managed by an iod */
474 TAILQ_FOREACH(req
, &iodq
, r_achain
) {
475 lck_mtx_lock(&req
->r_mtx
);
476 assert(!(req
->r_flags
& R_IOD
));
477 req
->r_flags
|= R_IOD
;
478 lck_mtx_unlock(&req
->r_mtx
);
480 lck_mtx_unlock(nfsiod_mutex
);
482 /* process the queue */
483 TAILQ_FOREACH_SAFE(req
, &iodq
, r_achain
, treq
) {
484 TAILQ_REMOVE(&iodq
, req
, r_achain
);
485 req
->r_achain
.tqe_next
= NFSREQNOLIST
;
486 req
->r_callback
.rcb_func(req
);
489 /* now check if there's more/other work to be done */
490 lck_mtx_lock(nfsiod_mutex
);
491 morework
= !TAILQ_EMPTY(&nmp
->nm_iodq
);
492 if (!morework
|| !TAILQ_EMPTY(&nfsiodmounts
)) {
494 * we're going to stop working on this mount but if the
495 * mount still needs more work so queue it up
497 if (morework
&& nmp
->nm_iodlink
.tqe_next
== NFSNOLIST
) {
498 TAILQ_INSERT_TAIL(&nfsiodmounts
, nmp
, nm_iodlink
);
501 niod
->niod_nmp
= NULL
;
505 /* loop if there's still a mount to work on */
506 if (!niod
->niod_nmp
&& !TAILQ_EMPTY(&nfsiodmounts
)) {
507 niod
->niod_nmp
= TAILQ_FIRST(&nfsiodmounts
);
508 TAILQ_REMOVE(&nfsiodmounts
, niod
->niod_nmp
, nm_iodlink
);
509 niod
->niod_nmp
->nm_iodlink
.tqe_next
= NFSNOLIST
;
511 if (niod
->niod_nmp
) {
515 /* queue ourselves back up - if there aren't too many threads running */
516 if (nfsiod_thread_count
<= NFSIOD_MAX
) {
517 TAILQ_INSERT_HEAD(&nfsiodfree
, niod
, niod_link
);
518 error
= msleep0(niod
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiod", NFS_ASYNCTHREADMAXIDLE
* hz
, nfsiod_continue
);
519 /* shouldn't return... so we have an error */
520 /* remove an old nfsiod struct and terminate */
521 lck_mtx_lock(nfsiod_mutex
);
522 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
))) {
523 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
526 nfsiod_terminate(niod
);
531 #endif /* CONFIG_NFS_CLIENT */
533 #if !CONFIG_NFS_SERVER
534 #define __no_nfs_server_unused __unused
536 #define __no_nfs_server_unused /* nothing */
540 * NFS server system calls
541 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
544 #if CONFIG_NFS_SERVER
545 static struct nfs_exportfs
*
546 nfsrv_find_exportfs(const char *ptr
)
548 struct nfs_exportfs
*nxfs
;
550 LIST_FOREACH(nxfs
, &nfsrv_exports
, nxfs_next
) {
551 if (!strncmp(nxfs
->nxfs_path
, ptr
, MAXPATHLEN
)) {
555 if (nxfs
&& strncmp(nxfs
->nxfs_path
, ptr
, strlen(nxfs
->nxfs_path
))) {
563 * Get file handle system call
567 proc_t p __no_nfs_server_unused
,
568 struct getfh_args
*uap __no_nfs_server_unused
,
569 __unused
int *retval
)
572 struct nfs_filehandle nfh
;
573 int error
, fhlen
, fidlen
;
575 char path
[MAXPATHLEN
], real_mntonname
[MAXPATHLEN
], *ptr
;
577 struct nfs_exportfs
*nxfs
;
578 struct nfs_export
*nx
;
583 error
= proc_suser(p
);
588 error
= copyinstr(uap
->fname
, path
, MAXPATHLEN
, &pathlen
);
590 error
= copyin(uap
->fhp
, &fhlen
, sizeof(fhlen
));
595 /* limit fh size to length specified (or v3 size by default) */
596 if ((fhlen
!= NFSV2_MAX_FH_SIZE
) && (fhlen
!= NFSV3_MAX_FH_SIZE
)) {
597 fhlen
= NFSV3_MAX_FH_SIZE
;
599 fidlen
= fhlen
- sizeof(struct nfs_exphandle
);
601 if (!nfsrv_is_initialized()) {
605 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| LOCKLEAF
| AUDITVNPATH1
,
606 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), vfs_context_current());
615 // find exportfs that matches f_mntonname
616 lck_rw_lock_shared(&nfsrv_export_rwlock
);
617 ptr
= vnode_mount(vp
)->mnt_vfsstat
.f_mntonname
;
618 if ((nxfs
= nfsrv_find_exportfs(ptr
)) == NULL
) {
620 * The f_mntonname might be a firmlink path. Resolve
621 * it into a physical path and try again.
623 int pathbuflen
= MAXPATHLEN
;
626 error
= VFS_ROOT(vnode_mount(vp
), &rvp
, vfs_context_current());
630 error
= vn_getpath_ext(rvp
, NULLVP
, real_mntonname
, &pathbuflen
,
631 VN_GETPATH_FSENTER
| VN_GETPATH_NO_FIRMLINK
);
636 ptr
= real_mntonname
;
637 nxfs
= nfsrv_find_exportfs(ptr
);
643 // find export that best matches remainder of path
644 ptr
= path
+ strlen(nxfs
->nxfs_path
);
645 while (*ptr
&& (*ptr
== '/')) {
648 LIST_FOREACH(nx
, &nxfs
->nxfs_exports
, nx_next
) {
649 int len
= strlen(nx
->nx_path
);
650 if (len
== 0) { // we've hit the export entry for the root directory
653 if (!strncmp(nx
->nx_path
, ptr
, len
)) {
662 bzero(&nfh
, sizeof(nfh
));
663 nfh
.nfh_xh
.nxh_version
= htonl(NFS_FH_VERSION
);
664 nfh
.nfh_xh
.nxh_fsid
= htonl(nxfs
->nxfs_id
);
665 nfh
.nfh_xh
.nxh_expid
= htonl(nx
->nx_id
);
666 nfh
.nfh_xh
.nxh_flags
= 0;
667 nfh
.nfh_xh
.nxh_reserved
= 0;
668 nfh
.nfh_len
= fidlen
;
669 error
= VFS_VPTOFH(vp
, (int*)&nfh
.nfh_len
, &nfh
.nfh_fid
[0], NULL
);
670 if (nfh
.nfh_len
> (uint32_t)fidlen
) {
673 nfh
.nfh_xh
.nxh_fidlen
= nfh
.nfh_len
;
674 nfh
.nfh_len
+= sizeof(nfh
.nfh_xh
);
675 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
678 lck_rw_done(&nfsrv_export_rwlock
);
684 * At first blush, this may appear to leak a kernel stack
685 * address, but the copyout() never reaches &nfh.nfh_fhp
686 * (sizeof(fhandle_t) < sizeof(nfh)).
688 error
= copyout((caddr_t
)&nfh
, uap
->fhp
, sizeof(fhandle_t
));
691 #endif /* CONFIG_NFS_SERVER */
693 #if CONFIG_NFS_SERVER
694 extern const struct fileops vnops
;
697 * syscall for the rpc.lockd to use to translate a NFS file handle into
698 * an open descriptor.
700 * warning: do not remove the suser() call or this becomes one giant
704 fhopen(proc_t p __no_nfs_server_unused
,
705 struct fhopen_args
*uap __no_nfs_server_unused
,
706 int32_t *retval __no_nfs_server_unused
)
709 struct nfs_filehandle nfh
;
710 struct nfs_export
*nx
;
711 struct nfs_export_options
*nxo
;
713 struct fileproc
*fp
, *nfp
;
714 int fmode
, error
, type
;
716 vfs_context_t ctx
= vfs_context_current();
717 kauth_action_t action
;
722 error
= suser(vfs_context_ucred(ctx
), 0);
727 if (!nfsrv_is_initialized()) {
731 fmode
= FFLAGS(uap
->flags
);
732 /* why not allow a non-read/write open for our lockd? */
733 if (((fmode
& (FREAD
| FWRITE
)) == 0) || (fmode
& O_CREAT
)) {
737 error
= copyin(uap
->u_fhp
, &nfh
.nfh_len
, sizeof(nfh
.nfh_len
));
741 if ((nfh
.nfh_len
< (int)sizeof(struct nfs_exphandle
)) ||
742 (nfh
.nfh_len
> (int)NFSV3_MAX_FH_SIZE
)) {
745 error
= copyin(uap
->u_fhp
, &nfh
, sizeof(nfh
.nfh_len
) + nfh
.nfh_len
);
749 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
751 lck_rw_lock_shared(&nfsrv_export_rwlock
);
752 /* now give me my vnode, it gets returned to me with a reference */
753 error
= nfsrv_fhtovp(&nfh
, NULL
, &vp
, &nx
, &nxo
);
754 lck_rw_done(&nfsrv_export_rwlock
);
756 if (error
== NFSERR_TRYLATER
) {
757 error
= EAGAIN
; // XXX EBUSY? Or just leave as TRYLATER?
763 * From now on we have to make sure not
764 * to forget about the vnode.
765 * Any error that causes an abort must vnode_put(vp).
766 * Just set error = err and 'goto bad;'.
772 if (vnode_vtype(vp
) == VSOCK
) {
777 /* disallow write operations on directories */
778 if (vnode_isdir(vp
) && (fmode
& (FWRITE
| O_TRUNC
))) {
784 if ((error
= mac_vnode_check_open(ctx
, vp
, fmode
))) {
789 /* compute action to be authorized */
792 action
|= KAUTH_VNODE_READ_DATA
;
794 if (fmode
& (FWRITE
| O_TRUNC
)) {
795 action
|= KAUTH_VNODE_WRITE_DATA
;
797 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0) {
801 if ((error
= VNOP_OPEN(vp
, fmode
, ctx
))) {
804 if ((error
= vnode_ref_ext(vp
, fmode
, 0))) {
809 * end of vn_open code
812 // starting here... error paths should call vn_close/vnode_put
813 if ((error
= falloc(p
, &nfp
, &indx
, ctx
)) != 0) {
814 vn_close(vp
, fmode
& FMASK
, ctx
);
819 fp
->f_fglob
->fg_flag
= fmode
& FMASK
;
820 fp
->f_fglob
->fg_ops
= &vnops
;
821 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
823 // XXX do we really need to support this with fhopen()?
824 if (fmode
& (O_EXLOCK
| O_SHLOCK
)) {
825 lf
.l_whence
= SEEK_SET
;
828 if (fmode
& O_EXLOCK
) {
834 if ((fmode
& FNONBLOCK
) == 0) {
837 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
838 struct vfs_context context
= *vfs_context_current();
839 /* Modify local copy (to not damage thread copy) */
840 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
842 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
843 fp_free(p
, indx
, fp
);
846 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
852 procfdtbl_releasefd(p
, indx
, NULL
);
853 fp_drop(p
, indx
, fp
, 1);
863 #endif /* CONFIG_NFS_SERVER */
531 #endif /* CONFIG_NFS_CLIENT */
533 #if !CONFIG_NFS_SERVER
534 #define __no_nfs_server_unused __unused
536 #define __no_nfs_server_unused /* nothing */
540 * NFS server system calls
541 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
544 #if CONFIG_NFS_SERVER
545 static struct nfs_exportfs
*
546 nfsrv_find_exportfs(const char *ptr
)
548 struct nfs_exportfs
*nxfs
;
550 LIST_FOREACH(nxfs
, &nfsrv_exports
, nxfs_next
) {
551 if (!strncmp(nxfs
->nxfs_path
, ptr
, MAXPATHLEN
)) {
555 if (nxfs
&& strncmp(nxfs
->nxfs_path
, ptr
, strlen(nxfs
->nxfs_path
))) {
563 * Get file handle system call
567 proc_t p __no_nfs_server_unused
,
568 struct getfh_args
*uap __no_nfs_server_unused
,
569 __unused
int *retval
)
572 struct nfs_filehandle nfh
;
573 int error
, fhlen
, fidlen
;
575 char path
[MAXPATHLEN
], real_mntonname
[MAXPATHLEN
], *ptr
;
577 struct nfs_exportfs
*nxfs
;
578 struct nfs_export
*nx
;
583 error
= proc_suser(p
);
588 error
= copyinstr(uap
->fname
, path
, MAXPATHLEN
, &pathlen
);
590 error
= copyin(uap
->fhp
, &fhlen
, sizeof(fhlen
));
595 /* limit fh size to length specified (or v3 size by default) */
596 if ((fhlen
!= NFSV2_MAX_FH_SIZE
) && (fhlen
!= NFSV3_MAX_FH_SIZE
)) {
597 fhlen
= NFSV3_MAX_FH_SIZE
;
599 fidlen
= fhlen
- sizeof(struct nfs_exphandle
);
601 if (!nfsrv_is_initialized()) {
605 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| LOCKLEAF
| AUDITVNPATH1
,
606 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), vfs_context_current());
615 // find exportfs that matches f_mntonname
616 lck_rw_lock_shared(&nfsrv_export_rwlock
);
617 ptr
= vnode_mount(vp
)->mnt_vfsstat
.f_mntonname
;
618 if ((nxfs
= nfsrv_find_exportfs(ptr
)) == NULL
) {
620 * The f_mntonname might be a firmlink path. Resolve
621 * it into a physical path and try again.
623 int pathbuflen
= MAXPATHLEN
;
626 error
= VFS_ROOT(vnode_mount(vp
), &rvp
, vfs_context_current());
630 error
= vn_getpath_ext(rvp
, NULLVP
, real_mntonname
, &pathbuflen
,
631 VN_GETPATH_FSENTER
| VN_GETPATH_NO_FIRMLINK
);
636 ptr
= real_mntonname
;
637 nxfs
= nfsrv_find_exportfs(ptr
);
643 // find export that best matches remainder of path
644 ptr
= path
+ strlen(nxfs
->nxfs_path
);
645 while (*ptr
&& (*ptr
== '/')) {
648 LIST_FOREACH(nx
, &nxfs
->nxfs_exports
, nx_next
) {
649 int len
= strlen(nx
->nx_path
);
650 if (len
== 0) { // we've hit the export entry for the root directory
653 if (!strncmp(nx
->nx_path
, ptr
, len
)) {
662 bzero(&nfh
, sizeof(nfh
));
663 nfh
.nfh_xh
.nxh_version
= htonl(NFS_FH_VERSION
);
664 nfh
.nfh_xh
.nxh_fsid
= htonl(nxfs
->nxfs_id
);
665 nfh
.nfh_xh
.nxh_expid
= htonl(nx
->nx_id
);
666 nfh
.nfh_xh
.nxh_flags
= 0;
667 nfh
.nfh_xh
.nxh_reserved
= 0;
668 nfh
.nfh_len
= fidlen
;
669 error
= VFS_VPTOFH(vp
, (int*)&nfh
.nfh_len
, &nfh
.nfh_fid
[0], NULL
);
670 if (nfh
.nfh_len
> (uint32_t)fidlen
) {
673 nfh
.nfh_xh
.nxh_fidlen
= nfh
.nfh_len
;
674 nfh
.nfh_len
+= sizeof(nfh
.nfh_xh
);
675 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
678 lck_rw_done(&nfsrv_export_rwlock
);
684 * At first blush, this may appear to leak a kernel stack
685 * address, but the copyout() never reaches &nfh.nfh_fhp
686 * (sizeof(fhandle_t) < sizeof(nfh)).
688 error
= copyout((caddr_t
)&nfh
, uap
->fhp
, sizeof(fhandle_t
));
691 #endif /* CONFIG_NFS_SERVER */
693 #if CONFIG_NFS_SERVER
694 extern const struct fileops vnops
;
697 * syscall for the rpc.lockd to use to translate a NFS file handle into
698 * an open descriptor.
700 * warning: do not remove the suser() call or this becomes one giant
704 fhopen(proc_t p __no_nfs_server_unused
,
705 struct fhopen_args
*uap __no_nfs_server_unused
,
706 int32_t *retval __no_nfs_server_unused
)
709 struct nfs_filehandle nfh
;
710 struct nfs_export
*nx
;
711 struct nfs_export_options
*nxo
;
713 struct fileproc
*fp
, *nfp
;
714 int fmode
, error
, type
;
716 vfs_context_t ctx
= vfs_context_current();
717 kauth_action_t action
;
722 error
= suser(vfs_context_ucred(ctx
), 0);
727 if (!nfsrv_is_initialized()) {
731 fmode
= FFLAGS(uap
->flags
);
732 /* why not allow a non-read/write open for our lockd? */
733 if (((fmode
& (FREAD
| FWRITE
)) == 0) || (fmode
& O_CREAT
)) {
737 error
= copyin(uap
->u_fhp
, &nfh
.nfh_len
, sizeof(nfh
.nfh_len
));
741 if ((nfh
.nfh_len
< (int)sizeof(struct nfs_exphandle
)) ||
742 (nfh
.nfh_len
> (int)NFSV3_MAX_FH_SIZE
)) {
745 error
= copyin(uap
->u_fhp
, &nfh
, sizeof(nfh
.nfh_len
) + nfh
.nfh_len
);
749 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
751 lck_rw_lock_shared(&nfsrv_export_rwlock
);
752 /* now give me my vnode, it gets returned to me with a reference */
753 error
= nfsrv_fhtovp(&nfh
, NULL
, &vp
, &nx
, &nxo
);
754 lck_rw_done(&nfsrv_export_rwlock
);
756 if (error
== NFSERR_TRYLATER
) {
757 error
= EAGAIN
; // XXX EBUSY? Or just leave as TRYLATER?
763 * From now on we have to make sure not
764 * to forget about the vnode.
765 * Any error that causes an abort must vnode_put(vp).
766 * Just set error = err and 'goto bad;'.
772 if (vnode_vtype(vp
) == VSOCK
) {
777 /* disallow write operations on directories */
778 if (vnode_isdir(vp
) && (fmode
& (FWRITE
| O_TRUNC
))) {
784 if ((error
= mac_vnode_check_open(ctx
, vp
, fmode
))) {
789 /* compute action to be authorized */
792 action
|= KAUTH_VNODE_READ_DATA
;
794 if (fmode
& (FWRITE
| O_TRUNC
)) {
795 action
|= KAUTH_VNODE_WRITE_DATA
;
797 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0) {
801 if ((error
= VNOP_OPEN(vp
, fmode
, ctx
))) {
804 if ((error
= vnode_ref_ext(vp
, fmode
, 0))) {
809 * end of vn_open code
812 // starting here... error paths should call vn_close/vnode_put
813 if ((error
= falloc(p
, &nfp
, &indx
, ctx
)) != 0) {
814 vn_close(vp
, fmode
& FMASK
, ctx
);
819 fp
->f_fglob
->fg_flag
= fmode
& FMASK
;
820 fp
->f_fglob
->fg_ops
= &vnops
;
821 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
823 // XXX do we really need to support this with fhopen()?
824 if (fmode
& (O_EXLOCK
| O_SHLOCK
)) {
825 lf
.l_whence
= SEEK_SET
;
828 if (fmode
& O_EXLOCK
) {
834 if ((fmode
& FNONBLOCK
) == 0) {
837 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
838 struct vfs_context context
= *vfs_context_current();
839 /* Modify local copy (to not damage thread copy) */
840 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
842 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
843 fp_free(p
, indx
, fp
);
846 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
852 procfdtbl_releasefd(p
, indx
, NULL
);
853 fp_drop(p
, indx
, fp
, 1);
863 #endif /* CONFIG_NFS_SERVER */
865 #if CONFIG_NFS_SERVER
867 * NFS server pseudo system call
/*
 * nfssvc: entry point for the NFS server pseudo system call.
 *
 * p      - calling process; used for the super-user check and to decide
 *          whether the user argument structure is copied in directly.
 * uap    - system call arguments; uap->flag selects the operation and
 *          uap->argp is the user address of its argument structure.
 * retval - unused.
 *
 * Operations visible in this block, selected by uap->flag:
 *   NFSSVC_ADDSOCK - copy in the nfsd arguments, resolve the descriptor
 *                    with file_socket(), copy in the client address with
 *                    sockargs() when one is supplied, then call
 *                    nfssvc_addsock().
 *   NFSSVC_NFSD    - call nfssvc_nfsd().
 *   NFSSVC_EXPORT  - call nfssvc_export() with uap->argp.
 *
 * Every operation except NFSSVC_EXPORT goes through proc_suser(), and
 * mac_system_check_nfsd() is consulted with the current credential.
 * EINTR and ERESTART are tested for at the end.
 * NOTE(review): the statements guarded by several of these tests are not
 * visible in this excerpt; confirm against the full file.
 */
870 nfssvc(proc_t p __no_nfs_server_unused
,
871 struct nfssvc_args
*uap __no_nfs_server_unused
,
872 __unused
int *retval
)
875 struct user_nfsd_args user_nfsdarg
;
879 AUDIT_ARG(cmd
, uap
->flag
);
882 * Must be super user for most operations (export ops checked later).
884 if ((uap
->flag
!= NFSSVC_EXPORT
) && ((error
= proc_suser(p
)))) {
888 error
= mac_system_check_nfsd(kauth_cred_get());
894 /* make sure NFS server data structures have been initialized */
897 if (uap
->flag
& NFSSVC_ADDSOCK
) {
898 if (IS_64BIT_PROCESS(p
)) {
899 error
= copyin(uap
->argp
, (caddr_t
)&user_nfsdarg
, sizeof(user_nfsdarg
));
/*
 * Alternate layout: copy in a struct nfsd_args and widen each field into
 * user_nfsdarg, converting the name pointer with CAST_USER_ADDR_T.
 * NOTE(review): presumably the non-64-bit branch; the else that pairs it
 * with the copyin above is not visible here.
 */
901 struct nfsd_args tmp_args
;
902 error
= copyin(uap
->argp
, (caddr_t
)&tmp_args
, sizeof(tmp_args
));
904 user_nfsdarg
.sock
= tmp_args
.sock
;
905 user_nfsdarg
.name
= CAST_USER_ADDR_T(tmp_args
.name
);
906 user_nfsdarg
.namelen
= tmp_args
.namelen
;
/* Translate the descriptor number into a socket; this takes an iocount. */
913 error
= file_socket(user_nfsdarg
.sock
, &so
);
917 /* Get the client address for connected sockets. */
918 if (user_nfsdarg
.name
== USER_ADDR_NULL
|| user_nfsdarg
.namelen
== 0) {
921 error
= sockargs(&nam
, user_nfsdarg
.name
, user_nfsdarg
.namelen
, MBUF_TYPE_SONAME
);
923 /* drop the iocount file_socket() grabbed on the file descriptor */
924 file_drop(user_nfsdarg
.sock
);
929 * nfssvc_addsock() will grab a retain count on the socket
930 * to keep the socket from being closed when nfsd closes its
931 * file descriptor for it.
933 error
= nfssvc_addsock(so
, nam
);
934 /* drop the iocount file_socket() grabbed on the file descriptor */
935 file_drop(user_nfsdarg
.sock
);
936 } else if (uap
->flag
& NFSSVC_NFSD
) {
937 error
= nfssvc_nfsd();
938 } else if (uap
->flag
& NFSSVC_EXPORT
) {
939 error
= nfssvc_export(uap
->argp
);
943 if (error
== EINTR
|| error
== ERESTART
) {
950 #if CONFIG_NFS_SERVER
953 * Adds a socket to the list for servicing by nfsds.
/*
 * nfssvc_addsock: register a socket with the NFS server so that it can be
 * serviced by the nfsd threads.
 *
 * so    - socket to add; a retain count is taken with sock_retain().
 * mynam - mbuf stored in the new nfsrv_sock as ns_nam; for stream sockets
 *         it is passed to nfsrv_check_exports_allow_address() first.
 *
 * Steps visible in this block:
 *  - look up domain, type and protocol with sock_gettype() and test for an
 *    already registered UDP socket (nfsrv_udpsock / nfsrv_udp6sock); the
 *    same test is repeated later under nfsd_mutex;
 *  - set socket options: SO_KEEPALIVE, TCP_NODELAY for AF_INET TCP,
 *    SO_SNDBUF / SO_RCVBUF for datagram or AF_LOCAL sockets, and
 *    SO_RCVTIMEO / SO_SNDTIMEO;
 *  - allocate and zero a struct nfsrv_sock and initialise its rwlock and
 *    write-gather mutex;
 *  - under nfsd_mutex: append it to nfsrv_socklist, update the TCP
 *    connection count and the idle-socket timer, install the up-call with
 *    nfsrv_uc_addsock(), set SLP_VALID | SLP_NEEDQ and call
 *    nfsrv_wakenfsd().
 *
 * NOTE(review): results of the buffer-size and timeout sock_setsockopt()
 * calls are OR-ed into error before being logged. OR-ing errno values gives
 * a number that need not equal any single errno, so the logged value is
 * only meaningful as zero versus non-zero.
 * NOTE(review): error-path cleanup and return statements are not visible in
 * this excerpt; confirm ownership of mynam on failure against the full file.
 */
956 nfssvc_addsock(socket_t so
, mbuf_t mynam
)
958 struct nfsrv_sock
*slp
;
959 int error
= 0, sodomain
, sotype
, soprotocol
, on
= 1;
961 struct timeval timeo
;
963 /* make sure mbuf constants are set up */
964 if (!nfs_mbuf_mhlen
) {
968 sock_gettype(so
, &sodomain
, &sotype
, &soprotocol
);
970 /* There should be only one UDP socket for each of IPv4 and IPv6 */
971 if ((sodomain
== AF_INET
) && (soprotocol
== IPPROTO_UDP
) && nfsrv_udpsock
) {
975 if ((sodomain
== AF_INET6
) && (soprotocol
== IPPROTO_UDP
) && nfsrv_udp6sock
) {
980 /* Set protocol options and reserve some space (for UDP). */
981 if (sotype
== SOCK_STREAM
) {
982 error
= nfsrv_check_exports_allow_address(mynam
);
984 log(LOG_INFO
, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error
);
988 sock_setsockopt(so
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
));
990 if ((sodomain
== AF_INET
) && (soprotocol
== IPPROTO_TCP
)) {
991 sock_setsockopt(so
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
993 if (sotype
== SOCK_DGRAM
|| sodomain
== AF_LOCAL
) { /* set socket buffer sizes for UDP */
/* Datagram sockets use NFS_UDPSOCKBUF; the AF_LOCAL case uses 2 MiB. */
994 int reserve
= (sotype
== SOCK_DGRAM
) ? NFS_UDPSOCKBUF
: (2 * 1024 * 1024);
995 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_SNDBUF
, &reserve
, sizeof(reserve
));
996 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_RCVBUF
, &reserve
, sizeof(reserve
));
998 log(LOG_INFO
, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error
);
1002 sock_nointerrupt(so
, 0);
1005 * Set socket send/receive timeouts.
1006 * Receive timeout shouldn't matter, but setting the send timeout
1007 * will make sure that an unresponsive client can't hang the server.
1011 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
1013 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
1015 log(LOG_INFO
, "nfssvc_addsock: socket timeout setting error(s) %d\n", error
);
1019 MALLOC(slp
, struct nfsrv_sock
*, sizeof(struct nfsrv_sock
), M_NFSSVC
, M_WAITOK
);
1024 bzero((caddr_t
)slp
, sizeof(struct nfsrv_sock
));
1025 lck_rw_init(&slp
->ns_rwlock
, nfsrv_slp_rwlock_group
, LCK_ATTR_NULL
);
1026 lck_mtx_init(&slp
->ns_wgmutex
, nfsrv_slp_mutex_group
, LCK_ATTR_NULL
);
1028 lck_mtx_lock(nfsd_mutex
);
1030 if (soprotocol
== IPPROTO_UDP
) {
1031 if (sodomain
== AF_INET
) {
1032 /* There should be only one UDP/IPv4 socket */
1033 if (nfsrv_udpsock
) {
1034 lck_mtx_unlock(nfsd_mutex
);
1039 nfsrv_udpsock
= slp
;
1041 if (sodomain
== AF_INET6
) {
1042 /* There should be only one UDP/IPv6 socket */
1043 if (nfsrv_udp6sock
) {
1044 lck_mtx_unlock(nfsd_mutex
);
1049 nfsrv_udp6sock
= slp
;
1053 /* add the socket to the list */
/*
 * first records whether the list was empty before this insert; it is
 * handed to nfsrv_uc_addsock() further down.
 */
1054 first
= TAILQ_EMPTY(&nfsrv_socklist
);
1055 TAILQ_INSERT_TAIL(&nfsrv_socklist
, slp
, ns_chain
);
1056 if (sotype
== SOCK_STREAM
) {
1057 nfsrv_sock_tcp_cnt
++;
1058 if (nfsrv_sock_idle_timeout
< 0) {
1059 nfsrv_sock_idle_timeout
= 0;
1061 if (nfsrv_sock_idle_timeout
&& (nfsrv_sock_idle_timeout
< NFSD_MIN_IDLE_TIMEOUT
)) {
1062 nfsrv_sock_idle_timeout
= NFSD_MIN_IDLE_TIMEOUT
;
1065 * Possibly start or stop the idle timer. We only start the idle timer when
1066 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
1067 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
1068 * the number of connections.
1070 if ((nfsrv_sock_tcp_cnt
> 2 * nfsd_thread_max
) || nfsrv_idlesock_timer_on
) {
1071 if (nfsrv_sock_idle_timeout
== 0 || nfsrv_sock_tcp_cnt
<= 2 * nfsd_thread_max
) {
1072 if (nfsrv_idlesock_timer_on
) {
1073 thread_call_cancel(nfsrv_idlesock_timer_call
);
1074 nfsrv_idlesock_timer_on
= 0;
1077 struct nfsrv_sock
*old_slp
;
1079 time_t time_to_wait
= nfsrv_sock_idle_timeout
;
1081 * Get the oldest tcp socket and calculate the
1082 * earliest time for the next idle timer to fire
1083 * based on the possibly updated nfsrv_sock_idle_timeout
1085 TAILQ_FOREACH(old_slp
, &nfsrv_socklist
, ns_chain
) {
1086 if (old_slp
->ns_sotype
== SOCK_STREAM
) {
1088 time_to_wait
-= now
.tv_sec
- old_slp
->ns_timestamp
;
1089 if (time_to_wait
< 1) {
1096 * If we have a timer scheduled, but if its going to fire too late,
/*
 * nfsrv_idlesock_timer_on doubles as a flag and as the time, in seconds,
 * at which the scheduled timer will fire: it is assigned
 * now.tv_sec + time_to_wait below and compared against that sum here.
 */
1099 if (nfsrv_idlesock_timer_on
> now
.tv_sec
+ time_to_wait
) {
1100 thread_call_cancel(nfsrv_idlesock_timer_call
);
1101 nfsrv_idlesock_timer_on
= 0;
1103 /* Schedule the idle thread if it isn't already */
1104 if (!nfsrv_idlesock_timer_on
) {
1105 nfs_interval_timer_start(nfsrv_idlesock_timer_call
, time_to_wait
* 1000);
1106 nfsrv_idlesock_timer_on
= now
.tv_sec
+ time_to_wait
;
1112 sock_retain(so
); /* grab a retain count on the socket */
1114 slp
->ns_sotype
= sotype
;
1115 slp
->ns_nam
= mynam
;
1117 /* set up the socket up-call */
1118 nfsrv_uc_addsock(slp
, first
);
1120 /* mark that the socket is not in the nfsrv_sockwg list */
1121 slp
->ns_wgq
.tqe_next
= SLPNOLIST
;
1123 slp
->ns_flag
= SLP_VALID
| SLP_NEEDQ
;
1125 nfsrv_wakenfsd(slp
);
1126 lck_mtx_unlock(nfsd_mutex
);
1134 * nfsd theory of operation:
1136 * The first nfsd thread stays in user mode accepting new TCP connections
1137 * which are then added via the "addsock" call. The rest of the nfsd threads
1138 * simply call into the kernel and remain there in a loop handling NFS
1139 * requests until killed by a signal.
1141 * There's a list of nfsd threads (nfsd_head).
1142 * There's an nfsd queue that contains only those nfsds that are
1143 * waiting for work to do (nfsd_queue).
1145 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
1146 * managing the work on the sockets:
1147 * nfsrv_sockwait - sockets w/new data waiting to be worked on
1148 * nfsrv_sockwork - sockets being worked on which may have more work to do
1149 * nfsrv_sockwg -- sockets which have pending write gather data
1150 * When a socket receives data, if it is not currently queued, it
1151 * will be placed at the end of the "wait" queue.
1152 * Whenever a socket needs servicing we make sure it is queued and
1153 * wake up a waiting nfsd (if there is one).
1155 * nfsds will service at most 8 requests from the same socket before
1156 * defecting to work on another socket.
1157 * nfsds will defect immediately if there are any sockets in the "wait" queue
1158 * nfsds looking for a socket to work on check the "wait" queue first and
1159 * then check the "work" queue.
1160 * When an nfsd starts working on a socket, it removes it from the head of
1161 * the queue it's currently on and moves it to the end of the "work" queue.
1162 * When nfsds are checking the queues for work, any sockets found not to
1163 * have any work are simply dropped from the queue.
1170 struct nfsrv_sock
*slp
;
1172 struct nfsrv_descript
*nd
= NULL
;
1173 int error
= 0, cacherep
, writes_todo
;
1174 int siz
, procrastinate
, opcnt
= 0;
1177 struct vfs_context context
;
1185 MALLOC(nfsd
, struct nfsd
*, sizeof(struct nfsd
), M_NFSD
, M_WAITOK
);
1189 bzero(nfsd
, sizeof(struct nfsd
));
1190 lck_mtx_lock(nfsd_mutex
);
1191 if (nfsd_thread_count
++ == 0) {
1192 nfsrv_initcache(); /* Init the server request cache */
1194 TAILQ_INSERT_TAIL(&nfsd_head
, nfsd
, nfsd_chain
);
1195 lck_mtx_unlock(nfsd_mutex
);
1197 context
.vc_thread
= current_thread();
1199 /* Set time out so that nfsd threads can wake up and see if they are still needed. */
1204 * Loop getting rpc requests until SIGKILL.
1207 if (nfsd_thread_max
<= 0) {
1208 /* NFS server shutting down, get out ASAP */
1210 slp
= nfsd
->nfsd_slp
;
1211 } else if (nfsd
->nfsd_flag
& NFSD_REQINPROG
) {
1212 /* already have some work to do */
1214 slp
= nfsd
->nfsd_slp
;
1216 /* need to find work to do */
1218 lck_mtx_lock(nfsd_mutex
);
1219 while (!nfsd
->nfsd_slp
&& TAILQ_EMPTY(&nfsrv_sockwait
) && TAILQ_EMPTY(&nfsrv_sockwork
)) {
1220 if (nfsd_thread_count
> nfsd_thread_max
) {
1222 * If we have no socket and there are more
1223 * nfsd threads than configured, let's exit.
1228 nfsd
->nfsd_flag
|= NFSD_WAITING
;
1229 TAILQ_INSERT_HEAD(&nfsd_queue
, nfsd
, nfsd_queue
);
1230 error
= msleep(nfsd
, nfsd_mutex
, PSOCK
| PCATCH
, "nfsd", &to
);
1232 if (nfsd
->nfsd_flag
& NFSD_WAITING
) {
1233 TAILQ_REMOVE(&nfsd_queue
, nfsd
, nfsd_queue
);
1234 nfsd
->nfsd_flag
&= ~NFSD_WAITING
;
1236 if (error
== EWOULDBLOCK
) {
1242 slp
= nfsd
->nfsd_slp
;
1243 if (!slp
&& !TAILQ_EMPTY(&nfsrv_sockwait
)) {
1244 /* look for a socket to work on in the wait queue */
1245 while ((slp
= TAILQ_FIRST(&nfsrv_sockwait
))) {
1246 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1247 /* remove from the head of the queue */
1248 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1249 slp
->ns_flag
&= ~SLP_WAITQ
;
1250 if ((slp
->ns_flag
& SLP_VALID
) && (slp
->ns_flag
& SLP_WORKTODO
)) {
1253 /* nothing to do, so skip this socket */
1254 lck_rw_done(&slp
->ns_rwlock
);
1257 if (!slp
&& !TAILQ_EMPTY(&nfsrv_sockwork
)) {
1258 /* look for a socket to work on in the work queue */
1259 while ((slp
= TAILQ_FIRST(&nfsrv_sockwork
))) {
1260 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1261 /* remove from the head of the queue */
1262 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1263 slp
->ns_flag
&= ~SLP_WORKQ
;
1264 if ((slp
->ns_flag
& SLP_VALID
) && (slp
->ns_flag
& SLP_WORKTODO
)) {
1267 /* nothing to do, so skip this socket */
1268 lck_rw_done(&slp
->ns_rwlock
);
1271 if (!nfsd
->nfsd_slp
&& slp
) {
1272 /* we found a socket to work on, grab a reference */
1275 slp
->ns_timestamp
= now
.tv_sec
;
1276 /* We keep the socket list in least recently used order for reaping idle sockets */
1277 TAILQ_REMOVE(&nfsrv_socklist
, slp
, ns_chain
);
1278 TAILQ_INSERT_TAIL(&nfsrv_socklist
, slp
, ns_chain
);
1279 nfsd
->nfsd_slp
= slp
;
1281 /* and put it at the back of the work queue */
1282 TAILQ_INSERT_TAIL(&nfsrv_sockwork
, slp
, ns_svcq
);
1283 slp
->ns_flag
|= SLP_WORKQ
;
1284 lck_rw_done(&slp
->ns_rwlock
);
1286 lck_mtx_unlock(nfsd_mutex
);
1290 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1291 if (slp
->ns_flag
& SLP_VALID
) {
1292 if ((slp
->ns_flag
& (SLP_NEEDQ
| SLP_DISCONN
)) == SLP_NEEDQ
) {
1293 slp
->ns_flag
&= ~SLP_NEEDQ
;
1294 nfsrv_rcv_locked(slp
->ns_so
, slp
, MBUF_WAITOK
);
1296 if (slp
->ns_flag
& SLP_DISCONN
) {
1299 error
= nfsrv_dorec(slp
, nfsd
, &nd
);
1300 if (error
== EINVAL
) { // RPCSEC_GSS drop
1301 if (slp
->ns_sotype
== SOCK_STREAM
) {
1302 nfsrv_zapsock(slp
); // drop connection
1306 if (error
&& (slp
->ns_wgtime
|| (slp
->ns_flag
& SLP_DOWRITES
))) {
1308 cur_usec
= (u_quad_t
)now
.tv_sec
* 1000000 +
1309 (u_quad_t
)now
.tv_usec
;
1310 if (slp
->ns_wgtime
<= cur_usec
) {
1315 slp
->ns_flag
&= ~SLP_DOWRITES
;
1317 nfsd
->nfsd_flag
|= NFSD_REQINPROG
;
1319 lck_rw_done(&slp
->ns_rwlock
);
1321 if (error
|| (slp
&& !(slp
->ns_flag
& SLP_VALID
))) {
1323 nfsm_chain_cleanup(&nd
->nd_nmreq
);
1325 mbuf_freem(nd
->nd_nam2
);
1327 if (IS_VALID_CRED(nd
->nd_cr
)) {
1328 kauth_cred_unref(&nd
->nd_cr
);
1330 if (nd
->nd_gss_context
) {
1331 nfs_gss_svc_ctx_deref(nd
->nd_gss_context
);
1333 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
1336 nfsd
->nfsd_slp
= NULL
;
1337 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
1339 nfsrv_slpderef(slp
);
1341 if (nfsd_thread_max
<= 0) {
1347 microuptime(&nd
->nd_starttime
);
1349 nd
->nd_nam
= nd
->nd_nam2
;
1351 nd
->nd_nam
= slp
->ns_nam
;
1354 cacherep
= nfsrv_getcache(nd
, slp
, &mrep
);
1356 if (nfsrv_require_resv_port
) {
1357 /* Check if source port is a reserved port */
1359 struct sockaddr
*saddr
= mbuf_data(nd
->nd_nam
);
1361 if (saddr
->sa_family
== AF_INET
) {
1362 port
= ntohs(((struct sockaddr_in
*)saddr
)->sin_port
);
1363 } else if (saddr
->sa_family
== AF_INET6
) {
1364 port
= ntohs(((struct sockaddr_in6
*)saddr
)->sin6_port
);
1366 if ((port
>= IPPORT_RESERVED
) && (nd
->nd_procnum
!= NFSPROC_NULL
)) {
1367 nd
->nd_procnum
= NFSPROC_NOOP
;
1368 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1375 * Loop to get all the write RPC replies that have been
1376 * gathered together.
1381 if (nd
&& (nd
->nd_vers
== NFS_VER3
)) {
1382 procrastinate
= nfsrv_wg_delay_v3
;
1384 procrastinate
= nfsrv_wg_delay
;
1386 lck_rw_lock_shared(&nfsrv_export_rwlock
);
1387 context
.vc_ucred
= NULL
;
1388 if (writes_todo
|| ((nd
->nd_procnum
== NFSPROC_WRITE
) && (procrastinate
> 0))) {
1389 error
= nfsrv_writegather(&nd
, slp
, &context
, &mrep
);
1391 error
= (*(nfsrv_procs
[nd
->nd_procnum
]))(nd
, slp
, &context
, &mrep
);
1393 lck_rw_done(&nfsrv_export_rwlock
);
1396 * If this is a stream socket and we are not going
1397 * to send a reply we better close the connection
1398 * so the client doesn't hang.
1400 if (error
&& slp
->ns_sotype
== SOCK_STREAM
) {
1401 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1403 lck_rw_done(&slp
->ns_rwlock
);
1404 printf("NFS server: NULL reply from proc = %d error = %d\n",
1405 nd
->nd_procnum
, error
);
1410 OSAddAtomic64(1, &nfsstats
.srv_errs
);
1411 nfsrv_updatecache(nd
, FALSE
, mrep
);
1413 mbuf_freem(nd
->nd_nam2
);
1418 OSAddAtomic64(1, &nfsstats
.srvrpccnt
[nd
->nd_procnum
]);
1419 nfsrv_updatecache(nd
, TRUE
, mrep
);
1423 if (nd
->nd_gss_mb
!= NULL
) { // It's RPCSEC_GSS
1425 * Need to checksum or encrypt the reply
1427 error
= nfs_gss_svc_protect_reply(nd
, mrep
);
1435 * Get the total size of the reply
1443 if (siz
<= 0 || siz
> NFS_MAXPACKET
) {
1444 printf("mbuf siz=%d\n", siz
);
1445 panic("Bad nfs svc reply");
1448 mbuf_pkthdr_setlen(m
, siz
);
1449 error
= mbuf_pkthdr_setrcvif(m
, NULL
);
1451 panic("nfsd setrcvif failed: %d", error
);
1454 * For stream protocols, prepend a Sun RPC
1457 if (slp
->ns_sotype
== SOCK_STREAM
) {
1458 error
= mbuf_prepend(&m
, NFSX_UNSIGNED
, MBUF_WAITOK
);
1460 *(u_int32_t
*)mbuf_data(m
) = htonl(0x80000000 | siz
);
1464 if (slp
->ns_flag
& SLP_VALID
) {
1465 error
= nfsrv_send(slp
, nd
->nd_nam2
, m
);
1475 mbuf_freem(nd
->nd_nam2
);
1478 if (error
== EPIPE
) {
1479 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1481 lck_rw_done(&slp
->ns_rwlock
);
1483 if (error
== EINTR
|| error
== ERESTART
) {
1484 nfsm_chain_cleanup(&nd
->nd_nmreq
);
1485 if (IS_VALID_CRED(nd
->nd_cr
)) {
1486 kauth_cred_unref(&nd
->nd_cr
);
1488 if (nd
->nd_gss_context
) {
1489 nfs_gss_svc_ctx_deref(nd
->nd_gss_context
);
1491 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
1492 nfsrv_slpderef(slp
);
1493 lck_mtx_lock(nfsd_mutex
);
1498 mbuf_freem(nd
->nd_nam2
);
1505 nfsm_chain_cleanup(&nd
->nd_nmreq
);
1507 mbuf_freem(nd
->nd_nam2
);
1509 if (IS_VALID_CRED(nd
->nd_cr
)) {
1510 kauth_cred_unref(&nd
->nd_cr
);
1512 if (nd
->nd_gss_context
) {
1513 nfs_gss_svc_ctx_deref(nd
->nd_gss_context
);
1515 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
1520 * Check to see if there are outstanding writes that
1521 * need to be serviced.
1524 if (slp
->ns_wgtime
) {
1526 cur_usec
= (u_quad_t
)now
.tv_sec
* 1000000 +
1527 (u_quad_t
)now
.tv_usec
;
1528 if (slp
->ns_wgtime
<= cur_usec
) {
1533 } while (writes_todo
);
1536 if (TAILQ_EMPTY(&nfsrv_sockwait
) && (opcnt
< 8)) {
1537 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1538 error
= nfsrv_dorec(slp
, nfsd
, &nd
);
1539 if (error
== EINVAL
) { // RPCSEC_GSS drop
1540 if (slp
->ns_sotype
== SOCK_STREAM
) {
1541 nfsrv_zapsock(slp
); // drop connection
1544 lck_rw_done(&slp
->ns_rwlock
);
1547 /* drop our reference on the socket */
1548 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
1549 nfsd
->nfsd_slp
= NULL
;
1550 nfsrv_slpderef(slp
);
1553 lck_mtx_lock(nfsd_mutex
);
1555 TAILQ_REMOVE(&nfsd_head
, nfsd
, nfsd_chain
);
1557 if (--nfsd_thread_count
== 0) {
1560 lck_mtx_unlock(nfsd_mutex
);
/*
 * nfssvc_export: handle the NFSSVC_EXPORT operation of nfssvc().
 *
 * argp - user-space address of the export arguments.
 *
 * The arguments are copied in either directly as a
 * struct user_nfs_export_args, or as a struct nfs_export_args that is then
 * widened field by field into unxa, with the pointer fields (nxa_fspath,
 * nxa_exppath, nxa_nets) converted through CAST_USER_ADDR_T.
 * NOTE(review): is_64bit is computed from the process of the current VFS
 * context and presumably selects between the two forms; the branch itself
 * is not visible in this excerpt.
 *
 * The resulting structure is handed to nfsrv_export() together with the
 * current VFS context, and its result is stored in error.
 */
1565 nfssvc_export(user_addr_t argp
)
1567 int error
= 0, is_64bit
;
1568 struct user_nfs_export_args unxa
;
1569 vfs_context_t ctx
= vfs_context_current();
1571 is_64bit
= IS_64BIT_PROCESS(vfs_context_proc(ctx
));
1573 /* copy in pointers to path and export args */
1575 error
= copyin(argp
, (caddr_t
)&unxa
, sizeof(unxa
));
1577 struct nfs_export_args tnxa
;
1578 error
= copyin(argp
, (caddr_t
)&tnxa
, sizeof(tnxa
));
1580 /* munge into LP64 version of nfs_export_args structure */
1581 unxa
.nxa_fsid
= tnxa
.nxa_fsid
;
1582 unxa
.nxa_expid
= tnxa
.nxa_expid
;
1583 unxa
.nxa_fspath
= CAST_USER_ADDR_T(tnxa
.nxa_fspath
);
1584 unxa
.nxa_exppath
= CAST_USER_ADDR_T(tnxa
.nxa_exppath
);
1585 unxa
.nxa_flags
= tnxa
.nxa_flags
;
1586 unxa
.nxa_netcount
= tnxa
.nxa_netcount
;
1587 unxa
.nxa_nets
= CAST_USER_ADDR_T(tnxa
.nxa_nets
);
1594 error
= nfsrv_export(&unxa
, ctx
);
1600 * Shut down a socket associated with an nfsrv_sock structure.
1601 * Should be called with the send lock set, if required.
1602 * The trick here is to increment the sref at the start, so that the nfsds
1603 * will stop using it and clear ns_flag at the end so that it will not be
1604 * reassigned during cleanup.
/*
 * nfsrv_zapsock: shut down the socket behind an nfsrv_sock.
 *
 * slp - server socket structure to shut down.
 *
 * Tests SLP_VALID first, then clears every bit in SLP_ALLFLAGS, removes the
 * socket up-call with sock_setupcall(so, NULL, NULL), shuts down both
 * directions with sock_shutdown(so, SHUT_RDWR) and finally removes slp from
 * the up-call queue with nfsrv_uc_dequeue().
 * NOTE(review): the statement guarded by the SLP_VALID test (presumably an
 * early return) and the assignment of so are not visible in this excerpt.
 */
1607 nfsrv_zapsock(struct nfsrv_sock
*slp
)
1611 if ((slp
->ns_flag
& SLP_VALID
) == 0) {
1614 slp
->ns_flag
&= ~SLP_ALLFLAGS
;
1621 sock_setupcall(so
, NULL
, NULL
);
1622 sock_shutdown(so
, SHUT_RDWR
);
1625 * Remove from the up-call queue
1627 nfsrv_uc_dequeue(slp
);
1631 * cleanup and release a server socket structure.
/*
 * nfsrv_slpfree: release everything owned by an nfsrv_sock and free it.
 *
 * slp - server socket structure to destroy. It is passed to FREE() at the
 *       end, so the caller must not touch it afterwards.
 *
 * Visible work, in order:
 *  - release the socket with sock_release();
 *  - free the ns_nam, ns_raw, ns_rec and ns_frag mbufs and clear the
 *    pointers;
 *  - for every nfsrv_descript on the ns_tq list: unlink it, clean up its
 *    request chain, free its reply and address mbufs, drop its credential
 *    and GSS context reference, and return it to its zone;
 *  - destroy ns_rwlock and ns_wgmutex and free slp.
 * NOTE(review): the conditions guarding the individual frees are not
 * visible in this excerpt.
 */
1634 nfsrv_slpfree(struct nfsrv_sock
*slp
)
1636 struct nfsrv_descript
*nwp
, *nnwp
;
1639 sock_release(slp
->ns_so
);
1643 mbuf_free(slp
->ns_nam
);
1646 mbuf_freem(slp
->ns_raw
);
1649 mbuf_freem(slp
->ns_rec
);
1652 mbuf_freem(slp
->ns_frag
);
1654 slp
->ns_nam
= slp
->ns_raw
= slp
->ns_rec
= slp
->ns_frag
= NULL
;
1657 for (nwp
= slp
->ns_tq
.lh_first
; nwp
; nwp
= nnwp
) {
/* Save the successor first: nwp is unlinked and freed in this iteration. */
1658 nnwp
= nwp
->nd_tq
.le_next
;
1659 LIST_REMOVE(nwp
, nd_tq
);
1660 nfsm_chain_cleanup(&nwp
->nd_nmreq
);
1662 mbuf_freem(nwp
->nd_mrep
);
1665 mbuf_freem(nwp
->nd_nam2
);
1667 if (IS_VALID_CRED(nwp
->nd_cr
)) {
1668 kauth_cred_unref(&nwp
->nd_cr
);
1670 if (nwp
->nd_gss_context
) {
1671 nfs_gss_svc_ctx_deref(nwp
->nd_gss_context
);
1673 FREE_ZONE(nwp
, sizeof(*nwp
), M_NFSRVDESC
);
1675 LIST_INIT(&slp
->ns_tq
);
1677 lck_rw_destroy(&slp
->ns_rwlock
, nfsrv_slp_rwlock_group
);
1678 lck_mtx_destroy(&slp
->ns_wgmutex
, nfsrv_slp_mutex_group
);
1679 FREE(slp
, M_NFSSVC
);
1683 * Dereference a server socket structure. If it has no more references and
1684 * is no longer valid, you can throw it away.
/*
 * nfsrv_slpderef_locked: drop a reference on an nfsrv_sock and dispose of
 * it once it is unreferenced and no longer valid.
 *
 * slp - server socket structure.
 *
 * NOTE(review): the name suggests the caller already holds nfsd_mutex; the
 * nfsrv_slpderef() wrapper takes that mutex around its call to this
 * function. Confirm for the other callers.
 *
 * Takes slp->ns_rwlock exclusively. While the structure still has
 * references (ns_sref) or is still SLP_VALID, it is at most removed from
 * the nfsrv_sockwait / nfsrv_sockwork queue when it is queued without
 * SLP_WORKTODO. Otherwise it is removed from whichever service queue it is
 * on, from nfsrv_socklist (decrementing nfsrv_sock_tcp_cnt for stream
 * sockets) and from the nfsrv_sockwg write-gather list.
 * NOTE(review): the reference-count decrement and the final release of slp
 * are not visible in this excerpt.
 */
1687 nfsrv_slpderef_locked(struct nfsrv_sock
*slp
)
1689 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1692 if (slp
->ns_sref
|| (slp
->ns_flag
& SLP_VALID
)) {
1693 if ((slp
->ns_flag
& SLP_QUEUED
) && !(slp
->ns_flag
& SLP_WORKTODO
)) {
1694 /* remove socket from queue since there's no work */
1695 if (slp
->ns_flag
& SLP_WAITQ
) {
1696 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1698 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1700 slp
->ns_flag
&= ~SLP_QUEUED
;
1702 lck_rw_done(&slp
->ns_rwlock
);
1706 /* This socket is no longer valid, so we'll get rid of it */
1708 if (slp
->ns_flag
& SLP_QUEUED
) {
1709 if (slp
->ns_flag
& SLP_WAITQ
) {
1710 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1712 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1714 slp
->ns_flag
&= ~SLP_QUEUED
;
1716 lck_rw_done(&slp
->ns_rwlock
);
1718 TAILQ_REMOVE(&nfsrv_socklist
, slp
, ns_chain
);
1719 if (slp
->ns_sotype
== SOCK_STREAM
) {
1720 nfsrv_sock_tcp_cnt
--;
1723 /* now remove from the write gather socket list */
1724 if (slp
->ns_wgq
.tqe_next
!= SLPNOLIST
) {
1725 TAILQ_REMOVE(&nfsrv_sockwg
, slp
, ns_wgq
);
1726 slp
->ns_wgq
.tqe_next
= SLPNOLIST
;
/*
 * nfsrv_slpderef: drop a reference on an nfsrv_sock.
 *
 * slp - server socket structure whose reference is being dropped.
 *
 * Wrapper that takes nfsd_mutex, calls nfsrv_slpderef_locked(slp) and
 * releases the mutex again.
 */
1732 nfsrv_slpderef(struct nfsrv_sock
*slp
)
1734 lck_mtx_lock(nfsd_mutex
);
1735 nfsrv_slpderef_locked(slp
);
1736 lck_mtx_unlock(nfsd_mutex
);
1740 * Check periodically for idle sockets if needed and
/*
 * nfsrv_idlesock_timer: timer callback that looks for idle server sockets.
 *
 * param0, param1 - unused.
 *
 * Works under nfsd_mutex:
 *  - an idle timeout below NFSD_MIN_IDLE_TIMEOUT is reset to 0;
 *  - when the timeout is 0, or there are at most 2 * nfsd_thread_max TCP
 *    connections, nfsrv_idlesock_timer_on is cleared and the mutex released
 *    (NOTE(review): presumably followed by a return that is not visible in
 *    this excerpt);
 *  - otherwise nfsrv_socklist is walked with each entry locked exclusively.
 *    Datagram and referenced sockets are skipped. The first remaining
 *    socket that has not yet reached the idle timeout, or any socket once
 *    the TCP count is down to 2 * nfsd_thread_max, is used to compute
 *    time_to_wait. Other sockets are handed to nfsrv_slpderef_locked();
 *    per the in-line comment they have been closed by nfsrv_zapsock first
 *    (that call is not visible in this excerpt);
 *  - finally the timer is restarted with time_to_wait * 1000 and
 *    nfsrv_idlesock_timer_on is set to now.tv_sec + time_to_wait, which
 *    nfssvc_addsock() reads.
 */
1744 nfsrv_idlesock_timer(__unused
void *param0
, __unused
void *param1
)
1746 struct nfsrv_sock
*slp
, *tslp
;
1748 time_t time_to_wait
= nfsrv_sock_idle_timeout
;
1751 lck_mtx_lock(nfsd_mutex
);
1753 /* Turn off the timer if we're suppose to and get out */
1754 if (nfsrv_sock_idle_timeout
< NFSD_MIN_IDLE_TIMEOUT
) {
1755 nfsrv_sock_idle_timeout
= 0;
1757 if ((nfsrv_sock_tcp_cnt
<= 2 * nfsd_thread_max
) || (nfsrv_sock_idle_timeout
== 0)) {
1758 nfsrv_idlesock_timer_on
= 0;
1759 lck_mtx_unlock(nfsd_mutex
);
1763 TAILQ_FOREACH_SAFE(slp
, &nfsrv_socklist
, ns_chain
, tslp
) {
1764 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1765 /* Skip udp and referenced sockets */
1766 if (slp
->ns_sotype
== SOCK_DGRAM
|| slp
->ns_sref
) {
1767 lck_rw_done(&slp
->ns_rwlock
);
1771 * If this is the first non-referenced socket that hasn't idle out,
1772 * use its time stamp to calculate the earlist time in the future
1773 * to start the next invocation of the timer. Since the nfsrv_socklist
1774 * is sorted oldest access to newest. Once we find the first one,
1775 * we're done and break out of the loop.
1777 if (((slp
->ns_timestamp
+ nfsrv_sock_idle_timeout
) > now
.tv_sec
) ||
1778 nfsrv_sock_tcp_cnt
<= 2 * nfsd_thread_max
) {
1779 time_to_wait
-= now
.tv_sec
- slp
->ns_timestamp
;
1780 if (time_to_wait
< 1) {
1783 lck_rw_done(&slp
->ns_rwlock
);
1787 * Bump the ref count. nfsrv_slpderef below will destroy
1788 * the socket, since nfsrv_zapsock has closed it.
1792 lck_rw_done(&slp
->ns_rwlock
);
1793 nfsrv_slpderef_locked(slp
);
1796 /* Start ourself back up */
1797 nfs_interval_timer_start(nfsrv_idlesock_timer_call
, time_to_wait
* 1000);
1798 /* Remember when the next timer will fire for nfssvc_addsock. */
1799 nfsrv_idlesock_timer_on
= now
.tv_sec
+ time_to_wait
;
1800 lck_mtx_unlock(nfsd_mutex
);
1804 * Clean up the data structures for the server.
1809 struct nfsrv_sock
*slp
, *nslp
;
1812 struct nfsrv_fmod
*fp
, *nfp
;
1817 for (slp
= TAILQ_FIRST(&nfsrv_socklist
); slp
!= 0; slp
= nslp
) {
1818 nslp
= TAILQ_NEXT(slp
, ns_chain
);
1819 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1821 if (slp
->ns_flag
& SLP_VALID
) {
1824 lck_rw_done(&slp
->ns_rwlock
);
1825 nfsrv_slpderef_locked(slp
);
1830 * Flush pending file write fsevents
1832 lck_mtx_lock(nfsrv_fmod_mutex
);
1833 for (i
= 0; i
< NFSRVFMODHASHSZ
; i
++) {
1834 for (fp
= LIST_FIRST(&nfsrv_fmod_hashtbl
[i
]); fp
; fp
= nfp
) {
1836 * Fire off the content modified fsevent for each
1837 * entry, remove it from the list, and free it.
1839 if (nfsrv_fsevents_enabled
) {
1840 fp
->fm_context
.vc_thread
= current_thread();
1841 add_fsevent(FSE_CONTENT_MODIFIED
, &fp
->fm_context
,
1842 FSE_ARG_VNODE
, fp
->fm_vp
,
1845 vnode_put(fp
->fm_vp
);
1846 kauth_cred_unref(&fp
->fm_context
.vc_ucred
);
1847 nfp
= LIST_NEXT(fp
, fm_link
);
1848 LIST_REMOVE(fp
, fm_link
);
1852 nfsrv_fmod_pending
= 0;
1853 lck_mtx_unlock(nfsrv_fmod_mutex
);
1856 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1858 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1860 nfsrv_cleancache(); /* And clear out server cache */
1862 nfsrv_udpsock
= NULL
;
1863 nfsrv_udp6sock
= NULL
;
1866 #endif /* CONFIG_NFS_SERVER */