2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
104 #include <security/audit/audit.h>
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
119 #include <security/mac_framework.h>
122 kern_return_t
thread_terminate(thread_t
); /* XXX */
126 extern int (*nfsrv_procs
[NFS_NPROCS
])(struct nfsrv_descript
*nd
,
127 struct nfsrv_sock
*slp
,
130 extern int nfsrv_wg_delay
;
131 extern int nfsrv_wg_delay_v3
;
133 static int nfsrv_require_resv_port
= 0;
134 static int nfsrv_deadsock_timer_on
= 0;
136 int nfssvc_export(user_addr_t argp
);
137 int nfssvc_nfsd(void);
138 int nfssvc_addsock(socket_t
, mbuf_t
);
139 void nfsrv_zapsock(struct nfsrv_sock
*);
140 void nfsrv_slpderef(struct nfsrv_sock
*);
141 void nfsrv_slpfree(struct nfsrv_sock
*);
143 #endif /* NFSSERVER */
148 SYSCTL_DECL(_vfs_generic
);
149 SYSCTL_NODE(_vfs_generic
, OID_AUTO
, nfs
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "nfs hinge");
152 SYSCTL_NODE(_vfs_generic_nfs
, OID_AUTO
, client
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "nfs client hinge");
153 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, initialdowndelay
, CTLFLAG_RW
, &nfs_tprintf_initial_delay
, 0, "");
154 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nextdowndelay
, CTLFLAG_RW
, &nfs_tprintf_delay
, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, iosize
, CTLFLAG_RW
, &nfs_iosize
, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_cache_timeout
, CTLFLAG_RW
, &nfs_access_cache_timeout
, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, allow_async
, CTLFLAG_RW
, &nfs_allow_async
, 0, "");
158 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, statfs_rate_limit
, CTLFLAG_RW
, &nfs_statfs_rate_limit
, 0, "");
159 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nfsiod_thread_max
, CTLFLAG_RW
, &nfsiod_thread_max
, 0, "");
160 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, nfsiod_thread_count
, CTLFLAG_RD
, &nfsiod_thread_count
, 0, "");
161 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, lockd_mounts
, CTLFLAG_RD
, &nfs_lockd_mounts
, 0, "");
162 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, max_async_writes
, CTLFLAG_RW
, &nfs_max_async_writes
, 0, "");
163 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, single_des
, CTLFLAG_RW
, &nfs_single_des
, 0, "");
164 SYSCTL_INT(_vfs_generic_nfs_client
, OID_AUTO
, access_delete
, CTLFLAG_RW
, &nfs_access_delete
, 0, "");
165 #endif /* NFSCLIENT */
168 SYSCTL_NODE(_vfs_generic_nfs
, OID_AUTO
, server
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "nfs server hinge");
169 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, wg_delay
, CTLFLAG_RW
, &nfsrv_wg_delay
, 0, "");
170 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, wg_delay_v3
, CTLFLAG_RW
, &nfsrv_wg_delay_v3
, 0, "");
171 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, require_resv_port
, CTLFLAG_RW
, &nfsrv_require_resv_port
, 0, "");
172 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, async
, CTLFLAG_RW
, &nfsrv_async
, 0, "");
173 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, export_hash_size
, CTLFLAG_RW
, &nfsrv_export_hash_size
, 0, "");
174 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, reqcache_size
, CTLFLAG_RW
, &nfsrv_reqcache_size
, 0, "");
175 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, request_queue_length
, CTLFLAG_RW
, &nfsrv_sock_max_rec_queue_length
, 0, "");
176 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, user_stats
, CTLFLAG_RW
, &nfsrv_user_stat_enabled
, 0, "");
177 SYSCTL_UINT(_vfs_generic_nfs_server
, OID_AUTO
, gss_context_ttl
, CTLFLAG_RW
, &nfsrv_gss_context_ttl
, 0, "");
179 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, fsevents
, CTLFLAG_RW
, &nfsrv_fsevents_enabled
, 0, "");
181 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_thread_max
, CTLFLAG_RW
, &nfsd_thread_max
, 0, "");
182 SYSCTL_INT(_vfs_generic_nfs_server
, OID_AUTO
, nfsd_thread_count
, CTLFLAG_RD
, &nfsd_thread_count
, 0, "");
183 #endif /* NFSSERVER */
189 nfsclnt(proc_t p
, struct nfsclnt_args
*uap
, __unused
int *retval
)
194 if (uap
->flag
== NFSCLNT_LOCKDANS
) {
195 error
= copyin(uap
->argp
, &la
, sizeof(la
));
196 return (error
!= 0 ? error
: nfslockdans(p
, &la
));
202 * Asynchronous I/O threads for client NFS.
203 * They do read-ahead and write-behind operations on the block I/O cache.
205 * The pool of up to nfsiod_thread_max threads is launched on demand, and each
206 * thread exits when unused for a while. There are as many nfsiod structs as there are
207 * nfsiod threads; however there's no strict tie between a thread and a struct.
208 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
209 * up, it removes the next struct nfsiod from the work queue and services it. Then
210 * it will put the struct at the head of the free list and sleep on it.
211 * Async requests will pull the next struct nfsiod from the head of the free list,
212 * put it on the work queue, and wake whatever thread is waiting on that struct.
216 * nfsiod thread exit routine
218 * Must be called with nfsiod_mutex held so that the
219 * decision to terminate is atomic with the termination.
222 nfsiod_terminate(struct nfsiod
*niod
)
224 nfsiod_thread_count
--;
225 lck_mtx_unlock(nfsiod_mutex
);
229 printf("nfsiod: terminating without niod\n");
230 thread_terminate(current_thread());
234 /* nfsiod thread startup routine */
241 MALLOC(niod
, struct nfsiod
*, sizeof(struct nfsiod
), M_TEMP
, M_WAITOK
);
243 lck_mtx_lock(nfsiod_mutex
);
244 nfsiod_thread_count
--;
245 wakeup(current_thread());
246 lck_mtx_unlock(nfsiod_mutex
);
247 thread_terminate(current_thread());
250 bzero(niod
, sizeof(*niod
));
251 lck_mtx_lock(nfsiod_mutex
);
252 TAILQ_INSERT_HEAD(&nfsiodfree
, niod
, niod_link
);
253 wakeup(current_thread());
254 error
= msleep0(niod
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiod", NFS_ASYNCTHREADMAXIDLE
*hz
, nfsiod_continue
);
255 /* shouldn't return... so we have an error */
256 /* remove an old nfsiod struct and terminate */
257 lck_mtx_lock(nfsiod_mutex
);
258 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
)))
259 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
260 nfsiod_terminate(niod
);
265 * Start up another nfsiod thread.
266 * (unless we're already maxed out and there are nfsiods running)
271 thread_t thd
= THREAD_NULL
;
273 lck_mtx_lock(nfsiod_mutex
);
274 if ((nfsiod_thread_count
>= NFSIOD_MAX
) && (nfsiod_thread_count
> 0)) {
275 lck_mtx_unlock(nfsiod_mutex
);
278 nfsiod_thread_count
++;
279 if (kernel_thread_start((thread_continue_t
)nfsiod_thread
, NULL
, &thd
) != KERN_SUCCESS
) {
280 lck_mtx_unlock(nfsiod_mutex
);
283 /* wait for the thread to complete startup */
284 msleep(thd
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiodw", NULL
);
285 thread_deallocate(thd
);
290 * Continuation for Asynchronous I/O threads for NFS client.
292 * Grab an nfsiod struct to work on, do some work, then drop it
295 nfsiod_continue(int error
)
298 struct nfsmount
*nmp
;
299 struct nfsreq
*req
, *treq
;
300 struct nfs_reqqhead iodq
;
303 lck_mtx_lock(nfsiod_mutex
);
304 niod
= TAILQ_FIRST(&nfsiodwork
);
306 /* there's no work queued up */
307 /* remove an old nfsiod struct and terminate */
308 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
)))
309 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
310 nfsiod_terminate(niod
);
313 TAILQ_REMOVE(&nfsiodwork
, niod
, niod_link
);
316 while ((nmp
= niod
->niod_nmp
)) {
318 * Service this mount's async I/O queue.
320 * In order to ensure some level of fairness between mounts,
321 * we grab all the work up front before processing it so any
322 * new work that arrives will be serviced on a subsequent
323 * iteration - and we have a chance to see if other work needs
324 * to be done (e.g. the delayed write queue needs to be pushed
325 * or other mounts are waiting for an nfsiod).
327 /* grab the current contents of the queue */
329 TAILQ_CONCAT(&iodq
, &nmp
->nm_iodq
, r_achain
);
330 lck_mtx_unlock(nfsiod_mutex
);
332 /* process the queue */
333 TAILQ_FOREACH_SAFE(req
, &iodq
, r_achain
, treq
) {
334 TAILQ_REMOVE(&iodq
, req
, r_achain
);
335 req
->r_achain
.tqe_next
= NFSREQNOLIST
;
336 req
->r_callback
.rcb_func(req
);
339 /* now check if there's more/other work to be done */
340 lck_mtx_lock(nfsiod_mutex
);
341 morework
= !TAILQ_EMPTY(&nmp
->nm_iodq
);
342 if (!morework
|| !TAILQ_EMPTY(&nfsiodmounts
)) {
343 /* we're going to stop working on this mount */
344 if (morework
) /* mount still needs more work so queue it up */
345 TAILQ_INSERT_TAIL(&nfsiodmounts
, nmp
, nm_iodlink
);
347 niod
->niod_nmp
= NULL
;
351 /* loop if there's still a mount to work on */
352 if (!niod
->niod_nmp
&& !TAILQ_EMPTY(&nfsiodmounts
)) {
353 niod
->niod_nmp
= TAILQ_FIRST(&nfsiodmounts
);
354 TAILQ_REMOVE(&nfsiodmounts
, niod
->niod_nmp
, nm_iodlink
);
359 /* queue ourselves back up - if there aren't too many threads running */
360 if (nfsiod_thread_count
<= NFSIOD_MAX
) {
361 TAILQ_INSERT_HEAD(&nfsiodfree
, niod
, niod_link
);
362 error
= msleep0(niod
, nfsiod_mutex
, PWAIT
| PDROP
, "nfsiod", NFS_ASYNCTHREADMAXIDLE
*hz
, nfsiod_continue
);
363 /* shouldn't return... so we have an error */
364 /* remove an old nfsiod struct and terminate */
365 lck_mtx_lock(nfsiod_mutex
);
366 if ((niod
= TAILQ_LAST(&nfsiodfree
, nfsiodlist
)))
367 TAILQ_REMOVE(&nfsiodfree
, niod
, niod_link
);
369 nfsiod_terminate(niod
);
374 #endif /* NFSCLIENT */
380 * NFS server system calls
381 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
385 * Get file handle system call
388 getfh(proc_t p
, struct getfh_args
*uap
, __unused
int *retval
)
391 struct nfs_filehandle nfh
;
394 char path
[MAXPATHLEN
], *ptr
;
396 struct nfs_exportfs
*nxfs
;
397 struct nfs_export
*nx
;
402 error
= proc_suser(p
);
406 error
= copyinstr(uap
->fname
, path
, MAXPATHLEN
, (size_t *)&pathlen
);
410 if (!nfsrv_is_initialized())
413 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| AUDITVNPATH1
,
414 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), vfs_context_current());
422 // find exportfs that matches f_mntonname
423 lck_rw_lock_shared(&nfsrv_export_rwlock
);
424 ptr
= vnode_mount(vp
)->mnt_vfsstat
.f_mntonname
;
425 LIST_FOREACH(nxfs
, &nfsrv_exports
, nxfs_next
) {
426 if (!strncmp(nxfs
->nxfs_path
, ptr
, MAXPATHLEN
))
429 if (!nxfs
|| strncmp(nxfs
->nxfs_path
, path
, strlen(nxfs
->nxfs_path
))) {
433 // find export that best matches remainder of path
434 ptr
= path
+ strlen(nxfs
->nxfs_path
);
435 while (*ptr
&& (*ptr
== '/'))
437 LIST_FOREACH(nx
, &nxfs
->nxfs_exports
, nx_next
) {
438 int len
= strlen(nx
->nx_path
);
439 if (len
== 0) // we've hit the export entry for the root directory
441 if (!strncmp(nx
->nx_path
, ptr
, len
))
449 bzero(&nfh
, sizeof(nfh
));
450 nfh
.nfh_xh
.nxh_version
= htonl(NFS_FH_VERSION
);
451 nfh
.nfh_xh
.nxh_fsid
= htonl(nxfs
->nxfs_id
);
452 nfh
.nfh_xh
.nxh_expid
= htonl(nx
->nx_id
);
453 nfh
.nfh_xh
.nxh_flags
= 0;
454 nfh
.nfh_xh
.nxh_reserved
= 0;
455 nfh
.nfh_len
= NFSV3_MAX_FID_SIZE
;
456 error
= VFS_VPTOFH(vp
, (int*)&nfh
.nfh_len
, &nfh
.nfh_fid
[0], NULL
);
457 if (nfh
.nfh_len
> (int)NFSV3_MAX_FID_SIZE
)
459 nfh
.nfh_xh
.nxh_fidlen
= nfh
.nfh_len
;
460 nfh
.nfh_len
+= sizeof(nfh
.nfh_xh
);
461 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
464 lck_rw_done(&nfsrv_export_rwlock
);
468 error
= copyout((caddr_t
)&nfh
, uap
->fhp
, sizeof(nfh
));
472 extern struct fileops vnops
;
475 * syscall for the rpc.lockd to use to translate a NFS file handle into
476 * an open descriptor.
478 * warning: do not remove the suser() call or this becomes one giant security hole.
483 struct fhopen_args
*uap
,
487 struct nfs_filehandle nfh
;
488 struct nfs_export
*nx
;
489 struct nfs_export_options
*nxo
;
491 struct fileproc
*fp
, *nfp
;
492 int fmode
, error
, type
;
494 vfs_context_t ctx
= vfs_context_current();
495 kauth_action_t action
;
500 error
= suser(vfs_context_ucred(ctx
), 0);
505 if (!nfsrv_is_initialized()) {
509 fmode
= FFLAGS(uap
->flags
);
510 /* why not allow a non-read/write open for our lockd? */
511 if (((fmode
& (FREAD
| FWRITE
)) == 0) || (fmode
& O_CREAT
))
514 error
= copyin(uap
->u_fhp
, &nfh
.nfh_len
, sizeof(nfh
.nfh_len
));
517 if ((nfh
.nfh_len
< (int)sizeof(struct nfs_exphandle
)) ||
518 (nfh
.nfh_len
> (int)NFSV3_MAX_FH_SIZE
))
520 error
= copyin(uap
->u_fhp
, &nfh
, sizeof(nfh
.nfh_len
) + nfh
.nfh_len
);
523 nfh
.nfh_fhp
= (u_char
*)&nfh
.nfh_xh
;
525 lck_rw_lock_shared(&nfsrv_export_rwlock
);
526 /* now give me my vnode, it gets returned to me with a reference */
527 error
= nfsrv_fhtovp(&nfh
, NULL
, &vp
, &nx
, &nxo
);
528 lck_rw_done(&nfsrv_export_rwlock
);
530 if (error
== NFSERR_TRYLATER
)
531 error
= EAGAIN
; // XXX EBUSY? Or just leave as TRYLATER?
536 * From now on we have to make sure not
537 * to forget about the vnode.
538 * Any error that causes an abort must vnode_put(vp).
539 * Just set error = err and 'goto bad;'.
545 if (vnode_vtype(vp
) == VSOCK
) {
550 /* disallow write operations on directories */
551 if (vnode_isdir(vp
) && (fmode
& (FWRITE
| O_TRUNC
))) {
556 /* compute action to be authorized */
559 action
|= KAUTH_VNODE_READ_DATA
;
560 if (fmode
& (FWRITE
| O_TRUNC
))
561 action
|= KAUTH_VNODE_WRITE_DATA
;
562 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
565 if ((error
= VNOP_OPEN(vp
, fmode
, ctx
)))
567 if ((error
= vnode_ref_ext(vp
, fmode
)))
571 * end of vn_open code
574 // starting here... error paths should call vn_close/vnode_put
575 if ((error
= falloc(p
, &nfp
, &indx
, ctx
)) != 0) {
576 vn_close(vp
, fmode
& FMASK
, ctx
);
581 fp
->f_fglob
->fg_flag
= fmode
& FMASK
;
582 fp
->f_fglob
->fg_type
= DTYPE_VNODE
;
583 fp
->f_fglob
->fg_ops
= &vnops
;
584 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
586 // XXX do we really need to support this with fhopen()?
587 if (fmode
& (O_EXLOCK
| O_SHLOCK
)) {
588 lf
.l_whence
= SEEK_SET
;
591 if (fmode
& O_EXLOCK
)
596 if ((fmode
& FNONBLOCK
) == 0)
598 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
))) {
599 struct vfs_context context
= *vfs_context_current();
600 /* Modify local copy (to not damage thread copy) */
601 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
603 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
604 fp_free(p
, indx
, fp
);
607 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
613 procfdtbl_releasefd(p
, indx
, NULL
);
614 fp_drop(p
, indx
, fp
, 1);
626 * NFS server pseudo system call
629 nfssvc(proc_t p
, struct nfssvc_args
*uap
, __unused
int *retval
)
632 struct user_nfsd_args user_nfsdarg
;
636 AUDIT_ARG(cmd
, uap
->flag
);
639 * Must be super user for most operations (export ops checked later).
641 if ((uap
->flag
!= NFSSVC_EXPORT
) && ((error
= proc_suser(p
))))
644 error
= mac_system_check_nfsd(kauth_cred_get());
649 /* make sure NFS server data structures have been initialized */
652 if (uap
->flag
& NFSSVC_ADDSOCK
) {
653 if (IS_64BIT_PROCESS(p
)) {
654 error
= copyin(uap
->argp
, (caddr_t
)&user_nfsdarg
, sizeof(user_nfsdarg
));
656 struct nfsd_args tmp_args
;
657 error
= copyin(uap
->argp
, (caddr_t
)&tmp_args
, sizeof(tmp_args
));
659 user_nfsdarg
.sock
= tmp_args
.sock
;
660 user_nfsdarg
.name
= CAST_USER_ADDR_T(tmp_args
.name
);
661 user_nfsdarg
.namelen
= tmp_args
.namelen
;
667 error
= file_socket(user_nfsdarg
.sock
, &so
);
670 /* Get the client address for connected sockets. */
671 if (user_nfsdarg
.name
== USER_ADDR_NULL
|| user_nfsdarg
.namelen
== 0) {
674 error
= sockargs(&nam
, user_nfsdarg
.name
, user_nfsdarg
.namelen
, MBUF_TYPE_SONAME
);
676 /* drop the iocount file_socket() grabbed on the file descriptor */
677 file_drop(user_nfsdarg
.sock
);
682 * nfssvc_addsock() will grab a retain count on the socket
683 * to keep the socket from being closed when nfsd closes its
684 * file descriptor for it.
686 error
= nfssvc_addsock(so
, nam
);
687 /* drop the iocount file_socket() grabbed on the file descriptor */
688 file_drop(user_nfsdarg
.sock
);
689 } else if (uap
->flag
& NFSSVC_NFSD
) {
690 error
= nfssvc_nfsd();
691 } else if (uap
->flag
& NFSSVC_EXPORT
) {
692 error
= nfssvc_export(uap
->argp
);
696 if (error
== EINTR
|| error
== ERESTART
)
702 * Adds a socket to the list for servicing by nfsds.
705 nfssvc_addsock(socket_t so
, mbuf_t mynam
)
707 struct nfsrv_sock
*slp
;
708 int error
= 0, sodomain
, sotype
, soprotocol
, on
= 1;
709 struct timeval timeo
;
711 /* make sure mbuf constants are set up */
715 sock_gettype(so
, &sodomain
, &sotype
, &soprotocol
);
717 /* There should be only one UDP socket */
718 if ((soprotocol
== IPPROTO_UDP
) && nfsrv_udpsock
) {
723 /* Set protocol options and reserve some space (for UDP). */
724 if (sotype
== SOCK_STREAM
)
725 sock_setsockopt(so
, SOL_SOCKET
, SO_KEEPALIVE
, &on
, sizeof(on
));
726 if ((sodomain
== AF_INET
) && (soprotocol
== IPPROTO_TCP
))
727 sock_setsockopt(so
, IPPROTO_TCP
, TCP_NODELAY
, &on
, sizeof(on
));
728 if (sotype
== SOCK_DGRAM
) { /* set socket buffer sizes for UDP */
729 int reserve
= NFS_UDPSOCKBUF
;
730 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_SNDBUF
, &reserve
, sizeof(reserve
));
731 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_RCVBUF
, &reserve
, sizeof(reserve
));
733 log(LOG_INFO
, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error
);
737 sock_nointerrupt(so
, 0);
740 * Set socket send/receive timeouts.
741 * Receive timeout shouldn't matter, but setting the send timeout
742 * will make sure that an unresponsive client can't hang the server.
746 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_RCVTIMEO
, &timeo
, sizeof(timeo
));
748 error
|= sock_setsockopt(so
, SOL_SOCKET
, SO_SNDTIMEO
, &timeo
, sizeof(timeo
));
750 log(LOG_INFO
, "nfssvc_addsock: socket timeout setting error(s) %d\n", error
);
754 MALLOC(slp
, struct nfsrv_sock
*, sizeof(struct nfsrv_sock
), M_NFSSVC
, M_WAITOK
);
759 bzero((caddr_t
)slp
, sizeof (struct nfsrv_sock
));
760 lck_rw_init(&slp
->ns_rwlock
, nfsrv_slp_rwlock_group
, LCK_ATTR_NULL
);
761 lck_mtx_init(&slp
->ns_wgmutex
, nfsrv_slp_mutex_group
, LCK_ATTR_NULL
);
763 lck_mtx_lock(nfsd_mutex
);
765 if (soprotocol
== IPPROTO_UDP
) {
766 /* There should be only one UDP socket */
768 lck_mtx_unlock(nfsd_mutex
);
776 /* add the socket to the list */
777 TAILQ_INSERT_TAIL(&nfsrv_socklist
, slp
, ns_chain
);
779 sock_retain(so
); /* grab a retain count on the socket */
781 slp
->ns_sotype
= sotype
;
784 /* set up the socket upcall */
786 so
->so_upcallarg
= (caddr_t
)slp
;
787 so
->so_upcall
= nfsrv_rcv
;
788 so
->so_rcv
.sb_flags
|= SB_UPCALL
;
789 socket_unlock(so
, 1);
790 /* just playin' it safe */
791 sock_setsockopt(so
, SOL_SOCKET
, SO_UPCALLCLOSEWAIT
, &on
, sizeof(on
));
793 /* mark that the socket is not in the nfsrv_sockwg list */
794 slp
->ns_wgq
.tqe_next
= SLPNOLIST
;
796 slp
->ns_flag
= SLP_VALID
| SLP_NEEDQ
;
799 lck_mtx_unlock(nfsd_mutex
);
807 * nfsd theory of operation:
809 * The first nfsd thread stays in user mode accepting new TCP connections
810 * which are then added via the "addsock" call. The rest of the nfsd threads
811 * simply call into the kernel and remain there in a loop handling NFS
812 * requests until killed by a signal.
814 * There's a list of nfsd threads (nfsd_head).
815 * There's an nfsd queue that contains only those nfsds that are
816 * waiting for work to do (nfsd_queue).
818 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
819 * managing the work on the sockets:
820 * nfsrv_sockwait - sockets w/new data waiting to be worked on
821 * nfsrv_sockwork - sockets being worked on which may have more work to do
822 * nfsrv_sockwg -- sockets which have pending write gather data
823 * When a socket receives data, if it is not currently queued, it
824 * will be placed at the end of the "wait" queue.
825 * Whenever a socket needs servicing we make sure it is queued and
826 * wake up a waiting nfsd (if there is one).
828 * nfsds will service at most 8 requests from the same socket before
829 * defecting to work on another socket.
830 * nfsds will defect immediately if there are any sockets in the "wait" queue.
831 * nfsds looking for a socket to work on check the "wait" queue first and
832 * then check the "work" queue.
833 * When an nfsd starts working on a socket, it removes it from the head of
834 * the queue it's currently on and moves it to the end of the "work" queue.
835 * When nfsds are checking the queues for work, any sockets found not to
836 * have any work are simply dropped from the queue.
843 struct nfsrv_sock
*slp
;
845 struct nfsrv_descript
*nd
= NULL
;
846 int error
= 0, cacherep
, writes_todo
;
847 int siz
, procrastinate
, opcnt
= 0;
850 struct vfs_context context
;
857 MALLOC(nfsd
, struct nfsd
*, sizeof(struct nfsd
), M_NFSD
, M_WAITOK
);
860 bzero(nfsd
, sizeof(struct nfsd
));
861 lck_mtx_lock(nfsd_mutex
);
862 if (nfsd_thread_count
++ == 0)
863 nfsrv_initcache(); /* Init the server request cache */
864 TAILQ_INSERT_TAIL(&nfsd_head
, nfsd
, nfsd_chain
);
865 lck_mtx_unlock(nfsd_mutex
);
867 context
.vc_thread
= current_thread();
870 * Loop getting rpc requests until SIGKILL.
873 if (nfsd_thread_max
<= 0) {
874 /* NFS server shutting down, get out ASAP */
876 slp
= nfsd
->nfsd_slp
;
877 } else if (nfsd
->nfsd_flag
& NFSD_REQINPROG
) {
878 /* already have some work to do */
880 slp
= nfsd
->nfsd_slp
;
882 /* need to find work to do */
884 lck_mtx_lock(nfsd_mutex
);
885 while (!nfsd
->nfsd_slp
&& TAILQ_EMPTY(&nfsrv_sockwait
) && TAILQ_EMPTY(&nfsrv_sockwork
)) {
886 if (nfsd_thread_count
> nfsd_thread_max
) {
888 * If we have no socket and there are more
889 * nfsd threads than configured, let's exit.
894 nfsd
->nfsd_flag
|= NFSD_WAITING
;
895 TAILQ_INSERT_HEAD(&nfsd_queue
, nfsd
, nfsd_queue
);
896 error
= msleep(nfsd
, nfsd_mutex
, PSOCK
| PCATCH
, "nfsd", NULL
);
898 if (nfsd
->nfsd_flag
& NFSD_WAITING
) {
899 TAILQ_REMOVE(&nfsd_queue
, nfsd
, nfsd_queue
);
900 nfsd
->nfsd_flag
&= ~NFSD_WAITING
;
905 slp
= nfsd
->nfsd_slp
;
906 if (!slp
&& !TAILQ_EMPTY(&nfsrv_sockwait
)) {
907 /* look for a socket to work on in the wait queue */
908 while ((slp
= TAILQ_FIRST(&nfsrv_sockwait
))) {
909 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
910 /* remove from the head of the queue */
911 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
912 slp
->ns_flag
&= ~SLP_WAITQ
;
913 if ((slp
->ns_flag
& SLP_VALID
) && (slp
->ns_flag
& SLP_WORKTODO
))
915 /* nothing to do, so skip this socket */
916 lck_rw_done(&slp
->ns_rwlock
);
919 if (!slp
&& !TAILQ_EMPTY(&nfsrv_sockwork
)) {
920 /* look for a socket to work on in the work queue */
921 while ((slp
= TAILQ_FIRST(&nfsrv_sockwork
))) {
922 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
923 /* remove from the head of the queue */
924 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
925 slp
->ns_flag
&= ~SLP_WORKQ
;
926 if ((slp
->ns_flag
& SLP_VALID
) && (slp
->ns_flag
& SLP_WORKTODO
))
928 /* nothing to do, so skip this socket */
929 lck_rw_done(&slp
->ns_rwlock
);
932 if (!nfsd
->nfsd_slp
&& slp
) {
933 /* we found a socket to work on, grab a reference */
935 nfsd
->nfsd_slp
= slp
;
937 /* and put it at the back of the work queue */
938 TAILQ_INSERT_TAIL(&nfsrv_sockwork
, slp
, ns_svcq
);
939 slp
->ns_flag
|= SLP_WORKQ
;
940 lck_rw_done(&slp
->ns_rwlock
);
942 lck_mtx_unlock(nfsd_mutex
);
945 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
946 if (slp
->ns_flag
& SLP_VALID
) {
947 if ((slp
->ns_flag
& (SLP_NEEDQ
|SLP_DISCONN
)) == SLP_NEEDQ
) {
948 slp
->ns_flag
&= ~SLP_NEEDQ
;
949 nfsrv_rcv_locked(slp
->ns_so
, slp
, MBUF_WAITOK
);
951 if (slp
->ns_flag
& SLP_DISCONN
)
953 error
= nfsrv_dorec(slp
, nfsd
, &nd
);
954 if (error
== EINVAL
) { // RPCSEC_GSS drop
955 if (slp
->ns_sotype
== SOCK_STREAM
)
956 nfsrv_zapsock(slp
); // drop connection
959 if (error
&& (slp
->ns_wgtime
|| (slp
->ns_flag
& SLP_DOWRITES
))) {
961 cur_usec
= (u_quad_t
)now
.tv_sec
* 1000000 +
962 (u_quad_t
)now
.tv_usec
;
963 if (slp
->ns_wgtime
<= cur_usec
) {
968 slp
->ns_flag
&= ~SLP_DOWRITES
;
970 nfsd
->nfsd_flag
|= NFSD_REQINPROG
;
972 lck_rw_done(&slp
->ns_rwlock
);
974 if (error
|| (slp
&& !(slp
->ns_flag
& SLP_VALID
))) {
976 nfsm_chain_cleanup(&nd
->nd_nmreq
);
978 mbuf_freem(nd
->nd_nam2
);
979 if (IS_VALID_CRED(nd
->nd_cr
))
980 kauth_cred_unref(&nd
->nd_cr
);
981 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
984 nfsd
->nfsd_slp
= NULL
;
985 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
988 if (nfsd_thread_max
<= 0)
993 microuptime(&nd
->nd_starttime
);
995 nd
->nd_nam
= nd
->nd_nam2
;
997 nd
->nd_nam
= slp
->ns_nam
;
999 cacherep
= nfsrv_getcache(nd
, slp
, &mrep
);
1001 if (nfsrv_require_resv_port
) {
1002 /* Check if source port is a reserved port */
1004 struct sockaddr
*nam
= mbuf_data(nd
->nd_nam
);
1005 struct sockaddr_in
*sin
;
1007 sin
= (struct sockaddr_in
*)nam
;
1008 port
= ntohs(sin
->sin_port
);
1009 if (port
>= IPPORT_RESERVED
&&
1010 nd
->nd_procnum
!= NFSPROC_NULL
) {
1011 char strbuf
[MAX_IPv4_STR_LEN
];
1012 nd
->nd_procnum
= NFSPROC_NOOP
;
1013 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1015 printf("NFS request from unprivileged port (%s:%d)\n",
1016 inet_ntop(AF_INET
, &sin
->sin_addr
, strbuf
, sizeof(strbuf
)),
1024 * Loop to get all the write RPC replies that have been
1025 * gathered together.
1030 if (nd
&& (nd
->nd_vers
== NFS_VER3
))
1031 procrastinate
= nfsrv_wg_delay_v3
;
1033 procrastinate
= nfsrv_wg_delay
;
1034 lck_rw_lock_shared(&nfsrv_export_rwlock
);
1035 context
.vc_ucred
= NULL
;
1036 if (writes_todo
|| ((nd
->nd_procnum
== NFSPROC_WRITE
) && (procrastinate
> 0)))
1037 error
= nfsrv_writegather(&nd
, slp
, &context
, &mrep
);
1039 error
= (*(nfsrv_procs
[nd
->nd_procnum
]))(nd
, slp
, &context
, &mrep
);
1040 lck_rw_done(&nfsrv_export_rwlock
);
1043 * If this is a stream socket and we are not going
1044 * to send a reply we better close the connection
1045 * so the client doesn't hang.
1047 if (error
&& slp
->ns_sotype
== SOCK_STREAM
) {
1048 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1050 lck_rw_done(&slp
->ns_rwlock
);
1051 printf("NFS server: NULL reply from proc = %d error = %d\n",
1052 nd
->nd_procnum
, error
);
1058 OSAddAtomic(1, &nfsstats
.srv_errs
);
1059 nfsrv_updatecache(nd
, FALSE
, mrep
);
1061 mbuf_freem(nd
->nd_nam2
);
1066 OSAddAtomic(1, &nfsstats
.srvrpccnt
[nd
->nd_procnum
]);
1067 nfsrv_updatecache(nd
, TRUE
, mrep
);
1071 if (nd
->nd_gss_mb
!= NULL
) { // It's RPCSEC_GSS
1073 * Need to checksum or encrypt the reply
1075 error
= nfs_gss_svc_protect_reply(nd
, mrep
);
1083 * Get the total size of the reply
1091 if (siz
<= 0 || siz
> NFS_MAXPACKET
) {
1092 printf("mbuf siz=%d\n",siz
);
1093 panic("Bad nfs svc reply");
1096 mbuf_pkthdr_setlen(m
, siz
);
1097 error
= mbuf_pkthdr_setrcvif(m
, NULL
);
1099 panic("nfsd setrcvif failed: %d", error
);
1101 * For stream protocols, prepend a Sun RPC record mark (last-fragment bit plus length).
1104 if (slp
->ns_sotype
== SOCK_STREAM
) {
1105 error
= mbuf_prepend(&m
, NFSX_UNSIGNED
, MBUF_WAITOK
);
1107 *(u_int32_t
*)mbuf_data(m
) = htonl(0x80000000 | siz
);
1110 if (slp
->ns_flag
& SLP_VALID
) {
1111 error
= nfsrv_send(slp
, nd
->nd_nam2
, m
);
1121 mbuf_freem(nd
->nd_nam2
);
1124 if (error
== EPIPE
) {
1125 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1127 lck_rw_done(&slp
->ns_rwlock
);
1129 if (error
== EINTR
|| error
== ERESTART
) {
1130 nfsm_chain_cleanup(&nd
->nd_nmreq
);
1131 if (IS_VALID_CRED(nd
->nd_cr
))
1132 kauth_cred_unref(&nd
->nd_cr
);
1133 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
1134 nfsrv_slpderef(slp
);
1135 lck_mtx_lock(nfsd_mutex
);
1140 mbuf_freem(nd
->nd_nam2
);
1146 nfsm_chain_cleanup(&nd
->nd_nmreq
);
1148 mbuf_freem(nd
->nd_nam2
);
1149 if (IS_VALID_CRED(nd
->nd_cr
))
1150 kauth_cred_unref(&nd
->nd_cr
);
1151 FREE_ZONE(nd
, sizeof(*nd
), M_NFSRVDESC
);
1156 * Check to see if there are outstanding writes that
1157 * need to be serviced.
1160 if (slp
->ns_wgtime
) {
1162 cur_usec
= (u_quad_t
)now
.tv_sec
* 1000000 +
1163 (u_quad_t
)now
.tv_usec
;
1164 if (slp
->ns_wgtime
<= cur_usec
) {
1169 } while (writes_todo
);
1172 if (TAILQ_EMPTY(&nfsrv_sockwait
) && (opcnt
< 8)) {
1173 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1174 error
= nfsrv_dorec(slp
, nfsd
, &nd
);
1175 if (error
== EINVAL
) { // RPCSEC_GSS drop
1176 if (slp
->ns_sotype
== SOCK_STREAM
)
1177 nfsrv_zapsock(slp
); // drop connection
1179 lck_rw_done(&slp
->ns_rwlock
);
1182 /* drop our reference on the socket */
1183 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
1184 nfsd
->nfsd_slp
= NULL
;
1185 nfsrv_slpderef(slp
);
1188 lck_mtx_lock(nfsd_mutex
);
1190 TAILQ_REMOVE(&nfsd_head
, nfsd
, nfsd_chain
);
1192 if (--nfsd_thread_count
== 0)
1194 lck_mtx_unlock(nfsd_mutex
);
1199 nfssvc_export(user_addr_t argp
)
1201 int error
= 0, is_64bit
;
1202 struct user_nfs_export_args unxa
;
1203 vfs_context_t ctx
= vfs_context_current();
1205 is_64bit
= IS_64BIT_PROCESS(vfs_context_proc(ctx
));
1207 /* copy in pointers to path and export args */
1209 error
= copyin(argp
, (caddr_t
)&unxa
, sizeof(unxa
));
1211 struct nfs_export_args tnxa
;
1212 error
= copyin(argp
, (caddr_t
)&tnxa
, sizeof(tnxa
));
1214 /* munge into LP64 version of nfs_export_args structure */
1215 unxa
.nxa_fsid
= tnxa
.nxa_fsid
;
1216 unxa
.nxa_expid
= tnxa
.nxa_expid
;
1217 unxa
.nxa_fspath
= CAST_USER_ADDR_T(tnxa
.nxa_fspath
);
1218 unxa
.nxa_exppath
= CAST_USER_ADDR_T(tnxa
.nxa_exppath
);
1219 unxa
.nxa_flags
= tnxa
.nxa_flags
;
1220 unxa
.nxa_netcount
= tnxa
.nxa_netcount
;
1221 unxa
.nxa_nets
= CAST_USER_ADDR_T(tnxa
.nxa_nets
);
1227 error
= nfsrv_export(&unxa
, ctx
);
1233 * Shut down a socket associated with an nfsrv_sock structure.
1234 * Should be called with the send lock set, if required.
1235 * The trick here is to increment the sref at the start, so that the nfsds
1236 * will stop using it and clear ns_flag at the end so that it will not be
1237 * reassigned during cleanup.
1240 nfsrv_zapsock(struct nfsrv_sock
*slp
)
1244 if ((slp
->ns_flag
& SLP_VALID
) == 0)
1246 slp
->ns_flag
&= ~SLP_ALLFLAGS
;
1253 * Attempt to deter future upcalls, but leave the
1254 * upcall info in place to avoid a race with the
1258 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
1259 socket_unlock(so
, 1);
1261 sock_shutdown(so
, SHUT_RDWR
);
1265 * cleanup and release a server socket structure.
1268 nfsrv_slpfree(struct nfsrv_sock
*slp
)
1270 struct nfsrv_descript
*nwp
, *nnwp
;
1273 sock_release(slp
->ns_so
);
1277 mbuf_free(slp
->ns_nam
);
1279 mbuf_freem(slp
->ns_raw
);
1281 mbuf_freem(slp
->ns_rec
);
1283 mbuf_freem(slp
->ns_frag
);
1284 slp
->ns_nam
= slp
->ns_raw
= slp
->ns_rec
= slp
->ns_frag
= NULL
;
1287 for (nwp
= slp
->ns_tq
.lh_first
; nwp
; nwp
= nnwp
) {
1288 nnwp
= nwp
->nd_tq
.le_next
;
1289 LIST_REMOVE(nwp
, nd_tq
);
1290 nfsm_chain_cleanup(&nwp
->nd_nmreq
);
1292 mbuf_freem(nwp
->nd_mrep
);
1294 mbuf_freem(nwp
->nd_nam2
);
1295 if (IS_VALID_CRED(nwp
->nd_cr
))
1296 kauth_cred_unref(&nwp
->nd_cr
);
1297 FREE_ZONE(nwp
, sizeof(*nwp
), M_NFSRVDESC
);
1299 LIST_INIT(&slp
->ns_tq
);
1301 lck_rw_destroy(&slp
->ns_rwlock
, nfsrv_slp_rwlock_group
);
1302 lck_mtx_destroy(&slp
->ns_wgmutex
, nfsrv_slp_mutex_group
);
1303 FREE(slp
, M_NFSSVC
);
1307 * Derefence a server socket structure. If it has no more references and
1308 * is no longer valid, you can throw it away.
1311 nfsrv_slpderef(struct nfsrv_sock
*slp
)
1315 lck_mtx_lock(nfsd_mutex
);
1316 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1319 if (slp
->ns_sref
|| (slp
->ns_flag
& SLP_VALID
)) {
1320 if ((slp
->ns_flag
& SLP_QUEUED
) && !(slp
->ns_flag
& SLP_WORKTODO
)) {
1321 /* remove socket from queue since there's no work */
1322 if (slp
->ns_flag
& SLP_WAITQ
)
1323 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1325 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1326 slp
->ns_flag
&= ~SLP_QUEUED
;
1328 lck_rw_done(&slp
->ns_rwlock
);
1329 lck_mtx_unlock(nfsd_mutex
);
1333 /* This socket is no longer valid, so we'll get rid of it */
1335 if (slp
->ns_flag
& SLP_QUEUED
) {
1336 if (slp
->ns_flag
& SLP_WAITQ
)
1337 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1339 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1340 slp
->ns_flag
&= ~SLP_QUEUED
;
1344 * Queue the socket up for deletion
1345 * and start the timer to delete it
1346 * after it has been in limbo for
1350 slp
->ns_timestamp
= now
.tv_sec
;
1351 TAILQ_REMOVE(&nfsrv_socklist
, slp
, ns_chain
);
1352 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist
, slp
, ns_chain
);
1353 if (!nfsrv_deadsock_timer_on
) {
1354 nfsrv_deadsock_timer_on
= 1;
1355 nfs_interval_timer_start(nfsrv_deadsock_timer_call
,
1356 NFSRV_DEADSOCKDELAY
* 1000);
1359 lck_rw_done(&slp
->ns_rwlock
);
1360 /* now remove from the write gather socket list */
1361 if (slp
->ns_wgq
.tqe_next
!= SLPNOLIST
) {
1362 TAILQ_REMOVE(&nfsrv_sockwg
, slp
, ns_wgq
);
1363 slp
->ns_wgq
.tqe_next
= SLPNOLIST
;
1365 lck_mtx_unlock(nfsd_mutex
);
1369 * Check periodically for dead sockets pending delete.
1370 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
1371 * seconds then we assume it's safe to free.
1374 nfsrv_deadsock_timer(__unused
void *param0
, __unused
void *param1
)
1376 struct nfsrv_sock
*slp
;
1378 time_t time_to_wait
;
1381 lck_mtx_lock(nfsd_mutex
);
1383 while ((slp
= TAILQ_FIRST(&nfsrv_deadsocklist
))) {
1384 if ((slp
->ns_timestamp
+ NFSRV_DEADSOCKDELAY
) > now
.tv_sec
)
1386 TAILQ_REMOVE(&nfsrv_deadsocklist
, slp
, ns_chain
);
1389 if (TAILQ_EMPTY(&nfsrv_deadsocklist
)) {
1390 nfsrv_deadsock_timer_on
= 0;
1391 lck_mtx_unlock(nfsd_mutex
);
1394 time_to_wait
= (slp
->ns_timestamp
+ NFSRV_DEADSOCKDELAY
) - now
.tv_sec
;
1395 if (time_to_wait
< 1)
1398 lck_mtx_unlock(nfsd_mutex
);
1400 nfs_interval_timer_start(nfsrv_deadsock_timer_call
,
1401 time_to_wait
* 1000);
1405 * Clean up the data structures for the server.
1410 struct nfsrv_sock
*slp
, *nslp
;
1413 struct nfsrv_fmod
*fp
, *nfp
;
1418 for (slp
= TAILQ_FIRST(&nfsrv_socklist
); slp
!= 0; slp
= nslp
) {
1419 nslp
= TAILQ_NEXT(slp
, ns_chain
);
1420 if (slp
->ns_flag
& SLP_VALID
) {
1421 lck_rw_lock_exclusive(&slp
->ns_rwlock
);
1423 lck_rw_done(&slp
->ns_rwlock
);
1425 if (slp
->ns_flag
& SLP_QUEUED
) {
1426 if (slp
->ns_flag
& SLP_WAITQ
)
1427 TAILQ_REMOVE(&nfsrv_sockwait
, slp
, ns_svcq
);
1429 TAILQ_REMOVE(&nfsrv_sockwork
, slp
, ns_svcq
);
1430 slp
->ns_flag
&= ~SLP_QUEUED
;
1432 if (slp
->ns_wgq
.tqe_next
!= SLPNOLIST
) {
1433 TAILQ_REMOVE(&nfsrv_sockwg
, slp
, ns_wgq
);
1434 slp
->ns_wgq
.tqe_next
= SLPNOLIST
;
1436 /* queue the socket up for deletion */
1437 slp
->ns_timestamp
= now
.tv_sec
;
1438 TAILQ_REMOVE(&nfsrv_socklist
, slp
, ns_chain
);
1439 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist
, slp
, ns_chain
);
1440 if (!nfsrv_deadsock_timer_on
) {
1441 nfsrv_deadsock_timer_on
= 1;
1442 nfs_interval_timer_start(nfsrv_deadsock_timer_call
,
1443 NFSRV_DEADSOCKDELAY
* 1000);
1449 * Flush pending file write fsevents
1451 lck_mtx_lock(nfsrv_fmod_mutex
);
1452 for (i
= 0; i
< NFSRVFMODHASHSZ
; i
++) {
1453 for (fp
= LIST_FIRST(&nfsrv_fmod_hashtbl
[i
]); fp
; fp
= nfp
) {
1455 * Fire off the content modified fsevent for each
1456 * entry, remove it from the list, and free it.
1458 if (nfsrv_fsevents_enabled
)
1459 add_fsevent(FSE_CONTENT_MODIFIED
, &fp
->fm_context
,
1460 FSE_ARG_VNODE
, fp
->fm_vp
,
1462 vnode_put(fp
->fm_vp
);
1463 kauth_cred_unref(&fp
->fm_context
.vc_ucred
);
1464 nfp
= LIST_NEXT(fp
, fm_link
);
1465 LIST_REMOVE(fp
, fm_link
);
1469 nfsrv_fmod_pending
= 0;
1470 lck_mtx_unlock(nfsrv_fmod_mutex
);
1473 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1475 nfsrv_cleancache(); /* And clear out server cache */
1477 nfsrv_udpsock
= NULL
;
1480 #endif /* NFS_NOSERVER */