]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
6d2010ae | 2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * | |
33 | * This code is derived from software contributed to Berkeley by | |
34 | * Rick Macklem at The University of Guelph. | |
35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. All advertising materials mentioning features or use of this software | |
45 | * must display the following acknowledgement: | |
46 | * This product includes software developed by the University of | |
47 | * California, Berkeley and its contributors. | |
48 | * 4. Neither the name of the University nor the names of its contributors | |
49 | * may be used to endorse or promote products derived from this software | |
50 | * without specific prior written permission. | |
51 | * | |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
62 | * SUCH DAMAGE. | |
63 | * | |
64 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 | |
65 | * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ | |
66 | */ | |
2d21ac55 A |
67 | /* |
68 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
69 | * support for mandatory and extensible security protections. This notice | |
70 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
71 | * Version 2.0. | |
72 | */ | |
1c79356b A |
73 | |
74 | #include <sys/param.h> | |
75 | #include <sys/systm.h> | |
1c79356b | 76 | #include <sys/kernel.h> |
91447636 | 77 | #include <sys/file_internal.h> |
1c79356b A |
78 | #include <sys/filedesc.h> |
79 | #include <sys/stat.h> | |
91447636 A |
80 | #include <sys/vnode_internal.h> |
81 | #include <sys/mount_internal.h> | |
82 | #include <sys/proc_internal.h> /* for fdflags */ | |
83 | #include <sys/kauth.h> | |
1c79356b | 84 | #include <sys/sysctl.h> |
55e303ae | 85 | #include <sys/ubc.h> |
1c79356b A |
86 | #include <sys/uio.h> |
87 | #include <sys/malloc.h> | |
91447636 | 88 | #include <sys/kpi_mbuf.h> |
1c79356b A |
89 | #include <sys/socket.h> |
90 | #include <sys/socketvar.h> | |
91 | #include <sys/domain.h> | |
92 | #include <sys/protosw.h> | |
55e303ae A |
93 | #include <sys/fcntl.h> |
94 | #include <sys/lockf.h> | |
1c79356b A |
95 | #include <sys/syslog.h> |
96 | #include <sys/user.h> | |
91447636 A |
97 | #include <sys/sysproto.h> |
98 | #include <sys/kpi_socket.h> | |
2d21ac55 | 99 | #include <sys/fsevents.h> |
91447636 | 100 | #include <libkern/OSAtomic.h> |
2d21ac55 A |
101 | #include <kern/thread_call.h> |
102 | #include <kern/task.h> | |
1c79356b | 103 | |
b0d623f7 | 104 | #include <security/audit/audit.h> |
ccc36f2f | 105 | |
1c79356b A |
106 | #include <netinet/in.h> |
107 | #include <netinet/tcp.h> | |
1c79356b A |
108 | #include <nfs/xdr_subs.h> |
109 | #include <nfs/rpcv2.h> | |
110 | #include <nfs/nfsproto.h> | |
111 | #include <nfs/nfs.h> | |
112 | #include <nfs/nfsm_subs.h> | |
113 | #include <nfs/nfsrvcache.h> | |
2d21ac55 | 114 | #include <nfs/nfs_gss.h> |
1c79356b A |
115 | #include <nfs/nfsmount.h> |
116 | #include <nfs/nfsnode.h> | |
55e303ae | 117 | #include <nfs/nfs_lock.h> |
2d21ac55 A |
118 | #if CONFIG_MACF |
119 | #include <security/mac_framework.h> | |
1c79356b A |
120 | #endif |
121 | ||
2d21ac55 A |
122 | kern_return_t thread_terminate(thread_t); /* XXX */ |
123 | ||
124 | #if NFSSERVER | |
125 | ||
126 | extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, | |
127 | struct nfsrv_sock *slp, | |
128 | vfs_context_t ctx, | |
129 | mbuf_t *mrepp); | |
130 | extern int nfsrv_wg_delay; | |
131 | extern int nfsrv_wg_delay_v3; | |
132 | ||
133 | static int nfsrv_require_resv_port = 0; | |
134 | static int nfsrv_deadsock_timer_on = 0; | |
135 | ||
b0d623f7 A |
136 | int nfssvc_export(user_addr_t argp); |
137 | int nfssvc_nfsd(void); | |
138 | int nfssvc_addsock(socket_t, mbuf_t); | |
139 | void nfsrv_zapsock(struct nfsrv_sock *); | |
140 | void nfsrv_slpderef(struct nfsrv_sock *); | |
141 | void nfsrv_slpfree(struct nfsrv_sock *); | |
2d21ac55 A |
142 | |
143 | #endif /* NFSSERVER */ | |
144 | ||
145 | /* | |
146 | * sysctl stuff | |
147 | */ | |
148 | SYSCTL_DECL(_vfs_generic); | |
149 | SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); | |
150 | ||
151 | #if NFSCLIENT | |
152 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); | |
6d2010ae A |
153 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); |
154 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); | |
155 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); | |
156 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); | |
157 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); | |
158 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); | |
159 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); | |
160 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); | |
161 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); | |
162 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); | |
163 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, ""); | |
164 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); | |
165 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); | |
166 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); | |
167 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); | |
168 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); | |
2d21ac55 A |
169 | #endif /* NFSCLIENT */ |
170 | ||
171 | #if NFSSERVER | |
172 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); | |
6d2010ae A |
173 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); |
174 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); | |
175 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); | |
176 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); | |
177 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); | |
178 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); | |
179 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); | |
180 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); | |
181 | SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); | |
b0d623f7 | 182 | #if CONFIG_FSE |
6d2010ae | 183 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); |
b0d623f7 | 184 | #endif |
6d2010ae A |
185 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); |
186 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); | |
2d21ac55 A |
187 | #endif /* NFSSERVER */ |
188 | ||
189 | ||
190 | #if NFSCLIENT | |
191 | ||
192 | int | |
193 | nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) | |
194 | { | |
195 | struct lockd_ans la; | |
196 | int error; | |
197 | ||
6d2010ae A |
198 | switch (uap->flag) { |
199 | case NFSCLNT_LOCKDANS: | |
2d21ac55 | 200 | error = copyin(uap->argp, &la, sizeof(la)); |
6d2010ae A |
201 | if (!error) |
202 | error = nfslockdans(p, &la); | |
203 | break; | |
204 | case NFSCLNT_LOCKDNOTIFY: | |
205 | error = nfslockdnotify(p, uap->argp); | |
206 | break; | |
207 | default: | |
208 | error = EINVAL; | |
2d21ac55 | 209 | } |
6d2010ae | 210 | return (error); |
2d21ac55 A |
211 | } |
212 | ||
213 | /* | |
214 | * Asynchronous I/O threads for client NFS. | |
215 | * They do read-ahead and write-behind operations on the block I/O cache. | |
216 | * | |
217 | * The pool of up to nfsiod_thread_max threads is launched on demand, and threads exit | |
218 | * when unused for a while. There are as many nfsiod structs as there are | |
219 | * nfsiod threads; however there's no strict tie between a thread and a struct. | |
220 | * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes | |
221 | * up, it removes the next struct nfsiod from the queue and services it. Then | |
222 | * it will put the struct at the head of free list and sleep on it. | |
223 | * Async requests will pull the next struct nfsiod from the head of the free list, | |
224 | * put it on the work queue, and wake whatever thread is waiting on that struct. | |
225 | */ | |
2d21ac55 A |
226 | |
227 | /* | |
228 | * nfsiod thread exit routine | |
229 | * | |
230 | * Must be called with nfsiod_mutex held so that the | |
231 | * decision to terminate is atomic with the termination. | |
232 | */ | |
b0d623f7 | 233 | void |
2d21ac55 A |
234 | nfsiod_terminate(struct nfsiod *niod) |
235 | { | |
236 | nfsiod_thread_count--; | |
237 | lck_mtx_unlock(nfsiod_mutex); | |
238 | if (niod) | |
239 | FREE(niod, M_TEMP); | |
240 | else | |
241 | printf("nfsiod: terminating without niod\n"); | |
242 | thread_terminate(current_thread()); | |
243 | /*NOTREACHED*/ | |
244 | } | |
245 | ||
246 | /* nfsiod thread startup routine */ | |
b0d623f7 | 247 | void |
2d21ac55 A |
248 | nfsiod_thread(void) |
249 | { | |
250 | struct nfsiod *niod; | |
251 | int error; | |
252 | ||
253 | MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); | |
254 | if (!niod) { | |
255 | lck_mtx_lock(nfsiod_mutex); | |
256 | nfsiod_thread_count--; | |
b0d623f7 | 257 | wakeup(current_thread()); |
2d21ac55 A |
258 | lck_mtx_unlock(nfsiod_mutex); |
259 | thread_terminate(current_thread()); | |
260 | /*NOTREACHED*/ | |
261 | } | |
262 | bzero(niod, sizeof(*niod)); | |
263 | lck_mtx_lock(nfsiod_mutex); | |
264 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
265 | wakeup(current_thread()); | |
266 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
267 | /* shouldn't return... so we have an error */ | |
268 | /* remove an old nfsiod struct and terminate */ | |
269 | lck_mtx_lock(nfsiod_mutex); | |
270 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
271 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
272 | nfsiod_terminate(niod); | |
273 | /*NOTREACHED*/ | |
274 | } | |
275 | ||
276 | /* | |
277 | * Start up another nfsiod thread. | |
278 | * (unless we're already maxed out and there are nfsiods running) | |
279 | */ | |
280 | int | |
281 | nfsiod_start(void) | |
282 | { | |
b0d623f7 | 283 | thread_t thd = THREAD_NULL; |
2d21ac55 A |
284 | |
285 | lck_mtx_lock(nfsiod_mutex); | |
286 | if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { | |
287 | lck_mtx_unlock(nfsiod_mutex); | |
288 | return (EBUSY); | |
289 | } | |
290 | nfsiod_thread_count++; | |
b0d623f7 A |
291 | if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { |
292 | lck_mtx_unlock(nfsiod_mutex); | |
293 | return (EBUSY); | |
294 | } | |
2d21ac55 A |
295 | /* wait for the thread to complete startup */ |
296 | msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); | |
b0d623f7 | 297 | thread_deallocate(thd); |
2d21ac55 A |
298 | return (0); |
299 | } | |
300 | ||
301 | /* | |
302 | * Continuation for Asynchronous I/O threads for NFS client. | |
303 | * | |
304 | * Grab an nfsiod struct to work on, do some work, then drop it | |
305 | */ | |
b0d623f7 | 306 | int |
2d21ac55 A |
307 | nfsiod_continue(int error) |
308 | { | |
309 | struct nfsiod *niod; | |
310 | struct nfsmount *nmp; | |
311 | struct nfsreq *req, *treq; | |
312 | struct nfs_reqqhead iodq; | |
313 | int morework; | |
314 | ||
315 | lck_mtx_lock(nfsiod_mutex); | |
316 | niod = TAILQ_FIRST(&nfsiodwork); | |
317 | if (!niod) { | |
318 | /* there's no work queued up */ | |
2d21ac55 A |
319 | /* remove an old nfsiod struct and terminate */ |
320 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
321 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
322 | nfsiod_terminate(niod); | |
323 | /*NOTREACHED*/ | |
324 | } | |
325 | TAILQ_REMOVE(&nfsiodwork, niod, niod_link); | |
326 | ||
327 | worktodo: | |
328 | while ((nmp = niod->niod_nmp)) { | |
329 | /* | |
330 | * Service this mount's async I/O queue. | |
331 | * | |
332 | * In order to ensure some level of fairness between mounts, | |
333 | * we grab all the work up front before processing it so any | |
334 | * new work that arrives will be serviced on a subsequent | |
335 | * iteration - and we have a chance to see if other work needs | |
336 | * to be done (e.g. the delayed write queue needs to be pushed | |
337 | * or other mounts are waiting for an nfsiod). | |
338 | */ | |
339 | /* grab the current contents of the queue */ | |
340 | TAILQ_INIT(&iodq); | |
341 | TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); | |
342 | lck_mtx_unlock(nfsiod_mutex); | |
343 | ||
344 | /* process the queue */ | |
345 | TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { | |
346 | TAILQ_REMOVE(&iodq, req, r_achain); | |
347 | req->r_achain.tqe_next = NFSREQNOLIST; | |
348 | req->r_callback.rcb_func(req); | |
349 | } | |
350 | ||
351 | /* now check if there's more/other work to be done */ | |
352 | lck_mtx_lock(nfsiod_mutex); | |
353 | morework = !TAILQ_EMPTY(&nmp->nm_iodq); | |
354 | if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { | |
355 | /* we're going to stop working on this mount */ | |
356 | if (morework) /* mount still needs more work so queue it up */ | |
357 | TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); | |
358 | nmp->nm_niod = NULL; | |
359 | niod->niod_nmp = NULL; | |
360 | } | |
361 | } | |
362 | ||
363 | /* loop if there's still a mount to work on */ | |
364 | if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { | |
365 | niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); | |
366 | TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); | |
367 | } | |
368 | if (niod->niod_nmp) | |
369 | goto worktodo; | |
370 | ||
371 | /* queue ourselves back up - if there aren't too many threads running */ | |
372 | if (nfsiod_thread_count <= NFSIOD_MAX) { | |
373 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
374 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
375 | /* shouldn't return... so we have an error */ | |
376 | /* remove an old nfsiod struct and terminate */ | |
377 | lck_mtx_lock(nfsiod_mutex); | |
378 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
379 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
380 | } | |
381 | nfsiod_terminate(niod); | |
382 | /*NOTREACHED*/ | |
383 | return (0); | |
384 | } | |
385 | ||
386 | #endif /* NFSCLIENT */ | |
387 | ||
388 | ||
389 | #if NFSSERVER | |
390 | ||
1c79356b A |
391 | /* |
392 | * NFS server system calls | |
393 | * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c | |
394 | */ | |
395 | ||
396 | /* | |
397 | * Get file handle system call | |
398 | */ | |
1c79356b | 399 | int |
91447636 | 400 | getfh(proc_t p, struct getfh_args *uap, __unused int *retval) |
1c79356b | 401 | { |
91447636 A |
402 | vnode_t vp; |
403 | struct nfs_filehandle nfh; | |
6d2010ae | 404 | int error, fhlen, fidlen; |
1c79356b | 405 | struct nameidata nd; |
91447636 | 406 | char path[MAXPATHLEN], *ptr; |
6d2010ae | 407 | size_t pathlen; |
91447636 A |
408 | struct nfs_exportfs *nxfs; |
409 | struct nfs_export *nx; | |
410 | ||
1c79356b A |
411 | /* |
412 | * Must be super user | |
413 | */ | |
91447636 A |
414 | error = proc_suser(p); |
415 | if (error) | |
416 | return (error); | |
417 | ||
6d2010ae A |
418 | error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); |
419 | if (!error) | |
420 | error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); | |
91447636 | 421 | if (error) |
1c79356b | 422 | return (error); |
6d2010ae A |
423 | /* limit fh size to length specified (or v3 size by default) */ |
424 | if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) | |
425 | fhlen = NFSV3_MAX_FH_SIZE; | |
426 | fidlen = fhlen - sizeof(struct nfs_exphandle); | |
91447636 | 427 | |
2d21ac55 A |
428 | if (!nfsrv_is_initialized()) |
429 | return (EINVAL); | |
430 | ||
6d2010ae | 431 | NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, |
2d21ac55 | 432 | UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); |
1c79356b A |
433 | error = namei(&nd); |
434 | if (error) | |
435 | return (error); | |
91447636 A |
436 | nameidone(&nd); |
437 | ||
1c79356b | 438 | vp = nd.ni_vp; |
91447636 A |
439 | |
440 | // find exportfs that matches f_mntonname | |
2d21ac55 | 441 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
91447636 | 442 | ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; |
2d21ac55 A |
443 | LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { |
444 | if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) | |
91447636 A |
445 | break; |
446 | } | |
447 | if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { | |
448 | error = EINVAL; | |
449 | goto out; | |
450 | } | |
451 | // find export that best matches remainder of path | |
452 | ptr = path + strlen(nxfs->nxfs_path); | |
453 | while (*ptr && (*ptr == '/')) | |
454 | ptr++; | |
455 | LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { | |
456 | int len = strlen(nx->nx_path); | |
457 | if (len == 0) // we've hit the export entry for the root directory | |
458 | break; | |
459 | if (!strncmp(nx->nx_path, ptr, len)) | |
460 | break; | |
461 | } | |
462 | if (!nx) { | |
463 | error = EINVAL; | |
464 | goto out; | |
465 | } | |
466 | ||
467 | bzero(&nfh, sizeof(nfh)); | |
0c530ab8 A |
468 | nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); |
469 | nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); | |
470 | nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); | |
91447636 A |
471 | nfh.nfh_xh.nxh_flags = 0; |
472 | nfh.nfh_xh.nxh_reserved = 0; | |
6d2010ae | 473 | nfh.nfh_len = fidlen; |
2d21ac55 | 474 | error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); |
6d2010ae | 475 | if (nfh.nfh_len > (uint32_t)fidlen) |
91447636 A |
476 | error = EOVERFLOW; |
477 | nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; | |
478 | nfh.nfh_len += sizeof(nfh.nfh_xh); | |
2d21ac55 | 479 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
91447636 A |
480 | |
481 | out: | |
2d21ac55 | 482 | lck_rw_done(&nfsrv_export_rwlock); |
91447636 | 483 | vnode_put(vp); |
1c79356b A |
484 | if (error) |
485 | return (error); | |
6d2010ae | 486 | error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); |
1c79356b A |
487 | return (error); |
488 | } | |
489 | ||
91447636 A |
490 | extern struct fileops vnops; |
491 | ||
55e303ae A |
492 | /* |
493 | * syscall for the rpc.lockd to use to translate a NFS file handle into | |
494 | * an open descriptor. | |
495 | * | |
496 | * warning: do not remove the suser() call or this becomes one giant | |
497 | * security hole. | |
498 | */ | |
55e303ae | 499 | int |
91447636 A |
500 | fhopen( proc_t p, |
501 | struct fhopen_args *uap, | |
b0d623f7 | 502 | int32_t *retval) |
55e303ae | 503 | { |
91447636 A |
504 | vnode_t vp; |
505 | struct nfs_filehandle nfh; | |
506 | struct nfs_export *nx; | |
507 | struct nfs_export_options *nxo; | |
55e303ae | 508 | struct flock lf; |
91447636 A |
509 | struct fileproc *fp, *nfp; |
510 | int fmode, error, type; | |
55e303ae | 511 | int indx; |
2d21ac55 | 512 | vfs_context_t ctx = vfs_context_current(); |
91447636 A |
513 | kauth_action_t action; |
514 | ||
55e303ae A |
515 | /* |
516 | * Must be super user | |
517 | */ | |
2d21ac55 | 518 | error = suser(vfs_context_ucred(ctx), 0); |
0c530ab8 | 519 | if (error) { |
55e303ae | 520 | return (error); |
0c530ab8 | 521 | } |
55e303ae | 522 | |
2d21ac55 A |
523 | if (!nfsrv_is_initialized()) { |
524 | return (EINVAL); | |
525 | } | |
526 | ||
55e303ae A |
527 | fmode = FFLAGS(uap->flags); |
528 | /* why not allow a non-read/write open for our lockd? */ | |
2d21ac55 | 529 | if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) |
55e303ae | 530 | return (EINVAL); |
91447636 A |
531 | |
532 | error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); | |
2d21ac55 | 533 | if (error) |
91447636 A |
534 | return (error); |
535 | if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || | |
2d21ac55 | 536 | (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) |
91447636 A |
537 | return (EINVAL); |
538 | error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); | |
2d21ac55 | 539 | if (error) |
55e303ae | 540 | return (error); |
2d21ac55 | 541 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
91447636 | 542 | |
2d21ac55 | 543 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
91447636 | 544 | /* now give me my vnode, it gets returned to me with a reference */ |
2d21ac55 A |
545 | error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); |
546 | lck_rw_done(&nfsrv_export_rwlock); | |
0c530ab8 | 547 | if (error) { |
2d21ac55 A |
548 | if (error == NFSERR_TRYLATER) |
549 | error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? | |
55e303ae | 550 | return (error); |
0c530ab8 | 551 | } |
91447636 | 552 | |
55e303ae | 553 | /* |
91447636 A |
554 | * From now on we have to make sure not |
555 | * to forget about the vnode. | |
556 | * Any error that causes an abort must vnode_put(vp). | |
557 | * Just set error = err and 'goto bad;'. | |
55e303ae A |
558 | */ |
559 | ||
560 | /* | |
561 | * from vn_open | |
562 | */ | |
91447636 | 563 | if (vnode_vtype(vp) == VSOCK) { |
55e303ae A |
564 | error = EOPNOTSUPP; |
565 | goto bad; | |
566 | } | |
567 | ||
91447636 A |
568 | /* disallow write operations on directories */ |
569 | if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { | |
570 | error = EISDIR; | |
55e303ae A |
571 | goto bad; |
572 | } | |
573 | ||
91447636 A |
574 | /* compute action to be authorized */ |
575 | action = 0; | |
576 | if (fmode & FREAD) | |
577 | action |= KAUTH_VNODE_READ_DATA; | |
578 | if (fmode & (FWRITE | O_TRUNC)) | |
579 | action |= KAUTH_VNODE_WRITE_DATA; | |
2d21ac55 | 580 | if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) |
91447636 | 581 | goto bad; |
55e303ae | 582 | |
2d21ac55 | 583 | if ((error = VNOP_OPEN(vp, fmode, ctx))) |
91447636 | 584 | goto bad; |
6d2010ae | 585 | if ((error = vnode_ref_ext(vp, fmode, 0))) |
55e303ae A |
586 | goto bad; |
587 | ||
55e303ae A |
588 | /* |
589 | * end of vn_open code | |
590 | */ | |
591 | ||
91447636 | 592 | // starting here... error paths should call vn_close/vnode_put |
2d21ac55 A |
593 | if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { |
594 | vn_close(vp, fmode & FMASK, ctx); | |
55e303ae A |
595 | goto bad; |
596 | } | |
597 | fp = nfp; | |
598 | ||
91447636 A |
599 | fp->f_fglob->fg_flag = fmode & FMASK; |
600 | fp->f_fglob->fg_type = DTYPE_VNODE; | |
601 | fp->f_fglob->fg_ops = &vnops; | |
602 | fp->f_fglob->fg_data = (caddr_t)vp; | |
603 | ||
604 | // XXX do we really need to support this with fhopen()? | |
55e303ae A |
605 | if (fmode & (O_EXLOCK | O_SHLOCK)) { |
606 | lf.l_whence = SEEK_SET; | |
607 | lf.l_start = 0; | |
608 | lf.l_len = 0; | |
609 | if (fmode & O_EXLOCK) | |
610 | lf.l_type = F_WRLCK; | |
611 | else | |
612 | lf.l_type = F_RDLCK; | |
613 | type = F_FLOCK; | |
614 | if ((fmode & FNONBLOCK) == 0) | |
615 | type |= F_WAIT; | |
2d21ac55 A |
616 | if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) { |
617 | struct vfs_context context = *vfs_context_current(); | |
618 | /* Modify local copy (to not damage thread copy) */ | |
619 | context.vc_ucred = fp->f_fglob->fg_cred; | |
620 | ||
621 | vn_close(vp, fp->f_fglob->fg_flag, &context); | |
91447636 | 622 | fp_free(p, indx, fp); |
55e303ae A |
623 | return (error); |
624 | } | |
91447636 | 625 | fp->f_fglob->fg_flag |= FHASLOCK; |
55e303ae A |
626 | } |
627 | ||
91447636 A |
628 | vnode_put(vp); |
629 | ||
630 | proc_fdlock(p); | |
6601e61a | 631 | procfdtbl_releasefd(p, indx, NULL); |
91447636 A |
632 | fp_drop(p, indx, fp, 1); |
633 | proc_fdunlock(p); | |
634 | ||
55e303ae A |
635 | *retval = indx; |
636 | return (0); | |
637 | ||
638 | bad: | |
91447636 | 639 | vnode_put(vp); |
55e303ae A |
640 | return (error); |
641 | } | |
642 | ||
1c79356b | 643 | /* |
2d21ac55 | 644 | * NFS server pseudo system call |
1c79356b | 645 | */ |
1c79356b | 646 | int |
91447636 | 647 | nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) |
1c79356b | 648 | { |
91447636 A |
649 | mbuf_t nam; |
650 | struct user_nfsd_args user_nfsdarg; | |
91447636 | 651 | socket_t so; |
1c79356b A |
652 | int error; |
653 | ||
ccc36f2f A |
654 | AUDIT_ARG(cmd, uap->flag); |
655 | ||
1c79356b | 656 | /* |
b0d623f7 | 657 | * Must be super user for most operations (export ops checked later). |
1c79356b | 658 | */ |
b0d623f7 | 659 | if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) |
1c79356b | 660 | return (error); |
2d21ac55 A |
661 | #if CONFIG_MACF |
662 | error = mac_system_check_nfsd(kauth_cred_get()); | |
663 | if (error) | |
664 | return (error); | |
665 | #endif | |
91447636 | 666 | |
2d21ac55 A |
667 | /* make sure NFS server data structures have been initialized */ |
668 | nfsrv_init(); | |
1c79356b | 669 | |
2d21ac55 | 670 | if (uap->flag & NFSSVC_ADDSOCK) { |
91447636 A |
671 | if (IS_64BIT_PROCESS(p)) { |
672 | error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); | |
673 | } else { | |
674 | struct nfsd_args tmp_args; | |
675 | error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); | |
676 | if (error == 0) { | |
677 | user_nfsdarg.sock = tmp_args.sock; | |
678 | user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); | |
679 | user_nfsdarg.namelen = tmp_args.namelen; | |
680 | } | |
681 | } | |
1c79356b A |
682 | if (error) |
683 | return (error); | |
91447636 A |
684 | /* get the socket */ |
685 | error = file_socket(user_nfsdarg.sock, &so); | |
1c79356b A |
686 | if (error) |
687 | return (error); | |
91447636 A |
688 | /* Get the client address for connected sockets. */ |
689 | if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { | |
690 | nam = NULL; | |
691 | } else { | |
692 | error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); | |
693 | if (error) { | |
694 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
695 | file_drop(user_nfsdarg.sock); | |
1c79356b | 696 | return (error); |
91447636 | 697 | } |
1c79356b | 698 | } |
91447636 A |
699 | /* |
700 | * nfssvc_addsock() will grab a retain count on the socket | |
701 | * to keep the socket from being closed when nfsd closes its | |
702 | * file descriptor for it. | |
703 | */ | |
2d21ac55 | 704 | error = nfssvc_addsock(so, nam); |
91447636 A |
705 | /* drop the iocount file_socket() grabbed on the file descriptor */ |
706 | file_drop(user_nfsdarg.sock); | |
707 | } else if (uap->flag & NFSSVC_NFSD) { | |
2d21ac55 | 708 | error = nfssvc_nfsd(); |
91447636 | 709 | } else if (uap->flag & NFSSVC_EXPORT) { |
2d21ac55 | 710 | error = nfssvc_export(uap->argp); |
91447636 A |
711 | } else { |
712 | error = EINVAL; | |
1c79356b | 713 | } |
1c79356b A |
714 | if (error == EINTR || error == ERESTART) |
715 | error = 0; | |
716 | return (error); | |
717 | } | |
718 | ||
1c79356b A |
719 | /* |
720 | * Adds a socket to the list for servicing by nfsds. | |
721 | */ | |
b0d623f7 | 722 | int |
2d21ac55 | 723 | nfssvc_addsock(socket_t so, mbuf_t mynam) |
1c79356b | 724 | { |
2d21ac55 A |
725 | struct nfsrv_sock *slp; |
726 | int error = 0, sodomain, sotype, soprotocol, on = 1; | |
91447636 A |
727 | struct timeval timeo; |
728 | ||
729 | /* make sure mbuf constants are set up */ | |
2d21ac55 | 730 | if (!nfs_mbuf_mhlen) |
91447636 A |
731 | nfs_mbuf_init(); |
732 | ||
733 | sock_gettype(so, &sodomain, &sotype, &soprotocol); | |
734 | ||
6d2010ae A |
735 | /* There should be only one UDP socket for each of IPv4 and IPv6 */ |
736 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { | |
737 | mbuf_freem(mynam); | |
738 | return (EEXIST); | |
739 | } | |
740 | if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { | |
91447636 | 741 | mbuf_freem(mynam); |
2d21ac55 | 742 | return (EEXIST); |
1c79356b A |
743 | } |
744 | ||
2d21ac55 A |
745 | /* Set protocol options and reserve some space (for UDP). */ |
746 | if (sotype == SOCK_STREAM) | |
91447636 | 747 | sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); |
2d21ac55 | 748 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) |
91447636 | 749 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
2d21ac55 A |
750 | if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ |
751 | int reserve = NFS_UDPSOCKBUF; | |
752 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); | |
753 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); | |
754 | if (error) { | |
755 | log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error); | |
756 | error = 0; | |
757 | } | |
1c79356b | 758 | } |
91447636 A |
759 | sock_nointerrupt(so, 0); |
760 | ||
2d21ac55 A |
761 | /* |
762 | * Set socket send/receive timeouts. | |
763 | * Receive timeout shouldn't matter, but setting the send timeout | |
764 | * will make sure that an unresponsive client can't hang the server. | |
765 | */ | |
91447636 | 766 | timeo.tv_usec = 0; |
2d21ac55 A |
767 | timeo.tv_sec = 1; |
768 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); | |
769 | timeo.tv_sec = 30; | |
770 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); | |
771 | if (error) { | |
772 | log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error); | |
773 | error = 0; | |
774 | } | |
91447636 | 775 | |
2d21ac55 A |
776 | MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); |
777 | if (!slp) { | |
778 | mbuf_freem(mynam); | |
779 | return (ENOMEM); | |
780 | } | |
781 | bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); | |
782 | lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); | |
783 | lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); | |
784 | ||
785 | lck_mtx_lock(nfsd_mutex); | |
786 | ||
787 | if (soprotocol == IPPROTO_UDP) { | |
6d2010ae A |
788 | if (sodomain == AF_INET) { |
789 | /* There should be only one UDP/IPv4 socket */ | |
790 | if (nfsrv_udpsock) { | |
791 | lck_mtx_unlock(nfsd_mutex); | |
792 | nfsrv_slpfree(slp); | |
793 | mbuf_freem(mynam); | |
794 | return (EEXIST); | |
795 | } | |
796 | nfsrv_udpsock = slp; | |
797 | } | |
798 | if (sodomain == AF_INET6) { | |
799 | /* There should be only one UDP/IPv6 socket */ | |
800 | if (nfsrv_udp6sock) { | |
801 | lck_mtx_unlock(nfsd_mutex); | |
802 | nfsrv_slpfree(slp); | |
803 | mbuf_freem(mynam); | |
804 | return (EEXIST); | |
805 | } | |
806 | nfsrv_udp6sock = slp; | |
91447636 | 807 | } |
1c79356b | 808 | } |
91447636 | 809 | |
2d21ac55 A |
810 | /* add the socket to the list */ |
811 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); | |
812 | ||
91447636 | 813 | sock_retain(so); /* grab a retain count on the socket */ |
1c79356b | 814 | slp->ns_so = so; |
91447636 | 815 | slp->ns_sotype = sotype; |
1c79356b | 816 | slp->ns_nam = mynam; |
91447636 | 817 | |
2d21ac55 | 818 | /* set up the socket upcall */ |
6d2010ae | 819 | sock_setupcall(so, nfsrv_rcv, slp); |
4a3eedf9 A |
820 | /* just playin' it safe */ |
821 | sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); | |
91447636 | 822 | |
2d21ac55 A |
823 | /* mark that the socket is not in the nfsrv_sockwg list */ |
824 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
825 | ||
91447636 A |
826 | slp->ns_flag = SLP_VALID | SLP_NEEDQ; |
827 | ||
1c79356b | 828 | nfsrv_wakenfsd(slp); |
91447636 A |
829 | lck_mtx_unlock(nfsd_mutex); |
830 | ||
1c79356b A |
831 | return (0); |
832 | } | |
833 | ||
834 | /* | |
2d21ac55 A |
835 | * nfssvc_nfsd() |
836 | * | |
837 | * nfsd theory of operation: | |
838 | * | |
839 | * The first nfsd thread stays in user mode accepting new TCP connections | |
840 | * which are then added via the "addsock" call. The rest of the nfsd threads | |
841 | * simply call into the kernel and remain there in a loop handling NFS | |
842 | * requests until killed by a signal. | |
843 | * | |
844 | * There's a list of nfsd threads (nfsd_head). | |
845 | * There's an nfsd queue that contains only those nfsds that are | |
846 | * waiting for work to do (nfsd_queue). | |
847 | * | |
848 | * There's a list of all NFS sockets (nfsrv_socklist) and two queues for | |
849 | * managing the work on the sockets: | |
850 | * nfsrv_sockwait - sockets w/new data waiting to be worked on | |
851 | * nfsrv_sockwork - sockets being worked on which may have more work to do | |
852 | * nfsrv_sockwg -- sockets which have pending write gather data | |
853 | * When a socket receives data, if it is not currently queued, it | |
854 | * will be placed at the end of the "wait" queue. | |
855 | * Whenever a socket needs servicing we make sure it is queued and | |
856 | * wake up a waiting nfsd (if there is one). | |
857 | * | |
858 | * nfsds will service at most 8 requests from the same socket before | |
859 | * defecting to work on another socket. | |
860 | * nfsds will defect immediately if there are any sockets in the "wait" queue | |
861 | * nfsds looking for a socket to work on check the "wait" queue first and | |
862 | * then check the "work" queue. | |
863 | * When an nfsd starts working on a socket, it removes it from the head of | |
864 | * the queue it's currently on and moves it to the end of the "work" queue. | |
865 | * When nfsds are checking the queues for work, any sockets found not to | |
866 | * have any work are simply dropped from the queue. | |
867 | * | |
1c79356b | 868 | */ |
b0d623f7 | 869 | int |
2d21ac55 | 870 | nfssvc_nfsd(void) |
1c79356b | 871 | { |
2d21ac55 A |
872 | mbuf_t m, mrep; |
873 | struct nfsrv_sock *slp; | |
874 | struct nfsd *nfsd; | |
1c79356b | 875 | struct nfsrv_descript *nd = NULL; |
91447636 | 876 | int error = 0, cacherep, writes_todo; |
2d21ac55 | 877 | int siz, procrastinate, opcnt = 0; |
1c79356b | 878 | u_quad_t cur_usec; |
55e303ae | 879 | struct timeval now; |
2d21ac55 | 880 | struct vfs_context context; |
1c79356b A |
881 | |
882 | #ifndef nolint | |
883 | cacherep = RC_DOIT; | |
884 | writes_todo = 0; | |
885 | #endif | |
91447636 | 886 | |
2d21ac55 A |
887 | MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); |
888 | if (!nfsd) | |
889 | return (ENOMEM); | |
890 | bzero(nfsd, sizeof(struct nfsd)); | |
891 | lck_mtx_lock(nfsd_mutex); | |
892 | if (nfsd_thread_count++ == 0) | |
893 | nfsrv_initcache(); /* Init the server request cache */ | |
894 | TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); | |
895 | lck_mtx_unlock(nfsd_mutex); | |
896 | ||
897 | context.vc_thread = current_thread(); | |
91447636 | 898 | |
1c79356b A |
899 | /* |
900 | * Loop getting rpc requests until SIGKILL. | |
901 | */ | |
902 | for (;;) { | |
2d21ac55 A |
903 | if (nfsd_thread_max <= 0) { |
904 | /* NFS server shutting down, get out ASAP */ | |
905 | error = EINTR; | |
906 | slp = nfsd->nfsd_slp; | |
907 | } else if (nfsd->nfsd_flag & NFSD_REQINPROG) { | |
908 | /* already have some work to do */ | |
909 | error = 0; | |
910 | slp = nfsd->nfsd_slp; | |
911 | } else { | |
912 | /* need to find work to do */ | |
913 | error = 0; | |
91447636 | 914 | lck_mtx_lock(nfsd_mutex); |
2d21ac55 A |
915 | while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) { |
916 | if (nfsd_thread_count > nfsd_thread_max) { | |
917 | /* | |
918 | * If we have no socket and there are more | |
919 | * nfsd threads than configured, let's exit. | |
920 | */ | |
921 | error = 0; | |
922 | goto done; | |
923 | } | |
1c79356b | 924 | nfsd->nfsd_flag |= NFSD_WAITING; |
2d21ac55 A |
925 | TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue); |
926 | error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL); | |
91447636 | 927 | if (error) { |
2d21ac55 A |
928 | if (nfsd->nfsd_flag & NFSD_WAITING) { |
929 | TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue); | |
930 | nfsd->nfsd_flag &= ~NFSD_WAITING; | |
931 | } | |
1c79356b | 932 | goto done; |
91447636 | 933 | } |
1c79356b | 934 | } |
2d21ac55 A |
935 | slp = nfsd->nfsd_slp; |
936 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) { | |
937 | /* look for a socket to work on in the wait queue */ | |
938 | while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) { | |
939 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
940 | /* remove from the head of the queue */ | |
941 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
942 | slp->ns_flag &= ~SLP_WAITQ; | |
943 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) | |
944 | break; | |
945 | /* nothing to do, so skip this socket */ | |
946 | lck_rw_done(&slp->ns_rwlock); | |
1c79356b | 947 | } |
2d21ac55 A |
948 | } |
949 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) { | |
950 | /* look for a socket to work on in the work queue */ | |
951 | while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) { | |
952 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
953 | /* remove from the head of the queue */ | |
954 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
955 | slp->ns_flag &= ~SLP_WORKQ; | |
956 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) | |
957 | break; | |
958 | /* nothing to do, so skip this socket */ | |
959 | lck_rw_done(&slp->ns_rwlock); | |
960 | } | |
961 | } | |
962 | if (!nfsd->nfsd_slp && slp) { | |
963 | /* we found a socket to work on, grab a reference */ | |
964 | slp->ns_sref++; | |
965 | nfsd->nfsd_slp = slp; | |
966 | opcnt = 0; | |
967 | /* and put it at the back of the work queue */ | |
968 | TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq); | |
969 | slp->ns_flag |= SLP_WORKQ; | |
970 | lck_rw_done(&slp->ns_rwlock); | |
1c79356b | 971 | } |
91447636 | 972 | lck_mtx_unlock(nfsd_mutex); |
2d21ac55 | 973 | if (!slp) |
1c79356b | 974 | continue; |
91447636 | 975 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1c79356b | 976 | if (slp->ns_flag & SLP_VALID) { |
743b1565 | 977 | if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { |
91447636 A |
978 | slp->ns_flag &= ~SLP_NEEDQ; |
979 | nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); | |
980 | } | |
743b1565 A |
981 | if (slp->ns_flag & SLP_DISCONN) |
982 | nfsrv_zapsock(slp); | |
1c79356b | 983 | error = nfsrv_dorec(slp, nfsd, &nd); |
2d21ac55 A |
984 | if (error == EINVAL) { // RPCSEC_GSS drop |
985 | if (slp->ns_sotype == SOCK_STREAM) | |
986 | nfsrv_zapsock(slp); // drop connection | |
987 | } | |
988 | writes_todo = 0; | |
989 | if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) { | |
990 | microuptime(&now); | |
991 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + | |
992 | (u_quad_t)now.tv_usec; | |
993 | if (slp->ns_wgtime <= cur_usec) { | |
994 | error = 0; | |
995 | cacherep = RC_DOIT; | |
996 | writes_todo = 1; | |
997 | } | |
998 | slp->ns_flag &= ~SLP_DOWRITES; | |
999 | } | |
1c79356b A |
1000 | nfsd->nfsd_flag |= NFSD_REQINPROG; |
1001 | } | |
91447636 | 1002 | lck_rw_done(&slp->ns_rwlock); |
1c79356b | 1003 | } |
2d21ac55 | 1004 | if (error || (slp && !(slp->ns_flag & SLP_VALID))) { |
1c79356b | 1005 | if (nd) { |
2d21ac55 | 1006 | nfsm_chain_cleanup(&nd->nd_nmreq); |
55e303ae | 1007 | if (nd->nd_nam2) |
91447636 | 1008 | mbuf_freem(nd->nd_nam2); |
0c530ab8 A |
1009 | if (IS_VALID_CRED(nd->nd_cr)) |
1010 | kauth_cred_unref(&nd->nd_cr); | |
6d2010ae A |
1011 | if (nd->nd_gss_context) |
1012 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
2d21ac55 | 1013 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b A |
1014 | nd = NULL; |
1015 | } | |
91447636 | 1016 | nfsd->nfsd_slp = NULL; |
1c79356b | 1017 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; |
2d21ac55 A |
1018 | if (slp) |
1019 | nfsrv_slpderef(slp); | |
1020 | if (nfsd_thread_max <= 0) | |
1021 | break; | |
1c79356b A |
1022 | continue; |
1023 | } | |
1c79356b | 1024 | if (nd) { |
55e303ae | 1025 | microuptime(&nd->nd_starttime); |
1c79356b A |
1026 | if (nd->nd_nam2) |
1027 | nd->nd_nam = nd->nd_nam2; | |
1028 | else | |
1029 | nd->nd_nam = slp->ns_nam; | |
1030 | ||
2d21ac55 A |
1031 | cacherep = nfsrv_getcache(nd, slp, &mrep); |
1032 | ||
1033 | if (nfsrv_require_resv_port) { | |
1034 | /* Check if source port is a reserved port */ | |
6d2010ae A |
1035 | in_port_t port = 0; |
1036 | struct sockaddr *saddr = mbuf_data(nd->nd_nam); | |
1037 | ||
1038 | if (saddr->sa_family == AF_INET) | |
1039 | port = ntohs(((struct sockaddr_in*)saddr)->sin_port); | |
1040 | else if (saddr->sa_family == AF_INET6) | |
1041 | port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port); | |
1042 | if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) { | |
1c79356b A |
1043 | nd->nd_procnum = NFSPROC_NOOP; |
1044 | nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); | |
1045 | cacherep = RC_DOIT; | |
1c79356b A |
1046 | } |
1047 | } | |
1048 | ||
1049 | } | |
1050 | ||
1051 | /* | |
2d21ac55 | 1052 | * Loop to get all the write RPC replies that have been |
1c79356b A |
1053 | * gathered together. |
1054 | */ | |
1055 | do { | |
1056 | switch (cacherep) { | |
1057 | case RC_DOIT: | |
2d21ac55 A |
1058 | if (nd && (nd->nd_vers == NFS_VER3)) |
1059 | procrastinate = nfsrv_wg_delay_v3; | |
1c79356b | 1060 | else |
2d21ac55 A |
1061 | procrastinate = nfsrv_wg_delay; |
1062 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
1063 | context.vc_ucred = NULL; | |
91447636 | 1064 | if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) |
2d21ac55 A |
1065 | error = nfsrv_writegather(&nd, slp, &context, &mrep); |
1066 | else | |
1067 | error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep); | |
1068 | lck_rw_done(&nfsrv_export_rwlock); | |
1069 | if (mrep == NULL) { | |
1070 | /* | |
1071 | * If this is a stream socket and we are not going | |
1072 | * to send a reply we better close the connection | |
1073 | * so the client doesn't hang. | |
1074 | */ | |
1075 | if (error && slp->ns_sotype == SOCK_STREAM) { | |
1076 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1077 | nfsrv_zapsock(slp); | |
1078 | lck_rw_done(&slp->ns_rwlock); | |
1079 | printf("NFS server: NULL reply from proc = %d error = %d\n", | |
1080 | nd->nd_procnum, error); | |
1081 | } | |
1c79356b | 1082 | break; |
2d21ac55 A |
1083 | |
1084 | } | |
1c79356b | 1085 | if (error) { |
b0d623f7 | 1086 | OSAddAtomic(1, &nfsstats.srv_errs); |
2d21ac55 | 1087 | nfsrv_updatecache(nd, FALSE, mrep); |
55e303ae | 1088 | if (nd->nd_nam2) { |
91447636 | 1089 | mbuf_freem(nd->nd_nam2); |
55e303ae A |
1090 | nd->nd_nam2 = NULL; |
1091 | } | |
1c79356b A |
1092 | break; |
1093 | } | |
b0d623f7 | 1094 | OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]); |
2d21ac55 A |
1095 | nfsrv_updatecache(nd, TRUE, mrep); |
1096 | /* FALLTHRU */ | |
1097 | ||
1c79356b | 1098 | case RC_REPLY: |
2d21ac55 A |
1099 | if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS |
1100 | /* | |
1101 | * Need to checksum or encrypt the reply | |
1102 | */ | |
1103 | error = nfs_gss_svc_protect_reply(nd, mrep); | |
1104 | if (error) { | |
1105 | mbuf_freem(mrep); | |
1106 | break; | |
1107 | } | |
1108 | } | |
1109 | ||
1110 | /* | |
1111 | * Get the total size of the reply | |
1112 | */ | |
1113 | m = mrep; | |
1c79356b A |
1114 | siz = 0; |
1115 | while (m) { | |
91447636 A |
1116 | siz += mbuf_len(m); |
1117 | m = mbuf_next(m); | |
1c79356b A |
1118 | } |
1119 | if (siz <= 0 || siz > NFS_MAXPACKET) { | |
1120 | printf("mbuf siz=%d\n",siz); | |
1121 | panic("Bad nfs svc reply"); | |
1122 | } | |
2d21ac55 | 1123 | m = mrep; |
91447636 A |
1124 | mbuf_pkthdr_setlen(m, siz); |
1125 | error = mbuf_pkthdr_setrcvif(m, NULL); | |
1126 | if (error) | |
1127 | panic("nfsd setrcvif failed: %d", error); | |
1c79356b A |
1128 | /* |
1129 | * For stream protocols, prepend a Sun RPC | |
1130 | * Record Mark. | |
1131 | */ | |
91447636 A |
1132 | if (slp->ns_sotype == SOCK_STREAM) { |
1133 | error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); | |
1134 | if (!error) | |
b0d623f7 | 1135 | *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); |
1c79356b | 1136 | } |
91447636 A |
1137 | if (!error) { |
1138 | if (slp->ns_flag & SLP_VALID) { | |
2d21ac55 | 1139 | error = nfsrv_send(slp, nd->nd_nam2, m); |
91447636 A |
1140 | } else { |
1141 | error = EPIPE; | |
1142 | mbuf_freem(m); | |
1143 | } | |
1144 | } else { | |
1145 | mbuf_freem(m); | |
1c79356b | 1146 | } |
2d21ac55 | 1147 | mrep = NULL; |
55e303ae | 1148 | if (nd->nd_nam2) { |
91447636 | 1149 | mbuf_freem(nd->nd_nam2); |
55e303ae A |
1150 | nd->nd_nam2 = NULL; |
1151 | } | |
91447636 A |
1152 | if (error == EPIPE) { |
1153 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1c79356b | 1154 | nfsrv_zapsock(slp); |
91447636 A |
1155 | lck_rw_done(&slp->ns_rwlock); |
1156 | } | |
1c79356b | 1157 | if (error == EINTR || error == ERESTART) { |
2d21ac55 | 1158 | nfsm_chain_cleanup(&nd->nd_nmreq); |
0c530ab8 A |
1159 | if (IS_VALID_CRED(nd->nd_cr)) |
1160 | kauth_cred_unref(&nd->nd_cr); | |
6d2010ae A |
1161 | if (nd->nd_gss_context) |
1162 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
2d21ac55 | 1163 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b | 1164 | nfsrv_slpderef(slp); |
2d21ac55 | 1165 | lck_mtx_lock(nfsd_mutex); |
1c79356b A |
1166 | goto done; |
1167 | } | |
1168 | break; | |
1169 | case RC_DROPIT: | |
91447636 | 1170 | mbuf_freem(nd->nd_nam2); |
2d21ac55 | 1171 | nd->nd_nam2 = NULL; |
1c79356b A |
1172 | break; |
1173 | }; | |
2d21ac55 | 1174 | opcnt++; |
1c79356b | 1175 | if (nd) { |
2d21ac55 | 1176 | nfsm_chain_cleanup(&nd->nd_nmreq); |
55e303ae | 1177 | if (nd->nd_nam2) |
91447636 | 1178 | mbuf_freem(nd->nd_nam2); |
0c530ab8 A |
1179 | if (IS_VALID_CRED(nd->nd_cr)) |
1180 | kauth_cred_unref(&nd->nd_cr); | |
6d2010ae A |
1181 | if (nd->nd_gss_context) |
1182 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
2d21ac55 | 1183 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b A |
1184 | nd = NULL; |
1185 | } | |
1186 | ||
1187 | /* | |
1188 | * Check to see if there are outstanding writes that | |
1189 | * need to be serviced. | |
1190 | */ | |
2d21ac55 A |
1191 | writes_todo = 0; |
1192 | if (slp->ns_wgtime) { | |
1193 | microuptime(&now); | |
1194 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + | |
1195 | (u_quad_t)now.tv_usec; | |
1196 | if (slp->ns_wgtime <= cur_usec) { | |
1197 | cacherep = RC_DOIT; | |
1198 | writes_todo = 1; | |
1199 | } | |
91447636 | 1200 | } |
1c79356b | 1201 | } while (writes_todo); |
2d21ac55 A |
1202 | |
1203 | nd = NULL; | |
1204 | if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) { | |
1205 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1206 | error = nfsrv_dorec(slp, nfsd, &nd); | |
1207 | if (error == EINVAL) { // RPCSEC_GSS drop | |
1208 | if (slp->ns_sotype == SOCK_STREAM) | |
1209 | nfsrv_zapsock(slp); // drop connection | |
1210 | } | |
91447636 | 1211 | lck_rw_done(&slp->ns_rwlock); |
2d21ac55 A |
1212 | } |
1213 | if (!nd) { | |
1214 | /* drop our reference on the socket */ | |
1c79356b A |
1215 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; |
1216 | nfsd->nfsd_slp = NULL; | |
1217 | nfsrv_slpderef(slp); | |
91447636 | 1218 | } |
1c79356b | 1219 | } |
91447636 | 1220 | lck_mtx_lock(nfsd_mutex); |
2d21ac55 | 1221 | done: |
1c79356b | 1222 | TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); |
91447636 | 1223 | FREE(nfsd, M_NFSD); |
2d21ac55 A |
1224 | if (--nfsd_thread_count == 0) |
1225 | nfsrv_cleanup(); | |
91447636 | 1226 | lck_mtx_unlock(nfsd_mutex); |
1c79356b A |
1227 | return (error); |
1228 | } | |
91447636 | 1229 | |
b0d623f7 | 1230 | int |
2d21ac55 | 1231 | nfssvc_export(user_addr_t argp) |
91447636 A |
1232 | { |
1233 | int error = 0, is_64bit; | |
1234 | struct user_nfs_export_args unxa; | |
2d21ac55 | 1235 | vfs_context_t ctx = vfs_context_current(); |
91447636 | 1236 | |
2d21ac55 | 1237 | is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); |
91447636 A |
1238 | |
1239 | /* copy in pointers to path and export args */ | |
1240 | if (is_64bit) { | |
1241 | error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); | |
1242 | } else { | |
1243 | struct nfs_export_args tnxa; | |
1244 | error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); | |
1245 | if (error == 0) { | |
1246 | /* munge into LP64 version of nfs_export_args structure */ | |
1247 | unxa.nxa_fsid = tnxa.nxa_fsid; | |
1248 | unxa.nxa_expid = tnxa.nxa_expid; | |
1249 | unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); | |
1250 | unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); | |
1251 | unxa.nxa_flags = tnxa.nxa_flags; | |
1252 | unxa.nxa_netcount = tnxa.nxa_netcount; | |
1253 | unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); | |
1254 | } | |
1255 | } | |
1256 | if (error) | |
1257 | return (error); | |
1258 | ||
2d21ac55 | 1259 | error = nfsrv_export(&unxa, ctx); |
91447636 A |
1260 | |
1261 | return (error); | |
1262 | } | |
1263 | ||
1c79356b | 1264 | /* |
2d21ac55 | 1265 | * Shut down a socket associated with an nfsrv_sock structure. |
1c79356b A |
1266 | * Should be called with the send lock set, if required. |
1267 | * The trick here is to increment the sref at the start, so that the nfsds | |
1268 | * will stop using it and clear ns_flag at the end so that it will not be | |
1269 | * reassigned during cleanup. | |
1270 | */ | |
b0d623f7 | 1271 | void |
2d21ac55 | 1272 | nfsrv_zapsock(struct nfsrv_sock *slp) |
1c79356b | 1273 | { |
91447636 | 1274 | socket_t so; |
1c79356b | 1275 | |
91447636 A |
1276 | if ((slp->ns_flag & SLP_VALID) == 0) |
1277 | return; | |
1c79356b | 1278 | slp->ns_flag &= ~SLP_ALLFLAGS; |
91447636 A |
1279 | |
1280 | so = slp->ns_so; | |
1281 | if (so == NULL) | |
1282 | return; | |
1283 | ||
743b1565 A |
1284 | /* |
1285 | * Attempt to deter future upcalls, but leave the | |
1286 | * upcall info in place to avoid a race with the | |
1287 | * networking code. | |
1288 | */ | |
91447636 | 1289 | socket_lock(so, 1); |
91447636 A |
1290 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
1291 | socket_unlock(so, 1); | |
743b1565 | 1292 | |
91447636 | 1293 | sock_shutdown(so, SHUT_RDWR); |
1c79356b A |
1294 | } |
1295 | ||
1c79356b | 1296 | /* |
91447636 | 1297 | * cleanup and release a server socket structure. |
1c79356b | 1298 | */ |
b0d623f7 | 1299 | void |
2d21ac55 | 1300 | nfsrv_slpfree(struct nfsrv_sock *slp) |
1c79356b | 1301 | { |
91447636 | 1302 | struct nfsrv_descript *nwp, *nnwp; |
1c79356b | 1303 | |
91447636 A |
1304 | if (slp->ns_so) { |
1305 | sock_release(slp->ns_so); | |
1306 | slp->ns_so = NULL; | |
1307 | } | |
1308 | if (slp->ns_nam) | |
1309 | mbuf_free(slp->ns_nam); | |
1310 | if (slp->ns_raw) | |
1311 | mbuf_freem(slp->ns_raw); | |
1312 | if (slp->ns_rec) | |
1313 | mbuf_freem(slp->ns_rec); | |
2d21ac55 A |
1314 | if (slp->ns_frag) |
1315 | mbuf_freem(slp->ns_frag); | |
1316 | slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; | |
1317 | slp->ns_reccnt = 0; | |
55e303ae | 1318 | |
91447636 A |
1319 | for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { |
1320 | nnwp = nwp->nd_tq.le_next; | |
1321 | LIST_REMOVE(nwp, nd_tq); | |
2d21ac55 A |
1322 | nfsm_chain_cleanup(&nwp->nd_nmreq); |
1323 | if (nwp->nd_mrep) | |
1324 | mbuf_freem(nwp->nd_mrep); | |
1325 | if (nwp->nd_nam2) | |
1326 | mbuf_freem(nwp->nd_nam2); | |
0c530ab8 A |
1327 | if (IS_VALID_CRED(nwp->nd_cr)) |
1328 | kauth_cred_unref(&nwp->nd_cr); | |
6d2010ae A |
1329 | if (nwp->nd_gss_context) |
1330 | nfs_gss_svc_ctx_deref(nwp->nd_gss_context); | |
2d21ac55 | 1331 | FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); |
55e303ae | 1332 | } |
91447636 A |
1333 | LIST_INIT(&slp->ns_tq); |
1334 | ||
2d21ac55 A |
1335 | lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); |
1336 | lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); | |
91447636 | 1337 | FREE(slp, M_NFSSVC); |
55e303ae A |
1338 | } |
1339 | ||
1340 | /* | |
91447636 A |
1341 | * Derefence a server socket structure. If it has no more references and |
1342 | * is no longer valid, you can throw it away. | |
55e303ae A |
1343 | */ |
1344 | void | |
2d21ac55 | 1345 | nfsrv_slpderef(struct nfsrv_sock *slp) |
55e303ae | 1346 | { |
743b1565 A |
1347 | struct timeval now; |
1348 | ||
91447636 A |
1349 | lck_mtx_lock(nfsd_mutex); |
1350 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1351 | slp->ns_sref--; | |
2d21ac55 | 1352 | |
91447636 | 1353 | if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { |
2d21ac55 A |
1354 | if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { |
1355 | /* remove socket from queue since there's no work */ | |
1356 | if (slp->ns_flag & SLP_WAITQ) | |
1357 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1358 | else | |
1359 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1360 | slp->ns_flag &= ~SLP_QUEUED; | |
1361 | } | |
91447636 A |
1362 | lck_rw_done(&slp->ns_rwlock); |
1363 | lck_mtx_unlock(nfsd_mutex); | |
1364 | return; | |
55e303ae | 1365 | } |
91447636 | 1366 | |
2d21ac55 A |
1367 | /* This socket is no longer valid, so we'll get rid of it */ |
1368 | ||
1369 | if (slp->ns_flag & SLP_QUEUED) { | |
1370 | if (slp->ns_flag & SLP_WAITQ) | |
1371 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1372 | else | |
1373 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1374 | slp->ns_flag &= ~SLP_QUEUED; | |
1375 | } | |
1376 | ||
1377 | /* | |
1378 | * Queue the socket up for deletion | |
1379 | * and start the timer to delete it | |
1380 | * after it has been in limbo for | |
1381 | * a while. | |
1382 | */ | |
743b1565 A |
1383 | microuptime(&now); |
1384 | slp->ns_timestamp = now.tv_sec; | |
2d21ac55 A |
1385 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); |
1386 | TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); | |
1387 | if (!nfsrv_deadsock_timer_on) { | |
1388 | nfsrv_deadsock_timer_on = 1; | |
1389 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1390 | NFSRV_DEADSOCKDELAY * 1000); | |
1391 | } | |
1392 | ||
743b1565 | 1393 | lck_rw_done(&slp->ns_rwlock); |
2d21ac55 A |
1394 | /* now remove from the write gather socket list */ |
1395 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1396 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1397 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1398 | } | |
91447636 | 1399 | lck_mtx_unlock(nfsd_mutex); |
55e303ae A |
1400 | } |
1401 | ||
1c79356b | 1402 | /* |
2d21ac55 A |
1403 | * Check periodically for dead sockets pending delete. |
1404 | * If a socket has been dead for more than NFSRV_DEADSOCKDELAY | |
1405 | * seconds then we assume it's safe to free. | |
1c79356b A |
1406 | */ |
1407 | void | |
2d21ac55 | 1408 | nfsrv_deadsock_timer(__unused void *param0, __unused void *param1) |
1c79356b | 1409 | { |
2d21ac55 | 1410 | struct nfsrv_sock *slp; |
743b1565 | 1411 | struct timeval now; |
2d21ac55 | 1412 | time_t time_to_wait; |
1c79356b | 1413 | |
2d21ac55 A |
1414 | microuptime(&now); |
1415 | lck_mtx_lock(nfsd_mutex); | |
1c79356b | 1416 | |
2d21ac55 A |
1417 | while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) { |
1418 | if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec) | |
1419 | break; | |
1420 | TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain); | |
1421 | nfsrv_slpfree(slp); | |
8ad349bb | 1422 | } |
2d21ac55 A |
1423 | if (TAILQ_EMPTY(&nfsrv_deadsocklist)) { |
1424 | nfsrv_deadsock_timer_on = 0; | |
1425 | lck_mtx_unlock(nfsd_mutex); | |
1426 | return; | |
91447636 | 1427 | } |
2d21ac55 A |
1428 | time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec; |
1429 | if (time_to_wait < 1) | |
1430 | time_to_wait = 1; | |
1c79356b | 1431 | |
2d21ac55 A |
1432 | lck_mtx_unlock(nfsd_mutex); |
1433 | ||
1434 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1435 | time_to_wait * 1000); | |
1c79356b A |
1436 | } |
1437 | ||
1438 | /* | |
2d21ac55 | 1439 | * Clean up the data structures for the server. |
1c79356b | 1440 | */ |
2d21ac55 A |
1441 | void |
1442 | nfsrv_cleanup(void) | |
1c79356b | 1443 | { |
2d21ac55 | 1444 | struct nfsrv_sock *slp, *nslp; |
55e303ae | 1445 | struct timeval now; |
b0d623f7 | 1446 | #if CONFIG_FSE |
2d21ac55 A |
1447 | struct nfsrv_fmod *fp, *nfp; |
1448 | int i; | |
b0d623f7 | 1449 | #endif |
1c79356b | 1450 | |
55e303ae | 1451 | microuptime(&now); |
2d21ac55 A |
1452 | for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { |
1453 | nslp = TAILQ_NEXT(slp, ns_chain); | |
1454 | if (slp->ns_flag & SLP_VALID) { | |
1455 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1456 | nfsrv_zapsock(slp); | |
1457 | lck_rw_done(&slp->ns_rwlock); | |
1458 | } | |
1459 | if (slp->ns_flag & SLP_QUEUED) { | |
1460 | if (slp->ns_flag & SLP_WAITQ) | |
1461 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1462 | else | |
1463 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1464 | slp->ns_flag &= ~SLP_QUEUED; | |
1465 | } | |
1466 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1467 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1468 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1469 | } | |
1470 | /* queue the socket up for deletion */ | |
1471 | slp->ns_timestamp = now.tv_sec; | |
1472 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); | |
1473 | TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); | |
1474 | if (!nfsrv_deadsock_timer_on) { | |
1475 | nfsrv_deadsock_timer_on = 1; | |
1476 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1477 | NFSRV_DEADSOCKDELAY * 1000); | |
1478 | } | |
1479 | } | |
1480 | ||
b0d623f7 | 1481 | #if CONFIG_FSE |
2d21ac55 A |
1482 | /* |
1483 | * Flush pending file write fsevents | |
1484 | */ | |
1485 | lck_mtx_lock(nfsrv_fmod_mutex); | |
1486 | for (i = 0; i < NFSRVFMODHASHSZ; i++) { | |
1487 | for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { | |
1488 | /* | |
1489 | * Fire off the content modified fsevent for each | |
1490 | * entry, remove it from the list, and free it. | |
1491 | */ | |
6d2010ae A |
1492 | if (nfsrv_fsevents_enabled) { |
1493 | fp->fm_context.vc_thread = current_thread(); | |
2d21ac55 A |
1494 | add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, |
1495 | FSE_ARG_VNODE, fp->fm_vp, | |
1496 | FSE_ARG_DONE); | |
6d2010ae | 1497 | } |
2d21ac55 A |
1498 | vnode_put(fp->fm_vp); |
1499 | kauth_cred_unref(&fp->fm_context.vc_ucred); | |
1500 | nfp = LIST_NEXT(fp, fm_link); | |
1501 | LIST_REMOVE(fp, fm_link); | |
1502 | FREE(fp, M_TEMP); | |
1503 | } | |
1504 | } | |
1505 | nfsrv_fmod_pending = 0; | |
1506 | lck_mtx_unlock(nfsrv_fmod_mutex); | |
b0d623f7 | 1507 | #endif |
2d21ac55 A |
1508 | |
1509 | nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ | |
1510 | ||
1511 | nfsrv_cleancache(); /* And clear out server cache */ | |
1512 | ||
1513 | nfsrv_udpsock = NULL; | |
6d2010ae | 1514 | nfsrv_udp6sock = NULL; |
1c79356b | 1515 | } |
2d21ac55 | 1516 | |
1c79356b | 1517 | #endif /* NFS_NOSERVER */ |