]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * | |
33 | * This code is derived from software contributed to Berkeley by | |
34 | * Rick Macklem at The University of Guelph. | |
35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. All advertising materials mentioning features or use of this software | |
45 | * must display the following acknowledgement: | |
46 | * This product includes software developed by the University of | |
47 | * California, Berkeley and its contributors. | |
48 | * 4. Neither the name of the University nor the names of its contributors | |
49 | * may be used to endorse or promote products derived from this software | |
50 | * without specific prior written permission. | |
51 | * | |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
62 | * SUCH DAMAGE. | |
63 | * | |
64 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 | |
65 | * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ | |
66 | */ | |
67 | /* | |
68 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
69 | * support for mandatory and extensible security protections. This notice | |
70 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
71 | * Version 2.0. | |
72 | */ | |
73 | ||
74 | #include <sys/param.h> | |
75 | #include <sys/systm.h> | |
76 | #include <sys/kernel.h> | |
77 | #include <sys/file_internal.h> | |
78 | #include <sys/filedesc.h> | |
79 | #include <sys/stat.h> | |
80 | #include <sys/vnode_internal.h> | |
81 | #include <sys/mount_internal.h> | |
82 | #include <sys/proc_internal.h> /* for fdflags */ | |
83 | #include <sys/kauth.h> | |
84 | #include <sys/sysctl.h> | |
85 | #include <sys/ubc.h> | |
86 | #include <sys/uio.h> | |
87 | #include <sys/malloc.h> | |
88 | #include <sys/kpi_mbuf.h> | |
89 | #include <sys/socket.h> | |
90 | #include <sys/socketvar.h> | |
91 | #include <sys/domain.h> | |
92 | #include <sys/protosw.h> | |
93 | #include <sys/fcntl.h> | |
94 | #include <sys/lockf.h> | |
95 | #include <sys/syslog.h> | |
96 | #include <sys/user.h> | |
97 | #include <sys/sysproto.h> | |
98 | #include <sys/kpi_socket.h> | |
99 | #include <sys/fsevents.h> | |
100 | #include <libkern/OSAtomic.h> | |
101 | #include <kern/thread_call.h> | |
102 | #include <kern/task.h> | |
103 | ||
104 | #include <security/audit/audit.h> | |
105 | ||
106 | #include <netinet/in.h> | |
107 | #include <netinet/tcp.h> | |
108 | #include <nfs/xdr_subs.h> | |
109 | #include <nfs/rpcv2.h> | |
110 | #include <nfs/nfsproto.h> | |
111 | #include <nfs/nfs.h> | |
112 | #include <nfs/nfsm_subs.h> | |
113 | #include <nfs/nfsrvcache.h> | |
114 | #include <nfs/nfs_gss.h> | |
115 | #include <nfs/nfsmount.h> | |
116 | #include <nfs/nfsnode.h> | |
117 | #include <nfs/nfs_lock.h> | |
118 | #if CONFIG_MACF | |
119 | #include <security/mac_framework.h> | |
120 | #endif | |
121 | ||
122 | kern_return_t thread_terminate(thread_t); /* XXX */ | |
123 | ||
124 | #if NFSSERVER | |
125 | ||
126 | extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, | |
127 | struct nfsrv_sock *slp, | |
128 | vfs_context_t ctx, | |
129 | mbuf_t *mrepp); | |
130 | extern int nfsrv_wg_delay; | |
131 | extern int nfsrv_wg_delay_v3; | |
132 | ||
133 | static int nfsrv_require_resv_port = 0; | |
134 | static time_t nfsrv_idlesock_timer_on = 0; | |
135 | static int nfsrv_sock_tcp_cnt = 0; | |
136 | #define NFSD_MIN_IDLE_TIMEOUT 30 | |
137 | static int nfsrv_sock_idle_timeout = 3600; /* One hour */ | |
138 | ||
139 | int nfssvc_export(user_addr_t argp); | |
140 | int nfssvc_nfsd(void); | |
141 | int nfssvc_addsock(socket_t, mbuf_t); | |
142 | void nfsrv_zapsock(struct nfsrv_sock *); | |
143 | void nfsrv_slpderef(struct nfsrv_sock *); | |
144 | void nfsrv_slpfree(struct nfsrv_sock *); | |
145 | ||
146 | #endif /* NFSSERVER */ | |
147 | ||
148 | /* | |
149 | * sysctl stuff | |
150 | */ | |
151 | SYSCTL_DECL(_vfs_generic); | |
152 | SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); | |
153 | ||
154 | #if NFSCLIENT | |
155 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); | |
156 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); | |
157 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); | |
158 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); | |
159 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); | |
160 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); | |
161 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); | |
162 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); | |
163 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); | |
164 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); | |
165 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); | |
166 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, ""); | |
167 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); | |
168 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); | |
169 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); | |
170 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); | |
171 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); | |
172 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); | |
173 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); | |
174 | SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); | |
175 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); | |
176 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, ""); | |
177 | #endif /* NFSCLIENT */ | |
178 | ||
179 | #if NFSSERVER | |
180 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); | |
181 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); | |
182 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); | |
183 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); | |
184 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); | |
185 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); | |
186 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); | |
187 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); | |
188 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); | |
189 | SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); | |
190 | #if CONFIG_FSE | |
191 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); | |
192 | #endif | |
193 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); | |
194 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); | |
195 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, ""); | |
196 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, ""); | |
197 | #ifdef NFS_UC_Q_DEBUG | |
198 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, ""); | |
199 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, ""); | |
200 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, ""); | |
201 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, ""); | |
202 | #endif | |
203 | #endif /* NFSSERVER */ | |
204 | ||
205 | ||
206 | #if NFSCLIENT | |
207 | ||
208 | static int | |
209 | mapname2id(struct nfs_testmapid *map) | |
210 | { | |
211 | int error; | |
212 | ||
213 | error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); | |
214 | if (error) | |
215 | return (error); | |
216 | ||
217 | if (map->ntm_grpflag) | |
218 | error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); | |
219 | else | |
220 | error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); | |
221 | ||
222 | return (error); | |
223 | } | |
224 | ||
225 | static int | |
226 | mapid2name(struct nfs_testmapid *map) | |
227 | { | |
228 | int error; | |
229 | int len = sizeof(map->ntm_name); | |
230 | ||
231 | if (map->ntm_grpflag) | |
232 | error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); | |
233 | else | |
234 | error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); | |
235 | ||
236 | if (error) | |
237 | return (error); | |
238 | ||
239 | error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); | |
240 | ||
241 | return (error); | |
242 | ||
243 | } | |
244 | ||
245 | ||
246 | static int | |
247 | nfsclnt_testidmap(proc_t p, user_addr_t argp) | |
248 | { | |
249 | struct nfs_testmapid mapid; | |
250 | int error, coerror; | |
251 | ||
252 | /* Let root make this call. */ | |
253 | error = proc_suser(p); | |
254 | if (error) | |
255 | return (error); | |
256 | ||
257 | error = copyin(argp, &mapid, sizeof(mapid)); | |
258 | if (error) | |
259 | return (error); | |
260 | if (mapid.ntm_name2id) | |
261 | error = mapname2id(&mapid); | |
262 | else | |
263 | error = mapid2name(&mapid); | |
264 | ||
265 | coerror = copyout(&mapid, argp, sizeof(mapid)); | |
266 | ||
267 | return (error ? error : coerror); | |
268 | } | |
269 | ||
270 | int | |
271 | nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) | |
272 | { | |
273 | struct lockd_ans la; | |
274 | int error; | |
275 | ||
276 | switch (uap->flag) { | |
277 | case NFSCLNT_LOCKDANS: | |
278 | error = copyin(uap->argp, &la, sizeof(la)); | |
279 | if (!error) | |
280 | error = nfslockdans(p, &la); | |
281 | break; | |
282 | case NFSCLNT_LOCKDNOTIFY: | |
283 | error = nfslockdnotify(p, uap->argp); | |
284 | break; | |
285 | case NFSCLNT_TESTIDMAP: | |
286 | error = nfsclnt_testidmap(p, uap->argp); | |
287 | break; | |
288 | default: | |
289 | error = EINVAL; | |
290 | } | |
291 | return (error); | |
292 | } | |
293 | ||
294 | ||
295 | /* | |
296 | * Asynchronous I/O threads for client NFS. | |
297 | * They do read-ahead and write-behind operations on the block I/O cache. | |
298 | * | |
299 | * The pool of up to nfsiod_thread_max threads is launched on demand and exit | |
300 | * when unused for a while. There are as many nfsiod structs as there are | |
301 | * nfsiod threads; however there's no strict tie between a thread and a struct. | |
302 | * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes | |
303 | * up, it removes the next struct nfsiod from the queue and services it. Then | |
304 | * it will put the struct at the head of free list and sleep on it. | |
305 | * Async requests will pull the next struct nfsiod from the head of the free list, | |
306 | * put it on the work queue, and wake whatever thread is waiting on that struct. | |
307 | */ | |
308 | ||
309 | /* | |
310 | * nfsiod thread exit routine | |
311 | * | |
312 | * Must be called with nfsiod_mutex held so that the | |
313 | * decision to terminate is atomic with the termination. | |
314 | */ | |
315 | void | |
316 | nfsiod_terminate(struct nfsiod *niod) | |
317 | { | |
318 | nfsiod_thread_count--; | |
319 | lck_mtx_unlock(nfsiod_mutex); | |
320 | if (niod) | |
321 | FREE(niod, M_TEMP); | |
322 | else | |
323 | printf("nfsiod: terminating without niod\n"); | |
324 | thread_terminate(current_thread()); | |
325 | /*NOTREACHED*/ | |
326 | } | |
327 | ||
328 | /* nfsiod thread startup routine */ | |
329 | void | |
330 | nfsiod_thread(void) | |
331 | { | |
332 | struct nfsiod *niod; | |
333 | int error; | |
334 | ||
335 | MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); | |
336 | if (!niod) { | |
337 | lck_mtx_lock(nfsiod_mutex); | |
338 | nfsiod_thread_count--; | |
339 | wakeup(current_thread()); | |
340 | lck_mtx_unlock(nfsiod_mutex); | |
341 | thread_terminate(current_thread()); | |
342 | /*NOTREACHED*/ | |
343 | } | |
344 | bzero(niod, sizeof(*niod)); | |
345 | lck_mtx_lock(nfsiod_mutex); | |
346 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
347 | wakeup(current_thread()); | |
348 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
349 | /* shouldn't return... so we have an error */ | |
350 | /* remove an old nfsiod struct and terminate */ | |
351 | lck_mtx_lock(nfsiod_mutex); | |
352 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
353 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
354 | nfsiod_terminate(niod); | |
355 | /*NOTREACHED*/ | |
356 | } | |
357 | ||
358 | /* | |
359 | * Start up another nfsiod thread. | |
360 | * (unless we're already maxed out and there are nfsiods running) | |
361 | */ | |
362 | int | |
363 | nfsiod_start(void) | |
364 | { | |
365 | thread_t thd = THREAD_NULL; | |
366 | ||
367 | lck_mtx_lock(nfsiod_mutex); | |
368 | if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { | |
369 | lck_mtx_unlock(nfsiod_mutex); | |
370 | return (EBUSY); | |
371 | } | |
372 | nfsiod_thread_count++; | |
373 | if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { | |
374 | lck_mtx_unlock(nfsiod_mutex); | |
375 | return (EBUSY); | |
376 | } | |
377 | /* wait for the thread to complete startup */ | |
378 | msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); | |
379 | thread_deallocate(thd); | |
380 | return (0); | |
381 | } | |
382 | ||
383 | /* | |
384 | * Continuation for Asynchronous I/O threads for NFS client. | |
385 | * | |
386 | * Grab an nfsiod struct to work on, do some work, then drop it | |
387 | */ | |
388 | int | |
389 | nfsiod_continue(int error) | |
390 | { | |
391 | struct nfsiod *niod; | |
392 | struct nfsmount *nmp; | |
393 | struct nfsreq *req, *treq; | |
394 | struct nfs_reqqhead iodq; | |
395 | int morework; | |
396 | ||
397 | lck_mtx_lock(nfsiod_mutex); | |
398 | niod = TAILQ_FIRST(&nfsiodwork); | |
399 | if (!niod) { | |
400 | /* there's no work queued up */ | |
401 | /* remove an old nfsiod struct and terminate */ | |
402 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
403 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
404 | nfsiod_terminate(niod); | |
405 | /*NOTREACHED*/ | |
406 | } | |
407 | TAILQ_REMOVE(&nfsiodwork, niod, niod_link); | |
408 | ||
409 | worktodo: | |
410 | while ((nmp = niod->niod_nmp)) { | |
411 | if (nmp == NULL){ | |
412 | niod->niod_nmp = NULL; | |
413 | break; | |
414 | } | |
415 | ||
416 | /* | |
417 | * Service this mount's async I/O queue. | |
418 | * | |
419 | * In order to ensure some level of fairness between mounts, | |
420 | * we grab all the work up front before processing it so any | |
421 | * new work that arrives will be serviced on a subsequent | |
422 | * iteration - and we have a chance to see if other work needs | |
423 | * to be done (e.g. the delayed write queue needs to be pushed | |
424 | * or other mounts are waiting for an nfsiod). | |
425 | */ | |
426 | /* grab the current contents of the queue */ | |
427 | TAILQ_INIT(&iodq); | |
428 | TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); | |
429 | /* Mark each iod request as being managed by an iod */ | |
430 | TAILQ_FOREACH(req, &iodq, r_achain) { | |
431 | lck_mtx_lock(&req->r_mtx); | |
432 | assert(!(req->r_flags & R_IOD)); | |
433 | req->r_flags |= R_IOD; | |
434 | lck_mtx_unlock(&req->r_mtx); | |
435 | } | |
436 | lck_mtx_unlock(nfsiod_mutex); | |
437 | ||
438 | /* process the queue */ | |
439 | TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { | |
440 | TAILQ_REMOVE(&iodq, req, r_achain); | |
441 | req->r_achain.tqe_next = NFSREQNOLIST; | |
442 | req->r_callback.rcb_func(req); | |
443 | } | |
444 | ||
445 | /* now check if there's more/other work to be done */ | |
446 | lck_mtx_lock(nfsiod_mutex); | |
447 | morework = !TAILQ_EMPTY(&nmp->nm_iodq); | |
448 | if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { | |
449 | /* | |
450 | * we're going to stop working on this mount but if the | |
451 | * mount still needs more work so queue it up | |
452 | */ | |
453 | if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) | |
454 | TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); | |
455 | nmp->nm_niod = NULL; | |
456 | niod->niod_nmp = NULL; | |
457 | } | |
458 | } | |
459 | ||
460 | /* loop if there's still a mount to work on */ | |
461 | if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { | |
462 | niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); | |
463 | TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); | |
464 | niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; | |
465 | } | |
466 | if (niod->niod_nmp) | |
467 | goto worktodo; | |
468 | ||
469 | /* queue ourselves back up - if there aren't too many threads running */ | |
470 | if (nfsiod_thread_count <= NFSIOD_MAX) { | |
471 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
472 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
473 | /* shouldn't return... so we have an error */ | |
474 | /* remove an old nfsiod struct and terminate */ | |
475 | lck_mtx_lock(nfsiod_mutex); | |
476 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
477 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
478 | } | |
479 | nfsiod_terminate(niod); | |
480 | /*NOTREACHED*/ | |
481 | return (0); | |
482 | } | |
483 | ||
484 | #endif /* NFSCLIENT */ | |
485 | ||
486 | ||
487 | #if NFSSERVER | |
488 | ||
489 | /* | |
490 | * NFS server system calls | |
491 | * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c | |
492 | */ | |
493 | ||
494 | /* | |
495 | * Get file handle system call | |
496 | */ | |
497 | int | |
498 | getfh(proc_t p, struct getfh_args *uap, __unused int *retval) | |
499 | { | |
500 | vnode_t vp; | |
501 | struct nfs_filehandle nfh; | |
502 | int error, fhlen, fidlen; | |
503 | struct nameidata nd; | |
504 | char path[MAXPATHLEN], *ptr; | |
505 | size_t pathlen; | |
506 | struct nfs_exportfs *nxfs; | |
507 | struct nfs_export *nx; | |
508 | ||
509 | /* | |
510 | * Must be super user | |
511 | */ | |
512 | error = proc_suser(p); | |
513 | if (error) | |
514 | return (error); | |
515 | ||
516 | error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); | |
517 | if (!error) | |
518 | error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); | |
519 | if (error) | |
520 | return (error); | |
521 | /* limit fh size to length specified (or v3 size by default) */ | |
522 | if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) | |
523 | fhlen = NFSV3_MAX_FH_SIZE; | |
524 | fidlen = fhlen - sizeof(struct nfs_exphandle); | |
525 | ||
526 | if (!nfsrv_is_initialized()) | |
527 | return (EINVAL); | |
528 | ||
529 | NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, | |
530 | UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); | |
531 | error = namei(&nd); | |
532 | if (error) | |
533 | return (error); | |
534 | nameidone(&nd); | |
535 | ||
536 | vp = nd.ni_vp; | |
537 | ||
538 | // find exportfs that matches f_mntonname | |
539 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
540 | ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; | |
541 | LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { | |
542 | if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) | |
543 | break; | |
544 | } | |
545 | if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { | |
546 | error = EINVAL; | |
547 | goto out; | |
548 | } | |
549 | // find export that best matches remainder of path | |
550 | ptr = path + strlen(nxfs->nxfs_path); | |
551 | while (*ptr && (*ptr == '/')) | |
552 | ptr++; | |
553 | LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { | |
554 | int len = strlen(nx->nx_path); | |
555 | if (len == 0) // we've hit the export entry for the root directory | |
556 | break; | |
557 | if (!strncmp(nx->nx_path, ptr, len)) | |
558 | break; | |
559 | } | |
560 | if (!nx) { | |
561 | error = EINVAL; | |
562 | goto out; | |
563 | } | |
564 | ||
565 | bzero(&nfh, sizeof(nfh)); | |
566 | nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); | |
567 | nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); | |
568 | nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); | |
569 | nfh.nfh_xh.nxh_flags = 0; | |
570 | nfh.nfh_xh.nxh_reserved = 0; | |
571 | nfh.nfh_len = fidlen; | |
572 | error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); | |
573 | if (nfh.nfh_len > (uint32_t)fidlen) | |
574 | error = EOVERFLOW; | |
575 | nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; | |
576 | nfh.nfh_len += sizeof(nfh.nfh_xh); | |
577 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; | |
578 | ||
579 | out: | |
580 | lck_rw_done(&nfsrv_export_rwlock); | |
581 | vnode_put(vp); | |
582 | if (error) | |
583 | return (error); | |
584 | error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); | |
585 | return (error); | |
586 | } | |
587 | ||
588 | extern const struct fileops vnops; | |
589 | ||
590 | /* | |
591 | * syscall for the rpc.lockd to use to translate a NFS file handle into | |
592 | * an open descriptor. | |
593 | * | |
594 | * warning: do not remove the suser() call or this becomes one giant | |
595 | * security hole. | |
596 | */ | |
597 | int | |
598 | fhopen( proc_t p, | |
599 | struct fhopen_args *uap, | |
600 | int32_t *retval) | |
601 | { | |
602 | vnode_t vp; | |
603 | struct nfs_filehandle nfh; | |
604 | struct nfs_export *nx; | |
605 | struct nfs_export_options *nxo; | |
606 | struct flock lf; | |
607 | struct fileproc *fp, *nfp; | |
608 | int fmode, error, type; | |
609 | int indx; | |
610 | vfs_context_t ctx = vfs_context_current(); | |
611 | kauth_action_t action; | |
612 | ||
613 | /* | |
614 | * Must be super user | |
615 | */ | |
616 | error = suser(vfs_context_ucred(ctx), 0); | |
617 | if (error) { | |
618 | return (error); | |
619 | } | |
620 | ||
621 | if (!nfsrv_is_initialized()) { | |
622 | return (EINVAL); | |
623 | } | |
624 | ||
625 | fmode = FFLAGS(uap->flags); | |
626 | /* why not allow a non-read/write open for our lockd? */ | |
627 | if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) | |
628 | return (EINVAL); | |
629 | ||
630 | error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); | |
631 | if (error) | |
632 | return (error); | |
633 | if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || | |
634 | (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) | |
635 | return (EINVAL); | |
636 | error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); | |
637 | if (error) | |
638 | return (error); | |
639 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; | |
640 | ||
641 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
642 | /* now give me my vnode, it gets returned to me with a reference */ | |
643 | error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); | |
644 | lck_rw_done(&nfsrv_export_rwlock); | |
645 | if (error) { | |
646 | if (error == NFSERR_TRYLATER) | |
647 | error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? | |
648 | return (error); | |
649 | } | |
650 | ||
651 | /* | |
652 | * From now on we have to make sure not | |
653 | * to forget about the vnode. | |
654 | * Any error that causes an abort must vnode_put(vp). | |
655 | * Just set error = err and 'goto bad;'. | |
656 | */ | |
657 | ||
658 | /* | |
659 | * from vn_open | |
660 | */ | |
661 | if (vnode_vtype(vp) == VSOCK) { | |
662 | error = EOPNOTSUPP; | |
663 | goto bad; | |
664 | } | |
665 | ||
666 | /* disallow write operations on directories */ | |
667 | if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { | |
668 | error = EISDIR; | |
669 | goto bad; | |
670 | } | |
671 | ||
672 | #if CONFIG_MACF | |
673 | if ((error = mac_vnode_check_open(ctx, vp, fmode))) | |
674 | goto bad; | |
675 | #endif | |
676 | ||
677 | /* compute action to be authorized */ | |
678 | action = 0; | |
679 | if (fmode & FREAD) | |
680 | action |= KAUTH_VNODE_READ_DATA; | |
681 | if (fmode & (FWRITE | O_TRUNC)) | |
682 | action |= KAUTH_VNODE_WRITE_DATA; | |
683 | if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) | |
684 | goto bad; | |
685 | ||
686 | if ((error = VNOP_OPEN(vp, fmode, ctx))) | |
687 | goto bad; | |
688 | if ((error = vnode_ref_ext(vp, fmode, 0))) | |
689 | goto bad; | |
690 | ||
691 | /* | |
692 | * end of vn_open code | |
693 | */ | |
694 | ||
695 | // starting here... error paths should call vn_close/vnode_put | |
696 | if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { | |
697 | vn_close(vp, fmode & FMASK, ctx); | |
698 | goto bad; | |
699 | } | |
700 | fp = nfp; | |
701 | ||
702 | fp->f_fglob->fg_flag = fmode & FMASK; | |
703 | fp->f_fglob->fg_ops = &vnops; | |
704 | fp->f_fglob->fg_data = (caddr_t)vp; | |
705 | ||
706 | // XXX do we really need to support this with fhopen()? | |
707 | if (fmode & (O_EXLOCK | O_SHLOCK)) { | |
708 | lf.l_whence = SEEK_SET; | |
709 | lf.l_start = 0; | |
710 | lf.l_len = 0; | |
711 | if (fmode & O_EXLOCK) | |
712 | lf.l_type = F_WRLCK; | |
713 | else | |
714 | lf.l_type = F_RDLCK; | |
715 | type = F_FLOCK; | |
716 | if ((fmode & FNONBLOCK) == 0) | |
717 | type |= F_WAIT; | |
718 | if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { | |
719 | struct vfs_context context = *vfs_context_current(); | |
720 | /* Modify local copy (to not damage thread copy) */ | |
721 | context.vc_ucred = fp->f_fglob->fg_cred; | |
722 | ||
723 | vn_close(vp, fp->f_fglob->fg_flag, &context); | |
724 | fp_free(p, indx, fp); | |
725 | return (error); | |
726 | } | |
727 | fp->f_fglob->fg_flag |= FHASLOCK; | |
728 | } | |
729 | ||
730 | vnode_put(vp); | |
731 | ||
732 | proc_fdlock(p); | |
733 | procfdtbl_releasefd(p, indx, NULL); | |
734 | fp_drop(p, indx, fp, 1); | |
735 | proc_fdunlock(p); | |
736 | ||
737 | *retval = indx; | |
738 | return (0); | |
739 | ||
740 | bad: | |
741 | vnode_put(vp); | |
742 | return (error); | |
743 | } | |
744 | ||
745 | /* | |
746 | * NFS server pseudo system call | |
747 | */ | |
748 | int | |
749 | nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) | |
750 | { | |
751 | mbuf_t nam; | |
752 | struct user_nfsd_args user_nfsdarg; | |
753 | socket_t so; | |
754 | int error; | |
755 | ||
756 | AUDIT_ARG(cmd, uap->flag); | |
757 | ||
758 | /* | |
759 | * Must be super user for most operations (export ops checked later). | |
760 | */ | |
761 | if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) | |
762 | return (error); | |
763 | #if CONFIG_MACF | |
764 | error = mac_system_check_nfsd(kauth_cred_get()); | |
765 | if (error) | |
766 | return (error); | |
767 | #endif | |
768 | ||
769 | /* make sure NFS server data structures have been initialized */ | |
770 | nfsrv_init(); | |
771 | ||
772 | if (uap->flag & NFSSVC_ADDSOCK) { | |
773 | if (IS_64BIT_PROCESS(p)) { | |
774 | error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); | |
775 | } else { | |
776 | struct nfsd_args tmp_args; | |
777 | error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); | |
778 | if (error == 0) { | |
779 | user_nfsdarg.sock = tmp_args.sock; | |
780 | user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); | |
781 | user_nfsdarg.namelen = tmp_args.namelen; | |
782 | } | |
783 | } | |
784 | if (error) | |
785 | return (error); | |
786 | /* get the socket */ | |
787 | error = file_socket(user_nfsdarg.sock, &so); | |
788 | if (error) | |
789 | return (error); | |
790 | /* Get the client address for connected sockets. */ | |
791 | if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { | |
792 | nam = NULL; | |
793 | } else { | |
794 | error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); | |
795 | if (error) { | |
796 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
797 | file_drop(user_nfsdarg.sock); | |
798 | return (error); | |
799 | } | |
800 | } | |
801 | /* | |
802 | * nfssvc_addsock() will grab a retain count on the socket | |
803 | * to keep the socket from being closed when nfsd closes its | |
804 | * file descriptor for it. | |
805 | */ | |
806 | error = nfssvc_addsock(so, nam); | |
807 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
808 | file_drop(user_nfsdarg.sock); | |
809 | } else if (uap->flag & NFSSVC_NFSD) { | |
810 | error = nfssvc_nfsd(); | |
811 | } else if (uap->flag & NFSSVC_EXPORT) { | |
812 | error = nfssvc_export(uap->argp); | |
813 | } else { | |
814 | error = EINVAL; | |
815 | } | |
816 | if (error == EINTR || error == ERESTART) | |
817 | error = 0; | |
818 | return (error); | |
819 | } | |
820 | ||
821 | /* | |
822 | * Adds a socket to the list for servicing by nfsds. | |
823 | */ | |
824 | int | |
825 | nfssvc_addsock(socket_t so, mbuf_t mynam) | |
826 | { | |
827 | struct nfsrv_sock *slp; | |
828 | int error = 0, sodomain, sotype, soprotocol, on = 1; | |
829 | int first; | |
830 | struct timeval timeo; | |
831 | ||
832 | /* make sure mbuf constants are set up */ | |
833 | if (!nfs_mbuf_mhlen) | |
834 | nfs_mbuf_init(); | |
835 | ||
836 | sock_gettype(so, &sodomain, &sotype, &soprotocol); | |
837 | ||
838 | /* There should be only one UDP socket for each of IPv4 and IPv6 */ | |
839 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { | |
840 | mbuf_freem(mynam); | |
841 | return (EEXIST); | |
842 | } | |
843 | if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { | |
844 | mbuf_freem(mynam); | |
845 | return (EEXIST); | |
846 | } | |
847 | ||
848 | /* Set protocol options and reserve some space (for UDP). */ | |
849 | if (sotype == SOCK_STREAM) { | |
850 | error = nfsrv_check_exports_allow_address(mynam); | |
851 | if (error) | |
852 | return (error); | |
853 | sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); | |
854 | } | |
855 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) | |
856 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); | |
857 | if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ | |
858 | int reserve = NFS_UDPSOCKBUF; | |
859 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); | |
860 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); | |
861 | if (error) { | |
862 | log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error); | |
863 | error = 0; | |
864 | } | |
865 | } | |
866 | sock_nointerrupt(so, 0); | |
867 | ||
868 | /* | |
869 | * Set socket send/receive timeouts. | |
870 | * Receive timeout shouldn't matter, but setting the send timeout | |
871 | * will make sure that an unresponsive client can't hang the server. | |
872 | */ | |
873 | timeo.tv_usec = 0; | |
874 | timeo.tv_sec = 1; | |
875 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); | |
876 | timeo.tv_sec = 30; | |
877 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); | |
878 | if (error) { | |
879 | log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error); | |
880 | error = 0; | |
881 | } | |
882 | ||
883 | MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); | |
884 | if (!slp) { | |
885 | mbuf_freem(mynam); | |
886 | return (ENOMEM); | |
887 | } | |
888 | bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); | |
889 | lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); | |
890 | lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); | |
891 | ||
892 | lck_mtx_lock(nfsd_mutex); | |
893 | ||
894 | if (soprotocol == IPPROTO_UDP) { | |
895 | if (sodomain == AF_INET) { | |
896 | /* There should be only one UDP/IPv4 socket */ | |
897 | if (nfsrv_udpsock) { | |
898 | lck_mtx_unlock(nfsd_mutex); | |
899 | nfsrv_slpfree(slp); | |
900 | mbuf_freem(mynam); | |
901 | return (EEXIST); | |
902 | } | |
903 | nfsrv_udpsock = slp; | |
904 | } | |
905 | if (sodomain == AF_INET6) { | |
906 | /* There should be only one UDP/IPv6 socket */ | |
907 | if (nfsrv_udp6sock) { | |
908 | lck_mtx_unlock(nfsd_mutex); | |
909 | nfsrv_slpfree(slp); | |
910 | mbuf_freem(mynam); | |
911 | return (EEXIST); | |
912 | } | |
913 | nfsrv_udp6sock = slp; | |
914 | } | |
915 | } | |
916 | ||
917 | /* add the socket to the list */ | |
918 | first = TAILQ_EMPTY(&nfsrv_socklist); | |
919 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); | |
920 | if (soprotocol == IPPROTO_TCP) { | |
921 | nfsrv_sock_tcp_cnt++; | |
922 | if (nfsrv_sock_idle_timeout < 0) | |
923 | nfsrv_sock_idle_timeout = 0; | |
924 | if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) | |
925 | nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT; | |
926 | /* | |
927 | * Possibly start or stop the idle timer. We only start the idle timer when | |
928 | * we have more than 2 * nfsd_thread_max connections. If the idle timer is | |
929 | * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or | |
930 | * the number of connections. | |
931 | */ | |
932 | if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) { | |
933 | if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { | |
934 | if (nfsrv_idlesock_timer_on) { | |
935 | thread_call_cancel(nfsrv_idlesock_timer_call); | |
936 | nfsrv_idlesock_timer_on = 0; | |
937 | } | |
938 | } else { | |
939 | struct nfsrv_sock *old_slp; | |
940 | struct timeval now; | |
941 | time_t time_to_wait = nfsrv_sock_idle_timeout; | |
942 | /* | |
943 | * Get the oldest tcp socket and calculate the | |
944 | * earliest time for the next idle timer to fire | |
945 | * based on the possibly updated nfsrv_sock_idle_timeout | |
946 | */ | |
947 | TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) { | |
948 | if (old_slp->ns_sotype == SOCK_STREAM) { | |
949 | microuptime(&now); | |
950 | time_to_wait -= now.tv_sec - old_slp->ns_timestamp; | |
951 | if (time_to_wait < 1) | |
952 | time_to_wait = 1; | |
953 | break; | |
954 | } | |
955 | } | |
956 | /* | |
957 | * If we have a timer scheduled, but if its going to fire too late, | |
958 | * turn it off. | |
959 | */ | |
960 | if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) { | |
961 | thread_call_cancel(nfsrv_idlesock_timer_call); | |
962 | nfsrv_idlesock_timer_on = 0; | |
963 | } | |
964 | /* Schedule the idle thread if it isn't already */ | |
965 | if (!nfsrv_idlesock_timer_on) { | |
966 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); | |
967 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; | |
968 | } | |
969 | } | |
970 | } | |
971 | } | |
972 | ||
973 | sock_retain(so); /* grab a retain count on the socket */ | |
974 | slp->ns_so = so; | |
975 | slp->ns_sotype = sotype; | |
976 | slp->ns_nam = mynam; | |
977 | ||
978 | /* set up the socket up-call */ | |
979 | nfsrv_uc_addsock(slp, first); | |
980 | ||
981 | /* mark that the socket is not in the nfsrv_sockwg list */ | |
982 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
983 | ||
984 | slp->ns_flag = SLP_VALID | SLP_NEEDQ; | |
985 | ||
986 | nfsrv_wakenfsd(slp); | |
987 | lck_mtx_unlock(nfsd_mutex); | |
988 | ||
989 | return (0); | |
990 | } | |
991 | ||
992 | /* | |
993 | * nfssvc_nfsd() | |
994 | * | |
995 | * nfsd theory of operation: | |
996 | * | |
997 | * The first nfsd thread stays in user mode accepting new TCP connections | |
998 | * which are then added via the "addsock" call. The rest of the nfsd threads | |
999 | * simply call into the kernel and remain there in a loop handling NFS | |
1000 | * requests until killed by a signal. | |
1001 | * | |
1002 | * There's a list of nfsd threads (nfsd_head). | |
1003 | * There's an nfsd queue that contains only those nfsds that are | |
1004 | * waiting for work to do (nfsd_queue). | |
1005 | * | |
1006 | * There's a list of all NFS sockets (nfsrv_socklist) and two queues for | |
1007 | * managing the work on the sockets: | |
1008 | * nfsrv_sockwait - sockets w/new data waiting to be worked on | |
1009 | * nfsrv_sockwork - sockets being worked on which may have more work to do | |
1010 | * nfsrv_sockwg -- sockets which have pending write gather data | |
1011 | * When a socket receives data, if it is not currently queued, it | |
1012 | * will be placed at the end of the "wait" queue. | |
1013 | * Whenever a socket needs servicing we make sure it is queued and | |
1014 | * wake up a waiting nfsd (if there is one). | |
1015 | * | |
1016 | * nfsds will service at most 8 requests from the same socket before | |
1017 | * defecting to work on another socket. | |
1018 | * nfsds will defect immediately if there are any sockets in the "wait" queue | |
1019 | * nfsds looking for a socket to work on check the "wait" queue first and | |
1020 | * then check the "work" queue. | |
1021 | * When an nfsd starts working on a socket, it removes it from the head of | |
1022 | * the queue it's currently on and moves it to the end of the "work" queue. | |
1023 | * When nfsds are checking the queues for work, any sockets found not to | |
1024 | * have any work are simply dropped from the queue. | |
1025 | * | |
 | * Return value: ENOMEM if the per-thread nfsd structure cannot be | |
 | * allocated; otherwise whatever "error" holds when the service loop is | |
 | * left (0 when a surplus idle thread retires itself). | |
 | * | |
 | * Locking as used below: nfsd_mutex is held around the nfsd list/queue | |
 | * and socket queue manipulation and is held on entry to the "done" | |
 | * label; each socket's ns_rwlock is taken exclusively around changes | |
 | * to that socket's state. | |
 | * | |
1026 | */ | |
1027 | int | |
1028 | nfssvc_nfsd(void) | |
1029 | { | |
1030 | mbuf_t m, mrep; | |
1031 | struct nfsrv_sock *slp; | |
1032 | struct nfsd *nfsd; | |
1033 | struct nfsrv_descript *nd = NULL; | |
1034 | int error = 0, cacherep, writes_todo; | |
1035 | int siz, procrastinate, opcnt = 0; | |
1036 | u_quad_t cur_usec; | |
1037 | struct timeval now; | |
1038 | struct vfs_context context; | |
1039 | struct timespec to; | |
1040 | ||
1041 | #ifndef nolint | |
1042 | cacherep = RC_DOIT; | |
1043 | writes_todo = 0; | |
1044 | #endif | |
1045 | ||
1046 | MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); | |
1047 | if (!nfsd) | |
1048 | return (ENOMEM); | |
1049 | bzero(nfsd, sizeof(struct nfsd)); | |
1050 | lck_mtx_lock(nfsd_mutex); | |
1051 | if (nfsd_thread_count++ == 0) | |
1052 | nfsrv_initcache(); /* Init the server request cache */ | |
1053 | ||
1054 | TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); /* undone at "done" */ | |
1055 | lck_mtx_unlock(nfsd_mutex); | |
1056 | ||
1057 | context.vc_thread = current_thread(); /* vc_ucred is reset before each request is dispatched */ | |
1058 | ||
1059 | /* Set time out so that nfsd threads can wake up and see if they are still needed. */ | |
1060 | to.tv_sec = 5; | |
1061 | to.tv_nsec = 0; | |
1062 | ||
1063 | /* | |
1064 | * Loop getting rpc requests until SIGKILL. | |
 | * Each pass: pick (or keep) a socket, pull one request off it, | |
 | * run it, send the reply, then decide whether to stay on the socket. | |
1065 | */ | |
1066 | for (;;) { | |
1067 | if (nfsd_thread_max <= 0) { | |
1068 | /* NFS server shutting down, get out ASAP */ | |
1069 | error = EINTR; | |
1070 | slp = nfsd->nfsd_slp; | |
1071 | } else if (nfsd->nfsd_flag & NFSD_REQINPROG) { | |
1072 | /* already have some work to do */ | |
1073 | error = 0; | |
1074 | slp = nfsd->nfsd_slp; | |
1075 | } else { | |
1076 | /* need to find work to do */ | |
1077 | error = 0; | |
1078 | lck_mtx_lock(nfsd_mutex); | |
1079 | while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) { | |
1080 | if (nfsd_thread_count > nfsd_thread_max) { | |
1081 | /* | |
1082 | * If we have no socket and there are more | |
1083 | * nfsd threads than configured, let's exit. | |
1084 | */ | |
1085 | error = 0; | |
1086 | goto done; /* nfsd_mutex is held, as "done" requires */ | |
1087 | } | |
1088 | nfsd->nfsd_flag |= NFSD_WAITING; | |
1089 | TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue); | |
1090 | error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to); | |
1091 | if (error) { | |
1092 | if (nfsd->nfsd_flag & NFSD_WAITING) { /* still queued: take ourselves off nfsd_queue */ | |
1093 | TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue); | |
1094 | nfsd->nfsd_flag &= ~NFSD_WAITING; | |
1095 | } | |
1096 | if (error == EWOULDBLOCK) /* presumably the timeout set up above expiring */ | |
1097 | continue; /* re-evaluate the while condition */ | |
1098 | goto done; /* any other msleep error ends this thread; nfsd_mutex is held */ | |
1099 | } | |
1100 | } | |
1101 | slp = nfsd->nfsd_slp; | |
1102 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) { | |
1103 | /* look for a socket to work on in the wait queue */ | |
1104 | while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) { | |
1105 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1106 | /* remove from the head of the queue */ | |
1107 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1108 | slp->ns_flag &= ~SLP_WAITQ; | |
1109 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) | |
1110 | break; /* leave with slp->ns_rwlock still held */ | |
1111 | /* nothing to do, so skip this socket */ | |
1112 | lck_rw_done(&slp->ns_rwlock); | |
1113 | } | |
1114 | } | |
1115 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) { | |
1116 | /* look for a socket to work on in the work queue */ | |
1117 | while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) { | |
1118 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1119 | /* remove from the head of the queue */ | |
1120 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1121 | slp->ns_flag &= ~SLP_WORKQ; | |
1122 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) | |
1123 | break; /* leave with slp->ns_rwlock still held */ | |
1124 | /* nothing to do, so skip this socket */ | |
1125 | lck_rw_done(&slp->ns_rwlock); | |
1126 | } | |
1127 | } | |
1128 | if (!nfsd->nfsd_slp && slp) { | |
1129 | /* we found a socket to work on, grab a reference */ | |
1130 | slp->ns_sref++; | |
1131 | microuptime(&now); | |
1132 | slp->ns_timestamp = now.tv_sec; | |
1133 | /* We keep the socket list in least recently used order for reaping idle sockets */ | |
1134 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); | |
1135 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); | |
1136 | nfsd->nfsd_slp = slp; | |
1137 | opcnt = 0; | |
1138 | /* and put it at the back of the work queue */ | |
1139 | TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq); | |
1140 | slp->ns_flag |= SLP_WORKQ; | |
1141 | lck_rw_done(&slp->ns_rwlock); /* releases the lock taken in the queue scan above */ | |
1142 | } | |
1143 | lck_mtx_unlock(nfsd_mutex); | |
1144 | if (!slp) | |
1145 | continue; | |
1146 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1147 | if (slp->ns_flag & SLP_VALID) { | |
1148 | if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { | |
1149 | slp->ns_flag &= ~SLP_NEEDQ; | |
1150 | nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); | |
1151 | } | |
1152 | if (slp->ns_flag & SLP_DISCONN) | |
1153 | nfsrv_zapsock(slp); | |
1154 | error = nfsrv_dorec(slp, nfsd, &nd); | |
1155 | if (error == EINVAL) { // RPCSEC_GSS drop | |
1156 | if (slp->ns_sotype == SOCK_STREAM) | |
1157 | nfsrv_zapsock(slp); // drop connection | |
1158 | } | |
1159 | writes_todo = 0; | |
1160 | if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) { /* no request, but gathered writes may be due */ | |
1161 | microuptime(&now); | |
1162 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + | |
1163 | (u_quad_t)now.tv_usec; | |
1164 | if (slp->ns_wgtime <= cur_usec) { | |
1165 | error = 0; | |
1166 | cacherep = RC_DOIT; | |
1167 | writes_todo = 1; | |
1168 | } | |
1169 | slp->ns_flag &= ~SLP_DOWRITES; | |
1170 | } | |
1171 | nfsd->nfsd_flag |= NFSD_REQINPROG; /* the next pass takes the "already have some work" branch */ | |
1172 | } | |
1173 | lck_rw_done(&slp->ns_rwlock); | |
1174 | } | |
1175 | if (error || (slp && !(slp->ns_flag & SLP_VALID))) { | |
 /* Nothing to run on this socket: release any request and let go of the socket. */
1176 | if (nd) { | |
1177 | nfsm_chain_cleanup(&nd->nd_nmreq); | |
1178 | if (nd->nd_nam2) | |
1179 | mbuf_freem(nd->nd_nam2); | |
1180 | if (IS_VALID_CRED(nd->nd_cr)) | |
1181 | kauth_cred_unref(&nd->nd_cr); | |
1182 | if (nd->nd_gss_context) | |
1183 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
1184 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); | |
1185 | nd = NULL; | |
1186 | } | |
1187 | nfsd->nfsd_slp = NULL; | |
1188 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; | |
1189 | if (slp) | |
1190 | nfsrv_slpderef(slp); /* drop the reference this nfsd held on the socket */ | |
1191 | if (nfsd_thread_max <= 0) | |
1192 | break; /* server shutting down: leave the service loop */ | |
1193 | continue; | |
1194 | } | |
1195 | if (nd) { | |
1196 | microuptime(&nd->nd_starttime); | |
1197 | if (nd->nd_nam2) | |
1198 | nd->nd_nam = nd->nd_nam2; | |
1199 | else | |
1200 | nd->nd_nam = slp->ns_nam; | |
1201 | ||
1202 | cacherep = nfsrv_getcache(nd, slp, &mrep); | |
1203 | ||
1204 | if (nfsrv_require_resv_port) { | |
1205 | /* Check if source port is a reserved port */ | |
1206 | in_port_t port = 0; | |
1207 | struct sockaddr *saddr = mbuf_data(nd->nd_nam); | |
1208 | ||
1209 | if (saddr->sa_family == AF_INET) | |
1210 | port = ntohs(((struct sockaddr_in*)saddr)->sin_port); | |
1211 | else if (saddr->sa_family == AF_INET6) | |
1212 | port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port); | |
1213 | if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) { | |
 /* Not from a reserved port: turn the request into a NOOP that replies AUTH_TOOWEAK. */
1214 | nd->nd_procnum = NFSPROC_NOOP; | |
1215 | nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); | |
1216 | cacherep = RC_DOIT; | |
1217 | } | |
1218 | } | |
1219 | ||
1220 | } | |
1221 | ||
1222 | /* | |
1223 | * Loop to get all the write RPC replies that have been | |
1224 | * gathered together. | |
1225 | */ | |
1226 | do { | |
1227 | switch (cacherep) { | |
1228 | case RC_DOIT: | |
1229 | if (nd && (nd->nd_vers == NFS_VER3)) | |
1230 | procrastinate = nfsrv_wg_delay_v3; | |
1231 | else | |
1232 | procrastinate = nfsrv_wg_delay; | |
1233 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
1234 | context.vc_ucred = NULL; | |
1235 | if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) /* nd is not dereferenced here when writes_todo is set */ | |
1236 | error = nfsrv_writegather(&nd, slp, &context, &mrep); | |
1237 | else | |
1238 | error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep); | |
1239 | lck_rw_done(&nfsrv_export_rwlock); | |
1240 | if (mrep == NULL) { | |
1241 | /* | |
1242 | * If this is a stream socket and we are not going | |
1243 | * to send a reply we better close the connection | |
1244 | * so the client doesn't hang. | |
1245 | */ | |
1246 | if (error && slp->ns_sotype == SOCK_STREAM) { | |
1247 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1248 | nfsrv_zapsock(slp); | |
1249 | lck_rw_done(&slp->ns_rwlock); | |
1250 | printf("NFS server: NULL reply from proc = %d error = %d\n", | |
1251 | nd->nd_procnum, error); | |
1252 | } | |
1253 | break; | |
1254 | ||
1255 | } | |
1256 | if (error) { | |
1257 | OSAddAtomic64(1, &nfsstats.srv_errs); | |
1258 | nfsrv_updatecache(nd, FALSE, mrep); | |
1259 | if (nd->nd_nam2) { | |
1260 | mbuf_freem(nd->nd_nam2); | |
1261 | nd->nd_nam2 = NULL; | |
1262 | } | |
1263 | break; | |
1264 | } | |
1265 | OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]); | |
1266 | nfsrv_updatecache(nd, TRUE, mrep); | |
1267 | /* FALLTHRU */ | |
1268 | ||
1269 | case RC_REPLY: | |
1270 | if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS | |
1271 | /* | |
1272 | * Need to checksum or encrypt the reply | |
1273 | */ | |
1274 | error = nfs_gss_svc_protect_reply(nd, mrep); | |
1275 | if (error) { | |
1276 | mbuf_freem(mrep); | |
1277 | break; | |
1278 | } | |
1279 | } | |
1280 | ||
1281 | /* | |
1282 | * Get the total size of the reply | |
1283 | */ | |
1284 | m = mrep; | |
1285 | siz = 0; | |
1286 | while (m) { | |
1287 | siz += mbuf_len(m); | |
1288 | m = mbuf_next(m); | |
1289 | } | |
1290 | if (siz <= 0 || siz > NFS_MAXPACKET) { | |
1291 | printf("mbuf siz=%d\n",siz); | |
1292 | panic("Bad nfs svc reply"); | |
1293 | } | |
1294 | m = mrep; | |
1295 | mbuf_pkthdr_setlen(m, siz); | |
1296 | error = mbuf_pkthdr_setrcvif(m, NULL); | |
1297 | if (error) | |
1298 | panic("nfsd setrcvif failed: %d", error); | |
1299 | /* | |
1300 | * For stream protocols, prepend a Sun RPC | |
1301 | * Record Mark. | |
1302 | */ | |
1303 | if (slp->ns_sotype == SOCK_STREAM) { | |
1304 | error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); | |
1305 | if (!error) | |
1306 | *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); /* top bit set, reply length in the rest */ | |
1307 | } | |
1308 | if (!error) { | |
1309 | if (slp->ns_flag & SLP_VALID) { | |
1310 | error = nfsrv_send(slp, nd->nd_nam2, m); | |
1311 | } else { | |
1312 | error = EPIPE; | |
1313 | mbuf_freem(m); | |
1314 | } | |
1315 | } else { | |
1316 | mbuf_freem(m); | |
1317 | } | |
1318 | mrep = NULL; | |
1319 | if (nd->nd_nam2) { | |
1320 | mbuf_freem(nd->nd_nam2); | |
1321 | nd->nd_nam2 = NULL; | |
1322 | } | |
1323 | if (error == EPIPE) { | |
1324 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1325 | nfsrv_zapsock(slp); | |
1326 | lck_rw_done(&slp->ns_rwlock); | |
1327 | } | |
1328 | if (error == EINTR || error == ERESTART) { | |
 /* Interrupted while sending: free the request, drop the socket and exit this thread. */
1329 | nfsm_chain_cleanup(&nd->nd_nmreq); | |
1330 | if (IS_VALID_CRED(nd->nd_cr)) | |
1331 | kauth_cred_unref(&nd->nd_cr); | |
1332 | if (nd->nd_gss_context) | |
1333 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
1334 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); | |
1335 | nfsrv_slpderef(slp); | |
1336 | lck_mtx_lock(nfsd_mutex); /* "done" requires nfsd_mutex held */ | |
1337 | goto done; | |
1338 | } | |
1339 | break; | |
1340 | case RC_DROPIT: | |
1341 | mbuf_freem(nd->nd_nam2); | |
1342 | nd->nd_nam2 = NULL; | |
1343 | break; | |
1344 | }; | |
1345 | opcnt++; | |
1346 | if (nd) { | |
1347 | nfsm_chain_cleanup(&nd->nd_nmreq); | |
1348 | if (nd->nd_nam2) | |
1349 | mbuf_freem(nd->nd_nam2); | |
1350 | if (IS_VALID_CRED(nd->nd_cr)) | |
1351 | kauth_cred_unref(&nd->nd_cr); | |
1352 | if (nd->nd_gss_context) | |
1353 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); | |
1354 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); | |
1355 | nd = NULL; | |
1356 | } | |
1357 | ||
1358 | /* | |
1359 | * Check to see if there are outstanding writes that | |
1360 | * need to be serviced. | |
1361 | */ | |
1362 | writes_todo = 0; | |
1363 | if (slp->ns_wgtime) { | |
1364 | microuptime(&now); | |
1365 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + | |
1366 | (u_quad_t)now.tv_usec; | |
1367 | if (slp->ns_wgtime <= cur_usec) { | |
1368 | cacherep = RC_DOIT; | |
1369 | writes_todo = 1; | |
1370 | } | |
1371 | } | |
1372 | } while (writes_todo); | |
1373 | ||
1374 | nd = NULL; | |
1375 | if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) { /* stay on this socket: nothing is waiting and we are under the 8 request limit */ | |
1376 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1377 | error = nfsrv_dorec(slp, nfsd, &nd); | |
1378 | if (error == EINVAL) { // RPCSEC_GSS drop | |
1379 | if (slp->ns_sotype == SOCK_STREAM) | |
1380 | nfsrv_zapsock(slp); // drop connection | |
1381 | } | |
1382 | lck_rw_done(&slp->ns_rwlock); | |
1383 | } | |
1384 | if (!nd) { | |
1385 | /* drop our reference on the socket */ | |
1386 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; | |
1387 | nfsd->nfsd_slp = NULL; | |
1388 | nfsrv_slpderef(slp); | |
1389 | } | |
1390 | } | |
1391 | lck_mtx_lock(nfsd_mutex); /* reached via break; "done" runs with nfsd_mutex held */ | |
1392 | done: | |
1393 | TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); | |
1394 | FREE(nfsd, M_NFSD); | |
1395 | if (--nfsd_thread_count == 0) /* last nfsd thread out tears the server state down */ | |
1396 | nfsrv_cleanup(); | |
1397 | lck_mtx_unlock(nfsd_mutex); | |
1398 | return (error); | |
1399 | } | |
1400 | ||
1401 | int | |
1402 | nfssvc_export(user_addr_t argp) | |
1403 | { | |
1404 | int error = 0, is_64bit; | |
1405 | struct user_nfs_export_args unxa; | |
1406 | vfs_context_t ctx = vfs_context_current(); | |
1407 | ||
1408 | is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); | |
1409 | ||
1410 | /* copy in pointers to path and export args */ | |
1411 | if (is_64bit) { | |
1412 | error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); | |
1413 | } else { | |
1414 | struct nfs_export_args tnxa; | |
1415 | error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); | |
1416 | if (error == 0) { | |
1417 | /* munge into LP64 version of nfs_export_args structure */ | |
1418 | unxa.nxa_fsid = tnxa.nxa_fsid; | |
1419 | unxa.nxa_expid = tnxa.nxa_expid; | |
1420 | unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); | |
1421 | unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); | |
1422 | unxa.nxa_flags = tnxa.nxa_flags; | |
1423 | unxa.nxa_netcount = tnxa.nxa_netcount; | |
1424 | unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); | |
1425 | } | |
1426 | } | |
1427 | if (error) | |
1428 | return (error); | |
1429 | ||
1430 | error = nfsrv_export(&unxa, ctx); | |
1431 | ||
1432 | return (error); | |
1433 | } | |
1434 | ||
1435 | /* | |
1436 | * Shut down a socket associated with an nfsrv_sock structure. | |
1437 | * Should be called with the send lock set, if required. | |
1438 | * The trick here is to increment the sref at the start, so that the nfsds | |
1439 | * will stop using it and clear ns_flag at the end so that it will not be | |
1440 | * reassigned during cleanup. | |
1441 | */ | |
1442 | void | |
1443 | nfsrv_zapsock(struct nfsrv_sock *slp) | |
1444 | { | |
1445 | socket_t so; | |
1446 | ||
1447 | if ((slp->ns_flag & SLP_VALID) == 0) | |
1448 | return; | |
1449 | slp->ns_flag &= ~SLP_ALLFLAGS; | |
1450 | ||
1451 | so = slp->ns_so; | |
1452 | if (so == NULL) | |
1453 | return; | |
1454 | ||
1455 | sock_setupcall(so, NULL, NULL); | |
1456 | sock_shutdown(so, SHUT_RDWR); | |
1457 | ||
1458 | /* | |
1459 | * Remove from the up-call queue | |
1460 | */ | |
1461 | nfsrv_uc_dequeue(slp); | |
1462 | } | |
1463 | ||
1464 | /* | |
1465 | * cleanup and release a server socket structure. | |
1466 | */ | |
1467 | void | |
1468 | nfsrv_slpfree(struct nfsrv_sock *slp) | |
1469 | { | |
1470 | struct nfsrv_descript *nwp, *nnwp; | |
1471 | ||
1472 | if (slp->ns_so) { | |
1473 | sock_release(slp->ns_so); | |
1474 | slp->ns_so = NULL; | |
1475 | } | |
1476 | if (slp->ns_nam) | |
1477 | mbuf_free(slp->ns_nam); | |
1478 | if (slp->ns_raw) | |
1479 | mbuf_freem(slp->ns_raw); | |
1480 | if (slp->ns_rec) | |
1481 | mbuf_freem(slp->ns_rec); | |
1482 | if (slp->ns_frag) | |
1483 | mbuf_freem(slp->ns_frag); | |
1484 | slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; | |
1485 | slp->ns_reccnt = 0; | |
1486 | ||
1487 | for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { | |
1488 | nnwp = nwp->nd_tq.le_next; | |
1489 | LIST_REMOVE(nwp, nd_tq); | |
1490 | nfsm_chain_cleanup(&nwp->nd_nmreq); | |
1491 | if (nwp->nd_mrep) | |
1492 | mbuf_freem(nwp->nd_mrep); | |
1493 | if (nwp->nd_nam2) | |
1494 | mbuf_freem(nwp->nd_nam2); | |
1495 | if (IS_VALID_CRED(nwp->nd_cr)) | |
1496 | kauth_cred_unref(&nwp->nd_cr); | |
1497 | if (nwp->nd_gss_context) | |
1498 | nfs_gss_svc_ctx_deref(nwp->nd_gss_context); | |
1499 | FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); | |
1500 | } | |
1501 | LIST_INIT(&slp->ns_tq); | |
1502 | ||
1503 | lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); | |
1504 | lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); | |
1505 | FREE(slp, M_NFSSVC); | |
1506 | } | |
1507 | ||
1508 | /* | |
1509 | * Derefence a server socket structure. If it has no more references and | |
1510 | * is no longer valid, you can throw it away. | |
1511 | */ | |
1512 | static void | |
1513 | nfsrv_slpderef_locked(struct nfsrv_sock *slp) | |
1514 | { | |
1515 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1516 | slp->ns_sref--; | |
1517 | ||
1518 | if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { | |
1519 | if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { | |
1520 | /* remove socket from queue since there's no work */ | |
1521 | if (slp->ns_flag & SLP_WAITQ) | |
1522 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1523 | else | |
1524 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1525 | slp->ns_flag &= ~SLP_QUEUED; | |
1526 | } | |
1527 | lck_rw_done(&slp->ns_rwlock); | |
1528 | return; | |
1529 | } | |
1530 | ||
1531 | /* This socket is no longer valid, so we'll get rid of it */ | |
1532 | ||
1533 | if (slp->ns_flag & SLP_QUEUED) { | |
1534 | if (slp->ns_flag & SLP_WAITQ) | |
1535 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1536 | else | |
1537 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1538 | slp->ns_flag &= ~SLP_QUEUED; | |
1539 | } | |
1540 | lck_rw_done(&slp->ns_rwlock); | |
1541 | ||
1542 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); | |
1543 | if (slp->ns_sotype == SOCK_STREAM) | |
1544 | nfsrv_sock_tcp_cnt--; | |
1545 | ||
1546 | /* now remove from the write gather socket list */ | |
1547 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1548 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1549 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1550 | } | |
1551 | nfsrv_slpfree(slp); | |
1552 | } | |
1553 | ||
1554 | void | |
1555 | nfsrv_slpderef(struct nfsrv_sock *slp) | |
1556 | { | |
1557 | lck_mtx_lock(nfsd_mutex); | |
1558 | nfsrv_slpderef_locked(slp); | |
1559 | lck_mtx_unlock(nfsd_mutex); | |
1560 | } | |
1561 | ||
1562 | /* | |
1563 | * Check periodically for idle sockest if needed and | |
1564 | * zap them. | |
1565 | */ | |
1566 | void | |
1567 | nfsrv_idlesock_timer(__unused void *param0, __unused void *param1) | |
1568 | { | |
1569 | struct nfsrv_sock *slp, *tslp; | |
1570 | struct timeval now; | |
1571 | time_t time_to_wait = nfsrv_sock_idle_timeout; | |
1572 | ||
1573 | microuptime(&now); | |
1574 | lck_mtx_lock(nfsd_mutex); | |
1575 | ||
1576 | /* Turn off the timer if we're suppose to and get out */ | |
1577 | if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) | |
1578 | nfsrv_sock_idle_timeout = 0; | |
1579 | if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) { | |
1580 | nfsrv_idlesock_timer_on = 0; | |
1581 | lck_mtx_unlock(nfsd_mutex); | |
1582 | return; | |
1583 | } | |
1584 | ||
1585 | TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) { | |
1586 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1587 | /* Skip udp and referenced sockets */ | |
1588 | if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) { | |
1589 | lck_rw_done(&slp->ns_rwlock); | |
1590 | continue; | |
1591 | } | |
1592 | /* | |
1593 | * If this is the first non-referenced socket that hasn't idle out, | |
1594 | * use its time stamp to calculate the earlist time in the future | |
1595 | * to start the next invocation of the timer. Since the nfsrv_socklist | |
1596 | * is sorted oldest access to newest. Once we find the first one, | |
1597 | * we're done and break out of the loop. | |
1598 | */ | |
1599 | if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) || | |
1600 | nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { | |
1601 | time_to_wait -= now.tv_sec - slp->ns_timestamp; | |
1602 | if (time_to_wait < 1) | |
1603 | time_to_wait = 1; | |
1604 | lck_rw_done(&slp->ns_rwlock); | |
1605 | break; | |
1606 | } | |
1607 | /* | |
1608 | * Bump the ref count. nfsrv_slpderef below will destroy | |
1609 | * the socket, since nfsrv_zapsock has closed it. | |
1610 | */ | |
1611 | slp->ns_sref++; | |
1612 | nfsrv_zapsock(slp); | |
1613 | lck_rw_done(&slp->ns_rwlock); | |
1614 | nfsrv_slpderef_locked(slp); | |
1615 | } | |
1616 | ||
1617 | /* Start ourself back up */ | |
1618 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); | |
1619 | /* Remember when the next timer will fire for nfssvc_addsock. */ | |
1620 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; | |
1621 | lck_mtx_unlock(nfsd_mutex); | |
1622 | } | |
1623 | ||
1624 | /* | |
1625 | * Clean up the data structures for the server. | |
1626 | */ | |
1627 | void | |
1628 | nfsrv_cleanup(void) | |
1629 | { | |
1630 | struct nfsrv_sock *slp, *nslp; | |
1631 | struct timeval now; | |
1632 | #if CONFIG_FSE | |
1633 | struct nfsrv_fmod *fp, *nfp; | |
1634 | int i; | |
1635 | #endif | |
1636 | ||
1637 | microuptime(&now); | |
1638 | for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { | |
1639 | nslp = TAILQ_NEXT(slp, ns_chain); | |
1640 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1641 | slp->ns_sref++; | |
1642 | if (slp->ns_flag & SLP_VALID) | |
1643 | nfsrv_zapsock(slp); | |
1644 | lck_rw_done(&slp->ns_rwlock); | |
1645 | nfsrv_slpderef_locked(slp); | |
1646 | } | |
1647 | # | |
1648 | #if CONFIG_FSE | |
1649 | /* | |
1650 | * Flush pending file write fsevents | |
1651 | */ | |
1652 | lck_mtx_lock(nfsrv_fmod_mutex); | |
1653 | for (i = 0; i < NFSRVFMODHASHSZ; i++) { | |
1654 | for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { | |
1655 | /* | |
1656 | * Fire off the content modified fsevent for each | |
1657 | * entry, remove it from the list, and free it. | |
1658 | */ | |
1659 | if (nfsrv_fsevents_enabled) { | |
1660 | fp->fm_context.vc_thread = current_thread(); | |
1661 | add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, | |
1662 | FSE_ARG_VNODE, fp->fm_vp, | |
1663 | FSE_ARG_DONE); | |
1664 | } | |
1665 | vnode_put(fp->fm_vp); | |
1666 | kauth_cred_unref(&fp->fm_context.vc_ucred); | |
1667 | nfp = LIST_NEXT(fp, fm_link); | |
1668 | LIST_REMOVE(fp, fm_link); | |
1669 | FREE(fp, M_TEMP); | |
1670 | } | |
1671 | } | |
1672 | nfsrv_fmod_pending = 0; | |
1673 | lck_mtx_unlock(nfsrv_fmod_mutex); | |
1674 | #endif | |
1675 | ||
1676 | nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ | |
1677 | ||
1678 | nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ | |
1679 | ||
1680 | nfsrv_cleancache(); /* And clear out server cache */ | |
1681 | ||
1682 | nfsrv_udpsock = NULL; | |
1683 | nfsrv_udp6sock = NULL; | |
1684 | } | |
1685 | ||
1686 | #endif /* NFS_NOSERVER */ |