]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * | |
33 | * This code is derived from software contributed to Berkeley by | |
34 | * Rick Macklem at The University of Guelph. | |
35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. All advertising materials mentioning features or use of this software | |
45 | * must display the following acknowledgement: | |
46 | * This product includes software developed by the University of | |
47 | * California, Berkeley and its contributors. | |
48 | * 4. Neither the name of the University nor the names of its contributors | |
49 | * may be used to endorse or promote products derived from this software | |
50 | * without specific prior written permission. | |
51 | * | |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
62 | * SUCH DAMAGE. | |
63 | * | |
64 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 | |
65 | * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ | |
66 | */ | |
67 | /* | |
68 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
69 | * support for mandatory and extensible security protections. This notice | |
70 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
71 | * Version 2.0. | |
72 | */ | |
73 | ||
74 | #include <sys/param.h> | |
75 | #include <sys/systm.h> | |
76 | #include <sys/kernel.h> | |
77 | #include <sys/file_internal.h> | |
78 | #include <sys/filedesc.h> | |
79 | #include <sys/stat.h> | |
80 | #include <sys/vnode_internal.h> | |
81 | #include <sys/mount_internal.h> | |
82 | #include <sys/proc_internal.h> /* for fdflags */ | |
83 | #include <sys/kauth.h> | |
84 | #include <sys/sysctl.h> | |
85 | #include <sys/ubc.h> | |
86 | #include <sys/uio.h> | |
87 | #include <sys/malloc.h> | |
88 | #include <sys/kpi_mbuf.h> | |
89 | #include <sys/socket.h> | |
90 | #include <sys/socketvar.h> | |
91 | #include <sys/domain.h> | |
92 | #include <sys/protosw.h> | |
93 | #include <sys/fcntl.h> | |
94 | #include <sys/lockf.h> | |
95 | #include <sys/syslog.h> | |
96 | #include <sys/user.h> | |
97 | #include <sys/sysproto.h> | |
98 | #include <sys/kpi_socket.h> | |
99 | #include <sys/fsevents.h> | |
100 | #include <libkern/OSAtomic.h> | |
101 | #include <kern/thread_call.h> | |
102 | #include <kern/task.h> | |
103 | ||
104 | #include <security/audit/audit.h> | |
105 | ||
106 | #include <netinet/in.h> | |
107 | #include <netinet/tcp.h> | |
108 | #include <nfs/xdr_subs.h> | |
109 | #include <nfs/rpcv2.h> | |
110 | #include <nfs/nfsproto.h> | |
111 | #include <nfs/nfs.h> | |
112 | #include <nfs/nfsm_subs.h> | |
113 | #include <nfs/nfsrvcache.h> | |
114 | #include <nfs/nfs_gss.h> | |
115 | #include <nfs/nfsmount.h> | |
116 | #include <nfs/nfsnode.h> | |
117 | #include <nfs/nfs_lock.h> | |
118 | #if CONFIG_MACF | |
119 | #include <security/mac_framework.h> | |
120 | #endif | |
121 | ||
122 | kern_return_t thread_terminate(thread_t); /* XXX: prototype declared locally in this file; called further down so the nfsiod threads can terminate themselves */ | |
123 | ||
124 | #if NFSSERVER | |
125 | ||
126 | extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, /* array of NFS_NPROCS handler function pointers, defined outside this file */ | |
127 | struct nfsrv_sock *slp, | |
128 | vfs_context_t ctx, | |
129 | mbuf_t *mrepp); | |
130 | extern int nfsrv_wg_delay; /* exported read-write below as sysctl "wg_delay" under the nfs server node */ | |
131 | extern int nfsrv_wg_delay_v3; /* exported read-write below as sysctl "wg_delay_v3" under the nfs server node */ | |
132 | ||
133 | static int nfsrv_require_resv_port = 0; /* exported read-write below as sysctl "require_resv_port" */ | |
134 | static time_t nfsrv_idlesock_timer_on = 0; /* NOTE(review): presumably records when the idle-socket timer was armed - not referenced in the part of the file reviewed here, confirm */ | |
135 | static int nfsrv_sock_tcp_cnt = 0; /* exported read-only below as sysctl "nfsd_tcp_connections" */ | |
136 | #define NFSD_MIN_IDLE_TIMEOUT 30 /* NOTE(review): presumably a lower bound, in seconds, for nfsrv_sock_idle_timeout - confirm against its users */ | |
137 | static int nfsrv_sock_idle_timeout = 3600; /* One hour; exported read-write below as sysctl "nfsd_sock_idle_timeout" */ | |
138 | ||
139 | int nfssvc_export(user_addr_t argp); /* called from nfssvc() for NFSSVC_EXPORT */ | |
140 | int nfssvc_nfsd(void); /* called from nfssvc() for NFSSVC_NFSD */ | |
141 | int nfssvc_addsock(socket_t, mbuf_t); /* called from nfssvc() for NFSSVC_ADDSOCK with the socket and optional client address mbuf */ | |
142 | void nfsrv_zapsock(struct nfsrv_sock *); | |
143 | void nfsrv_slpderef(struct nfsrv_sock *); | |
144 | void nfsrv_slpfree(struct nfsrv_sock *); | |
145 | ||
146 | #endif /* NFSSERVER */ | |
147 | ||
148 | /* | |
149 | * sysctl stuff: every entry below hangs off the "nfs" node created under _vfs_generic. Client tunables live under its "client" child (NFSCLIENT builds only) and server tunables under its "server" child (NFSSERVER builds only). Entries declared with CTLFLAG_RD are read-only; all others are read-write. | |
150 | */ | |
151 | SYSCTL_DECL(_vfs_generic); | |
152 | SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); | |
153 | ||
154 | #if NFSCLIENT | |
155 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); | |
156 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); | |
157 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); | |
158 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); | |
159 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); | |
160 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); | |
161 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); | |
162 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); | |
163 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); /* read-only; the counter is maintained by nfsiod_start()/nfsiod_terminate() */ | |
164 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); /* read-only */ | |
165 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); | |
166 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); | |
167 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); | |
168 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); | |
169 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); | |
170 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); | |
171 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); | |
172 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); | |
173 | SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); | |
174 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); | |
175 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, ""); | |
176 | SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), ""); /* string knob, bounded by the size of the nfs4_default_domain buffer */ | |
177 | #endif /* NFSCLIENT */ | |
178 | ||
179 | #if NFSSERVER | |
180 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); | |
181 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); | |
182 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); | |
183 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); | |
184 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); | |
185 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); | |
186 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); | |
187 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); | |
188 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); | |
189 | SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); | |
190 | #if CONFIG_FSE | |
191 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); | |
192 | #endif | |
193 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); | |
194 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); /* read-only */ | |
195 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, ""); | |
196 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, ""); /* read-only */ | |
197 | #ifdef NFS_UC_Q_DEBUG | |
198 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, ""); | |
199 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, ""); | |
200 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, ""); | |
201 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, ""); /* read-only; __DECONST strips the qualifier from the counter's address for the macro */ | |
202 | #endif | |
203 | #endif /* NFSSERVER */ | |
204 | ||
205 | ||
206 | #if NFSCLIENT | |
207 | ||
208 | static int | |
209 | mapname2id(struct nfs_testmapid *map) | |
210 | { | |
211 | int error; | |
212 | ||
213 | error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); | |
214 | if (error) | |
215 | return (error); | |
216 | ||
217 | if (map->ntm_grpflag) | |
218 | error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); | |
219 | else | |
220 | error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); | |
221 | ||
222 | return (error); | |
223 | } | |
224 | ||
225 | static int | |
226 | mapid2name(struct nfs_testmapid *map) | |
227 | { | |
228 | int error; | |
229 | size_t len = sizeof(map->ntm_name); | |
230 | ||
231 | if (map->ntm_grpflag) | |
232 | error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); | |
233 | else | |
234 | error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); | |
235 | ||
236 | if (error) | |
237 | return (error); | |
238 | ||
239 | error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); | |
240 | ||
241 | return (error); | |
242 | ||
243 | } | |
244 | ||
245 | static int | |
246 | nfsclnt_testidmap(proc_t p, user_addr_t argp) | |
247 | { | |
248 | struct nfs_testmapid mapid; | |
249 | int error, coerror; | |
250 | size_t len = sizeof(mapid.ntm_name); | |
251 | ||
252 | /* Let root make this call. */ | |
253 | error = proc_suser(p); | |
254 | if (error) | |
255 | return (error); | |
256 | ||
257 | error = copyin(argp, &mapid, sizeof(mapid)); | |
258 | if (error) | |
259 | return (error); | |
260 | switch (mapid.ntm_lookup) { | |
261 | case NTM_NAME2ID: | |
262 | error = mapname2id(&mapid); | |
263 | break; | |
264 | case NTM_ID2NAME: | |
265 | error = mapid2name(&mapid); | |
266 | break; | |
267 | case NTM_NAME2GUID: | |
268 | error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag); | |
269 | break; | |
270 | case NTM_GUID2NAME: | |
271 | error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag); | |
272 | break; | |
273 | default: | |
274 | return (EINVAL); | |
275 | } | |
276 | ||
277 | coerror = copyout(&mapid, argp, sizeof(mapid)); | |
278 | ||
279 | return (error ? error : coerror); | |
280 | } | |
281 | ||
282 | int | |
283 | nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) | |
284 | { | |
285 | struct lockd_ans la; | |
286 | int error; | |
287 | ||
288 | switch (uap->flag) { | |
289 | case NFSCLNT_LOCKDANS: | |
290 | error = copyin(uap->argp, &la, sizeof(la)); | |
291 | if (!error) | |
292 | error = nfslockdans(p, &la); | |
293 | break; | |
294 | case NFSCLNT_LOCKDNOTIFY: | |
295 | error = nfslockdnotify(p, uap->argp); | |
296 | break; | |
297 | case NFSCLNT_TESTIDMAP: | |
298 | error = nfsclnt_testidmap(p, uap->argp); | |
299 | break; | |
300 | default: | |
301 | error = EINVAL; | |
302 | } | |
303 | return (error); | |
304 | } | |
305 | ||
306 | ||
307 | /* | |
308 | * Asynchronous I/O threads for client NFS. | |
309 | * They do read-ahead and write-behind operations on the block I/O cache. | |
310 | * | |
311 | * The pool of up to nfsiod_thread_max threads is launched on demand; threads exit | |
312 | * when unused for a while. There are as many nfsiod structs as there are | |
313 | * nfsiod threads; however there's no strict tie between a thread and a struct. | |
314 | * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes | |
315 | * up, it removes the next struct nfsiod from the queue and services it. Then | |
316 | * it will put the struct at the head of the free list and sleep on it. | |
317 | * Async requests will pull the next struct nfsiod from the head of the free list, | |
318 | * put it on the work queue, and wake whatever thread is waiting on that struct. | |
319 | */ | |
320 | ||
321 | /* | |
322 | * nfsiod thread exit routine | |
323 | * | |
324 | * Must be called with nfsiod_mutex held so that the | |
325 | * decision to terminate is atomic with the termination. | |
326 | */ | |
327 | void | |
328 | nfsiod_terminate(struct nfsiod *niod) | |
329 | { | |
330 | nfsiod_thread_count--; | |
331 | lck_mtx_unlock(nfsiod_mutex); | |
332 | if (niod) | |
333 | FREE(niod, M_TEMP); | |
334 | else | |
335 | printf("nfsiod: terminating without niod\n"); | |
336 | thread_terminate(current_thread()); | |
337 | /*NOTREACHED*/ | |
338 | } | |
339 | ||
340 | /* nfsiod thread startup routine */ | |
341 | void | |
342 | nfsiod_thread(void) | |
343 | { | |
344 | struct nfsiod *niod; | |
345 | int error; | |
346 | ||
347 | MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); | |
348 | if (!niod) { | |
349 | lck_mtx_lock(nfsiod_mutex); | |
350 | nfsiod_thread_count--; | |
351 | wakeup(current_thread()); | |
352 | lck_mtx_unlock(nfsiod_mutex); | |
353 | thread_terminate(current_thread()); | |
354 | /*NOTREACHED*/ | |
355 | } | |
356 | bzero(niod, sizeof(*niod)); | |
357 | lck_mtx_lock(nfsiod_mutex); | |
358 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
359 | wakeup(current_thread()); | |
360 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
361 | /* shouldn't return... so we have an error */ | |
362 | /* remove an old nfsiod struct and terminate */ | |
363 | lck_mtx_lock(nfsiod_mutex); | |
364 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
365 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
366 | nfsiod_terminate(niod); | |
367 | /*NOTREACHED*/ | |
368 | } | |
369 | ||
370 | /* | |
371 | * Start up another nfsiod thread. | |
372 | * (unless we're already maxed out and there are nfsiods running) | |
373 | */ | |
374 | int | |
375 | nfsiod_start(void) | |
376 | { | |
377 | thread_t thd = THREAD_NULL; | |
378 | ||
379 | lck_mtx_lock(nfsiod_mutex); | |
380 | if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { | |
381 | lck_mtx_unlock(nfsiod_mutex); | |
382 | return (EBUSY); | |
383 | } | |
384 | nfsiod_thread_count++; | |
385 | if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { | |
386 | lck_mtx_unlock(nfsiod_mutex); | |
387 | return (EBUSY); | |
388 | } | |
389 | /* wait for the thread to complete startup */ | |
390 | msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); | |
391 | thread_deallocate(thd); | |
392 | return (0); | |
393 | } | |
394 | ||
395 | /* | |
396 | * Continuation for Asynchronous I/O threads for NFS client. | |
397 | * | |
398 | * Grab an nfsiod struct to work on, do some work, then drop it | |
399 | */ | |
400 | int | |
401 | nfsiod_continue(int error) | |
402 | { | |
403 | struct nfsiod *niod; | |
404 | struct nfsmount *nmp; | |
405 | struct nfsreq *req, *treq; | |
406 | struct nfs_reqqhead iodq; | |
407 | int morework; | |
408 | ||
409 | lck_mtx_lock(nfsiod_mutex); | |
410 | niod = TAILQ_FIRST(&nfsiodwork); | |
411 | if (!niod) { | |
412 | /* there's no work queued up */ | |
413 | /* remove an old nfsiod struct and terminate */ | |
414 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
415 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
416 | nfsiod_terminate(niod); | |
417 | /*NOTREACHED*/ | |
418 | } | |
419 | TAILQ_REMOVE(&nfsiodwork, niod, niod_link); | |
420 | ||
421 | worktodo: | |
422 | while ((nmp = niod->niod_nmp)) { | |
423 | if (nmp == NULL){ | |
424 | niod->niod_nmp = NULL; | |
425 | break; | |
426 | } | |
427 | ||
428 | /* | |
429 | * Service this mount's async I/O queue. | |
430 | * | |
431 | * In order to ensure some level of fairness between mounts, | |
432 | * we grab all the work up front before processing it so any | |
433 | * new work that arrives will be serviced on a subsequent | |
434 | * iteration - and we have a chance to see if other work needs | |
435 | * to be done (e.g. the delayed write queue needs to be pushed | |
436 | * or other mounts are waiting for an nfsiod). | |
437 | */ | |
438 | /* grab the current contents of the queue */ | |
439 | TAILQ_INIT(&iodq); | |
440 | TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); | |
441 | /* Mark each iod request as being managed by an iod */ | |
442 | TAILQ_FOREACH(req, &iodq, r_achain) { | |
443 | lck_mtx_lock(&req->r_mtx); | |
444 | assert(!(req->r_flags & R_IOD)); | |
445 | req->r_flags |= R_IOD; | |
446 | lck_mtx_unlock(&req->r_mtx); | |
447 | } | |
448 | lck_mtx_unlock(nfsiod_mutex); | |
449 | ||
450 | /* process the queue */ | |
451 | TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { | |
452 | TAILQ_REMOVE(&iodq, req, r_achain); | |
453 | req->r_achain.tqe_next = NFSREQNOLIST; | |
454 | req->r_callback.rcb_func(req); | |
455 | } | |
456 | ||
457 | /* now check if there's more/other work to be done */ | |
458 | lck_mtx_lock(nfsiod_mutex); | |
459 | morework = !TAILQ_EMPTY(&nmp->nm_iodq); | |
460 | if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { | |
461 | /* | |
462 | * we're going to stop working on this mount but if the | |
463 | * mount still needs more work so queue it up | |
464 | */ | |
465 | if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) | |
466 | TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); | |
467 | nmp->nm_niod = NULL; | |
468 | niod->niod_nmp = NULL; | |
469 | } | |
470 | } | |
471 | ||
472 | /* loop if there's still a mount to work on */ | |
473 | if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { | |
474 | niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); | |
475 | TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); | |
476 | niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; | |
477 | } | |
478 | if (niod->niod_nmp) | |
479 | goto worktodo; | |
480 | ||
481 | /* queue ourselves back up - if there aren't too many threads running */ | |
482 | if (nfsiod_thread_count <= NFSIOD_MAX) { | |
483 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
484 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
485 | /* shouldn't return... so we have an error */ | |
486 | /* remove an old nfsiod struct and terminate */ | |
487 | lck_mtx_lock(nfsiod_mutex); | |
488 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
489 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
490 | } | |
491 | nfsiod_terminate(niod); | |
492 | /*NOTREACHED*/ | |
493 | return (0); | |
494 | } | |
495 | ||
496 | #endif /* NFSCLIENT */ | |
497 | ||
498 | ||
499 | #if NFSSERVER | |
500 | ||
501 | /* | |
502 | * NFS server system calls | |
503 | * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c | |
504 | */ | |
505 | ||
506 | /* | |
507 | * Get file handle system call | |
508 | */ | |
509 | int | |
510 | getfh(proc_t p, struct getfh_args *uap, __unused int *retval) | |
511 | { | |
512 | vnode_t vp; | |
513 | struct nfs_filehandle nfh; | |
514 | int error, fhlen, fidlen; | |
515 | struct nameidata nd; | |
516 | char path[MAXPATHLEN], *ptr; | |
517 | size_t pathlen; | |
518 | struct nfs_exportfs *nxfs; | |
519 | struct nfs_export *nx; | |
520 | ||
521 | /* | |
522 | * Must be super user | |
523 | */ | |
524 | error = proc_suser(p); | |
525 | if (error) | |
526 | return (error); | |
527 | ||
528 | error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); | |
529 | if (!error) | |
530 | error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); | |
531 | if (error) | |
532 | return (error); | |
533 | /* limit fh size to length specified (or v3 size by default) */ | |
534 | if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) | |
535 | fhlen = NFSV3_MAX_FH_SIZE; | |
536 | fidlen = fhlen - sizeof(struct nfs_exphandle); | |
537 | ||
538 | if (!nfsrv_is_initialized()) | |
539 | return (EINVAL); | |
540 | ||
541 | NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, | |
542 | UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); | |
543 | error = namei(&nd); | |
544 | if (error) | |
545 | return (error); | |
546 | nameidone(&nd); | |
547 | ||
548 | vp = nd.ni_vp; | |
549 | ||
550 | // find exportfs that matches f_mntonname | |
551 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
552 | ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; | |
553 | LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { | |
554 | if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) | |
555 | break; | |
556 | } | |
557 | if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { | |
558 | error = EINVAL; | |
559 | goto out; | |
560 | } | |
561 | // find export that best matches remainder of path | |
562 | ptr = path + strlen(nxfs->nxfs_path); | |
563 | while (*ptr && (*ptr == '/')) | |
564 | ptr++; | |
565 | LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { | |
566 | int len = strlen(nx->nx_path); | |
567 | if (len == 0) // we've hit the export entry for the root directory | |
568 | break; | |
569 | if (!strncmp(nx->nx_path, ptr, len)) | |
570 | break; | |
571 | } | |
572 | if (!nx) { | |
573 | error = EINVAL; | |
574 | goto out; | |
575 | } | |
576 | ||
577 | bzero(&nfh, sizeof(nfh)); | |
578 | nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); | |
579 | nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); | |
580 | nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); | |
581 | nfh.nfh_xh.nxh_flags = 0; | |
582 | nfh.nfh_xh.nxh_reserved = 0; | |
583 | nfh.nfh_len = fidlen; | |
584 | error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); | |
585 | if (nfh.nfh_len > (uint32_t)fidlen) | |
586 | error = EOVERFLOW; | |
587 | nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; | |
588 | nfh.nfh_len += sizeof(nfh.nfh_xh); | |
589 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; | |
590 | ||
591 | out: | |
592 | lck_rw_done(&nfsrv_export_rwlock); | |
593 | vnode_put(vp); | |
594 | if (error) | |
595 | return (error); | |
596 | /* | |
597 | * At first blush, this may appear to leak a kernel stack | |
598 | * address, but the copyout() never reaches &nfh.nfh_fhp | |
599 | * (sizeof(fhandle_t) < sizeof(nfh)). | |
600 | */ | |
601 | error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); | |
602 | return (error); | |
603 | } | |
604 | ||
605 | extern const struct fileops vnops; | |
606 | ||
607 | /* | |
608 | * syscall for the rpc.lockd to use to translate a NFS file handle into | |
609 | * an open descriptor. | |
610 | * | |
611 | * warning: do not remove the suser() call or this becomes one giant | |
612 | * security hole. | |
613 | */ | |
614 | int | |
615 | fhopen( proc_t p, | |
616 | struct fhopen_args *uap, | |
617 | int32_t *retval) | |
618 | { | |
619 | vnode_t vp; | |
620 | struct nfs_filehandle nfh; | |
621 | struct nfs_export *nx; | |
622 | struct nfs_export_options *nxo; | |
623 | struct flock lf; | |
624 | struct fileproc *fp, *nfp; | |
625 | int fmode, error, type; | |
626 | int indx; | |
627 | vfs_context_t ctx = vfs_context_current(); | |
628 | kauth_action_t action; | |
629 | ||
630 | /* | |
631 | * Must be super user | |
632 | */ | |
633 | error = suser(vfs_context_ucred(ctx), 0); | |
634 | if (error) { | |
635 | return (error); | |
636 | } | |
637 | ||
638 | if (!nfsrv_is_initialized()) { | |
639 | return (EINVAL); | |
640 | } | |
641 | ||
642 | fmode = FFLAGS(uap->flags); | |
643 | /* why not allow a non-read/write open for our lockd? */ | |
644 | if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) | |
645 | return (EINVAL); | |
646 | ||
647 | error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); | |
648 | if (error) | |
649 | return (error); | |
650 | if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || | |
651 | (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) | |
652 | return (EINVAL); | |
653 | error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); | |
654 | if (error) | |
655 | return (error); | |
656 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; | |
657 | ||
658 | lck_rw_lock_shared(&nfsrv_export_rwlock); | |
659 | /* now give me my vnode, it gets returned to me with a reference */ | |
660 | error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); | |
661 | lck_rw_done(&nfsrv_export_rwlock); | |
662 | if (error) { | |
663 | if (error == NFSERR_TRYLATER) | |
664 | error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? | |
665 | return (error); | |
666 | } | |
667 | ||
668 | /* | |
669 | * From now on we have to make sure not | |
670 | * to forget about the vnode. | |
671 | * Any error that causes an abort must vnode_put(vp). | |
672 | * Just set error = err and 'goto bad;'. | |
673 | */ | |
674 | ||
675 | /* | |
676 | * from vn_open | |
677 | */ | |
678 | if (vnode_vtype(vp) == VSOCK) { | |
679 | error = EOPNOTSUPP; | |
680 | goto bad; | |
681 | } | |
682 | ||
683 | /* disallow write operations on directories */ | |
684 | if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { | |
685 | error = EISDIR; | |
686 | goto bad; | |
687 | } | |
688 | ||
689 | #if CONFIG_MACF | |
690 | if ((error = mac_vnode_check_open(ctx, vp, fmode))) | |
691 | goto bad; | |
692 | #endif | |
693 | ||
694 | /* compute action to be authorized */ | |
695 | action = 0; | |
696 | if (fmode & FREAD) | |
697 | action |= KAUTH_VNODE_READ_DATA; | |
698 | if (fmode & (FWRITE | O_TRUNC)) | |
699 | action |= KAUTH_VNODE_WRITE_DATA; | |
700 | if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) | |
701 | goto bad; | |
702 | ||
703 | if ((error = VNOP_OPEN(vp, fmode, ctx))) | |
704 | goto bad; | |
705 | if ((error = vnode_ref_ext(vp, fmode, 0))) | |
706 | goto bad; | |
707 | ||
708 | /* | |
709 | * end of vn_open code | |
710 | */ | |
711 | ||
712 | // starting here... error paths should call vn_close/vnode_put | |
713 | if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { | |
714 | vn_close(vp, fmode & FMASK, ctx); | |
715 | goto bad; | |
716 | } | |
717 | fp = nfp; | |
718 | ||
719 | fp->f_fglob->fg_flag = fmode & FMASK; | |
720 | fp->f_fglob->fg_ops = &vnops; | |
721 | fp->f_fglob->fg_data = (caddr_t)vp; | |
722 | ||
723 | // XXX do we really need to support this with fhopen()? | |
724 | if (fmode & (O_EXLOCK | O_SHLOCK)) { | |
725 | lf.l_whence = SEEK_SET; | |
726 | lf.l_start = 0; | |
727 | lf.l_len = 0; | |
728 | if (fmode & O_EXLOCK) | |
729 | lf.l_type = F_WRLCK; | |
730 | else | |
731 | lf.l_type = F_RDLCK; | |
732 | type = F_FLOCK; | |
733 | if ((fmode & FNONBLOCK) == 0) | |
734 | type |= F_WAIT; | |
735 | if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { | |
736 | struct vfs_context context = *vfs_context_current(); | |
737 | /* Modify local copy (to not damage thread copy) */ | |
738 | context.vc_ucred = fp->f_fglob->fg_cred; | |
739 | ||
740 | vn_close(vp, fp->f_fglob->fg_flag, &context); | |
741 | fp_free(p, indx, fp); | |
742 | return (error); | |
743 | } | |
744 | fp->f_fglob->fg_flag |= FHASLOCK; | |
745 | } | |
746 | ||
747 | vnode_put(vp); | |
748 | ||
749 | proc_fdlock(p); | |
750 | procfdtbl_releasefd(p, indx, NULL); | |
751 | fp_drop(p, indx, fp, 1); | |
752 | proc_fdunlock(p); | |
753 | ||
754 | *retval = indx; | |
755 | return (0); | |
756 | ||
757 | bad: | |
758 | vnode_put(vp); | |
759 | return (error); | |
760 | } | |
761 | ||
762 | /* | |
763 | * NFS server pseudo system call | |
764 | */ | |
765 | int | |
766 | nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) | |
767 | { | |
768 | mbuf_t nam; | |
769 | struct user_nfsd_args user_nfsdarg; | |
770 | socket_t so; | |
771 | int error; | |
772 | ||
773 | AUDIT_ARG(cmd, uap->flag); | |
774 | ||
775 | /* | |
776 | * Must be super user for most operations (export ops checked later). | |
777 | */ | |
778 | if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) | |
779 | return (error); | |
780 | #if CONFIG_MACF | |
781 | error = mac_system_check_nfsd(kauth_cred_get()); | |
782 | if (error) | |
783 | return (error); | |
784 | #endif | |
785 | ||
786 | /* make sure NFS server data structures have been initialized */ | |
787 | nfsrv_init(); | |
788 | ||
789 | if (uap->flag & NFSSVC_ADDSOCK) { | |
790 | if (IS_64BIT_PROCESS(p)) { | |
791 | error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); | |
792 | } else { | |
793 | struct nfsd_args tmp_args; | |
794 | error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); | |
795 | if (error == 0) { | |
796 | user_nfsdarg.sock = tmp_args.sock; | |
797 | user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); | |
798 | user_nfsdarg.namelen = tmp_args.namelen; | |
799 | } | |
800 | } | |
801 | if (error) | |
802 | return (error); | |
803 | /* get the socket */ | |
804 | error = file_socket(user_nfsdarg.sock, &so); | |
805 | if (error) | |
806 | return (error); | |
807 | /* Get the client address for connected sockets. */ | |
808 | if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { | |
809 | nam = NULL; | |
810 | } else { | |
811 | error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); | |
812 | if (error) { | |
813 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
814 | file_drop(user_nfsdarg.sock); | |
815 | return (error); | |
816 | } | |
817 | } | |
818 | /* | |
819 | * nfssvc_addsock() will grab a retain count on the socket | |
820 | * to keep the socket from being closed when nfsd closes its | |
821 | * file descriptor for it. | |
822 | */ | |
823 | error = nfssvc_addsock(so, nam); | |
824 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
825 | file_drop(user_nfsdarg.sock); | |
826 | } else if (uap->flag & NFSSVC_NFSD) { | |
827 | error = nfssvc_nfsd(); | |
828 | } else if (uap->flag & NFSSVC_EXPORT) { | |
829 | error = nfssvc_export(uap->argp); | |
830 | } else { | |
831 | error = EINVAL; | |
832 | } | |
833 | if (error == EINTR || error == ERESTART) | |
834 | error = 0; | |
835 | return (error); | |
836 | } | |
837 | ||
838 | /* | |
839 | * Adds a socket to the list for servicing by nfsds. | |
840 | */ | |
841 | int | |
842 | nfssvc_addsock(socket_t so, mbuf_t mynam) | |
843 | { | |
844 | struct nfsrv_sock *slp; | |
845 | int error = 0, sodomain, sotype, soprotocol, on = 1; | |
846 | int first; | |
847 | struct timeval timeo; | |
848 | ||
849 | /* make sure mbuf constants are set up */ | |
850 | if (!nfs_mbuf_mhlen) | |
851 | nfs_mbuf_init(); | |
852 | ||
853 | sock_gettype(so, &sodomain, &sotype, &soprotocol); | |
854 | ||
855 | /* There should be only one UDP socket for each of IPv4 and IPv6 */ | |
856 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { | |
857 | mbuf_freem(mynam); | |
858 | return (EEXIST); | |
859 | } | |
860 | if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { | |
861 | mbuf_freem(mynam); | |
862 | return (EEXIST); | |
863 | } | |
864 | ||
865 | /* Set protocol options and reserve some space (for UDP). */ | |
866 | if (sotype == SOCK_STREAM) { | |
867 | error = nfsrv_check_exports_allow_address(mynam); | |
868 | if (error) | |
869 | return (error); | |
870 | sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); | |
871 | } | |
872 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) | |
873 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); | |
874 | if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ | |
875 | int reserve = NFS_UDPSOCKBUF; | |
876 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); | |
877 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); | |
878 | if (error) { | |
879 | log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error); | |
880 | error = 0; | |
881 | } | |
882 | } | |
883 | sock_nointerrupt(so, 0); | |
884 | ||
885 | /* | |
886 | * Set socket send/receive timeouts. | |
887 | * Receive timeout shouldn't matter, but setting the send timeout | |
888 | * will make sure that an unresponsive client can't hang the server. | |
889 | */ | |
890 | timeo.tv_usec = 0; | |
891 | timeo.tv_sec = 1; | |
892 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); | |
893 | timeo.tv_sec = 30; | |
894 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); | |
895 | if (error) { | |
896 | log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error); | |
897 | error = 0; | |
898 | } | |
899 | ||
900 | MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); | |
901 | if (!slp) { | |
902 | mbuf_freem(mynam); | |
903 | return (ENOMEM); | |
904 | } | |
905 | bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); | |
906 | lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); | |
907 | lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); | |
908 | ||
909 | lck_mtx_lock(nfsd_mutex); | |
910 | ||
911 | if (soprotocol == IPPROTO_UDP) { | |
912 | if (sodomain == AF_INET) { | |
913 | /* There should be only one UDP/IPv4 socket */ | |
914 | if (nfsrv_udpsock) { | |
915 | lck_mtx_unlock(nfsd_mutex); | |
916 | nfsrv_slpfree(slp); | |
917 | mbuf_freem(mynam); | |
918 | return (EEXIST); | |
919 | } | |
920 | nfsrv_udpsock = slp; | |
921 | } | |
922 | if (sodomain == AF_INET6) { | |
923 | /* There should be only one UDP/IPv6 socket */ | |
924 | if (nfsrv_udp6sock) { | |
925 | lck_mtx_unlock(nfsd_mutex); | |
926 | nfsrv_slpfree(slp); | |
927 | mbuf_freem(mynam); | |
928 | return (EEXIST); | |
929 | } | |
930 | nfsrv_udp6sock = slp; | |
931 | } | |
932 | } | |
933 | ||
934 | /* add the socket to the list */ | |
935 | first = TAILQ_EMPTY(&nfsrv_socklist); | |
936 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); | |
937 | if (soprotocol == IPPROTO_TCP) { | |
938 | nfsrv_sock_tcp_cnt++; | |
939 | if (nfsrv_sock_idle_timeout < 0) | |
940 | nfsrv_sock_idle_timeout = 0; | |
941 | if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) | |
942 | nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT; | |
943 | /* | |
944 | * Possibly start or stop the idle timer. We only start the idle timer when | |
945 | * we have more than 2 * nfsd_thread_max connections. If the idle timer is | |
946 | * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or | |
947 | * the number of connections. | |
948 | */ | |
949 | if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) { | |
950 | if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { | |
951 | if (nfsrv_idlesock_timer_on) { | |
952 | thread_call_cancel(nfsrv_idlesock_timer_call); | |
953 | nfsrv_idlesock_timer_on = 0; | |
954 | } | |
955 | } else { | |
956 | struct nfsrv_sock *old_slp; | |
957 | struct timeval now; | |
958 | time_t time_to_wait = nfsrv_sock_idle_timeout; | |
959 | /* | |
960 | * Get the oldest tcp socket and calculate the | |
961 | * earliest time for the next idle timer to fire | |
962 | * based on the possibly updated nfsrv_sock_idle_timeout | |
963 | */ | |
964 | TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) { | |
965 | if (old_slp->ns_sotype == SOCK_STREAM) { | |
966 | microuptime(&now); | |
967 | time_to_wait -= now.tv_sec - old_slp->ns_timestamp; | |
968 | if (time_to_wait < 1) | |
969 | time_to_wait = 1; | |
970 | break; | |
971 | } | |
972 | } | |
973 | /* | |
974 | * If we have a timer scheduled, but if its going to fire too late, | |
975 | * turn it off. | |
976 | */ | |
977 | if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) { | |
978 | thread_call_cancel(nfsrv_idlesock_timer_call); | |
979 | nfsrv_idlesock_timer_on = 0; | |
980 | } | |
981 | /* Schedule the idle thread if it isn't already */ | |
982 | if (!nfsrv_idlesock_timer_on) { | |
983 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); | |
984 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; | |
985 | } | |
986 | } | |
987 | } | |
988 | } | |
989 | ||
990 | sock_retain(so); /* grab a retain count on the socket */ | |
991 | slp->ns_so = so; | |
992 | slp->ns_sotype = sotype; | |
993 | slp->ns_nam = mynam; | |
994 | ||
995 | /* set up the socket up-call */ | |
996 | nfsrv_uc_addsock(slp, first); | |
997 | ||
998 | /* mark that the socket is not in the nfsrv_sockwg list */ | |
999 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1000 | ||
1001 | slp->ns_flag = SLP_VALID | SLP_NEEDQ; | |
1002 | ||
1003 | nfsrv_wakenfsd(slp); | |
1004 | lck_mtx_unlock(nfsd_mutex); | |
1005 | ||
1006 | return (0); | |
1007 | } | |
1008 | ||
1009 | /* | |
1010 | * nfssvc_nfsd() | |
1011 | * | |
1012 | * nfsd theory of operation: | |
1013 | * | |
1014 | * The first nfsd thread stays in user mode accepting new TCP connections | |
1015 | * which are then added via the "addsock" call. The rest of the nfsd threads | |
1016 | * simply call into the kernel and remain there in a loop handling NFS | |
1017 | * requests until killed by a signal. | |
1018 | * | |
1019 | * There's a list of nfsd threads (nfsd_head). | |
1020 | * There's an nfsd queue that contains only those nfsds that are | |
1021 | * waiting for work to do (nfsd_queue). | |
1022 | * | |
1023 | * There's a list of all NFS sockets (nfsrv_socklist) and two queues for | |
1024 | * managing the work on the sockets: | |
1025 | * nfsrv_sockwait - sockets w/new data waiting to be worked on | |
1026 | * nfsrv_sockwork - sockets being worked on which may have more work to do | |
1027 | * nfsrv_sockwg -- sockets which have pending write gather data | |
1028 | * When a socket receives data, if it is not currently queued, it | |
1029 | * will be placed at the end of the "wait" queue. | |
1030 | * Whenever a socket needs servicing we make sure it is queued and | |
1031 | * wake up a waiting nfsd (if there is one). | |
1032 | * | |
1033 | * nfsds will service at most 8 requests from the same socket before | |
1034 | * defecting to work on another socket. | |
1035 | * nfsds will defect immediately if there are any sockets in the "wait" queue | |
1036 | * nfsds looking for a socket to work on check the "wait" queue first and | |
1037 | * then check the "work" queue. | |
1038 | * When an nfsd starts working on a socket, it removes it from the head of | |
1039 | * the queue it's currently on and moves it to the end of the "work" queue. | |
1040 | * When nfsds are checking the queues for work, any sockets found not to | |
1041 | * have any work are simply dropped from the queue. | |
1042 | * | |
1043 | */ | |
/*
 * nfssvc_nfsd: body of an nfsd service thread.
 *
 * Allocates a struct nfsd for the calling thread, links it on nfsd_head and
 * then loops: find a socket with work, pull a request off it with
 * nfsrv_dorec(), run it through the reply cache and the procedure table (or
 * the write-gathering path) and send the reply.
 *
 * The loop is left when nfsd_thread_max is <= 0, when there is no work and
 * more nfsd threads exist than nfsd_thread_max, when the sleep waiting for
 * work fails with anything other than EWOULDBLOCK, or when nfsrv_send()
 * returns EINTR/ERESTART.
 *
 * Returns ENOMEM if the nfsd structure cannot be allocated; otherwise the
 * value `error` had when the loop was left.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	u_quad_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
	if (!nfsd)
		return (ENOMEM);
	bzero(nfsd, sizeof(struct nfsd));
	lck_mtx_lock(nfsd_mutex);
	if (nfsd_thread_count++ == 0)
		nfsrv_initcache();		/* Init the server request cache */

	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(nfsd_mutex);
			/* sleep (with the 5 second timeout) until a socket is assigned or queued */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					/* nobody woke us: take ourselves off the waiting queue if still on it */
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					/* timeout: re-evaluate the wait condition (this continues the while loop) */
					if (error == EWOULDBLOCK)
						continue;
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					/* leave the loop with ns_rwlock still held on the chosen socket */
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					/* leave the loop with ns_rwlock still held on the chosen socket */
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				/* releases the ns_rwlock taken in one of the queue scans above */
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(nfsd_mutex);
			if (!slp)
				continue;
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				/* pull pending data off the socket unless it is already marked disconnected */
				if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN)
					nfsrv_zapsock(slp);
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {	// RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM)
						nfsrv_zapsock(slp); // drop connection
				}
				writes_todo = 0;
				/*
				 * No request was obtained, but gathered writes may be
				 * due: if ns_wgtime has passed, clear the error and run
				 * the RC_DOIT path with writes_todo set.
				 */
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					microuptime(&now);
					cur_usec = (u_quad_t)now.tv_sec * 1000000 +
						(u_quad_t)now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		/*
		 * Nothing to process (error set, or the socket is no longer
		 * valid): free any request descriptor, give up the socket and
		 * either leave (server shutting down) or look for more work.
		 */
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp)
				nfsrv_slpderef(slp);
			if (nfsd_thread_max <= 0)
				break;
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			/* reply address: the per-request one if present, else the socket's */
			if (nd->nd_nam2)
				nd->nd_nam = nd->nd_nam2;
			else
				nd->nd_nam = slp->ns_nam;

			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				else if (saddr->sa_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				/* non-reserved source port: turn everything but NULL into a no-op that replies AUTH_TOOWEAK */
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}

		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3))
					procrastinate = nfsrv_wg_delay_v3;
				else
					procrastinate = nfsrv_wg_delay;
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/*
				 * NOTE(review): nd is tested for NULL above but is
				 * dereferenced here whenever writes_todo is 0;
				 * presumably nd can only be NULL on the writes_todo
				 * path - confirm.
				 */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				else
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
							nd->nd_procnum, error);
					}
					break;

				}
				if (error) {
					OSAddAtomic64(1, &nfsstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				/* FALLTHRU */

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n",siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error)
					panic("nfsd setrcvif failed: %d", error);
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					/* record mark: high bit set, OR'ed with the reply size */
					if (!error)
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				/* interrupted send: release everything and leave the thread (done expects nfsd_mutex held) */
				if (error == EINTR || error == ERESTART) {
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr))
						kauth_cred_unref(&nd->nd_cr);
					if (nd->nd_gss_context)
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
					nfsrv_slpderef(slp);
					lck_mtx_lock(nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			};
			/* one more operation handled on this socket; opcnt limits consecutive requests below */
			opcnt++;
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (u_quad_t)now.tv_sec * 1000000 +
					(u_quad_t)now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* keep going on this socket only if no other socket is waiting and fewer than 8 ops were done */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {	// RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM)
					nfsrv_zapsock(slp); // drop connection
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(nfsd_mutex);
done:
	/*
	 * Reached with nfsd_mutex held: unlink and free this thread's nfsd,
	 * and have the last thread out tear down the server state.
	 */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	FREE(nfsd, M_NFSD);
	if (--nfsd_thread_count == 0)
		nfsrv_cleanup();
	lck_mtx_unlock(nfsd_mutex);
	return (error);
}
1417 | ||
1418 | int | |
1419 | nfssvc_export(user_addr_t argp) | |
1420 | { | |
1421 | int error = 0, is_64bit; | |
1422 | struct user_nfs_export_args unxa; | |
1423 | vfs_context_t ctx = vfs_context_current(); | |
1424 | ||
1425 | is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); | |
1426 | ||
1427 | /* copy in pointers to path and export args */ | |
1428 | if (is_64bit) { | |
1429 | error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); | |
1430 | } else { | |
1431 | struct nfs_export_args tnxa; | |
1432 | error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); | |
1433 | if (error == 0) { | |
1434 | /* munge into LP64 version of nfs_export_args structure */ | |
1435 | unxa.nxa_fsid = tnxa.nxa_fsid; | |
1436 | unxa.nxa_expid = tnxa.nxa_expid; | |
1437 | unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); | |
1438 | unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); | |
1439 | unxa.nxa_flags = tnxa.nxa_flags; | |
1440 | unxa.nxa_netcount = tnxa.nxa_netcount; | |
1441 | unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); | |
1442 | } | |
1443 | } | |
1444 | if (error) | |
1445 | return (error); | |
1446 | ||
1447 | error = nfsrv_export(&unxa, ctx); | |
1448 | ||
1449 | return (error); | |
1450 | } | |
1451 | ||
1452 | /* | |
1453 | * Shut down a socket associated with an nfsrv_sock structure. | |
1454 | * Should be called with the send lock set, if required. | |
1455 | * The trick here is to increment the sref at the start, so that the nfsds | |
1456 | * will stop using it and clear ns_flag at the end so that it will not be | |
1457 | * reassigned during cleanup. | |
1458 | */ | |
1459 | void | |
1460 | nfsrv_zapsock(struct nfsrv_sock *slp) | |
1461 | { | |
1462 | socket_t so; | |
1463 | ||
1464 | if ((slp->ns_flag & SLP_VALID) == 0) | |
1465 | return; | |
1466 | slp->ns_flag &= ~SLP_ALLFLAGS; | |
1467 | ||
1468 | so = slp->ns_so; | |
1469 | if (so == NULL) | |
1470 | return; | |
1471 | ||
1472 | sock_setupcall(so, NULL, NULL); | |
1473 | sock_shutdown(so, SHUT_RDWR); | |
1474 | ||
1475 | /* | |
1476 | * Remove from the up-call queue | |
1477 | */ | |
1478 | nfsrv_uc_dequeue(slp); | |
1479 | } | |
1480 | ||
1481 | /* | |
1482 | * cleanup and release a server socket structure. | |
1483 | */ | |
1484 | void | |
1485 | nfsrv_slpfree(struct nfsrv_sock *slp) | |
1486 | { | |
1487 | struct nfsrv_descript *nwp, *nnwp; | |
1488 | ||
1489 | if (slp->ns_so) { | |
1490 | sock_release(slp->ns_so); | |
1491 | slp->ns_so = NULL; | |
1492 | } | |
1493 | if (slp->ns_nam) | |
1494 | mbuf_free(slp->ns_nam); | |
1495 | if (slp->ns_raw) | |
1496 | mbuf_freem(slp->ns_raw); | |
1497 | if (slp->ns_rec) | |
1498 | mbuf_freem(slp->ns_rec); | |
1499 | if (slp->ns_frag) | |
1500 | mbuf_freem(slp->ns_frag); | |
1501 | slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; | |
1502 | slp->ns_reccnt = 0; | |
1503 | ||
1504 | for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { | |
1505 | nnwp = nwp->nd_tq.le_next; | |
1506 | LIST_REMOVE(nwp, nd_tq); | |
1507 | nfsm_chain_cleanup(&nwp->nd_nmreq); | |
1508 | if (nwp->nd_mrep) | |
1509 | mbuf_freem(nwp->nd_mrep); | |
1510 | if (nwp->nd_nam2) | |
1511 | mbuf_freem(nwp->nd_nam2); | |
1512 | if (IS_VALID_CRED(nwp->nd_cr)) | |
1513 | kauth_cred_unref(&nwp->nd_cr); | |
1514 | if (nwp->nd_gss_context) | |
1515 | nfs_gss_svc_ctx_deref(nwp->nd_gss_context); | |
1516 | FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); | |
1517 | } | |
1518 | LIST_INIT(&slp->ns_tq); | |
1519 | ||
1520 | lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); | |
1521 | lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); | |
1522 | FREE(slp, M_NFSSVC); | |
1523 | } | |
1524 | ||
1525 | /* | |
1526 | * Derefence a server socket structure. If it has no more references and | |
1527 | * is no longer valid, you can throw it away. | |
1528 | */ | |
1529 | static void | |
1530 | nfsrv_slpderef_locked(struct nfsrv_sock *slp) | |
1531 | { | |
1532 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1533 | slp->ns_sref--; | |
1534 | ||
1535 | if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { | |
1536 | if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { | |
1537 | /* remove socket from queue since there's no work */ | |
1538 | if (slp->ns_flag & SLP_WAITQ) | |
1539 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1540 | else | |
1541 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1542 | slp->ns_flag &= ~SLP_QUEUED; | |
1543 | } | |
1544 | lck_rw_done(&slp->ns_rwlock); | |
1545 | return; | |
1546 | } | |
1547 | ||
1548 | /* This socket is no longer valid, so we'll get rid of it */ | |
1549 | ||
1550 | if (slp->ns_flag & SLP_QUEUED) { | |
1551 | if (slp->ns_flag & SLP_WAITQ) | |
1552 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1553 | else | |
1554 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1555 | slp->ns_flag &= ~SLP_QUEUED; | |
1556 | } | |
1557 | lck_rw_done(&slp->ns_rwlock); | |
1558 | ||
1559 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); | |
1560 | if (slp->ns_sotype == SOCK_STREAM) | |
1561 | nfsrv_sock_tcp_cnt--; | |
1562 | ||
1563 | /* now remove from the write gather socket list */ | |
1564 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1565 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1566 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1567 | } | |
1568 | nfsrv_slpfree(slp); | |
1569 | } | |
1570 | ||
1571 | void | |
1572 | nfsrv_slpderef(struct nfsrv_sock *slp) | |
1573 | { | |
1574 | lck_mtx_lock(nfsd_mutex); | |
1575 | nfsrv_slpderef_locked(slp); | |
1576 | lck_mtx_unlock(nfsd_mutex); | |
1577 | } | |
1578 | ||
1579 | /* | |
1580 | * Check periodically for idle sockest if needed and | |
1581 | * zap them. | |
1582 | */ | |
1583 | void | |
1584 | nfsrv_idlesock_timer(__unused void *param0, __unused void *param1) | |
1585 | { | |
1586 | struct nfsrv_sock *slp, *tslp; | |
1587 | struct timeval now; | |
1588 | time_t time_to_wait = nfsrv_sock_idle_timeout; | |
1589 | ||
1590 | microuptime(&now); | |
1591 | lck_mtx_lock(nfsd_mutex); | |
1592 | ||
1593 | /* Turn off the timer if we're suppose to and get out */ | |
1594 | if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) | |
1595 | nfsrv_sock_idle_timeout = 0; | |
1596 | if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) { | |
1597 | nfsrv_idlesock_timer_on = 0; | |
1598 | lck_mtx_unlock(nfsd_mutex); | |
1599 | return; | |
1600 | } | |
1601 | ||
1602 | TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) { | |
1603 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1604 | /* Skip udp and referenced sockets */ | |
1605 | if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) { | |
1606 | lck_rw_done(&slp->ns_rwlock); | |
1607 | continue; | |
1608 | } | |
1609 | /* | |
1610 | * If this is the first non-referenced socket that hasn't idle out, | |
1611 | * use its time stamp to calculate the earlist time in the future | |
1612 | * to start the next invocation of the timer. Since the nfsrv_socklist | |
1613 | * is sorted oldest access to newest. Once we find the first one, | |
1614 | * we're done and break out of the loop. | |
1615 | */ | |
1616 | if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) || | |
1617 | nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { | |
1618 | time_to_wait -= now.tv_sec - slp->ns_timestamp; | |
1619 | if (time_to_wait < 1) | |
1620 | time_to_wait = 1; | |
1621 | lck_rw_done(&slp->ns_rwlock); | |
1622 | break; | |
1623 | } | |
1624 | /* | |
1625 | * Bump the ref count. nfsrv_slpderef below will destroy | |
1626 | * the socket, since nfsrv_zapsock has closed it. | |
1627 | */ | |
1628 | slp->ns_sref++; | |
1629 | nfsrv_zapsock(slp); | |
1630 | lck_rw_done(&slp->ns_rwlock); | |
1631 | nfsrv_slpderef_locked(slp); | |
1632 | } | |
1633 | ||
1634 | /* Start ourself back up */ | |
1635 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); | |
1636 | /* Remember when the next timer will fire for nfssvc_addsock. */ | |
1637 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; | |
1638 | lck_mtx_unlock(nfsd_mutex); | |
1639 | } | |
1640 | ||
1641 | /* | |
1642 | * Clean up the data structures for the server. | |
1643 | */ | |
1644 | void | |
1645 | nfsrv_cleanup(void) | |
1646 | { | |
1647 | struct nfsrv_sock *slp, *nslp; | |
1648 | struct timeval now; | |
1649 | #if CONFIG_FSE | |
1650 | struct nfsrv_fmod *fp, *nfp; | |
1651 | int i; | |
1652 | #endif | |
1653 | ||
1654 | microuptime(&now); | |
1655 | for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { | |
1656 | nslp = TAILQ_NEXT(slp, ns_chain); | |
1657 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1658 | slp->ns_sref++; | |
1659 | if (slp->ns_flag & SLP_VALID) | |
1660 | nfsrv_zapsock(slp); | |
1661 | lck_rw_done(&slp->ns_rwlock); | |
1662 | nfsrv_slpderef_locked(slp); | |
1663 | } | |
1664 | # | |
1665 | #if CONFIG_FSE | |
1666 | /* | |
1667 | * Flush pending file write fsevents | |
1668 | */ | |
1669 | lck_mtx_lock(nfsrv_fmod_mutex); | |
1670 | for (i = 0; i < NFSRVFMODHASHSZ; i++) { | |
1671 | for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { | |
1672 | /* | |
1673 | * Fire off the content modified fsevent for each | |
1674 | * entry, remove it from the list, and free it. | |
1675 | */ | |
1676 | if (nfsrv_fsevents_enabled) { | |
1677 | fp->fm_context.vc_thread = current_thread(); | |
1678 | add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, | |
1679 | FSE_ARG_VNODE, fp->fm_vp, | |
1680 | FSE_ARG_DONE); | |
1681 | } | |
1682 | vnode_put(fp->fm_vp); | |
1683 | kauth_cred_unref(&fp->fm_context.vc_ucred); | |
1684 | nfp = LIST_NEXT(fp, fm_link); | |
1685 | LIST_REMOVE(fp, fm_link); | |
1686 | FREE(fp, M_TEMP); | |
1687 | } | |
1688 | } | |
1689 | nfsrv_fmod_pending = 0; | |
1690 | lck_mtx_unlock(nfsrv_fmod_mutex); | |
1691 | #endif | |
1692 | ||
1693 | nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ | |
1694 | ||
1695 | nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ | |
1696 | ||
1697 | nfsrv_cleancache(); /* And clear out server cache */ | |
1698 | ||
1699 | nfsrv_udpsock = NULL; | |
1700 | nfsrv_udp6sock = NULL; | |
1701 | } | |
1702 | ||
1703 | #endif /* NFS_NOSERVER */ |