]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
fe8ab488 | 2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * | |
33 | * This code is derived from software contributed to Berkeley by | |
34 | * Rick Macklem at The University of Guelph. | |
35 | * | |
36 | * Redistribution and use in source and binary forms, with or without | |
37 | * modification, are permitted provided that the following conditions | |
38 | * are met: | |
39 | * 1. Redistributions of source code must retain the above copyright | |
40 | * notice, this list of conditions and the following disclaimer. | |
41 | * 2. Redistributions in binary form must reproduce the above copyright | |
42 | * notice, this list of conditions and the following disclaimer in the | |
43 | * documentation and/or other materials provided with the distribution. | |
44 | * 3. All advertising materials mentioning features or use of this software | |
45 | * must display the following acknowledgement: | |
46 | * This product includes software developed by the University of | |
47 | * California, Berkeley and its contributors. | |
48 | * 4. Neither the name of the University nor the names of its contributors | |
49 | * may be used to endorse or promote products derived from this software | |
50 | * without specific prior written permission. | |
51 | * | |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
62 | * SUCH DAMAGE. | |
63 | * | |
64 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 | |
65 | * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ | |
66 | */ | |
2d21ac55 A |
67 | /* |
68 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
69 | * support for mandatory and extensible security protections. This notice | |
70 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
71 | * Version 2.0. | |
72 | */ | |
1c79356b A |
73 | |
74 | #include <sys/param.h> | |
75 | #include <sys/systm.h> | |
1c79356b | 76 | #include <sys/kernel.h> |
91447636 | 77 | #include <sys/file_internal.h> |
1c79356b A |
78 | #include <sys/filedesc.h> |
79 | #include <sys/stat.h> | |
91447636 A |
80 | #include <sys/vnode_internal.h> |
81 | #include <sys/mount_internal.h> | |
82 | #include <sys/proc_internal.h> /* for fdflags */ | |
83 | #include <sys/kauth.h> | |
1c79356b | 84 | #include <sys/sysctl.h> |
55e303ae | 85 | #include <sys/ubc.h> |
1c79356b A |
86 | #include <sys/uio.h> |
87 | #include <sys/malloc.h> | |
91447636 | 88 | #include <sys/kpi_mbuf.h> |
1c79356b A |
89 | #include <sys/socket.h> |
90 | #include <sys/socketvar.h> | |
91 | #include <sys/domain.h> | |
92 | #include <sys/protosw.h> | |
55e303ae A |
93 | #include <sys/fcntl.h> |
94 | #include <sys/lockf.h> | |
1c79356b A |
95 | #include <sys/syslog.h> |
96 | #include <sys/user.h> | |
91447636 A |
97 | #include <sys/sysproto.h> |
98 | #include <sys/kpi_socket.h> | |
2d21ac55 | 99 | #include <sys/fsevents.h> |
91447636 | 100 | #include <libkern/OSAtomic.h> |
2d21ac55 A |
101 | #include <kern/thread_call.h> |
102 | #include <kern/task.h> | |
1c79356b | 103 | |
b0d623f7 | 104 | #include <security/audit/audit.h> |
ccc36f2f | 105 | |
1c79356b A |
106 | #include <netinet/in.h> |
107 | #include <netinet/tcp.h> | |
1c79356b A |
108 | #include <nfs/xdr_subs.h> |
109 | #include <nfs/rpcv2.h> | |
110 | #include <nfs/nfsproto.h> | |
111 | #include <nfs/nfs.h> | |
112 | #include <nfs/nfsm_subs.h> | |
113 | #include <nfs/nfsrvcache.h> | |
2d21ac55 | 114 | #include <nfs/nfs_gss.h> |
1c79356b A |
115 | #include <nfs/nfsmount.h> |
116 | #include <nfs/nfsnode.h> | |
55e303ae | 117 | #include <nfs/nfs_lock.h> |
2d21ac55 A |
118 | #if CONFIG_MACF |
119 | #include <security/mac_framework.h> | |
1c79356b A |
120 | #endif |
121 | ||
2d21ac55 A |
122 | kern_return_t thread_terminate(thread_t); /* XXX */ |
123 | ||
124 | #if NFSSERVER | |
125 | ||
126 | extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, | |
127 | struct nfsrv_sock *slp, | |
128 | vfs_context_t ctx, | |
129 | mbuf_t *mrepp); | |
130 | extern int nfsrv_wg_delay; | |
131 | extern int nfsrv_wg_delay_v3; | |
132 | ||
133 | static int nfsrv_require_resv_port = 0; | |
134 | static int nfsrv_deadsock_timer_on = 0; | |
135 | ||
b0d623f7 A |
136 | int nfssvc_export(user_addr_t argp); |
137 | int nfssvc_nfsd(void); | |
138 | int nfssvc_addsock(socket_t, mbuf_t); | |
139 | void nfsrv_zapsock(struct nfsrv_sock *); | |
140 | void nfsrv_slpderef(struct nfsrv_sock *); | |
141 | void nfsrv_slpfree(struct nfsrv_sock *); | |
2d21ac55 A |
142 | |
143 | #endif /* NFSSERVER */ | |
144 | ||
145 | /* | |
146 | * sysctl stuff | |
147 | */ | |
148 | SYSCTL_DECL(_vfs_generic); | |
149 | SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); | |
150 | ||
151 | #if NFSCLIENT | |
152 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); | |
6d2010ae A |
153 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); |
154 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); | |
155 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); | |
156 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); | |
157 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); | |
158 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); | |
159 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); | |
160 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); | |
161 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); | |
162 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); | |
163 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, ""); | |
164 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); | |
165 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); | |
166 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); | |
167 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); | |
168 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); | |
316670eb A |
169 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); |
170 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); | |
39236c6e | 171 | SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); |
fe8ab488 | 172 | SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); |
316670eb | 173 | |
2d21ac55 A |
174 | #endif /* NFSCLIENT */ |
175 | ||
176 | #if NFSSERVER | |
177 | SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); | |
6d2010ae A |
178 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); |
179 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); | |
180 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); | |
181 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); | |
182 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); | |
183 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); | |
184 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); | |
185 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); | |
186 | SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); | |
b0d623f7 | 187 | #if CONFIG_FSE |
6d2010ae | 188 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); |
b0d623f7 | 189 | #endif |
6d2010ae A |
190 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); |
191 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); | |
316670eb A |
192 | #ifdef NFS_UC_Q_DEBUG |
193 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, ""); | |
194 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, ""); | |
195 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, ""); | |
196 | SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, ""); | |
197 | #endif | |
2d21ac55 A |
198 | #endif /* NFSSERVER */ |
199 | ||
200 | ||
201 | #if NFSCLIENT | |
202 | ||
fe8ab488 A |
203 | static int |
204 | mapname2id(struct nfs_testmapid *map) | |
205 | { | |
206 | int error; | |
207 | ||
208 | error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); | |
209 | if (error) | |
210 | return (error); | |
211 | ||
212 | if (map->ntm_grpflag) | |
213 | error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); | |
214 | else | |
215 | error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); | |
216 | ||
217 | return (error); | |
218 | } | |
219 | ||
220 | static int | |
221 | mapid2name(struct nfs_testmapid *map) | |
222 | { | |
223 | int error; | |
224 | int len = sizeof(map->ntm_name); | |
225 | ||
226 | if (map->ntm_grpflag) | |
227 | error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); | |
228 | else | |
229 | error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); | |
230 | ||
231 | if (error) | |
232 | return (error); | |
233 | ||
234 | error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); | |
235 | ||
236 | return (error); | |
237 | ||
238 | } | |
239 | ||
240 | ||
241 | static int | |
242 | nfsclnt_testidmap(proc_t p, user_addr_t argp) | |
243 | { | |
244 | struct nfs_testmapid mapid; | |
245 | int error, coerror; | |
246 | ||
247 | /* Let root make this call. */ | |
248 | error = proc_suser(p); | |
249 | if (error) | |
250 | return (error); | |
251 | ||
252 | error = copyin(argp, &mapid, sizeof(mapid)); | |
253 | if (error) | |
254 | return (error); | |
255 | if (mapid.ntm_name2id) | |
256 | error = mapname2id(&mapid); | |
257 | else | |
258 | error = mapid2name(&mapid); | |
259 | ||
260 | coerror = copyout(&mapid, argp, sizeof(mapid)); | |
261 | ||
262 | return (error ? error : coerror); | |
263 | } | |
264 | ||
2d21ac55 A |
265 | int |
266 | nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) | |
267 | { | |
268 | struct lockd_ans la; | |
269 | int error; | |
270 | ||
6d2010ae A |
271 | switch (uap->flag) { |
272 | case NFSCLNT_LOCKDANS: | |
2d21ac55 | 273 | error = copyin(uap->argp, &la, sizeof(la)); |
6d2010ae A |
274 | if (!error) |
275 | error = nfslockdans(p, &la); | |
276 | break; | |
277 | case NFSCLNT_LOCKDNOTIFY: | |
278 | error = nfslockdnotify(p, uap->argp); | |
279 | break; | |
fe8ab488 A |
280 | case NFSCLNT_TESTIDMAP: |
281 | error = nfsclnt_testidmap(p, uap->argp); | |
282 | break; | |
6d2010ae A |
283 | default: |
284 | error = EINVAL; | |
2d21ac55 | 285 | } |
6d2010ae | 286 | return (error); |
2d21ac55 A |
287 | } |
288 | ||
fe8ab488 | 289 | |
2d21ac55 A |
290 | /* |
291 | * Asynchronous I/O threads for client NFS. | |
292 | * They do read-ahead and write-behind operations on the block I/O cache. | |
293 | * | |
294 | * Up to nfsiod_thread_max threads are launched on demand, and each one exits | |
295 | * when unused for a while. There are as many nfsiod structs as there are | |
296 | * nfsiod threads; however there's no strict tie between a thread and a struct. | |
297 | * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes | |
298 | * up, it removes the next struct nfsiod from the queue and services it. Then | |
299 | * it will put the struct at the head of free list and sleep on it. | |
300 | * Async requests will pull the next struct nfsiod from the head of the free list, | |
301 | * put it on the work queue, and wake whatever thread is waiting on that struct. | |
302 | */ | |
2d21ac55 A |
303 | |
304 | /* | |
305 | * nfsiod thread exit routine | |
306 | * | |
307 | * Must be called with nfsiod_mutex held so that the | |
308 | * decision to terminate is atomic with the termination. | |
309 | */ | |
b0d623f7 | 310 | void |
2d21ac55 A |
311 | nfsiod_terminate(struct nfsiod *niod) |
312 | { | |
313 | nfsiod_thread_count--; | |
314 | lck_mtx_unlock(nfsiod_mutex); | |
315 | if (niod) | |
316 | FREE(niod, M_TEMP); | |
317 | else | |
318 | printf("nfsiod: terminating without niod\n"); | |
319 | thread_terminate(current_thread()); | |
320 | /*NOTREACHED*/ | |
321 | } | |
322 | ||
323 | /* nfsiod thread startup routine */ | |
b0d623f7 | 324 | void |
2d21ac55 A |
325 | nfsiod_thread(void) |
326 | { | |
327 | struct nfsiod *niod; | |
328 | int error; | |
329 | ||
330 | MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); | |
331 | if (!niod) { | |
332 | lck_mtx_lock(nfsiod_mutex); | |
333 | nfsiod_thread_count--; | |
b0d623f7 | 334 | wakeup(current_thread()); |
2d21ac55 A |
335 | lck_mtx_unlock(nfsiod_mutex); |
336 | thread_terminate(current_thread()); | |
337 | /*NOTREACHED*/ | |
338 | } | |
339 | bzero(niod, sizeof(*niod)); | |
340 | lck_mtx_lock(nfsiod_mutex); | |
341 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
342 | wakeup(current_thread()); | |
343 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
344 | /* shouldn't return... so we have an error */ | |
345 | /* remove an old nfsiod struct and terminate */ | |
346 | lck_mtx_lock(nfsiod_mutex); | |
347 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
348 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
349 | nfsiod_terminate(niod); | |
350 | /*NOTREACHED*/ | |
351 | } | |
352 | ||
353 | /* | |
354 | * Start up another nfsiod thread. | |
355 | * (unless we're already maxed out and there are nfsiods running) | |
356 | */ | |
357 | int | |
358 | nfsiod_start(void) | |
359 | { | |
b0d623f7 | 360 | thread_t thd = THREAD_NULL; |
2d21ac55 A |
361 | |
362 | lck_mtx_lock(nfsiod_mutex); | |
363 | if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { | |
364 | lck_mtx_unlock(nfsiod_mutex); | |
365 | return (EBUSY); | |
366 | } | |
367 | nfsiod_thread_count++; | |
b0d623f7 A |
368 | if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { |
369 | lck_mtx_unlock(nfsiod_mutex); | |
370 | return (EBUSY); | |
371 | } | |
2d21ac55 A |
372 | /* wait for the thread to complete startup */ |
373 | msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); | |
b0d623f7 | 374 | thread_deallocate(thd); |
2d21ac55 A |
375 | return (0); |
376 | } | |
377 | ||
378 | /* | |
379 | * Continuation for Asynchronous I/O threads for NFS client. | |
380 | * | |
381 | * Grab an nfsiod struct to work on, do some work, then drop it | |
382 | */ | |
b0d623f7 | 383 | int |
2d21ac55 A |
384 | nfsiod_continue(int error) |
385 | { | |
386 | struct nfsiod *niod; | |
387 | struct nfsmount *nmp; | |
388 | struct nfsreq *req, *treq; | |
389 | struct nfs_reqqhead iodq; | |
390 | int morework; | |
391 | ||
392 | lck_mtx_lock(nfsiod_mutex); | |
393 | niod = TAILQ_FIRST(&nfsiodwork); | |
394 | if (!niod) { | |
395 | /* there's no work queued up */ | |
2d21ac55 A |
396 | /* remove an old nfsiod struct and terminate */ |
397 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
398 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
399 | nfsiod_terminate(niod); | |
400 | /*NOTREACHED*/ | |
401 | } | |
402 | TAILQ_REMOVE(&nfsiodwork, niod, niod_link); | |
403 | ||
404 | worktodo: | |
405 | while ((nmp = niod->niod_nmp)) { | |
fe8ab488 A |
406 | if (nmp == NULL){ |
407 | niod->niod_nmp = NULL; | |
408 | break; | |
409 | } | |
410 | ||
2d21ac55 A |
411 | /* |
412 | * Service this mount's async I/O queue. | |
413 | * | |
414 | * In order to ensure some level of fairness between mounts, | |
415 | * we grab all the work up front before processing it so any | |
416 | * new work that arrives will be serviced on a subsequent | |
417 | * iteration - and we have a chance to see if other work needs | |
418 | * to be done (e.g. the delayed write queue needs to be pushed | |
419 | * or other mounts are waiting for an nfsiod). | |
420 | */ | |
421 | /* grab the current contents of the queue */ | |
422 | TAILQ_INIT(&iodq); | |
423 | TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); | |
424 | lck_mtx_unlock(nfsiod_mutex); | |
425 | ||
426 | /* process the queue */ | |
427 | TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { | |
428 | TAILQ_REMOVE(&iodq, req, r_achain); | |
fe8ab488 A |
429 | lck_mtx_lock(nfsiod_mutex); |
430 | req->r_achain.tqe_next = NFSIODCOMPLETING; | |
431 | lck_mtx_unlock(nfsiod_mutex); | |
2d21ac55 A |
432 | req->r_callback.rcb_func(req); |
433 | } | |
434 | ||
435 | /* now check if there's more/other work to be done */ | |
436 | lck_mtx_lock(nfsiod_mutex); | |
437 | morework = !TAILQ_EMPTY(&nmp->nm_iodq); | |
438 | if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { | |
fe8ab488 A |
439 | /* |
440 | * we're going to stop working on this mount but if the | |
441 | * mount still needs more work so queue it up | |
442 | */ | |
443 | if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) | |
2d21ac55 A |
444 | TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); |
445 | nmp->nm_niod = NULL; | |
446 | niod->niod_nmp = NULL; | |
447 | } | |
448 | } | |
449 | ||
450 | /* loop if there's still a mount to work on */ | |
451 | if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { | |
452 | niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); | |
453 | TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); | |
fe8ab488 | 454 | niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; |
2d21ac55 A |
455 | } |
456 | if (niod->niod_nmp) | |
457 | goto worktodo; | |
458 | ||
459 | /* queue ourselves back up - if there aren't too many threads running */ | |
460 | if (nfsiod_thread_count <= NFSIOD_MAX) { | |
461 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); | |
462 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); | |
463 | /* shouldn't return... so we have an error */ | |
464 | /* remove an old nfsiod struct and terminate */ | |
465 | lck_mtx_lock(nfsiod_mutex); | |
466 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) | |
467 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); | |
468 | } | |
469 | nfsiod_terminate(niod); | |
470 | /*NOTREACHED*/ | |
471 | return (0); | |
472 | } | |
473 | ||
474 | #endif /* NFSCLIENT */ | |
475 | ||
476 | ||
477 | #if NFSSERVER | |
478 | ||
1c79356b A |
479 | /* |
480 | * NFS server system calls | |
481 | * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c | |
482 | */ | |
483 | ||
484 | /* | |
485 | * Get file handle system call | |
486 | */ | |
1c79356b | 487 | int |
91447636 | 488 | getfh(proc_t p, struct getfh_args *uap, __unused int *retval) |
1c79356b | 489 | { |
91447636 A |
490 | vnode_t vp; |
491 | struct nfs_filehandle nfh; | |
6d2010ae | 492 | int error, fhlen, fidlen; |
1c79356b | 493 | struct nameidata nd; |
91447636 | 494 | char path[MAXPATHLEN], *ptr; |
6d2010ae | 495 | size_t pathlen; |
91447636 A |
496 | struct nfs_exportfs *nxfs; |
497 | struct nfs_export *nx; | |
498 | ||
1c79356b A |
499 | /* |
500 | * Must be super user | |
501 | */ | |
91447636 A |
502 | error = proc_suser(p); |
503 | if (error) | |
504 | return (error); | |
505 | ||
6d2010ae A |
506 | error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); |
507 | if (!error) | |
508 | error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); | |
91447636 | 509 | if (error) |
1c79356b | 510 | return (error); |
6d2010ae A |
511 | /* limit fh size to length specified (or v3 size by default) */ |
512 | if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) | |
513 | fhlen = NFSV3_MAX_FH_SIZE; | |
514 | fidlen = fhlen - sizeof(struct nfs_exphandle); | |
91447636 | 515 | |
2d21ac55 A |
516 | if (!nfsrv_is_initialized()) |
517 | return (EINVAL); | |
518 | ||
6d2010ae | 519 | NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, |
2d21ac55 | 520 | UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); |
1c79356b A |
521 | error = namei(&nd); |
522 | if (error) | |
523 | return (error); | |
91447636 A |
524 | nameidone(&nd); |
525 | ||
1c79356b | 526 | vp = nd.ni_vp; |
91447636 A |
527 | |
528 | // find exportfs that matches f_mntonname | |
2d21ac55 | 529 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
91447636 | 530 | ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; |
2d21ac55 A |
531 | LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { |
532 | if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) | |
91447636 A |
533 | break; |
534 | } | |
535 | if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { | |
536 | error = EINVAL; | |
537 | goto out; | |
538 | } | |
539 | // find export that best matches remainder of path | |
540 | ptr = path + strlen(nxfs->nxfs_path); | |
541 | while (*ptr && (*ptr == '/')) | |
542 | ptr++; | |
543 | LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { | |
544 | int len = strlen(nx->nx_path); | |
545 | if (len == 0) // we've hit the export entry for the root directory | |
546 | break; | |
547 | if (!strncmp(nx->nx_path, ptr, len)) | |
548 | break; | |
549 | } | |
550 | if (!nx) { | |
551 | error = EINVAL; | |
552 | goto out; | |
553 | } | |
554 | ||
555 | bzero(&nfh, sizeof(nfh)); | |
0c530ab8 A |
556 | nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); |
557 | nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); | |
558 | nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); | |
91447636 A |
559 | nfh.nfh_xh.nxh_flags = 0; |
560 | nfh.nfh_xh.nxh_reserved = 0; | |
6d2010ae | 561 | nfh.nfh_len = fidlen; |
2d21ac55 | 562 | error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); |
6d2010ae | 563 | if (nfh.nfh_len > (uint32_t)fidlen) |
91447636 A |
564 | error = EOVERFLOW; |
565 | nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; | |
566 | nfh.nfh_len += sizeof(nfh.nfh_xh); | |
2d21ac55 | 567 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
91447636 A |
568 | |
569 | out: | |
2d21ac55 | 570 | lck_rw_done(&nfsrv_export_rwlock); |
91447636 | 571 | vnode_put(vp); |
1c79356b A |
572 | if (error) |
573 | return (error); | |
6d2010ae | 574 | error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); |
1c79356b A |
575 | return (error); |
576 | } | |
577 | ||
39236c6e | 578 | extern const struct fileops vnops; |
91447636 | 579 | |
55e303ae A |
580 | /* |
581 | * syscall for the rpc.lockd to use to translate a NFS file handle into | |
582 | * an open descriptor. | |
583 | * | |
584 | * warning: do not remove the suser() call or this becomes one giant | |
585 | * security hole. | |
586 | */ | |
55e303ae | 587 | int |
91447636 A |
588 | fhopen( proc_t p, |
589 | struct fhopen_args *uap, | |
b0d623f7 | 590 | int32_t *retval) |
55e303ae | 591 | { |
91447636 A |
592 | vnode_t vp; |
593 | struct nfs_filehandle nfh; | |
594 | struct nfs_export *nx; | |
595 | struct nfs_export_options *nxo; | |
55e303ae | 596 | struct flock lf; |
91447636 A |
597 | struct fileproc *fp, *nfp; |
598 | int fmode, error, type; | |
55e303ae | 599 | int indx; |
2d21ac55 | 600 | vfs_context_t ctx = vfs_context_current(); |
91447636 A |
601 | kauth_action_t action; |
602 | ||
55e303ae A |
603 | /* |
604 | * Must be super user | |
605 | */ | |
2d21ac55 | 606 | error = suser(vfs_context_ucred(ctx), 0); |
0c530ab8 | 607 | if (error) { |
55e303ae | 608 | return (error); |
0c530ab8 | 609 | } |
55e303ae | 610 | |
2d21ac55 A |
611 | if (!nfsrv_is_initialized()) { |
612 | return (EINVAL); | |
613 | } | |
614 | ||
55e303ae A |
615 | fmode = FFLAGS(uap->flags); |
616 | /* why not allow a non-read/write open for our lockd? */ | |
2d21ac55 | 617 | if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) |
55e303ae | 618 | return (EINVAL); |
91447636 A |
619 | |
620 | error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); | |
2d21ac55 | 621 | if (error) |
91447636 A |
622 | return (error); |
623 | if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || | |
2d21ac55 | 624 | (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) |
91447636 A |
625 | return (EINVAL); |
626 | error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); | |
2d21ac55 | 627 | if (error) |
55e303ae | 628 | return (error); |
2d21ac55 | 629 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
91447636 | 630 | |
2d21ac55 | 631 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
91447636 | 632 | /* now give me my vnode, it gets returned to me with a reference */ |
2d21ac55 A |
633 | error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); |
634 | lck_rw_done(&nfsrv_export_rwlock); | |
0c530ab8 | 635 | if (error) { |
2d21ac55 A |
636 | if (error == NFSERR_TRYLATER) |
637 | error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? | |
55e303ae | 638 | return (error); |
0c530ab8 | 639 | } |
91447636 | 640 | |
55e303ae | 641 | /* |
91447636 A |
642 | * From now on we have to make sure not |
643 | * to forget about the vnode. | |
644 | * Any error that causes an abort must vnode_put(vp). | |
645 | * Just set error = err and 'goto bad;'. | |
55e303ae A |
646 | */ |
647 | ||
648 | /* | |
649 | * from vn_open | |
650 | */ | |
91447636 | 651 | if (vnode_vtype(vp) == VSOCK) { |
55e303ae A |
652 | error = EOPNOTSUPP; |
653 | goto bad; | |
654 | } | |
655 | ||
91447636 A |
656 | /* disallow write operations on directories */ |
657 | if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { | |
658 | error = EISDIR; | |
55e303ae A |
659 | goto bad; |
660 | } | |
661 | ||
91447636 A |
662 | /* compute action to be authorized */ |
663 | action = 0; | |
664 | if (fmode & FREAD) | |
665 | action |= KAUTH_VNODE_READ_DATA; | |
666 | if (fmode & (FWRITE | O_TRUNC)) | |
667 | action |= KAUTH_VNODE_WRITE_DATA; | |
2d21ac55 | 668 | if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) |
91447636 | 669 | goto bad; |
55e303ae | 670 | |
2d21ac55 | 671 | if ((error = VNOP_OPEN(vp, fmode, ctx))) |
91447636 | 672 | goto bad; |
6d2010ae | 673 | if ((error = vnode_ref_ext(vp, fmode, 0))) |
55e303ae A |
674 | goto bad; |
675 | ||
55e303ae A |
676 | /* |
677 | * end of vn_open code | |
678 | */ | |
679 | ||
91447636 | 680 | // starting here... error paths should call vn_close/vnode_put |
2d21ac55 A |
681 | if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { |
682 | vn_close(vp, fmode & FMASK, ctx); | |
55e303ae A |
683 | goto bad; |
684 | } | |
685 | fp = nfp; | |
686 | ||
91447636 | 687 | fp->f_fglob->fg_flag = fmode & FMASK; |
91447636 A |
688 | fp->f_fglob->fg_ops = &vnops; |
689 | fp->f_fglob->fg_data = (caddr_t)vp; | |
690 | ||
691 | // XXX do we really need to support this with fhopen()? | |
55e303ae A |
692 | if (fmode & (O_EXLOCK | O_SHLOCK)) { |
693 | lf.l_whence = SEEK_SET; | |
694 | lf.l_start = 0; | |
695 | lf.l_len = 0; | |
696 | if (fmode & O_EXLOCK) | |
697 | lf.l_type = F_WRLCK; | |
698 | else | |
699 | lf.l_type = F_RDLCK; | |
700 | type = F_FLOCK; | |
701 | if ((fmode & FNONBLOCK) == 0) | |
702 | type |= F_WAIT; | |
39236c6e | 703 | if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { |
2d21ac55 A |
704 | struct vfs_context context = *vfs_context_current(); |
705 | /* Modify local copy (to not damage thread copy) */ | |
706 | context.vc_ucred = fp->f_fglob->fg_cred; | |
707 | ||
708 | vn_close(vp, fp->f_fglob->fg_flag, &context); | |
91447636 | 709 | fp_free(p, indx, fp); |
55e303ae A |
710 | return (error); |
711 | } | |
91447636 | 712 | fp->f_fglob->fg_flag |= FHASLOCK; |
55e303ae A |
713 | } |
714 | ||
91447636 A |
715 | vnode_put(vp); |
716 | ||
717 | proc_fdlock(p); | |
6601e61a | 718 | procfdtbl_releasefd(p, indx, NULL); |
91447636 A |
719 | fp_drop(p, indx, fp, 1); |
720 | proc_fdunlock(p); | |
721 | ||
55e303ae A |
722 | *retval = indx; |
723 | return (0); | |
724 | ||
725 | bad: | |
91447636 | 726 | vnode_put(vp); |
55e303ae A |
727 | return (error); |
728 | } | |
729 | ||
1c79356b | 730 | /* |
2d21ac55 | 731 | * NFS server pseudo system call |
1c79356b | 732 | */ |
1c79356b | 733 | int |
91447636 | 734 | nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) |
1c79356b | 735 | { |
91447636 A |
736 | mbuf_t nam; |
737 | struct user_nfsd_args user_nfsdarg; | |
91447636 | 738 | socket_t so; |
1c79356b A |
739 | int error; |
740 | ||
ccc36f2f A |
741 | AUDIT_ARG(cmd, uap->flag); |
742 | ||
1c79356b | 743 | /* |
b0d623f7 | 744 | * Must be super user for most operations (export ops checked later). |
1c79356b | 745 | */ |
b0d623f7 | 746 | if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) |
1c79356b | 747 | return (error); |
2d21ac55 A |
748 | #if CONFIG_MACF |
749 | error = mac_system_check_nfsd(kauth_cred_get()); | |
750 | if (error) | |
751 | return (error); | |
752 | #endif | |
91447636 | 753 | |
2d21ac55 A |
754 | /* make sure NFS server data structures have been initialized */ |
755 | nfsrv_init(); | |
1c79356b | 756 | |
2d21ac55 | 757 | if (uap->flag & NFSSVC_ADDSOCK) { |
91447636 A |
758 | if (IS_64BIT_PROCESS(p)) { |
759 | error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); | |
760 | } else { | |
761 | struct nfsd_args tmp_args; | |
762 | error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); | |
763 | if (error == 0) { | |
764 | user_nfsdarg.sock = tmp_args.sock; | |
765 | user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); | |
766 | user_nfsdarg.namelen = tmp_args.namelen; | |
767 | } | |
768 | } | |
1c79356b A |
769 | if (error) |
770 | return (error); | |
91447636 A |
771 | /* get the socket */ |
772 | error = file_socket(user_nfsdarg.sock, &so); | |
1c79356b A |
773 | if (error) |
774 | return (error); | |
91447636 A |
775 | /* Get the client address for connected sockets. */ |
776 | if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { | |
777 | nam = NULL; | |
778 | } else { | |
779 | error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); | |
780 | if (error) { | |
781 | /* drop the iocount file_socket() grabbed on the file descriptor */ | |
782 | file_drop(user_nfsdarg.sock); | |
1c79356b | 783 | return (error); |
91447636 | 784 | } |
1c79356b | 785 | } |
91447636 A |
786 | /* |
787 | * nfssvc_addsock() will grab a retain count on the socket | |
788 | * to keep the socket from being closed when nfsd closes its | |
789 | * file descriptor for it. | |
790 | */ | |
2d21ac55 | 791 | error = nfssvc_addsock(so, nam); |
91447636 A |
792 | /* drop the iocount file_socket() grabbed on the file descriptor */ |
793 | file_drop(user_nfsdarg.sock); | |
794 | } else if (uap->flag & NFSSVC_NFSD) { | |
2d21ac55 | 795 | error = nfssvc_nfsd(); |
91447636 | 796 | } else if (uap->flag & NFSSVC_EXPORT) { |
2d21ac55 | 797 | error = nfssvc_export(uap->argp); |
91447636 A |
798 | } else { |
799 | error = EINVAL; | |
1c79356b | 800 | } |
1c79356b A |
801 | if (error == EINTR || error == ERESTART) |
802 | error = 0; | |
803 | return (error); | |
804 | } | |
805 | ||
1c79356b A |
806 | /* |
807 | * Adds a socket to the list for servicing by nfsds. | |
808 | */ | |
b0d623f7 | 809 | int |
2d21ac55 | 810 | nfssvc_addsock(socket_t so, mbuf_t mynam) |
1c79356b | 811 | { |
2d21ac55 A |
812 | struct nfsrv_sock *slp; |
813 | int error = 0, sodomain, sotype, soprotocol, on = 1; | |
316670eb | 814 | int first; |
91447636 A |
815 | struct timeval timeo; |
816 | ||
817 | /* make sure mbuf constants are set up */ | |
2d21ac55 | 818 | if (!nfs_mbuf_mhlen) |
91447636 A |
819 | nfs_mbuf_init(); |
820 | ||
821 | sock_gettype(so, &sodomain, &sotype, &soprotocol); | |
822 | ||
6d2010ae A |
823 | /* There should be only one UDP socket for each of IPv4 and IPv6 */ |
824 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { | |
825 | mbuf_freem(mynam); | |
826 | return (EEXIST); | |
827 | } | |
828 | if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { | |
91447636 | 829 | mbuf_freem(mynam); |
2d21ac55 | 830 | return (EEXIST); |
1c79356b A |
831 | } |
832 | ||
2d21ac55 A |
833 | /* Set protocol options and reserve some space (for UDP). */ |
834 | if (sotype == SOCK_STREAM) | |
91447636 | 835 | sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); |
2d21ac55 | 836 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) |
91447636 | 837 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
2d21ac55 A |
838 | if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ |
839 | int reserve = NFS_UDPSOCKBUF; | |
840 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); | |
841 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); | |
842 | if (error) { | |
843 | log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error); | |
844 | error = 0; | |
845 | } | |
1c79356b | 846 | } |
91447636 A |
847 | sock_nointerrupt(so, 0); |
848 | ||
2d21ac55 A |
849 | /* |
850 | * Set socket send/receive timeouts. | |
851 | * Receive timeout shouldn't matter, but setting the send timeout | |
852 | * will make sure that an unresponsive client can't hang the server. | |
853 | */ | |
91447636 | 854 | timeo.tv_usec = 0; |
2d21ac55 A |
855 | timeo.tv_sec = 1; |
856 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); | |
857 | timeo.tv_sec = 30; | |
858 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); | |
859 | if (error) { | |
860 | log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error); | |
861 | error = 0; | |
862 | } | |
91447636 | 863 | |
2d21ac55 A |
864 | MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); |
865 | if (!slp) { | |
866 | mbuf_freem(mynam); | |
867 | return (ENOMEM); | |
868 | } | |
869 | bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); | |
870 | lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); | |
871 | lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); | |
872 | ||
873 | lck_mtx_lock(nfsd_mutex); | |
874 | ||
875 | if (soprotocol == IPPROTO_UDP) { | |
6d2010ae A |
876 | if (sodomain == AF_INET) { |
877 | /* There should be only one UDP/IPv4 socket */ | |
878 | if (nfsrv_udpsock) { | |
879 | lck_mtx_unlock(nfsd_mutex); | |
880 | nfsrv_slpfree(slp); | |
881 | mbuf_freem(mynam); | |
882 | return (EEXIST); | |
883 | } | |
884 | nfsrv_udpsock = slp; | |
885 | } | |
886 | if (sodomain == AF_INET6) { | |
887 | /* There should be only one UDP/IPv6 socket */ | |
888 | if (nfsrv_udp6sock) { | |
889 | lck_mtx_unlock(nfsd_mutex); | |
890 | nfsrv_slpfree(slp); | |
891 | mbuf_freem(mynam); | |
892 | return (EEXIST); | |
893 | } | |
894 | nfsrv_udp6sock = slp; | |
91447636 | 895 | } |
1c79356b | 896 | } |
91447636 | 897 | |
2d21ac55 | 898 | /* add the socket to the list */ |
316670eb | 899 | first = TAILQ_EMPTY(&nfsrv_socklist); |
2d21ac55 A |
900 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); |
901 | ||
91447636 | 902 | sock_retain(so); /* grab a retain count on the socket */ |
1c79356b | 903 | slp->ns_so = so; |
91447636 | 904 | slp->ns_sotype = sotype; |
1c79356b | 905 | slp->ns_nam = mynam; |
91447636 | 906 | |
316670eb A |
907 | /* set up the socket up-call */ |
908 | nfsrv_uc_addsock(slp, first); | |
91447636 | 909 | |
2d21ac55 A |
910 | /* mark that the socket is not in the nfsrv_sockwg list */ |
911 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
912 | ||
91447636 A |
913 | slp->ns_flag = SLP_VALID | SLP_NEEDQ; |
914 | ||
1c79356b | 915 | nfsrv_wakenfsd(slp); |
91447636 A |
916 | lck_mtx_unlock(nfsd_mutex); |
917 | ||
1c79356b A |
918 | return (0); |
919 | } | |
920 | ||
921 | /* |
2d21ac55 A |
922 | * nfssvc_nfsd() |
923 | * |
924 | * nfsd theory of operation: |
925 | * |
926 | * The first nfsd thread stays in user mode accepting new TCP connections |
927 | * which are then added via the "addsock" call. The rest of the nfsd threads |
928 | * simply call into the kernel and remain there in a loop handling NFS |
929 | * requests until killed by a signal. |
930 | * |
931 | * There's a list of nfsd threads (nfsd_head). |
932 | * There's an nfsd queue that contains only those nfsds that are |
933 | * waiting for work to do (nfsd_queue). |
934 | * |
935 | * There's a list of all NFS sockets (nfsrv_socklist) and two queues for |
936 | * managing the work on the sockets: |
937 | * nfsrv_sockwait - sockets w/new data waiting to be worked on |
938 | * nfsrv_sockwork - sockets being worked on which may have more work to do |
939 | * nfsrv_sockwg -- sockets which have pending write gather data |
940 | * When a socket receives data, if it is not currently queued, it |
941 | * will be placed at the end of the "wait" queue. |
942 | * Whenever a socket needs servicing we make sure it is queued and |
943 | * wake up a waiting nfsd (if there is one). |
944 | * |
945 | * nfsds will service at most 8 requests from the same socket before |
946 | * defecting to work on another socket. |
947 | * nfsds will defect immediately if there are any sockets in the "wait" queue |
948 | * nfsds looking for a socket to work on check the "wait" queue first and |
949 | * then check the "work" queue. |
950 | * When an nfsd starts working on a socket, it removes it from the head of |
951 | * the queue it's currently on and moves it to the end of the "work" queue. |
952 | * When nfsds are checking the queues for work, any sockets found not to |
953 | * have any work are simply dropped from the queue. |
954 | * |
 * Returns ENOMEM if the per-thread nfsd structure can't be allocated;
 * otherwise returns whatever 'error' holds when the service loop is left
 * (0 when surplus threads exit, the msleep() error, EINTR on shutdown, or
 * EINTR/ERESTART from sending a reply).
 *
1c79356b | 955 | */ |
b0d623f7 | 956 | int |
2d21ac55 | 957 | nfssvc_nfsd(void) |
1c79356b | 958 | { |
2d21ac55 A |
959 | mbuf_t m, mrep; |
960 | struct nfsrv_sock *slp; |
961 | struct nfsd *nfsd; |
1c79356b | 962 | struct nfsrv_descript *nd = NULL; |
91447636 | 963 | int error = 0, cacherep, writes_todo; |
2d21ac55 | 964 | int siz, procrastinate, opcnt = 0; |
1c79356b | 965 | u_quad_t cur_usec; |
55e303ae | 966 | struct timeval now; |
2d21ac55 | 967 | struct vfs_context context; |
316670eb | 968 | struct timespec to; |
1c79356b A |
969 | |
970 | #ifndef nolint |
971 | cacherep = RC_DOIT; |
972 | writes_todo = 0; |
973 | #endif |
91447636 | 974 | |
/* Allocate and zero this thread's nfsd record. */
2d21ac55 A |
975 | MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); |
976 | if (!nfsd) |
977 | return (ENOMEM); |
978 | bzero(nfsd, sizeof(struct nfsd)); |
/*
 * Under nfsd_mutex: count this thread (the first one initializes the
 * request cache) and link its record onto nfsd_head.
 */
979 | lck_mtx_lock(nfsd_mutex); |
980 | if (nfsd_thread_count++ == 0) |
981 | nfsrv_initcache(); /* Init the server request cache */ |
316670eb | 982 | |
2d21ac55 A |
983 | TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); |
984 | lck_mtx_unlock(nfsd_mutex); |
985 | |
986 | context.vc_thread = current_thread(); |
91447636 | 987 | |
316670eb A |
988 | /* Set time out so that nfsd threads can wake up and see if they are still needed. */ |
989 | to.tv_sec = 5; |
990 | to.tv_nsec = 0; |
991 | |
1c79356b A |
992 | /* |
993 | * Loop getting rpc requests until SIGKILL. |
994 | */ |
995 | for (;;) { |
2d21ac55 A |
996 | if (nfsd_thread_max <= 0) { |
997 | /* NFS server shutting down, get out ASAP */ |
998 | error = EINTR; |
999 | slp = nfsd->nfsd_slp; |
1000 | } else if (nfsd->nfsd_flag & NFSD_REQINPROG) { |
1001 | /* already have some work to do */ |
1002 | error = 0; |
1003 | slp = nfsd->nfsd_slp; |
1004 | } else { |
1005 | /* need to find work to do */ |
1006 | error = 0; |
91447636 | 1007 | lck_mtx_lock(nfsd_mutex); |
2d21ac55 A |
1008 | while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) { |
1009 | if (nfsd_thread_count > nfsd_thread_max) { |
1010 | /* |
1011 | * If we have no socket and there are more |
1012 | * nfsd threads than configured, let's exit. |
1013 | */ |
1014 | error = 0; |
1015 | goto done; |
1016 | } |
/*
 * Nothing to do: queue ourselves as a waiting nfsd and sleep on this
 * nfsd record, passing nfsd_mutex and the timeout 'to' to msleep().
 * On EWOULDBLOCK the loop condition is re-checked; any other error
 * leaves through 'done'.
 */
1c79356b | 1017 | nfsd->nfsd_flag |= NFSD_WAITING; |
2d21ac55 | 1018 | TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue); |
316670eb | 1019 | error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to); |
91447636 | 1020 | if (error) { |
2d21ac55 A |
1021 | if (nfsd->nfsd_flag & NFSD_WAITING) { |
1022 | TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue); |
1023 | nfsd->nfsd_flag &= ~NFSD_WAITING; |
1024 | } |
316670eb A |
1025 | if (error == EWOULDBLOCK) |
1026 | continue; |
1c79356b | 1027 | goto done; |
91447636 | 1028 | } |
1c79356b | 1029 | } |
2d21ac55 A |
1030 | slp = nfsd->nfsd_slp; |
1031 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) { |
1032 | /* look for a socket to work on in the wait queue */ |
1033 | while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) { |
1034 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1035 | /* remove from the head of the queue */ |
1036 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); |
1037 | slp->ns_flag &= ~SLP_WAITQ; |
1038 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) |
1039 | break; |
1040 | /* nothing to do, so skip this socket */ |
1041 | lck_rw_done(&slp->ns_rwlock); |
1c79356b | 1042 | } |
2d21ac55 A |
1043 | } |
1044 | if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) { |
1045 | /* look for a socket to work on in the work queue */ |
1046 | while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) { |
1047 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1048 | /* remove from the head of the queue */ |
1049 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); |
1050 | slp->ns_flag &= ~SLP_WORKQ; |
1051 | if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) |
1052 | break; |
1053 | /* nothing to do, so skip this socket */ |
1054 | lck_rw_done(&slp->ns_rwlock); |
1055 | } |
1056 | } |
/*
 * A socket picked from one of the queues above is still locked
 * exclusively here (the loops break out before lck_rw_done()).
 */
1057 | if (!nfsd->nfsd_slp && slp) { |
1058 | /* we found a socket to work on, grab a reference */ |
1059 | slp->ns_sref++; |
1060 | nfsd->nfsd_slp = slp; |
1061 | opcnt = 0; |
1062 | /* and put it at the back of the work queue */ |
1063 | TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq); |
1064 | slp->ns_flag |= SLP_WORKQ; |
1065 | lck_rw_done(&slp->ns_rwlock); |
1c79356b | 1066 | } |
91447636 | 1067 | lck_mtx_unlock(nfsd_mutex); |
2d21ac55 | 1068 | if (!slp) |
1c79356b | 1069 | continue; |
/*
 * With the socket locked: pull in pending data if needed, zap the
 * socket if it disconnected, then try to get one request (nd).
 */
91447636 | 1070 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1c79356b | 1071 | if (slp->ns_flag & SLP_VALID) { |
743b1565 | 1072 | if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { |
91447636 A |
1073 | slp->ns_flag &= ~SLP_NEEDQ; |
1074 | nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); |
1075 | } |
743b1565 A |
1076 | if (slp->ns_flag & SLP_DISCONN) |
1077 | nfsrv_zapsock(slp); |
1c79356b | 1078 | error = nfsrv_dorec(slp, nfsd, &nd); |
2d21ac55 A |
1079 | if (error == EINVAL) { // RPCSEC_GSS drop |
1080 | if (slp->ns_sotype == SOCK_STREAM) |
1081 | nfsrv_zapsock(slp); // drop connection |
1082 | } |
1083 | writes_todo = 0; |
/*
 * No request was returned, but gathered writes may be pending: if their
 * time (ns_wgtime, compared in microseconds of uptime) has come, clear
 * the error and go process them.
 */
1084 | if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) { |
1085 | microuptime(&now); |
1086 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + |
1087 | (u_quad_t)now.tv_usec; |
1088 | if (slp->ns_wgtime <= cur_usec) { |
1089 | error = 0; |
1090 | cacherep = RC_DOIT; |
1091 | writes_todo = 1; |
1092 | } |
1093 | slp->ns_flag &= ~SLP_DOWRITES; |
1094 | } |
1c79356b A |
1095 | nfsd->nfsd_flag |= NFSD_REQINPROG; |
1096 | } |
91447636 | 1097 | lck_rw_done(&slp->ns_rwlock); |
1c79356b | 1098 | } |
/*
 * On error, or if the socket is no longer valid: free any request
 * descriptor, give up the socket and its reference, then either leave
 * the loop (server shutting down) or go look for other work.
 */
2d21ac55 | 1099 | if (error || (slp && !(slp->ns_flag & SLP_VALID))) { |
1c79356b | 1100 | if (nd) { |
2d21ac55 | 1101 | nfsm_chain_cleanup(&nd->nd_nmreq); |
55e303ae | 1102 | if (nd->nd_nam2) |
91447636 | 1103 | mbuf_freem(nd->nd_nam2); |
0c530ab8 A |
1104 | if (IS_VALID_CRED(nd->nd_cr)) |
1105 | kauth_cred_unref(&nd->nd_cr); |
6d2010ae A |
1106 | if (nd->nd_gss_context) |
1107 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); |
2d21ac55 | 1108 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b A |
1109 | nd = NULL; |
1110 | } |
91447636 | 1111 | nfsd->nfsd_slp = NULL; |
1c79356b | 1112 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; |
2d21ac55 A |
1113 | if (slp) |
1114 | nfsrv_slpderef(slp); |
1115 | if (nfsd_thread_max <= 0) |
1116 | break; |
1c79356b A |
1117 | continue; |
1118 | } |
/*
 * We have a request: timestamp it, pick the address to reply to (the
 * request's own nd_nam2 if set, else the socket's ns_nam), and let
 * nfsrv_getcache() choose how it is handled (RC_DOIT, RC_REPLY or
 * RC_DROPIT in the switch below).
 */
1c79356b | 1119 | if (nd) { |
55e303ae | 1120 | microuptime(&nd->nd_starttime); |
1c79356b A |
1121 | if (nd->nd_nam2) |
1122 | nd->nd_nam = nd->nd_nam2; |
1123 | else |
1124 | nd->nd_nam = slp->ns_nam; |
1125 | |
2d21ac55 A |
1126 | cacherep = nfsrv_getcache(nd, slp, &mrep); |
1127 | |
/*
 * When reserved ports are required, any request other than NFSPROC_NULL
 * whose source port is >= IPPORT_RESERVED is turned into an NFSPROC_NOOP
 * carrying an AUTH_TOOWEAK error status.
 */
1128 | if (nfsrv_require_resv_port) { |
1129 | /* Check if source port is a reserved port */ |
6d2010ae A |
1130 | in_port_t port = 0; |
1131 | struct sockaddr *saddr = mbuf_data(nd->nd_nam); |
1132 | |
1133 | if (saddr->sa_family == AF_INET) |
1134 | port = ntohs(((struct sockaddr_in*)saddr)->sin_port); |
1135 | else if (saddr->sa_family == AF_INET6) |
1136 | port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port); |
1137 | if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) { |
1c79356b A |
1138 | nd->nd_procnum = NFSPROC_NOOP; |
1139 | nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); |
1140 | cacherep = RC_DOIT; |
1c79356b A |
1141 | } |
1142 | } |
1143 | |
1144 | } |
1145 | |
1146 | /* |
2d21ac55 | 1147 | * Loop to get all the write RPC replies that have been |
1c79356b A |
1148 | * gathered together. |
1149 | */ |
1150 | do { |
1151 | switch (cacherep) { |
1152 | case RC_DOIT: |
/* nd may be NULL here when we came in only to flush gathered writes (writes_todo) */
2d21ac55 A |
1153 | if (nd && (nd->nd_vers == NFS_VER3)) |
1154 | procrastinate = nfsrv_wg_delay_v3; |
1c79356b | 1155 | else |
2d21ac55 A |
1156 | procrastinate = nfsrv_wg_delay; |
/* the procedure runs with the export lock held shared */
1157 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
1158 | context.vc_ucred = NULL; |
91447636 | 1159 | if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) |
2d21ac55 A |
1160 | error = nfsrv_writegather(&nd, slp, &context, &mrep); |
1161 | else |
1162 | error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep); |
1163 | lck_rw_done(&nfsrv_export_rwlock); |
1164 | if (mrep == NULL) { |
1165 | /* |
1166 | * If this is a stream socket and we are not going |
1167 | * to send a reply we better close the connection |
1168 | * so the client doesn't hang. |
1169 | */ |
1170 | if (error && slp->ns_sotype == SOCK_STREAM) { |
1171 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1172 | nfsrv_zapsock(slp); |
1173 | lck_rw_done(&slp->ns_rwlock); |
1174 | printf("NFS server: NULL reply from proc = %d error = %d\n", |
1175 | nd->nd_procnum, error); |
1176 | } |
1c79356b | 1177 | break; |
2d21ac55 A |
1178 | |
1179 | } |
1c79356b | 1180 | if (error) { |
316670eb | 1181 | OSAddAtomic64(1, &nfsstats.srv_errs); |
2d21ac55 | 1182 | nfsrv_updatecache(nd, FALSE, mrep); |
55e303ae | 1183 | if (nd->nd_nam2) { |
91447636 | 1184 | mbuf_freem(nd->nd_nam2); |
55e303ae A |
1185 | nd->nd_nam2 = NULL; |
1186 | } |
1c79356b A |
1187 | break; |
1188 | } |
316670eb | 1189 | OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]); |
2d21ac55 A |
1190 | nfsrv_updatecache(nd, TRUE, mrep); |
1191 | /* FALLTHRU */ |
1192 | |
1c79356b | 1193 | case RC_REPLY: |
2d21ac55 A |
1194 | if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS |
1195 | /* |
1196 | * Need to checksum or encrypt the reply |
1197 | */ |
1198 | error = nfs_gss_svc_protect_reply(nd, mrep); |
1199 | if (error) { |
1200 | mbuf_freem(mrep); |
1201 | break; |
1202 | } |
1203 | } |
1204 | |
1205 | /* |
1206 | * Get the total size of the reply |
1207 | */ |
1208 | m = mrep; |
1c79356b A |
1209 | siz = 0; |
1210 | while (m) { |
91447636 A |
1211 | siz += mbuf_len(m); |
1212 | m = mbuf_next(m); |
1c79356b A |
1213 | } |
1214 | if (siz <= 0 || siz > NFS_MAXPACKET) { |
1215 | printf("mbuf siz=%d\n",siz); |
1216 | panic("Bad nfs svc reply"); |
1217 | } |
2d21ac55 | 1218 | m = mrep; |
91447636 A |
1219 | mbuf_pkthdr_setlen(m, siz); |
1220 | error = mbuf_pkthdr_setrcvif(m, NULL); |
1221 | if (error) |
1222 | panic("nfsd setrcvif failed: %d", error); |
1c79356b A |
1223 | /* |
1224 | * For stream protocols, prepend a Sun RPC |
1225 | * Record Mark. |
1226 | */ |
91447636 A |
1227 | if (slp->ns_sotype == SOCK_STREAM) { |
1228 | error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); |
1229 | if (!error) |
b0d623f7 | 1230 | *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); |
1c79356b | 1231 | } |
/* send the reply if the socket is still valid; otherwise free it and report EPIPE */
91447636 A |
1232 | if (!error) { |
1233 | if (slp->ns_flag & SLP_VALID) { |
2d21ac55 | 1234 | error = nfsrv_send(slp, nd->nd_nam2, m); |
91447636 A |
1235 | } else { |
1236 | error = EPIPE; |
1237 | mbuf_freem(m); |
1238 | } |
1239 | } else { |
1240 | mbuf_freem(m); |
1c79356b | 1241 | } |
2d21ac55 | 1242 | mrep = NULL; |
55e303ae | 1243 | if (nd->nd_nam2) { |
91447636 | 1244 | mbuf_freem(nd->nd_nam2); |
55e303ae A |
1245 | nd->nd_nam2 = NULL; |
1246 | } |
91447636 A |
1247 | if (error == EPIPE) { |
1248 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1c79356b | 1249 | nfsrv_zapsock(slp); |
91447636 A |
1250 | lck_rw_done(&slp->ns_rwlock); |
1251 | } |
/*
 * Interrupted while sending: free the descriptor (nd_nam2 was already
 * freed and cleared just above), drop the socket reference and leave
 * through 'done', which expects nfsd_mutex to be held.
 */
1c79356b | 1252 | if (error == EINTR || error == ERESTART) { |
2d21ac55 | 1253 | nfsm_chain_cleanup(&nd->nd_nmreq); |
0c530ab8 A |
1254 | if (IS_VALID_CRED(nd->nd_cr)) |
1255 | kauth_cred_unref(&nd->nd_cr); |
6d2010ae A |
1256 | if (nd->nd_gss_context) |
1257 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); |
2d21ac55 | 1258 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b | 1259 | nfsrv_slpderef(slp); |
2d21ac55 | 1260 | lck_mtx_lock(nfsd_mutex); |
1c79356b A |
1261 | goto done; |
1262 | } |
1263 | break; |
1264 | case RC_DROPIT: |
91447636 | 1265 | mbuf_freem(nd->nd_nam2); |
2d21ac55 | 1266 | nd->nd_nam2 = NULL; |
1c79356b A |
1267 | break; |
1268 | }; |
/* one more request handled on this socket; release its descriptor */
2d21ac55 | 1269 | opcnt++; |
1c79356b | 1270 | if (nd) { |
2d21ac55 | 1271 | nfsm_chain_cleanup(&nd->nd_nmreq); |
55e303ae | 1272 | if (nd->nd_nam2) |
91447636 | 1273 | mbuf_freem(nd->nd_nam2); |
0c530ab8 A |
1274 | if (IS_VALID_CRED(nd->nd_cr)) |
1275 | kauth_cred_unref(&nd->nd_cr); |
6d2010ae A |
1276 | if (nd->nd_gss_context) |
1277 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); |
2d21ac55 | 1278 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
1c79356b A |
1279 | nd = NULL; |
1280 | } |
1281 | |
1282 | /* |
1283 | * Check to see if there are outstanding writes that |
1284 | * need to be serviced. |
1285 | */ |
2d21ac55 A |
1286 | writes_todo = 0; |
1287 | if (slp->ns_wgtime) { |
1288 | microuptime(&now); |
1289 | cur_usec = (u_quad_t)now.tv_sec * 1000000 + |
1290 | (u_quad_t)now.tv_usec; |
1291 | if (slp->ns_wgtime <= cur_usec) { |
1292 | cacherep = RC_DOIT; |
1293 | writes_todo = 1; |
1294 | } |
91447636 | 1295 | } |
1c79356b | 1296 | } while (writes_todo); |
2d21ac55 A |
1297 | |
/*
 * Stay on this socket only if no other socket is in the wait queue and
 * fewer than 8 requests have been handled on it; otherwise nd stays
 * NULL and the socket reference is dropped below.
 */
1298 | nd = NULL; |
1299 | if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) { |
1300 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1301 | error = nfsrv_dorec(slp, nfsd, &nd); |
1302 | if (error == EINVAL) { // RPCSEC_GSS drop |
1303 | if (slp->ns_sotype == SOCK_STREAM) |
1304 | nfsrv_zapsock(slp); // drop connection |
1305 | } |
91447636 | 1306 | lck_rw_done(&slp->ns_rwlock); |
2d21ac55 A |
1307 | } |
1308 | if (!nd) { |
1309 | /* drop our reference on the socket */ |
1c79356b A |
1310 | nfsd->nfsd_flag &= ~NFSD_REQINPROG; |
1311 | nfsd->nfsd_slp = NULL; |
1312 | nfsrv_slpderef(slp); |
91447636 | 1313 | } |
1c79356b | 1314 | } |
/*
 * Thread exit ('done' is reached with nfsd_mutex held): unlink and free
 * this thread's nfsd record; the last thread out cleans up the server.
 */
91447636 | 1315 | lck_mtx_lock(nfsd_mutex); |
2d21ac55 | 1316 | done: |
1c79356b | 1317 | TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); |
91447636 | 1318 | FREE(nfsd, M_NFSD); |
2d21ac55 A |
1319 | if (--nfsd_thread_count == 0) |
1320 | nfsrv_cleanup(); |
91447636 | 1321 | lck_mtx_unlock(nfsd_mutex); |
1c79356b A |
1322 | return (error); |
1323 | } |
91447636 | 1324 | |
b0d623f7 | 1325 | int |
2d21ac55 | 1326 | nfssvc_export(user_addr_t argp) |
91447636 A |
1327 | { |
1328 | int error = 0, is_64bit; | |
1329 | struct user_nfs_export_args unxa; | |
2d21ac55 | 1330 | vfs_context_t ctx = vfs_context_current(); |
91447636 | 1331 | |
2d21ac55 | 1332 | is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); |
91447636 A |
1333 | |
1334 | /* copy in pointers to path and export args */ | |
1335 | if (is_64bit) { | |
1336 | error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); | |
1337 | } else { | |
1338 | struct nfs_export_args tnxa; | |
1339 | error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); | |
1340 | if (error == 0) { | |
1341 | /* munge into LP64 version of nfs_export_args structure */ | |
1342 | unxa.nxa_fsid = tnxa.nxa_fsid; | |
1343 | unxa.nxa_expid = tnxa.nxa_expid; | |
1344 | unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); | |
1345 | unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); | |
1346 | unxa.nxa_flags = tnxa.nxa_flags; | |
1347 | unxa.nxa_netcount = tnxa.nxa_netcount; | |
1348 | unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); | |
1349 | } | |
1350 | } | |
1351 | if (error) | |
1352 | return (error); | |
1353 | ||
2d21ac55 | 1354 | error = nfsrv_export(&unxa, ctx); |
91447636 A |
1355 | |
1356 | return (error); | |
1357 | } | |
1358 | ||
1c79356b | 1359 | /* |
2d21ac55 | 1360 | * Shut down a socket associated with an nfsrv_sock structure. |
1c79356b A |
1361 | * Should be called with the send lock set, if required. |
1362 | * The trick here is to increment the sref at the start, so that the nfsds | |
1363 | * will stop using it and clear ns_flag at the end so that it will not be | |
1364 | * reassigned during cleanup. | |
1365 | */ | |
b0d623f7 | 1366 | void |
2d21ac55 | 1367 | nfsrv_zapsock(struct nfsrv_sock *slp) |
1c79356b | 1368 | { |
91447636 | 1369 | socket_t so; |
1c79356b | 1370 | |
91447636 A |
1371 | if ((slp->ns_flag & SLP_VALID) == 0) |
1372 | return; | |
1c79356b | 1373 | slp->ns_flag &= ~SLP_ALLFLAGS; |
91447636 A |
1374 | |
1375 | so = slp->ns_so; | |
1376 | if (so == NULL) | |
1377 | return; | |
1378 | ||
743b1565 | 1379 | /* |
316670eb A |
1380 | * Attempt to deter future up-calls, but leave the |
1381 | * up-call info in place to avoid a race with the | |
743b1565 A |
1382 | * networking code. |
1383 | */ | |
91447636 | 1384 | socket_lock(so, 1); |
91447636 A |
1385 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
1386 | socket_unlock(so, 1); | |
743b1565 | 1387 | |
91447636 | 1388 | sock_shutdown(so, SHUT_RDWR); |
316670eb A |
1389 | |
1390 | /* | |
1391 | * Remove from the up-call queue | |
1392 | */ | |
1393 | nfsrv_uc_dequeue(slp); | |
1c79356b A |
1394 | } |
1395 | ||
1c79356b | 1396 | /* |
91447636 | 1397 | * cleanup and release a server socket structure. |
1c79356b | 1398 | */ |
b0d623f7 | 1399 | void |
2d21ac55 | 1400 | nfsrv_slpfree(struct nfsrv_sock *slp) |
1c79356b | 1401 | { |
91447636 | 1402 | struct nfsrv_descript *nwp, *nnwp; |
1c79356b | 1403 | |
91447636 A |
1404 | if (slp->ns_so) { |
1405 | sock_release(slp->ns_so); | |
1406 | slp->ns_so = NULL; | |
1407 | } | |
1408 | if (slp->ns_nam) | |
1409 | mbuf_free(slp->ns_nam); | |
1410 | if (slp->ns_raw) | |
1411 | mbuf_freem(slp->ns_raw); | |
1412 | if (slp->ns_rec) | |
1413 | mbuf_freem(slp->ns_rec); | |
2d21ac55 A |
1414 | if (slp->ns_frag) |
1415 | mbuf_freem(slp->ns_frag); | |
1416 | slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; | |
1417 | slp->ns_reccnt = 0; | |
55e303ae | 1418 | |
316670eb A |
1419 | if (slp->ns_ua) |
1420 | FREE(slp->ns_ua, M_NFSSVC); | |
1421 | ||
91447636 A |
1422 | for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { |
1423 | nnwp = nwp->nd_tq.le_next; | |
1424 | LIST_REMOVE(nwp, nd_tq); | |
2d21ac55 A |
1425 | nfsm_chain_cleanup(&nwp->nd_nmreq); |
1426 | if (nwp->nd_mrep) | |
1427 | mbuf_freem(nwp->nd_mrep); | |
1428 | if (nwp->nd_nam2) | |
1429 | mbuf_freem(nwp->nd_nam2); | |
0c530ab8 A |
1430 | if (IS_VALID_CRED(nwp->nd_cr)) |
1431 | kauth_cred_unref(&nwp->nd_cr); | |
6d2010ae A |
1432 | if (nwp->nd_gss_context) |
1433 | nfs_gss_svc_ctx_deref(nwp->nd_gss_context); | |
2d21ac55 | 1434 | FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); |
55e303ae | 1435 | } |
91447636 A |
1436 | LIST_INIT(&slp->ns_tq); |
1437 | ||
2d21ac55 A |
1438 | lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); |
1439 | lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); | |
91447636 | 1440 | FREE(slp, M_NFSSVC); |
55e303ae A |
1441 | } |
1442 | ||
1443 | /* | |
91447636 A |
1444 | * Derefence a server socket structure. If it has no more references and |
1445 | * is no longer valid, you can throw it away. | |
55e303ae A |
1446 | */ |
1447 | void | |
2d21ac55 | 1448 | nfsrv_slpderef(struct nfsrv_sock *slp) |
55e303ae | 1449 | { |
743b1565 A |
1450 | struct timeval now; |
1451 | ||
91447636 A |
1452 | lck_mtx_lock(nfsd_mutex); |
1453 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1454 | slp->ns_sref--; | |
2d21ac55 | 1455 | |
91447636 | 1456 | if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { |
2d21ac55 A |
1457 | if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { |
1458 | /* remove socket from queue since there's no work */ | |
1459 | if (slp->ns_flag & SLP_WAITQ) | |
1460 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1461 | else | |
1462 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1463 | slp->ns_flag &= ~SLP_QUEUED; | |
1464 | } | |
91447636 A |
1465 | lck_rw_done(&slp->ns_rwlock); |
1466 | lck_mtx_unlock(nfsd_mutex); | |
1467 | return; | |
55e303ae | 1468 | } |
91447636 | 1469 | |
2d21ac55 A |
1470 | /* This socket is no longer valid, so we'll get rid of it */ |
1471 | ||
1472 | if (slp->ns_flag & SLP_QUEUED) { | |
1473 | if (slp->ns_flag & SLP_WAITQ) | |
1474 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1475 | else | |
1476 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1477 | slp->ns_flag &= ~SLP_QUEUED; | |
1478 | } | |
1479 | ||
1480 | /* | |
1481 | * Queue the socket up for deletion | |
1482 | * and start the timer to delete it | |
1483 | * after it has been in limbo for | |
1484 | * a while. | |
1485 | */ | |
743b1565 A |
1486 | microuptime(&now); |
1487 | slp->ns_timestamp = now.tv_sec; | |
2d21ac55 A |
1488 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); |
1489 | TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); | |
1490 | if (!nfsrv_deadsock_timer_on) { | |
1491 | nfsrv_deadsock_timer_on = 1; | |
1492 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1493 | NFSRV_DEADSOCKDELAY * 1000); | |
1494 | } | |
1495 | ||
743b1565 | 1496 | lck_rw_done(&slp->ns_rwlock); |
2d21ac55 A |
1497 | /* now remove from the write gather socket list */ |
1498 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1499 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1500 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1501 | } | |
91447636 | 1502 | lck_mtx_unlock(nfsd_mutex); |
55e303ae A |
1503 | } |
1504 | ||
1c79356b | 1505 | /* |
2d21ac55 A |
1506 | * Check periodically for dead sockets pending delete. |
1507 | * If a socket has been dead for more than NFSRV_DEADSOCKDELAY | |
1508 | * seconds then we assume it's safe to free. | |
1c79356b A |
1509 | */ |
1510 | void | |
2d21ac55 | 1511 | nfsrv_deadsock_timer(__unused void *param0, __unused void *param1) |
1c79356b | 1512 | { |
2d21ac55 | 1513 | struct nfsrv_sock *slp; |
743b1565 | 1514 | struct timeval now; |
2d21ac55 | 1515 | time_t time_to_wait; |
1c79356b | 1516 | |
2d21ac55 A |
1517 | microuptime(&now); |
1518 | lck_mtx_lock(nfsd_mutex); | |
1c79356b | 1519 | |
2d21ac55 A |
1520 | while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) { |
1521 | if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec) | |
1522 | break; | |
1523 | TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain); | |
1524 | nfsrv_slpfree(slp); | |
8ad349bb | 1525 | } |
2d21ac55 A |
1526 | if (TAILQ_EMPTY(&nfsrv_deadsocklist)) { |
1527 | nfsrv_deadsock_timer_on = 0; | |
1528 | lck_mtx_unlock(nfsd_mutex); | |
1529 | return; | |
91447636 | 1530 | } |
2d21ac55 A |
1531 | time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec; |
1532 | if (time_to_wait < 1) | |
1533 | time_to_wait = 1; | |
1c79356b | 1534 | |
2d21ac55 A |
1535 | lck_mtx_unlock(nfsd_mutex); |
1536 | ||
1537 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1538 | time_to_wait * 1000); | |
1c79356b A |
1539 | } |
1540 | ||
1541 | /* | |
2d21ac55 | 1542 | * Clean up the data structures for the server. |
1c79356b | 1543 | */ |
2d21ac55 A |
1544 | void |
1545 | nfsrv_cleanup(void) | |
1c79356b | 1546 | { |
2d21ac55 | 1547 | struct nfsrv_sock *slp, *nslp; |
55e303ae | 1548 | struct timeval now; |
b0d623f7 | 1549 | #if CONFIG_FSE |
2d21ac55 A |
1550 | struct nfsrv_fmod *fp, *nfp; |
1551 | int i; | |
b0d623f7 | 1552 | #endif |
1c79356b | 1553 | |
55e303ae | 1554 | microuptime(&now); |
2d21ac55 A |
1555 | for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { |
1556 | nslp = TAILQ_NEXT(slp, ns_chain); | |
1557 | if (slp->ns_flag & SLP_VALID) { | |
1558 | lck_rw_lock_exclusive(&slp->ns_rwlock); | |
1559 | nfsrv_zapsock(slp); | |
1560 | lck_rw_done(&slp->ns_rwlock); | |
1561 | } | |
1562 | if (slp->ns_flag & SLP_QUEUED) { | |
1563 | if (slp->ns_flag & SLP_WAITQ) | |
1564 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); | |
1565 | else | |
1566 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); | |
1567 | slp->ns_flag &= ~SLP_QUEUED; | |
1568 | } | |
1569 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { | |
1570 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); | |
1571 | slp->ns_wgq.tqe_next = SLPNOLIST; | |
1572 | } | |
1573 | /* queue the socket up for deletion */ | |
1574 | slp->ns_timestamp = now.tv_sec; | |
1575 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); | |
1576 | TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); | |
1577 | if (!nfsrv_deadsock_timer_on) { | |
1578 | nfsrv_deadsock_timer_on = 1; | |
1579 | nfs_interval_timer_start(nfsrv_deadsock_timer_call, | |
1580 | NFSRV_DEADSOCKDELAY * 1000); | |
1581 | } | |
1582 | } | |
1583 | ||
b0d623f7 | 1584 | #if CONFIG_FSE |
2d21ac55 A |
1585 | /* |
1586 | * Flush pending file write fsevents | |
1587 | */ | |
1588 | lck_mtx_lock(nfsrv_fmod_mutex); | |
1589 | for (i = 0; i < NFSRVFMODHASHSZ; i++) { | |
1590 | for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { | |
1591 | /* | |
1592 | * Fire off the content modified fsevent for each | |
1593 | * entry, remove it from the list, and free it. | |
1594 | */ | |
6d2010ae A |
1595 | if (nfsrv_fsevents_enabled) { |
1596 | fp->fm_context.vc_thread = current_thread(); | |
2d21ac55 A |
1597 | add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, |
1598 | FSE_ARG_VNODE, fp->fm_vp, | |
1599 | FSE_ARG_DONE); | |
6d2010ae | 1600 | } |
2d21ac55 A |
1601 | vnode_put(fp->fm_vp); |
1602 | kauth_cred_unref(&fp->fm_context.vc_ucred); | |
1603 | nfp = LIST_NEXT(fp, fm_link); | |
1604 | LIST_REMOVE(fp, fm_link); | |
1605 | FREE(fp, M_TEMP); | |
1606 | } | |
1607 | } | |
1608 | nfsrv_fmod_pending = 0; | |
1609 | lck_mtx_unlock(nfsrv_fmod_mutex); | |
b0d623f7 | 1610 | #endif |
2d21ac55 | 1611 | |
316670eb A |
1612 | nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ |
1613 | ||
2d21ac55 A |
1614 | nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ |
1615 | ||
1616 | nfsrv_cleancache(); /* And clear out server cache */ | |
1617 | ||
1618 | nfsrv_udpsock = NULL; | |
6d2010ae | 1619 | nfsrv_udp6sock = NULL; |
1c79356b | 1620 | } |
2d21ac55 | 1621 | |
1c79356b | 1622 | #endif /* NFS_NOSERVER */ |