]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_syscalls.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
79 #include <sys/stat.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
85 #include <sys/ubc.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
103
104 #include <security/audit/audit.h>
105
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
118 #if CONFIG_MACF
119 #include <security/mac_framework.h>
120 #endif
121
122 kern_return_t thread_terminate(thread_t); /* XXX */
123
124 #if NFSSERVER
125
126 extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
127 struct nfsrv_sock *slp,
128 vfs_context_t ctx,
129 mbuf_t *mrepp);
130 extern int nfsrv_wg_delay;
131 extern int nfsrv_wg_delay_v3;
132
133 static int nfsrv_require_resv_port = 0;
134 static int nfsrv_deadsock_timer_on = 0;
135
136 int nfssvc_export(user_addr_t argp);
137 int nfssvc_nfsd(void);
138 int nfssvc_addsock(socket_t, mbuf_t);
139 void nfsrv_zapsock(struct nfsrv_sock *);
140 void nfsrv_slpderef(struct nfsrv_sock *);
141 void nfsrv_slpfree(struct nfsrv_sock *);
142
143 #endif /* NFSSERVER */
144
145 /*
146 * sysctl stuff
147 */
148 SYSCTL_DECL(_vfs_generic);
149 SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
150
151 #if NFSCLIENT
152 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
153 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
154 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
155 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
156 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
157 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
158 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
159 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
160 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
161 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
162 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
163 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
164 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
165 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
166 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
167 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
168 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
169 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
170 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
171 SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
172
173
174 #endif /* NFSCLIENT */
175
176 #if NFSSERVER
177 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
178 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
179 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
180 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
181 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
182 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
183 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
184 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
185 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
186 SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
187 #if CONFIG_FSE
188 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
189 #endif
190 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
191 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
192 #ifdef NFS_UC_Q_DEBUG
193 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
194 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
195 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
196 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
197 #endif
198 #endif /* NFSSERVER */
199
200
201 #if NFSCLIENT
202
203 int
204 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
205 {
206 struct lockd_ans la;
207 int error;
208
209 switch (uap->flag) {
210 case NFSCLNT_LOCKDANS:
211 error = copyin(uap->argp, &la, sizeof(la));
212 if (!error)
213 error = nfslockdans(p, &la);
214 break;
215 case NFSCLNT_LOCKDNOTIFY:
216 error = nfslockdnotify(p, uap->argp);
217 break;
218 default:
219 error = EINVAL;
220 }
221 return (error);
222 }
223
224 /*
225 * Asynchronous I/O threads for client NFS.
226 * They do read-ahead and write-behind operations on the block I/O cache.
227 *
228 * Up to nfsiod_thread_max threads are launched on demand and exit
229 * when unused for a while. There are as many nfsiod structs as there are
230 * nfsiod threads; however there's no strict tie between a thread and a struct.
231 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
232 * up, it removes the next struct nfsiod from the queue and services it. Then
233 * it will put the struct at the head of free list and sleep on it.
234 * Async requests will pull the next struct nfsiod from the head of the free list,
235 * put it on the work queue, and wake whatever thread is waiting on that struct.
236 */
237
238 /*
239 * nfsiod thread exit routine
240 *
241 * Must be called with nfsiod_mutex held so that the
242 * decision to terminate is atomic with the termination.
243 */
244 void
245 nfsiod_terminate(struct nfsiod *niod)
246 {
247 nfsiod_thread_count--;
248 lck_mtx_unlock(nfsiod_mutex);
249 if (niod)
250 FREE(niod, M_TEMP);
251 else
252 printf("nfsiod: terminating without niod\n");
253 thread_terminate(current_thread());
254 /*NOTREACHED*/
255 }
256
257 /* nfsiod thread startup routine */
258 void
259 nfsiod_thread(void)
260 {
261 struct nfsiod *niod;
262 int error;
263
264 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
265 if (!niod) {
266 lck_mtx_lock(nfsiod_mutex);
267 nfsiod_thread_count--;
268 wakeup(current_thread());
269 lck_mtx_unlock(nfsiod_mutex);
270 thread_terminate(current_thread());
271 /*NOTREACHED*/
272 }
273 bzero(niod, sizeof(*niod));
274 lck_mtx_lock(nfsiod_mutex);
275 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
276 wakeup(current_thread());
277 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
278 /* shouldn't return... so we have an error */
279 /* remove an old nfsiod struct and terminate */
280 lck_mtx_lock(nfsiod_mutex);
281 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
282 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
283 nfsiod_terminate(niod);
284 /*NOTREACHED*/
285 }
286
287 /*
288 * Start up another nfsiod thread.
289 * (unless we're already maxed out and there are nfsiods running)
290 */
291 int
292 nfsiod_start(void)
293 {
294 thread_t thd = THREAD_NULL;
295
296 lck_mtx_lock(nfsiod_mutex);
297 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
298 lck_mtx_unlock(nfsiod_mutex);
299 return (EBUSY);
300 }
301 nfsiod_thread_count++;
302 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
303 lck_mtx_unlock(nfsiod_mutex);
304 return (EBUSY);
305 }
306 /* wait for the thread to complete startup */
307 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
308 thread_deallocate(thd);
309 return (0);
310 }
311
312 /*
313 * Continuation for Asynchronous I/O threads for NFS client.
314 *
315 * Grab an nfsiod struct to work on, do some work, then drop it
316 */
317 int
318 nfsiod_continue(int error)
319 {
320 struct nfsiod *niod;
321 struct nfsmount *nmp;
322 struct nfsreq *req, *treq;
323 struct nfs_reqqhead iodq;
324 int morework;
325
326 lck_mtx_lock(nfsiod_mutex);
327 niod = TAILQ_FIRST(&nfsiodwork);
328 if (!niod) {
329 /* there's no work queued up */
330 /* remove an old nfsiod struct and terminate */
331 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
332 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
333 nfsiod_terminate(niod);
334 /*NOTREACHED*/
335 }
336 TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
337
338 worktodo:
339 while ((nmp = niod->niod_nmp)) {
340 /*
341 * Service this mount's async I/O queue.
342 *
343 * In order to ensure some level of fairness between mounts,
344 * we grab all the work up front before processing it so any
345 * new work that arrives will be serviced on a subsequent
346 * iteration - and we have a chance to see if other work needs
347 * to be done (e.g. the delayed write queue needs to be pushed
348 * or other mounts are waiting for an nfsiod).
349 */
350 /* grab the current contents of the queue */
351 TAILQ_INIT(&iodq);
352 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
353 lck_mtx_unlock(nfsiod_mutex);
354
355 /* process the queue */
356 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
357 TAILQ_REMOVE(&iodq, req, r_achain);
358 req->r_achain.tqe_next = NFSREQNOLIST;
359 req->r_callback.rcb_func(req);
360 }
361
362 /* now check if there's more/other work to be done */
363 lck_mtx_lock(nfsiod_mutex);
364 morework = !TAILQ_EMPTY(&nmp->nm_iodq);
365 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
366 /* we're going to stop working on this mount */
367 if (morework) /* mount still needs more work so queue it up */
368 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
369 nmp->nm_niod = NULL;
370 niod->niod_nmp = NULL;
371 }
372 }
373
374 /* loop if there's still a mount to work on */
375 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
376 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
377 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
378 }
379 if (niod->niod_nmp)
380 goto worktodo;
381
382 /* queue ourselves back up - if there aren't too many threads running */
383 if (nfsiod_thread_count <= NFSIOD_MAX) {
384 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
385 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
386 /* shouldn't return... so we have an error */
387 /* remove an old nfsiod struct and terminate */
388 lck_mtx_lock(nfsiod_mutex);
389 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
390 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
391 }
392 nfsiod_terminate(niod);
393 /*NOTREACHED*/
394 return (0);
395 }
396
397 #endif /* NFSCLIENT */
398
399
400 #if NFSSERVER
401
402 /*
403 * NFS server system calls
404 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
405 */
406
407 /*
408 * Get file handle system call
409 */
410 int
411 getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
412 {
413 vnode_t vp;
414 struct nfs_filehandle nfh;
415 int error, fhlen, fidlen;
416 struct nameidata nd;
417 char path[MAXPATHLEN], *ptr;
418 size_t pathlen;
419 struct nfs_exportfs *nxfs;
420 struct nfs_export *nx;
421
422 /*
423 * Must be super user
424 */
425 error = proc_suser(p);
426 if (error)
427 return (error);
428
429 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
430 if (!error)
431 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
432 if (error)
433 return (error);
434 /* limit fh size to length specified (or v3 size by default) */
435 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
436 fhlen = NFSV3_MAX_FH_SIZE;
437 fidlen = fhlen - sizeof(struct nfs_exphandle);
438
439 if (!nfsrv_is_initialized())
440 return (EINVAL);
441
442 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
443 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
444 error = namei(&nd);
445 if (error)
446 return (error);
447 nameidone(&nd);
448
449 vp = nd.ni_vp;
450
451 // find exportfs that matches f_mntonname
452 lck_rw_lock_shared(&nfsrv_export_rwlock);
453 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
454 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
455 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
456 break;
457 }
458 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
459 error = EINVAL;
460 goto out;
461 }
462 // find export that best matches remainder of path
463 ptr = path + strlen(nxfs->nxfs_path);
464 while (*ptr && (*ptr == '/'))
465 ptr++;
466 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
467 int len = strlen(nx->nx_path);
468 if (len == 0) // we've hit the export entry for the root directory
469 break;
470 if (!strncmp(nx->nx_path, ptr, len))
471 break;
472 }
473 if (!nx) {
474 error = EINVAL;
475 goto out;
476 }
477
478 bzero(&nfh, sizeof(nfh));
479 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
480 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
481 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
482 nfh.nfh_xh.nxh_flags = 0;
483 nfh.nfh_xh.nxh_reserved = 0;
484 nfh.nfh_len = fidlen;
485 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
486 if (nfh.nfh_len > (uint32_t)fidlen)
487 error = EOVERFLOW;
488 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
489 nfh.nfh_len += sizeof(nfh.nfh_xh);
490 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
491
492 out:
493 lck_rw_done(&nfsrv_export_rwlock);
494 vnode_put(vp);
495 if (error)
496 return (error);
497 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
498 return (error);
499 }
500
501 extern const struct fileops vnops;
502
503 /*
504 * syscall for the rpc.lockd to use to translate a NFS file handle into
505 * an open descriptor.
506 *
507 * warning: do not remove the suser() call or this becomes one giant
508 * security hole.
509 */
510 int
511 fhopen( proc_t p,
512 struct fhopen_args *uap,
513 int32_t *retval)
514 {
515 vnode_t vp;
516 struct nfs_filehandle nfh;
517 struct nfs_export *nx;
518 struct nfs_export_options *nxo;
519 struct flock lf;
520 struct fileproc *fp, *nfp;
521 int fmode, error, type;
522 int indx;
523 vfs_context_t ctx = vfs_context_current();
524 kauth_action_t action;
525
526 /*
527 * Must be super user
528 */
529 error = suser(vfs_context_ucred(ctx), 0);
530 if (error) {
531 return (error);
532 }
533
534 if (!nfsrv_is_initialized()) {
535 return (EINVAL);
536 }
537
538 fmode = FFLAGS(uap->flags);
539 /* why not allow a non-read/write open for our lockd? */
540 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
541 return (EINVAL);
542
543 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
544 if (error)
545 return (error);
546 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
547 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
548 return (EINVAL);
549 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
550 if (error)
551 return (error);
552 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
553
554 lck_rw_lock_shared(&nfsrv_export_rwlock);
555 /* now give me my vnode, it gets returned to me with a reference */
556 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
557 lck_rw_done(&nfsrv_export_rwlock);
558 if (error) {
559 if (error == NFSERR_TRYLATER)
560 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
561 return (error);
562 }
563
564 /*
565 * From now on we have to make sure not
566 * to forget about the vnode.
567 * Any error that causes an abort must vnode_put(vp).
568 * Just set error = err and 'goto bad;'.
569 */
570
571 /*
572 * from vn_open
573 */
574 if (vnode_vtype(vp) == VSOCK) {
575 error = EOPNOTSUPP;
576 goto bad;
577 }
578
579 /* disallow write operations on directories */
580 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
581 error = EISDIR;
582 goto bad;
583 }
584
585 /* compute action to be authorized */
586 action = 0;
587 if (fmode & FREAD)
588 action |= KAUTH_VNODE_READ_DATA;
589 if (fmode & (FWRITE | O_TRUNC))
590 action |= KAUTH_VNODE_WRITE_DATA;
591 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
592 goto bad;
593
594 if ((error = VNOP_OPEN(vp, fmode, ctx)))
595 goto bad;
596 if ((error = vnode_ref_ext(vp, fmode, 0)))
597 goto bad;
598
599 /*
600 * end of vn_open code
601 */
602
603 // starting here... error paths should call vn_close/vnode_put
604 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
605 vn_close(vp, fmode & FMASK, ctx);
606 goto bad;
607 }
608 fp = nfp;
609
610 fp->f_fglob->fg_flag = fmode & FMASK;
611 fp->f_fglob->fg_ops = &vnops;
612 fp->f_fglob->fg_data = (caddr_t)vp;
613
614 // XXX do we really need to support this with fhopen()?
615 if (fmode & (O_EXLOCK | O_SHLOCK)) {
616 lf.l_whence = SEEK_SET;
617 lf.l_start = 0;
618 lf.l_len = 0;
619 if (fmode & O_EXLOCK)
620 lf.l_type = F_WRLCK;
621 else
622 lf.l_type = F_RDLCK;
623 type = F_FLOCK;
624 if ((fmode & FNONBLOCK) == 0)
625 type |= F_WAIT;
626 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
627 struct vfs_context context = *vfs_context_current();
628 /* Modify local copy (to not damage thread copy) */
629 context.vc_ucred = fp->f_fglob->fg_cred;
630
631 vn_close(vp, fp->f_fglob->fg_flag, &context);
632 fp_free(p, indx, fp);
633 return (error);
634 }
635 fp->f_fglob->fg_flag |= FHASLOCK;
636 }
637
638 vnode_put(vp);
639
640 proc_fdlock(p);
641 procfdtbl_releasefd(p, indx, NULL);
642 fp_drop(p, indx, fp, 1);
643 proc_fdunlock(p);
644
645 *retval = indx;
646 return (0);
647
648 bad:
649 vnode_put(vp);
650 return (error);
651 }
652
653 /*
654 * NFS server pseudo system call
655 */
656 int
657 nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
658 {
659 mbuf_t nam;
660 struct user_nfsd_args user_nfsdarg;
661 socket_t so;
662 int error;
663
664 AUDIT_ARG(cmd, uap->flag);
665
666 /*
667 * Must be super user for most operations (export ops checked later).
668 */
669 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
670 return (error);
671 #if CONFIG_MACF
672 error = mac_system_check_nfsd(kauth_cred_get());
673 if (error)
674 return (error);
675 #endif
676
677 /* make sure NFS server data structures have been initialized */
678 nfsrv_init();
679
680 if (uap->flag & NFSSVC_ADDSOCK) {
681 if (IS_64BIT_PROCESS(p)) {
682 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
683 } else {
684 struct nfsd_args tmp_args;
685 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
686 if (error == 0) {
687 user_nfsdarg.sock = tmp_args.sock;
688 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
689 user_nfsdarg.namelen = tmp_args.namelen;
690 }
691 }
692 if (error)
693 return (error);
694 /* get the socket */
695 error = file_socket(user_nfsdarg.sock, &so);
696 if (error)
697 return (error);
698 /* Get the client address for connected sockets. */
699 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
700 nam = NULL;
701 } else {
702 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
703 if (error) {
704 /* drop the iocount file_socket() grabbed on the file descriptor */
705 file_drop(user_nfsdarg.sock);
706 return (error);
707 }
708 }
709 /*
710 * nfssvc_addsock() will grab a retain count on the socket
711 * to keep the socket from being closed when nfsd closes its
712 * file descriptor for it.
713 */
714 error = nfssvc_addsock(so, nam);
715 /* drop the iocount file_socket() grabbed on the file descriptor */
716 file_drop(user_nfsdarg.sock);
717 } else if (uap->flag & NFSSVC_NFSD) {
718 error = nfssvc_nfsd();
719 } else if (uap->flag & NFSSVC_EXPORT) {
720 error = nfssvc_export(uap->argp);
721 } else {
722 error = EINVAL;
723 }
724 if (error == EINTR || error == ERESTART)
725 error = 0;
726 return (error);
727 }
728
729 /*
730 * Adds a socket to the list for servicing by nfsds.
731 */
732 int
733 nfssvc_addsock(socket_t so, mbuf_t mynam)
734 {
735 struct nfsrv_sock *slp;
736 int error = 0, sodomain, sotype, soprotocol, on = 1;
737 int first;
738 struct timeval timeo;
739
740 /* make sure mbuf constants are set up */
741 if (!nfs_mbuf_mhlen)
742 nfs_mbuf_init();
743
744 sock_gettype(so, &sodomain, &sotype, &soprotocol);
745
746 /* There should be only one UDP socket for each of IPv4 and IPv6 */
747 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
748 mbuf_freem(mynam);
749 return (EEXIST);
750 }
751 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
752 mbuf_freem(mynam);
753 return (EEXIST);
754 }
755
756 /* Set protocol options and reserve some space (for UDP). */
757 if (sotype == SOCK_STREAM)
758 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
759 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
760 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
761 if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
762 int reserve = NFS_UDPSOCKBUF;
763 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
764 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
765 if (error) {
766 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
767 error = 0;
768 }
769 }
770 sock_nointerrupt(so, 0);
771
772 /*
773 * Set socket send/receive timeouts.
774 * Receive timeout shouldn't matter, but setting the send timeout
775 * will make sure that an unresponsive client can't hang the server.
776 */
777 timeo.tv_usec = 0;
778 timeo.tv_sec = 1;
779 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
780 timeo.tv_sec = 30;
781 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
782 if (error) {
783 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
784 error = 0;
785 }
786
787 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
788 if (!slp) {
789 mbuf_freem(mynam);
790 return (ENOMEM);
791 }
792 bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
793 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
794 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
795
796 lck_mtx_lock(nfsd_mutex);
797
798 if (soprotocol == IPPROTO_UDP) {
799 if (sodomain == AF_INET) {
800 /* There should be only one UDP/IPv4 socket */
801 if (nfsrv_udpsock) {
802 lck_mtx_unlock(nfsd_mutex);
803 nfsrv_slpfree(slp);
804 mbuf_freem(mynam);
805 return (EEXIST);
806 }
807 nfsrv_udpsock = slp;
808 }
809 if (sodomain == AF_INET6) {
810 /* There should be only one UDP/IPv6 socket */
811 if (nfsrv_udp6sock) {
812 lck_mtx_unlock(nfsd_mutex);
813 nfsrv_slpfree(slp);
814 mbuf_freem(mynam);
815 return (EEXIST);
816 }
817 nfsrv_udp6sock = slp;
818 }
819 }
820
821 /* add the socket to the list */
822 first = TAILQ_EMPTY(&nfsrv_socklist);
823 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
824
825 sock_retain(so); /* grab a retain count on the socket */
826 slp->ns_so = so;
827 slp->ns_sotype = sotype;
828 slp->ns_nam = mynam;
829
830 /* set up the socket up-call */
831 nfsrv_uc_addsock(slp, first);
832
833 /* mark that the socket is not in the nfsrv_sockwg list */
834 slp->ns_wgq.tqe_next = SLPNOLIST;
835
836 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
837
838 nfsrv_wakenfsd(slp);
839 lck_mtx_unlock(nfsd_mutex);
840
841 return (0);
842 }
843
844 /*
845 * nfssvc_nfsd()
846 *
847 * nfsd theory of operation:
848 *
849 * The first nfsd thread stays in user mode accepting new TCP connections
850 * which are then added via the "addsock" call. The rest of the nfsd threads
851 * simply call into the kernel and remain there in a loop handling NFS
852 * requests until killed by a signal.
853 *
854 * There's a list of nfsd threads (nfsd_head).
855 * There's an nfsd queue that contains only those nfsds that are
856 * waiting for work to do (nfsd_queue).
857 *
858 * There's a list of all NFS sockets (nfsrv_socklist) and three queues for
859 * managing the work on the sockets:
860 * nfsrv_sockwait - sockets w/new data waiting to be worked on
861 * nfsrv_sockwork - sockets being worked on which may have more work to do
862 * nfsrv_sockwg -- sockets which have pending write gather data
863 * When a socket receives data, if it is not currently queued, it
864 * will be placed at the end of the "wait" queue.
865 * Whenever a socket needs servicing we make sure it is queued and
866 * wake up a waiting nfsd (if there is one).
867 *
868 * nfsds will service at most 8 requests from the same socket before
869 * defecting to work on another socket.
870 * nfsds will defect immediately if there are any sockets in the "wait" queue
871 * nfsds looking for a socket to work on check the "wait" queue first and
872 * then check the "work" queue.
873 * When an nfsd starts working on a socket, it removes it from the head of
874 * the queue it's currently on and moves it to the end of the "work" queue.
875 * When nfsds are checking the queues for work, any sockets found not to
876 * have any work are simply dropped from the queue.
877 *
878 */
/*
 * nfssvc_nfsd: service loop executed by one NFS server (nfsd) thread.
 *
 * Allocates a struct nfsd for this thread, links it onto nfsd_head, and then
 * repeatedly: picks a socket that has work, pulls a request from it with
 * nfsrv_dorec(), checks the request cache with nfsrv_getcache(), runs the
 * procedure handler (or the write gatherer), and sends the reply with
 * nfsrv_send().
 *
 * Returns ENOMEM if the nfsd structure cannot be allocated.  Otherwise it
 * returns the value of 'error' at the time the loop is left: EINTR once
 * nfsd_thread_max <= 0, 0 when this thread exits because there are more
 * threads than nfsd_thread_max, a non-EWOULDBLOCK msleep() error, or
 * EINTR/ERESTART seen on the reply send path.
 *
 * Locking: the code at 'done' expects nfsd_mutex to be held on arrival and
 * releases it before returning.
 */
879 int
880 nfssvc_nfsd(void)
881 {
882 mbuf_t m, mrep;
883 struct nfsrv_sock *slp;
884 struct nfsd *nfsd;
885 struct nfsrv_descript *nd = NULL;
886 int error = 0, cacherep, writes_todo;
887 int siz, procrastinate, opcnt = 0;
888 u_quad_t cur_usec;
889 struct timeval now;
890 struct vfs_context context;
891 struct timespec to;
892
/* Give the two loop-state variables defined starting values (skipped only when nolint is defined). */
893 #ifndef nolint
894 cacherep = RC_DOIT;
895 writes_todo = 0;
896 #endif
897
898 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
899 if (!nfsd)
900 return (ENOMEM);
901 bzero(nfsd, sizeof(struct nfsd));
902 lck_mtx_lock(nfsd_mutex);
903 if (nfsd_thread_count++ == 0)
904 nfsrv_initcache(); /* Init the server request cache */
905
/* Register on nfsd_head; the matching TAILQ_REMOVE is at 'done'. */
906 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
907 lck_mtx_unlock(nfsd_mutex);
908
/* The vfs context handed to the handlers carries this thread; vc_ucred is reset before each dispatch below. */
909 context.vc_thread = current_thread();
910
911 /* Set time out so that nfsd threads can wake up and see if they are still needed. */
912 to.tv_sec = 5;
913 to.tv_nsec = 0;
914
915 /*
916 * Loop getting rpc requests until SIGKILL.
917 */
918 for (;;) {
919 if (nfsd_thread_max <= 0) {
920 /* NFS server shutting down, get out ASAP */
921 error = EINTR;
922 slp = nfsd->nfsd_slp;
923 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
924 /* already have some work to do */
925 error = 0;
926 slp = nfsd->nfsd_slp;
927 } else {
928 /* need to find work to do */
929 error = 0;
930 lck_mtx_lock(nfsd_mutex);
/*
 * No socket is assigned to us and both service queues are empty:
 * wait on nfsd_queue until there is something to do.
 */
931 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
932 if (nfsd_thread_count > nfsd_thread_max) {
933 /*
934 * If we have no socket and there are more
935 * nfsd threads than configured, let's exit.
936 */
937 error = 0;
/* nfsd_mutex is still held here, which is what 'done' expects */
938 goto done;
939 }
/* Mark ourselves idle and sleep on our nfsd structure, for at most 'to' (5 seconds). */
940 nfsd->nfsd_flag |= NFSD_WAITING;
941 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
942 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
943 if (error) {
/*
 * Still flagged as waiting, so we are still on nfsd_queue and must
 * take ourselves off it.
 * NOTE(review): presumably whoever hands us work clears NFSD_WAITING
 * and dequeues us -- confirm against the wakeup code.
 */
944 if (nfsd->nfsd_flag & NFSD_WAITING) {
945 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
946 nfsd->nfsd_flag &= ~NFSD_WAITING;
947 }
/* EWOULDBLOCK (presumably the timeout expiring): re-evaluate the loop condition and the thread-count check. */
948 if (error == EWOULDBLOCK)
949 continue;
/* any other msleep() error ends this thread; nfsd_mutex is held for 'done' */
950 goto done;
951 }
952 }
/* slp is non-NULL at this point only if a socket was already assigned to this nfsd */
953 slp = nfsd->nfsd_slp;
954 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
955 /* look for a socket to work on in the wait queue */
/* on 'break' the socket's rwlock is still held exclusively; it is released once the socket is claimed below */
956 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
957 lck_rw_lock_exclusive(&slp->ns_rwlock);
958 /* remove from the head of the queue */
959 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
960 slp->ns_flag &= ~SLP_WAITQ;
961 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
962 break;
963 /* nothing to do, so skip this socket */
964 lck_rw_done(&slp->ns_rwlock);
965 }
966 }
967 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
968 /* look for a socket to work on in the work queue */
/* same convention as above: a 'break' leaves the rwlock held */
969 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
970 lck_rw_lock_exclusive(&slp->ns_rwlock);
971 /* remove from the head of the queue */
972 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
973 slp->ns_flag &= ~SLP_WORKQ;
974 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
975 break;
976 /* nothing to do, so skip this socket */
977 lck_rw_done(&slp->ns_rwlock);
978 }
979 }
980 if (!nfsd->nfsd_slp && slp) {
981 /* we found a socket to work on, grab a reference */
982 slp->ns_sref++;
983 nfsd->nfsd_slp = slp;
/* restart the per-socket operation count used for the "at most 8 requests" rule */
984 opcnt = 0;
985 /* and put it at the back of the work queue */
986 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
987 slp->ns_flag |= SLP_WORKQ;
988 lck_rw_done(&slp->ns_rwlock);
989 }
990 lck_mtx_unlock(nfsd_mutex);
/* both queues turned out to hold nothing usable; start over */
991 if (!slp)
992 continue;
/* pull the next request off the socket while holding its rwlock */
993 lck_rw_lock_exclusive(&slp->ns_rwlock);
994 if (slp->ns_flag & SLP_VALID) {
/* SLP_NEEDQ set without SLP_DISCONN: receive from the socket first */
995 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
996 slp->ns_flag &= ~SLP_NEEDQ;
997 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
998 }
999 if (slp->ns_flag & SLP_DISCONN)
1000 nfsrv_zapsock(slp);
1001 error = nfsrv_dorec(slp, nfsd, &nd);
1002 if (error == EINVAL) { // RPCSEC_GSS drop
1003 if (slp->ns_sotype == SOCK_STREAM)
1004 nfsrv_zapsock(slp); // drop connection
1005 }
1006 writes_todo = 0;
/*
 * nfsrv_dorec() failed, but write-gather state is pending on this
 * socket.  If the gather deadline (ns_wgtime, compared against uptime
 * in microseconds) has passed, clear the error and enter the reply
 * loop with writes_todo set instead of giving up.
 */
1007 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1008 microuptime(&now);
1009 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1010 (u_quad_t)now.tv_usec;
1011 if (slp->ns_wgtime <= cur_usec) {
1012 error = 0;
1013 cacherep = RC_DOIT;
1014 writes_todo = 1;
1015 }
1016 slp->ns_flag &= ~SLP_DOWRITES;
1017 }
/* remember that this nfsd has work in progress; the top of the loop tests this flag to skip the search */
1018 nfsd->nfsd_flag |= NFSD_REQINPROG;
1019 }
1020 lck_rw_done(&slp->ns_rwlock);
1021 }
/*
 * Error, shutdown, or the socket is no longer valid: discard any request
 * we hold, drop our socket reference, and either exit (shutdown) or go
 * look for other work.
 */
1022 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1023 if (nd) {
1024 nfsm_chain_cleanup(&nd->nd_nmreq);
1025 if (nd->nd_nam2)
1026 mbuf_freem(nd->nd_nam2);
1027 if (IS_VALID_CRED(nd->nd_cr))
1028 kauth_cred_unref(&nd->nd_cr);
1029 if (nd->nd_gss_context)
1030 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1031 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1032 nd = NULL;
1033 }
1034 nfsd->nfsd_slp = NULL;
1035 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1036 if (slp)
1037 nfsrv_slpderef(slp);
1038 if (nfsd_thread_max <= 0)
1039 break;
1040 continue;
1041 }
/* we hold a request: timestamp it, pick the address mbuf to use, and consult the request cache */
1042 if (nd) {
1043 microuptime(&nd->nd_starttime);
1044 if (nd->nd_nam2)
1045 nd->nd_nam = nd->nd_nam2;
1046 else
1047 nd->nd_nam = slp->ns_nam;
1048
1049 cacherep = nfsrv_getcache(nd, slp, &mrep);
1050
1051 if (nfsrv_require_resv_port) {
1052 /* Check if source port is a reserved port */
1053 in_port_t port = 0;
1054 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1055
1056 if (saddr->sa_family == AF_INET)
1057 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1058 else if (saddr->sa_family == AF_INET6)
1059 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
/*
 * port stays 0 for any other address family, so those are never
 * rejected here.  A rejected request is turned into NFSPROC_NOOP
 * carrying an AUTH_TOOWEAK status; the NULL procedure is exempt.
 */
1060 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1061 nd->nd_procnum = NFSPROC_NOOP;
1062 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1063 cacherep = RC_DOIT;
1064 }
1065 }
1066
1067 }
1068
1069 /*
1070 * Loop to get all the write RPC replies that have been
1071 * gathered together.
1072 */
1073 do {
1074 switch (cacherep) {
1075 case RC_DOIT:
/* nd can be NULL here on the writes_todo path, hence the 'nd &&' test */
1076 if (nd && (nd->nd_vers == NFS_VER3))
1077 procrastinate = nfsrv_wg_delay_v3;
1078 else
1079 procrastinate = nfsrv_wg_delay;
1080 lck_rw_lock_shared(&nfsrv_export_rwlock);
1081 context.vc_ucred = NULL;
/*
 * With writes_todo set the || short-circuits, so nd is not
 * dereferenced.  WRITE requests go to the gatherer only when the
 * configured delay is positive; everything else is dispatched
 * through the nfsrv_procs[] table.
 */
1082 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
1083 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1084 else
1085 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1086 lck_rw_done(&nfsrv_export_rwlock);
1087 if (mrep == NULL) {
1088 /*
1089 * If this is a stream socket and we are not going
1090 * to send a reply we better close the connection
1091 * so the client doesn't hang.
1092 */
1093 if (error && slp->ns_sotype == SOCK_STREAM) {
1094 lck_rw_lock_exclusive(&slp->ns_rwlock);
1095 nfsrv_zapsock(slp);
1096 lck_rw_done(&slp->ns_rwlock);
1097 printf("NFS server: NULL reply from proc = %d error = %d\n",
1098 nd->nd_procnum, error);
1099 }
1100 break;
1101
1102 }
/* the handler failed but produced a reply: count the error and update the cache with FALSE; nothing is sent from this branch */
1103 if (error) {
1104 OSAddAtomic64(1, &nfsstats.srv_errs);
1105 nfsrv_updatecache(nd, FALSE, mrep);
1106 if (nd->nd_nam2) {
1107 mbuf_freem(nd->nd_nam2);
1108 nd->nd_nam2 = NULL;
1109 }
1110 break;
1111 }
1112 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1113 nfsrv_updatecache(nd, TRUE, mrep);
1114 /* FALLTHRU */
1115
/* reached by falling through from RC_DOIT, or directly when nfsrv_getcache() returned RC_REPLY */
1116 case RC_REPLY:
1117 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1118 /*
1119 * Need to checksum or encrypt the reply
1120 */
1121 error = nfs_gss_svc_protect_reply(nd, mrep);
1122 if (error) {
1123 mbuf_freem(mrep);
1124 break;
1125 }
1126 }
1127
1128 /*
1129 * Get the total size of the reply
1130 */
1131 m = mrep;
1132 siz = 0;
1133 while (m) {
1134 siz += mbuf_len(m);
1135 m = mbuf_next(m);
1136 }
/* a reply that is empty or larger than NFS_MAXPACKET is treated as fatal */
1137 if (siz <= 0 || siz > NFS_MAXPACKET) {
1138 printf("mbuf siz=%d\n",siz);
1139 panic("Bad nfs svc reply");
1140 }
1141 m = mrep;
1142 mbuf_pkthdr_setlen(m, siz);
1143 error = mbuf_pkthdr_setrcvif(m, NULL);
1144 if (error)
1145 panic("nfsd setrcvif failed: %d", error);
1146 /*
1147 * For stream protocols, prepend a Sun RPC
1148 * Record Mark.
1149 */
/* the mark is the reply length ORed with 0x80000000 (presumably the record-marking "last fragment" bit), in network byte order */
1150 if (slp->ns_sotype == SOCK_STREAM) {
1151 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
1152 if (!error)
1153 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1154 }
/* send only if the socket is still valid; otherwise free the reply and report EPIPE so the socket is zapped below */
1155 if (!error) {
1156 if (slp->ns_flag & SLP_VALID) {
1157 error = nfsrv_send(slp, nd->nd_nam2, m);
1158 } else {
1159 error = EPIPE;
1160 mbuf_freem(m);
1161 }
1162 } else {
1163 mbuf_freem(m);
1164 }
/* the reply chain was either passed to nfsrv_send() or freed above (presumably nfsrv_send() consumes it -- confirm) */
1165 mrep = NULL;
1166 if (nd->nd_nam2) {
1167 mbuf_freem(nd->nd_nam2);
1168 nd->nd_nam2 = NULL;
1169 }
1170 if (error == EPIPE) {
1171 lck_rw_lock_exclusive(&slp->ns_rwlock);
1172 nfsrv_zapsock(slp);
1173 lck_rw_done(&slp->ns_rwlock);
1174 }
/*
 * Interrupted: free the request, drop the socket reference, and leave
 * the thread.  'done' needs nfsd_mutex held, so take it before jumping.
 */
1175 if (error == EINTR || error == ERESTART) {
1176 nfsm_chain_cleanup(&nd->nd_nmreq);
1177 if (IS_VALID_CRED(nd->nd_cr))
1178 kauth_cred_unref(&nd->nd_cr);
1179 if (nd->nd_gss_context)
1180 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1181 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1182 nfsrv_slpderef(slp);
1183 lck_mtx_lock(nfsd_mutex);
1184 goto done;
1185 }
1186 break;
/* drop the request without sending any reply */
1187 case RC_DROPIT:
1188 mbuf_freem(nd->nd_nam2);
1189 nd->nd_nam2 = NULL;
1190 break;
1191 };
/* one more operation handled on this socket; then release the request descriptor, if there is one */
1192 opcnt++;
1193 if (nd) {
1194 nfsm_chain_cleanup(&nd->nd_nmreq);
1195 if (nd->nd_nam2)
1196 mbuf_freem(nd->nd_nam2);
1197 if (IS_VALID_CRED(nd->nd_cr))
1198 kauth_cred_unref(&nd->nd_cr);
1199 if (nd->nd_gss_context)
1200 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1201 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1202 nd = NULL;
1203 }
1204
1205 /*
1206 * Check to see if there are outstanding writes that
1207 * need to be serviced.
1208 */
1209 writes_todo = 0;
1210 if (slp->ns_wgtime) {
1211 microuptime(&now);
1212 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1213 (u_quad_t)now.tv_usec;
1214 if (slp->ns_wgtime <= cur_usec) {
1215 cacherep = RC_DOIT;
1216 writes_todo = 1;
1217 }
1218 }
1219 } while (writes_todo);
1220
/*
 * If no other socket is waiting and fewer than 8 operations have been
 * done on this one, try to pull another request from the same socket.
 */
1221 nd = NULL;
1222 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1223 lck_rw_lock_exclusive(&slp->ns_rwlock);
1224 error = nfsrv_dorec(slp, nfsd, &nd);
1225 if (error == EINVAL) { // RPCSEC_GSS drop
1226 if (slp->ns_sotype == SOCK_STREAM)
1227 nfsrv_zapsock(slp); // drop connection
1228 }
1229 lck_rw_done(&slp->ns_rwlock);
1230 }
1231 if (!nd) {
1232 /* drop our reference on the socket */
1233 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1234 nfsd->nfsd_slp = NULL;
1235 nfsrv_slpderef(slp);
1236 }
1237 }
/* reached via the shutdown 'break' without nfsd_mutex held; take it for 'done' */
1238 lck_mtx_lock(nfsd_mutex);
1239 done:
/* unregister and free this nfsd; the last thread to leave tears down the server state */
1240 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1241 FREE(nfsd, M_NFSD);
1242 if (--nfsd_thread_count == 0)
1243 nfsrv_cleanup();
1244 lck_mtx_unlock(nfsd_mutex);
1245 return (error);
1246 }
1247
1248 int
1249 nfssvc_export(user_addr_t argp)
1250 {
1251 int error = 0, is_64bit;
1252 struct user_nfs_export_args unxa;
1253 vfs_context_t ctx = vfs_context_current();
1254
1255 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1256
1257 /* copy in pointers to path and export args */
1258 if (is_64bit) {
1259 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1260 } else {
1261 struct nfs_export_args tnxa;
1262 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1263 if (error == 0) {
1264 /* munge into LP64 version of nfs_export_args structure */
1265 unxa.nxa_fsid = tnxa.nxa_fsid;
1266 unxa.nxa_expid = tnxa.nxa_expid;
1267 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1268 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1269 unxa.nxa_flags = tnxa.nxa_flags;
1270 unxa.nxa_netcount = tnxa.nxa_netcount;
1271 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1272 }
1273 }
1274 if (error)
1275 return (error);
1276
1277 error = nfsrv_export(&unxa, ctx);
1278
1279 return (error);
1280 }
1281
1282 /*
1283 * Shut down a socket associated with an nfsrv_sock structure.
1284 * Should be called with the send lock set, if required.
1285 * The trick here is to increment the sref at the start, so that the nfsds
1286 * will stop using it and clear ns_flag at the end so that it will not be
1287 * reassigned during cleanup.
1288 */
1289 void
1290 nfsrv_zapsock(struct nfsrv_sock *slp)
1291 {
1292 socket_t so;
1293
1294 if ((slp->ns_flag & SLP_VALID) == 0)
1295 return;
1296 slp->ns_flag &= ~SLP_ALLFLAGS;
1297
1298 so = slp->ns_so;
1299 if (so == NULL)
1300 return;
1301
1302 /*
1303 * Attempt to deter future up-calls, but leave the
1304 * up-call info in place to avoid a race with the
1305 * networking code.
1306 */
1307 socket_lock(so, 1);
1308 so->so_rcv.sb_flags &= ~SB_UPCALL;
1309 socket_unlock(so, 1);
1310
1311 sock_shutdown(so, SHUT_RDWR);
1312
1313 /*
1314 * Remove from the up-call queue
1315 */
1316 nfsrv_uc_dequeue(slp);
1317 }
1318
1319 /*
1320 * cleanup and release a server socket structure.
1321 */
1322 void
1323 nfsrv_slpfree(struct nfsrv_sock *slp)
1324 {
1325 struct nfsrv_descript *nwp, *nnwp;
1326
1327 if (slp->ns_so) {
1328 sock_release(slp->ns_so);
1329 slp->ns_so = NULL;
1330 }
1331 if (slp->ns_nam)
1332 mbuf_free(slp->ns_nam);
1333 if (slp->ns_raw)
1334 mbuf_freem(slp->ns_raw);
1335 if (slp->ns_rec)
1336 mbuf_freem(slp->ns_rec);
1337 if (slp->ns_frag)
1338 mbuf_freem(slp->ns_frag);
1339 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1340 slp->ns_reccnt = 0;
1341
1342 if (slp->ns_ua)
1343 FREE(slp->ns_ua, M_NFSSVC);
1344
1345 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1346 nnwp = nwp->nd_tq.le_next;
1347 LIST_REMOVE(nwp, nd_tq);
1348 nfsm_chain_cleanup(&nwp->nd_nmreq);
1349 if (nwp->nd_mrep)
1350 mbuf_freem(nwp->nd_mrep);
1351 if (nwp->nd_nam2)
1352 mbuf_freem(nwp->nd_nam2);
1353 if (IS_VALID_CRED(nwp->nd_cr))
1354 kauth_cred_unref(&nwp->nd_cr);
1355 if (nwp->nd_gss_context)
1356 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1357 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1358 }
1359 LIST_INIT(&slp->ns_tq);
1360
1361 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1362 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1363 FREE(slp, M_NFSSVC);
1364 }
1365
1366 /*
1367 * Derefence a server socket structure. If it has no more references and
1368 * is no longer valid, you can throw it away.
1369 */
1370 void
1371 nfsrv_slpderef(struct nfsrv_sock *slp)
1372 {
1373 struct timeval now;
1374
1375 lck_mtx_lock(nfsd_mutex);
1376 lck_rw_lock_exclusive(&slp->ns_rwlock);
1377 slp->ns_sref--;
1378
1379 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1380 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1381 /* remove socket from queue since there's no work */
1382 if (slp->ns_flag & SLP_WAITQ)
1383 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1384 else
1385 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1386 slp->ns_flag &= ~SLP_QUEUED;
1387 }
1388 lck_rw_done(&slp->ns_rwlock);
1389 lck_mtx_unlock(nfsd_mutex);
1390 return;
1391 }
1392
1393 /* This socket is no longer valid, so we'll get rid of it */
1394
1395 if (slp->ns_flag & SLP_QUEUED) {
1396 if (slp->ns_flag & SLP_WAITQ)
1397 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1398 else
1399 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1400 slp->ns_flag &= ~SLP_QUEUED;
1401 }
1402
1403 /*
1404 * Queue the socket up for deletion
1405 * and start the timer to delete it
1406 * after it has been in limbo for
1407 * a while.
1408 */
1409 microuptime(&now);
1410 slp->ns_timestamp = now.tv_sec;
1411 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1412 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1413 if (!nfsrv_deadsock_timer_on) {
1414 nfsrv_deadsock_timer_on = 1;
1415 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1416 NFSRV_DEADSOCKDELAY * 1000);
1417 }
1418
1419 lck_rw_done(&slp->ns_rwlock);
1420 /* now remove from the write gather socket list */
1421 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1422 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1423 slp->ns_wgq.tqe_next = SLPNOLIST;
1424 }
1425 lck_mtx_unlock(nfsd_mutex);
1426 }
1427
1428 /*
1429 * Check periodically for dead sockets pending delete.
1430 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
1431 * seconds then we assume it's safe to free.
1432 */
1433 void
1434 nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
1435 {
1436 struct nfsrv_sock *slp;
1437 struct timeval now;
1438 time_t time_to_wait;
1439
1440 microuptime(&now);
1441 lck_mtx_lock(nfsd_mutex);
1442
1443 while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
1444 if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
1445 break;
1446 TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
1447 nfsrv_slpfree(slp);
1448 }
1449 if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
1450 nfsrv_deadsock_timer_on = 0;
1451 lck_mtx_unlock(nfsd_mutex);
1452 return;
1453 }
1454 time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
1455 if (time_to_wait < 1)
1456 time_to_wait = 1;
1457
1458 lck_mtx_unlock(nfsd_mutex);
1459
1460 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1461 time_to_wait * 1000);
1462 }
1463
1464 /*
1465 * Clean up the data structures for the server.
1466 */
1467 void
1468 nfsrv_cleanup(void)
1469 {
1470 struct nfsrv_sock *slp, *nslp;
1471 struct timeval now;
1472 #if CONFIG_FSE
1473 struct nfsrv_fmod *fp, *nfp;
1474 int i;
1475 #endif
1476
1477 microuptime(&now);
1478 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1479 nslp = TAILQ_NEXT(slp, ns_chain);
1480 if (slp->ns_flag & SLP_VALID) {
1481 lck_rw_lock_exclusive(&slp->ns_rwlock);
1482 nfsrv_zapsock(slp);
1483 lck_rw_done(&slp->ns_rwlock);
1484 }
1485 if (slp->ns_flag & SLP_QUEUED) {
1486 if (slp->ns_flag & SLP_WAITQ)
1487 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1488 else
1489 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1490 slp->ns_flag &= ~SLP_QUEUED;
1491 }
1492 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1493 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1494 slp->ns_wgq.tqe_next = SLPNOLIST;
1495 }
1496 /* queue the socket up for deletion */
1497 slp->ns_timestamp = now.tv_sec;
1498 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1499 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1500 if (!nfsrv_deadsock_timer_on) {
1501 nfsrv_deadsock_timer_on = 1;
1502 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1503 NFSRV_DEADSOCKDELAY * 1000);
1504 }
1505 }
1506
1507 #if CONFIG_FSE
1508 /*
1509 * Flush pending file write fsevents
1510 */
1511 lck_mtx_lock(nfsrv_fmod_mutex);
1512 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1513 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1514 /*
1515 * Fire off the content modified fsevent for each
1516 * entry, remove it from the list, and free it.
1517 */
1518 if (nfsrv_fsevents_enabled) {
1519 fp->fm_context.vc_thread = current_thread();
1520 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1521 FSE_ARG_VNODE, fp->fm_vp,
1522 FSE_ARG_DONE);
1523 }
1524 vnode_put(fp->fm_vp);
1525 kauth_cred_unref(&fp->fm_context.vc_ucred);
1526 nfp = LIST_NEXT(fp, fm_link);
1527 LIST_REMOVE(fp, fm_link);
1528 FREE(fp, M_TEMP);
1529 }
1530 }
1531 nfsrv_fmod_pending = 0;
1532 lck_mtx_unlock(nfsrv_fmod_mutex);
1533 #endif
1534
1535 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1536
1537 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1538
1539 nfsrv_cleancache(); /* And clear out server cache */
1540
1541 nfsrv_udpsock = NULL;
1542 nfsrv_udp6sock = NULL;
1543 }
1544
1545 #endif /* NFS_NOSERVER */