]> git.saurik.com Git - apple/xnu.git/blob - bsd/nfs/nfs_syscalls.c
xnu-1699.22.73.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/kernel.h>
77 #include <sys/file_internal.h>
78 #include <sys/filedesc.h>
79 #include <sys/stat.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/mount_internal.h>
82 #include <sys/proc_internal.h> /* for fdflags */
83 #include <sys/kauth.h>
84 #include <sys/sysctl.h>
85 #include <sys/ubc.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/kpi_mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/socketvar.h>
91 #include <sys/domain.h>
92 #include <sys/protosw.h>
93 #include <sys/fcntl.h>
94 #include <sys/lockf.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 #include <sys/sysproto.h>
98 #include <sys/kpi_socket.h>
99 #include <sys/fsevents.h>
100 #include <libkern/OSAtomic.h>
101 #include <kern/thread_call.h>
102 #include <kern/task.h>
103
104 #include <security/audit/audit.h>
105
106 #include <netinet/in.h>
107 #include <netinet/tcp.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsm_subs.h>
113 #include <nfs/nfsrvcache.h>
114 #include <nfs/nfs_gss.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsnode.h>
117 #include <nfs/nfs_lock.h>
118 #if CONFIG_MACF
119 #include <security/mac_framework.h>
120 #endif
121
/*
 * XXX: local prototype for thread_terminate(), which the nfsiod thread
 * routines in this file call on current_thread() in order to exit.
 */
kern_return_t thread_terminate(thread_t); /* XXX */

#if NFSSERVER

/*
 * Table of NFS_NPROCS NFS server procedure handlers, defined elsewhere.
 * NOTE(review): presumably indexed by NFS procedure number -- confirm
 * against the definition.
 */
extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
					struct nfsrv_sock *slp,
					vfs_context_t ctx,
					mbuf_t *mrepp);
/* defined elsewhere; exposed by this file as the wg_delay / wg_delay_v3 sysctls */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* exposed by this file as the require_resv_port sysctl; starts out 0 */
static int nfsrv_require_resv_port = 0;
/* starts out 0; NOTE(review): looks like an "is the timer armed" flag -- confirm at its users */
static int nfsrv_deadsock_timer_on = 0;

/* NFS server entry points and socket helpers used by the nfssvc() path */
int nfssvc_export(user_addr_t argp);
int nfssvc_nfsd(void);
int nfssvc_addsock(socket_t, mbuf_t);
void nfsrv_zapsock(struct nfsrv_sock *);
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);

#endif /* NFSSERVER */
144
/*
 * sysctl stuff
 *
 * Registers an "nfs" node under _vfs_generic with "client" and "server"
 * child nodes (each only when the matching NFSCLIENT / NFSSERVER option is
 * built in).  Every leaf is an integer bound directly to a kernel variable.
 * Leaves declared CTLFLAG_RW are writable; those declared CTLFLAG_RD
 * (nfsiod_thread_count, lockd_mounts, nfsd_thread_count) are read-only.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");

#if NFSCLIENT
/* NFS client tunables */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
/* read-only: the nfsiod thread count maintained by nfsiod_start()/nfsiod_terminate() */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
/* read-only */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
#endif /* NFSCLIENT */

#if NFSSERVER
/* NFS server tunables */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
/* the only unsigned leaf */
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
/* only present when the kernel is built with CONFIG_FSE */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
/* read-only */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
#endif /* NFSSERVER */
188
189
190 #if NFSCLIENT
191
192 int
193 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
194 {
195 struct lockd_ans la;
196 int error;
197
198 switch (uap->flag) {
199 case NFSCLNT_LOCKDANS:
200 error = copyin(uap->argp, &la, sizeof(la));
201 if (!error)
202 error = nfslockdans(p, &la);
203 break;
204 case NFSCLNT_LOCKDNOTIFY:
205 error = nfslockdnotify(p, uap->argp);
206 break;
207 default:
208 error = EINVAL;
209 }
210 return (error);
211 }
212
213 /*
214 * Asynchronous I/O threads for client NFS.
215 * They do read-ahead and write-behind operations on the block I/O cache.
216 *
 * Up to nfsiod_thread_max threads are launched on demand, and each exits
 * when unused for a while. There are as many nfsiod structs as there are
219 * nfsiod threads; however there's no strict tie between a thread and a struct.
220 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
221 * up, it removes the next struct nfsiod from the queue and services it. Then
222 * it will put the struct at the head of free list and sleep on it.
223 * Async requests will pull the next struct nfsiod from the head of the free list,
224 * put it on the work queue, and wake whatever thread is waiting on that struct.
225 */
226
227 /*
228 * nfsiod thread exit routine
229 *
230 * Must be called with nfsiod_mutex held so that the
231 * decision to terminate is atomic with the termination.
232 */
233 void
234 nfsiod_terminate(struct nfsiod *niod)
235 {
236 nfsiod_thread_count--;
237 lck_mtx_unlock(nfsiod_mutex);
238 if (niod)
239 FREE(niod, M_TEMP);
240 else
241 printf("nfsiod: terminating without niod\n");
242 thread_terminate(current_thread());
243 /*NOTREACHED*/
244 }
245
246 /* nfsiod thread startup routine */
247 void
248 nfsiod_thread(void)
249 {
250 struct nfsiod *niod;
251 int error;
252
253 MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
254 if (!niod) {
255 lck_mtx_lock(nfsiod_mutex);
256 nfsiod_thread_count--;
257 wakeup(current_thread());
258 lck_mtx_unlock(nfsiod_mutex);
259 thread_terminate(current_thread());
260 /*NOTREACHED*/
261 }
262 bzero(niod, sizeof(*niod));
263 lck_mtx_lock(nfsiod_mutex);
264 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
265 wakeup(current_thread());
266 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
267 /* shouldn't return... so we have an error */
268 /* remove an old nfsiod struct and terminate */
269 lck_mtx_lock(nfsiod_mutex);
270 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
271 TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
272 nfsiod_terminate(niod);
273 /*NOTREACHED*/
274 }
275
276 /*
277 * Start up another nfsiod thread.
278 * (unless we're already maxed out and there are nfsiods running)
279 */
280 int
281 nfsiod_start(void)
282 {
283 thread_t thd = THREAD_NULL;
284
285 lck_mtx_lock(nfsiod_mutex);
286 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
287 lck_mtx_unlock(nfsiod_mutex);
288 return (EBUSY);
289 }
290 nfsiod_thread_count++;
291 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
292 lck_mtx_unlock(nfsiod_mutex);
293 return (EBUSY);
294 }
295 /* wait for the thread to complete startup */
296 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
297 thread_deallocate(thd);
298 return (0);
299 }
300
/*
 * Continuation for Asynchronous I/O threads for NFS client.
 *
 * Grab an nfsiod struct to work on, do some work, then drop it
 *
 * Entered (as the msleep0() continuation) without nfsiod_mutex held.  The
 * thread takes the first struct on the nfsiodwork queue, services the async
 * request queue of the mount attached to it, moves on to any other mounts
 * waiting on nfsiodmounts, and finally parks itself on the free list again
 * (or terminates).  If no work struct is queued, the thread retires the
 * oldest free struct and terminates.
 *
 * The incoming error argument is never examined; the variable is only
 * reused to hold the msleep0() result.  The function does not actually
 * return: every path ends in nfsiod_terminate() or another sleep with this
 * function as continuation.
 */
int
nfsiod_continue(int error)
{
	struct nfsiod *niod;
	struct nfsmount *nmp;
	struct nfsreq *req, *treq;
	struct nfs_reqqhead iodq;	/* private copy of the mount's pending requests */
	int morework;

	lck_mtx_lock(nfsiod_mutex);
	niod = TAILQ_FIRST(&nfsiodwork);
	if (!niod) {
		/* there's no work queued up */
		/* remove an old nfsiod struct and terminate */
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
		nfsiod_terminate(niod);
		/*NOTREACHED*/
	}
	TAILQ_REMOVE(&nfsiodwork, niod, niod_link);

worktodo:
	/* nfsiod_mutex is held at the top of every iteration */
	while ((nmp = niod->niod_nmp)) {
		/*
		 * Service this mount's async I/O queue.
		 *
		 * In order to ensure some level of fairness between mounts,
		 * we grab all the work up front before processing it so any
		 * new work that arrives will be serviced on a subsequent
		 * iteration - and we have a chance to see if other work needs
		 * to be done (e.g. the delayed write queue needs to be pushed
		 * or other mounts are waiting for an nfsiod).
		 */
		/* grab the current contents of the queue */
		TAILQ_INIT(&iodq);
		TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
		/* the callbacks run without the mutex held */
		lck_mtx_unlock(nfsiod_mutex);

		/* process the queue */
		TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
			TAILQ_REMOVE(&iodq, req, r_achain);
			/* mark the request as being on no async list before calling back */
			req->r_achain.tqe_next = NFSREQNOLIST;
			req->r_callback.rcb_func(req);
		}

		/* now check if there's more/other work to be done */
		lck_mtx_lock(nfsiod_mutex);
		morework = !TAILQ_EMPTY(&nmp->nm_iodq);
		/* keep going on this mount only if it has more work and nobody else is waiting */
		if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
			/* we're going to stop working on this mount */
			if (morework) /* mount still needs more work so queue it up */
				TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
			/* break the mount <-> nfsiod association; this also ends the while loop */
			nmp->nm_niod = NULL;
			niod->niod_nmp = NULL;
		}
	}

	/* loop if there's still a mount to work on */
	if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
		niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
		TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
	}
	if (niod->niod_nmp)
		goto worktodo;

	/* queue ourselves back up - if there aren't too many threads running */
	if (nfsiod_thread_count <= NFSIOD_MAX) {
		TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
		/* PDROP: the mutex is released by the sleep and not retaken on wakeup */
		error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
		/* shouldn't return... so we have an error */
		/* remove an old nfsiod struct and terminate */
		lck_mtx_lock(nfsiod_mutex);
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	}
	/*
	 * Either too many threads are running (niod is still our own struct)
	 * or the sleep returned (niod is the oldest free struct, possibly NULL).
	 */
	nfsiod_terminate(niod);
	/*NOTREACHED*/
	return (0);
}
385
386 #endif /* NFSCLIENT */
387
388
389 #if NFSSERVER
390
391 /*
392 * NFS server system calls
393 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
394 */
395
396 /*
397 * Get file handle system call
398 */
399 int
400 getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
401 {
402 vnode_t vp;
403 struct nfs_filehandle nfh;
404 int error, fhlen, fidlen;
405 struct nameidata nd;
406 char path[MAXPATHLEN], *ptr;
407 size_t pathlen;
408 struct nfs_exportfs *nxfs;
409 struct nfs_export *nx;
410
411 /*
412 * Must be super user
413 */
414 error = proc_suser(p);
415 if (error)
416 return (error);
417
418 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
419 if (!error)
420 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
421 if (error)
422 return (error);
423 /* limit fh size to length specified (or v3 size by default) */
424 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
425 fhlen = NFSV3_MAX_FH_SIZE;
426 fidlen = fhlen - sizeof(struct nfs_exphandle);
427
428 if (!nfsrv_is_initialized())
429 return (EINVAL);
430
431 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
432 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
433 error = namei(&nd);
434 if (error)
435 return (error);
436 nameidone(&nd);
437
438 vp = nd.ni_vp;
439
440 // find exportfs that matches f_mntonname
441 lck_rw_lock_shared(&nfsrv_export_rwlock);
442 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
443 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
444 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
445 break;
446 }
447 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
448 error = EINVAL;
449 goto out;
450 }
451 // find export that best matches remainder of path
452 ptr = path + strlen(nxfs->nxfs_path);
453 while (*ptr && (*ptr == '/'))
454 ptr++;
455 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
456 int len = strlen(nx->nx_path);
457 if (len == 0) // we've hit the export entry for the root directory
458 break;
459 if (!strncmp(nx->nx_path, ptr, len))
460 break;
461 }
462 if (!nx) {
463 error = EINVAL;
464 goto out;
465 }
466
467 bzero(&nfh, sizeof(nfh));
468 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
469 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
470 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
471 nfh.nfh_xh.nxh_flags = 0;
472 nfh.nfh_xh.nxh_reserved = 0;
473 nfh.nfh_len = fidlen;
474 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
475 if (nfh.nfh_len > (uint32_t)fidlen)
476 error = EOVERFLOW;
477 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
478 nfh.nfh_len += sizeof(nfh.nfh_xh);
479 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
480
481 out:
482 lck_rw_done(&nfsrv_export_rwlock);
483 vnode_put(vp);
484 if (error)
485 return (error);
486 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
487 return (error);
488 }
489
490 extern struct fileops vnops;
491
492 /*
493 * syscall for the rpc.lockd to use to translate a NFS file handle into
494 * an open descriptor.
495 *
496 * warning: do not remove the suser() call or this becomes one giant
497 * security hole.
498 */
499 int
500 fhopen( proc_t p,
501 struct fhopen_args *uap,
502 int32_t *retval)
503 {
504 vnode_t vp;
505 struct nfs_filehandle nfh;
506 struct nfs_export *nx;
507 struct nfs_export_options *nxo;
508 struct flock lf;
509 struct fileproc *fp, *nfp;
510 int fmode, error, type;
511 int indx;
512 vfs_context_t ctx = vfs_context_current();
513 kauth_action_t action;
514
515 /*
516 * Must be super user
517 */
518 error = suser(vfs_context_ucred(ctx), 0);
519 if (error) {
520 return (error);
521 }
522
523 if (!nfsrv_is_initialized()) {
524 return (EINVAL);
525 }
526
527 fmode = FFLAGS(uap->flags);
528 /* why not allow a non-read/write open for our lockd? */
529 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
530 return (EINVAL);
531
532 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
533 if (error)
534 return (error);
535 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
536 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
537 return (EINVAL);
538 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
539 if (error)
540 return (error);
541 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
542
543 lck_rw_lock_shared(&nfsrv_export_rwlock);
544 /* now give me my vnode, it gets returned to me with a reference */
545 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
546 lck_rw_done(&nfsrv_export_rwlock);
547 if (error) {
548 if (error == NFSERR_TRYLATER)
549 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
550 return (error);
551 }
552
553 /*
554 * From now on we have to make sure not
555 * to forget about the vnode.
556 * Any error that causes an abort must vnode_put(vp).
557 * Just set error = err and 'goto bad;'.
558 */
559
560 /*
561 * from vn_open
562 */
563 if (vnode_vtype(vp) == VSOCK) {
564 error = EOPNOTSUPP;
565 goto bad;
566 }
567
568 /* disallow write operations on directories */
569 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
570 error = EISDIR;
571 goto bad;
572 }
573
574 /* compute action to be authorized */
575 action = 0;
576 if (fmode & FREAD)
577 action |= KAUTH_VNODE_READ_DATA;
578 if (fmode & (FWRITE | O_TRUNC))
579 action |= KAUTH_VNODE_WRITE_DATA;
580 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
581 goto bad;
582
583 if ((error = VNOP_OPEN(vp, fmode, ctx)))
584 goto bad;
585 if ((error = vnode_ref_ext(vp, fmode, 0)))
586 goto bad;
587
588 /*
589 * end of vn_open code
590 */
591
592 // starting here... error paths should call vn_close/vnode_put
593 if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
594 vn_close(vp, fmode & FMASK, ctx);
595 goto bad;
596 }
597 fp = nfp;
598
599 fp->f_fglob->fg_flag = fmode & FMASK;
600 fp->f_fglob->fg_type = DTYPE_VNODE;
601 fp->f_fglob->fg_ops = &vnops;
602 fp->f_fglob->fg_data = (caddr_t)vp;
603
604 // XXX do we really need to support this with fhopen()?
605 if (fmode & (O_EXLOCK | O_SHLOCK)) {
606 lf.l_whence = SEEK_SET;
607 lf.l_start = 0;
608 lf.l_len = 0;
609 if (fmode & O_EXLOCK)
610 lf.l_type = F_WRLCK;
611 else
612 lf.l_type = F_RDLCK;
613 type = F_FLOCK;
614 if ((fmode & FNONBLOCK) == 0)
615 type |= F_WAIT;
616 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
617 struct vfs_context context = *vfs_context_current();
618 /* Modify local copy (to not damage thread copy) */
619 context.vc_ucred = fp->f_fglob->fg_cred;
620
621 vn_close(vp, fp->f_fglob->fg_flag, &context);
622 fp_free(p, indx, fp);
623 return (error);
624 }
625 fp->f_fglob->fg_flag |= FHASLOCK;
626 }
627
628 vnode_put(vp);
629
630 proc_fdlock(p);
631 procfdtbl_releasefd(p, indx, NULL);
632 fp_drop(p, indx, fp, 1);
633 proc_fdunlock(p);
634
635 *retval = indx;
636 return (0);
637
638 bad:
639 vnode_put(vp);
640 return (error);
641 }
642
643 /*
644 * NFS server pseudo system call
645 */
646 int
647 nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
648 {
649 mbuf_t nam;
650 struct user_nfsd_args user_nfsdarg;
651 socket_t so;
652 int error;
653
654 AUDIT_ARG(cmd, uap->flag);
655
656 /*
657 * Must be super user for most operations (export ops checked later).
658 */
659 if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
660 return (error);
661 #if CONFIG_MACF
662 error = mac_system_check_nfsd(kauth_cred_get());
663 if (error)
664 return (error);
665 #endif
666
667 /* make sure NFS server data structures have been initialized */
668 nfsrv_init();
669
670 if (uap->flag & NFSSVC_ADDSOCK) {
671 if (IS_64BIT_PROCESS(p)) {
672 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
673 } else {
674 struct nfsd_args tmp_args;
675 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
676 if (error == 0) {
677 user_nfsdarg.sock = tmp_args.sock;
678 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
679 user_nfsdarg.namelen = tmp_args.namelen;
680 }
681 }
682 if (error)
683 return (error);
684 /* get the socket */
685 error = file_socket(user_nfsdarg.sock, &so);
686 if (error)
687 return (error);
688 /* Get the client address for connected sockets. */
689 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
690 nam = NULL;
691 } else {
692 error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
693 if (error) {
694 /* drop the iocount file_socket() grabbed on the file descriptor */
695 file_drop(user_nfsdarg.sock);
696 return (error);
697 }
698 }
699 /*
700 * nfssvc_addsock() will grab a retain count on the socket
701 * to keep the socket from being closed when nfsd closes its
702 * file descriptor for it.
703 */
704 error = nfssvc_addsock(so, nam);
705 /* drop the iocount file_socket() grabbed on the file descriptor */
706 file_drop(user_nfsdarg.sock);
707 } else if (uap->flag & NFSSVC_NFSD) {
708 error = nfssvc_nfsd();
709 } else if (uap->flag & NFSSVC_EXPORT) {
710 error = nfssvc_export(uap->argp);
711 } else {
712 error = EINVAL;
713 }
714 if (error == EINTR || error == ERESTART)
715 error = 0;
716 return (error);
717 }
718
719 /*
720 * Adds a socket to the list for servicing by nfsds.
721 */
722 int
723 nfssvc_addsock(socket_t so, mbuf_t mynam)
724 {
725 struct nfsrv_sock *slp;
726 int error = 0, sodomain, sotype, soprotocol, on = 1;
727 struct timeval timeo;
728
729 /* make sure mbuf constants are set up */
730 if (!nfs_mbuf_mhlen)
731 nfs_mbuf_init();
732
733 sock_gettype(so, &sodomain, &sotype, &soprotocol);
734
735 /* There should be only one UDP socket for each of IPv4 and IPv6 */
736 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
737 mbuf_freem(mynam);
738 return (EEXIST);
739 }
740 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
741 mbuf_freem(mynam);
742 return (EEXIST);
743 }
744
745 /* Set protocol options and reserve some space (for UDP). */
746 if (sotype == SOCK_STREAM)
747 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
748 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
749 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
750 if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
751 int reserve = NFS_UDPSOCKBUF;
752 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
753 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
754 if (error) {
755 log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
756 error = 0;
757 }
758 }
759 sock_nointerrupt(so, 0);
760
761 /*
762 * Set socket send/receive timeouts.
763 * Receive timeout shouldn't matter, but setting the send timeout
764 * will make sure that an unresponsive client can't hang the server.
765 */
766 timeo.tv_usec = 0;
767 timeo.tv_sec = 1;
768 error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
769 timeo.tv_sec = 30;
770 error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
771 if (error) {
772 log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
773 error = 0;
774 }
775
776 MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
777 if (!slp) {
778 mbuf_freem(mynam);
779 return (ENOMEM);
780 }
781 bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
782 lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
783 lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
784
785 lck_mtx_lock(nfsd_mutex);
786
787 if (soprotocol == IPPROTO_UDP) {
788 if (sodomain == AF_INET) {
789 /* There should be only one UDP/IPv4 socket */
790 if (nfsrv_udpsock) {
791 lck_mtx_unlock(nfsd_mutex);
792 nfsrv_slpfree(slp);
793 mbuf_freem(mynam);
794 return (EEXIST);
795 }
796 nfsrv_udpsock = slp;
797 }
798 if (sodomain == AF_INET6) {
799 /* There should be only one UDP/IPv6 socket */
800 if (nfsrv_udp6sock) {
801 lck_mtx_unlock(nfsd_mutex);
802 nfsrv_slpfree(slp);
803 mbuf_freem(mynam);
804 return (EEXIST);
805 }
806 nfsrv_udp6sock = slp;
807 }
808 }
809
810 /* add the socket to the list */
811 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
812
813 sock_retain(so); /* grab a retain count on the socket */
814 slp->ns_so = so;
815 slp->ns_sotype = sotype;
816 slp->ns_nam = mynam;
817
818 /* set up the socket upcall */
819 sock_setupcall(so, nfsrv_rcv, slp);
820 /* just playin' it safe */
821 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
822
823 /* mark that the socket is not in the nfsrv_sockwg list */
824 slp->ns_wgq.tqe_next = SLPNOLIST;
825
826 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
827
828 nfsrv_wakenfsd(slp);
829 lck_mtx_unlock(nfsd_mutex);
830
831 return (0);
832 }
833
834 /*
835 * nfssvc_nfsd()
836 *
837 * nfsd theory of operation:
838 *
839 * The first nfsd thread stays in user mode accepting new TCP connections
840 * which are then added via the "addsock" call. The rest of the nfsd threads
841 * simply call into the kernel and remain there in a loop handling NFS
842 * requests until killed by a signal.
843 *
844 * There's a list of nfsd threads (nfsd_head).
845 * There's an nfsd queue that contains only those nfsds that are
846 * waiting for work to do (nfsd_queue).
847 *
848 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
849 * managing the work on the sockets:
850 * nfsrv_sockwait - sockets w/new data waiting to be worked on
851 * nfsrv_sockwork - sockets being worked on which may have more work to do
852 * nfsrv_sockwg -- sockets which have pending write gather data
853 * When a socket receives data, if it is not currently queued, it
854 * will be placed at the end of the "wait" queue.
855 * Whenever a socket needs servicing we make sure it is queued and
856 * wake up a waiting nfsd (if there is one).
857 *
858 * nfsds will service at most 8 requests from the same socket before
859 * defecting to work on another socket.
860 * nfsds will defect immediately if there are any sockets in the "wait" queue
861 * nfsds looking for a socket to work on check the "wait" queue first and
862 * then check the "work" queue.
863 * When an nfsd starts working on a socket, it removes it from the head of
864 * the queue it's currently on and moves it to the end of the "work" queue.
865 * When nfsds are checking the queues for work, any sockets found not to
866 * have any work are simply dropped from the queue.
867 *
868 */
/*
 * nfssvc_nfsd: body of an nfsd server thread.
 *
 * Allocates a struct nfsd, registers it on nfsd_head, and then loops:
 * pick a server socket that has work (the "wait" queue is checked before
 * the "work" queue), pull a request off it with nfsrv_dorec(), dispatch it
 * through nfsrv_procs[] or nfsrv_writegather(), and send the reply with
 * nfsrv_send().
 *
 * The loop is left when nfsd_thread_max is <= 0, when this thread is idle
 * and nfsd_thread_count exceeds nfsd_thread_max, when msleep() returns an
 * error, or when sending a reply fails with EINTR/ERESTART.
 *
 * Returns ENOMEM if the nfsd structure cannot be allocated; otherwise the
 * value "error" held when the loop was left.
 */
869 int
870 nfssvc_nfsd(void)
871 {
872 mbuf_t m, mrep;
873 struct nfsrv_sock *slp;
874 struct nfsd *nfsd;
875 struct nfsrv_descript *nd = NULL;
876 int error = 0, cacherep, writes_todo;
877 int siz, procrastinate, opcnt = 0;
878 u_quad_t cur_usec;
879 struct timeval now;
880 struct vfs_context context;
881
882 #ifndef nolint
883 cacherep = RC_DOIT;
884 writes_todo = 0;
885 #endif
886
887 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
888 if (!nfsd)
889 return (ENOMEM);
890 bzero(nfsd, sizeof(struct nfsd));
/* Register this thread under nfsd_mutex; the first nfsd sets up the request cache. */
891 lck_mtx_lock(nfsd_mutex);
892 if (nfsd_thread_count++ == 0)
893 nfsrv_initcache(); /* Init the server request cache */
894 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
895 lck_mtx_unlock(nfsd_mutex);
896
/* Only vc_thread is set here; vc_ucred is cleared before each dispatch below. */
897 context.vc_thread = current_thread();
898
899 /*
900 * Loop getting rpc requests until SIGKILL.
901 */
902 for (;;) {
903 if (nfsd_thread_max <= 0) {
904 /* NFS server shutting down, get out ASAP */
905 error = EINTR;
906 slp = nfsd->nfsd_slp;
907 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
908 /* already have some work to do */
909 error = 0;
910 slp = nfsd->nfsd_slp;
911 } else {
912 /* need to find work to do */
913 error = 0;
914 lck_mtx_lock(nfsd_mutex);
/* Idle until a socket has been assigned to us or one of the socket queues is non-empty. */
915 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
916 if (nfsd_thread_count > nfsd_thread_max) {
917 /*
918 * If we have no socket and there are more
919 * nfsd threads than configured, let's exit.
920 */
921 error = 0;
922 goto done;
923 }
/* Mark ourselves idle and queue up on nfsd_queue before sleeping on nfsd_mutex. */
924 nfsd->nfsd_flag |= NFSD_WAITING;
925 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
926 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
927 if (error) {
/*
 * Still flagged as waiting, so we are still on nfsd_queue: take ourselves off.
 * NOTE(review): presumably a waker clears NFSD_WAITING and dequeues us itself — confirm.
 */
928 if (nfsd->nfsd_flag & NFSD_WAITING) {
929 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
930 nfsd->nfsd_flag &= ~NFSD_WAITING;
931 }
932 goto done;
933 }
934 }
935 slp = nfsd->nfsd_slp;
936 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
937 /* look for a socket to work on in the wait queue */
938 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
939 lck_rw_lock_exclusive(&slp->ns_rwlock);
940 /* remove from the head of the queue */
941 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
942 slp->ns_flag &= ~SLP_WAITQ;
/* Leaving the loop via break keeps slp->ns_rwlock held exclusively. */
943 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
944 break;
945 /* nothing to do, so skip this socket */
946 lck_rw_done(&slp->ns_rwlock);
947 }
948 }
949 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
950 /* look for a socket to work on in the work queue */
951 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
952 lck_rw_lock_exclusive(&slp->ns_rwlock);
953 /* remove from the head of the queue */
954 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
955 slp->ns_flag &= ~SLP_WORKQ;
956 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
957 break;
958 /* nothing to do, so skip this socket */
959 lck_rw_done(&slp->ns_rwlock);
960 }
961 }
/* A socket picked by the scans above is still write-locked; the lock is dropped at the end of this branch. */
962 if (!nfsd->nfsd_slp && slp) {
963 /* we found a socket to work on, grab a reference */
964 slp->ns_sref++;
965 nfsd->nfsd_slp = slp;
966 opcnt = 0;
967 /* and put it at the back of the work queue */
968 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
969 slp->ns_flag |= SLP_WORKQ;
970 lck_rw_done(&slp->ns_rwlock);
971 }
972 lck_mtx_unlock(nfsd_mutex);
973 if (!slp)
974 continue;
975 lck_rw_lock_exclusive(&slp->ns_rwlock);
976 if (slp->ns_flag & SLP_VALID) {
/* Socket asked to be serviced (SLP_NEEDQ) and is not disconnecting: receive pending data first. */
977 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
978 slp->ns_flag &= ~SLP_NEEDQ;
979 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
980 }
981 if (slp->ns_flag & SLP_DISCONN)
982 nfsrv_zapsock(slp);
983 error = nfsrv_dorec(slp, nfsd, &nd);
984 if (error == EINVAL) { // RPCSEC_GSS drop
985 if (slp->ns_sotype == SOCK_STREAM)
986 nfsrv_zapsock(slp); // drop connection
987 }
988 writes_todo = 0;
/*
 * No request came back, but gathered writes may be pending: once
 * ns_wgtime has passed, clear the error and take the write-gather
 * path (nd may still be NULL on that path).
 */
989 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
990 microuptime(&now);
991 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
992 (u_quad_t)now.tv_usec;
993 if (slp->ns_wgtime <= cur_usec) {
994 error = 0;
995 cacherep = RC_DOIT;
996 writes_todo = 1;
997 }
998 slp->ns_flag &= ~SLP_DOWRITES;
999 }
1000 nfsd->nfsd_flag |= NFSD_REQINPROG;
1001 }
1002 lck_rw_done(&slp->ns_rwlock);
1003 }
/*
 * Nothing usable (error, or the socket is no longer valid): discard any
 * descriptor, give up the socket reference, then either exit (shutdown)
 * or go look for other work.
 */
1004 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1005 if (nd) {
1006 nfsm_chain_cleanup(&nd->nd_nmreq);
1007 if (nd->nd_nam2)
1008 mbuf_freem(nd->nd_nam2);
1009 if (IS_VALID_CRED(nd->nd_cr))
1010 kauth_cred_unref(&nd->nd_cr);
1011 if (nd->nd_gss_context)
1012 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1013 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1014 nd = NULL;
1015 }
1016 nfsd->nfsd_slp = NULL;
1017 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1018 if (slp)
1019 nfsrv_slpderef(slp);
1020 if (nfsd_thread_max <= 0)
1021 break;
1022 continue;
1023 }
1024 if (nd) {
1025 microuptime(&nd->nd_starttime);
/* Reply address: the per-request address if the descriptor has one, else the socket's. */
1026 if (nd->nd_nam2)
1027 nd->nd_nam = nd->nd_nam2;
1028 else
1029 nd->nd_nam = slp->ns_nam;
1030
1031 cacherep = nfsrv_getcache(nd, slp, &mrep);
1032
1033 if (nfsrv_require_resv_port) {
1034 /* Check if source port is a reserved port */
1035 in_port_t port = 0;
1036 struct sockaddr *saddr = mbuf_data(nd->nd_nam);
1037
1038 if (saddr->sa_family == AF_INET)
1039 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1040 else if (saddr->sa_family == AF_INET6)
1041 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
/* Non-reserved source port: everything but the NULL procedure is turned into a NOOP answered with AUTH_TOOWEAK. */
1042 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1043 nd->nd_procnum = NFSPROC_NOOP;
1044 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1045 cacherep = RC_DOIT;
1046 }
1047 }
1048
1049 }
1050
1051 /*
1052 * Loop to get all the write RPC replies that have been
1053 * gathered together.
1054 */
1055 do {
1056 switch (cacherep) {
1057 case RC_DOIT:
1058 if (nd && (nd->nd_vers == NFS_VER3))
1059 procrastinate = nfsrv_wg_delay_v3;
1060 else
1061 procrastinate = nfsrv_wg_delay;
1062 lck_rw_lock_shared(&nfsrv_export_rwlock);
1063 context.vc_ucred = NULL;
/* writes_todo is tested first, so nd is not dereferenced on the write-gather-only path. */
1064 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
1065 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1066 else
1067 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1068 lck_rw_done(&nfsrv_export_rwlock);
1069 if (mrep == NULL) {
1070 /*
1071 * If this is a stream socket and we are not going
1072 * to send a reply we better close the connection
1073 * so the client doesn't hang.
1074 */
1075 if (error && slp->ns_sotype == SOCK_STREAM) {
1076 lck_rw_lock_exclusive(&slp->ns_rwlock);
1077 nfsrv_zapsock(slp);
1078 lck_rw_done(&slp->ns_rwlock);
1079 printf("NFS server: NULL reply from proc = %d error = %d\n",
1080 nd->nd_procnum, error);
1081 }
1082 break;
1083
1084 }
/* A reply exists but the procedure failed: count the error, update the cache entry, and send nothing. */
1085 if (error) {
1086 OSAddAtomic(1, &nfsstats.srv_errs);
1087 nfsrv_updatecache(nd, FALSE, mrep);
1088 if (nd->nd_nam2) {
1089 mbuf_freem(nd->nd_nam2);
1090 nd->nd_nam2 = NULL;
1091 }
1092 break;
1093 }
1094 OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
1095 nfsrv_updatecache(nd, TRUE, mrep);
1096 /* FALLTHRU */
1097
1098 case RC_REPLY:
1099 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1100 /*
1101 * Need to checksum or encrypt the reply
1102 */
1103 error = nfs_gss_svc_protect_reply(nd, mrep);
1104 if (error) {
1105 mbuf_freem(mrep);
1106 break;
1107 }
1108 }
1109
1110 /*
1111 * Get the total size of the reply
1112 */
1113 m = mrep;
1114 siz = 0;
1115 while (m) {
1116 siz += mbuf_len(m);
1117 m = mbuf_next(m);
1118 }
1119 if (siz <= 0 || siz > NFS_MAXPACKET) {
1120 printf("mbuf siz=%d\n",siz);
1121 panic("Bad nfs svc reply");
1122 }
1123 m = mrep;
1124 mbuf_pkthdr_setlen(m, siz);
1125 error = mbuf_pkthdr_setrcvif(m, NULL);
1126 if (error)
1127 panic("nfsd setrcvif failed: %d", error);
1128 /*
1129 * For stream protocols, prepend a Sun RPC
1130 * Record Mark.
1131 */
1132 if (slp->ns_sotype == SOCK_STREAM) {
1133 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
/* Record mark word: top bit set (last fragment, per ONC RPC record marking), remaining bits carry the reply length. */
1134 if (!error)
1135 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
1136 }
1137 if (!error) {
1138 if (slp->ns_flag & SLP_VALID) {
1139 error = nfsrv_send(slp, nd->nd_nam2, m);
1140 } else {
1141 error = EPIPE;
1142 mbuf_freem(m);
1143 }
1144 } else {
1145 mbuf_freem(m);
1146 }
/* The reply chain has been handed to nfsrv_send() or freed on every path above. */
1147 mrep = NULL;
1148 if (nd->nd_nam2) {
1149 mbuf_freem(nd->nd_nam2);
1150 nd->nd_nam2 = NULL;
1151 }
1152 if (error == EPIPE) {
1153 lck_rw_lock_exclusive(&slp->ns_rwlock);
1154 nfsrv_zapsock(slp);
1155 lck_rw_done(&slp->ns_rwlock);
1156 }
/* Interrupted while sending: release the descriptor and the socket reference, then leave via "done" with nfsd_mutex held. */
1157 if (error == EINTR || error == ERESTART) {
1158 nfsm_chain_cleanup(&nd->nd_nmreq);
1159 if (IS_VALID_CRED(nd->nd_cr))
1160 kauth_cred_unref(&nd->nd_cr);
1161 if (nd->nd_gss_context)
1162 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1163 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1164 nfsrv_slpderef(slp);
1165 lck_mtx_lock(nfsd_mutex);
1166 goto done;
1167 }
1168 break;
1169 case RC_DROPIT:
1170 mbuf_freem(nd->nd_nam2);
1171 nd->nd_nam2 = NULL;
1172 break;
1173 };
/* One more request handled on this socket visit; the per-visit limit is checked after the loop. */
1174 opcnt++;
1175 if (nd) {
1176 nfsm_chain_cleanup(&nd->nd_nmreq);
1177 if (nd->nd_nam2)
1178 mbuf_freem(nd->nd_nam2);
1179 if (IS_VALID_CRED(nd->nd_cr))
1180 kauth_cred_unref(&nd->nd_cr);
1181 if (nd->nd_gss_context)
1182 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1183 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
1184 nd = NULL;
1185 }
1186
1187 /*
1188 * Check to see if there are outstanding writes that
1189 * need to be serviced.
1190 */
1191 writes_todo = 0;
1192 if (slp->ns_wgtime) {
1193 microuptime(&now);
1194 cur_usec = (u_quad_t)now.tv_sec * 1000000 +
1195 (u_quad_t)now.tv_usec;
1196 if (slp->ns_wgtime <= cur_usec) {
1197 cacherep = RC_DOIT;
1198 writes_todo = 1;
1199 }
1200 }
1201 } while (writes_todo);
1202
1203 nd = NULL;
/* Stay on this socket only while the wait queue is empty and fewer than 8 requests were handled this visit. */
1204 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1205 lck_rw_lock_exclusive(&slp->ns_rwlock);
1206 error = nfsrv_dorec(slp, nfsd, &nd);
1207 if (error == EINVAL) { // RPCSEC_GSS drop
1208 if (slp->ns_sotype == SOCK_STREAM)
1209 nfsrv_zapsock(slp); // drop connection
1210 }
1211 lck_rw_done(&slp->ns_rwlock);
1212 }
/* No follow-on request: clear NFSD_REQINPROG so the next pass goes back to looking for work. */
1213 if (!nd) {
1214 /* drop our reference on the socket */
1215 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1216 nfsd->nfsd_slp = NULL;
1217 nfsrv_slpderef(slp);
1218 }
1219 }
1220 lck_mtx_lock(nfsd_mutex);
/* Reached with nfsd_mutex held: unregister and free this nfsd; the last thread out calls nfsrv_cleanup(). */
1221 done:
1222 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1223 FREE(nfsd, M_NFSD);
1224 if (--nfsd_thread_count == 0)
1225 nfsrv_cleanup();
1226 lck_mtx_unlock(nfsd_mutex);
1227 return (error);
1228 }
1229
1230 int
1231 nfssvc_export(user_addr_t argp)
1232 {
1233 int error = 0, is_64bit;
1234 struct user_nfs_export_args unxa;
1235 vfs_context_t ctx = vfs_context_current();
1236
1237 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
1238
1239 /* copy in pointers to path and export args */
1240 if (is_64bit) {
1241 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1242 } else {
1243 struct nfs_export_args tnxa;
1244 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1245 if (error == 0) {
1246 /* munge into LP64 version of nfs_export_args structure */
1247 unxa.nxa_fsid = tnxa.nxa_fsid;
1248 unxa.nxa_expid = tnxa.nxa_expid;
1249 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1250 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1251 unxa.nxa_flags = tnxa.nxa_flags;
1252 unxa.nxa_netcount = tnxa.nxa_netcount;
1253 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1254 }
1255 }
1256 if (error)
1257 return (error);
1258
1259 error = nfsrv_export(&unxa, ctx);
1260
1261 return (error);
1262 }
1263
1264 /*
1265 * Shut down a socket associated with an nfsrv_sock structure.
1266 * Should be called with the send lock set, if required.
1267 * The trick here is to increment the sref at the start, so that the nfsds
1268 * will stop using it and clear ns_flag at the end so that it will not be
1269 * reassigned during cleanup.
1270 */
1271 void
1272 nfsrv_zapsock(struct nfsrv_sock *slp)
1273 {
1274 socket_t so;
1275
1276 if ((slp->ns_flag & SLP_VALID) == 0)
1277 return;
1278 slp->ns_flag &= ~SLP_ALLFLAGS;
1279
1280 so = slp->ns_so;
1281 if (so == NULL)
1282 return;
1283
1284 /*
1285 * Attempt to deter future upcalls, but leave the
1286 * upcall info in place to avoid a race with the
1287 * networking code.
1288 */
1289 socket_lock(so, 1);
1290 so->so_rcv.sb_flags &= ~SB_UPCALL;
1291 socket_unlock(so, 1);
1292
1293 sock_shutdown(so, SHUT_RDWR);
1294 }
1295
1296 /*
1297 * cleanup and release a server socket structure.
1298 */
1299 void
1300 nfsrv_slpfree(struct nfsrv_sock *slp)
1301 {
1302 struct nfsrv_descript *nwp, *nnwp;
1303
1304 if (slp->ns_so) {
1305 sock_release(slp->ns_so);
1306 slp->ns_so = NULL;
1307 }
1308 if (slp->ns_nam)
1309 mbuf_free(slp->ns_nam);
1310 if (slp->ns_raw)
1311 mbuf_freem(slp->ns_raw);
1312 if (slp->ns_rec)
1313 mbuf_freem(slp->ns_rec);
1314 if (slp->ns_frag)
1315 mbuf_freem(slp->ns_frag);
1316 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1317 slp->ns_reccnt = 0;
1318
1319 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1320 nnwp = nwp->nd_tq.le_next;
1321 LIST_REMOVE(nwp, nd_tq);
1322 nfsm_chain_cleanup(&nwp->nd_nmreq);
1323 if (nwp->nd_mrep)
1324 mbuf_freem(nwp->nd_mrep);
1325 if (nwp->nd_nam2)
1326 mbuf_freem(nwp->nd_nam2);
1327 if (IS_VALID_CRED(nwp->nd_cr))
1328 kauth_cred_unref(&nwp->nd_cr);
1329 if (nwp->nd_gss_context)
1330 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1331 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
1332 }
1333 LIST_INIT(&slp->ns_tq);
1334
1335 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
1336 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
1337 FREE(slp, M_NFSSVC);
1338 }
1339
1340 /*
1341 * Derefence a server socket structure. If it has no more references and
1342 * is no longer valid, you can throw it away.
1343 */
1344 void
1345 nfsrv_slpderef(struct nfsrv_sock *slp)
1346 {
1347 struct timeval now;
1348
1349 lck_mtx_lock(nfsd_mutex);
1350 lck_rw_lock_exclusive(&slp->ns_rwlock);
1351 slp->ns_sref--;
1352
1353 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1354 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1355 /* remove socket from queue since there's no work */
1356 if (slp->ns_flag & SLP_WAITQ)
1357 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1358 else
1359 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1360 slp->ns_flag &= ~SLP_QUEUED;
1361 }
1362 lck_rw_done(&slp->ns_rwlock);
1363 lck_mtx_unlock(nfsd_mutex);
1364 return;
1365 }
1366
1367 /* This socket is no longer valid, so we'll get rid of it */
1368
1369 if (slp->ns_flag & SLP_QUEUED) {
1370 if (slp->ns_flag & SLP_WAITQ)
1371 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1372 else
1373 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1374 slp->ns_flag &= ~SLP_QUEUED;
1375 }
1376
1377 /*
1378 * Queue the socket up for deletion
1379 * and start the timer to delete it
1380 * after it has been in limbo for
1381 * a while.
1382 */
1383 microuptime(&now);
1384 slp->ns_timestamp = now.tv_sec;
1385 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1386 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1387 if (!nfsrv_deadsock_timer_on) {
1388 nfsrv_deadsock_timer_on = 1;
1389 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1390 NFSRV_DEADSOCKDELAY * 1000);
1391 }
1392
1393 lck_rw_done(&slp->ns_rwlock);
1394 /* now remove from the write gather socket list */
1395 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1396 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1397 slp->ns_wgq.tqe_next = SLPNOLIST;
1398 }
1399 lck_mtx_unlock(nfsd_mutex);
1400 }
1401
1402 /*
1403 * Check periodically for dead sockets pending delete.
1404 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
1405 * seconds then we assume it's safe to free.
1406 */
1407 void
1408 nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
1409 {
1410 struct nfsrv_sock *slp;
1411 struct timeval now;
1412 time_t time_to_wait;
1413
1414 microuptime(&now);
1415 lck_mtx_lock(nfsd_mutex);
1416
1417 while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
1418 if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
1419 break;
1420 TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
1421 nfsrv_slpfree(slp);
1422 }
1423 if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
1424 nfsrv_deadsock_timer_on = 0;
1425 lck_mtx_unlock(nfsd_mutex);
1426 return;
1427 }
1428 time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
1429 if (time_to_wait < 1)
1430 time_to_wait = 1;
1431
1432 lck_mtx_unlock(nfsd_mutex);
1433
1434 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1435 time_to_wait * 1000);
1436 }
1437
1438 /*
1439 * Clean up the data structures for the server.
1440 */
1441 void
1442 nfsrv_cleanup(void)
1443 {
1444 struct nfsrv_sock *slp, *nslp;
1445 struct timeval now;
1446 #if CONFIG_FSE
1447 struct nfsrv_fmod *fp, *nfp;
1448 int i;
1449 #endif
1450
1451 microuptime(&now);
1452 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1453 nslp = TAILQ_NEXT(slp, ns_chain);
1454 if (slp->ns_flag & SLP_VALID) {
1455 lck_rw_lock_exclusive(&slp->ns_rwlock);
1456 nfsrv_zapsock(slp);
1457 lck_rw_done(&slp->ns_rwlock);
1458 }
1459 if (slp->ns_flag & SLP_QUEUED) {
1460 if (slp->ns_flag & SLP_WAITQ)
1461 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1462 else
1463 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1464 slp->ns_flag &= ~SLP_QUEUED;
1465 }
1466 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1467 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1468 slp->ns_wgq.tqe_next = SLPNOLIST;
1469 }
1470 /* queue the socket up for deletion */
1471 slp->ns_timestamp = now.tv_sec;
1472 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1473 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
1474 if (!nfsrv_deadsock_timer_on) {
1475 nfsrv_deadsock_timer_on = 1;
1476 nfs_interval_timer_start(nfsrv_deadsock_timer_call,
1477 NFSRV_DEADSOCKDELAY * 1000);
1478 }
1479 }
1480
1481 #if CONFIG_FSE
1482 /*
1483 * Flush pending file write fsevents
1484 */
1485 lck_mtx_lock(nfsrv_fmod_mutex);
1486 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1487 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1488 /*
1489 * Fire off the content modified fsevent for each
1490 * entry, remove it from the list, and free it.
1491 */
1492 if (nfsrv_fsevents_enabled) {
1493 fp->fm_context.vc_thread = current_thread();
1494 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1495 FSE_ARG_VNODE, fp->fm_vp,
1496 FSE_ARG_DONE);
1497 }
1498 vnode_put(fp->fm_vp);
1499 kauth_cred_unref(&fp->fm_context.vc_ucred);
1500 nfp = LIST_NEXT(fp, fm_link);
1501 LIST_REMOVE(fp, fm_link);
1502 FREE(fp, M_TEMP);
1503 }
1504 }
1505 nfsrv_fmod_pending = 0;
1506 lck_mtx_unlock(nfsrv_fmod_mutex);
1507 #endif
1508
1509 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1510
1511 nfsrv_cleancache(); /* And clear out server cache */
1512
1513 nfsrv_udpsock = NULL;
1514 nfsrv_udp6sock = NULL;
1515 }
1516
1517 #endif /* NFS_NOSERVER */