]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_descrip.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / kern / kern_descrip.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102
103 #include <security/audit/audit.h>
104
105 #include <sys/mount_internal.h>
106 #include <sys/kdebug.h>
107 #include <sys/sysproto.h>
108 #include <sys/pipe.h>
109 #include <sys/spawn.h>
110 #include <sys/cprotect.h>
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <libkern/OSAtomic.h>
115
116 #include <sys/ubc_internal.h>
117
118 #include <kern/ipc_misc.h>
119 #include <vm/vm_protos.h>
120
121 #include <mach/mach_port.h>
122 #include <stdbool.h>
123
124 #if CONFIG_MACF
125 #include <security/mac_framework.h>
126 #endif
127
128 #define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
129 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
130 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
131 void ipc_port_release_send(ipc_port_t);
132
133 struct psemnode;
134 struct pshmnode;
135
136 static int finishdup(proc_t p,
137 struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
138
139 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
140 void fg_drop(struct fileproc * fp);
141 void fg_free(struct fileglob *fg);
142 void fg_ref(struct fileproc * fp);
143 void fileport_releasefg(struct fileglob *fg);
144
145 /* flags for close_internal_locked */
146 #define FD_DUP2RESV 1
147
148 /* We don't want these exported */
149
150 __private_extern__
151 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
152
153 static void _fdrelse(struct proc * p, int fd);
154
155
156 extern void file_lock_init(void);
157
158 extern kauth_scope_t kauth_scope_fileop;
159
160 /* Conflict wait queue for when selects collide (opaque type) */
161 extern struct waitq select_conflict_queue;
162
/*
 * Fallback definitions for filesystem-specific commands; each one is only
 * defined here when no earlier header has already provided it.
 */
#if !defined(HFS_GET_BOOT_INFO)
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#if !defined(HFS_SET_BOOT_INFO)
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#if !defined(APFSIOC_REVERT_TO_SNAPSHOT)
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif
174
/*
 * Shorthand member names: each f_xxx expands to the corresponding field
 * reached through f_fglob, so `fp->f_flag` reads `fp->f_fglob->fg_flag`.
 */
#define f_flag          f_fglob->fg_flag
#define f_type          f_fglob->fg_ops->fo_type
#define f_msgcount      f_fglob->fg_msgcount
#define f_cred          f_fglob->fg_cred
#define f_ops           f_fglob->fg_ops
#define f_offset        f_fglob->fg_offset
#define f_data          f_fglob->fg_data
/*
 * Evaluates to 1 when (x) + (y) would fall outside the range
 * LLONG_MIN..LLONG_MAX, and to 0 otherwise.  Overflow is only possible
 * when both operands have the same sign, so mixed-sign or zero operands
 * always yield 0.  Both arguments are evaluated more than once; do not
 * pass expressions with side effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y)                                 \
	((((x) > 0) && ((y) > 0)) ? (((x) > LLONG_MAX - (y)) ? 1 : 0) : \
	 (((x) < 0) && ((y) < 0)) ? (((x) < LLONG_MIN - (y)) ? 1 : 0) : \
	 0)
186 /*
187 * Descriptor management.
188 */
189 struct fmsglist fmsghead; /* head of list of open files */
190 struct fmsglist fmsg_ithead; /* head of list of open files */
191 int nfiles; /* actual number of open files */
192
193
194 lck_grp_attr_t * file_lck_grp_attr;
195 lck_grp_t * file_lck_grp;
196 lck_attr_t * file_lck_attr;
197
198 lck_mtx_t * uipc_lock;
199
200
/*
 * flock_add_overflows
 *
 * Description:	Type-checked helper reporting whether a + b cannot be
 *		represented as a signed 64-bit value.  Overflow is only
 *		possible when both operands have the same sign.
 *
 * Returns:	true	a + b would overflow (in either direction)
 *		false	a + b is representable
 */
static inline bool
flock_add_overflows(int64_t a, int64_t b)
{
	if (a > 0 && b > 0) {
		return a > LLONG_MAX - b;
	}
	if (a < 0 && b < 0) {
		return a < LLONG_MIN - b;
	}
	return false;
}

/*
 * check_file_seek_range
 *
 * Description:	Checks that the byte range described by an flock structure
 *		stays within 0 to LLONG_MAX once l_start has been made
 *		absolute.  Only SEEK_CUR and SEEK_SET are examined; any
 *		other l_whence value is accepted here without checks.
 *
 * Parameters:	fl			Flock structure (not modified).
 *		cur_file_offset		Current offset in the file; only
 *					used when l_whence is SEEK_CUR.
 *
 * Returns:	0			on Success.
 *		EOVERFLOW		on overflow.
 *		EINVAL			on offset less than zero.
 */
static int
check_file_seek_range(const struct flock *fl, off_t cur_file_offset)
{
	off_t start;

	switch (fl->l_whence) {
	case SEEK_CUR:
		/*
		 * The sum can only overflow when both operands share a
		 * sign: a negative l_start means the start marker fell
		 * below zero, otherwise it went beyond LLONG_MAX.
		 */
		if (flock_add_overflows(fl->l_start, cur_file_offset)) {
			return (fl->l_start < 0) ? EINVAL : EOVERFLOW;
		}
		start = fl->l_start + cur_file_offset;
		break;
	case SEEK_SET:
		start = fl->l_start;
		break;
	default:
		return 0;
	}

	/* Check if the start marker is negative. */
	if (start < 0) {
		return EINVAL;
	}

	if (fl->l_len > 0) {
		/* Last byte is start + l_len - 1; it must not pass LLONG_MAX. */
		if (flock_add_overflows(start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
	} else if (start + fl->l_len < 0) {
		/*
		 * Zero or negative length: start is >= 0 here, so the sum
		 * cannot overflow; it only must not drop below zero.
		 */
		return EINVAL;
	}

	return 0;
}
257
258
259 /*
260 * file_lock_init
261 *
262 * Description: Initialize the file lock group and the uipc and flist locks
263 *
264 * Parameters: (void)
265 *
266 * Returns: void
267 *
268 * Notes: Called at system startup from bsd_init().
269 */
270 void
271 file_lock_init(void)
272 {
273 /* allocate file lock group attribute and group */
274 file_lck_grp_attr = lck_grp_attr_alloc_init();
275
276 file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
277
278 /* Allocate file lock attribute */
279 file_lck_attr = lck_attr_alloc_init();
280
281 uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
282 }
283
284
285 void
286 proc_dirs_lock_shared(proc_t p)
287 {
288 lck_rw_lock_shared(&p->p_dirs_lock);
289 }
290
291 void
292 proc_dirs_unlock_shared(proc_t p)
293 {
294 lck_rw_unlock_shared(&p->p_dirs_lock);
295 }
296
297 void
298 proc_dirs_lock_exclusive(proc_t p)
299 {
300 lck_rw_lock_exclusive(&p->p_dirs_lock);
301 }
302
303 void
304 proc_dirs_unlock_exclusive(proc_t p)
305 {
306 lck_rw_unlock_exclusive(&p->p_dirs_lock);
307 }
308
309 /*
310 * proc_fdlock, proc_fdlock_spin
311 *
312 * Description: Lock to control access to the per process struct fileproc
313 * and struct filedesc
314 *
315 * Parameters: p Process to take the lock on
316 *
317 * Returns: void
318 *
319 * Notes: The lock is initialized in forkproc() and destroyed in
320 * reap_child_process().
321 */
322 void
323 proc_fdlock(proc_t p)
324 {
325 lck_mtx_lock(&p->p_fdmlock);
326 }
327
328 void
329 proc_fdlock_spin(proc_t p)
330 {
331 lck_mtx_lock_spin(&p->p_fdmlock);
332 }
333
334 void
335 proc_fdlock_assert(proc_t p, int assertflags)
336 {
337 lck_mtx_assert(&p->p_fdmlock, assertflags);
338 }
339
340
341 /*
342 * proc_fdunlock
343 *
344 * Description: Unlock the lock previously locked by a call to proc_fdlock()
345 *
346 * Parameters: p Process to drop the lock on
347 *
348 * Returns: void
349 */
350 void
351 proc_fdunlock(proc_t p)
352 {
353 lck_mtx_unlock(&p->p_fdmlock);
354 }
355
356
357 /*
358 * System calls on descriptors.
359 */
360
361
362 /*
363 * getdtablesize
364 *
365 * Description: Returns the per process maximum size of the descriptor table
366 *
367 * Parameters: p Process being queried
368 * retval Pointer to the call return area
369 *
370 * Returns: 0 Success
371 *
372 * Implicit returns:
373 * *retval (modified) Size of dtable
374 */
375 int
376 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
377 {
378 proc_fdlock_spin(p);
379 *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
380 proc_fdunlock(p);
381
382 return 0;
383 }
384
385
386 void
387 procfdtbl_reservefd(struct proc * p, int fd)
388 {
389 p->p_fd->fd_ofiles[fd] = NULL;
390 p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
391 }
392
393 void
394 procfdtbl_markclosefd(struct proc * p, int fd)
395 {
396 p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
397 }
398
399 void
400 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
401 {
402 if (fp != NULL) {
403 p->p_fd->fd_ofiles[fd] = fp;
404 }
405 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
406 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
407 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
408 wakeup(&p->p_fd);
409 }
410 }
411
412 void
413 procfdtbl_waitfd(struct proc * p, int fd)
414 {
415 p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
416 msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
417 }
418
419
420 void
421 procfdtbl_clearfd(struct proc * p, int fd)
422 {
423 int waiting;
424
425 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
426 p->p_fd->fd_ofiles[fd] = NULL;
427 p->p_fd->fd_ofileflags[fd] = 0;
428 if (waiting == UF_RESVWAIT) {
429 wakeup(&p->p_fd);
430 }
431 }
432
433 /*
434 * _fdrelse
435 *
436 * Description: Inline utility function to free an fd in a filedesc
437 *
438 * Parameters: fdp Pointer to filedesc fd lies in
439 * fd fd to free
440 * reserv fd should be reserved
441 *
442 * Returns: void
443 *
444 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
445 * the caller
446 */
447 static void
448 _fdrelse(struct proc * p, int fd)
449 {
450 struct filedesc *fdp = p->p_fd;
451 int nfd = 0;
452
453 if (fd < fdp->fd_freefile) {
454 fdp->fd_freefile = fd;
455 }
456 #if DIAGNOSTIC
457 if (fd > fdp->fd_lastfile) {
458 panic("fdrelse: fd_lastfile inconsistent");
459 }
460 #endif
461 procfdtbl_clearfd(p, fd);
462
463 while ((nfd = fdp->fd_lastfile) > 0 &&
464 fdp->fd_ofiles[nfd] == NULL &&
465 !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
466 /* JMM - What about files with lingering EV_VANISHED knotes? */
467 fdp->fd_lastfile--;
468 }
469 }
470
471
472 int
473 fd_rdwr(
474 int fd,
475 enum uio_rw rw,
476 uint64_t base,
477 int64_t len,
478 enum uio_seg segflg,
479 off_t offset,
480 int io_flg,
481 int64_t *aresid)
482 {
483 struct fileproc *fp;
484 proc_t p;
485 int error = 0;
486 int flags = 0;
487 int spacetype;
488 uio_t auio = NULL;
489 char uio_buf[UIO_SIZEOF(1)];
490 struct vfs_context context = *(vfs_context_current());
491 bool wrote_some = false;
492
493 p = current_proc();
494
495 error = fp_lookup(p, fd, &fp, 0);
496 if (error) {
497 return error;
498 }
499
500 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
501 error = EINVAL;
502 goto out;
503 }
504 if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
505 error = EBADF;
506 goto out;
507 }
508
509 if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
510 error = EBADF;
511 goto out;
512 }
513
514 context.vc_ucred = fp->f_fglob->fg_cred;
515
516 if (UIO_SEG_IS_USER_SPACE(segflg)) {
517 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
518 } else {
519 spacetype = UIO_SYSSPACE;
520 }
521
522 auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
523
524 uio_addiov(auio, base, len);
525
526 if (!(io_flg & IO_APPEND)) {
527 flags = FOF_OFFSET;
528 }
529
530 if (rw == UIO_WRITE) {
531 user_ssize_t orig_resid = uio_resid(auio);
532 error = fo_write(fp, auio, flags, &context);
533 wrote_some = uio_resid(auio) < orig_resid;
534 } else {
535 error = fo_read(fp, auio, flags, &context);
536 }
537
538 if (aresid) {
539 *aresid = uio_resid(auio);
540 } else {
541 if (uio_resid(auio) && error == 0) {
542 error = EIO;
543 }
544 }
545 out:
546 if (wrote_some) {
547 fp_drop_written(p, fd, fp);
548 } else {
549 fp_drop(p, fd, fp, 0);
550 }
551
552 return error;
553 }
554
555
556
557 /*
558 * dup
559 *
560 * Description: Duplicate a file descriptor.
561 *
562 * Parameters: p Process performing the dup
563 * uap->fd The fd to dup
564 * retval Pointer to the call return area
565 *
566 * Returns: 0 Success
567 * !0 Errno
568 *
569 * Implicit returns:
570 * *retval (modified) The new descriptor
571 */
572 int
573 dup(proc_t p, struct dup_args *uap, int32_t *retval)
574 {
575 struct filedesc *fdp = p->p_fd;
576 int old = uap->fd;
577 int new, error;
578 struct fileproc *fp;
579
580 proc_fdlock(p);
581 if ((error = fp_lookup(p, old, &fp, 1))) {
582 proc_fdunlock(p);
583 return error;
584 }
585 if (FP_ISGUARDED(fp, GUARD_DUP)) {
586 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
587 (void) fp_drop(p, old, fp, 1);
588 proc_fdunlock(p);
589 return error;
590 }
591 if ((error = fdalloc(p, 0, &new))) {
592 fp_drop(p, old, fp, 1);
593 proc_fdunlock(p);
594 return error;
595 }
596 error = finishdup(p, fdp, old, new, 0, retval);
597 fp_drop(p, old, fp, 1);
598 proc_fdunlock(p);
599
600 if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
601 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
602 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
603 }
604
605 return error;
606 }
607
608 /*
609 * dup2
610 *
611 * Description: Duplicate a file descriptor to a particular value.
612 *
613 * Parameters: p Process performing the dup
614 * uap->from The fd to dup
615 * uap->to The fd to dup it to
616 * retval Pointer to the call return area
617 *
618 * Returns: 0 Success
619 * !0 Errno
620 *
621 * Implicit returns:
622 * *retval (modified) The new descriptor
623 */
624 int
625 dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
626 {
627 struct filedesc *fdp = p->p_fd;
628 int old = uap->from, new = uap->to;
629 int i, error;
630 struct fileproc *fp, *nfp;
631
632 proc_fdlock(p);
633
634 startover:
635 if ((error = fp_lookup(p, old, &fp, 1))) {
636 proc_fdunlock(p);
637 return error;
638 }
639 if (FP_ISGUARDED(fp, GUARD_DUP)) {
640 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
641 (void) fp_drop(p, old, fp, 1);
642 proc_fdunlock(p);
643 return error;
644 }
645 if (new < 0 ||
646 (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
647 new >= maxfiles) {
648 fp_drop(p, old, fp, 1);
649 proc_fdunlock(p);
650 return EBADF;
651 }
652 if (old == new) {
653 fp_drop(p, old, fp, 1);
654 *retval = new;
655 proc_fdunlock(p);
656 return 0;
657 }
658 if (new < 0 || new >= fdp->fd_nfiles) {
659 if ((error = fdalloc(p, new, &i))) {
660 fp_drop(p, old, fp, 1);
661 proc_fdunlock(p);
662 return error;
663 }
664 if (new != i) {
665 fdrelse(p, i);
666 goto closeit;
667 }
668 } else {
669 closeit:
670 while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
671 fp_drop(p, old, fp, 1);
672 procfdtbl_waitfd(p, new);
673 #if DIAGNOSTIC
674 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
675 #endif
676 goto startover;
677 }
678
679 if ((fdp->fd_ofiles[new] != NULL) &&
680 ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
681 fp_drop(p, old, fp, 1);
682 if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
683 error = fp_guard_exception(p,
684 new, nfp, kGUARD_EXC_CLOSE);
685 (void) fp_drop(p, new, nfp, 1);
686 proc_fdunlock(p);
687 return error;
688 }
689 (void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
690 #if DIAGNOSTIC
691 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
692 #endif
693 procfdtbl_clearfd(p, new);
694 goto startover;
695 } else {
696 #if DIAGNOSTIC
697 if (fdp->fd_ofiles[new] != NULL) {
698 panic("dup2: no ref on fileproc %d", new);
699 }
700 #endif
701 procfdtbl_reservefd(p, new);
702 }
703
704 #if DIAGNOSTIC
705 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
706 #endif
707 }
708 #if DIAGNOSTIC
709 if (fdp->fd_ofiles[new] != 0) {
710 panic("dup2: overwriting fd_ofiles with new %d", new);
711 }
712 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
713 panic("dup2: unreserved fileflags with new %d", new);
714 }
715 #endif
716 error = finishdup(p, fdp, old, new, 0, retval);
717 fp_drop(p, old, fp, 1);
718 proc_fdunlock(p);
719
720 return error;
721 }
722
723
724 /*
725 * fcntl
726 *
727 * Description: The file control system call.
728 *
729 * Parameters: p Process performing the fcntl
730 * uap->fd The fd to operate against
731 * uap->cmd The command to perform
732 * uap->arg Pointer to the command argument
733 * retval Pointer to the call return area
734 *
735 * Returns: 0 Success
736 * !0 Errno (see fcntl_nocancel)
737 *
738 * Implicit returns:
739 * *retval (modified) fcntl return value (if any)
740 *
741 * Notes: This system call differs from fcntl_nocancel() in that it
742 * tests for cancellation prior to performing a potentially
743 * blocking operation.
744 */
745 int
746 fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
747 {
748 __pthread_testcancel(1);
749 return fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
750 }
751
752
753 /*
754 * fcntl_nocancel
755 *
756 * Description: A non-cancel-testing file control system call.
757 *
758 * Parameters: p Process performing the fcntl
759 * uap->fd The fd to operate against
760 * uap->cmd The command to perform
761 * uap->arg Pointer to the command argument
762 * retval Pointer to the call return area
763 *
764 * Returns: 0 Success
765 * EINVAL
766 * fp_lookup:EBADF Bad file descriptor
767 * [F_DUPFD]
768 * fdalloc:EMFILE
769 * fdalloc:ENOMEM
770 * finishdup:EBADF
771 * finishdup:ENOMEM
772 * [F_SETOWN]
773 * ESRCH
774 * [F_SETLK]
775 * EBADF
776 * EOVERFLOW
777 * copyin:EFAULT
778 * vnode_getwithref:???
779 * VNOP_ADVLOCK:???
780 * msleep:ETIMEDOUT
781 * [F_GETLK]
782 * EBADF
783 * EOVERFLOW
784 * copyin:EFAULT
785 * copyout:EFAULT
786 * vnode_getwithref:???
787 * VNOP_ADVLOCK:???
788 * [F_PREALLOCATE]
789 * EBADF
790 * EINVAL
791 * copyin:EFAULT
792 * copyout:EFAULT
793 * vnode_getwithref:???
794 * VNOP_ALLOCATE:???
795 * [F_SETSIZE,F_RDADVISE]
796 * EBADF
797 * copyin:EFAULT
798 * vnode_getwithref:???
799 * [F_RDAHEAD,F_NOCACHE]
800 * EBADF
801 * vnode_getwithref:???
802 * [???]
803 *
804 * Implicit returns:
805 * *retval (modified) fcntl return value (if any)
806 */
807 int
808 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
809 {
810 int fd = uap->fd;
811 struct filedesc *fdp = p->p_fd;
812 struct fileproc *fp;
813 char *pop;
814 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
815 int i, tmp, error, error2, flg = 0;
816 struct flock fl = {};
817 struct flocktimeout fltimeout;
818 struct timespec *timeout = NULL;
819 struct vfs_context context;
820 off_t offset;
821 int newmin;
822 daddr64_t lbn, bn;
823 unsigned int fflag;
824 user_addr_t argp;
825 boolean_t is64bit;
826
827 AUDIT_ARG(fd, uap->fd);
828 AUDIT_ARG(cmd, uap->cmd);
829
830 proc_fdlock(p);
831 if ((error = fp_lookup(p, fd, &fp, 1))) {
832 proc_fdunlock(p);
833 return error;
834 }
835 context.vc_thread = current_thread();
836 context.vc_ucred = fp->f_cred;
837
838 is64bit = proc_is64bit(p);
839 if (is64bit) {
840 argp = uap->arg;
841 } else {
842 /*
843 * Since the arg parameter is defined as a long but may be
844 * either a long or a pointer we must take care to handle
845 * sign extension issues. Our sys call munger will sign
846 * extend a long when we are called from a 32-bit process.
847 * Since we can never have an address greater than 32-bits
848 * from a 32-bit process we lop off the top 32-bits to avoid
849 * getting the wrong address
850 */
851 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
852 }
853
854 pop = &fdp->fd_ofileflags[fd];
855
856 #if CONFIG_MACF
857 error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
858 uap->arg);
859 if (error) {
860 goto out;
861 }
862 #endif
863
864 switch (uap->cmd) {
865 case F_DUPFD:
866 case F_DUPFD_CLOEXEC:
867 if (FP_ISGUARDED(fp, GUARD_DUP)) {
868 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
869 goto out;
870 }
871 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
872 AUDIT_ARG(value32, newmin);
873 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
874 newmin >= maxfiles) {
875 error = EINVAL;
876 goto out;
877 }
878 if ((error = fdalloc(p, newmin, &i))) {
879 goto out;
880 }
881 error = finishdup(p, fdp, fd, i,
882 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
883 goto out;
884
885 case F_GETFD:
886 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
887 error = 0;
888 goto out;
889
890 case F_SETFD:
891 AUDIT_ARG(value32, uap->arg);
892 if (uap->arg & FD_CLOEXEC) {
893 *pop |= UF_EXCLOSE;
894 } else {
895 if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
896 error = fp_guard_exception(p,
897 fd, fp, kGUARD_EXC_NOCLOEXEC);
898 goto out;
899 }
900 *pop &= ~UF_EXCLOSE;
901 }
902 error = 0;
903 goto out;
904
905 case F_GETFL:
906 *retval = OFLAGS(fp->f_flag);
907 error = 0;
908 goto out;
909
910 case F_SETFL:
911 fp->f_flag &= ~FCNTLFLAGS;
912 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
913 AUDIT_ARG(value32, tmp);
914 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
915 tmp = fp->f_flag & FNONBLOCK;
916 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
917 if (error) {
918 goto out;
919 }
920 tmp = fp->f_flag & FASYNC;
921 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
922 if (!error) {
923 goto out;
924 }
925 fp->f_flag &= ~FNONBLOCK;
926 tmp = 0;
927 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
928 goto out;
929
930 case F_GETOWN:
931 if (fp->f_type == DTYPE_SOCKET) {
932 *retval = ((struct socket *)fp->f_data)->so_pgid;
933 error = 0;
934 goto out;
935 }
936 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
937 *retval = -*retval;
938 goto out;
939
940 case F_SETOWN:
941 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
942 AUDIT_ARG(value32, tmp);
943 if (fp->f_type == DTYPE_SOCKET) {
944 ((struct socket *)fp->f_data)->so_pgid = tmp;
945 error = 0;
946 goto out;
947 }
948 if (fp->f_type == DTYPE_PIPE) {
949 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
950 goto out;
951 }
952
953 if (tmp <= 0) {
954 tmp = -tmp;
955 } else {
956 proc_t p1 = proc_find(tmp);
957 if (p1 == 0) {
958 error = ESRCH;
959 goto out;
960 }
961 tmp = (int)p1->p_pgrpid;
962 proc_rele(p1);
963 }
964 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
965 goto out;
966
967 case F_SETNOSIGPIPE:
968 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
969 if (fp->f_type == DTYPE_SOCKET) {
970 #if SOCKETS
971 error = sock_setsockopt((struct socket *)fp->f_data,
972 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
973 #else
974 error = EINVAL;
975 #endif
976 } else {
977 struct fileglob *fg = fp->f_fglob;
978
979 lck_mtx_lock_spin(&fg->fg_lock);
980 if (tmp) {
981 fg->fg_lflags |= FG_NOSIGPIPE;
982 } else {
983 fg->fg_lflags &= ~FG_NOSIGPIPE;
984 }
985 lck_mtx_unlock(&fg->fg_lock);
986 error = 0;
987 }
988 goto out;
989
990 case F_GETNOSIGPIPE:
991 if (fp->f_type == DTYPE_SOCKET) {
992 #if SOCKETS
993 int retsize = sizeof(*retval);
994 error = sock_getsockopt((struct socket *)fp->f_data,
995 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
996 #else
997 error = EINVAL;
998 #endif
999 } else {
1000 *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
1001 1 : 0;
1002 error = 0;
1003 }
1004 goto out;
1005
1006 case F_SETCONFINED:
1007 /*
1008 * If this is the only reference to this fglob in the process
1009 * and it's already marked as close-on-fork then mark it as
1010 * (immutably) "confined" i.e. any fd that points to it will
1011 * forever be close-on-fork, and attempts to use an IPC
1012 * mechanism to move the descriptor elsewhere will fail.
1013 */
1014 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1015 struct fileglob *fg = fp->f_fglob;
1016
1017 lck_mtx_lock_spin(&fg->fg_lock);
1018 if (fg->fg_lflags & FG_CONFINED) {
1019 error = 0;
1020 } else if (1 != fg->fg_count) {
1021 error = EAGAIN; /* go close the dup .. */
1022 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1023 fg->fg_lflags |= FG_CONFINED;
1024 error = 0;
1025 } else {
1026 error = EBADF; /* open without O_CLOFORK? */
1027 }
1028 lck_mtx_unlock(&fg->fg_lock);
1029 } else {
1030 /*
1031 * Other subsystems may have built on the immutability
1032 * of FG_CONFINED; clearing it may be tricky.
1033 */
1034 error = EPERM; /* immutable */
1035 }
1036 goto out;
1037
1038 case F_GETCONFINED:
1039 *retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? 1 : 0;
1040 error = 0;
1041 goto out;
1042
1043 case F_SETLKWTIMEOUT:
1044 case F_SETLKW:
1045 case F_OFD_SETLKWTIMEOUT:
1046 case F_OFD_SETLKW:
1047 flg |= F_WAIT;
1048 /* Fall into F_SETLK */
1049
1050 case F_SETLK:
1051 case F_OFD_SETLK:
1052 if (fp->f_type != DTYPE_VNODE) {
1053 error = EBADF;
1054 goto out;
1055 }
1056 vp = (struct vnode *)fp->f_data;
1057
1058 fflag = fp->f_flag;
1059 offset = fp->f_offset;
1060 proc_fdunlock(p);
1061
1062 /* Copy in the lock structure */
1063 if (F_SETLKWTIMEOUT == uap->cmd ||
1064 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1065 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1066 if (error) {
1067 goto outdrop;
1068 }
1069 fl = fltimeout.fl;
1070 timeout = &fltimeout.timeout;
1071 } else {
1072 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1073 if (error) {
1074 goto outdrop;
1075 }
1076 }
1077
1078 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1079 /* and ending byte for EOVERFLOW in SEEK_SET */
1080 error = check_file_seek_range(&fl, offset);
1081 if (error) {
1082 goto outdrop;
1083 }
1084
1085 if ((error = vnode_getwithref(vp))) {
1086 goto outdrop;
1087 }
1088 if (fl.l_whence == SEEK_CUR) {
1089 fl.l_start += offset;
1090 }
1091
1092 #if CONFIG_MACF
1093 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1094 F_SETLK, &fl);
1095 if (error) {
1096 (void)vnode_put(vp);
1097 goto outdrop;
1098 }
1099 #endif
1100 switch (uap->cmd) {
1101 case F_OFD_SETLK:
1102 case F_OFD_SETLKW:
1103 case F_OFD_SETLKWTIMEOUT:
1104 flg |= F_OFD_LOCK;
1105 switch (fl.l_type) {
1106 case F_RDLCK:
1107 if ((fflag & FREAD) == 0) {
1108 error = EBADF;
1109 break;
1110 }
1111 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1112 F_SETLK, &fl, flg, &context, timeout);
1113 break;
1114 case F_WRLCK:
1115 if ((fflag & FWRITE) == 0) {
1116 error = EBADF;
1117 break;
1118 }
1119 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1120 F_SETLK, &fl, flg, &context, timeout);
1121 break;
1122 case F_UNLCK:
1123 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1124 F_UNLCK, &fl, F_OFD_LOCK, &context,
1125 timeout);
1126 break;
1127 default:
1128 error = EINVAL;
1129 break;
1130 }
1131 if (0 == error &&
1132 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1133 struct fileglob *fg = fp->f_fglob;
1134
1135 /*
1136 * arrange F_UNLCK on last close (once
1137 * set, FG_HAS_OFDLOCK is immutable)
1138 */
1139 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1140 lck_mtx_lock_spin(&fg->fg_lock);
1141 fg->fg_lflags |= FG_HAS_OFDLOCK;
1142 lck_mtx_unlock(&fg->fg_lock);
1143 }
1144 }
1145 break;
1146 default:
1147 flg |= F_POSIX;
1148 switch (fl.l_type) {
1149 case F_RDLCK:
1150 if ((fflag & FREAD) == 0) {
1151 error = EBADF;
1152 break;
1153 }
1154 // XXX UInt32 unsafe for LP64 kernel
1155 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1156 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1157 F_SETLK, &fl, flg, &context, timeout);
1158 break;
1159 case F_WRLCK:
1160 if ((fflag & FWRITE) == 0) {
1161 error = EBADF;
1162 break;
1163 }
1164 // XXX UInt32 unsafe for LP64 kernel
1165 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1166 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1167 F_SETLK, &fl, flg, &context, timeout);
1168 break;
1169 case F_UNLCK:
1170 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1171 F_UNLCK, &fl, F_POSIX, &context, timeout);
1172 break;
1173 default:
1174 error = EINVAL;
1175 break;
1176 }
1177 break;
1178 }
1179 (void) vnode_put(vp);
1180 goto outdrop;
1181
1182 case F_GETLK:
1183 case F_OFD_GETLK:
1184 case F_GETLKPID:
1185 case F_OFD_GETLKPID:
1186 if (fp->f_type != DTYPE_VNODE) {
1187 error = EBADF;
1188 goto out;
1189 }
1190 vp = (struct vnode *)fp->f_data;
1191
1192 offset = fp->f_offset;
1193 proc_fdunlock(p);
1194
1195 /* Copy in the lock structure */
1196 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1197 if (error) {
1198 goto outdrop;
1199 }
1200
1201 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1202 /* and ending byte for EOVERFLOW in SEEK_SET */
1203 error = check_file_seek_range(&fl, offset);
1204 if (error) {
1205 goto outdrop;
1206 }
1207
1208 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1209 error = EINVAL;
1210 goto outdrop;
1211 }
1212
1213 switch (fl.l_type) {
1214 case F_RDLCK:
1215 case F_UNLCK:
1216 case F_WRLCK:
1217 break;
1218 default:
1219 error = EINVAL;
1220 goto outdrop;
1221 }
1222
1223 switch (fl.l_whence) {
1224 case SEEK_CUR:
1225 case SEEK_SET:
1226 case SEEK_END:
1227 break;
1228 default:
1229 error = EINVAL;
1230 goto outdrop;
1231 }
1232
1233 if ((error = vnode_getwithref(vp)) == 0) {
1234 if (fl.l_whence == SEEK_CUR) {
1235 fl.l_start += offset;
1236 }
1237
1238 #if CONFIG_MACF
1239 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1240 uap->cmd, &fl);
1241 if (error == 0)
1242 #endif
1243 switch (uap->cmd) {
1244 case F_OFD_GETLK:
1245 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1246 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1247 break;
1248 case F_OFD_GETLKPID:
1249 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1250 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1251 break;
1252 default:
1253 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1254 uap->cmd, &fl, F_POSIX, &context, NULL);
1255 break;
1256 }
1257
1258 (void)vnode_put(vp);
1259
1260 if (error == 0) {
1261 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1262 }
1263 }
1264 goto outdrop;
1265
1266 case F_PREALLOCATE: {
1267 fstore_t alloc_struct; /* structure for allocate command */
1268 u_int32_t alloc_flags = 0;
1269
1270 if (fp->f_type != DTYPE_VNODE) {
1271 error = EBADF;
1272 goto out;
1273 }
1274
1275 vp = (struct vnode *)fp->f_data;
1276 proc_fdunlock(p);
1277
1278 /* make sure that we have write permission */
1279 if ((fp->f_flag & FWRITE) == 0) {
1280 error = EBADF;
1281 goto outdrop;
1282 }
1283
1284 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1285 if (error) {
1286 goto outdrop;
1287 }
1288
1289 /* now set the space allocated to 0 */
1290 alloc_struct.fst_bytesalloc = 0;
1291
1292 /*
1293 * Do some simple parameter checking
1294 */
1295
1296 /* set up the flags */
1297
1298 alloc_flags |= PREALLOCATE;
1299
1300 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1301 alloc_flags |= ALLOCATECONTIG;
1302 }
1303
1304 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1305 alloc_flags |= ALLOCATEALL;
1306 }
1307
1308 /*
1309 * Do any position mode specific stuff. The only
1310 * position mode supported now is PEOFPOSMODE
1311 */
1312
1313 switch (alloc_struct.fst_posmode) {
1314 case F_PEOFPOSMODE:
1315 if (alloc_struct.fst_offset != 0) {
1316 error = EINVAL;
1317 goto outdrop;
1318 }
1319
1320 alloc_flags |= ALLOCATEFROMPEOF;
1321 break;
1322
1323 case F_VOLPOSMODE:
1324 if (alloc_struct.fst_offset <= 0) {
1325 error = EINVAL;
1326 goto outdrop;
1327 }
1328
1329 alloc_flags |= ALLOCATEFROMVOL;
1330 break;
1331
1332 default: {
1333 error = EINVAL;
1334 goto outdrop;
1335 }
1336 }
1337 if ((error = vnode_getwithref(vp)) == 0) {
1338 /*
1339 * call allocate to get the space
1340 */
1341 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1342 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1343 &context);
1344 (void)vnode_put(vp);
1345
1346 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1347
1348 if (error == 0) {
1349 error = error2;
1350 }
1351 }
1352 goto outdrop;
1353 }
1354 case F_PUNCHHOLE: {
1355 fpunchhole_t args;
1356
1357 if (fp->f_type != DTYPE_VNODE) {
1358 error = EBADF;
1359 goto out;
1360 }
1361
1362 vp = (struct vnode *)fp->f_data;
1363 proc_fdunlock(p);
1364
1365 /* need write permissions */
1366 if ((fp->f_flag & FWRITE) == 0) {
1367 error = EPERM;
1368 goto outdrop;
1369 }
1370
1371 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1372 goto outdrop;
1373 }
1374
1375 if ((error = vnode_getwithref(vp))) {
1376 goto outdrop;
1377 }
1378
1379 #if CONFIG_MACF
1380 if ((error = mac_vnode_check_write(&context, fp->f_fglob->fg_cred, vp))) {
1381 (void)vnode_put(vp);
1382 goto outdrop;
1383 }
1384 #endif
1385
1386 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1387 (void)vnode_put(vp);
1388
1389 goto outdrop;
1390 }
1391 case F_TRIM_ACTIVE_FILE: {
1392 ftrimactivefile_t args;
1393
1394 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1395 error = EACCES;
1396 goto out;
1397 }
1398
1399 if (fp->f_type != DTYPE_VNODE) {
1400 error = EBADF;
1401 goto out;
1402 }
1403
1404 vp = (struct vnode *)fp->f_data;
1405 proc_fdunlock(p);
1406
1407 /* need write permissions */
1408 if ((fp->f_flag & FWRITE) == 0) {
1409 error = EPERM;
1410 goto outdrop;
1411 }
1412
1413 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1414 goto outdrop;
1415 }
1416
1417 if ((error = vnode_getwithref(vp))) {
1418 goto outdrop;
1419 }
1420
1421 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1422 (void)vnode_put(vp);
1423
1424 goto outdrop;
1425 }
1426 case F_SPECULATIVE_READ: {
1427 fspecread_t args;
1428
1429 if (fp->f_type != DTYPE_VNODE) {
1430 error = EBADF;
1431 goto out;
1432 }
1433
1434 vp = (struct vnode *)fp->f_data;
1435 proc_fdunlock(p);
1436
1437 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1438 goto outdrop;
1439 }
1440
1441 /* Discard invalid offsets or lengths */
1442 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1443 error = EINVAL;
1444 goto outdrop;
1445 }
1446
1447 /*
1448 * Round the file offset down to a page-size boundary (or to 0).
1449 * The filesystem will need to round the length up to the end of the page boundary
1450 * or to the EOF of the file.
1451 */
1452 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1453 uint64_t foff_delta = args.fsr_offset - foff;
1454 args.fsr_offset = (off_t) foff;
1455
1456 /*
1457 * Now add in the delta to the supplied length. Since we may have adjusted the
1458 * offset, increase it by the amount that we adjusted.
1459 */
1460 args.fsr_length += foff_delta;
1461
1462 if ((error = vnode_getwithref(vp))) {
1463 goto outdrop;
1464 }
1465 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1466 (void)vnode_put(vp);
1467
1468 goto outdrop;
1469 }
1470 case F_SETSIZE:
1471 if (fp->f_type != DTYPE_VNODE) {
1472 error = EBADF;
1473 goto out;
1474 }
1475 vp = (struct vnode *)fp->f_data;
1476 proc_fdunlock(p);
1477
1478 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1479 if (error) {
1480 goto outdrop;
1481 }
1482 AUDIT_ARG(value64, offset);
1483
1484 error = vnode_getwithref(vp);
1485 if (error) {
1486 goto outdrop;
1487 }
1488
1489 #if CONFIG_MACF
1490 error = mac_vnode_check_truncate(&context,
1491 fp->f_fglob->fg_cred, vp);
1492 if (error) {
1493 (void)vnode_put(vp);
1494 goto outdrop;
1495 }
1496 #endif
1497 /*
1498 * Make sure that we are root. Growing a file
1499 * without zero filling the data is a security hole
1500 * root would have access anyway so we'll allow it
1501 */
1502 if (!kauth_cred_issuser(kauth_cred_get())) {
1503 error = EACCES;
1504 } else {
1505 /*
1506 * set the file size
1507 */
1508 error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1509 &context);
1510
1511 #if CONFIG_MACF
1512 if (error == 0) {
1513 mac_vnode_notify_truncate(&context, fp->f_fglob->fg_cred, vp);
1514 }
1515 #endif
1516 }
1517
1518 (void)vnode_put(vp);
1519 goto outdrop;
1520
1521 case F_RDAHEAD:
1522 if (fp->f_type != DTYPE_VNODE) {
1523 error = EBADF;
1524 goto out;
1525 }
1526 if (uap->arg) {
1527 fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1528 } else {
1529 fp->f_fglob->fg_flag |= FNORDAHEAD;
1530 }
1531
1532 goto out;
1533
1534 case F_NOCACHE:
1535 if (fp->f_type != DTYPE_VNODE) {
1536 error = EBADF;
1537 goto out;
1538 }
1539 if (uap->arg) {
1540 fp->f_fglob->fg_flag |= FNOCACHE;
1541 } else {
1542 fp->f_fglob->fg_flag &= ~FNOCACHE;
1543 }
1544
1545 goto out;
1546
1547 case F_NODIRECT:
1548 if (fp->f_type != DTYPE_VNODE) {
1549 error = EBADF;
1550 goto out;
1551 }
1552 if (uap->arg) {
1553 fp->f_fglob->fg_flag |= FNODIRECT;
1554 } else {
1555 fp->f_fglob->fg_flag &= ~FNODIRECT;
1556 }
1557
1558 goto out;
1559
1560 case F_SINGLE_WRITER:
1561 if (fp->f_type != DTYPE_VNODE) {
1562 error = EBADF;
1563 goto out;
1564 }
1565 if (uap->arg) {
1566 fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1567 } else {
1568 fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1569 }
1570
1571 goto out;
1572
1573 case F_GLOBAL_NOCACHE:
1574 if (fp->f_type != DTYPE_VNODE) {
1575 error = EBADF;
1576 goto out;
1577 }
1578 vp = (struct vnode *)fp->f_data;
1579 proc_fdunlock(p);
1580
1581 if ((error = vnode_getwithref(vp)) == 0) {
1582 *retval = vnode_isnocache(vp);
1583
1584 if (uap->arg) {
1585 vnode_setnocache(vp);
1586 } else {
1587 vnode_clearnocache(vp);
1588 }
1589
1590 (void)vnode_put(vp);
1591 }
1592 goto outdrop;
1593
1594 case F_CHECK_OPENEVT:
1595 if (fp->f_type != DTYPE_VNODE) {
1596 error = EBADF;
1597 goto out;
1598 }
1599 vp = (struct vnode *)fp->f_data;
1600 proc_fdunlock(p);
1601
1602 if ((error = vnode_getwithref(vp)) == 0) {
1603 *retval = vnode_is_openevt(vp);
1604
1605 if (uap->arg) {
1606 vnode_set_openevt(vp);
1607 } else {
1608 vnode_clear_openevt(vp);
1609 }
1610
1611 (void)vnode_put(vp);
1612 }
1613 goto outdrop;
1614
1615 case F_RDADVISE: {
1616 struct radvisory ra_struct;
1617
1618 if (fp->f_type != DTYPE_VNODE) {
1619 error = EBADF;
1620 goto out;
1621 }
1622 vp = (struct vnode *)fp->f_data;
1623 proc_fdunlock(p);
1624
1625 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1626 goto outdrop;
1627 }
1628 if ((error = vnode_getwithref(vp)) == 0) {
1629 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1630
1631 (void)vnode_put(vp);
1632 }
1633 goto outdrop;
1634 }
1635
1636 case F_FLUSH_DATA:
1637
1638 if (fp->f_type != DTYPE_VNODE) {
1639 error = EBADF;
1640 goto out;
1641 }
1642 vp = (struct vnode *)fp->f_data;
1643 proc_fdunlock(p);
1644
1645 if ((error = vnode_getwithref(vp)) == 0) {
1646 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1647
1648 (void)vnode_put(vp);
1649 }
1650 goto outdrop;
1651
1652 case F_LOG2PHYS:
1653 case F_LOG2PHYS_EXT: {
1654 struct log2phys l2p_struct = {}; /* structure for allocate command */
1655 int devBlockSize;
1656
1657 off_t file_offset = 0;
1658 size_t a_size = 0;
1659 size_t run = 0;
1660
1661 if (uap->cmd == F_LOG2PHYS_EXT) {
1662 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1663 if (error) {
1664 goto out;
1665 }
1666 file_offset = l2p_struct.l2p_devoffset;
1667 } else {
1668 file_offset = fp->f_offset;
1669 }
1670 if (fp->f_type != DTYPE_VNODE) {
1671 error = EBADF;
1672 goto out;
1673 }
1674 vp = (struct vnode *)fp->f_data;
1675 proc_fdunlock(p);
1676 if ((error = vnode_getwithref(vp))) {
1677 goto outdrop;
1678 }
1679 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1680 if (error) {
1681 (void)vnode_put(vp);
1682 goto outdrop;
1683 }
1684 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1685 if (error) {
1686 (void)vnode_put(vp);
1687 goto outdrop;
1688 }
1689 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1690 if (uap->cmd == F_LOG2PHYS_EXT) {
1691 if (l2p_struct.l2p_contigbytes < 0) {
1692 vnode_put(vp);
1693 error = EINVAL;
1694 goto outdrop;
1695 }
1696
1697 a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1698 } else {
1699 a_size = devBlockSize;
1700 }
1701
1702 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1703
1704 (void)vnode_put(vp);
1705
1706 if (!error) {
1707 l2p_struct.l2p_flags = 0; /* for now */
1708 if (uap->cmd == F_LOG2PHYS_EXT) {
1709 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1710 } else {
1711 l2p_struct.l2p_contigbytes = 0; /* for now */
1712 }
1713
1714 /*
1715 * The block number being -1 suggests that the file offset is not backed
1716 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1717 */
1718 if (bn == -1) {
1719 /* Don't multiply it by the block size */
1720 l2p_struct.l2p_devoffset = bn;
1721 } else {
1722 l2p_struct.l2p_devoffset = bn * devBlockSize;
1723 l2p_struct.l2p_devoffset += file_offset - offset;
1724 }
1725 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1726 }
1727 goto outdrop;
1728 }
1729 case F_GETPATH:
1730 case F_GETPATH_NOFIRMLINK: {
1731 char *pathbufp;
1732 int pathlen;
1733
1734 if (fp->f_type != DTYPE_VNODE) {
1735 error = EBADF;
1736 goto out;
1737 }
1738 vp = (struct vnode *)fp->f_data;
1739 proc_fdunlock(p);
1740
1741 pathlen = MAXPATHLEN;
1742 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1743 if (pathbufp == NULL) {
1744 error = ENOMEM;
1745 goto outdrop;
1746 }
1747 if ((error = vnode_getwithref(vp)) == 0) {
1748 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1749 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1750 } else {
1751 error = vn_getpath(vp, pathbufp, &pathlen);
1752 }
1753 (void)vnode_put(vp);
1754
1755 if (error == 0) {
1756 error = copyout((caddr_t)pathbufp, argp, pathlen);
1757 }
1758 }
1759 FREE(pathbufp, M_TEMP);
1760 goto outdrop;
1761 }
1762
1763 case F_PATHPKG_CHECK: {
1764 char *pathbufp;
1765 size_t pathlen;
1766
1767 if (fp->f_type != DTYPE_VNODE) {
1768 error = EBADF;
1769 goto out;
1770 }
1771 vp = (struct vnode *)fp->f_data;
1772 proc_fdunlock(p);
1773
1774 pathlen = MAXPATHLEN;
1775 pathbufp = kalloc(MAXPATHLEN);
1776
1777 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1778 if ((error = vnode_getwithref(vp)) == 0) {
1779 AUDIT_ARG(text, pathbufp);
1780 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1781
1782 (void)vnode_put(vp);
1783 }
1784 }
1785 kfree(pathbufp, MAXPATHLEN);
1786 goto outdrop;
1787 }
1788
1789 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1790 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1791 case F_BARRIERFSYNC: // fsync + barrier
1792 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1793 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1794 if (fp->f_type != DTYPE_VNODE) {
1795 error = EBADF;
1796 goto out;
1797 }
1798 vp = (struct vnode *)fp->f_data;
1799 proc_fdunlock(p);
1800
1801 if ((error = vnode_getwithref(vp)) == 0) {
1802 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1803
1804 (void)vnode_put(vp);
1805 }
1806 break;
1807 }
1808
1809 /*
1810 * SPI (private) for opening a file starting from a dir fd
1811 */
1812 case F_OPENFROM: {
1813 struct user_fopenfrom fopen;
1814 struct vnode_attr va;
1815 struct nameidata nd;
1816 int cmode;
1817
1818 /* Check if this isn't a valid file descriptor */
1819 if ((fp->f_type != DTYPE_VNODE) ||
1820 (fp->f_flag & FREAD) == 0) {
1821 error = EBADF;
1822 goto out;
1823 }
1824 vp = (struct vnode *)fp->f_data;
1825 proc_fdunlock(p);
1826
1827 if (vnode_getwithref(vp)) {
1828 error = ENOENT;
1829 goto outdrop;
1830 }
1831
1832 /* Only valid for directories */
1833 if (vp->v_type != VDIR) {
1834 vnode_put(vp);
1835 error = ENOTDIR;
1836 goto outdrop;
1837 }
1838
1839 /* Get flags, mode and pathname arguments. */
1840 if (IS_64BIT_PROCESS(p)) {
1841 error = copyin(argp, &fopen, sizeof(fopen));
1842 } else {
1843 struct user32_fopenfrom fopen32;
1844
1845 error = copyin(argp, &fopen32, sizeof(fopen32));
1846 fopen.o_flags = fopen32.o_flags;
1847 fopen.o_mode = fopen32.o_mode;
1848 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1849 }
1850 if (error) {
1851 vnode_put(vp);
1852 goto outdrop;
1853 }
1854 AUDIT_ARG(fflags, fopen.o_flags);
1855 AUDIT_ARG(mode, fopen.o_mode);
1856 VATTR_INIT(&va);
1857 /* Mask off all but regular access permissions */
1858 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1859 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1860
1861 /* Start the lookup relative to the file descriptor's vnode. */
1862 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1863 fopen.o_pathname, &context);
1864 nd.ni_dvp = vp;
1865
1866 error = open1(&context, &nd, fopen.o_flags, &va,
1867 fileproc_alloc_init, NULL, retval);
1868
1869 vnode_put(vp);
1870 break;
1871 }
1872 /*
1873 * SPI (private) for unlinking a file starting from a dir fd
1874 */
1875 case F_UNLINKFROM: {
1876 user_addr_t pathname;
1877
1878 /* Check if this isn't a valid file descriptor */
1879 if ((fp->f_type != DTYPE_VNODE) ||
1880 (fp->f_flag & FREAD) == 0) {
1881 error = EBADF;
1882 goto out;
1883 }
1884 vp = (struct vnode *)fp->f_data;
1885 proc_fdunlock(p);
1886
1887 if (vnode_getwithref(vp)) {
1888 error = ENOENT;
1889 goto outdrop;
1890 }
1891
1892 /* Only valid for directories */
1893 if (vp->v_type != VDIR) {
1894 vnode_put(vp);
1895 error = ENOTDIR;
1896 goto outdrop;
1897 }
1898
1899 /* Get the pathname argument. */
1900 if (IS_64BIT_PROCESS(p)) {
1901 pathname = (user_addr_t)argp;
1902 } else {
1903 pathname = CAST_USER_ADDR_T(argp);
1904 }
1905
1906 /* Start the lookup relative to the file descriptor's vnode. */
1907 error = unlink1(&context, vp, pathname, UIO_USERSPACE, 0);
1908
1909 vnode_put(vp);
1910 break;
1911 }
1912
1913 case F_ADDSIGS:
1914 case F_ADDFILESIGS:
1915 case F_ADDFILESIGS_FOR_DYLD_SIM:
1916 case F_ADDFILESIGS_RETURN:
1917 {
1918 struct cs_blob *blob = NULL;
1919 struct user_fsignatures fs;
1920 kern_return_t kr;
1921 vm_offset_t kernel_blob_addr;
1922 vm_size_t kernel_blob_size;
1923 int blob_add_flags = 0;
1924
1925 if (fp->f_type != DTYPE_VNODE) {
1926 error = EBADF;
1927 goto out;
1928 }
1929 vp = (struct vnode *)fp->f_data;
1930 proc_fdunlock(p);
1931
1932 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1933 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
1934 if ((p->p_csflags & CS_KILL) == 0) {
1935 proc_lock(p);
1936 p->p_csflags |= CS_KILL;
1937 proc_unlock(p);
1938 }
1939 }
1940
1941 error = vnode_getwithref(vp);
1942 if (error) {
1943 goto outdrop;
1944 }
1945
1946 if (IS_64BIT_PROCESS(p)) {
1947 error = copyin(argp, &fs, sizeof(fs));
1948 } else {
1949 struct user32_fsignatures fs32;
1950
1951 error = copyin(argp, &fs32, sizeof(fs32));
1952 fs.fs_file_start = fs32.fs_file_start;
1953 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1954 fs.fs_blob_size = fs32.fs_blob_size;
1955 }
1956
1957 if (error) {
1958 vnode_put(vp);
1959 goto outdrop;
1960 }
1961
1962 /*
1963 * First check if we have something loaded at this offset
1964 */
1965 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
1966 if (blob != NULL) {
1967 /* If this is for dyld_sim revalidate the blob */
1968 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1969 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags);
1970 if (error) {
1971 blob = NULL;
1972 if (error != EAGAIN) {
1973 vnode_put(vp);
1974 goto outdrop;
1975 }
1976 }
1977 }
1978 }
1979
1980 if (blob == NULL) {
1981 /*
1982 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
1983 * our use cases for the immediate future, but note that at the time of this commit, some
1984 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
1985 *
1986 * We should consider how we can manage this more effectively; the above means that some
1987 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
1988 * threshold considered ridiculous at the time of this change.
1989 */
1990 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
1991 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1992 error = E2BIG;
1993 vnode_put(vp);
1994 goto outdrop;
1995 }
1996
1997 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1998 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1999 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2000 error = ENOMEM;
2001 vnode_put(vp);
2002 goto outdrop;
2003 }
2004
2005 if (uap->cmd == F_ADDSIGS) {
2006 error = copyin(fs.fs_blob_start,
2007 (void *) kernel_blob_addr,
2008 fs.fs_blob_size);
2009 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */
2010 int resid;
2011
2012 error = vn_rdwr(UIO_READ,
2013 vp,
2014 (caddr_t) kernel_blob_addr,
2015 kernel_blob_size,
2016 fs.fs_file_start + fs.fs_blob_start,
2017 UIO_SYSSPACE,
2018 0,
2019 kauth_cred_get(),
2020 &resid,
2021 p);
2022 if ((error == 0) && resid) {
2023 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2024 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2025 }
2026 }
2027
2028 if (error) {
2029 ubc_cs_blob_deallocate(kernel_blob_addr,
2030 kernel_blob_size);
2031 vnode_put(vp);
2032 goto outdrop;
2033 }
2034
2035 blob = NULL;
2036 error = ubc_cs_blob_add(vp,
2037 CPU_TYPE_ANY, /* not for a specific architecture */
2038 fs.fs_file_start,
2039 &kernel_blob_addr,
2040 kernel_blob_size,
2041 NULL,
2042 blob_add_flags,
2043 &blob);
2044
2045 /* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
2046 if (error) {
2047 if (kernel_blob_addr) {
2048 ubc_cs_blob_deallocate(kernel_blob_addr,
2049 kernel_blob_size);
2050 }
2051 vnode_put(vp);
2052 goto outdrop;
2053 } else {
2054 #if CHECK_CS_VALIDATION_BITMAP
2055 ubc_cs_validation_bitmap_allocate( vp );
2056 #endif
2057 }
2058 }
2059
2060 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2061 /*
2062 * The first element of the structure is an
2063 * off_t that happens to have the same size for
2064 * all archs. Let's overwrite that.
2065 */
2066 off_t end_offset = 0;
2067 if (blob) {
2068 end_offset = blob->csb_end_offset;
2069 }
2070 error = copyout(&end_offset, argp, sizeof(end_offset));
2071 }
2072
2073 (void) vnode_put(vp);
2074 break;
2075 }
2076 case F_GETCODEDIR:
2077 case F_FINDSIGS: {
2078 error = ENOTSUP;
2079 goto out;
2080 }
2081 case F_CHECK_LV: {
2082 struct fileglob *fg;
2083 fchecklv_t lv = {};
2084
2085 if (fp->f_type != DTYPE_VNODE) {
2086 error = EBADF;
2087 goto out;
2088 }
2089 fg = fp->f_fglob;
2090 proc_fdunlock(p);
2091
2092 if (IS_64BIT_PROCESS(p)) {
2093 error = copyin(argp, &lv, sizeof(lv));
2094 } else {
2095 struct user32_fchecklv lv32 = {};
2096
2097 error = copyin(argp, &lv32, sizeof(lv32));
2098 lv.lv_file_start = lv32.lv_file_start;
2099 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2100 lv.lv_error_message_size = lv32.lv_error_message_size;
2101 }
2102 if (error) {
2103 goto outdrop;
2104 }
2105
2106 #if CONFIG_MACF
2107 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2108 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2109 #endif
2110
2111 break;
2112 }
2113 #if CONFIG_PROTECT
2114 case F_GETPROTECTIONCLASS: {
2115 if (fp->f_type != DTYPE_VNODE) {
2116 error = EBADF;
2117 goto out;
2118 }
2119 vp = (struct vnode *)fp->f_data;
2120
2121 proc_fdunlock(p);
2122
2123 if (vnode_getwithref(vp)) {
2124 error = ENOENT;
2125 goto outdrop;
2126 }
2127
2128 struct vnode_attr va;
2129
2130 VATTR_INIT(&va);
2131 VATTR_WANTED(&va, va_dataprotect_class);
2132 error = VNOP_GETATTR(vp, &va, &context);
2133 if (!error) {
2134 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2135 *retval = va.va_dataprotect_class;
2136 } else {
2137 error = ENOTSUP;
2138 }
2139 }
2140
2141 vnode_put(vp);
2142 break;
2143 }
2144
2145 case F_SETPROTECTIONCLASS: {
2146 /* tmp must be a valid PROTECTION_CLASS_* */
2147 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2148
2149 if (fp->f_type != DTYPE_VNODE) {
2150 error = EBADF;
2151 goto out;
2152 }
2153 vp = (struct vnode *)fp->f_data;
2154
2155 proc_fdunlock(p);
2156
2157 if (vnode_getwithref(vp)) {
2158 error = ENOENT;
2159 goto outdrop;
2160 }
2161
2162 /* Only go forward if you have write access */
2163 vfs_context_t ctx = vfs_context_current();
2164 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2165 vnode_put(vp);
2166 error = EBADF;
2167 goto outdrop;
2168 }
2169
2170 struct vnode_attr va;
2171
2172 VATTR_INIT(&va);
2173 VATTR_SET(&va, va_dataprotect_class, tmp);
2174
2175 error = VNOP_SETATTR(vp, &va, ctx);
2176
2177 vnode_put(vp);
2178 break;
2179 }
2180
2181 case F_TRANSCODEKEY: {
2182 if (fp->f_type != DTYPE_VNODE) {
2183 error = EBADF;
2184 goto out;
2185 }
2186
2187 vp = (struct vnode *)fp->f_data;
2188 proc_fdunlock(p);
2189
2190 if (vnode_getwithref(vp)) {
2191 error = ENOENT;
2192 goto outdrop;
2193 }
2194
2195 cp_key_t k = {
2196 .len = CP_MAX_WRAPPEDKEYSIZE,
2197 };
2198
2199 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2200
2201 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2202
2203 vnode_put(vp);
2204
2205 if (error == 0) {
2206 error = copyout(k.key, argp, k.len);
2207 *retval = k.len;
2208 }
2209
2210 FREE(k.key, M_TEMP);
2211
2212 break;
2213 }
2214
2215 case F_GETPROTECTIONLEVEL: {
2216 if (fp->f_type != DTYPE_VNODE) {
2217 error = EBADF;
2218 goto out;
2219 }
2220
2221 vp = (struct vnode*) fp->f_data;
2222 proc_fdunlock(p);
2223
2224 if (vnode_getwithref(vp)) {
2225 error = ENOENT;
2226 goto outdrop;
2227 }
2228
2229 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2230
2231 vnode_put(vp);
2232 break;
2233 }
2234
2235 case F_GETDEFAULTPROTLEVEL: {
2236 if (fp->f_type != DTYPE_VNODE) {
2237 error = EBADF;
2238 goto out;
2239 }
2240
2241 vp = (struct vnode*) fp->f_data;
2242 proc_fdunlock(p);
2243
2244 if (vnode_getwithref(vp)) {
2245 error = ENOENT;
2246 goto outdrop;
2247 }
2248
2249 /*
2250 * if cp_get_major_vers fails, error will be set to proper errno
2251 * and cp_version will still be 0.
2252 */
2253
2254 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2255
2256 vnode_put(vp);
2257 break;
2258 }
2259
2260 #endif /* CONFIG_PROTECT */
2261
2262 case F_MOVEDATAEXTENTS: {
2263 struct fileproc *fp2 = NULL;
2264 struct vnode *src_vp = NULLVP;
2265 struct vnode *dst_vp = NULLVP;
2266 /* We need to grab the 2nd FD out of the arguments before moving on. */
2267 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2268
2269 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2270 if (error) {
2271 goto out;
2272 }
2273
2274 if (fp->f_type != DTYPE_VNODE) {
2275 error = EBADF;
2276 goto out;
2277 }
2278
2279 /*
2280 * For now, special case HFS+ and APFS only, since this
2281 * is SPI.
2282 */
2283 src_vp = (struct vnode *)fp->f_data;
2284 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2285 error = ENOTSUP;
2286 goto out;
2287 }
2288
2289 /*
2290 * Get the references before we start acquiring iocounts on the vnodes,
2291 * while we still hold the proc fd lock
2292 */
2293 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2294 error = EBADF;
2295 goto out;
2296 }
2297 if (fp2->f_type != DTYPE_VNODE) {
2298 fp_drop(p, fd2, fp2, 1);
2299 error = EBADF;
2300 goto out;
2301 }
2302 dst_vp = (struct vnode *)fp2->f_data;
2303 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2304 fp_drop(p, fd2, fp2, 1);
2305 error = ENOTSUP;
2306 goto out;
2307 }
2308
2309 #if CONFIG_MACF
2310 /* Re-do MAC checks against the new FD, pass in a fake argument */
2311 error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
2312 if (error) {
2313 fp_drop(p, fd2, fp2, 1);
2314 goto out;
2315 }
2316 #endif
2317 /* Audit the 2nd FD */
2318 AUDIT_ARG(fd, fd2);
2319
2320 proc_fdunlock(p);
2321
2322 if (vnode_getwithref(src_vp)) {
2323 fp_drop(p, fd2, fp2, 0);
2324 error = ENOENT;
2325 goto outdrop;
2326 }
2327 if (vnode_getwithref(dst_vp)) {
2328 vnode_put(src_vp);
2329 fp_drop(p, fd2, fp2, 0);
2330 error = ENOENT;
2331 goto outdrop;
2332 }
2333
2334 /*
2335 * Basic asserts; validate they are not the same and that
2336 * both live on the same filesystem.
2337 */
2338 if (dst_vp == src_vp) {
2339 vnode_put(src_vp);
2340 vnode_put(dst_vp);
2341 fp_drop(p, fd2, fp2, 0);
2342 error = EINVAL;
2343 goto outdrop;
2344 }
2345
2346 if (dst_vp->v_mount != src_vp->v_mount) {
2347 vnode_put(src_vp);
2348 vnode_put(dst_vp);
2349 fp_drop(p, fd2, fp2, 0);
2350 error = EXDEV;
2351 goto outdrop;
2352 }
2353
2354 /* Now we have a legit pair of FDs. Go to work */
2355
2356 /* Now check for write access to the target files */
2357 if (vnode_authorize(src_vp, NULLVP,
2358 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2359 vnode_put(src_vp);
2360 vnode_put(dst_vp);
2361 fp_drop(p, fd2, fp2, 0);
2362 error = EBADF;
2363 goto outdrop;
2364 }
2365
2366 if (vnode_authorize(dst_vp, NULLVP,
2367 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2368 vnode_put(src_vp);
2369 vnode_put(dst_vp);
2370 fp_drop(p, fd2, fp2, 0);
2371 error = EBADF;
2372 goto outdrop;
2373 }
2374
2375 /* Verify that both vps point to files and not directories */
2376 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2377 error = EINVAL;
2378 vnode_put(src_vp);
2379 vnode_put(dst_vp);
2380 fp_drop(p, fd2, fp2, 0);
2381 goto outdrop;
2382 }
2383
2384 /*
2385 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2386 * We'll pass in our special bit indicating that the new behavior is expected
2387 */
2388
2389 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2390
2391 vnode_put(src_vp);
2392 vnode_put(dst_vp);
2393 fp_drop(p, fd2, fp2, 0);
2394 break;
2395 }
2396
2397 /*
2398 * SPI for making a file compressed.
2399 */
2400 case F_MAKECOMPRESSED: {
2401 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2402
2403 if (fp->f_type != DTYPE_VNODE) {
2404 error = EBADF;
2405 goto out;
2406 }
2407
2408 vp = (struct vnode*) fp->f_data;
2409 proc_fdunlock(p);
2410
2411 /* get the vnode */
2412 if (vnode_getwithref(vp)) {
2413 error = ENOENT;
2414 goto outdrop;
2415 }
2416
2417 /* Is it a file? */
2418 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2419 vnode_put(vp);
2420 error = EBADF;
2421 goto outdrop;
2422 }
2423
2424 /* invoke ioctl to pass off to FS */
2425 /* Only go forward if you have write access */
2426 vfs_context_t ctx = vfs_context_current();
2427 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2428 vnode_put(vp);
2429 error = EBADF;
2430 goto outdrop;
2431 }
2432
2433 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2434
2435 vnode_put(vp);
2436 break;
2437 }
2438
2439 /*
2440 * SPI (private) for indicating to a filesystem that subsequent writes to
2441 * the open FD will be written to the Fastflow.
2442 */
2443 case F_SET_GREEDY_MODE:
2444 /* intentionally drop through to the same handler as F_SETSTATICCONTENT.
2445 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2446 */
2447
2448 /*
2449 * SPI (private) for indicating to a filesystem that subsequent writes to
2450 * the open FD will represent static content.
2451 */
2452 case F_SETSTATICCONTENT: {
2453 caddr_t ioctl_arg = NULL;
2454
2455 if (uap->arg) {
2456 ioctl_arg = (caddr_t) 1;
2457 }
2458
2459 if (fp->f_type != DTYPE_VNODE) {
2460 error = EBADF;
2461 goto out;
2462 }
2463 vp = (struct vnode *)fp->f_data;
2464 proc_fdunlock(p);
2465
2466 error = vnode_getwithref(vp);
2467 if (error) {
2468 error = ENOENT;
2469 goto outdrop;
2470 }
2471
2472 /* Only go forward if you have write access */
2473 vfs_context_t ctx = vfs_context_current();
2474 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2475 vnode_put(vp);
2476 error = EBADF;
2477 goto outdrop;
2478 }
2479
2480 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2481 (void)vnode_put(vp);
2482
2483 break;
2484 }
2485
2486 /*
2487 * SPI (private) for indicating to the lower level storage driver that the
2488 * subsequent writes should be of a particular IO type (burst, greedy, static),
2489 * or other flavors that may be necessary.
2490 */
2491 case F_SETIOTYPE: {
2492 caddr_t param_ptr;
2493 uint32_t param;
2494
2495 if (uap->arg) {
2496 /* extract 32 bits of flags from userland */
2497 param_ptr = (caddr_t) uap->arg;
2498 param = (uint32_t) param_ptr;
2499 } else {
2500 /* If no argument is specified, error out */
2501 error = EINVAL;
2502 goto out;
2503 }
2504
2505 /*
2506 * Validate the different types of flags that can be specified:
2507 * all of them are mutually exclusive for now.
2508 */
2509 switch (param) {
2510 case F_IOTYPE_ISOCHRONOUS:
2511 break;
2512
2513 default:
2514 error = EINVAL;
2515 goto out;
2516 }
2517
2518
2519 if (fp->f_type != DTYPE_VNODE) {
2520 error = EBADF;
2521 goto out;
2522 }
2523 vp = (struct vnode *)fp->f_data;
2524 proc_fdunlock(p);
2525
2526 error = vnode_getwithref(vp);
2527 if (error) {
2528 error = ENOENT;
2529 goto outdrop;
2530 }
2531
2532 /* Only go forward if you have write access */
2533 vfs_context_t ctx = vfs_context_current();
2534 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2535 vnode_put(vp);
2536 error = EBADF;
2537 goto outdrop;
2538 }
2539
2540 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2541 (void)vnode_put(vp);
2542
2543 break;
2544 }
2545
2546 /*
2547 * Set the vnode pointed to by 'fd'
2548 * and tag it as the (potentially future) backing store
2549 * for another filesystem
2550 */
2551 case F_SETBACKINGSTORE: {
2552 if (fp->f_type != DTYPE_VNODE) {
2553 error = EBADF;
2554 goto out;
2555 }
2556
2557 vp = (struct vnode *)fp->f_data;
2558
2559 if (vp->v_tag != VT_HFS) {
2560 error = EINVAL;
2561 goto out;
2562 }
2563 proc_fdunlock(p);
2564
2565 if (vnode_getwithref(vp)) {
2566 error = ENOENT;
2567 goto outdrop;
2568 }
2569
2570 /* only proceed if you have write access */
2571 vfs_context_t ctx = vfs_context_current();
2572 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2573 vnode_put(vp);
2574 error = EBADF;
2575 goto outdrop;
2576 }
2577
2578
2579 /* If arg != 0, set, otherwise unset */
2580 if (uap->arg) {
2581 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2582 } else {
2583 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
2584 }
2585
2586 vnode_put(vp);
2587 break;
2588 }
2589
2590 /*
2591 * like F_GETPATH, but special semantics for
2592 * the mobile time machine handler.
2593 */
2594 case F_GETPATH_MTMINFO: {
2595 char *pathbufp;
2596 int pathlen;
2597
2598 if (fp->f_type != DTYPE_VNODE) {
2599 error = EBADF;
2600 goto out;
2601 }
2602 vp = (struct vnode *)fp->f_data;
2603 proc_fdunlock(p);
2604
2605 pathlen = MAXPATHLEN;
2606 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
2607 if (pathbufp == NULL) {
2608 error = ENOMEM;
2609 goto outdrop;
2610 }
2611 if ((error = vnode_getwithref(vp)) == 0) {
2612 int backingstore = 0;
2613
2614 /* Check for error from vn_getpath before moving on */
2615 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
2616 if (vp->v_tag == VT_HFS) {
2617 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2618 }
2619 (void)vnode_put(vp);
2620
2621 if (error == 0) {
2622 error = copyout((caddr_t)pathbufp, argp, pathlen);
2623 }
2624 if (error == 0) {
2625 /*
2626 * If the copyout was successful, now check to ensure
2627 * that this vnode is not a BACKINGSTORE vnode. mtmd
2628 * wants the path regardless.
2629 */
2630 if (backingstore) {
2631 error = EBUSY;
2632 }
2633 }
2634 } else {
2635 (void)vnode_put(vp);
2636 }
2637 }
2638 FREE(pathbufp, M_TEMP);
2639 goto outdrop;
2640 }
2641
2642 #if DEBUG || DEVELOPMENT
2643 case F_RECYCLE:
2644 if (fp->f_type != DTYPE_VNODE) {
2645 error = EBADF;
2646 goto out;
2647 }
2648 vp = (struct vnode *)fp->f_data;
2649 proc_fdunlock(p);
2650
2651 vnode_recycle(vp);
2652 break;
2653 #endif
2654
2655 default:
2656 /*
2657 * This is an fcntl() that we do not recognize at this level;
2658 * if this is a vnode, we send it down into the VNOP_IOCTL
2659 * for this vnode; this can include special devices, and will
2660 * effectively overload fcntl() to send ioctl()'s.
2661 */
2662 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
2663 error = EINVAL;
2664 goto out;
2665 }
2666
2667 /* Catch any now-invalid fcntl() selectors */
2668 switch (uap->cmd) {
2669 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
2670 case (int)FSIOC_FIOSEEKHOLE:
2671 case (int)FSIOC_FIOSEEKDATA:
2672 case (int)FSIOC_CAS_BSDFLAGS:
2673 case HFS_GET_BOOT_INFO:
2674 case HFS_SET_BOOT_INFO:
2675 case FIOPINSWAP:
2676 case F_MARKDEPENDENCY:
2677 case TIOCREVOKE:
2678 error = EINVAL;
2679 goto out;
2680 default:
2681 break;
2682 }
2683
2684 if (fp->f_type != DTYPE_VNODE) {
2685 error = EBADF;
2686 goto out;
2687 }
2688 vp = (struct vnode *)fp->f_data;
2689 proc_fdunlock(p);
2690
2691 if ((error = vnode_getwithref(vp)) == 0) {
2692 #define STK_PARAMS 128
2693 char stkbuf[STK_PARAMS] = {0};
2694 unsigned int size;
2695 caddr_t data, memp;
2696 /*
2697 * For this to work properly, we have to copy in the
2698 * ioctl() cmd argument if there is one; we must also
2699 * check that a command parameter, if present, does
2700 * not exceed the maximum command length dictated by
2701 * the number of bits we have available in the command
2702 * to represent a structure length. Finally, we have
2703 * to copy the results back out, if it is that type of
2704 * ioctl().
2705 */
2706 size = IOCPARM_LEN(uap->cmd);
2707 if (size > IOCPARM_MAX) {
2708 (void)vnode_put(vp);
2709 error = EINVAL;
2710 break;
2711 }
2712
2713 memp = NULL;
2714 if (size > sizeof(stkbuf)) {
2715 if ((memp = (caddr_t)kalloc(size)) == 0) {
2716 (void)vnode_put(vp);
2717 error = ENOMEM;
2718 goto outdrop;
2719 }
2720 data = memp;
2721 } else {
2722 data = &stkbuf[0];
2723 }
2724
2725 if (uap->cmd & IOC_IN) {
2726 if (size) {
2727 /* structure */
2728 error = copyin(argp, data, size);
2729 if (error) {
2730 (void)vnode_put(vp);
2731 if (memp) {
2732 kfree(memp, size);
2733 }
2734 goto outdrop;
2735 }
2736
2737 /* Bzero the section beyond that which was needed */
2738 if (size <= sizeof(stkbuf)) {
2739 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
2740 }
2741 } else {
2742 /* int */
2743 if (is64bit) {
2744 *(user_addr_t *)data = argp;
2745 } else {
2746 *(uint32_t *)data = (uint32_t)argp;
2747 }
2748 };
2749 } else if ((uap->cmd & IOC_OUT) && size) {
2750 /*
2751 * Zero the buffer so the user always
2752 * gets back something deterministic.
2753 */
2754 bzero(data, size);
2755 } else if (uap->cmd & IOC_VOID) {
2756 if (is64bit) {
2757 *(user_addr_t *)data = argp;
2758 } else {
2759 *(uint32_t *)data = (uint32_t)argp;
2760 }
2761 }
2762
2763 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2764
2765 (void)vnode_put(vp);
2766
2767 /* Copy any output data to user */
2768 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
2769 error = copyout(data, argp, size);
2770 }
2771 if (memp) {
2772 kfree(memp, size);
2773 }
2774 }
2775 break;
2776 }
2777
2778 outdrop:
2779 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2780 fp_drop(p, fd, fp, 0);
2781 return error;
2782 out:
2783 fp_drop(p, fd, fp, 1);
2784 proc_fdunlock(p);
2785 return error;
2786 }
2787
2788
2789 /*
2790 * finishdup
2791 *
2792 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2793 *
2794 * Parameters: p Process performing the dup
2795 * old The fd to dup
2796 * new The fd to dup it to
2797 * fd_flags Flags to augment the new fd
2798 * retval Pointer to the call return area
2799 *
2800 * Returns: 0 Success
2801 * EBADF
2802 * ENOMEM
2803 *
2804 * Implicit returns:
2805 * *retval (modified) The new descriptor
2806 *
2807 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
2808 * the caller
2809 *
2810 * Notes: This function may drop and reacquire this lock; it is unsafe
2811 * for a caller to assume that other state protected by the lock
2812 * has not been subsequently changed out from under it.
2813 */
2814 int
2815 finishdup(proc_t p,
2816 struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2817 {
2818 struct fileproc *nfp;
2819 struct fileproc *ofp;
2820 #if CONFIG_MACF
2821 int error;
2822 #endif
2823
2824 #if DIAGNOSTIC
2825 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2826 #endif
2827 if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2828 (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2829 fdrelse(p, new);
2830 return EBADF;
2831 }
2832 fg_ref(ofp);
2833
2834 #if CONFIG_MACF
2835 error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2836 if (error) {
2837 fg_drop(ofp);
2838 fdrelse(p, new);
2839 return error;
2840 }
2841 #endif
2842
2843 proc_fdunlock(p);
2844
2845 nfp = fileproc_alloc_init(NULL);
2846
2847 proc_fdlock(p);
2848
2849 if (nfp == NULL) {
2850 fg_drop(ofp);
2851 fdrelse(p, new);
2852 return ENOMEM;
2853 }
2854
2855 nfp->f_fglob = ofp->f_fglob;
2856
2857 #if DIAGNOSTIC
2858 if (fdp->fd_ofiles[new] != 0) {
2859 panic("finishdup: overwriting fd_ofiles with new %d", new);
2860 }
2861 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
2862 panic("finishdup: unreserved fileflags with new %d", new);
2863 }
2864 #endif
2865
2866 if (new > fdp->fd_lastfile) {
2867 fdp->fd_lastfile = new;
2868 }
2869 *fdflags(p, new) |= fd_flags;
2870 procfdtbl_releasefd(p, new, nfp);
2871 *retval = new;
2872 return 0;
2873 }
2874
2875
2876 /*
2877 * close
2878 *
2879 * Description: The implementation of the close(2) system call
2880 *
2881 * Parameters: p Process in whose per process file table
2882 * the close is to occur
2883 * uap->fd fd to be closed
2884 * retval <unused>
2885 *
2886 * Returns: 0 Success
2887 * fp_lookup:EBADF Bad file descriptor
2888 * fp_guard_exception:??? Guarded file descriptor
2889 * close_internal:EBADF
2890 * close_internal:??? Anything returnable by a per-fileops
2891 * close function
2892 */
2893 int
2894 close(proc_t p, struct close_args *uap, int32_t *retval)
2895 {
2896 __pthread_testcancel(1);
2897 return close_nocancel(p, (struct close_nocancel_args *)uap, retval);
2898 }
2899
2900
2901 int
2902 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2903 {
2904 struct fileproc *fp;
2905 int fd = uap->fd;
2906 int error;
2907
2908 AUDIT_SYSCLOSE(p, fd);
2909
2910 proc_fdlock(p);
2911
2912 if ((error = fp_lookup(p, fd, &fp, 1))) {
2913 proc_fdunlock(p);
2914 return error;
2915 }
2916
2917 if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
2918 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
2919 (void) fp_drop(p, fd, fp, 1);
2920 proc_fdunlock(p);
2921 return error;
2922 }
2923
2924 error = close_internal_locked(p, fd, fp, 0);
2925
2926 proc_fdunlock(p);
2927
2928 return error;
2929 }
2930
2931
2932 /*
2933 * close_internal_locked
2934 *
2935 * Close a file descriptor.
2936 *
2937 * Parameters: p Process in whose per process file table
2938 * the close is to occur
2939 * fd fd to be closed
2940 * fp fileproc associated with the fd
2941 *
2942 * Returns: 0 Success
2943 * EBADF fd already in close wait state
2944 * closef_locked:??? Anything returnable by a per-fileops
2945 * close function
2946 *
2947 * Locks: Assumes proc_fdlock for process is held by the caller and returns
2948 * with lock held
2949 *
2950 * Notes: This function may drop and reacquire this lock; it is unsafe
2951 * for a caller to assume that other state protected by the lock
2952 * has not been subsequently changed out from under it.
2953 */
2954 int
2955 close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
2956 {
2957 struct filedesc *fdp = p->p_fd;
2958 int error = 0;
2959 int resvfd = flags & FD_DUP2RESV;
2960
2961
2962 #if DIAGNOSTIC
2963 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2964 #endif
2965
2966 /* Keep people from using the filedesc while we are closing it */
2967 procfdtbl_markclosefd(p, fd);
2968
2969
2970 if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2971 panic("close_internal_locked: being called on already closing fd");
2972 }
2973
2974
2975 #if DIAGNOSTIC
2976 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0) {
2977 panic("close_internal: unreserved fileflags with fd %d", fd);
2978 }
2979 #endif
2980
2981 fp->f_flags |= FP_CLOSING;
2982
2983 if ((fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
2984 proc_fdunlock(p);
2985
2986 if ((fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
2987 /*
2988 * call out to allow 3rd party notification of close.
2989 * Ignore result of kauth_authorize_fileop call.
2990 */
2991 if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
2992 u_int fileop_flags = 0;
2993 if ((fp->f_flags & FP_WRITTEN) != 0) {
2994 fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
2995 }
2996 kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2997 (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2998 vnode_put((vnode_t)fp->f_data);
2999 }
3000 }
3001 if (fp->f_flags & FP_AIOISSUED) {
3002 /*
3003 * cancel all async IO requests that can be cancelled.
3004 */
3005 _aio_close( p, fd );
3006 }
3007
3008 proc_fdlock(p);
3009 }
3010
3011 if (fd < fdp->fd_knlistsize) {
3012 knote_fdclose(p, fd);
3013 }
3014
3015 /* release the ref returned from fp_lookup before calling drain */
3016 (void) os_ref_release_locked(&fp->f_iocount);
3017 fileproc_drain(p, fp);
3018
3019 if (fp->f_flags & FP_WAITEVENT) {
3020 (void)waitevent_close(p, fp);
3021 }
3022
3023 if (resvfd == 0) {
3024 _fdrelse(p, fd);
3025 } else {
3026 procfdtbl_reservefd(p, fd);
3027 }
3028
3029 if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
3030 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
3031 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
3032 }
3033
3034 error = closef_locked(fp, fp->f_fglob, p);
3035 if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE) {
3036 wakeup(&fp->f_flags);
3037 }
3038 fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
3039
3040 proc_fdunlock(p);
3041
3042 fileproc_free(fp);
3043
3044 proc_fdlock(p);
3045
3046 #if DIAGNOSTIC
3047 if (resvfd != 0) {
3048 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0) {
3049 panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
3050 }
3051 }
3052 #endif
3053
3054 return error;
3055 }
3056
3057
3058 /*
3059 * fstat1
3060 *
3061 * Description: Return status information about a file descriptor.
3062 *
3063 * Parameters: p The process doing the fstat
3064 * fd The fd to stat
3065 * ub The user stat buffer
3066 * xsecurity The user extended security
3067 * buffer, or 0 if none
3068 * xsecurity_size The size of xsecurity, or 0
3069 * if no xsecurity
3070 * isstat64 Flag to indicate 64 bit version
3071 * for inode size, etc.
3072 *
3073 * Returns: 0 Success
3074 * EBADF
3075 * EFAULT
3076 * fp_lookup:EBADF Bad file descriptor
3077 * vnode_getwithref:???
3078 * copyout:EFAULT
3079 * vnode_getwithref:???
3080 * vn_stat:???
3081 * soo_stat:???
3082 * pipe_stat:???
3083 * pshm_stat:???
3084 * kqueue_stat:???
3085 *
3086 * Notes: Internal implementation for all other fstat() related
3087 * functions
3088 *
3089 * XXX switch on node type is bogus; need a stat in struct
3090 * XXX fileops instead.
3091 */
3092 static int
3093 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3094 {
3095 struct fileproc *fp;
3096 union {
3097 struct stat sb;
3098 struct stat64 sb64;
3099 } source;
3100 union {
3101 struct user64_stat user64_sb;
3102 struct user32_stat user32_sb;
3103 struct user64_stat64 user64_sb64;
3104 struct user32_stat64 user32_sb64;
3105 } dest;
3106 int error, my_size;
3107 file_type_t type;
3108 caddr_t data;
3109 kauth_filesec_t fsec;
3110 user_size_t xsecurity_bufsize;
3111 vfs_context_t ctx = vfs_context_current();
3112 void * sbptr;
3113
3114
3115 AUDIT_ARG(fd, fd);
3116
3117 if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
3118 return error;
3119 }
3120 type = fp->f_type;
3121 data = fp->f_data;
3122 fsec = KAUTH_FILESEC_NONE;
3123
3124 sbptr = (void *)&source;
3125
3126 switch (type) {
3127 case DTYPE_VNODE:
3128 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
3129 /*
3130 * If the caller has the file open, and is not
3131 * requesting extended security information, we are
3132 * going to let them get the basic stat information.
3133 */
3134 if (xsecurity == USER_ADDR_NULL) {
3135 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
3136 fp->f_fglob->fg_cred);
3137 } else {
3138 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
3139 }
3140
3141 AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3142 (void)vnode_put((vnode_t)data);
3143 }
3144 break;
3145
3146 #if SOCKETS
3147 case DTYPE_SOCKET:
3148 error = soo_stat((struct socket *)data, sbptr, isstat64);
3149 break;
3150 #endif /* SOCKETS */
3151
3152 case DTYPE_PIPE:
3153 error = pipe_stat((void *)data, sbptr, isstat64);
3154 break;
3155
3156 case DTYPE_PSXSHM:
3157 error = pshm_stat((void *)data, sbptr, isstat64);
3158 break;
3159
3160 case DTYPE_KQUEUE:
3161 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3162 break;
3163
3164 default:
3165 error = EBADF;
3166 goto out;
3167 }
3168 if (error == 0) {
3169 caddr_t sbp;
3170
3171 if (isstat64 != 0) {
3172 source.sb64.st_lspare = 0;
3173 source.sb64.st_qspare[0] = 0LL;
3174 source.sb64.st_qspare[1] = 0LL;
3175
3176 if (IS_64BIT_PROCESS(current_proc())) {
3177 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3178 my_size = sizeof(dest.user64_sb64);
3179 sbp = (caddr_t)&dest.user64_sb64;
3180 } else {
3181 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3182 my_size = sizeof(dest.user32_sb64);
3183 sbp = (caddr_t)&dest.user32_sb64;
3184 }
3185 } else {
3186 source.sb.st_lspare = 0;
3187 source.sb.st_qspare[0] = 0LL;
3188 source.sb.st_qspare[1] = 0LL;
3189 if (IS_64BIT_PROCESS(current_proc())) {
3190 munge_user64_stat(&source.sb, &dest.user64_sb);
3191 my_size = sizeof(dest.user64_sb);
3192 sbp = (caddr_t)&dest.user64_sb;
3193 } else {
3194 munge_user32_stat(&source.sb, &dest.user32_sb);
3195 my_size = sizeof(dest.user32_sb);
3196 sbp = (caddr_t)&dest.user32_sb;
3197 }
3198 }
3199
3200 error = copyout(sbp, ub, my_size);
3201 }
3202
3203 /* caller wants extended security information? */
3204 if (xsecurity != USER_ADDR_NULL) {
3205 /* did we get any? */
3206 if (fsec == KAUTH_FILESEC_NONE) {
3207 if (susize(xsecurity_size, 0) != 0) {
3208 error = EFAULT;
3209 goto out;
3210 }
3211 } else {
3212 /* find the user buffer size */
3213 xsecurity_bufsize = fusize(xsecurity_size);
3214
3215 /* copy out the actual data size */
3216 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3217 error = EFAULT;
3218 goto out;
3219 }
3220
3221 /* if the caller supplied enough room, copy out to it */
3222 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
3223 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3224 }
3225 }
3226 }
3227 out:
3228 fp_drop(p, fd, fp, 0);
3229 if (fsec != NULL) {
3230 kauth_filesec_free(fsec);
3231 }
3232 return error;
3233 }
3234
3235
3236 /*
3237 * fstat_extended
3238 *
3239 * Description: Extended version of fstat supporting returning extended
3240 * security information
3241 *
3242 * Parameters: p The process doing the fstat
3243 * uap->fd The fd to stat
3244 * uap->ub The user stat buffer
3245 * uap->xsecurity The user extended security
3246 * buffer, or 0 if none
3247 * uap->xsecurity_size The size of xsecurity, or 0
3248 *
3249 * Returns: 0 Success
3250 * !0 Errno (see fstat1)
3251 */
3252 int
3253 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3254 {
3255 return fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3256 }
3257
3258
3259 /*
3260 * fstat
3261 *
3262 * Description: Get file status for the file associated with fd
3263 *
3264 * Parameters: p The process doing the fstat
3265 * uap->fd The fd to stat
3266 * uap->ub The user stat buffer
3267 *
3268 * Returns: 0 Success
3269 * !0 Errno (see fstat1)
3270 */
3271 int
3272 fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3273 {
3274 return fstat1(p, uap->fd, uap->ub, 0, 0, 0);
3275 }
3276
3277
3278 /*
3279 * fstat64_extended
3280 *
3281 * Description: Extended version of fstat64 supporting returning extended
3282 * security information
3283 *
3284 * Parameters: p The process doing the fstat
3285 * uap->fd The fd to stat
3286 * uap->ub The user stat buffer
3287 * uap->xsecurity The user extended security
3288 * buffer, or 0 if none
3289 * uap->xsecurity_size The size of xsecurity, or 0
3290 *
3291 * Returns: 0 Success
3292 * !0 Errno (see fstat1)
3293 */
3294 int
3295 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3296 {
3297 return fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3298 }
3299
3300
3301 /*
3302 * fstat64
3303 *
3304 * Description: Get 64 bit version of the file status for the file associated
3305 * with fd
3306 *
3307 * Parameters: p The process doing the fstat
3308 * uap->fd The fd to stat
3309 * uap->ub The user stat buffer
3310 *
3311 * Returns: 0 Success
3312 * !0 Errno (see fstat1)
3313 */
3314 int
3315 fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3316 {
3317 return fstat1(p, uap->fd, uap->ub, 0, 0, 1);
3318 }
3319
3320
3321 /*
3322 * fpathconf
3323 *
3324 * Description: Return pathconf information about a file descriptor.
3325 *
3326 * Parameters: p Process making the request
3327 * uap->fd fd to get information about
3328 * uap->name Name of information desired
3329 * retval Pointer to the call return area
3330 *
3331 * Returns: 0 Success
3332 * EINVAL
3333 * fp_lookup:EBADF Bad file descriptor
3334 * vnode_getwithref:???
3335 * vn_pathconf:???
3336 *
3337 * Implicit returns:
3338 * *retval (modified) Returned information (numeric)
3339 */
3340 int
3341 fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3342 {
3343 int fd = uap->fd;
3344 struct fileproc *fp;
3345 struct vnode *vp;
3346 int error = 0;
3347 file_type_t type;
3348 caddr_t data;
3349
3350
3351 AUDIT_ARG(fd, uap->fd);
3352 if ((error = fp_lookup(p, fd, &fp, 0))) {
3353 return error;
3354 }
3355 type = fp->f_type;
3356 data = fp->f_data;
3357
3358 switch (type) {
3359 case DTYPE_SOCKET:
3360 if (uap->name != _PC_PIPE_BUF) {
3361 error = EINVAL;
3362 goto out;
3363 }
3364 *retval = PIPE_BUF;
3365 error = 0;
3366 goto out;
3367
3368 case DTYPE_PIPE:
3369 if (uap->name != _PC_PIPE_BUF) {
3370 error = EINVAL;
3371 goto out;
3372 }
3373 *retval = PIPE_BUF;
3374 error = 0;
3375 goto out;
3376
3377 case DTYPE_VNODE:
3378 vp = (struct vnode *)data;
3379
3380 if ((error = vnode_getwithref(vp)) == 0) {
3381 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3382
3383 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3384
3385 (void)vnode_put(vp);
3386 }
3387 goto out;
3388
3389 default:
3390 error = EINVAL;
3391 goto out;
3392 }
3393 /*NOTREACHED*/
3394 out:
3395 fp_drop(p, fd, fp, 0);
3396 return error;
3397 }
3398
3399 /*
3400 * Statistics counter for the number of times a process calling fdalloc()
3401 * has resulted in an expansion of the per process open file table.
3402 *
3403 * XXX This would likely be of more use if it were per process
3404 */
3405 int fdexpand;
3406
3407
3408 /*
3409 * fdalloc
3410 *
3411 * Description: Allocate a file descriptor for the process.
3412 *
3413 * Parameters: p Process to allocate the fd in
3414 * want The fd we would prefer to get
3415 * result Pointer to fd we got
3416 *
3417 * Returns: 0 Success
3418 * EMFILE
3419 * ENOMEM
3420 *
3421 * Implicit returns:
3422 * *result (modified) The fd which was allocated
3423 */
3424 int
3425 fdalloc(proc_t p, int want, int *result)
3426 {
3427 struct filedesc *fdp = p->p_fd;
3428 int i;
3429 int lim, last, numfiles, oldnfiles;
3430 struct fileproc **newofiles, **ofiles;
3431 char *newofileflags;
3432
3433 /*
3434 * Search for a free descriptor starting at the higher
3435 * of want or fd_freefile. If that fails, consider
3436 * expanding the ofile array.
3437 */
3438 #if DIAGNOSTIC
3439 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3440 #endif
3441
3442 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3443 for (;;) {
3444 last = min(fdp->fd_nfiles, lim);
3445 if ((i = want) < fdp->fd_freefile) {
3446 i = fdp->fd_freefile;
3447 }
3448 for (; i < last; i++) {
3449 if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3450 procfdtbl_reservefd(p, i);
3451 if (i > fdp->fd_lastfile) {
3452 fdp->fd_lastfile = i;
3453 }
3454 if (want <= fdp->fd_freefile) {
3455 fdp->fd_freefile = i;
3456 }
3457 *result = i;
3458 return 0;
3459 }
3460 }
3461
3462 /*
3463 * No space in current array. Expand?
3464 */
3465 if (fdp->fd_nfiles >= lim) {
3466 return EMFILE;
3467 }
3468 if (fdp->fd_nfiles < NDEXTENT) {
3469 numfiles = NDEXTENT;
3470 } else {
3471 numfiles = 2 * fdp->fd_nfiles;
3472 }
3473 /* Enforce lim */
3474 if (numfiles > lim) {
3475 numfiles = lim;
3476 }
3477 proc_fdunlock(p);
3478 MALLOC_ZONE(newofiles, struct fileproc **,
3479 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3480 proc_fdlock(p);
3481 if (newofiles == NULL) {
3482 return ENOMEM;
3483 }
3484 if (fdp->fd_nfiles >= numfiles) {
3485 FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
3486 continue;
3487 }
3488 newofileflags = (char *) &newofiles[numfiles];
3489 /*
3490 * Copy the existing ofile and ofileflags arrays
3491 * and zero the new portion of each array.
3492 */
3493 oldnfiles = fdp->fd_nfiles;
3494 (void) memcpy(newofiles, fdp->fd_ofiles,
3495 oldnfiles * sizeof(*fdp->fd_ofiles));
3496 (void) memset(&newofiles[oldnfiles], 0,
3497 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3498
3499 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3500 oldnfiles * sizeof(*fdp->fd_ofileflags));
3501 (void) memset(&newofileflags[oldnfiles], 0,
3502 (numfiles - oldnfiles) *
3503 sizeof(*fdp->fd_ofileflags));
3504 ofiles = fdp->fd_ofiles;
3505 fdp->fd_ofiles = newofiles;
3506 fdp->fd_ofileflags = newofileflags;
3507 fdp->fd_nfiles = numfiles;
3508 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
3509 fdexpand++;
3510 }
3511 }
3512
3513
3514 /*
3515 * fdavail
3516 *
3517 * Description: Check to see whether n user file descriptors are available
3518 * to the process p.
3519 *
3520 * Parameters: p Process to check in
3521 * n The number of fd's desired
3522 *
3523 * Returns: 0 No
3524 * 1 Yes
3525 *
3526 * Locks: Assumes proc_fdlock for process is held by the caller
3527 *
3528 * Notes: The answer only remains valid so long as the proc_fdlock is
3529 * held by the caller.
3530 */
3531 int
3532 fdavail(proc_t p, int n)
3533 {
3534 struct filedesc *fdp = p->p_fd;
3535 struct fileproc **fpp;
3536 char *flags;
3537 int i, lim;
3538
3539 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3540 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
3541 return 1;
3542 }
3543 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3544 flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3545 for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
3546 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
3547 return 1;
3548 }
3549 }
3550 return 0;
3551 }
3552
3553
3554 /*
3555 * fdrelse
3556 *
3557 * Description: Legacy KPI wrapper function for _fdrelse
3558 *
3559 * Parameters: p Process in which fd lives
3560 * fd fd to free
3561 *
3562 * Returns: void
3563 *
3564 * Locks: Assumes proc_fdlock for process is held by the caller
3565 */
3566 void
3567 fdrelse(proc_t p, int fd)
3568 {
3569 _fdrelse(p, fd);
3570 }
3571
3572
3573 /*
3574 * fdgetf_noref
3575 *
3576 * Description: Get the fileproc pointer for the given fd from the per process
3577 * open file table without taking an explicit reference on it.
3578 *
3579 * Parameters: p Process containing fd
3580 * fd fd to obtain fileproc for
3581 * resultfp Pointer to pointer return area
3582 *
3583 * Returns: 0 Success
3584 * EBADF
3585 *
3586 * Implicit returns:
3587 * *resultfp (modified) Pointer to fileproc pointer
3588 *
3589 * Locks: Assumes proc_fdlock for process is held by the caller
3590 *
3591 * Notes: Because there is no reference explicitly taken, the returned
3592 * fileproc pointer is only valid so long as the proc_fdlock
3593 * remains held by the caller.
3594 */
3595 int
3596 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
3597 {
3598 struct filedesc *fdp = p->p_fd;
3599 struct fileproc *fp;
3600
3601 if (fd < 0 || fd >= fdp->fd_nfiles ||
3602 (fp = fdp->fd_ofiles[fd]) == NULL ||
3603 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3604 return EBADF;
3605 }
3606 if (resultfp) {
3607 *resultfp = fp;
3608 }
3609 return 0;
3610 }
3611
3612
3613 /*
3614 * fp_getfvp
3615 *
3616 * Description: Get fileproc and vnode pointer for a given fd from the per
3617 * process open file table of the specified process, and if
3618 * successful, increment the f_iocount
3619 *
3620 * Parameters: p Process in which fd lives
3621 * fd fd to get information for
3622 * resultfp Pointer to result fileproc
3623 * pointer area, or 0 if none
3624 * resultvp Pointer to result vnode pointer
3625 * area, or 0 if none
3626 *
3627 * Returns: 0 Success
3628 * EBADF Bad file descriptor
3629 * ENOTSUP fd does not refer to a vnode
3630 *
3631 * Implicit returns:
3632 * *resultfp (modified) Fileproc pointer
3633 * *resultvp (modified) vnode pointer
3634 *
3635 * Notes: The resultfp and resultvp fields are optional, and may be
3636 * independently specified as NULL to skip returning information
3637 *
3638 * Locks: Internally takes and releases proc_fdlock
3639 */
3640 int
3641 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
3642 {
3643 struct filedesc *fdp = p->p_fd;
3644 struct fileproc *fp;
3645
3646 proc_fdlock_spin(p);
3647 if (fd < 0 || fd >= fdp->fd_nfiles ||
3648 (fp = fdp->fd_ofiles[fd]) == NULL ||
3649 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3650 proc_fdunlock(p);
3651 return EBADF;
3652 }
3653 if (fp->f_type != DTYPE_VNODE) {
3654 proc_fdunlock(p);
3655 return ENOTSUP;
3656 }
3657 os_ref_retain_locked(&fp->f_iocount);
3658
3659 if (resultfp) {
3660 *resultfp = fp;
3661 }
3662 if (resultvp) {
3663 *resultvp = (struct vnode *)fp->f_data;
3664 }
3665 proc_fdunlock(p);
3666
3667 return 0;
3668 }
3669
3670
3671 /*
3672 * fp_getfvpandvid
3673 *
3674 * Description: Get fileproc, vnode pointer, and vid for a given fd from the
3675 * per process open file table of the specified process, and if
3676 * successful, increment the f_iocount
3677 *
3678 * Parameters: p Process in which fd lives
3679 * fd fd to get information for
3680 * resultfp Pointer to result fileproc
3681 * pointer area, or 0 if none
3682 * resultvp Pointer to result vnode pointer
3683 * area, or 0 if none
3684 * vidp Pointer to resuld vid area
3685 *
3686 * Returns: 0 Success
3687 * EBADF Bad file descriptor
3688 * ENOTSUP fd does not refer to a vnode
3689 *
3690 * Implicit returns:
3691 * *resultfp (modified) Fileproc pointer
3692 * *resultvp (modified) vnode pointer
3693 * *vidp vid value
3694 *
3695 * Notes: The resultfp and resultvp fields are optional, and may be
3696 * independently specified as NULL to skip returning information
3697 *
3698 * Locks: Internally takes and releases proc_fdlock
3699 */
3700 int
3701 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3702 struct vnode **resultvp, uint32_t *vidp)
3703 {
3704 struct filedesc *fdp = p->p_fd;
3705 struct fileproc *fp;
3706
3707 proc_fdlock_spin(p);
3708 if (fd < 0 || fd >= fdp->fd_nfiles ||
3709 (fp = fdp->fd_ofiles[fd]) == NULL ||
3710 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3711 proc_fdunlock(p);
3712 return EBADF;
3713 }
3714 if (fp->f_type != DTYPE_VNODE) {
3715 proc_fdunlock(p);
3716 return ENOTSUP;
3717 }
3718 os_ref_retain_locked(&fp->f_iocount);
3719
3720 if (resultfp) {
3721 *resultfp = fp;
3722 }
3723 if (resultvp) {
3724 *resultvp = (struct vnode *)fp->f_data;
3725 }
3726 if (vidp) {
3727 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3728 }
3729 proc_fdunlock(p);
3730
3731 return 0;
3732 }
3733
3734
3735 /*
3736 * fp_getfsock
3737 *
3738 * Description: Get fileproc and socket pointer for a given fd from the
3739 * per process open file table of the specified process, and if
3740 * successful, increment the f_iocount
3741 *
3742 * Parameters: p Process in which fd lives
3743 * fd fd to get information for
3744 * resultfp Pointer to result fileproc
3745 * pointer area, or 0 if none
3746 * results Pointer to result socket
3747 * pointer area, or 0 if none
3748 *
3749 * Returns: EBADF The file descriptor is invalid
3750 * EOPNOTSUPP The file descriptor is not a socket
3751 * 0 Success
3752 *
3753 * Implicit returns:
3754 * *resultfp (modified) Fileproc pointer
3755 * *results (modified) socket pointer
3756 *
3757 * Notes: EOPNOTSUPP should probably be ENOTSOCK; this function is only
3758 * ever called from accept1().
3759 */
3760 int
3761 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3762 struct socket **results)
3763 {
3764 struct filedesc *fdp = p->p_fd;
3765 struct fileproc *fp;
3766
3767 proc_fdlock_spin(p);
3768 if (fd < 0 || fd >= fdp->fd_nfiles ||
3769 (fp = fdp->fd_ofiles[fd]) == NULL ||
3770 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3771 proc_fdunlock(p);
3772 return EBADF;
3773 }
3774 if (fp->f_type != DTYPE_SOCKET) {
3775 proc_fdunlock(p);
3776 return EOPNOTSUPP;
3777 }
3778 os_ref_retain_locked(&fp->f_iocount);
3779
3780 if (resultfp) {
3781 *resultfp = fp;
3782 }
3783 if (results) {
3784 *results = (struct socket *)fp->f_data;
3785 }
3786 proc_fdunlock(p);
3787
3788 return 0;
3789 }
3790
3791
3792 /*
3793 * fp_getfkq
3794 *
3795 * Description: Get fileproc and kqueue pointer for a given fd from the
3796 * per process open file table of the specified process, and if
3797 * successful, increment the f_iocount
3798 *
3799 * Parameters: p Process in which fd lives
3800 * fd fd to get information for
3801 * resultfp Pointer to result fileproc
3802 * pointer area, or 0 if none
3803 * resultkq Pointer to result kqueue
3804 * pointer area, or 0 if none
3805 *
3806 * Returns: EBADF The file descriptor is invalid
3807 * EBADF The file descriptor is not a socket
3808 * 0 Success
3809 *
3810 * Implicit returns:
3811 * *resultfp (modified) Fileproc pointer
3812 * *resultkq (modified) kqueue pointer
3813 *
3814 * Notes: The second EBADF should probably be something else to make
3815 * the error condition distinct.
3816 */
3817 int
3818 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3819 struct kqueue **resultkq)
3820 {
3821 struct filedesc *fdp = p->p_fd;
3822 struct fileproc *fp;
3823
3824 proc_fdlock_spin(p);
3825 if (fd < 0 || fd >= fdp->fd_nfiles ||
3826 (fp = fdp->fd_ofiles[fd]) == NULL ||
3827 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3828 proc_fdunlock(p);
3829 return EBADF;
3830 }
3831 if (fp->f_type != DTYPE_KQUEUE) {
3832 proc_fdunlock(p);
3833 return EBADF;
3834 }
3835 os_ref_retain_locked(&fp->f_iocount);
3836
3837 if (resultfp) {
3838 *resultfp = fp;
3839 }
3840 if (resultkq) {
3841 *resultkq = (struct kqueue *)fp->f_data;
3842 }
3843 proc_fdunlock(p);
3844
3845 return 0;
3846 }
3847
3848
3849 /*
3850 * fp_getfpshm
3851 *
3852 * Description: Get fileproc and POSIX shared memory pointer for a given fd
3853 * from the per process open file table of the specified process
3854 * and if successful, increment the f_iocount
3855 *
3856 * Parameters: p Process in which fd lives
3857 * fd fd to get information for
3858 * resultfp Pointer to result fileproc
3859 * pointer area, or 0 if none
3860 * resultpshm Pointer to result POSIX
3861 * shared memory pointer
3862 * pointer area, or 0 if none
3863 *
3864 * Returns: EBADF The file descriptor is invalid
3865 * EBADF The file descriptor is not a POSIX
3866 * shared memory area
3867 * 0 Success
3868 *
3869 * Implicit returns:
3870 * *resultfp (modified) Fileproc pointer
3871 * *resultpshm (modified) POSIX shared memory pointer
3872 *
3873 * Notes: The second EBADF should probably be something else to make
3874 * the error condition distinct.
3875 */
3876 int
3877 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3878 struct pshmnode **resultpshm)
3879 {
3880 struct filedesc *fdp = p->p_fd;
3881 struct fileproc *fp;
3882
3883 proc_fdlock_spin(p);
3884 if (fd < 0 || fd >= fdp->fd_nfiles ||
3885 (fp = fdp->fd_ofiles[fd]) == NULL ||
3886 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3887 proc_fdunlock(p);
3888 return EBADF;
3889 }
3890 if (fp->f_type != DTYPE_PSXSHM) {
3891 proc_fdunlock(p);
3892 return EBADF;
3893 }
3894 os_ref_retain_locked(&fp->f_iocount);
3895
3896 if (resultfp) {
3897 *resultfp = fp;
3898 }
3899 if (resultpshm) {
3900 *resultpshm = (struct pshmnode *)fp->f_data;
3901 }
3902 proc_fdunlock(p);
3903
3904 return 0;
3905 }
3906
3907
3908 /*
3909 * fp_getfsem
3910 *
3911 * Description: Get fileproc and POSIX semaphore pointer for a given fd from
3912 * the per process open file table of the specified process
3913 * and if successful, increment the f_iocount
3914 *
3915 * Parameters: p Process in which fd lives
3916 * fd fd to get information for
3917 * resultfp Pointer to result fileproc
3918 * pointer area, or 0 if none
3919 * resultpsem Pointer to result POSIX
3920 * semaphore pointer area, or
3921 * 0 if none
3922 *
3923 * Returns: EBADF The file descriptor is invalid
3924 * EBADF The file descriptor is not a POSIX
3925 * semaphore
3926 * 0 Success
3927 *
3928 * Implicit returns:
3929 * *resultfp (modified) Fileproc pointer
3930 * *resultpsem (modified) POSIX semaphore pointer
3931 *
3932 * Notes: The second EBADF should probably be something else to make
3933 * the error condition distinct.
3934 *
3935 * In order to support unnamed POSIX semaphores, the named
3936 * POSIX semaphores will have to move out of the per-process
3937 * open filetable, and into a global table that is shared with
3938 * unnamed POSIX semaphores, since unnamed POSIX semaphores
3939 * are typically used by declaring instances in shared memory,
3940 * and there's no other way to do this without changing the
3941 * underlying type, which would introduce binary compatibility
3942 * issues.
3943 */
3944 int
3945 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3946 struct psemnode **resultpsem)
3947 {
3948 struct filedesc *fdp = p->p_fd;
3949 struct fileproc *fp;
3950
3951 proc_fdlock_spin(p);
3952 if (fd < 0 || fd >= fdp->fd_nfiles ||
3953 (fp = fdp->fd_ofiles[fd]) == NULL ||
3954 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3955 proc_fdunlock(p);
3956 return EBADF;
3957 }
3958 if (fp->f_type != DTYPE_PSXSEM) {
3959 proc_fdunlock(p);
3960 return EBADF;
3961 }
3962 os_ref_retain_locked(&fp->f_iocount);
3963
3964 if (resultfp) {
3965 *resultfp = fp;
3966 }
3967 if (resultpsem) {
3968 *resultpsem = (struct psemnode *)fp->f_data;
3969 }
3970 proc_fdunlock(p);
3971
3972 return 0;
3973 }
3974
3975
3976 /*
3977 * fp_getfpipe
3978 *
3979 * Description: Get fileproc and pipe pointer for a given fd from the
3980 * per process open file table of the specified process
3981 * and if successful, increment the f_iocount
3982 *
3983 * Parameters: p Process in which fd lives
3984 * fd fd to get information for
3985 * resultfp Pointer to result fileproc
3986 * pointer area, or 0 if none
3987 * resultpipe Pointer to result pipe
3988 * pointer area, or 0 if none
3989 *
3990 * Returns: EBADF The file descriptor is invalid
3991 * EBADF The file descriptor is not a socket
3992 * 0 Success
3993 *
3994 * Implicit returns:
3995 * *resultfp (modified) Fileproc pointer
3996 * *resultpipe (modified) pipe pointer
3997 *
3998 * Notes: The second EBADF should probably be something else to make
3999 * the error condition distinct.
4000 */
4001 int
4002 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
4003 struct pipe **resultpipe)
4004 {
4005 struct filedesc *fdp = p->p_fd;
4006 struct fileproc *fp;
4007
4008 proc_fdlock_spin(p);
4009 if (fd < 0 || fd >= fdp->fd_nfiles ||
4010 (fp = fdp->fd_ofiles[fd]) == NULL ||
4011 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4012 proc_fdunlock(p);
4013 return EBADF;
4014 }
4015 if (fp->f_type != DTYPE_PIPE) {
4016 proc_fdunlock(p);
4017 return EBADF;
4018 }
4019 os_ref_retain_locked(&fp->f_iocount);
4020
4021 if (resultfp) {
4022 *resultfp = fp;
4023 }
4024 if (resultpipe) {
4025 *resultpipe = (struct pipe *)fp->f_data;
4026 }
4027 proc_fdunlock(p);
4028
4029 return 0;
4030 }
4031
4032
4033 /*
4034 * fp_lookup
4035 *
4036 * Description: Get fileproc pointer for a given fd from the per process
4037 * open file table of the specified process and if successful,
4038 * increment the f_iocount
4039 *
4040 * Parameters: p Process in which fd lives
4041 * fd fd to get information for
4042 * resultfp Pointer to result fileproc
4043 * pointer area, or 0 if none
4044 * locked !0 if the caller holds the
4045 * proc_fdlock, 0 otherwise
4046 *
4047 * Returns: 0 Success
4048 * EBADF Bad file descriptor
4049 *
4050 * Implicit returns:
4051 * *resultfp (modified) Fileproc pointer
4052 *
4053 * Locks: If the argument 'locked' is non-zero, then the caller is
4054 * expected to have taken and held the proc_fdlock; if it is
4055 * zero, than this routine internally takes and drops this lock.
4056 */
4057 int
4058 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
4059 {
4060 struct filedesc *fdp = p->p_fd;
4061 struct fileproc *fp;
4062
4063 if (!locked) {
4064 proc_fdlock_spin(p);
4065 }
4066 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
4067 (fp = fdp->fd_ofiles[fd]) == NULL ||
4068 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4069 if (!locked) {
4070 proc_fdunlock(p);
4071 }
4072 return EBADF;
4073 }
4074 os_ref_retain_locked(&fp->f_iocount);
4075
4076 if (resultfp) {
4077 *resultfp = fp;
4078 }
4079 if (!locked) {
4080 proc_fdunlock(p);
4081 }
4082
4083 return 0;
4084 }
4085
4086
4087 /*
4088 * fp_tryswap
4089 *
4090 * Description: Swap the fileproc pointer for a given fd with a new
4091 * fileproc pointer in the per-process open file table of
4092 * the specified process. The fdlock must be held at entry.
4093 * Iff the swap is successful, the old fileproc pointer is freed.
4094 *
4095 * Parameters: p Process containing the fd
4096 * fd The fd of interest
4097 * nfp Pointer to the newfp
4098 *
4099 * Returns: 0 Success
4100 * EBADF Bad file descriptor
4101 * EINTR Interrupted
4102 * EKEEPLOOKING Other references were active, try again.
4103 */
4104 int
4105 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
4106 {
4107 struct fileproc *fp;
4108 int error;
4109
4110 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
4111
4112 if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
4113 return error;
4114 }
4115 /*
4116 * At this point, our caller (change_guardedfd_np) has
4117 * one f_iocount reference, and we just took another
4118 * one to begin the replacement.
4119 * fp and nfp have a +1 reference from allocation.
4120 * Thus if no-one else is looking, f_iocount should be 3.
4121 */
4122 if (os_ref_get_count(&fp->f_iocount) < 3 ||
4123 1 != os_ref_get_count(&nfp->f_iocount)) {
4124 panic("%s: f_iocount", __func__);
4125 } else if (3 == os_ref_get_count(&fp->f_iocount)) {
4126 /* Copy the contents of *fp, preserving the "type" of *nfp */
4127
4128 nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) |
4129 (fp->f_flags & ~FP_TYPEMASK);
4130 os_ref_retain_locked(&nfp->f_iocount);
4131 os_ref_retain_locked(&nfp->f_iocount);
4132 nfp->f_fglob = fp->f_fglob;
4133 nfp->f_wset = fp->f_wset;
4134
4135 p->p_fd->fd_ofiles[fd] = nfp;
4136 fp_drop(p, fd, nfp, 1);
4137
4138 os_ref_release_live(&fp->f_iocount);
4139 os_ref_release_live(&fp->f_iocount);
4140 fileproc_free(fp);
4141 } else {
4142 /*
4143 * Wait for all other active references to evaporate.
4144 */
4145 p->p_fpdrainwait = 1;
4146 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
4147 PRIBIO | PCATCH, "tryswap fpdrain", NULL);
4148 if (0 == error) {
4149 /*
4150 * Return an "internal" errno to trigger a full
4151 * reevaluation of the change-guard attempt.
4152 */
4153 error = EKEEPLOOKING;
4154 }
4155 (void) fp_drop(p, fd, fp, 1);
4156 }
4157 return error;
4158 }
4159
4160
4161 /*
4162 * fp_drop_written
4163 *
4164 * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
4165 * reference previously taken by calling fp_lookup et. al.
4166 *
4167 * Parameters: p Process in which the fd lives
4168 * fd fd associated with the fileproc
4169 * fp fileproc on which to set the
4170 * flag and drop the reference
4171 *
4172 * Returns: 0 Success
4173 * fp_drop:EBADF Bad file descriptor
4174 *
4175 * Locks: This function internally takes and drops the proc_fdlock for
4176 * the supplied process
4177 *
4178 * Notes: The fileproc must correspond to the fd in the supplied proc
4179 */
4180 int
4181 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
4182 {
4183 int error;
4184
4185 proc_fdlock_spin(p);
4186
4187 fp->f_flags |= FP_WRITTEN;
4188
4189 error = fp_drop(p, fd, fp, 1);
4190
4191 proc_fdunlock(p);
4192
4193 return error;
4194 }
4195
4196
4197 /*
4198 * fp_drop_event
4199 *
4200 * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
4201 * reference previously taken by calling fp_lookup et. al.
4202 *
4203 * Parameters: p Process in which the fd lives
4204 * fd fd associated with the fileproc
4205 * fp fileproc on which to set the
4206 * flag and drop the reference
4207 *
4208 * Returns: 0 Success
4209 * fp_drop:EBADF Bad file descriptor
4210 *
4211 * Locks: This function internally takes and drops the proc_fdlock for
4212 * the supplied process
4213 *
4214 * Notes: The fileproc must correspond to the fd in the supplied proc
4215 */
4216 int
4217 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
4218 {
4219 int error;
4220
4221 proc_fdlock_spin(p);
4222
4223 fp->f_flags |= FP_WAITEVENT;
4224
4225 error = fp_drop(p, fd, fp, 1);
4226
4227 proc_fdunlock(p);
4228
4229 return error;
4230 }
4231
4232
4233 /*
4234 * fp_drop
4235 *
4236 * Description: Drop the I/O reference previously taken by calling fp_lookup
4237 * et. al.
4238 *
4239 * Parameters: p Process in which the fd lives
4240 * fd fd associated with the fileproc
4241 * fp fileproc on which to set the
4242 * flag and drop the reference
4243 * locked flag to internally take and
4244 * drop proc_fdlock if it is not
4245 * already held by the caller
4246 *
4247 * Returns: 0 Success
4248 * EBADF Bad file descriptor
4249 *
4250 * Locks: This function internally takes and drops the proc_fdlock for
4251 * the supplied process if 'locked' is non-zero, and assumes that
4252 * the caller already holds this lock if 'locked' is non-zero.
4253 *
4254 * Notes: The fileproc must correspond to the fd in the supplied proc
4255 */
4256 int
4257 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4258 {
4259 struct filedesc *fdp = p->p_fd;
4260 int needwakeup = 0;
4261
4262 if (!locked) {
4263 proc_fdlock_spin(p);
4264 }
4265 if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4266 (fp = fdp->fd_ofiles[fd]) == NULL ||
4267 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4268 !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4269 if (!locked) {
4270 proc_fdunlock(p);
4271 }
4272 return EBADF;
4273 }
4274
4275 if (1 == os_ref_release_locked(&fp->f_iocount)) {
4276 if (fp->f_flags & FP_SELCONFLICT) {
4277 fp->f_flags &= ~FP_SELCONFLICT;
4278 }
4279
4280 if (p->p_fpdrainwait) {
4281 p->p_fpdrainwait = 0;
4282 needwakeup = 1;
4283 }
4284 }
4285 if (!locked) {
4286 proc_fdunlock(p);
4287 }
4288 if (needwakeup) {
4289 wakeup(&p->p_fpdrainwait);
4290 }
4291
4292 return 0;
4293 }
4294
4295
4296 /*
4297 * file_vnode
4298 *
4299 * Description: Given an fd, look it up in the current process's per process
4300 * open file table, and return its internal vnode pointer.
4301 *
4302 * Parameters: fd fd to obtain vnode from
4303 * vpp pointer to vnode return area
4304 *
4305 * Returns: 0 Success
4306 * EINVAL The fd does not refer to a
4307 * vnode fileproc entry
4308 * fp_lookup:EBADF Bad file descriptor
4309 *
4310 * Implicit returns:
4311 * *vpp (modified) Returned vnode pointer
4312 *
4313 * Locks: This function internally takes and drops the proc_fdlock for
4314 * the current process
4315 *
4316 * Notes: If successful, this function increments the f_iocount on the
4317 * fd's corresponding fileproc.
4318 *
4319 * The fileproc referenced is not returned; because of this, care
4320 * must be taken to not drop the last reference (e.g. by closing
4321 * the file). This is inherently unsafe, since the reference may
4322 * not be recoverable from the vnode, if there is a subsequent
4323 * close that destroys the associate fileproc. The caller should
4324 * therefore retain their own reference on the fileproc so that
4325 * the f_iocount can be dropped subsequently. Failure to do this
4326 * can result in the returned pointer immediately becoming invalid
4327 * following the call.
4328 *
4329 * Use of this function is discouraged.
4330 */
4331 int
4332 file_vnode(int fd, struct vnode **vpp)
4333 {
4334 proc_t p = current_proc();
4335 struct fileproc *fp;
4336 int error;
4337
4338 proc_fdlock_spin(p);
4339 if ((error = fp_lookup(p, fd, &fp, 1))) {
4340 proc_fdunlock(p);
4341 return error;
4342 }
4343 if (fp->f_type != DTYPE_VNODE) {
4344 fp_drop(p, fd, fp, 1);
4345 proc_fdunlock(p);
4346 return EINVAL;
4347 }
4348 if (vpp != NULL) {
4349 *vpp = (struct vnode *)fp->f_data;
4350 }
4351 proc_fdunlock(p);
4352
4353 return 0;
4354 }
4355
4356
4357 /*
4358 * file_vnode_withvid
4359 *
4360 * Description: Given an fd, look it up in the current process's per process
4361 * open file table, and return its internal vnode pointer.
4362 *
4363 * Parameters: fd fd to obtain vnode from
4364 * vpp pointer to vnode return area
4365 * vidp pointer to vid of the returned vnode
4366 *
4367 * Returns: 0 Success
4368 * EINVAL The fd does not refer to a
4369 * vnode fileproc entry
4370 * fp_lookup:EBADF Bad file descriptor
4371 *
4372 * Implicit returns:
4373 * *vpp (modified) Returned vnode pointer
4374 *
4375 * Locks: This function internally takes and drops the proc_fdlock for
4376 * the current process
4377 *
4378 * Notes: If successful, this function increments the f_iocount on the
4379 * fd's corresponding fileproc.
4380 *
4381 * The fileproc referenced is not returned; because of this, care
4382 * must be taken to not drop the last reference (e.g. by closing
4383 * the file). This is inherently unsafe, since the reference may
4384 * not be recoverable from the vnode, if there is a subsequent
4385 * close that destroys the associate fileproc. The caller should
4386 * therefore retain their own reference on the fileproc so that
4387 * the f_iocount can be dropped subsequently. Failure to do this
4388 * can result in the returned pointer immediately becoming invalid
4389 * following the call.
4390 *
4391 * Use of this function is discouraged.
4392 */
4393 int
4394 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
4395 {
4396 proc_t p = current_proc();
4397 struct fileproc *fp;
4398 vnode_t vp;
4399 int error;
4400
4401 proc_fdlock_spin(p);
4402 if ((error = fp_lookup(p, fd, &fp, 1))) {
4403 proc_fdunlock(p);
4404 return error;
4405 }
4406 if (fp->f_type != DTYPE_VNODE) {
4407 fp_drop(p, fd, fp, 1);
4408 proc_fdunlock(p);
4409 return EINVAL;
4410 }
4411 vp = (struct vnode *)fp->f_data;
4412 if (vpp != NULL) {
4413 *vpp = vp;
4414 }
4415
4416 if ((vidp != NULL) && (vp != NULLVP)) {
4417 *vidp = (uint32_t)vp->v_id;
4418 }
4419
4420 proc_fdunlock(p);
4421
4422 return 0;
4423 }
4424
4425
4426 /*
4427 * file_socket
4428 *
4429 * Description: Given an fd, look it up in the current process's per process
4430 * open file table, and return its internal socket pointer.
4431 *
4432 * Parameters: fd fd to obtain vnode from
4433 * sp pointer to socket return area
4434 *
4435 * Returns: 0 Success
4436 * ENOTSOCK Not a socket
4437 * fp_lookup:EBADF Bad file descriptor
4438 *
4439 * Implicit returns:
4440 * *sp (modified) Returned socket pointer
4441 *
4442 * Locks: This function internally takes and drops the proc_fdlock for
4443 * the current process
4444 *
4445 * Notes: If successful, this function increments the f_iocount on the
4446 * fd's corresponding fileproc.
4447 *
4448 * The fileproc referenced is not returned; because of this, care
4449 * must be taken to not drop the last reference (e.g. by closing
4450 * the file). This is inherently unsafe, since the reference may
4451 * not be recoverable from the socket, if there is a subsequent
4452 * close that destroys the associate fileproc. The caller should
4453 * therefore retain their own reference on the fileproc so that
4454 * the f_iocount can be dropped subsequently. Failure to do this
4455 * can result in the returned pointer immediately becoming invalid
4456 * following the call.
4457 *
4458 * Use of this function is discouraged.
4459 */
4460 int
4461 file_socket(int fd, struct socket **sp)
4462 {
4463 proc_t p = current_proc();
4464 struct fileproc *fp;
4465 int error;
4466
4467 proc_fdlock_spin(p);
4468 if ((error = fp_lookup(p, fd, &fp, 1))) {
4469 proc_fdunlock(p);
4470 return error;
4471 }
4472 if (fp->f_type != DTYPE_SOCKET) {
4473 fp_drop(p, fd, fp, 1);
4474 proc_fdunlock(p);
4475 return ENOTSOCK;
4476 }
4477 *sp = (struct socket *)fp->f_data;
4478 proc_fdunlock(p);
4479
4480 return 0;
4481 }
4482
4483
4484 /*
4485 * file_flags
4486 *
4487 * Description: Given an fd, look it up in the current process's per process
4488 * open file table, and return its fileproc's flags field.
4489 *
4490 * Parameters: fd fd whose flags are to be
4491 * retrieved
4492 * flags pointer to flags data area
4493 *
4494 * Returns: 0 Success
4495 * ENOTSOCK Not a socket
4496 * fp_lookup:EBADF Bad file descriptor
4497 *
4498 * Implicit returns:
4499 * *flags (modified) Returned flags field
4500 *
4501 * Locks: This function internally takes and drops the proc_fdlock for
4502 * the current process
4503 *
4504 * Notes: This function will internally increment and decrement the
4505 * f_iocount of the fileproc as part of its operation.
4506 */
4507 int
4508 file_flags(int fd, int *flags)
4509 {
4510 proc_t p = current_proc();
4511 struct fileproc *fp;
4512 int error;
4513
4514 proc_fdlock_spin(p);
4515 if ((error = fp_lookup(p, fd, &fp, 1))) {
4516 proc_fdunlock(p);
4517 return error;
4518 }
4519 *flags = (int)fp->f_flag;
4520 fp_drop(p, fd, fp, 1);
4521 proc_fdunlock(p);
4522
4523 return 0;
4524 }
4525
4526
4527 /*
4528 * file_drop
4529 *
4530 * Description: Drop an iocount reference on an fd, and wake up any waiters
4531 * for draining (i.e. blocked in fileproc_drain() called during
4532 * the last attempt to close a file).
4533 *
4534 * Parameters: fd fd on which an ioreference is
4535 * to be dropped
4536 *
4537 * Returns: 0 Success
4538 * EBADF Bad file descriptor
4539 *
4540 * Description: Given an fd, look it up in the current process's per process
4541 * open file table, and drop it's fileproc's f_iocount by one
4542 *
4543 * Notes: This is intended as a corresponding operation to the functions
4544 * file_vnode() and file_socket() operations.
4545 *
4546 * Technically, the close reference is supposed to be protected
4547 * by a fileproc_drain(), however, a drain will only block if
4548 * the fd refers to a character device, and that device has had
4549 * preparefileread() called on it. If it refers to something
4550 * other than a character device, then the drain will occur and
4551 * block each close attempt, rather than merely the last close.
4552 *
4553 * Since it's possible for an fd that refers to a character
4554 * device to have an intermediate close followed by an open to
4555 * cause a different file to correspond to that descriptor,
4556 * unless there was a cautionary reference taken on the fileproc,
4557 * this is an inherently unsafe function. This happens in the
4558 * case where multiple fd's in a process refer to the same
4559 * character device (e.g. stdin/out/err pointing to a tty, etc.).
4560 *
4561 * Use of this function is discouraged.
4562 */
4563 int
4564 file_drop(int fd)
4565 {
4566 struct fileproc *fp;
4567 proc_t p = current_proc();
4568 int needwakeup = 0;
4569
4570 proc_fdlock_spin(p);
4571 if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
4572 (fp = p->p_fd->fd_ofiles[fd]) == NULL ||
4573 ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
4574 !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
4575 proc_fdunlock(p);
4576 return EBADF;
4577 }
4578
4579 if (1 == os_ref_release_locked(&fp->f_iocount)) {
4580 if (fp->f_flags & FP_SELCONFLICT) {
4581 fp->f_flags &= ~FP_SELCONFLICT;
4582 }
4583
4584 if (p->p_fpdrainwait) {
4585 p->p_fpdrainwait = 0;
4586 needwakeup = 1;
4587 }
4588 }
4589 proc_fdunlock(p);
4590
4591 if (needwakeup) {
4592 wakeup(&p->p_fpdrainwait);
4593 }
4594 return 0;
4595 }
4596
4597
4598 static int falloc_withalloc_locked(proc_t, struct fileproc **, int *,
4599 vfs_context_t, struct fileproc * (*)(void *), void *, int);
4600
4601 /*
4602 * falloc
4603 *
4604 * Description: Allocate an entry in the per process open file table and
4605 * return the corresponding fileproc and fd.
4606 *
4607 * Parameters: p The process in whose open file
4608 * table the fd is to be allocated
4609 * resultfp Pointer to fileproc pointer
4610 * return area
4611 * resultfd Pointer to fd return area
4612 * ctx VFS context
4613 *
4614 * Returns: 0 Success
4615 * falloc:ENFILE Too many open files in system
4616 * falloc:EMFILE Too many open files in process
4617 * falloc:ENOMEM M_FILEPROC or M_FILEGLOB zone
4618 * exhausted
4619 *
4620 * Implicit returns:
4621 * *resultfd (modified) Returned fileproc pointer
4622 * *resultfd (modified) Returned fd
4623 *
4624 * Locks: This function takes and drops the proc_fdlock; if this lock
4625 * is already held, use falloc_locked() instead.
4626 *
4627 * Notes: This function takes separate process and context arguments
4628 * solely to support kern_exec.c; otherwise, it would take
4629 * neither, and expect falloc_locked() to use the
4630 * vfs_context_current() routine internally.
4631 */
4632 int
4633 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4634 {
4635 return falloc_withalloc(p, resultfp, resultfd, ctx,
4636 fileproc_alloc_init, NULL);
4637 }
4638
4639 /*
4640 * Like falloc, but including the fileproc allocator and create-args
4641 */
4642 int
4643 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4644 vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg)
4645 {
4646 int error;
4647
4648 proc_fdlock(p);
4649 error = falloc_withalloc_locked(p,
4650 resultfp, resultfd, ctx, fp_zalloc, arg, 1);
4651 proc_fdunlock(p);
4652
4653 return error;
4654 }
4655
/*
 * "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists
 *
 * Declared without an initializer at file scope, so every field is zero.
 * falloc_withalloc_locked() points the fg_ops of each freshly created
 * fileglob at this table, so fg_ops is never NULL from the moment the
 * fileglob is created.
 */
static const struct fileops uninitops;
4660
4661 /*
4662 * falloc_locked
4663 *
4664 * Create a new open file structure and allocate
4665 * a file descriptor for the process that refers to it.
4666 *
4667 * Returns: 0 Success
4668 *
4669 * Description: Allocate an entry in the per process open file table and
4670 * return the corresponding fileproc and fd.
4671 *
4672 * Parameters: p The process in whose open file
4673 * table the fd is to be allocated
4674 * resultfp Pointer to fileproc pointer
4675 * return area
4676 * resultfd Pointer to fd return area
4677 * ctx VFS context
4678 * locked Flag to indicate whether the
4679 * caller holds proc_fdlock
4680 *
4681 * Returns: 0 Success
4682 * ENFILE Too many open files in system
4683 * fdalloc:EMFILE Too many open files in process
4684 * ENOMEM M_FILEPROC or M_FILEGLOB zone
4685 * exhausted
4686 * fdalloc:ENOMEM
4687 *
4688 * Implicit returns:
4689 * *resultfd (modified) Returned fileproc pointer
4690 * *resultfd (modified) Returned fd
4691 *
4692 * Locks: If the parameter 'locked' is zero, this function takes and
4693 * drops the proc_fdlock; if non-zero, the caller must hold the
4694 * lock.
4695 *
4696 * Notes: If you intend to use a non-zero 'locked' parameter, use the
4697 * utility function falloc() instead.
4698 *
4699 * This function takes separate process and context arguments
4700 * solely to support kern_exec.c; otherwise, it would take
4701 * neither, and use the vfs_context_current() routine internally.
4702 */
4703 int
4704 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4705 vfs_context_t ctx, int locked)
4706 {
4707 return falloc_withalloc_locked(p, resultfp, resultfd, ctx,
4708 fileproc_alloc_init, NULL, locked);
4709 }
4710
4711 static int
4712 falloc_withalloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4713 vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg,
4714 int locked)
4715 {
4716 struct fileproc *fp;
4717 struct fileglob *fg;
4718 int error, nfd;
4719
4720 if (nfiles >= maxfiles) {
4721 tablefull("file");
4722 return ENFILE;
4723 }
4724
4725 if (!locked) {
4726 proc_fdlock(p);
4727 }
4728
4729 if ((error = fdalloc(p, 0, &nfd))) {
4730 if (!locked) {
4731 proc_fdunlock(p);
4732 }
4733 return error;
4734 }
4735
4736 #if CONFIG_MACF
4737 error = mac_file_check_create(proc_ucred(p));
4738 if (error) {
4739 if (!locked) {
4740 proc_fdunlock(p);
4741 }
4742 return error;
4743 }
4744 #endif
4745
4746 /*
4747 * Allocate a new file descriptor.
4748 * If the process has file descriptor zero open, add to the list
4749 * of open files at that point, otherwise put it at the front of
4750 * the list of open files.
4751 */
4752 proc_fdunlock(p);
4753
4754 fp = (*fp_zalloc)(crarg);
4755 if (fp == NULL) {
4756 if (locked) {
4757 proc_fdlock(p);
4758 }
4759 return ENOMEM;
4760 }
4761 MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4762 if (fg == NULL) {
4763 fileproc_free(fp);
4764 if (locked) {
4765 proc_fdlock(p);
4766 }
4767 return ENOMEM;
4768 }
4769 bzero(fg, sizeof(struct fileglob));
4770 lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4771
4772 os_ref_retain_locked(&fp->f_iocount);
4773 fg->fg_count = 1;
4774 fg->fg_ops = &uninitops;
4775 fp->f_fglob = fg;
4776 #if CONFIG_MACF
4777 mac_file_label_init(fg);
4778 #endif
4779
4780 kauth_cred_ref(ctx->vc_ucred);
4781
4782 proc_fdlock(p);
4783
4784 fp->f_cred = ctx->vc_ucred;
4785
4786 #if CONFIG_MACF
4787 mac_file_label_associate(fp->f_cred, fg);
4788 #endif
4789
4790 OSAddAtomic(1, &nfiles);
4791
4792 p->p_fd->fd_ofiles[nfd] = fp;
4793
4794 if (!locked) {
4795 proc_fdunlock(p);
4796 }
4797
4798 if (resultfp) {
4799 *resultfp = fp;
4800 }
4801 if (resultfd) {
4802 *resultfd = nfd;
4803 }
4804
4805 return 0;
4806 }
4807
4808
4809 /*
4810 * fg_free
4811 *
4812 * Description: Free a file structure; drop the global open file count, and
4813 * drop the credential reference, if the fileglob has one, and
4814 * destroy the instance mutex before freeing
4815 *
4816 * Parameters: fg Pointer to fileglob to be
4817 * freed
4818 *
4819 * Returns: void
4820 */
4821 void
4822 fg_free(struct fileglob *fg)
4823 {
4824 OSAddAtomic(-1, &nfiles);
4825
4826 if (fg->fg_vn_data) {
4827 fg_vn_data_free(fg->fg_vn_data);
4828 fg->fg_vn_data = NULL;
4829 }
4830
4831 if (IS_VALID_CRED(fg->fg_cred)) {
4832 kauth_cred_unref(&fg->fg_cred);
4833 }
4834 lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4835
4836 #if CONFIG_MACF
4837 mac_file_label_destroy(fg);
4838 #endif
4839 FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4840 }
4841
4842
4843 /*
4844 * fg_get_vnode
4845 *
4846 * Description: Return vnode associated with the file structure, if
4847 * any. The lifetime of the returned vnode is bound to
4848 * the lifetime of the file structure.
4849 *
4850 * Parameters: fg Pointer to fileglob to
4851 * inspect
4852 *
4853 * Returns: vnode_t
4854 */
4855 vnode_t
4856 fg_get_vnode(struct fileglob *fg)
4857 {
4858 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
4859 return (vnode_t)fg->fg_data;
4860 } else {
4861 return NULL;
4862 }
4863 }
4864
4865 /*
4866 * fdexec
4867 *
4868 * Description: Perform close-on-exec processing for all files in a process
4869 * that are either marked as close-on-exec, or which were in the
4870 * process of being opened at the time of the execve
4871 *
4872 * Also handles the case (via posix_spawn()) where -all-
4873 * files except those marked with "inherit" as treated as
4874 * close-on-exec.
4875 *
4876 * Parameters: p Pointer to process calling
4877 * execve
4878 *
4879 * Returns: void
4880 *
4881 * Locks: This function internally takes and drops proc_fdlock()
4882 * But assumes tables don't grow/change while unlocked.
4883 *
4884 */
4885 void
4886 fdexec(proc_t p, short flags, int self_exec)
4887 {
4888 struct filedesc *fdp = p->p_fd;
4889 int i;
4890 boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4891 thread_t self = current_thread();
4892 struct uthread *ut = get_bsdthread_info(self);
4893 struct kqworkq *dealloc_kqwq = NULL;
4894
4895 /*
4896 * If the current thread is bound as a workq/workloop
4897 * servicing thread, we need to unbind it first.
4898 */
4899 if (ut->uu_kqr_bound && self_exec) {
4900 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4901 }
4902
4903 proc_fdlock(p);
4904
4905 /*
4906 * Deallocate the knotes for this process
4907 * and mark the tables non-existent so
4908 * subsequent kqueue closes go faster.
4909 */
4910 knotes_dealloc(p);
4911 assert(fdp->fd_knlistsize == 0);
4912 assert(fdp->fd_knhashmask == 0);
4913
4914 for (i = fdp->fd_lastfile; i >= 0; i--) {
4915 struct fileproc *fp = fdp->fd_ofiles[i];
4916 char *flagp = &fdp->fd_ofileflags[i];
4917
4918 if (fp && cloexec_default) {
4919 /*
4920 * Reverse the usual semantics of file descriptor
4921 * inheritance - all of them should be closed
4922 * except files marked explicitly as "inherit" and
4923 * not marked close-on-exec.
4924 */
4925 if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
4926 *flagp |= UF_EXCLOSE;
4927 }
4928 *flagp &= ~UF_INHERIT;
4929 }
4930
4931 if (
4932 ((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
4933 #if CONFIG_MACF
4934 || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4935 #endif
4936 ) {
4937 procfdtbl_clearfd(p, i);
4938 if (i == fdp->fd_lastfile && i > 0) {
4939 fdp->fd_lastfile--;
4940 }
4941 if (i < fdp->fd_freefile) {
4942 fdp->fd_freefile = i;
4943 }
4944
4945 /*
4946 * Wait for any third party viewers (e.g., lsof)
4947 * to release their references to this fileproc.
4948 */
4949 while (os_ref_get_count(&fp->f_iocount) > 1) {
4950 p->p_fpdrainwait = 1;
4951 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4952 "fpdrain", NULL);
4953 }
4954 if (fp->f_flags & FP_WAITEVENT) {
4955 (void)waitevent_close(p, fp);
4956 }
4957 closef_locked(fp, fp->f_fglob, p);
4958
4959 fileproc_free(fp);
4960 }
4961 }
4962
4963 /* release the per-process workq kq */
4964 if (fdp->fd_wqkqueue) {
4965 dealloc_kqwq = fdp->fd_wqkqueue;
4966 fdp->fd_wqkqueue = NULL;
4967 }
4968
4969 proc_fdunlock(p);
4970
4971 /* Anything to free? */
4972 if (dealloc_kqwq) {
4973 kqworkq_dealloc(dealloc_kqwq);
4974 }
4975 }
4976
4977
4978 /*
4979 * fdcopy
4980 *
4981 * Description: Copy a filedesc structure. This is normally used as part of
4982 * forkproc() when forking a new process, to copy the per process
4983 * open file table over to the new process.
4984 *
4985 * Parameters: p Process whose open file table
4986 * is to be copied (parent)
4987 * uth_cdir Per thread current working
4988 * cirectory, or NULL
4989 *
4990 * Returns: NULL Copy failed
4991 * !NULL Pointer to new struct filedesc
4992 *
4993 * Locks: This function internally takes and drops proc_fdlock()
4994 *
4995 * Notes: Files are copied directly, ignoring the new resource limits
4996 * for the process that's being copied into. Since the descriptor
4997 * references are just additional references, this does not count
4998 * against the number of open files on the system.
4999 *
5000 * The struct filedesc includes the current working directory,
5001 * and the current root directory, if the process is chroot'ed.
5002 *
5003 * If the exec was called by a thread using a per thread current
5004 * working directory, we inherit the working directory from the
5005 * thread making the call, rather than from the process.
5006 *
5007 * In the case of a failure to obtain a reference, for most cases,
5008 * the file entry will be silently dropped. There's an exception
5009 * for the case of a chroot dir, since a failure to to obtain a
5010 * reference there would constitute an "escape" from the chroot
5011 * environment, which must not be allowed. In that case, we will
5012 * deny the execve() operation, rather than allowing the escape.
5013 */
5014 struct filedesc *
5015 fdcopy(proc_t p, vnode_t uth_cdir)
5016 {
5017 struct filedesc *newfdp, *fdp = p->p_fd;
5018 int i;
5019 struct fileproc *ofp, *fp;
5020 vnode_t v_dir;
5021
5022 MALLOC_ZONE(newfdp, struct filedesc *,
5023 sizeof(*newfdp), M_FILEDESC, M_WAITOK);
5024 if (newfdp == NULL) {
5025 return NULL;
5026 }
5027
5028 proc_fdlock(p);
5029
5030 /*
5031 * the FD_CHROOT flag will be inherited via this copy
5032 */
5033 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
5034
5035 /*
5036 * If we are running with per-thread current working directories,
5037 * inherit the new current working directory from the current thread
5038 * instead, before we take our references.
5039 */
5040 if (uth_cdir != NULLVP) {
5041 newfdp->fd_cdir = uth_cdir;
5042 }
5043
5044 /*
5045 * For both fd_cdir and fd_rdir make sure we get
5046 * a valid reference... if we can't, than set
5047 * set the pointer(s) to NULL in the child... this
5048 * will keep us from using a non-referenced vp
5049 * and allows us to do the vnode_rele only on
5050 * a properly referenced vp
5051 */
5052 if ((v_dir = newfdp->fd_cdir)) {
5053 if (vnode_getwithref(v_dir) == 0) {
5054 if ((vnode_ref(v_dir))) {
5055 newfdp->fd_cdir = NULL;
5056 }
5057 vnode_put(v_dir);
5058 } else {
5059 newfdp->fd_cdir = NULL;
5060 }
5061 }
5062 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
5063 /*
5064 * we couldn't get a new reference on
5065 * the current working directory being
5066 * inherited... we might as well drop
5067 * our reference from the parent also
5068 * since the vnode has gone DEAD making
5069 * it useless... by dropping it we'll
5070 * be that much closer to recycling it
5071 */
5072 vnode_rele(fdp->fd_cdir);
5073 fdp->fd_cdir = NULL;
5074 }
5075
5076 if ((v_dir = newfdp->fd_rdir)) {
5077 if (vnode_getwithref(v_dir) == 0) {
5078 if ((vnode_ref(v_dir))) {
5079 newfdp->fd_rdir = NULL;
5080 }
5081 vnode_put(v_dir);
5082 } else {
5083 newfdp->fd_rdir = NULL;
5084 }
5085 }
5086 /* Coming from a chroot environment and unable to get a reference... */
5087 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
5088 proc_fdunlock(p);
5089 /*
5090 * We couldn't get a new reference on
5091 * the chroot directory being
5092 * inherited... this is fatal, since
5093 * otherwise it would constitute an
5094 * escape from a chroot environment by
5095 * the new process.
5096 */
5097 if (newfdp->fd_cdir) {
5098 vnode_rele(newfdp->fd_cdir);
5099 }
5100 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
5101 return NULL;
5102 }
5103
5104 /*
5105 * If the number of open files fits in the internal arrays
5106 * of the open file structure, use them, otherwise allocate
5107 * additional memory for the number of descriptors currently
5108 * in use.
5109 */
5110 if (newfdp->fd_lastfile < NDFILE) {
5111 i = NDFILE;
5112 } else {
5113 /*
5114 * Compute the smallest multiple of NDEXTENT needed
5115 * for the file descriptors currently in use,
5116 * allowing the table to shrink.
5117 */
5118 i = newfdp->fd_nfiles;
5119 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
5120 i /= 2;
5121 }
5122 }
5123 proc_fdunlock(p);
5124
5125 MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
5126 i * OFILESIZE, M_OFILETABL, M_WAITOK);
5127 if (newfdp->fd_ofiles == NULL) {
5128 if (newfdp->fd_cdir) {
5129 vnode_rele(newfdp->fd_cdir);
5130 }
5131 if (newfdp->fd_rdir) {
5132 vnode_rele(newfdp->fd_rdir);
5133 }
5134
5135 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
5136 return NULL;
5137 }
5138 (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
5139 proc_fdlock(p);
5140
5141 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
5142 newfdp->fd_nfiles = i;
5143
5144 if (fdp->fd_nfiles > 0) {
5145 struct fileproc **fpp;
5146 char *flags;
5147
5148 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
5149 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
5150 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
5151 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
5152
5153 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
5154 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
5155 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
5156 if ((ofp = *fpp) != NULL &&
5157 0 == (ofp->f_fglob->fg_lflags & FG_CONFINED) &&
5158 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
5159 #if DEBUG
5160 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
5161 panic("complex fileproc");
5162 }
5163 #endif
5164 fp = fileproc_alloc_init(NULL);
5165 if (fp == NULL) {
5166 /*
5167 * XXX no room to copy, unable to
5168 * XXX safely unwind state at present
5169 */
5170 *fpp = NULL;
5171 } else {
5172 fp->f_flags |=
5173 (ofp->f_flags & ~FP_TYPEMASK);
5174 fp->f_fglob = ofp->f_fglob;
5175 (void)fg_ref(fp);
5176 *fpp = fp;
5177 }
5178 } else {
5179 *fpp = NULL;
5180 *flags = 0;
5181 }
5182 if (*fpp == NULL) {
5183 if (i == newfdp->fd_lastfile && i > 0) {
5184 newfdp->fd_lastfile--;
5185 }
5186 if (i < newfdp->fd_freefile) {
5187 newfdp->fd_freefile = i;
5188 }
5189 }
5190 }
5191 }
5192
5193 proc_fdunlock(p);
5194
5195 /*
5196 * Initialize knote and kqueue tracking structs
5197 */
5198 newfdp->fd_knlist = NULL;
5199 newfdp->fd_knlistsize = 0;
5200 newfdp->fd_knhash = NULL;
5201 newfdp->fd_knhashmask = 0;
5202 newfdp->fd_kqhash = NULL;
5203 newfdp->fd_kqhashmask = 0;
5204 newfdp->fd_wqkqueue = NULL;
5205 lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
5206 lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
5207
5208 return newfdp;
5209 }
5210
5211
5212 /*
5213 * fdfree
5214 *
5215 * Description: Release a filedesc (per process open file table) structure;
5216 * this is done on process exit(), or from forkproc_free() if
5217 * the fork fails for some reason subsequent to a successful
5218 * call to fdcopy()
5219 *
5220 * Parameters: p Pointer to process going away
5221 *
5222 * Returns: void
5223 *
5224 * Locks: This function internally takes and drops proc_fdlock()
5225 */
5226 void
5227 fdfree(proc_t p)
5228 {
5229 struct filedesc *fdp;
5230 struct fileproc *fp;
5231 struct kqworkq *dealloc_kqwq = NULL;
5232 int i;
5233
5234 proc_fdlock(p);
5235
5236 if (p == kernproc || NULL == (fdp = p->p_fd)) {
5237 proc_fdunlock(p);
5238 return;
5239 }
5240
5241 extern struct filedesc filedesc0;
5242
5243 if (&filedesc0 == fdp) {
5244 panic("filedesc0");
5245 }
5246
5247 /*
5248 * deallocate all the knotes up front and claim empty
5249 * tables to make any subsequent kqueue closes faster.
5250 */
5251 knotes_dealloc(p);
5252 assert(fdp->fd_knlistsize == 0);
5253 assert(fdp->fd_knhashmask == 0);
5254
5255 /*
5256 * dealloc all workloops that have outstanding retains
5257 * when created with scheduling parameters.
5258 */
5259 kqworkloops_dealloc(p);
5260
5261 /* close file descriptors */
5262 if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
5263 for (i = fdp->fd_lastfile; i >= 0; i--) {
5264 if ((fp = fdp->fd_ofiles[i]) != NULL) {
5265 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
5266 panic("fdfree: found fp with UF_RESERVED");
5267 }
5268
5269 fileproc_drain(p, fp);
5270 procfdtbl_reservefd(p, i);
5271
5272 if (fp->f_flags & FP_WAITEVENT) {
5273 (void)waitevent_close(p, fp);
5274 }
5275 (void) closef_locked(fp, fp->f_fglob, p);
5276 fileproc_free(fp);
5277 }
5278 }
5279 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
5280 fdp->fd_ofiles = NULL;
5281 fdp->fd_nfiles = 0;
5282 }
5283
5284 if (fdp->fd_wqkqueue) {
5285 dealloc_kqwq = fdp->fd_wqkqueue;
5286 fdp->fd_wqkqueue = NULL;
5287 }
5288
5289 proc_fdunlock(p);
5290
5291 if (dealloc_kqwq) {
5292 kqworkq_dealloc(dealloc_kqwq);
5293 }
5294 if (fdp->fd_cdir) {
5295 vnode_rele(fdp->fd_cdir);
5296 }
5297 if (fdp->fd_rdir) {
5298 vnode_rele(fdp->fd_rdir);
5299 }
5300
5301 proc_fdlock_spin(p);
5302 p->p_fd = NULL;
5303 proc_fdunlock(p);
5304
5305 if (fdp->fd_kqhash) {
5306 for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
5307 assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
5308 }
5309 FREE(fdp->fd_kqhash, M_KQUEUE);
5310 }
5311
5312 lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
5313 lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
5314
5315 FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
5316 }
5317
5318 /*
5319 * closef_locked
5320 *
5321 * Description: Internal form of closef; called with proc_fdlock held
5322 *
5323 * Parameters: fp Pointer to fileproc for fd
5324 * fg Pointer to fileglob for fd
5325 * p Pointer to proc structure
5326 *
5327 * Returns: 0 Success
5328 * closef_finish:??? Anything returnable by a per-fileops
5329 * close function
5330 *
5331 * Note: Decrements reference count on file structure; if this was the
5332 * last reference, then closef_finish() is called
5333 *
5334 * p and fp are allowed to be NULL when closing a file that was
5335 * being passed in a message (but only if we are called when this
5336 * is NOT the last reference).
5337 */
5338 int
5339 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
5340 {
5341 struct vnode *vp;
5342 struct flock lf;
5343 struct vfs_context context;
5344 int error;
5345
5346 if (fg == NULL) {
5347 return 0;
5348 }
5349
5350 /* Set up context with cred stashed in fg */
5351 if (p == current_proc()) {
5352 context.vc_thread = current_thread();
5353 } else {
5354 context.vc_thread = NULL;
5355 }
5356 context.vc_ucred = fg->fg_cred;
5357
5358 /*
5359 * POSIX record locking dictates that any close releases ALL
5360 * locks owned by this process. This is handled by setting
5361 * a flag in the unlock to free ONLY locks obeying POSIX
5362 * semantics, and not to free BSD-style file locks.
5363 * If the descriptor was in a message, POSIX-style locks
5364 * aren't passed with the descriptor.
5365 */
5366 if (p && (p->p_ladvflag & P_LADVLOCK) &&
5367 DTYPE_VNODE == FILEGLOB_DTYPE(fg)) {
5368 proc_fdunlock(p);
5369
5370 lf.l_whence = SEEK_SET;
5371 lf.l_start = 0;
5372 lf.l_len = 0;
5373 lf.l_type = F_UNLCK;
5374 vp = (struct vnode *)fg->fg_data;
5375
5376 if ((error = vnode_getwithref(vp)) == 0) {
5377 (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
5378 (void)vnode_put(vp);
5379 }
5380 proc_fdlock(p);
5381 }
5382 lck_mtx_lock_spin(&fg->fg_lock);
5383 fg->fg_count--;
5384
5385 if (fg->fg_count > 0) {
5386 lck_mtx_unlock(&fg->fg_lock);
5387 return 0;
5388 }
5389 #if DIAGNOSTIC
5390 if (fg->fg_count != 0) {
5391 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
5392 }
5393 #endif
5394
5395 if (fp && (fp->f_flags & FP_WRITTEN)) {
5396 fg->fg_flag |= FWASWRITTEN;
5397 }
5398
5399 fg->fg_lflags |= FG_TERM;
5400 lck_mtx_unlock(&fg->fg_lock);
5401
5402 if (p) {
5403 proc_fdunlock(p);
5404 }
5405
5406 /* Since we ensure that fg->fg_ops is always initialized,
5407 * it is safe to invoke fo_close on the fg */
5408 error = fo_close(fg, &context);
5409
5410 fg_free(fg);
5411
5412 if (p) {
5413 proc_fdlock(p);
5414 }
5415
5416 return error;
5417 }
5418
5419
5420 /*
5421 * fileproc_drain
5422 *
5423 * Description: Drain out pending I/O operations
5424 *
5425 * Parameters: p Process closing this file
5426 * fp fileproc struct for the open
5427 * instance on the file
5428 *
5429 * Returns: void
5430 *
5431 * Locks: Assumes the caller holds the proc_fdlock
5432 *
5433 * Notes: For character devices, this occurs on the last close of the
5434 * device; for all other file descriptors, this occurs on each
5435 * close to prevent fd's from being closed out from under
5436 * operations currently in progress and blocked
5437 *
5438 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5439 * regarding their use and interaction with this function.
5440 */
5441 void
5442 fileproc_drain(proc_t p, struct fileproc * fp)
5443 {
5444 struct vfs_context context;
5445
5446 context.vc_thread = proc_thread(p); /* XXX */
5447 context.vc_ucred = fp->f_fglob->fg_cred;
5448
5449 /* Set the vflag for drain */
5450 fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
5451
5452 while (os_ref_get_count(&fp->f_iocount) > 1) {
5453 lck_mtx_convert_spin(&p->p_fdmlock);
5454
5455 fo_drain(fp, &context);
5456 if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
5457 if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64,
5458 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5459 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp);
5460 }
5461 }
5462 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5463 if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5464 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5465 panic("bad select_conflict_queue");
5466 }
5467 }
5468 p->p_fpdrainwait = 1;
5469
5470 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5471 }
5472 #if DIAGNOSTIC
5473 if ((fp->f_flags & FP_INSELECT) != 0) {
5474 panic("FP_INSELECT set on drained fp");
5475 }
5476 #endif
5477 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5478 fp->f_flags &= ~FP_SELCONFLICT;
5479 }
5480 }
5481
5482
5483 /*
5484 * fp_free
5485 *
5486 * Description: Release the fd and free the fileproc associated with the fd
5487 * in the per process open file table of the specified process;
5488 * these values must correspond.
5489 *
5490 * Parameters: p Process containing fd
5491 * fd fd to be released
5492 * fp fileproc to be freed
5493 */
5494 void
5495 fp_free(proc_t p, int fd, struct fileproc * fp)
5496 {
5497 proc_fdlock_spin(p);
5498 fdrelse(p, fd);
5499 proc_fdunlock(p);
5500
5501 fg_free(fp->f_fglob);
5502 os_ref_release_live(&fp->f_iocount);
5503 fileproc_free(fp);
5504 }
5505
5506
5507 /*
5508 * flock
5509 *
5510 * Description: Apply an advisory lock on a file descriptor.
5511 *
5512 * Parameters: p Process making request
5513 * uap->fd fd on which the lock is to be
5514 * attempted
5515 * uap->how (Un)Lock bits, including type
5516 * retval Pointer to the call return area
5517 *
5518 * Returns: 0 Success
5519 * fp_getfvp:EBADF Bad file descriptor
5520 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5521 * vnode_getwithref:???
5522 * VNOP_ADVLOCK:???
5523 *
5524 * Implicit returns:
5525 * *retval (modified) Size of dtable
5526 *
5527 * Notes: Just attempt to get a record lock of the requested type on
5528 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5529 */
5530 int
5531 flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5532 {
5533 int fd = uap->fd;
5534 int how = uap->how;
5535 struct fileproc *fp;
5536 struct vnode *vp;
5537 struct flock lf;
5538 vfs_context_t ctx = vfs_context_current();
5539 int error = 0;
5540
5541 AUDIT_ARG(fd, uap->fd);
5542 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5543 return error;
5544 }
5545 if ((error = vnode_getwithref(vp))) {
5546 goto out1;
5547 }
5548 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5549
5550 lf.l_whence = SEEK_SET;
5551 lf.l_start = 0;
5552 lf.l_len = 0;
5553 if (how & LOCK_UN) {
5554 lf.l_type = F_UNLCK;
5555 fp->f_flag &= ~FHASLOCK;
5556 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5557 goto out;
5558 }
5559 if (how & LOCK_EX) {
5560 lf.l_type = F_WRLCK;
5561 } else if (how & LOCK_SH) {
5562 lf.l_type = F_RDLCK;
5563 } else {
5564 error = EBADF;
5565 goto out;
5566 }
5567 #if CONFIG_MACF
5568 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
5569 if (error) {
5570 goto out;
5571 }
5572 #endif
5573 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf,
5574 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5575 ctx, NULL);
5576 if (!error) {
5577 fp->f_flag |= FHASLOCK;
5578 }
5579 out:
5580 (void)vnode_put(vp);
5581 out1:
5582 fp_drop(p, fd, fp, 0);
5583 return error;
5584 }
5585
5586 /*
5587 * fileport_makeport
5588 *
5589 * Description: Obtain a Mach send right for a given file descriptor.
5590 *
5591 * Parameters: p Process calling fileport
5592 * uap->fd The fd to reference
5593 * uap->portnamep User address at which to place port name.
5594 *
5595 * Returns: 0 Success.
5596 * EBADF Bad file descriptor.
5597 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5598 * EFAULT Address at which to store port name is not valid.
5599 * EAGAIN Resource shortage.
5600 *
5601 * Implicit returns:
5602 * On success, name of send right is stored at user-specified address.
5603 */
5604 int
5605 fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5606 __unused int *retval)
5607 {
5608 int err;
5609 int fd = uap->fd;
5610 user_addr_t user_portaddr = uap->portnamep;
5611 struct fileproc *fp = FILEPROC_NULL;
5612 struct fileglob *fg = NULL;
5613 ipc_port_t fileport;
5614 mach_port_name_t name = MACH_PORT_NULL;
5615
5616 proc_fdlock(p);
5617 err = fp_lookup(p, fd, &fp, 1);
5618 if (err != 0) {
5619 goto out_unlock;
5620 }
5621
5622 if (!file_issendable(p, fp)) {
5623 err = EINVAL;
5624 goto out_unlock;
5625 }
5626
5627 if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5628 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5629 goto out_unlock;
5630 }
5631
5632 /* Dropped when port is deallocated */
5633 fg = fp->f_fglob;
5634 fg_ref(fp);
5635
5636 proc_fdunlock(p);
5637
5638 /* Allocate and initialize a port */
5639 fileport = fileport_alloc(fg);
5640 if (fileport == IPC_PORT_NULL) {
5641 err = EAGAIN;
5642 fg_drop(fp);
5643 goto out;
5644 }
5645
5646 /* Add an entry. Deallocates port on failure. */
5647 name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5648 if (!MACH_PORT_VALID(name)) {
5649 err = EINVAL;
5650 goto out;
5651 }
5652
5653 err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5654 if (err != 0) {
5655 goto out;
5656 }
5657
5658 /* Tag the fileglob for debugging purposes */
5659 lck_mtx_lock_spin(&fg->fg_lock);
5660 fg->fg_lflags |= FG_PORTMADE;
5661 lck_mtx_unlock(&fg->fg_lock);
5662
5663 fp_drop(p, fd, fp, 0);
5664
5665 return 0;
5666
5667 out_unlock:
5668 proc_fdunlock(p);
5669 out:
5670 if (MACH_PORT_VALID(name)) {
5671 /* Don't care if another thread races us to deallocate the entry */
5672 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5673 }
5674
5675 if (fp != FILEPROC_NULL) {
5676 fp_drop(p, fd, fp, 0);
5677 }
5678
5679 return err;
5680 }
5681
5682 void
5683 fileport_releasefg(struct fileglob *fg)
5684 {
5685 (void)closef_locked(NULL, fg, PROC_NULL);
5686
5687 return;
5688 }
5689
5690 /*
5691 * fileport_makefd_internal
5692 *
5693 * Description: Obtain the file descriptor for a given Mach send right.
5694 *
5695 * Returns: 0 Success
5696 * EINVAL Invalid Mach port name, or port is not for a file.
5697 * fdalloc:EMFILE
5698 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5699 *
5700 * Implicit returns:
5701 * *retval (modified) The new descriptor
5702 */
5703 int
5704 fileport_makefd_internal(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5705 {
5706 struct fileglob *fg;
5707 struct fileproc *fp = FILEPROC_NULL;
5708 int fd;
5709 int err;
5710
5711 fg = fileport_port_to_fileglob(port);
5712 if (fg == NULL) {
5713 err = EINVAL;
5714 goto out;
5715 }
5716
5717 fp = fileproc_alloc_init(NULL);
5718 if (fp == FILEPROC_NULL) {
5719 err = ENOMEM;
5720 goto out;
5721 }
5722
5723 fp->f_fglob = fg;
5724 fg_ref(fp);
5725
5726 proc_fdlock(p);
5727 err = fdalloc(p, 0, &fd);
5728 if (err != 0) {
5729 proc_fdunlock(p);
5730 fg_drop(fp);
5731 goto out;
5732 }
5733 if (uf_flags) {
5734 *fdflags(p, fd) |= uf_flags;
5735 }
5736
5737 procfdtbl_releasefd(p, fd, fp);
5738 proc_fdunlock(p);
5739
5740 *retval = fd;
5741 err = 0;
5742 out:
5743 if ((fp != NULL) && (0 != err)) {
5744 fileproc_free(fp);
5745 }
5746
5747 return err;
5748 }
5749
5750 /*
5751 * fileport_makefd
5752 *
5753 * Description: Obtain the file descriptor for a given Mach send right.
5754 *
5755 * Parameters: p Process calling fileport
5756 * uap->port Name of send right to file port.
5757 *
5758 * Returns: 0 Success
5759 * EINVAL Invalid Mach port name, or port is not for a file.
5760 * fdalloc:EMFILE
5761 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5762 *
5763 * Implicit returns:
5764 * *retval (modified) The new descriptor
5765 */
5766 int
5767 fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5768 {
5769 ipc_port_t port = IPC_PORT_NULL;
5770 mach_port_name_t send = uap->port;
5771 kern_return_t res;
5772 int err;
5773
5774 res = ipc_object_copyin(get_task_ipcspace(p->task),
5775 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5776
5777 if (res == KERN_SUCCESS) {
5778 err = fileport_makefd_internal(p, port, UF_EXCLOSE, retval);
5779 } else {
5780 err = EINVAL;
5781 }
5782
5783 if (IPC_PORT_NULL != port) {
5784 ipc_port_release_send(port);
5785 }
5786
5787 return err;
5788 }
5789
5790
5791 /*
5792 * dupfdopen
5793 *
5794 * Description: Duplicate the specified descriptor to a free descriptor;
5795 * this is the second half of fdopen(), above.
5796 *
5797 * Parameters: fdp filedesc pointer to fill in
5798 * indx fd to dup to
5799 * dfd fd to dup from
5800 * mode mode to set on new fd
5801 * error command code
5802 *
5803 * Returns: 0 Success
5804 * EBADF Source fd is bad
5805 * EACCES Requested mode not allowed
5806 * !0 'error', if not ENODEV or
5807 * ENXIO
5808 *
5809 * Notes: XXX This is not thread safe; see fdopen() above
5810 */
5811 int
5812 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5813 {
5814 struct fileproc *wfp;
5815 struct fileproc *fp;
5816 #if CONFIG_MACF
5817 int myerror;
5818 #endif
5819 proc_t p = current_proc();
5820
5821 /*
5822 * If the to-be-dup'd fd number is greater than the allowed number
5823 * of file descriptors, or the fd to be dup'd has already been
5824 * closed, reject. Note, check for new == old is necessary as
5825 * falloc could allocate an already closed to-be-dup'd descriptor
5826 * as the new descriptor.
5827 */
5828 proc_fdlock(p);
5829
5830 fp = fdp->fd_ofiles[indx];
5831 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5832 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5833 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5834 proc_fdunlock(p);
5835 return EBADF;
5836 }
5837 #if CONFIG_MACF
5838 myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5839 if (myerror) {
5840 proc_fdunlock(p);
5841 return myerror;
5842 }
5843 #endif
5844 /*
5845 * There are two cases of interest here.
5846 *
5847 * For ENODEV simply dup (dfd) to file descriptor
5848 * (indx) and return.
5849 *
5850 * For ENXIO steal away the file structure from (dfd) and
5851 * store it in (indx). (dfd) is effectively closed by
5852 * this operation.
5853 *
5854 * Any other error code is just returned.
5855 */
5856 switch (error) {
5857 case ENODEV:
5858 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5859 proc_fdunlock(p);
5860 return EPERM;
5861 }
5862
5863 /*
5864 * Check that the mode the file is being opened for is a
5865 * subset of the mode of the existing descriptor.
5866 */
5867 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5868 proc_fdunlock(p);
5869 return EACCES;
5870 }
5871 if (indx > fdp->fd_lastfile) {
5872 fdp->fd_lastfile = indx;
5873 }
5874 (void)fg_ref(wfp);
5875
5876 if (fp->f_fglob) {
5877 fg_free(fp->f_fglob);
5878 }
5879 fp->f_fglob = wfp->f_fglob;
5880
5881 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5882 (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5883
5884 proc_fdunlock(p);
5885 return 0;
5886
5887 default:
5888 proc_fdunlock(p);
5889 return error;
5890 }
5891 /* NOTREACHED */
5892 }
5893
5894
5895 /*
5896 * fg_ref
5897 *
5898 * Description: Add a reference to a fileglob by fileproc
5899 *
5900 * Parameters: fp fileproc containing fileglob
5901 * pointer
5902 *
5903 * Returns: void
5904 *
5905 * Notes: XXX Should use OSAddAtomic?
5906 */
5907 void
5908 fg_ref(struct fileproc * fp)
5909 {
5910 struct fileglob *fg;
5911
5912 fg = fp->f_fglob;
5913
5914 lck_mtx_lock_spin(&fg->fg_lock);
5915
5916 #if DIAGNOSTIC
5917 if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0) {
5918 panic("fg_ref: invalid bits on fp %p", fp);
5919 }
5920
5921 if (fg->fg_count == 0) {
5922 panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5923 fp, fg);
5924 }
5925 #endif
5926 fg->fg_count++;
5927 lck_mtx_unlock(&fg->fg_lock);
5928 }
5929
5930
5931 /*
5932 * fg_drop
5933 *
5934 * Description: Remove a reference to a fileglob by fileproc
5935 *
5936 * Parameters: fp fileproc containing fileglob
5937 * pointer
5938 *
5939 * Returns: void
5940 *
5941 * Notes: XXX Should use OSAddAtomic?
5942 */
5943 void
5944 fg_drop(struct fileproc * fp)
5945 {
5946 struct fileglob *fg;
5947
5948 fg = fp->f_fglob;
5949 lck_mtx_lock_spin(&fg->fg_lock);
5950 fg->fg_count--;
5951 lck_mtx_unlock(&fg->fg_lock);
5952 }
5953
5954 #if SOCKETS
5955 /*
5956 * fg_insertuipc_mark
5957 *
5958 * Description: Mark fileglob for insertion onto message queue if needed
5959 * Also takes fileglob reference
5960 *
5961 * Parameters: fg Fileglob pointer to insert
5962 *
5963 * Returns: true, if the fileglob needs to be inserted onto msg queue
5964 *
5965 * Locks: Takes and drops fg_lock, potentially many times
5966 */
5967 boolean_t
5968 fg_insertuipc_mark(struct fileglob * fg)
5969 {
5970 boolean_t insert = FALSE;
5971
5972 lck_mtx_lock_spin(&fg->fg_lock);
5973 while (fg->fg_lflags & FG_RMMSGQ) {
5974 lck_mtx_convert_spin(&fg->fg_lock);
5975
5976 fg->fg_lflags |= FG_WRMMSGQ;
5977 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
5978 }
5979
5980 fg->fg_count++;
5981 fg->fg_msgcount++;
5982 if (fg->fg_msgcount == 1) {
5983 fg->fg_lflags |= FG_INSMSGQ;
5984 insert = TRUE;
5985 }
5986 lck_mtx_unlock(&fg->fg_lock);
5987 return insert;
5988 }
5989
5990 /*
5991 * fg_insertuipc
5992 *
5993 * Description: Insert marked fileglob onto message queue
5994 *
5995 * Parameters: fg Fileglob pointer to insert
5996 *
5997 * Returns: void
5998 *
5999 * Locks: Takes and drops fg_lock & uipc_lock
6000 * DO NOT call this function with proc_fdlock held as unp_gc()
6001 * can potentially try to acquire proc_fdlock, which can result
6002 * in a deadlock if this function is in unp_gc_wait().
6003 */
6004 void
6005 fg_insertuipc(struct fileglob * fg)
6006 {
6007 if (fg->fg_lflags & FG_INSMSGQ) {
6008 lck_mtx_lock_spin(uipc_lock);
6009 unp_gc_wait();
6010 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
6011 lck_mtx_unlock(uipc_lock);
6012 lck_mtx_lock(&fg->fg_lock);
6013 fg->fg_lflags &= ~FG_INSMSGQ;
6014 if (fg->fg_lflags & FG_WINSMSGQ) {
6015 fg->fg_lflags &= ~FG_WINSMSGQ;
6016 wakeup(&fg->fg_lflags);
6017 }
6018 lck_mtx_unlock(&fg->fg_lock);
6019 }
6020 }
6021
6022 /*
6023 * fg_removeuipc_mark
6024 *
6025 * Description: Mark the fileglob for removal from message queue if needed
6026 * Also releases fileglob message queue reference
6027 *
6028 * Parameters: fg Fileglob pointer to remove
6029 *
6030 * Returns: true, if the fileglob needs to be removed from msg queue
6031 *
6032 * Locks: Takes and drops fg_lock, potentially many times
6033 */
6034 boolean_t
6035 fg_removeuipc_mark(struct fileglob * fg)
6036 {
6037 boolean_t remove = FALSE;
6038
6039 lck_mtx_lock_spin(&fg->fg_lock);
6040 while (fg->fg_lflags & FG_INSMSGQ) {
6041 lck_mtx_convert_spin(&fg->fg_lock);
6042
6043 fg->fg_lflags |= FG_WINSMSGQ;
6044 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
6045 }
6046 fg->fg_msgcount--;
6047 if (fg->fg_msgcount == 0) {
6048 fg->fg_lflags |= FG_RMMSGQ;
6049 remove = TRUE;
6050 }
6051 lck_mtx_unlock(&fg->fg_lock);
6052 return remove;
6053 }
6054
6055 /*
6056 * fg_removeuipc
6057 *
6058 * Description: Remove marked fileglob from message queue
6059 *
6060 * Parameters: fg Fileglob pointer to remove
6061 *
6062 * Returns: void
6063 *
6064 * Locks: Takes and drops fg_lock & uipc_lock
6065 * DO NOT call this function with proc_fdlock held as unp_gc()
6066 * can potentially try to acquire proc_fdlock, which can result
6067 * in a deadlock if this function is in unp_gc_wait().
6068 */
6069 void
6070 fg_removeuipc(struct fileglob * fg)
6071 {
6072 if (fg->fg_lflags & FG_RMMSGQ) {
6073 lck_mtx_lock_spin(uipc_lock);
6074 unp_gc_wait();
6075 LIST_REMOVE(fg, f_msglist);
6076 lck_mtx_unlock(uipc_lock);
6077 lck_mtx_lock(&fg->fg_lock);
6078 fg->fg_lflags &= ~FG_RMMSGQ;
6079 if (fg->fg_lflags & FG_WRMMSGQ) {
6080 fg->fg_lflags &= ~FG_WRMMSGQ;
6081 wakeup(&fg->fg_lflags);
6082 }
6083 lck_mtx_unlock(&fg->fg_lock);
6084 }
6085 }
6086 #endif /* SOCKETS */
6087
6088 /*
6089 * fo_read
6090 *
6091 * Description: Generic fileops read indirected through the fileops pointer
6092 * in the fileproc structure
6093 *
6094 * Parameters: fp fileproc structure pointer
6095 * uio user I/O structure pointer
6096 * flags FOF_ flags
6097 * ctx VFS context for operation
6098 *
6099 * Returns: 0 Success
6100 * !0 Errno from read
6101 */
6102 int
6103 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6104 {
6105 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
6106 }
6107
6108 int
6109 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6110 {
6111 #pragma unused(fp, uio, flags, ctx)
6112 return ENXIO;
6113 }
6114
6115
6116 /*
6117 * fo_write
6118 *
6119 * Description: Generic fileops write indirected through the fileops pointer
6120 * in the fileproc structure
6121 *
6122 * Parameters: fp fileproc structure pointer
6123 * uio user I/O structure pointer
6124 * flags FOF_ flags
6125 * ctx VFS context for operation
6126 *
6127 * Returns: 0 Success
6128 * !0 Errno from write
6129 */
6130 int
6131 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6132 {
6133 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
6134 }
6135
6136 int
6137 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6138 {
6139 #pragma unused(fp, uio, flags, ctx)
6140 return ENXIO;
6141 }
6142
6143
6144 /*
6145 * fo_ioctl
6146 *
6147 * Description: Generic fileops ioctl indirected through the fileops pointer
6148 * in the fileproc structure
6149 *
6150 * Parameters: fp fileproc structure pointer
6151 * com ioctl command
6152 * data pointer to internalized copy
6153 * of user space ioctl command
6154 * parameter data in kernel space
6155 * ctx VFS context for operation
6156 *
6157 * Returns: 0 Success
6158 * !0 Errno from ioctl
6159 *
6160 * Locks: The caller is assumed to have held the proc_fdlock; this
6161 * function releases and reacquires this lock. If the caller
6162 * accesses data protected by this lock prior to calling this
6163 * function, it will need to revalidate/reacquire any cached
6164 * protected data obtained prior to the call.
6165 */
6166 int
6167 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6168 {
6169 int error;
6170
6171 proc_fdunlock(vfs_context_proc(ctx));
6172 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
6173 proc_fdlock(vfs_context_proc(ctx));
6174 return error;
6175 }
6176
6177 int
6178 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6179 {
6180 #pragma unused(fp, com, data, ctx)
6181 return ENOTTY;
6182 }
6183
6184
6185 /*
6186 * fo_select
6187 *
6188 * Description: Generic fileops select indirected through the fileops pointer
6189 * in the fileproc structure
6190 *
6191 * Parameters: fp fileproc structure pointer
6192 * which select which
6193 * wql pointer to wait queue list
6194 * ctx VFS context for operation
6195 *
6196 * Returns: 0 Success
6197 * !0 Errno from select
6198 */
6199 int
6200 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6201 {
6202 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6203 }
6204
6205 int
6206 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6207 {
6208 #pragma unused(fp, which, wql, ctx)
6209 return ENOTSUP;
6210 }
6211
6212
6213 /*
6214 * fo_close
6215 *
6216 * Description: Generic fileops close indirected through the fileops pointer
6217 * in the fileproc structure
6218 *
6219 * Parameters: fp fileproc structure pointer for
6220 * file to close
6221 * ctx VFS context for operation
6222 *
6223 * Returns: 0 Success
6224 * !0 Errno from close
6225 */
6226 int
6227 fo_close(struct fileglob *fg, vfs_context_t ctx)
6228 {
6229 return (*fg->fg_ops->fo_close)(fg, ctx);
6230 }
6231
6232
6233 /*
6234 * fo_drain
6235 *
6236 * Description: Generic fileops kqueue filter indirected through the fileops
6237 * pointer in the fileproc structure
6238 *
6239 * Parameters: fp fileproc structure pointer
6240 * ctx VFS context for operation
6241 *
6242 * Returns: 0 Success
6243 * !0 errno from drain
6244 */
6245 int
6246 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6247 {
6248 return (*fp->f_ops->fo_drain)(fp, ctx);
6249 }
6250
6251 int
6252 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6253 {
6254 #pragma unused(fp, ctx)
6255 return ENOTSUP;
6256 }
6257
6258
6259 /*
6260 * fo_kqfilter
6261 *
6262 * Description: Generic fileops kqueue filter indirected through the fileops
6263 * pointer in the fileproc structure
6264 *
6265 * Parameters: fp fileproc structure pointer
6266 * kn pointer to knote to filter on
6267 *
6268 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
6269 * 0 Filter is not active
6270 * !0 Filter is active
6271 */
6272 int
6273 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6274 {
6275 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6276 }
6277
6278 int
6279 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6280 {
6281 #pragma unused(fp, kev)
6282 knote_set_error(kn, ENOTSUP);
6283 return 0;
6284 }
6285
6286
6287 /*
6288 * The ability to send a file descriptor to another
6289 * process is opt-in by file type.
6290 */
6291 boolean_t
6292 file_issendable(proc_t p, struct fileproc *fp)
6293 {
6294 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
6295
6296 switch (fp->f_type) {
6297 case DTYPE_VNODE:
6298 case DTYPE_SOCKET:
6299 case DTYPE_PIPE:
6300 case DTYPE_PSXSHM:
6301 case DTYPE_NETPOLICY:
6302 return 0 == (fp->f_fglob->fg_lflags & FG_CONFINED);
6303 default:
6304 /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
6305 return FALSE;
6306 }
6307 }
6308
6309 os_refgrp_decl(, f_iocount_refgrp, "f_iocount", NULL);
6310
6311 struct fileproc *
6312 fileproc_alloc_init(__unused void *arg)
6313 {
6314 struct fileproc *fp;
6315
6316 MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK);
6317 if (fp) {
6318 bzero(fp, sizeof(*fp));
6319 os_ref_init(&fp->f_iocount, &f_iocount_refgrp);
6320 }
6321
6322 return fp;
6323 }
6324
6325
6326 void
6327 fileproc_free(struct fileproc *fp)
6328 {
6329 os_ref_count_t __unused refc = os_ref_release(&fp->f_iocount);
6330 #if DEVELOPMENT || DEBUG
6331 if (0 != refc) {
6332 panic("%s: pid %d refc: %u != 0",
6333 __func__, proc_pid(current_proc()), refc);
6334 }
6335 #endif
6336 switch (FILEPROC_TYPE(fp)) {
6337 case FTYPE_SIMPLE:
6338 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
6339 break;
6340 case FTYPE_GUARDED:
6341 guarded_fileproc_free(fp);
6342 break;
6343 default:
6344 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags);
6345 }
6346 }
6347
6348 void
6349 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
6350 {
6351 if (clearflags) {
6352 os_atomic_andnot(&fp->f_vflags, vflags, relaxed);
6353 } else {
6354 os_atomic_or(&fp->f_vflags, vflags, relaxed);
6355 }
6356 }
6357
6358 fileproc_vflags_t
6359 fileproc_get_vflags(struct fileproc *fp)
6360 {
6361 return os_atomic_load(&fp->f_vflags, relaxed);
6362 }