]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_descrip.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / kern / kern_descrip.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102 #include <sys/mount_internal.h>
103 #include <sys/kdebug.h>
104 #include <sys/sysproto.h>
105 #include <sys/pipe.h>
106 #include <sys/spawn.h>
107 #include <sys/cprotect.h>
108 #include <sys/ubc_internal.h>
109
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/waitq.h>
113 #include <kern/ipc_misc.h>
114
115 #include <vm/vm_protos.h>
116 #include <mach/mach_port.h>
117
118 #include <security/audit/audit.h>
119 #if CONFIG_MACF
120 #include <security/mac_framework.h>
121 #endif
122
123 #include <stdbool.h>
124 #include <os/atomic_private.h>
125 #include <IOKit/IOBSD.h>
126
#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
/*
 * Mach IPC entry points used by fileport support; declared locally to
 * avoid pulling in osfmk headers.
 */
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);

/* Helpers defined later in this file */
static void fileproc_drain(proc_t, struct fileproc *);
static int finishdup(proc_t p,
    struct filedesc *fdp, int old, int new, int flags, int32_t *retval);

void fileport_releasefg(struct fileglob *fg);

/* flags for fp_close_and_unlock */
#define FD_DUP2RESV 1 /* keep the target fd slot reserved after the close (dup2 path) */

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

static void fdrelse(struct proc * p, int fd);

extern void file_lock_init(void);

extern kauth_scope_t kauth_scope_fileop;

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;

/* fsctl/ioctl selectors needed by fcntl handling, when not already defined */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
#endif

/* Shorthand accessors: fileproc fields that actually live on the shared fileglob */
#define f_flag fp_glob->fg_flag
#define f_type fp_glob->fg_ops->fo_type
#define f_cred fp_glob->fg_cred
#define f_ops fp_glob->fg_ops
#define f_offset fp_glob->fg_offset
#define f_data fp_glob->fg_data
/* Evaluates to 1 when the signed 64-bit sum (x) + (y) would overflow or underflow */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
	? 1 : 0)

/* Allocation zones for the three core descriptor-table structures */
ZONE_DECLARE(fg_zone, "fileglob",
    sizeof(struct fileglob), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(fp_zone, "fileproc",
    sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(fdp_zone, "filedesc",
    sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);

/*
 * Descriptor management.
 */
int nfiles; /* actual number of open files */
/*
 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
 */
static const struct fileops uninitops;

os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
/* lock group/attr for per-fileglob fg_lock; set up in file_lock_init() */
lck_grp_attr_t * file_lck_grp_attr;
lck_grp_t * file_lck_grp;
lck_attr_t * file_lck_attr;
198
199 #pragma mark fileglobs
200
/*!
 * @function fg_free
 *
 * @brief
 * Free a file structure.
 *
 * @discussion
 * Tears down a fileglob whose last reference has been dropped:
 * decrements the global open-file count, releases the vnode-specific
 * cache data and the credential stashed at open time, destroys the
 * embedded mutex and (under MACF) the label, and returns the memory
 * to fg_zone.
 */
static void
fg_free(struct fileglob *fg)
{
	os_atomic_dec(&nfiles, relaxed);

	/* per-vnode auxiliary data, if this fileglob had any */
	if (fg->fg_vn_data) {
		fg_vn_data_free(fg->fg_vn_data);
		fg->fg_vn_data = NULL;
	}

	/* drop the credential reference captured when the file was opened */
	if (IS_VALID_CRED(fg->fg_cred)) {
		kauth_cred_unref(&fg->fg_cred);
	}
	lck_mtx_destroy(&fg->fg_lock, file_lck_grp);

#if CONFIG_MACF
	mac_file_label_destroy(fg);
#endif
	zfree(fg_zone, fg);
}
227
/*
 * fg_ref
 *
 * Take an additional reference on a fileglob on behalf of process p.
 * The caller must hold the proc_fdlock of p; this is asserted on
 * DEBUG/DEVELOPMENT kernels and p is otherwise unused.
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
239
/*
 * fg_drop_live
 *
 * Drop a fileglob reference that is known not to be the last one
 * (os_ref_release_live_raw enforces that the count stays above zero).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
245
/*
 * fg_drop
 *
 * Description: Drop a reference on a fileglob.  If this was the last
 *		reference, the file is closed via fo_close() and the
 *		fileglob is freed.  Per POSIX close() semantics, any
 *		POSIX-style advisory locks held by process p on a
 *		vnode-backed file are released first.
 *
 * Parameters:	p		Process on whose behalf the reference is
 *				dropped; may be NULL (lock release is then
 *				skipped — presumably the fileport case,
 *				confirm with callers)
 *		fg		fileglob to release; NULL is a no-op
 *
 * Returns:	0		Success
 *		???		Error from vnode_getwithref() or fo_close()
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg->fg_data;
		/* best effort: the unlock result is deliberately ignored */
		if ((error = vnode_getwithref(vp)) == 0) {
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
299
300 /*
301 * fg_get_vnode
302 *
303 * Description: Return vnode associated with the file structure, if
304 * any. The lifetime of the returned vnode is bound to
305 * the lifetime of the file structure.
306 *
307 * Parameters: fg Pointer to fileglob to
308 * inspect
309 *
310 * Returns: vnode_t
311 */
312 vnode_t
313 fg_get_vnode(struct fileglob *fg)
314 {
315 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
316 return (vnode_t)fg->fg_data;
317 } else {
318 return NULL;
319 }
320 }
321
322 bool
323 fg_sendable(struct fileglob *fg)
324 {
325 switch (FILEGLOB_DTYPE(fg)) {
326 case DTYPE_VNODE:
327 case DTYPE_SOCKET:
328 case DTYPE_PIPE:
329 case DTYPE_PSXSHM:
330 case DTYPE_NETPOLICY:
331 return (fg->fg_lflags & FG_CONFINED) == 0;
332
333 default:
334 return false;
335 }
336 }
337
338
339 #pragma mark fileprocs
340
/* True when the signed 64-bit sum x + y would overflow or underflow. */
static bool
seek_add_overflows(off_t x, off_t y)
{
	return (x > 0 && y > 0 && x > LLONG_MAX - y) ||
	       (x < 0 && y < 0 && x < LLONG_MIN - y);
}

/*
 * check_file_seek_range
 *
 * Description: Checks that the byte range described by a record-lock
 *		request stays within [0, LLONG_MAX].
 *
 * Parameters:	fl			Flock structure.
 *		cur_file_offset		Current offset in the file (used to
 *					resolve SEEK_CUR requests).
 *
 * Returns:	0		Success.
 *		EOVERFLOW	Start or end marker exceeds LLONG_MAX.
 *		EINVAL		Start or end marker resolves below zero.
 */
static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	if (fl->l_whence == SEEK_CUR) {
		/* Would the resolved start marker exceed LLONG_MAX? */
		if (seek_add_overflows(fl->l_start, cur_file_offset)) {
			/* A negative l_start means underflow instead */
			return fl->l_start < 0 ? EINVAL : EOVERFLOW;
		}
		off_t start = fl->l_start + cur_file_offset;
		/* Resolved start marker must not be negative. */
		if (start < 0) {
			return EINVAL;
		}
		/* End marker (start + len - 1) must not exceed LLONG_MAX... */
		if (fl->l_len > 0 && seek_add_overflows(start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* ...and must not be negative for zero/negative lengths. */
		if (fl->l_len <= 0 && start + fl->l_len < 0) {
			return EINVAL;
		}
	} else if (fl->l_whence == SEEK_SET) {
		/* Absolute start marker must not be negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* End marker must not exceed LLONG_MAX. */
		if (fl->l_len > 0 &&
		    seek_add_overflows(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* End marker must not be negative. */
		if (fl->l_len < 0 && fl->l_start + fl->l_len < 0) {
			return EINVAL;
		}
	}
	return 0;
}
397
398
/*
 * file_lock_init
 *
 * Description: Allocate the lock group and lock attribute used for the
 *		per-fileglob fg_lock (file_lck_grp / file_lck_attr).
 *
 * Parameters:	(void)
 *
 * Returns:	void
 *
 * Notes:	Called at system startup from bsd_init().
 */
void
file_lock_init(void)
{
	/* allocate file lock group attribute and group */
	file_lck_grp_attr = lck_grp_attr_alloc_init();

	file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);

	/* Allocate file lock attribute */
	file_lck_attr = lck_attr_alloc_init();
}
421
422
/*
 * proc_dirs_lock_shared / proc_dirs_unlock_shared
 * proc_dirs_lock_exclusive / proc_dirs_unlock_exclusive
 *
 * Convenience wrappers around the per-process p_dirs_lock
 * reader/writer lock: shared mode for readers, exclusive mode for
 * writers of the state it guards.
 */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_dirs_lock);
}

void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_dirs_lock);
}

void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_dirs_lock);
}

void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_dirs_lock);
}
446
/*
 * proc_fdlock, proc_fdlock_spin
 *
 * Description: Lock to control access to the per process struct fileproc
 *		and struct filedesc
 *
 * Parameters:	p				Process to take the lock on
 *
 * Returns:	void
 *
 * Notes:	The lock is initialized in forkproc() and destroyed in
 *		reap_child_process().  The _spin variant acquires the
 *		mutex in spin mode; proc_fdlock_assert() forwards the
 *		given LCK_MTX_ASSERT_* flags to lck_mtx_assert().
 */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fdmlock);
}

void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fdmlock);
}

void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fdmlock, assertflags);
}
477
478
/*
 * proc_fdunlock
 *
 * Description: Unlock the lock previously locked by a call to proc_fdlock()
 *		or proc_fdlock_spin()
 *
 * Parameters:	p				Process to drop the lock on
 *
 * Returns:	void
 */
void
proc_fdunlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fdmlock);
}
493
494 struct fdt_iterator
495 fdt_next(proc_t p, int fd, bool only_settled)
496 {
497 struct fdt_iterator it;
498 struct filedesc *fdp = p->p_fd;
499 struct fileproc *fp;
500 int nfds = min(fdp->fd_lastfile + 1, fdp->fd_nfiles);
501
502 while (++fd < nfds) {
503 fp = fdp->fd_ofiles[fd];
504 if (fp == NULL || fp->fp_glob == NULL) {
505 continue;
506 }
507 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
508 continue;
509 }
510 it.fdti_fd = fd;
511 it.fdti_fp = fp;
512 return it;
513 }
514
515 it.fdti_fd = nfds;
516 it.fdti_fp = NULL;
517 return it;
518 }
519
520 struct fdt_iterator
521 fdt_prev(proc_t p, int fd, bool only_settled)
522 {
523 struct fdt_iterator it;
524 struct filedesc *fdp = p->p_fd;
525 struct fileproc *fp;
526
527 while (--fd >= 0) {
528 fp = fdp->fd_ofiles[fd];
529 if (fp == NULL || fp->fp_glob == NULL) {
530 continue;
531 }
532 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
533 continue;
534 }
535 it.fdti_fd = fd;
536 it.fdti_fp = fp;
537 return it;
538 }
539
540 it.fdti_fd = -1;
541 it.fdti_fp = NULL;
542 return it;
543 }
544
545 /*
546 * System calls on descriptors.
547 */
548
549
/*
 * sys_getdtablesize
 *
 * Description: Returns the per process maximum size of the descriptor table
 *
 * Parameters:	p				Process being queried
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *
 * Implicit returns:
 *		*retval (modified)		Size of dtable: the smaller of
 *						the RLIMIT_NOFILE limit and the
 *						system-wide maxfilesperproc cap
 */
int
sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
{
	*retval = (int32_t)MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc);

	return 0;
}
570
571
/*
 * procfdtbl_reservefd
 *
 * Mark descriptor slot fd as reserved: it holds no fileproc yet, but the
 * UF_RESERVED flag keeps it from being handed out again.  Caller is
 * expected to hold the proc_fdlock.
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd->fd_ofiles[fd] = NULL;
	p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
}
578
579 void
580 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
581 {
582 if (fp != NULL) {
583 p->p_fd->fd_ofiles[fd] = fp;
584 }
585 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
586 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
587 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
588 wakeup(&p->p_fd);
589 }
590 }
591
/*
 * procfdtbl_waitfd
 *
 * Sleep until a reserved descriptor slot settles.  Sets UF_RESVWAIT so
 * procfdtbl_releasefd()/procfdtbl_clearfd() know to issue a wakeup; the
 * proc_fdlock is dropped for the duration of the msleep and reacquired
 * before returning.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
}
598
599 static void
600 procfdtbl_clearfd(struct proc * p, int fd)
601 {
602 int waiting;
603
604 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
605 p->p_fd->fd_ofiles[fd] = NULL;
606 p->p_fd->fd_ofileflags[fd] = 0;
607 if (waiting == UF_RESVWAIT) {
608 wakeup(&p->p_fd);
609 }
610 }
611
/*
 * fdrelse
 *
 * Description: Utility function to free an fd in a process's descriptor
 *		table, updating the fd_freefile hint and shrinking
 *		fd_lastfile past trailing empty slots
 *
 * Parameters:	p		Process whose descriptor table holds fd
 *		fd		fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for process p is held by the caller
 */
static void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	int nfd = 0;

	/* keep fd_freefile pointing at the lowest free slot for fdalloc */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd > fdp->fd_lastfile) {
		panic("fdrelse: fd_lastfile inconsistent");
	}
#endif
	procfdtbl_clearfd(p, fd);

	/* shrink fd_lastfile past any trailing run of empty, unreserved slots */
	while ((nfd = fdp->fd_lastfile) > 0 &&
	    fdp->fd_ofiles[nfd] == NULL &&
	    !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
		/* JMM - What about files with lingering EV_VANISHED knotes? */
		fdp->fd_lastfile--;
	}
}
649
650
/*
 * fd_rdwr
 *
 * Description: Kernel-internal read or write through a descriptor of the
 *		current process.  Only vnode, pipe and socket descriptors
 *		are accepted, and the fd must have been opened with the
 *		mode (FREAD/FWRITE) matching the requested direction.
 *
 * Parameters:	fd		Descriptor to read from / write to
 *		rw		UIO_READ or UIO_WRITE
 *		base		Buffer address (user or kernel per segflg)
 *		len		Number of bytes to transfer
 *		segflg		Address space that base refers to
 *		offset		Explicit file offset; ignored when io_flg
 *				contains IO_APPEND (FOF_OFFSET not set)
 *		io_flg		IO_* flags; only IO_APPEND is examined here
 *		aresid		If non-NULL, receives the residual byte
 *				count; if NULL, a short transfer with no
 *				other error is reported as EIO
 *
 * Returns:	0		Success
 *		EINVAL		Unsupported descriptor type
 *		EBADF		fd not open for the requested direction
 *		fp_lookup:EBADF
 *		fo_read/fo_write:???
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t offset,
	int io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	char uio_buf[UIO_SIZEOF(1)];
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* only these descriptor types support kernel-internal I/O here */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* perform the I/O with the credential stashed at open time */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	/* single-iovec uio built in the on-stack buffer; no allocation */
	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* without IO_APPEND, use the caller-supplied offset (FOF_OFFSET) */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* record that at least one byte was written to this file */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		/* caller demanded a full transfer but didn't ask for the residual */
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
732
733
734
/*
 * sys_dup
 *
 * Description: Duplicate a file descriptor onto the lowest available
 *		descriptor number.
 *
 * Parameters:	p				Process performing the dup
 *		uap->fd				The fd to dup
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;

	proc_fdlock(p);
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* duplicating a dup-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	if ((error = fdalloc(p, 0, &new))) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	/* energy accounting trace for duplicated sockets, outside the fdlock */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
	}

	return error;
}
785
/*
 * sys_dup2
 *
 * Description: Duplicate a file descriptor to a particular value.  Thin
 *		syscall wrapper around dup2() below.
 *
 * Parameters:	p				Process performing the dup
 *		uap->from			The fd to dup
 *		uap->to				The fd to dup it to
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
{
	return dup2(p, uap->from, uap->to, retval);
}
807
/*
 * dup2
 *
 * Description: Duplicate descriptor `old` onto descriptor number `new`,
 *		first closing whatever `new` referred to.  If the target
 *		slot is mid-transition (UF_RESERVED), the whole operation
 *		restarts after waiting for the slot to settle.
 *
 * Parameters:	p		Process performing the dup
 *		old		Source descriptor
 *		new		Desired destination descriptor number
 *		retval		Out: the new descriptor (== new) on success
 *
 * Returns:	0		Success
 *		EBADF		new is negative or beyond the fd limits
 *		!0		Errno from fp_lookup/fdalloc/finishdup or a
 *				guard exception
 *
 * Locks:	Takes and drops the proc_fdlock; fp_close_and_unlock()
 *		transiently drops it while closing the displaced file.
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	proc_fdlock(p);

startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* duplicating a dup-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	if (new < 0 ||
	    (rlim_t)new >= nofile ||
	    new >= maxfilesperproc) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2 onto itself is a no-op that still validates old */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* target beyond current table: grow/allocate, then reserve new */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/* fdalloc returned some other slot; release it and
			 * fall into the close/reserve path for `new` */
			fdrelse(p, i);
			goto closeit;
		}
	} else {
closeit:
		/* someone else is mid-open/dup2 on this slot: wait and retry */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* closing a close-guarded fd raises a guard exception */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/* FD_DUP2RESV keeps the slot reserved across the close;
			 * this drops and retakes the fdlock */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
897
898
/*
 * fcntl
 *
 * Description: The file control system call.
 *
 * Parameters:	p				Process performing the fcntl
 *		uap->fd				The fd to operate against
 *		uap->cmd			The command to perform
 *		uap->arg			Pointer to the command argument
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno (see fcntl_nocancel)
 *
 * Implicit returns:
 *		*retval (modified)		fcntl return value (if any)
 *
 * Notes:	This system call differs from fcntl_nocancel() in that it
 *		tests for cancellation prior to performing a potentially
 *		blocking operation.
 */
int
sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
	/* pthread cancellation point: may abort before doing any work */
	__pthread_testcancel(1);
	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
}
926
/*
 * Entitlement string checked by fcntl handling.
 * NOTE(review): the consumer is in sys_fcntl_nocancel(), outside this
 * chunk — confirm usage there.
 */
#define ACCOUNT_OPENFROM_ENTITLEMENT \
	"com.apple.private.vfs.role-account-openfrom"
929
930 /*
931 * sys_fcntl_nocancel
932 *
933 * Description: A non-cancel-testing file control system call.
934 *
935 * Parameters: p Process performing the fcntl
936 * uap->fd The fd to operate against
937 * uap->cmd The command to perform
938 * uap->arg Pointer to the command argument
939 * retval Pointer to the call return area
940 *
941 * Returns: 0 Success
942 * EINVAL
943 * fp_lookup:EBADF Bad file descriptor
944 * [F_DUPFD]
945 * fdalloc:EMFILE
946 * fdalloc:ENOMEM
947 * finishdup:EBADF
948 * finishdup:ENOMEM
949 * [F_SETOWN]
950 * ESRCH
951 * [F_SETLK]
952 * EBADF
953 * EOVERFLOW
954 * copyin:EFAULT
955 * vnode_getwithref:???
956 * VNOP_ADVLOCK:???
957 * msleep:ETIMEDOUT
958 * [F_GETLK]
959 * EBADF
960 * EOVERFLOW
961 * copyin:EFAULT
962 * copyout:EFAULT
963 * vnode_getwithref:???
964 * VNOP_ADVLOCK:???
965 * [F_PREALLOCATE]
966 * EBADF
967 * EINVAL
968 * copyin:EFAULT
969 * copyout:EFAULT
970 * vnode_getwithref:???
971 * VNOP_ALLOCATE:???
972 * [F_SETSIZE,F_RDADVISE]
973 * EBADF
974 * EINVAL
975 * copyin:EFAULT
976 * vnode_getwithref:???
977 * [F_RDAHEAD,F_NOCACHE]
978 * EBADF
979 * vnode_getwithref:???
980 * [???]
981 *
982 * Implicit returns:
983 * *retval (modified) fcntl return value (if any)
984 */
985 int
986 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
987 {
988 int fd = uap->fd;
989 struct filedesc *fdp = p->p_fd;
990 struct fileproc *fp;
991 char *pop;
992 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
993 unsigned int oflags, nflags;
994 int i, tmp, error, error2, flg = 0;
995 struct flock fl = {};
996 struct flocktimeout fltimeout;
997 struct timespec *timeout = NULL;
998 struct vfs_context context;
999 off_t offset;
1000 int newmin;
1001 daddr64_t lbn, bn;
1002 unsigned int fflag;
1003 user_addr_t argp;
1004 boolean_t is64bit;
1005 rlim_t nofile;
1006 int has_entitlement = 0;
1007
1008 AUDIT_ARG(fd, uap->fd);
1009 AUDIT_ARG(cmd, uap->cmd);
1010
1011 nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
1012
1013 proc_fdlock(p);
1014 if ((error = fp_lookup(p, fd, &fp, 1))) {
1015 proc_fdunlock(p);
1016 return error;
1017 }
1018 context.vc_thread = current_thread();
1019 context.vc_ucred = fp->f_cred;
1020
1021 is64bit = proc_is64bit(p);
1022 if (is64bit) {
1023 argp = uap->arg;
1024 } else {
1025 /*
1026 * Since the arg parameter is defined as a long but may be
1027 * either a long or a pointer we must take care to handle
1028 * sign extension issues. Our sys call munger will sign
1029 * extend a long when we are called from a 32-bit process.
1030 * Since we can never have an address greater than 32-bits
1031 * from a 32-bit process we lop off the top 32-bits to avoid
1032 * getting the wrong address
1033 */
1034 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
1035 }
1036
1037 #if CONFIG_MACF
1038 error = mac_file_check_fcntl(proc_ucred(p), fp->fp_glob, uap->cmd,
1039 uap->arg);
1040 if (error) {
1041 goto out;
1042 }
1043 #endif
1044
1045 pop = &fdp->fd_ofileflags[fd];
1046
1047 switch (uap->cmd) {
1048 case F_DUPFD:
1049 case F_DUPFD_CLOEXEC:
1050 if (fp_isguarded(fp, GUARD_DUP)) {
1051 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
1052 goto out;
1053 }
1054 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1055 AUDIT_ARG(value32, newmin);
1056 if ((rlim_t)newmin >= nofile ||
1057 newmin >= maxfilesperproc) {
1058 error = EINVAL;
1059 goto out;
1060 }
1061 if ((error = fdalloc(p, newmin, &i))) {
1062 goto out;
1063 }
1064 error = finishdup(p, fdp, fd, i,
1065 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
1066 goto out;
1067
1068 case F_GETFD:
1069 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
1070 error = 0;
1071 goto out;
1072
1073 case F_SETFD:
1074 AUDIT_ARG(value32, (uint32_t)uap->arg);
1075 if (uap->arg & FD_CLOEXEC) {
1076 *pop |= UF_EXCLOSE;
1077 } else {
1078 if (fp_isguarded(fp, 0)) {
1079 error = fp_guard_exception(p,
1080 fd, fp, kGUARD_EXC_NOCLOEXEC);
1081 goto out;
1082 }
1083 *pop &= ~UF_EXCLOSE;
1084 }
1085 error = 0;
1086 goto out;
1087
1088 case F_GETFL:
1089 *retval = OFLAGS(fp->f_flag);
1090 error = 0;
1091 goto out;
1092
1093 case F_SETFL:
1094 // FIXME (rdar://54898652)
1095 //
1096 // this code is broken if fnctl(F_SETFL), ioctl() are
1097 // called concurrently for the same fileglob.
1098
1099 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1100 AUDIT_ARG(value32, tmp);
1101
1102 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
1103 nflags = oflags & ~FCNTLFLAGS;
1104 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
1105 });
1106 tmp = nflags & FNONBLOCK;
1107 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1108 if (error) {
1109 goto out;
1110 }
1111 tmp = nflags & FASYNC;
1112 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1113 if (!error) {
1114 goto out;
1115 }
1116 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1117 tmp = 0;
1118 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1119 goto out;
1120
1121 case F_GETOWN:
1122 if (fp->f_type == DTYPE_SOCKET) {
1123 *retval = ((struct socket *)fp->f_data)->so_pgid;
1124 error = 0;
1125 goto out;
1126 }
1127 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
1128 *retval = -*retval;
1129 goto out;
1130
/*
 * F_SETOWN: set the recipient of SIGIO/SIGURG for this descriptor.
 * Sockets store the pgid directly; pipes forward via TIOCSPGRP ioctl;
 * for anything else a positive arg is validated as a live pid and
 * translated to its process-group id before the ioctl is issued.
 */
1131 case F_SETOWN:
1132 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
1133 AUDIT_ARG(value32, tmp);
1134 if (fp->f_type == DTYPE_SOCKET) {
1135 ((struct socket *)fp->f_data)->so_pgid = tmp;
1136 error = 0;
1137 goto out;
1138 }
1139 if (fp->f_type == DTYPE_PIPE) {
1140 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1141 goto out;
1142 }
1143
/* non-positive arg already names a process group (negated); positive
 * arg is a pid — look it up (ESRCH if gone) and use its pgid instead */
1144 if (tmp <= 0) {
1145 tmp = -tmp;
1146 } else {
1147 proc_t p1 = proc_find(tmp);
1148 if (p1 == 0) {
1149 error = ESRCH;
1150 goto out;
1151 }
1152 tmp = (int)p1->p_pgrpid;
1153 proc_rele(p1);
1154 }
1155 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1156 goto out;
1157
/*
 * F_SETNOSIGPIPE: suppress (or re-enable) SIGPIPE delivery for writes
 * on this descriptor.  Sockets use SO_NOSIGPIPE; all other types flag
 * the shared fileglob under fg_lock.
 */
1158 case F_SETNOSIGPIPE:
1159 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
1160 if (fp->f_type == DTYPE_SOCKET) {
1161 #if SOCKETS
1162 error = sock_setsockopt((struct socket *)fp->f_data,
1163 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
1164 #else
1165 error = EINVAL;
1166 #endif
1167 } else {
1168 struct fileglob *fg = fp->fp_glob;
1169
1170 lck_mtx_lock_spin(&fg->fg_lock);
1171 if (tmp) {
1172 fg->fg_lflags |= FG_NOSIGPIPE;
1173 } else {
1174 fg->fg_lflags &= ~FG_NOSIGPIPE;
1175 }
1176 lck_mtx_unlock(&fg->fg_lock);
1177 error = 0;
1178 }
1179 goto out;
1180
/* F_GETNOSIGPIPE: report the current SIGPIPE-suppression state (1/0). */
1181 case F_GETNOSIGPIPE:
1182 if (fp->f_type == DTYPE_SOCKET) {
1183 #if SOCKETS
1184 int retsize = sizeof(*retval);
1185 error = sock_getsockopt((struct socket *)fp->f_data,
1186 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
1187 #else
1188 error = EINVAL;
1189 #endif
1190 } else {
1191 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
1192 1 : 0;
1193 error = 0;
1194 }
1195 goto out;
1196
1197 case F_SETCONFINED:
1198 /*
1199 * If this is the only reference to this fglob in the process
1200 * and it's already marked as close-on-fork then mark it as
1201 * (immutably) "confined" i.e. any fd that points to it will
1202 * forever be close-on-fork, and attempts to use an IPC
1203 * mechanism to move the descriptor elsewhere will fail.
1204 */
1205 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1206 struct fileglob *fg = fp->fp_glob;
1207
1208 lck_mtx_lock_spin(&fg->fg_lock);
1209 if (fg->fg_lflags & FG_CONFINED) {
1210 error = 0;
/* a dup (or cross-process reference) holds another fg_count
 * reference: refuse until the caller closes the duplicate */
1211 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
1212 error = EAGAIN; /* go close the dup .. */
1213 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1214 fg->fg_lflags |= FG_CONFINED;
1215 error = 0;
1216 } else {
1217 error = EBADF; /* open without O_CLOFORK? */
1218 }
1219 lck_mtx_unlock(&fg->fg_lock);
1220 } else {
1221 /*
1222 * Other subsystems may have built on the immutability
1223 * of FG_CONFINED; clearing it may be tricky.
1224 */
1225 error = EPERM; /* immutable */
1226 }
1227 goto out;
1228
/* F_GETCONFINED: report whether the fileglob carries FG_CONFINED. */
1229 case F_GETCONFINED:
1230 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
1231 error = 0;
1232 goto out;
1233
1233
/*
 * Advisory record locking.  The blocking variants fall through after
 * setting F_WAIT; OFD variants attach the lock to the open file
 * description (fp_glob) rather than the process, matching Linux
 * open-file-description lock semantics.
 */
1234 case F_SETLKWTIMEOUT:
1235 case F_SETLKW:
1236 case F_OFD_SETLKWTIMEOUT:
1237 case F_OFD_SETLKW:
1238 flg |= F_WAIT;
1239 OS_FALLTHROUGH;
1240
1241 case F_SETLK:
1242 case F_OFD_SETLK:
1243 if (fp->f_type != DTYPE_VNODE) {
1244 error = EBADF;
1245 goto out;
1246 }
1247 vp = (struct vnode *)fp->f_data;
1248
/* snapshot flags/offset before dropping the fd lock; the copyin
 * below may sleep and must not run under proc_fdlock */
1249 fflag = fp->f_flag;
1250 offset = fp->f_offset;
1251 proc_fdunlock(p);
1252
1253 /* Copy in the lock structure */
1254 if (F_SETLKWTIMEOUT == uap->cmd ||
1255 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1256 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1257 if (error) {
1258 goto outdrop;
1259 }
1260 fl = fltimeout.fl;
1261 timeout = &fltimeout.timeout;
1262 } else {
1263 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1264 if (error) {
1265 goto outdrop;
1266 }
1267 }
1268
1269 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1270 /* and ending byte for EOVERFLOW in SEEK_SET */
1271 error = check_file_seek_range(&fl, offset);
1272 if (error) {
1273 goto outdrop;
1274 }
1275
1276 if ((error = vnode_getwithref(vp))) {
1277 goto outdrop;
1278 }
/* SEEK_CUR is resolved here using the offset snapshot taken above */
1279 if (fl.l_whence == SEEK_CUR) {
1280 fl.l_start += offset;
1281 }
1282
1283 #if CONFIG_MACF
1284 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1285 F_SETLK, &fl);
1286 if (error) {
1287 (void)vnode_put(vp);
1288 goto outdrop;
1289 }
1290 #endif
1291 switch (uap->cmd) {
1292 case F_OFD_SETLK:
1293 case F_OFD_SETLKW:
1294 case F_OFD_SETLKWTIMEOUT:
/* OFD locks: owner id is the fileglob, so the lock survives fork/dup
 * and is released on last close of the description */
1295 flg |= F_OFD_LOCK;
1296 switch (fl.l_type) {
1297 case F_RDLCK:
1298 if ((fflag & FREAD) == 0) {
1299 error = EBADF;
1300 break;
1301 }
1302 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1303 F_SETLK, &fl, flg, &context, timeout);
1304 break;
1305 case F_WRLCK:
1306 if ((fflag & FWRITE) == 0) {
1307 error = EBADF;
1308 break;
1309 }
1310 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1311 F_SETLK, &fl, flg, &context, timeout);
1312 break;
1313 case F_UNLCK:
1314 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1315 F_UNLCK, &fl, F_OFD_LOCK, &context,
1316 timeout);
1317 break;
1318 default:
1319 error = EINVAL;
1320 break;
1321 }
1322 if (0 == error &&
1323 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1324 struct fileglob *fg = fp->fp_glob;
1325
1326 /*
1327 * arrange F_UNLCK on last close (once
1328 * set, FG_HAS_OFDLOCK is immutable)
1329 */
1330 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1331 lck_mtx_lock_spin(&fg->fg_lock);
1332 fg->fg_lflags |= FG_HAS_OFDLOCK;
1333 lck_mtx_unlock(&fg->fg_lock);
1334 }
1335 }
1336 break;
1337 default:
/* classic POSIX locks: owner id is the process itself */
1338 flg |= F_POSIX;
1339 switch (fl.l_type) {
1340 case F_RDLCK:
1341 if ((fflag & FREAD) == 0) {
1342 error = EBADF;
1343 break;
1344 }
1345 // XXX UInt32 unsafe for LP64 kernel
/* P_LADVLOCK marks the process as holding advisory locks so exit
 * paths know to clean them up */
1346 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1347 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1348 F_SETLK, &fl, flg, &context, timeout);
1349 break;
1350 case F_WRLCK:
1351 if ((fflag & FWRITE) == 0) {
1352 error = EBADF;
1353 break;
1354 }
1355 // XXX UInt32 unsafe for LP64 kernel
1356 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1357 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1358 F_SETLK, &fl, flg, &context, timeout);
1359 break;
1360 case F_UNLCK:
1361 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1362 F_UNLCK, &fl, F_POSIX, &context, timeout);
1363 break;
1364 default:
1365 error = EINVAL;
1366 break;
1367 }
1368 break;
1369 }
1370 (void) vnode_put(vp);
1371 goto outdrop;
1372
/*
 * F_GETLK family: query whether a described lock would block, filling
 * in the conflicting lock (or F_UNLCK) and copying the struct back out.
 */
1373 case F_GETLK:
1374 case F_OFD_GETLK:
1375 case F_GETLKPID:
1376 case F_OFD_GETLKPID:
1377 if (fp->f_type != DTYPE_VNODE) {
1378 error = EBADF;
1379 goto out;
1380 }
1381 vp = (struct vnode *)fp->f_data;
1382
1383 offset = fp->f_offset;
1384 proc_fdunlock(p);
1385
1386 /* Copy in the lock structure */
1387 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1388 if (error) {
1389 goto outdrop;
1390 }
1391
1392 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1393 /* and ending byte for EOVERFLOW in SEEK_SET */
1394 error = check_file_seek_range(&fl, offset);
1395 if (error) {
1396 goto outdrop;
1397 }
1398
1399 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1400 error = EINVAL;
1401 goto outdrop;
1402 }
1403
/* validate user-supplied type and whence before touching the vnode */
1404 switch (fl.l_type) {
1405 case F_RDLCK:
1406 case F_UNLCK:
1407 case F_WRLCK:
1408 break;
1409 default:
1410 error = EINVAL;
1411 goto outdrop;
1412 }
1413
1414 switch (fl.l_whence) {
1415 case SEEK_CUR:
1416 case SEEK_SET:
1417 case SEEK_END:
1418 break;
1419 default:
1420 error = EINVAL;
1421 goto outdrop;
1422 }
1423
1424 if ((error = vnode_getwithref(vp)) == 0) {
1425 if (fl.l_whence == SEEK_CUR) {
1426 fl.l_start += offset;
1427 }
1428
1429 #if CONFIG_MACF
1430 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1431 uap->cmd, &fl);
/* note: the MACF-gated "if" governs the switch below whether or not
 * CONFIG_MACF is compiled in */
1432 if (error == 0)
1433 #endif
1434 switch (uap->cmd) {
1435 case F_OFD_GETLK:
1436 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1437 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1438 break;
1439 case F_OFD_GETLKPID:
1440 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1441 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1442 break;
1443 default:
1444 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1445 uap->cmd, &fl, F_POSIX, &context, NULL);
1446 break;
1447 }
1448
1449 (void)vnode_put(vp);
1450
1451 if (error == 0) {
1452 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1453 }
1454 }
1455 goto outdrop;
1456
1456
/*
 * F_PREALLOCATE: ask the filesystem to reserve space for the file.
 * Copies in an fstore_t, translates its flags/posmode into VNOP_ALLOCATE
 * flags, and copies the structure (with fst_bytesalloc filled in) back
 * out even when the allocation itself failed.
 */
1457 case F_PREALLOCATE: {
1458 fstore_t alloc_struct; /* structure for allocate command */
1459 u_int32_t alloc_flags = 0;
1460
1461 if (fp->f_type != DTYPE_VNODE) {
1462 error = EBADF;
1463 goto out;
1464 }
1465
1466 vp = (struct vnode *)fp->f_data;
1467 proc_fdunlock(p);
1468
1469 /* make sure that we have write permission */
1470 if ((fp->f_flag & FWRITE) == 0) {
1471 error = EBADF;
1472 goto outdrop;
1473 }
1474
1475 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1476 if (error) {
1477 goto outdrop;
1478 }
1479
1480 /* now set the space allocated to 0 */
1481 alloc_struct.fst_bytesalloc = 0;
1482
1483 /*
1484 * Do some simple parameter checking
1485 */
1486
1487 /* set up the flags */
1488
1489 alloc_flags |= PREALLOCATE;
1490
1491 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1492 alloc_flags |= ALLOCATECONTIG;
1493 }
1494
1495 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1496 alloc_flags |= ALLOCATEALL;
1497 }
1498
1499 /*
1500 * Do any position mode specific stuff. The only
1501 * position mode supported now is PEOFPOSMODE
1502 */
1503
1504 switch (alloc_struct.fst_posmode) {
1505 case F_PEOFPOSMODE:
/* allocating from end-of-file: a nonzero offset is meaningless */
1506 if (alloc_struct.fst_offset != 0) {
1507 error = EINVAL;
1508 goto outdrop;
1509 }
1510
1511 alloc_flags |= ALLOCATEFROMPEOF;
1512 break;
1513
1514 case F_VOLPOSMODE:
1515 if (alloc_struct.fst_offset <= 0) {
1516 error = EINVAL;
1517 goto outdrop;
1518 }
1519
1520 alloc_flags |= ALLOCATEFROMVOL;
1521 break;
1522
1523 default: {
1524 error = EINVAL;
1525 goto outdrop;
1526 }
1527 }
1528 if ((error = vnode_getwithref(vp)) == 0) {
1529 /*
1530 * call allocate to get the space
1531 */
1532 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1533 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1534 &context);
1535 (void)vnode_put(vp);
1536
/* copy the result back unconditionally; only surface the copyout
 * error when the allocation itself succeeded */
1537 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1538
1539 if (error == 0) {
1540 error = error2;
1541 }
1542 }
1543 goto outdrop;
1544 }
/*
 * F_PUNCHHOLE: deallocate a byte range of the file.  Requires FWRITE;
 * the real work is delegated to the filesystem via VNOP_IOCTL.
 */
1545 case F_PUNCHHOLE: {
1546 fpunchhole_t args;
1547
1548 if (fp->f_type != DTYPE_VNODE) {
1549 error = EBADF;
1550 goto out;
1551 }
1552
1553 vp = (struct vnode *)fp->f_data;
1554 proc_fdunlock(p);
1555
1556 /* need write permissions */
1557 if ((fp->f_flag & FWRITE) == 0) {
1558 error = EPERM;
1559 goto outdrop;
1560 }
1561
1562 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1563 goto outdrop;
1564 }
1565
1566 if ((error = vnode_getwithref(vp))) {
1567 goto outdrop;
1568 }
1569
1570 #if CONFIG_MACF
1571 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
1572 (void)vnode_put(vp);
1573 goto outdrop;
1574 }
1575 #endif
1576
1577 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1578 (void)vnode_put(vp);
1579
1580 goto outdrop;
1581 }
/*
 * F_TRIM_ACTIVE_FILE: privileged trim of an open file's blocks; gated
 * on PRIV_TRIM_ACTIVE_FILE and FWRITE, then forwarded to the filesystem.
 */
1582 case F_TRIM_ACTIVE_FILE: {
1583 ftrimactivefile_t args;
1584
1585 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1586 error = EACCES;
1587 goto out;
1588 }
1589
1590 if (fp->f_type != DTYPE_VNODE) {
1591 error = EBADF;
1592 goto out;
1593 }
1594
1595 vp = (struct vnode *)fp->f_data;
1596 proc_fdunlock(p);
1597
1598 /* need write permissions */
1599 if ((fp->f_flag & FWRITE) == 0) {
1600 error = EPERM;
1601 goto outdrop;
1602 }
1603
1604 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1605 goto outdrop;
1606 }
1607
1608 if ((error = vnode_getwithref(vp))) {
1609 goto outdrop;
1610 }
1611
1612 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1613 (void)vnode_put(vp);
1614
1615 goto outdrop;
1616 }
/*
 * F_SPECULATIVE_READ: hint the filesystem to read a range ahead of
 * demand.  The offset is page-aligned down here and the length grown by
 * the same delta so the original range stays covered.
 */
1617 case F_SPECULATIVE_READ: {
1618 fspecread_t args;
1619
1620 if (fp->f_type != DTYPE_VNODE) {
1621 error = EBADF;
1622 goto out;
1623 }
1624
1625 vp = (struct vnode *)fp->f_data;
1626 proc_fdunlock(p);
1627
1628 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1629 goto outdrop;
1630 }
1631
1632 /* Discard invalid offsets or lengths */
1633 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1634 error = EINVAL;
1635 goto outdrop;
1636 }
1637
1638 /*
1639 * Round the file offset down to a page-size boundary (or to 0).
1640 * The filesystem will need to round the length up to the end of the page boundary
1641 * or to the EOF of the file.
1642 */
1643 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1644 uint64_t foff_delta = args.fsr_offset - foff;
1645 args.fsr_offset = (off_t) foff;
1646
1647 /*
1648 * Now add in the delta to the supplied length. Since we may have adjusted the
1649 * offset, increase it by the amount that we adjusted.
1650 */
1651 args.fsr_length += foff_delta;
1652
1653 if ((error = vnode_getwithref(vp))) {
1654 goto outdrop;
1655 }
1656 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1657 (void)vnode_put(vp);
1658
1659 goto outdrop;
1660 }
/*
 * F_SETSIZE: set the file's size without zero-filling newly exposed
 * data.  Root-only; PRIV_VFS_SETSIZE additionally selects whether the
 * resize may skip zerofill (IO_NOZEROFILL) or must zero the new range.
 */
1661 case F_SETSIZE:
1662 if (fp->f_type != DTYPE_VNODE) {
1663 error = EBADF;
1664 goto out;
1665 }
1666 vp = (struct vnode *)fp->f_data;
1667 proc_fdunlock(p);
1668
1669 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1670 if (error) {
1671 goto outdrop;
1672 }
1673 AUDIT_ARG(value64, offset);
1674
1675 error = vnode_getwithref(vp);
1676 if (error) {
1677 goto outdrop;
1678 }
1679
1680 #if CONFIG_MACF
1681 error = mac_vnode_check_truncate(&context,
1682 fp->fp_glob->fg_cred, vp);
1683 if (error) {
1684 (void)vnode_put(vp);
1685 goto outdrop;
1686 }
1687 #endif
1688 /*
1689 * Make sure that we are root. Growing a file
1690 * without zero filling the data is a security hole.
1691 */
1692 if (!kauth_cred_issuser(kauth_cred_get())) {
1693 error = EACCES;
1694 } else {
1695 /*
1696 * Require privilege to change file size without zerofill,
1697 * else will change the file size and zerofill it.
1698 */
1699 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
1700 if (error == 0) {
1701 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
1702 } else {
1703 error = vnode_setsize(vp, offset, 0, &context);
1704 }
1705
1706 #if CONFIG_MACF
1707 if (error == 0) {
1708 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
1709 }
1710 #endif
1711 }
1712
1713 (void)vnode_put(vp);
1714 goto outdrop;
1715
/*
 * F_RDAHEAD / F_NOCACHE / F_NODIRECT / F_SINGLE_WRITER: toggle per-
 * description I/O hint flags on fg_flag with atomic ops (no fd lock
 * needed).  Note F_RDAHEAD is inverted: arg!=0 clears FNORDAHEAD.
 */
1716 case F_RDAHEAD:
1717 if (fp->f_type != DTYPE_VNODE) {
1718 error = EBADF;
1719 goto out;
1720 }
1721 if (uap->arg) {
1722 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1723 } else {
1724 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1725 }
1726 goto out;
1727
1728 case F_NOCACHE:
1729 if (fp->f_type != DTYPE_VNODE) {
1730 error = EBADF;
1731 goto out;
1732 }
1733 if (uap->arg) {
1734 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1735 } else {
1736 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1737 }
1738 goto out;
1739
1740 case F_NODIRECT:
1741 if (fp->f_type != DTYPE_VNODE) {
1742 error = EBADF;
1743 goto out;
1744 }
1745 if (uap->arg) {
1746 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1747 } else {
1748 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1749 }
1750 goto out;
1751
1752 case F_SINGLE_WRITER:
1753 if (fp->f_type != DTYPE_VNODE) {
1754 error = EBADF;
1755 goto out;
1756 }
1757 if (uap->arg) {
1758 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1759 } else {
1760 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1761 }
1762 goto out;
1763
/*
 * F_GLOBAL_NOCACHE: set/clear the vnode-wide (all descriptions) no-
 * cache hint; returns the previous state in *retval.
 */
1764 case F_GLOBAL_NOCACHE:
1765 if (fp->f_type != DTYPE_VNODE) {
1766 error = EBADF;
1767 goto out;
1768 }
1769 vp = (struct vnode *)fp->f_data;
1770 proc_fdunlock(p);
1771
1772 if ((error = vnode_getwithref(vp)) == 0) {
1773 *retval = vnode_isnocache(vp);
1774
1775 if (uap->arg) {
1776 vnode_setnocache(vp);
1777 } else {
1778 vnode_clearnocache(vp);
1779 }
1780
1781 (void)vnode_put(vp);
1782 }
1783 goto outdrop;
1784
/*
 * F_CHECK_OPENEVT: set/clear the vnode's open-event flag; returns the
 * previous state in *retval.
 */
1785 case F_CHECK_OPENEVT:
1786 if (fp->f_type != DTYPE_VNODE) {
1787 error = EBADF;
1788 goto out;
1789 }
1790 vp = (struct vnode *)fp->f_data;
1791 proc_fdunlock(p);
1792
1793 if ((error = vnode_getwithref(vp)) == 0) {
1794 *retval = vnode_is_openevt(vp);
1795
1796 if (uap->arg) {
1797 vnode_set_openevt(vp);
1798 } else {
1799 vnode_clear_openevt(vp);
1800 }
1801
1802 (void)vnode_put(vp);
1803 }
1804 goto outdrop;
1805
1805
/*
 * F_RDADVISE: issue a read-advisory (prefetch hint) for an offset/count
 * range; validated for non-negative values then passed to the
 * filesystem via VNOP_IOCTL.
 */
1806 case F_RDADVISE: {
1807 struct radvisory ra_struct;
1808
1809 if (fp->f_type != DTYPE_VNODE) {
1810 error = EBADF;
1811 goto out;
1812 }
1813 vp = (struct vnode *)fp->f_data;
1814 proc_fdunlock(p);
1815
1816 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1817 goto outdrop;
1818 }
1819 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
1820 error = EINVAL;
1821 goto outdrop;
1822 }
1823 if ((error = vnode_getwithref(vp)) == 0) {
1824 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1825
1826 (void)vnode_put(vp);
1827 }
1828 goto outdrop;
1829 }
1830
/* F_FLUSH_DATA: asynchronous fsync of this vnode (MNT_NOWAIT). */
1831 case F_FLUSH_DATA:
1832
1833 if (fp->f_type != DTYPE_VNODE) {
1834 error = EBADF;
1835 goto out;
1836 }
1837 vp = (struct vnode *)fp->f_data;
1838 proc_fdunlock(p);
1839
1840 if ((error = vnode_getwithref(vp)) == 0) {
1841 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1842
1843 (void)vnode_put(vp);
1844 }
1845 goto outdrop;
1846
/*
 * F_LOG2PHYS / F_LOG2PHYS_EXT: translate a logical file offset into a
 * physical device offset via VNOP_OFFTOBLK/VNOP_BLKTOOFF/VNOP_BLOCKMAP.
 * The _EXT variant takes the offset (and contig-bytes budget) from the
 * user struct; the plain variant uses the descriptor's current offset.
 */
1847 case F_LOG2PHYS:
1848 case F_LOG2PHYS_EXT: {
1849 struct log2phys l2p_struct = {}; /* structure for allocate command */
1850 int devBlockSize;
1851
1852 off_t file_offset = 0;
1853 size_t a_size = 0;
1854 size_t run = 0;
1855
1856 if (uap->cmd == F_LOG2PHYS_EXT) {
1857 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1858 if (error) {
1859 goto out;
1860 }
1861 file_offset = l2p_struct.l2p_devoffset;
1862 } else {
1863 file_offset = fp->f_offset;
1864 }
1865 if (fp->f_type != DTYPE_VNODE) {
1866 error = EBADF;
1867 goto out;
1868 }
1869 vp = (struct vnode *)fp->f_data;
1870 proc_fdunlock(p);
1871 if ((error = vnode_getwithref(vp))) {
1872 goto outdrop;
1873 }
/* resolve the containing logical block, then its block-aligned offset */
1874 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1875 if (error) {
1876 (void)vnode_put(vp);
1877 goto outdrop;
1878 }
1879 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1880 if (error) {
1881 (void)vnode_put(vp);
1882 goto outdrop;
1883 }
1884 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1885 if (uap->cmd == F_LOG2PHYS_EXT) {
1886 if (l2p_struct.l2p_contigbytes < 0) {
1887 vnode_put(vp);
1888 error = EINVAL;
1889 goto outdrop;
1890 }
1891
1892 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1893 } else {
1894 a_size = devBlockSize;
1895 }
1896
1897 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1898
1899 (void)vnode_put(vp);
1900
1901 if (!error) {
1902 l2p_struct.l2p_flags = 0; /* for now */
1903 if (uap->cmd == F_LOG2PHYS_EXT) {
/* report the contiguous bytes remaining from the requested offset */
1904 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1905 } else {
1906 l2p_struct.l2p_contigbytes = 0; /* for now */
1907 }
1908
1909 /*
1910 * The block number being -1 suggests that the file offset is not backed
1911 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1912 */
1913 if (bn == -1) {
1914 /* Don't multiply it by the block size */
1915 l2p_struct.l2p_devoffset = bn;
1916 } else {
1917 l2p_struct.l2p_devoffset = bn * devBlockSize;
1918 l2p_struct.l2p_devoffset += file_offset - offset;
1919 }
1920 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1921 }
1922 goto outdrop;
1923 }
/*
 * F_GETPATH / F_GETPATH_NOFIRMLINK: resolve the vnode back to a path
 * (optionally ignoring firmlinks) into a temporary MAXPATHLEN buffer
 * and copy it out to the caller.
 */
1924 case F_GETPATH:
1925 case F_GETPATH_NOFIRMLINK: {
1926 char *pathbufp;
1927 int pathlen;
1928
1929 if (fp->f_type != DTYPE_VNODE) {
1930 error = EBADF;
1931 goto out;
1932 }
1933 vp = (struct vnode *)fp->f_data;
1934 proc_fdunlock(p);
1935
1936 pathlen = MAXPATHLEN;
1937 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1938 if (pathbufp == NULL) {
1939 error = ENOMEM;
1940 goto outdrop;
1941 }
1942 if ((error = vnode_getwithref(vp)) == 0) {
1943 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1944 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1945 } else {
1946 error = vn_getpath(vp, pathbufp, &pathlen);
1947 }
1948 (void)vnode_put(vp);
1949
1950 if (error == 0) {
1951 error = copyout((caddr_t)pathbufp, argp, pathlen);
1952 }
1953 }
1954 FREE(pathbufp, M_TEMP);
1955 goto outdrop;
1956 }
1957
/*
 * F_PATHPKG_CHECK: copy in a user path and ask the VFS whether this
 * vnode lies inside that package path; result goes to *retval.
 */
1958 case F_PATHPKG_CHECK: {
1959 char *pathbufp;
1960 size_t pathlen;
1961
1962 if (fp->f_type != DTYPE_VNODE) {
1963 error = EBADF;
1964 goto out;
1965 }
1966 vp = (struct vnode *)fp->f_data;
1967 proc_fdunlock(p);
1968
1969 pathlen = MAXPATHLEN;
1970 pathbufp = zalloc(ZV_NAMEI);
1971
1972 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1973 if ((error = vnode_getwithref(vp)) == 0) {
1974 AUDIT_ARG(text, pathbufp);
1975 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
1976
1977 (void)vnode_put(vp);
1978 }
1979 }
1980 zfree(ZV_NAMEI, pathbufp);
1981 goto outdrop;
1982 }
1983
/*
 * Sync/freeze family: all five commands simply forward to the
 * filesystem through VNOP_IOCTL with no argument payload.
 */
1984 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1985 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1986 case F_BARRIERFSYNC: // fsync + barrier
1987 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1988 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1989 if (fp->f_type != DTYPE_VNODE) {
1990 error = EBADF;
1991 goto out;
1992 }
1993 vp = (struct vnode *)fp->f_data;
1994 proc_fdunlock(p);
1995
1996 if ((error = vnode_getwithref(vp)) == 0) {
1997 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1998
1999 (void)vnode_put(vp);
2000 }
2001 break;
2002 }
2003
2003
2004 /*
2005 * SPI (private) for opening a file starting from a dir fd
2006 */
2007 case F_OPENFROM: {
2008 struct user_fopenfrom fopen;
2009 struct vnode_attr va;
2010 struct nameidata nd;
2011 int cmode;
2012
2013 /* Check if this isn't a valid file descriptor */
/* the dir fd must be a readable vnode-backed descriptor */
2014 if ((fp->f_type != DTYPE_VNODE) ||
2015 (fp->f_flag & FREAD) == 0) {
2016 error = EBADF;
2017 goto out;
2018 }
2019 vp = (struct vnode *)fp->f_data;
2020 proc_fdunlock(p);
2021
2022 if (vnode_getwithref(vp)) {
2023 error = ENOENT;
2024 goto outdrop;
2025 }
2026
2027 /* Only valid for directories */
2028 if (vp->v_type != VDIR) {
2029 vnode_put(vp);
2030 error = ENOTDIR;
2031 goto outdrop;
2032 }
2033
2034 /*
2035 * Only entitled apps may use the credentials of the thread
2036 * that opened the file descriptor.
2037 * Non-entitled threads will use their own context.
2038 */
2039 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2040 has_entitlement = 1;
2041 }
2042
2043 /* Get flags, mode and pathname arguments. */
2044 if (IS_64BIT_PROCESS(p)) {
2045 error = copyin(argp, &fopen, sizeof(fopen));
2046 } else {
/* 32-bit caller: widen the compat struct field by field */
2047 struct user32_fopenfrom fopen32;
2048
2049 error = copyin(argp, &fopen32, sizeof(fopen32));
2050 fopen.o_flags = fopen32.o_flags;
2051 fopen.o_mode = fopen32.o_mode;
2052 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2053 }
2054 if (error) {
2055 vnode_put(vp);
2056 goto outdrop;
2057 }
2058 AUDIT_ARG(fflags, fopen.o_flags);
2059 AUDIT_ARG(mode, fopen.o_mode);
2060 VATTR_INIT(&va);
2061 /* Mask off all but regular access permissions */
2062 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2063 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2064
2065 /* Start the lookup relative to the file descriptor's vnode. */
2066 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2067 fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2068 nd.ni_dvp = vp;
2069
2070 error = open1(has_entitlement ? &context : vfs_context_current(),
2071 &nd, fopen.o_flags, &va, fileproc_alloc_init, NULL, retval);
2072
2073 vnode_put(vp);
2074 break;
2075 }
2076 /*
2077 * SPI (private) for unlinking a file starting from a dir fd
2078 */
2079 case F_UNLINKFROM: {
2080 user_addr_t pathname;
2081
2082 /* Check if this isn't a valid file descriptor */
2083 if ((fp->f_type != DTYPE_VNODE) ||
2084 (fp->f_flag & FREAD) == 0) {
2085 error = EBADF;
2086 goto out;
2087 }
2088 vp = (struct vnode *)fp->f_data;
2089 proc_fdunlock(p);
2090
2091 if (vnode_getwithref(vp)) {
2092 error = ENOENT;
2093 goto outdrop;
2094 }
2095
2096 /* Only valid for directories */
2097 if (vp->v_type != VDIR) {
2098 vnode_put(vp);
2099 error = ENOTDIR;
2100 goto outdrop;
2101 }
2102
2103 /*
2104 * Only entitled apps may use the credentials of the thread
2105 * that opened the file descriptor.
2106 * Non-entitled threads will use their own context.
2107 */
2108 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2109 has_entitlement = 1;
2110 }
2111
2112 /* Get flags, mode and pathname arguments. */
2113 if (IS_64BIT_PROCESS(p)) {
2114 pathname = (user_addr_t)argp;
2115 } else {
2116 pathname = CAST_USER_ADDR_T(argp);
2117 }
2118
2119 /* Start the lookup relative to the file descriptor's vnode. */
2120 error = unlink1(has_entitlement ? &context : vfs_context_current(),
2121 vp, pathname, UIO_USERSPACE, 0);
2122
2123 vnode_put(vp);
2124 break;
2125 }
2126
2126
/*
 * Code-signature attachment.  F_ADDSIGS copies the blob from user
 * memory; the F_ADDFILESIGS variants read it from the file itself.
 * After registering via ubc_cs_blob_add, _RETURN/_FOR_DYLD_SIM/_INFO
 * report the blob's end offset (and _INFO the cdhash/hash type) back.
 */
2127 case F_ADDSIGS:
2128 case F_ADDFILESIGS:
2129 case F_ADDFILESIGS_FOR_DYLD_SIM:
2130 case F_ADDFILESIGS_RETURN:
2131 case F_ADDFILESIGS_INFO:
2132 {
2133 struct cs_blob *blob = NULL;
2134 struct user_fsignatures fs;
2135 kern_return_t kr;
2136 vm_offset_t kernel_blob_addr;
2137 vm_size_t kernel_blob_size;
2138 int blob_add_flags = 0;
/* only copy in the input portion of the struct; _INFO's input region
 * extends up to its first output field */
2139 const size_t sizeof_fs = (uap->cmd == F_ADDFILESIGS_INFO ?
2140 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
2141 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
2142
2143 if (fp->f_type != DTYPE_VNODE) {
2144 error = EBADF;
2145 goto out;
2146 }
2147 vp = (struct vnode *)fp->f_data;
2148 proc_fdunlock(p);
2149
2150 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
/* dyld_sim blobs force the process to be killed on invalid pages */
2151 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
2152 if ((p->p_csflags & CS_KILL) == 0) {
2153 proc_lock(p);
2154 p->p_csflags |= CS_KILL;
2155 proc_unlock(p);
2156 }
2157 }
2158
2159 error = vnode_getwithref(vp);
2160 if (error) {
2161 goto outdrop;
2162 }
2163
2164 if (IS_64BIT_PROCESS(p)) {
2165 error = copyin(argp, &fs, sizeof_fs);
2166 } else {
/* F_ADDFILESIGS_INFO has no 32-bit compat layout */
2167 if (uap->cmd == F_ADDFILESIGS_INFO) {
2168 error = EINVAL;
2169 vnode_put(vp);
2170 goto outdrop;
2171 }
2172
2173 struct user32_fsignatures fs32;
2174
2175 error = copyin(argp, &fs32, sizeof(fs32));
2176 fs.fs_file_start = fs32.fs_file_start;
2177 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
2178 fs.fs_blob_size = fs32.fs_blob_size;
2179 }
2180
2181 if (error) {
2182 vnode_put(vp);
2183 goto outdrop;
2184 }
2185
2186 /*
2187 * First check if we have something loaded a this offset
2188 */
2189 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
2190 if (blob != NULL) {
2191 /* If this is for dyld_sim revalidate the blob */
2192 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2193 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
2194 if (error) {
2195 blob = NULL;
/* EAGAIN means the stale blob was dropped: fall through and
 * load a fresh copy below */
2196 if (error != EAGAIN) {
2197 vnode_put(vp);
2198 goto outdrop;
2199 }
2200 }
2201 }
2202 }
2203
2204 if (blob == NULL) {
2205 /*
2206 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
2207 * our use cases for the immediate future, but note that at the time of this commit, some
2208 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
2209 *
2210 * We should consider how we can manage this more effectively; the above means that some
2211 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
2212 * threshold considered ridiculous at the time of this change.
2213 */
2214 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
2215 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2216 error = E2BIG;
2217 vnode_put(vp);
2218 goto outdrop;
2219 }
2220
2221 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2222 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2223 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2224 error = ENOMEM;
2225 vnode_put(vp);
2226 goto outdrop;
2227 }
2228
2229 if (uap->cmd == F_ADDSIGS) {
2230 error = copyin(fs.fs_blob_start,
2231 (void *) kernel_blob_addr,
2232 fs.fs_blob_size);
2233 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
2234 int resid;
2235
2236 error = vn_rdwr(UIO_READ,
2237 vp,
2238 (caddr_t) kernel_blob_addr,
2239 (int)kernel_blob_size,
2240 fs.fs_file_start + fs.fs_blob_start,
2241 UIO_SYSSPACE,
2242 0,
2243 kauth_cred_get(),
2244 &resid,
2245 p);
2246 if ((error == 0) && resid) {
2247 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2248 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2249 }
2250 }
2251
2252 if (error) {
2253 ubc_cs_blob_deallocate(kernel_blob_addr,
2254 kernel_blob_size);
2255 vnode_put(vp);
2256 goto outdrop;
2257 }
2258
2259 blob = NULL;
2260 error = ubc_cs_blob_add(vp,
2261 proc_platform(p),
2262 CPU_TYPE_ANY, /* not for a specific architecture */
2263 CPU_SUBTYPE_ANY,
2264 fs.fs_file_start,
2265 &kernel_blob_addr,
2266 kernel_blob_size,
2267 NULL,
2268 blob_add_flags,
2269 &blob);
2270
2271 /* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
2272 if (error) {
2273 if (kernel_blob_addr) {
2274 ubc_cs_blob_deallocate(kernel_blob_addr,
2275 kernel_blob_size);
2276 }
2277 vnode_put(vp);
2278 goto outdrop;
2279 } else {
2280 #if CHECK_CS_VALIDATION_BITMAP
2281 ubc_cs_validation_bitmap_allocate( vp );
2282 #endif
2283 }
2284 }
2285
2286 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
2287 uap->cmd == F_ADDFILESIGS_INFO) {
2288 /*
2289 * The first element of the structure is a
2290 * off_t that happen to have the same size for
2291 * all archs. Lets overwrite that.
2292 */
2293 off_t end_offset = 0;
2294 if (blob) {
2295 end_offset = blob->csb_end_offset;
2296 }
2297 error = copyout(&end_offset, argp, sizeof(end_offset));
2298
2299 if (error) {
2300 vnode_put(vp);
2301 goto outdrop;
2302 }
2303 }
2304
2305 if (uap->cmd == F_ADDFILESIGS_INFO) {
2306 /* Return information. What we copy out depends on the size of the
2307 * passed in structure, to keep binary compatibility. */
2308
2309 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
2310 // enough room for fs_cdhash[20]+fs_hash_type
2311
2312 if (blob != NULL) {
2313 error = copyout(blob->csb_cdhash,
2314 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
2315 USER_FSIGNATURES_CDHASH_LEN);
2316 if (error) {
2317 vnode_put(vp);
2318 goto outdrop;
2319 }
2320 int hashtype = cs_hash_type(blob->csb_hashtype);
2321 error = copyout(&hashtype,
2322 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
2323 sizeof(int));
2324 if (error) {
2325 vnode_put(vp);
2326 goto outdrop;
2327 }
2328 }
2329 }
2330 }
2331
2332 (void) vnode_put(vp);
2333 break;
2334 }
2335 #if CONFIG_SUPPLEMENTAL_SIGNATURES
/*
 * F_ADDFILESUPPL: attach a supplemental code signature, read from this
 * descriptor's file, to the "original" file named by fs_orig_fd.
 * 64-bit callers only.  Holds iocounts on both vnodes plus a reference
 * on the original fileproc; dropboth unwinds all three on failure.
 */
2336 case F_ADDFILESUPPL:
2337 {
2338 struct vnode *ivp;
2339 struct cs_blob *blob = NULL;
2340 struct user_fsupplement fs;
2341 int orig_fd;
2342 struct fileproc* orig_fp = NULL;
2343 kern_return_t kr;
2344 vm_offset_t kernel_blob_addr;
2345 vm_size_t kernel_blob_size;
2346
2347 if (!IS_64BIT_PROCESS(p)) {
2348 error = EINVAL;
2349 goto out; // drop fp and unlock fds
2350 }
2351
2352 if (fp->f_type != DTYPE_VNODE) {
2353 error = EBADF;
2354 goto out;
2355 }
2356
2357 error = copyin(argp, &fs, sizeof(fs));
2358 if (error) {
2359 goto out;
2360 }
2361
2362 orig_fd = fs.fs_orig_fd;
2363 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
2364 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
2365 goto out;
2366 }
2367
2368 if (orig_fp->f_type != DTYPE_VNODE) {
2369 error = EBADF;
2370 fp_drop(p, orig_fd, orig_fp, 1);
2371 goto out;
2372 }
2373
2374 ivp = (struct vnode *)orig_fp->f_data;
2375
2376 vp = (struct vnode *)fp->f_data;
2377
2378 proc_fdunlock(p);
2379
2380 error = vnode_getwithref(ivp);
2381 if (error) {
2382 fp_drop(p, orig_fd, orig_fp, 0);
2383 goto outdrop; //drop fp
2384 }
2385
2386 error = vnode_getwithref(vp);
2387 if (error) {
2388 vnode_put(ivp);
2389 fp_drop(p, orig_fd, orig_fp, 0);
2390 goto outdrop;
2391 }
2392
/* same blob-size cap as the F_ADDSIGS family (CS_MAX_BLOB_SIZE) */
2393 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2394 error = E2BIG;
2395 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
2396 }
2397
2398 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2399 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2400 if (kr != KERN_SUCCESS) {
2401 error = ENOMEM;
2402 goto dropboth;
2403 }
2404
2405 int resid;
2406 error = vn_rdwr(UIO_READ, vp,
2407 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
2408 fs.fs_file_start + fs.fs_blob_start,
2409 UIO_SYSSPACE, 0,
2410 kauth_cred_get(), &resid, p);
2411 if ((error == 0) && resid) {
2412 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2413 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2414 }
2415
2416 if (error) {
2417 ubc_cs_blob_deallocate(kernel_blob_addr,
2418 kernel_blob_size);
2419 goto dropboth;
2420 }
2421
2422 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
2423 &kernel_blob_addr, kernel_blob_size, &blob);
2424
2425 /* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
2426 if (error) {
2427 if (kernel_blob_addr) {
2428 ubc_cs_blob_deallocate(kernel_blob_addr,
2429 kernel_blob_size);
2430 }
2431 goto dropboth;
2432 }
2433 vnode_put(ivp);
2434 vnode_put(vp);
2435 fp_drop(p, orig_fd, orig_fp, 0);
2436 break;
2437
2438 dropboth:
2439 vnode_put(ivp);
2440 vnode_put(vp);
2441 fp_drop(p, orig_fd, orig_fp, 0);
2442 goto outdrop;
2443 }
2444 #endif
/* F_GETCODEDIR / F_FINDSIGS: not implemented; always ENOTSUP. */
2445 case F_GETCODEDIR:
2446 case F_FINDSIGS: {
2447 error = ENOTSUP;
2448 goto out;
2449 }
/*
 * F_CHECK_LV: library-validation check on this open file, delegated to
 * the MACF policy (a no-op success when CONFIG_MACF is off).
 */
2450 case F_CHECK_LV: {
2451 struct fileglob *fg;
2452 fchecklv_t lv = {};
2453
2454 if (fp->f_type != DTYPE_VNODE) {
2455 error = EBADF;
2456 goto out;
2457 }
2458 fg = fp->fp_glob;
2459 proc_fdunlock(p);
2460
2461 if (IS_64BIT_PROCESS(p)) {
2462 error = copyin(argp, &lv, sizeof(lv));
2463 } else {
2464 struct user32_fchecklv lv32 = {};
2465
2466 error = copyin(argp, &lv32, sizeof(lv32));
2467 lv.lv_file_start = lv32.lv_file_start;
2468 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2469 lv.lv_error_message_size = lv32.lv_error_message_size;
2470 }
2471 if (error) {
2472 goto outdrop;
2473 }
2474
2475 #if CONFIG_MACF
2476 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2477 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2478 #endif
2479
2480 break;
2481 }
/*
 * F_GETSIGSINFO: query properties of the code-signature blob at the
 * given file offset; currently only GETSIGSINFO_PLATFORM_BINARY is
 * supported, copying out the platform-binary flag.
 */
2482 case F_GETSIGSINFO: {
2483 struct cs_blob *blob = NULL;
2484 fgetsigsinfo_t sigsinfo = {};
2485
2486 if (fp->f_type != DTYPE_VNODE) {
2487 error = EBADF;
2488 goto out;
2489 }
2490 vp = (struct vnode *)fp->f_data;
2491 proc_fdunlock(p);
2492
2493 error = vnode_getwithref(vp);
2494 if (error) {
2495 goto outdrop;
2496 }
2497
2498 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
2499 if (error) {
2500 vnode_put(vp);
2501 goto outdrop;
2502 }
2503
2504 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
2505 if (blob == NULL) {
2506 error = ENOENT;
2507 vnode_put(vp);
2508 goto outdrop;
2509 }
2510 switch (sigsinfo.fg_info_request) {
2511 case GETSIGSINFO_PLATFORM_BINARY:
2512 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
2513 error = copyout(&sigsinfo.fg_sig_is_platform,
2514 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
2515 sizeof(sigsinfo.fg_sig_is_platform));
2516 if (error) {
2517 vnode_put(vp);
2518 goto outdrop;
2519 }
2520 break;
2521 default:
2522 error = EINVAL;
2523 vnode_put(vp);
2524 goto outdrop;
2525 }
2526 vnode_put(vp);
2527 break;
2528 }
2529 #if CONFIG_PROTECT
2530 case F_GETPROTECTIONCLASS: {
2531 if (fp->f_type != DTYPE_VNODE) {
2532 error = EBADF;
2533 goto out;
2534 }
2535 vp = (struct vnode *)fp->f_data;
2536
2537 proc_fdunlock(p);
2538
2539 if (vnode_getwithref(vp)) {
2540 error = ENOENT;
2541 goto outdrop;
2542 }
2543
2544 struct vnode_attr va;
2545
2546 VATTR_INIT(&va);
2547 VATTR_WANTED(&va, va_dataprotect_class);
2548 error = VNOP_GETATTR(vp, &va, &context);
2549 if (!error) {
2550 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2551 *retval = va.va_dataprotect_class;
2552 } else {
2553 error = ENOTSUP;
2554 }
2555 }
2556
2557 vnode_put(vp);
2558 break;
2559 }
2560
2561 case F_SETPROTECTIONCLASS: {
2562 /* tmp must be a valid PROTECTION_CLASS_* */
2563 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2564
2565 if (fp->f_type != DTYPE_VNODE) {
2566 error = EBADF;
2567 goto out;
2568 }
2569 vp = (struct vnode *)fp->f_data;
2570
2571 proc_fdunlock(p);
2572
2573 if (vnode_getwithref(vp)) {
2574 error = ENOENT;
2575 goto outdrop;
2576 }
2577
2578 /* Only go forward if you have write access */
2579 vfs_context_t ctx = vfs_context_current();
2580 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2581 vnode_put(vp);
2582 error = EBADF;
2583 goto outdrop;
2584 }
2585
2586 struct vnode_attr va;
2587
2588 VATTR_INIT(&va);
2589 VATTR_SET(&va, va_dataprotect_class, tmp);
2590
2591 error = VNOP_SETATTR(vp, &va, ctx);
2592
2593 vnode_put(vp);
2594 break;
2595 }
2596
2597 case F_TRANSCODEKEY: {
2598 if (fp->f_type != DTYPE_VNODE) {
2599 error = EBADF;
2600 goto out;
2601 }
2602
2603 vp = (struct vnode *)fp->f_data;
2604 proc_fdunlock(p);
2605
2606 if (vnode_getwithref(vp)) {
2607 error = ENOENT;
2608 goto outdrop;
2609 }
2610
2611 cp_key_t k = {
2612 .len = CP_MAX_WRAPPEDKEYSIZE,
2613 };
2614
2615 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2616
2617 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2618
2619 vnode_put(vp);
2620
2621 if (error == 0) {
2622 error = copyout(k.key, argp, k.len);
2623 *retval = k.len;
2624 }
2625
2626 FREE(k.key, M_TEMP);
2627
2628 break;
2629 }
2630
2631 case F_GETPROTECTIONLEVEL: {
2632 if (fp->f_type != DTYPE_VNODE) {
2633 error = EBADF;
2634 goto out;
2635 }
2636
2637 vp = (struct vnode*) fp->f_data;
2638 proc_fdunlock(p);
2639
2640 if (vnode_getwithref(vp)) {
2641 error = ENOENT;
2642 goto outdrop;
2643 }
2644
2645 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2646
2647 vnode_put(vp);
2648 break;
2649 }
2650
2651 case F_GETDEFAULTPROTLEVEL: {
2652 if (fp->f_type != DTYPE_VNODE) {
2653 error = EBADF;
2654 goto out;
2655 }
2656
2657 vp = (struct vnode*) fp->f_data;
2658 proc_fdunlock(p);
2659
2660 if (vnode_getwithref(vp)) {
2661 error = ENOENT;
2662 goto outdrop;
2663 }
2664
2665 /*
2666 * if cp_get_major_vers fails, error will be set to proper errno
2667 * and cp_version will still be 0.
2668 */
2669
2670 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2671
2672 vnode_put(vp);
2673 break;
2674 }
2675
2676 #endif /* CONFIG_PROTECT */
2677
2678 case F_MOVEDATAEXTENTS: {
2679 struct fileproc *fp2 = NULL;
2680 struct vnode *src_vp = NULLVP;
2681 struct vnode *dst_vp = NULLVP;
	/* We need to grab the 2nd FD out of the arguments before moving on. */
2683 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2684
2685 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2686 if (error) {
2687 goto out;
2688 }
2689
2690 if (fp->f_type != DTYPE_VNODE) {
2691 error = EBADF;
2692 goto out;
2693 }
2694
2695 /*
2696 * For now, special case HFS+ and APFS only, since this
2697 * is SPI.
2698 */
2699 src_vp = (struct vnode *)fp->f_data;
2700 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2701 error = ENOTSUP;
2702 goto out;
2703 }
2704
2705 /*
2706 * Get the references before we start acquiring iocounts on the vnodes,
2707 * while we still hold the proc fd lock
2708 */
2709 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2710 error = EBADF;
2711 goto out;
2712 }
2713 if (fp2->f_type != DTYPE_VNODE) {
2714 fp_drop(p, fd2, fp2, 1);
2715 error = EBADF;
2716 goto out;
2717 }
2718 dst_vp = (struct vnode *)fp2->f_data;
2719 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2720 fp_drop(p, fd2, fp2, 1);
2721 error = ENOTSUP;
2722 goto out;
2723 }
2724
2725 #if CONFIG_MACF
2726 /* Re-do MAC checks against the new FD, pass in a fake argument */
2727 error = mac_file_check_fcntl(proc_ucred(p), fp2->fp_glob, uap->cmd, 0);
2728 if (error) {
2729 fp_drop(p, fd2, fp2, 1);
2730 goto out;
2731 }
2732 #endif
2733 /* Audit the 2nd FD */
2734 AUDIT_ARG(fd, fd2);
2735
2736 proc_fdunlock(p);
2737
2738 if (vnode_getwithref(src_vp)) {
2739 fp_drop(p, fd2, fp2, 0);
2740 error = ENOENT;
2741 goto outdrop;
2742 }
2743 if (vnode_getwithref(dst_vp)) {
2744 vnode_put(src_vp);
2745 fp_drop(p, fd2, fp2, 0);
2746 error = ENOENT;
2747 goto outdrop;
2748 }
2749
2750 /*
2751 * Basic asserts; validate they are not the same and that
2752 * both live on the same filesystem.
2753 */
2754 if (dst_vp == src_vp) {
2755 vnode_put(src_vp);
2756 vnode_put(dst_vp);
2757 fp_drop(p, fd2, fp2, 0);
2758 error = EINVAL;
2759 goto outdrop;
2760 }
2761
2762 if (dst_vp->v_mount != src_vp->v_mount) {
2763 vnode_put(src_vp);
2764 vnode_put(dst_vp);
2765 fp_drop(p, fd2, fp2, 0);
2766 error = EXDEV;
2767 goto outdrop;
2768 }
2769
2770 /* Now we have a legit pair of FDs. Go to work */
2771
2772 /* Now check for write access to the target files */
2773 if (vnode_authorize(src_vp, NULLVP,
2774 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2775 vnode_put(src_vp);
2776 vnode_put(dst_vp);
2777 fp_drop(p, fd2, fp2, 0);
2778 error = EBADF;
2779 goto outdrop;
2780 }
2781
2782 if (vnode_authorize(dst_vp, NULLVP,
2783 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2784 vnode_put(src_vp);
2785 vnode_put(dst_vp);
2786 fp_drop(p, fd2, fp2, 0);
2787 error = EBADF;
2788 goto outdrop;
2789 }
2790
2791 /* Verify that both vps point to files and not directories */
2792 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2793 error = EINVAL;
2794 vnode_put(src_vp);
2795 vnode_put(dst_vp);
2796 fp_drop(p, fd2, fp2, 0);
2797 goto outdrop;
2798 }
2799
2800 /*
2801 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2802 * We'll pass in our special bit indicating that the new behavior is expected
2803 */
2804
2805 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2806
2807 vnode_put(src_vp);
2808 vnode_put(dst_vp);
2809 fp_drop(p, fd2, fp2, 0);
2810 break;
2811 }
2812
2813 /*
2814 * SPI for making a file compressed.
2815 */
2816 case F_MAKECOMPRESSED: {
2817 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2818
2819 if (fp->f_type != DTYPE_VNODE) {
2820 error = EBADF;
2821 goto out;
2822 }
2823
2824 vp = (struct vnode*) fp->f_data;
2825 proc_fdunlock(p);
2826
2827 /* get the vnode */
2828 if (vnode_getwithref(vp)) {
2829 error = ENOENT;
2830 goto outdrop;
2831 }
2832
2833 /* Is it a file? */
2834 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2835 vnode_put(vp);
2836 error = EBADF;
2837 goto outdrop;
2838 }
2839
2840 /* invoke ioctl to pass off to FS */
2841 /* Only go forward if you have write access */
2842 vfs_context_t ctx = vfs_context_current();
2843 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2844 vnode_put(vp);
2845 error = EBADF;
2846 goto outdrop;
2847 }
2848
2849 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2850
2851 vnode_put(vp);
2852 break;
2853 }
2854
2855 /*
2856 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will be written to the Fastflow.
2858 */
2859 case F_SET_GREEDY_MODE:
2860 /* intentionally drop through to the same handler as F_SETSTATIC.
2861 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2862 */
2863
2864 /*
2865 * SPI (private) for indicating to a filesystem that subsequent writes to
2866 * the open FD will represent static content.
2867 */
2868 case F_SETSTATICCONTENT: {
2869 caddr_t ioctl_arg = NULL;
2870
2871 if (uap->arg) {
2872 ioctl_arg = (caddr_t) 1;
2873 }
2874
2875 if (fp->f_type != DTYPE_VNODE) {
2876 error = EBADF;
2877 goto out;
2878 }
2879 vp = (struct vnode *)fp->f_data;
2880 proc_fdunlock(p);
2881
2882 error = vnode_getwithref(vp);
2883 if (error) {
2884 error = ENOENT;
2885 goto outdrop;
2886 }
2887
2888 /* Only go forward if you have write access */
2889 vfs_context_t ctx = vfs_context_current();
2890 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2891 vnode_put(vp);
2892 error = EBADF;
2893 goto outdrop;
2894 }
2895
2896 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2897 (void)vnode_put(vp);
2898
2899 break;
2900 }
2901
2902 /*
2903 * SPI (private) for indicating to the lower level storage driver that the
2904 * subsequent writes should be of a particular IO type (burst, greedy, static),
2905 * or other flavors that may be necessary.
2906 */
2907 case F_SETIOTYPE: {
2908 caddr_t param_ptr;
2909 uint32_t param;
2910
2911 if (uap->arg) {
2912 /* extract 32 bits of flags from userland */
2913 param_ptr = (caddr_t) uap->arg;
2914 param = (uint32_t) param_ptr;
2915 } else {
2916 /* If no argument is specified, error out */
2917 error = EINVAL;
2918 goto out;
2919 }
2920
2921 /*
2922 * Validate the different types of flags that can be specified:
2923 * all of them are mutually exclusive for now.
2924 */
2925 switch (param) {
2926 case F_IOTYPE_ISOCHRONOUS:
2927 break;
2928
2929 default:
2930 error = EINVAL;
2931 goto out;
2932 }
2933
2934
2935 if (fp->f_type != DTYPE_VNODE) {
2936 error = EBADF;
2937 goto out;
2938 }
2939 vp = (struct vnode *)fp->f_data;
2940 proc_fdunlock(p);
2941
2942 error = vnode_getwithref(vp);
2943 if (error) {
2944 error = ENOENT;
2945 goto outdrop;
2946 }
2947
2948 /* Only go forward if you have write access */
2949 vfs_context_t ctx = vfs_context_current();
2950 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2951 vnode_put(vp);
2952 error = EBADF;
2953 goto outdrop;
2954 }
2955
2956 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2957 (void)vnode_put(vp);
2958
2959 break;
2960 }
2961
2962 /*
2963 * Set the vnode pointed to by 'fd'
2964 * and tag it as the (potentially future) backing store
2965 * for another filesystem
2966 */
2967 case F_SETBACKINGSTORE: {
2968 if (fp->f_type != DTYPE_VNODE) {
2969 error = EBADF;
2970 goto out;
2971 }
2972
2973 vp = (struct vnode *)fp->f_data;
2974
2975 if (vp->v_tag != VT_HFS) {
2976 error = EINVAL;
2977 goto out;
2978 }
2979 proc_fdunlock(p);
2980
2981 if (vnode_getwithref(vp)) {
2982 error = ENOENT;
2983 goto outdrop;
2984 }
2985
2986 /* only proceed if you have write access */
2987 vfs_context_t ctx = vfs_context_current();
2988 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2989 vnode_put(vp);
2990 error = EBADF;
2991 goto outdrop;
2992 }
2993
2994
2995 /* If arg != 0, set, otherwise unset */
2996 if (uap->arg) {
2997 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2998 } else {
2999 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
3000 }
3001
3002 vnode_put(vp);
3003 break;
3004 }
3005
3006 /*
3007 * like F_GETPATH, but special semantics for
3008 * the mobile time machine handler.
3009 */
3010 case F_GETPATH_MTMINFO: {
3011 char *pathbufp;
3012 int pathlen;
3013
3014 if (fp->f_type != DTYPE_VNODE) {
3015 error = EBADF;
3016 goto out;
3017 }
3018 vp = (struct vnode *)fp->f_data;
3019 proc_fdunlock(p);
3020
3021 pathlen = MAXPATHLEN;
3022 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
3023 if (pathbufp == NULL) {
3024 error = ENOMEM;
3025 goto outdrop;
3026 }
3027 if ((error = vnode_getwithref(vp)) == 0) {
3028 int backingstore = 0;
3029
3030 /* Check for error from vn_getpath before moving on */
3031 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
3032 if (vp->v_tag == VT_HFS) {
3033 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
3034 }
3035 (void)vnode_put(vp);
3036
3037 if (error == 0) {
3038 error = copyout((caddr_t)pathbufp, argp, pathlen);
3039 }
3040 if (error == 0) {
3041 /*
3042 * If the copyout was successful, now check to ensure
3043 * that this vnode is not a BACKINGSTORE vnode. mtmd
3044 * wants the path regardless.
3045 */
3046 if (backingstore) {
3047 error = EBUSY;
3048 }
3049 }
3050 } else {
3051 (void)vnode_put(vp);
3052 }
3053 }
3054 FREE(pathbufp, M_TEMP);
3055 goto outdrop;
3056 }
3057
3058 #if DEBUG || DEVELOPMENT
3059 case F_RECYCLE:
3060 if (fp->f_type != DTYPE_VNODE) {
3061 error = EBADF;
3062 goto out;
3063 }
3064 vp = (struct vnode *)fp->f_data;
3065 proc_fdunlock(p);
3066
3067 vnode_recycle(vp);
3068 break;
3069 #endif
3070
3071 default:
3072 /*
		 * This is an fcntl() that we do not recognize at this level;
3074 * if this is a vnode, we send it down into the VNOP_IOCTL
3075 * for this vnode; this can include special devices, and will
3076 * effectively overload fcntl() to send ioctl()'s.
3077 */
3078 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
3079 error = EINVAL;
3080 goto out;
3081 }
3082
3083 /* Catch any now-invalid fcntl() selectors */
3084 switch (uap->cmd) {
3085 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
3086 case (int)FSIOC_FIOSEEKHOLE:
3087 case (int)FSIOC_FIOSEEKDATA:
3088 case (int)FSIOC_CAS_BSDFLAGS:
3089 case HFS_GET_BOOT_INFO:
3090 case HFS_SET_BOOT_INFO:
3091 case FIOPINSWAP:
3092 case F_MARKDEPENDENCY:
3093 case TIOCREVOKE:
3094 case TIOCREVOKECLEAR:
3095 error = EINVAL;
3096 goto out;
3097 default:
3098 break;
3099 }
3100
3101 if (fp->f_type != DTYPE_VNODE) {
3102 error = EBADF;
3103 goto out;
3104 }
3105 vp = (struct vnode *)fp->f_data;
3106 proc_fdunlock(p);
3107
3108 if ((error = vnode_getwithref(vp)) == 0) {
3109 #define STK_PARAMS 128
3110 char stkbuf[STK_PARAMS] = {0};
3111 unsigned int size;
3112 caddr_t data, memp;
3113 /*
3114 * For this to work properly, we have to copy in the
3115 * ioctl() cmd argument if there is one; we must also
3116 * check that a command parameter, if present, does
3117 * not exceed the maximum command length dictated by
3118 * the number of bits we have available in the command
3119 * to represent a structure length. Finally, we have
3120 * to copy the results back out, if it is that type of
3121 * ioctl().
3122 */
3123 size = IOCPARM_LEN(uap->cmd);
3124 if (size > IOCPARM_MAX) {
3125 (void)vnode_put(vp);
3126 error = EINVAL;
3127 break;
3128 }
3129
3130 memp = NULL;
3131 if (size > sizeof(stkbuf)) {
3132 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
3133 if (memp == 0) {
3134 (void)vnode_put(vp);
3135 error = ENOMEM;
3136 goto outdrop;
3137 }
3138 data = memp;
3139 } else {
3140 data = &stkbuf[0];
3141 }
3142
3143 if (uap->cmd & IOC_IN) {
3144 if (size) {
3145 /* structure */
3146 error = copyin(argp, data, size);
3147 if (error) {
3148 (void)vnode_put(vp);
3149 if (memp) {
3150 kheap_free(KHEAP_TEMP, memp, size);
3151 }
3152 goto outdrop;
3153 }
3154
3155 /* Bzero the section beyond that which was needed */
3156 if (size <= sizeof(stkbuf)) {
3157 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
3158 }
3159 } else {
3160 /* int */
3161 if (is64bit) {
3162 *(user_addr_t *)data = argp;
3163 } else {
3164 *(uint32_t *)data = (uint32_t)argp;
3165 }
3166 };
3167 } else if ((uap->cmd & IOC_OUT) && size) {
3168 /*
3169 * Zero the buffer so the user always
3170 * gets back something deterministic.
3171 */
3172 bzero(data, size);
3173 } else if (uap->cmd & IOC_VOID) {
3174 if (is64bit) {
3175 *(user_addr_t *)data = argp;
3176 } else {
3177 *(uint32_t *)data = (uint32_t)argp;
3178 }
3179 }
3180
3181 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
3182
3183 (void)vnode_put(vp);
3184
3185 /* Copy any output data to user */
3186 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
3187 error = copyout(data, argp, size);
3188 }
3189 if (memp) {
3190 kheap_free(KHEAP_TEMP, memp, size);
3191 }
3192 }
3193 break;
3194 }
3195
3196 outdrop:
3197 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3198 fp_drop(p, fd, fp, 0);
3199 return error;
3200 out:
3201 fp_drop(p, fd, fp, 1);
3202 proc_fdunlock(p);
3203 return error;
3204 }
3205
3206
3207 /*
3208 * finishdup
3209 *
3210 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
3211 *
3212 * Parameters: p Process performing the dup
3213 * old The fd to dup
3214 * new The fd to dup it to
3215 * fd_flags Flags to augment the new fd
3216 * retval Pointer to the call return area
3217 *
3218 * Returns: 0 Success
3219 * EBADF
3220 * ENOMEM
3221 *
3222 * Implicit returns:
3223 * *retval (modified) The new descriptor
3224 *
3225 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
3226 * the caller
3227 *
3228 * Notes: This function may drop and reacquire this lock; it is unsafe
3229 * for a caller to assume that other state protected by the lock
3230 * has not been subsequently changed out from under it.
3231 */
int
finishdup(proc_t p,
    struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * The source fd must refer to an installed fileproc that is not
	 * in flux (UF_RESERVED set means another thread is operating on
	 * the slot); otherwise release the reserved target slot and fail.
	 */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	/* Give MAC policies a chance to veto the dup before committing. */
	error = mac_file_check_dup(proc_ucred(p), ofp->fp_glob, new);
	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/*
	 * Drop the fd lock across the (potentially blocking) fileproc
	 * allocation.  The 'new' slot stays reserved while the lock is
	 * dropped, so no other thread can claim it; but other fd-table
	 * state may change out from under the caller (see header note).
	 */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init(NULL);

	proc_fdlock(p);

	if (nfp == NULL) {
		fdrelse(p, new);
		return ENOMEM;
	}

	/* Both descriptors now share the same open-file object (fileglob). */
	fg_ref(p, ofp->fp_glob);
	nfp->fp_glob = ofp->fp_glob;

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new > fdp->fd_lastfile) {
		fdp->fd_lastfile = new;
	}
	*fdflags(p, new) |= fd_flags;
	/* Publish the new fileproc and clear the slot's reservation. */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
3290
3291
3292 /*
3293 * sys_close
3294 *
3295 * Description: The implementation of the close(2) system call
3296 *
3297 * Parameters: p Process in whose per process file table
3298 * the close is to occur
3299 * uap->fd fd to be closed
3300 * retval <unused>
3301 *
3302 * Returns: 0 Success
3303 * fp_lookup:EBADF Bad file descriptor
3304 * fp_guard_exception:??? Guarded file descriptor
3305 * close_internal:EBADF
3306 * close_internal:??? Anything returnable by a per-fileops
3307 * close function
3308 */
3309 int
3310 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
3311 {
3312 __pthread_testcancel(1);
3313 return close_nocancel(p, uap->fd);
3314 }
3315
3316 int
3317 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
3318 {
3319 return close_nocancel(p, uap->fd);
3320 }
3321
/*
 * close_nocancel
 *
 * Common close path: looks up the fileproc under the proc fd lock,
 * refuses to close guarded descriptors, and otherwise hands off to
 * fp_close_and_unlock(), which consumes (drops) the fd lock.
 */
int
close_nocancel(proc_t p, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	/* Fails if fd is not open or the slot is in flux (UF_RESERVED). */
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/* Guarded fds deliver a guard exception instead of closing. */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* fp_close_and_unlock() drops the fd lock on all paths. */
	return fp_close_and_unlock(p, fd, fp, 0);
}
3343
3344
/*
 * fp_close_and_unlock
 *
 * Close the fileproc 'fp' installed at 'fd' in process 'p'.  Must be
 * called with the proc fd lock held; the lock is dropped before return
 * on every path.  Returns the result of dropping the fileglob reference
 * (which invokes the per-fileops close when the last reference goes).
 */
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct fileglob *fg = fp->fp_glob;

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd->fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Notifications (kauth listeners, MAC) and async-IO cancellation
	 * may block, so do them with the fd lock dropped; the slot is
	 * protected by the flags set above.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
		proc_fdunlock(p);

		if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			if (vnode_getwithref((vnode_t)fg->fg_data) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg->fg_data, (uintptr_t)fileop_flags);
#if CONFIG_MACF
				mac_file_notify_close(proc_ucred(p), fp->fp_glob);
#endif
				vnode_put((vnode_t)fg->fg_data);
			}
		}
		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes attached to this descriptor. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	fileproc_drain(p, fp);

	/*
	 * FD_DUP2RESV: dup2() is about to reuse this slot, so keep it
	 * reserved (just clear the closing/close-on-* bits).  Otherwise
	 * release the fd number entirely.
	 */
	if (flags & FD_DUP2RESV) {
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~(UF_CLOSING | UF_EXCLOSE | UF_FORKCLOSE);
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg->fg_data));
	}

	fileproc_free(fp);

	/* May invoke the underlying object's close if this was the last ref. */
	return fg_drop(p, fg);
}
3437
3438
3439 /*
3440 * fstat
3441 *
3442 * Description: Return status information about a file descriptor.
3443 *
3444 * Parameters: p The process doing the fstat
3445 * fd The fd to stat
3446 * ub The user stat buffer
3447 * xsecurity The user extended security
3448 * buffer, or 0 if none
3449 * xsecurity_size The size of xsecurity, or 0
3450 * if no xsecurity
3451 * isstat64 Flag to indicate 64 bit version
3452 * for inode size, etc.
3453 *
3454 * Returns: 0 Success
3455 * EBADF
3456 * EFAULT
3457 * fp_lookup:EBADF Bad file descriptor
3458 * vnode_getwithref:???
3459 * copyout:EFAULT
3460 * vnode_getwithref:???
3461 * vn_stat:???
3462 * soo_stat:???
3463 * pipe_stat:???
3464 * pshm_stat:???
3465 * kqueue_stat:???
3466 *
3467 * Notes: Internal implementation for all other fstat() related
3468 * functions
3469 *
3470 * XXX switch on node type is bogus; need a stat in struct
3471 * XXX fileops instead.
3472 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* Kernel-native stat result; which member is live depends on isstat64. */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* Userland-layout copy; selected by (isstat64, process ABI). */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = fp->f_data;
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* Dispatch on descriptor type; each fills *sbptr or sets error. */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				/* vn_stat() also fetches the filesec into fsec. */
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Zero the spare fields, then munge the kernel stat into
		 * the layout matching the caller's ABI before copyout.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(current_proc())) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(current_proc())) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* report a zero-sized filesec to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
3615
3616
3617 /*
3618 * sys_fstat_extended
3619 *
3620 * Description: Extended version of fstat supporting returning extended
3621 * security information
3622 *
3623 * Parameters: p The process doing the fstat
3624 * uap->fd The fd to stat
3625 * uap->ub The user stat buffer
3626 * uap->xsecurity The user extended security
3627 * buffer, or 0 if none
3628 * uap->xsecurity_size The size of xsecurity, or 0
3629 *
3630 * Returns: 0 Success
3631 * !0 Errno (see fstat)
3632 */
3633 int
3634 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3635 {
3636 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3637 }
3638
3639
3640 /*
3641 * sys_fstat
3642 *
3643 * Description: Get file status for the file associated with fd
3644 *
3645 * Parameters: p The process doing the fstat
3646 * uap->fd The fd to stat
3647 * uap->ub The user stat buffer
3648 *
3649 * Returns: 0 Success
3650 * !0 Errno (see fstat)
3651 */
3652 int
3653 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3654 {
3655 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
3656 }
3657
3658
3659 /*
3660 * sys_fstat64_extended
3661 *
3662 * Description: Extended version of fstat64 supporting returning extended
3663 * security information
3664 *
3665 * Parameters: p The process doing the fstat
3666 * uap->fd The fd to stat
3667 * uap->ub The user stat buffer
3668 * uap->xsecurity The user extended security
3669 * buffer, or 0 if none
3670 * uap->xsecurity_size The size of xsecurity, or 0
3671 *
3672 * Returns: 0 Success
3673 * !0 Errno (see fstat)
3674 */
3675 int
3676 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3677 {
3678 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3679 }
3680
3681
3682 /*
3683 * sys_fstat64
3684 *
3685 * Description: Get 64 bit version of the file status for the file associated
3686 * with fd
3687 *
3688 * Parameters: p The process doing the fstat
3689 * uap->fd The fd to stat
3690 * uap->ub The user stat buffer
3691 *
3692 * Returns: 0 Success
3693 * !0 Errno (see fstat)
3694 */
3695 int
3696 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3697 {
3698 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
3699 }
3700
3701
3702 /*
3703 * sys_fpathconf
3704 *
3705 * Description: Return pathconf information about a file descriptor.
3706 *
3707 * Parameters: p Process making the request
3708 * uap->fd fd to get information about
3709 * uap->name Name of information desired
3710 * retval Pointer to the call return area
3711 *
3712 * Returns: 0 Success
3713 * EINVAL
3714 * fp_lookup:EBADF Bad file descriptor
3715 * vnode_getwithref:???
3716 * vn_pathconf:???
3717 *
3718 * Implicit returns:
3719 * *retval (modified) Returned information (numeric)
3720 */
3721 int
3722 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3723 {
3724 int fd = uap->fd;
3725 struct fileproc *fp;
3726 struct vnode *vp;
3727 int error = 0;
3728 file_type_t type;
3729 caddr_t data;
3730
3731
3732 AUDIT_ARG(fd, uap->fd);
3733 if ((error = fp_lookup(p, fd, &fp, 0))) {
3734 return error;
3735 }
3736 type = fp->f_type;
3737 data = fp->f_data;
3738
3739 switch (type) {
3740 case DTYPE_SOCKET:
3741 if (uap->name != _PC_PIPE_BUF) {
3742 error = EINVAL;
3743 goto out;
3744 }
3745 *retval = PIPE_BUF;
3746 error = 0;
3747 goto out;
3748
3749 case DTYPE_PIPE:
3750 if (uap->name != _PC_PIPE_BUF) {
3751 error = EINVAL;
3752 goto out;
3753 }
3754 *retval = PIPE_BUF;
3755 error = 0;
3756 goto out;
3757
3758 case DTYPE_VNODE:
3759 vp = (struct vnode *)data;
3760
3761 if ((error = vnode_getwithref(vp)) == 0) {
3762 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3763
3764 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3765
3766 (void)vnode_put(vp);
3767 }
3768 goto out;
3769
3770 default:
3771 error = EINVAL;
3772 goto out;
3773 }
3774 /*NOTREACHED*/
3775 out:
3776 fp_drop(p, fd, fp, 0);
3777 return error;
3778 }
3779
/*
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
3787
3788
3789 /*
3790 * fdalloc
3791 *
3792 * Description: Allocate a file descriptor for the process.
3793 *
3794 * Parameters: p Process to allocate the fd in
3795 * want The fd we would prefer to get
3796 * result Pointer to fd we got
3797 *
3798 * Returns: 0 Success
3799 * EMFILE
3800 * ENOMEM
3801 *
3802 * Implicit returns:
3803 * *result (modified) The fd which was allocated
3804 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	rlim_t lim;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	/* Clamp so the subsequent int casts cannot overflow. */
	nofile = MIN(nofile, INT_MAX);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/* Effective limit: lesser of RLIMIT_NOFILE and the global sysctl. */
	lim = MIN(nofile, maxfilesperproc);
	for (;;) {
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		/* Scan for a slot that is both empty and not reserved. */
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* Mark the slot UF_RESERVED so no-one else grabs it. */
				procfdtbl_reservefd(p, i);
				if (i > fdp->fd_lastfile) {
					fdp->fd_lastfile = i;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
				return 0;
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/*
		 * Drop the fdlock across the blocking allocation; the table
		 * may be grown by someone else in the meantime, hence the
		 * re-check below after the lock is retaken.
		 */
		proc_fdunlock(p);
		MALLOC(newofiles, struct fileproc **,
		    numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* Lost the race: someone grew the table; retry the scan. */
			FREE(newofiles, M_OFILETABL);
			continue;
		}
		/* One allocation holds both arrays: flags follow the pointers. */
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		FREE(ofiles, M_OFILETABL);
		fdexpand++;
	}
}
3897
3898
3899 /*
3900 * fdavail
3901 *
3902 * Description: Check to see whether n user file descriptors are available
3903 * to the process p.
3904 *
3905 * Parameters: p Process to check in
3906 * n The number of fd's desired
3907 *
3908 * Returns: 0 No
3909 * 1 Yes
3910 *
3911 * Locks: Assumes proc_fdlock for process is held by the caller
3912 *
3913 * Notes: The answer only remains valid so long as the proc_fdlock is
3914 * held by the caller.
3915 */
int
fdavail(proc_t p, int n)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	/* Effective limit: lesser of RLIMIT_NOFILE and the global sysctl. */
	lim = (int)MIN(nofile, maxfilesperproc);
	/*
	 * Headroom the table could still grow into counts as available;
	 * if that alone covers n, we are done (note n is decremented).
	 */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return 1;
	}
	/* Otherwise scan the existing table for free, unreserved slots. */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return 1;
		}
	}
	return 0;
}
3939
3940
3941 struct fileproc *
3942 fp_get_noref_locked(proc_t p, int fd)
3943 {
3944 struct filedesc *fdp = p->p_fd;
3945 struct fileproc *fp;
3946
3947 if (fd < 0 || fd >= fdp->fd_nfiles ||
3948 (fp = fdp->fd_ofiles[fd]) == NULL ||
3949 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3950 return NULL;
3951 }
3952 return fp;
3953 }
3954
3955 struct fileproc *
3956 fp_get_noref_locked_with_iocount(proc_t p, int fd)
3957 {
3958 struct filedesc *fdp = p->p_fd;
3959 struct fileproc *fp = NULL;
3960
3961 if (fd < 0 || fd >= fdp->fd_nfiles ||
3962 (fp = fdp->fd_ofiles[fd]) == NULL ||
3963 os_ref_get_count(&fp->fp_iocount) <= 1 ||
3964 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3965 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
3966 panic("%s: caller without an ioccount on fileproc (%d/:%p)",
3967 __func__, fd, fp);
3968 }
3969
3970 return fp;
3971 }
3972
3973 int
3974 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
3975 {
3976 struct filedesc *fdp = p->p_fd;
3977 struct fileproc *fp;
3978
3979 proc_fdlock_spin(p);
3980 if (fd < 0 || fd >= fdp->fd_nfiles ||
3981 (fp = fdp->fd_ofiles[fd]) == NULL ||
3982 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3983 proc_fdunlock(p);
3984 return EBADF;
3985 }
3986
3987 if (fp->f_type != ftype) {
3988 proc_fdunlock(p);
3989 return err;
3990 }
3991
3992 os_ref_retain_locked(&fp->fp_iocount);
3993 proc_fdunlock(p);
3994
3995 *fpp = fp;
3996 return 0;
3997 }
3998
3999
4000 /*
4001 * fp_getfvp
4002 *
4003 * Description: Get fileproc and vnode pointer for a given fd from the per
4004 * process open file table of the specified process, and if
4005 * successful, increment the fp_iocount
4006 *
4007 * Parameters: p Process in which fd lives
4008 * fd fd to get information for
4009 * resultfp Pointer to result fileproc
4010 * pointer area, or 0 if none
4011 * resultvp Pointer to result vnode pointer
4012 * area, or 0 if none
4013 *
4014 * Returns: 0 Success
4015 * EBADF Bad file descriptor
4016 * ENOTSUP fd does not refer to a vnode
4017 *
4018 * Implicit returns:
4019 * *resultfp (modified) Fileproc pointer
4020 * *resultvp (modified) vnode pointer
4021 *
4022 * Notes: The resultfp and resultvp fields are optional, and may be
4023 * independently specified as NULL to skip returning information
4024 *
4025 * Locks: Internally takes and releases proc_fdlock
4026 */
4027 int
4028 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
4029 {
4030 struct fileproc *fp;
4031 int error;
4032
4033 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
4034 if (error == 0) {
4035 if (resultfp) {
4036 *resultfp = fp;
4037 }
4038 if (resultvp) {
4039 *resultvp = (struct vnode *)fp->f_data;
4040 }
4041 }
4042
4043 return error;
4044 }
4045
4046
4047 /*
4048 * fp_get_pipe_id
4049 *
4050 * Description: Get pipe id for a given fd from the per process open file table
4051 * of the specified process.
4052 *
4053 * Parameters: p Process in which fd lives
4054 * fd fd to get information for
4055 * result_pipe_id Pointer to result pipe id
4056 *
4057 * Returns: 0 Success
4058 * EINVAL NULL pointer arguments passed
4059 * fp_lookup:EBADF Bad file descriptor
4060 * ENOTSUP fd does not refer to a pipe
4061 *
4062 * Implicit returns:
4063 * *result_pipe_id (modified) pipe id
4064 *
4065 * Locks: Internally takes and releases proc_fdlock
4066 */
4067 int
4068 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
4069 {
4070 struct fileproc *fp = FILEPROC_NULL;
4071 struct fileglob *fg = NULL;
4072 int error = 0;
4073
4074 if (p == NULL || result_pipe_id == NULL) {
4075 return EINVAL;
4076 }
4077
4078 proc_fdlock(p);
4079 if ((error = fp_lookup(p, fd, &fp, 1))) {
4080 proc_fdunlock(p);
4081 return error;
4082 }
4083 fg = fp->fp_glob;
4084
4085 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
4086 *result_pipe_id = pipe_id((struct pipe*)fg->fg_data);
4087 } else {
4088 error = ENOTSUP;
4089 }
4090
4091 fp_drop(p, fd, fp, 1);
4092 proc_fdunlock(p);
4093 return error;
4094 }
4095
4096
4097 /*
4098 * fp_lookup
4099 *
4100 * Description: Get fileproc pointer for a given fd from the per process
4101 * open file table of the specified process and if successful,
4102 * increment the fp_iocount
4103 *
4104 * Parameters: p Process in which fd lives
4105 * fd fd to get information for
4106 * resultfp Pointer to result fileproc
4107 * pointer area, or 0 if none
4108 * locked !0 if the caller holds the
4109 * proc_fdlock, 0 otherwise
4110 *
4111 * Returns: 0 Success
4112 * EBADF Bad file descriptor
4113 *
4114 * Implicit returns:
4115 * *resultfp (modified) Fileproc pointer
4116 *
4117 * Locks: If the argument 'locked' is non-zero, then the caller is
4118 * expected to have taken and held the proc_fdlock; if it is
4119 * zero, than this routine internally takes and drops this lock.
4120 */
int
fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc *fp;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * Reject out-of-range fds, empty slots, and slots that are only
	 * reserved (allocation in progress, no fileproc installed yet).
	 */
	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}
	/* Take the I/O reference the caller must later release via fp_drop(). */
	os_ref_retain_locked(&fp->fp_iocount);

	if (resultfp) {
		*resultfp = fp;
	}
	if (!locked) {
		proc_fdunlock(p);
	}

	return 0;
}
4149
4150
4151 /*
4152 * fp_tryswap
4153 *
4154 * Description: Swap the fileproc pointer for a given fd with a new
4155 * fileproc pointer in the per-process open file table of
4156 * the specified process. The fdlock must be held at entry.
4157 * Iff the swap is successful, the old fileproc pointer is freed.
4158 *
4159 * Parameters: p Process containing the fd
4160 * fd The fd of interest
4161 * nfp Pointer to the newfp
4162 *
4163 * Returns: 0 Success
4164 * EBADF Bad file descriptor
4165 * EINTR Interrupted
4166 * EKEEPLOOKING Other references were active, try again.
4167 */
int
fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
{
	struct fileproc *fp;
	int error;

	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);

	/* Take our own I/O reference on the fileproc being replaced. */
	if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
		return error;
	}
	/*
	 * At this point, our caller (change_guardedfd_np) has
	 * one fp_iocount reference, and we just took another
	 * one to begin the replacement.
	 * fp and nfp have a +1 reference from allocation.
	 * Thus if no-one else is looking, fp_iocount should be 3.
	 */
	if (os_ref_get_count(&fp->fp_iocount) < 3 ||
	    1 != os_ref_get_count(&nfp->fp_iocount)) {
		panic("%s: fp_iocount", __func__);
	} else if (3 == os_ref_get_count(&fp->fp_iocount)) {
		/* Copy the contents of *fp, preserving the "type" of *nfp */

		nfp->fp_flags = (nfp->fp_flags & FP_TYPEMASK) |
		    (fp->fp_flags & ~FP_TYPEMASK);
		/*
		 * Mirror the two outstanding references (the caller's and
		 * ours) onto nfp before it is installed in the table.
		 */
		os_ref_retain_locked(&nfp->fp_iocount);
		os_ref_retain_locked(&nfp->fp_iocount);
		nfp->fp_glob = fp->fp_glob;
		nfp->fp_wset = fp->fp_wset;

		/* Install the replacement, then drop our reference on it. */
		p->p_fd->fd_ofiles[fd] = nfp;
		fp_drop(p, fd, nfp, 1);

		/* Release the corresponding references on the old fileproc. */
		os_ref_release_live(&fp->fp_iocount);
		os_ref_release_live(&fp->fp_iocount);
		fileproc_free(fp);
	} else {
		/*
		 * Wait for all other active references to evaporate.
		 */
		p->p_fpdrainwait = 1;
		error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
		    PRIBIO | PCATCH, "tryswap fpdrain", NULL);
		if (0 == error) {
			/*
			 * Return an "internal" errno to trigger a full
			 * reevaluation of the change-guard attempt.
			 */
			error = EKEEPLOOKING;
		}
		(void) fp_drop(p, fd, fp, 1);
	}
	return error;
}
4223
4224
4225 /*
4226 * fp_drop
4227 *
4228 * Description: Drop the I/O reference previously taken by calling fp_lookup
4229 * et. al.
4230 *
4231 * Parameters: p Process in which the fd lives
4232 * fd fd associated with the fileproc
4233 * fp fileproc on which to set the
4234 * flag and drop the reference
4235 * locked flag to internally take and
4236 * drop proc_fdlock if it is not
4237 * already held by the caller
4238 *
4239 * Returns: 0 Success
4240 * EBADF Bad file descriptor
4241 *
4242 * Locks: This function internally takes and drops the proc_fdlock for
4243 * the supplied process if 'locked' is zero, and assumes that
4244 * the caller already holds this lock if 'locked' is non-zero.
4245 *
4246 * Notes: The fileproc must correspond to the fd in the supplied proc
4247 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * If no fileproc was supplied, look one up from the fd; a reserved
	 * slot is still accepted when the file is in the middle of closing
	 * (UF_CLOSING), since the closer needs to drop references too.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/* Dropping the last I/O reference: clear state and note any waiter. */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (p->p_fpdrainwait) {
			p->p_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* The wakeup of any drain waiter happens after the lock is dropped. */
	if (needwakeup) {
		wakeup(&p->p_fpdrainwait);
	}

	return 0;
}
4286
4287
4288 /*
4289 * file_vnode
4290 *
4291 * Description: Given an fd, look it up in the current process's per process
4292 * open file table, and return its internal vnode pointer.
4293 *
4294 * Parameters: fd fd to obtain vnode from
4295 * vpp pointer to vnode return area
4296 *
4297 * Returns: 0 Success
4298 * EINVAL The fd does not refer to a
4299 * vnode fileproc entry
4300 * fp_lookup:EBADF Bad file descriptor
4301 *
4302 * Implicit returns:
4303 * *vpp (modified) Returned vnode pointer
4304 *
4305 * Locks: This function internally takes and drops the proc_fdlock for
4306 * the current process
4307 *
4308 * Notes: If successful, this function increments the fp_iocount on the
4309 * fd's corresponding fileproc.
4310 *
4311 * The fileproc referenced is not returned; because of this, care
4312 * must be taken to not drop the last reference (e.g. by closing
4313 * the file). This is inherently unsafe, since the reference may
4314 * not be recoverable from the vnode, if there is a subsequent
4315 * close that destroys the associate fileproc. The caller should
4316 * therefore retain their own reference on the fileproc so that
4317 * the fp_iocount can be dropped subsequently. Failure to do this
4318 * can result in the returned pointer immediately becoming invalid
4319 * following the call.
4320 *
4321 * Use of this function is discouraged.
4322 */
int
file_vnode(int fd, struct vnode **vpp)
{
	/* Equivalent to file_vnode_withvid() with the vid output suppressed. */
	return file_vnode_withvid(fd, vpp, NULL);
}
4328
4329 /*
4330 * file_vnode_withvid
4331 *
4332 * Description: Given an fd, look it up in the current process's per process
4333 * open file table, and return its internal vnode pointer.
4334 *
4335 * Parameters: fd fd to obtain vnode from
4336 * vpp pointer to vnode return area
4337 * vidp pointer to vid of the returned vnode
4338 *
4339 * Returns: 0 Success
4340 * EINVAL The fd does not refer to a
4341 * vnode fileproc entry
4342 * fp_lookup:EBADF Bad file descriptor
4343 *
4344 * Implicit returns:
4345 * *vpp (modified) Returned vnode pointer
4346 *
4347 * Locks: This function internally takes and drops the proc_fdlock for
4348 * the current process
4349 *
4350 * Notes: If successful, this function increments the fp_iocount on the
4351 * fd's corresponding fileproc.
4352 *
4353 * The fileproc referenced is not returned; because of this, care
4354 * must be taken to not drop the last reference (e.g. by closing
4355 * the file). This is inherently unsafe, since the reference may
4356 * not be recoverable from the vnode, if there is a subsequent
4357 * close that destroys the associate fileproc. The caller should
4358 * therefore retain their own reference on the fileproc so that
4359 * the fp_iocount can be dropped subsequently. Failure to do this
4360 * can result in the returned pointer immediately becoming invalid
4361 * following the call.
4362 *
4363 * Use of this function is discouraged.
4364 */
4365 int
4366 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
4367 {
4368 struct fileproc *fp;
4369 int error;
4370
4371 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
4372 if (error == 0) {
4373 if (vpp) {
4374 *vpp = fp->f_data;
4375 }
4376 if (vidp) {
4377 *vidp = vnode_vid(fp->f_data);
4378 }
4379 }
4380 return error;
4381 }
4382
4383 /*
4384 * file_socket
4385 *
4386 * Description: Given an fd, look it up in the current process's per process
4387 * open file table, and return its internal socket pointer.
4388 *
4389 * Parameters: fd fd to obtain vnode from
4390 * sp pointer to socket return area
4391 *
4392 * Returns: 0 Success
4393 * ENOTSOCK Not a socket
4394 * fp_lookup:EBADF Bad file descriptor
4395 *
4396 * Implicit returns:
4397 * *sp (modified) Returned socket pointer
4398 *
4399 * Locks: This function internally takes and drops the proc_fdlock for
4400 * the current process
4401 *
4402 * Notes: If successful, this function increments the fp_iocount on the
4403 * fd's corresponding fileproc.
4404 *
4405 * The fileproc referenced is not returned; because of this, care
4406 * must be taken to not drop the last reference (e.g. by closing
4407 * the file). This is inherently unsafe, since the reference may
4408 * not be recoverable from the socket, if there is a subsequent
4409 * close that destroys the associate fileproc. The caller should
4410 * therefore retain their own reference on the fileproc so that
4411 * the fp_iocount can be dropped subsequently. Failure to do this
4412 * can result in the returned pointer immediately becoming invalid
4413 * following the call.
4414 *
4415 * Use of this function is discouraged.
4416 */
4417 int
4418 file_socket(int fd, struct socket **sp)
4419 {
4420 struct fileproc *fp;
4421 int error;
4422
4423 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
4424 if (error == 0) {
4425 if (sp) {
4426 *sp = (struct socket *)fp->f_data;
4427 }
4428 }
4429 return error;
4430 }
4431
4432
4433 /*
4434 * file_flags
4435 *
4436 * Description: Given an fd, look it up in the current process's per process
4437 * open file table, and return its fileproc's flags field.
4438 *
4439 * Parameters: fd fd whose flags are to be
4440 * retrieved
4441 * flags pointer to flags data area
4442 *
4443 * Returns: 0 Success
4444 * EBADF Bad file descriptor
4446 *
4447 * Implicit returns:
4448 * *flags (modified) Returned flags field
4449 *
4450 * Locks: This function internally takes and drops the proc_fdlock for
4451 * the current process
4452 */
4453 int
4454 file_flags(int fd, int *flags)
4455 {
4456 proc_t p = current_proc();
4457 struct fileproc *fp;
4458 int error = EBADF;
4459
4460 proc_fdlock_spin(p);
4461 fp = fp_get_noref_locked(p, fd);
4462 if (fp) {
4463 *flags = (int)fp->f_flag;
4464 error = 0;
4465 }
4466 proc_fdunlock(p);
4467
4468 return error;
4469 }
4470
4471
4472 /*
4473 * file_drop
4474 *
4475 * Description: Drop an iocount reference on an fd, and wake up any waiters
4476 * for draining (i.e. blocked in fileproc_drain() called during
4477 * the last attempt to close a file).
4478 *
4479 * Parameters: fd fd on which an ioreference is
4480 * to be dropped
4481 *
4482 * Returns: 0 Success
4483 *
4484 * Description: Given an fd, look it up in the current process's per process
4485 * open file table, and drop it's fileproc's fp_iocount by one
4486 *
4487 * Notes: This is intended as a corresponding operation to the functions
4488 * file_vnode() and file_socket() operations.
4489 *
4490 * If the caller can't possibly hold an I/O reference,
4491 * this function will panic the kernel rather than allowing
4492 * for memory corruption. Callers should always call this
4493 * because they acquired an I/O reference on this file before.
4494 *
4495 * Use of this function is discouraged.
4496 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics if the caller does not actually hold an I/O reference. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/* Releasing the last I/O reference: clear state, note any waiter. */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (p->p_fpdrainwait) {
			p->p_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* The wakeup of any drain waiter happens after the lock is dropped. */
	if (needwakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return 0;
}
4524
4525
4526
4527 /*
4528 * falloc_withalloc
4529 *
4530 * Create a new open file structure and allocate
4531 * a file descriptor for the process that refers to it.
4532 *
4533 * Returns: 0 Success
4534 *
4535 * Description: Allocate an entry in the per process open file table and
4536 * return the corresponding fileproc and fd.
4537 *
4538 * Parameters: p The process in whose open file
4539 * table the fd is to be allocated
4540 * resultfp Pointer to fileproc pointer
4541 * return area
4542 * resultfd Pointer to fd return area
4543 * ctx VFS context
4544 * fp_zalloc fileproc allocator to use
4545 * crarg allocator args
4546 *
4547 * Returns: 0 Success
4548 * ENFILE Too many open files in system
4549 * fdalloc:EMFILE Too many open files in process
4550 * fdalloc:ENOMEM M_OFILETABL zone exhausted
4551 * ENOMEM fp_zone or fg_zone zone
4552 * exhausted
4553 *
4554 * Implicit returns:
4555 * *resultfp (modified) Returned fileproc pointer
4556 * *resultfd (modified) Returned fd
4557 *
4558 * Notes: This function takes separate process and context arguments
4559 * solely to support kern_exec.c; otherwise, it would take
4560 * neither, and use the vfs_context_current() routine internally.
4561 */
int
falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
    vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg)
{
	struct fileproc *fp;
	struct fileglob *fg;
	int error, nfd;

	/* Make sure we don't go beyond the system-wide limit */
	if (nfiles >= maxfiles) {
		tablefull("file");
		return ENFILE;
	}

	proc_fdlock(p);

	/* fdalloc will make sure the process stays below per-process limit */
	if ((error = fdalloc(p, 0, &nfd))) {
		proc_fdunlock(p);
		return error;
	}

#if CONFIG_MACF
	error = mac_file_check_create(proc_ucred(p));
	if (error) {
		proc_fdunlock(p);
		return error;
	}
#endif

	/*
	 * Allocate a new file descriptor.
	 * If the process has file descriptor zero open, add to the list
	 * of open files at that point, otherwise put it at the front of
	 * the list of open files.
	 */
	proc_fdunlock(p);

	/*
	 * The slot nfd is left UF_RESERVED by fdalloc(), which keeps it
	 * safe while we allocate the fileproc/fileglob unlocked.
	 */
	fp = (*fp_zalloc)(crarg);
	if (fp == NULL) {
		return ENOMEM;
	}
	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);

	/*
	 * Take an extra I/O reference for the caller (the allocator
	 * presumably supplies the initial one — see fileproc_alloc_init),
	 * and initialize the fileglob's reference count.
	 */
	os_ref_retain_locked(&fp->fp_iocount);
	os_ref_init_raw(&fg->fg_count, &f_refgrp);
	fg->fg_ops = &uninitops;
	fp->fp_glob = fg;
#if CONFIG_MACF
	mac_file_label_init(fg);
#endif

	/* The fileproc holds a reference on the context's credential. */
	kauth_cred_ref(ctx->vc_ucred);

	fp->f_cred = ctx->vc_ucred;

#if CONFIG_MACF
	mac_file_label_associate(fp->f_cred, fg);
#endif

	os_atomic_inc(&nfiles, relaxed);

	proc_fdlock(p);

	/* Publish the fileproc in the previously reserved slot. */
	p->p_fd->fd_ofiles[nfd] = fp;

	proc_fdunlock(p);

	if (resultfp) {
		*resultfp = fp;
	}
	if (resultfd) {
		*resultfd = nfd;
	}

	return 0;
}
4640
int
falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
{
	/* Default allocation path: use the stock fileproc allocator. */
	return falloc_withalloc(p, resultfp, resultfd, ctx,
	    fileproc_alloc_init, NULL);
}
4647
4648 /*
4649 * fdexec
4650 *
4651 * Description: Perform close-on-exec processing for all files in a process
4652 * that are either marked as close-on-exec, or which were in the
4653 * process of being opened at the time of the execve
4654 *
4655 * Also handles the case (via posix_spawn()) where -all-
4656 * files except those marked with "inherit" as treated as
4657 * close-on-exec.
4658 *
4659 * Parameters: p Pointer to process calling
4660 * execve
4661 *
4662 * Returns: void
4663 *
4664 * Locks: This function internally takes and drops proc_fdlock()
4665 * But assumes tables don't grow/change while unlocked.
4666 *
4667 */
void
fdexec(proc_t p, short flags, int self_exec)
{
	struct filedesc *fdp = p->p_fd;
	int i;
	boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && self_exec) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	proc_fdlock(p);

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/* Walk the table top-down, closing fds that must not survive exec. */
	for (i = fdp->fd_lastfile; i >= 0; i--) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];

		if (fp && cloexec_default) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
				*flagp |= UF_EXCLOSE;
			}
			*flagp &= ~UF_INHERIT;
		}

		if (
			((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
#if CONFIG_MACF
			|| (fp && mac_file_check_inherit(proc_ucred(p), fp->fp_glob))
#endif
			) {
			/* fp_close_and_unlock() drops the fdlock; retake it. */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
4738
4739
4740 /*
4741 * fdcopy
4742 *
4743 * Description: Copy a filedesc structure. This is normally used as part of
4744 * forkproc() when forking a new process, to copy the per process
4745 * open file table over to the new process.
4746 *
4747 * Parameters: p Process whose open file table
4748 * is to be copied (parent)
4749 * uth_cdir Per thread current working
4750 * cirectory, or NULL
4751 *
4752 * Returns: NULL Copy failed
4753 * !NULL Pointer to new struct filedesc
4754 *
4755 * Locks: This function internally takes and drops proc_fdlock()
4756 *
4757 * Notes: Files are copied directly, ignoring the new resource limits
4758 * for the process that's being copied into. Since the descriptor
4759 * references are just additional references, this does not count
4760 * against the number of open files on the system.
4761 *
4762 * The struct filedesc includes the current working directory,
4763 * and the current root directory, if the process is chroot'ed.
4764 *
4765 * If the exec was called by a thread using a per thread current
4766 * working directory, we inherit the working directory from the
4767 * thread making the call, rather than from the process.
4768 *
4769 * In the case of a failure to obtain a reference, for most cases,
4770 * the file entry will be silently dropped. There's an exception
4771 * for the case of a chroot dir, since a failure to to obtain a
4772 * reference there would constitute an "escape" from the chroot
4773 * environment, which must not be allowed. In that case, we will
4774 * deny the execve() operation, rather than allowing the escape.
4775 */
4776 struct filedesc *
4777 fdcopy(proc_t p, vnode_t uth_cdir)
4778 {
4779 struct filedesc *newfdp, *fdp = p->p_fd;
4780 int i;
4781 struct fileproc *ofp, *fp;
4782 vnode_t v_dir;
4783
4784 newfdp = zalloc(fdp_zone);
4785
4786 proc_fdlock(p);
4787
4788 /*
4789 * the FD_CHROOT flag will be inherited via this copy
4790 */
4791 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4792
4793 /*
4794 * If we are running with per-thread current working directories,
4795 * inherit the new current working directory from the current thread
4796 * instead, before we take our references.
4797 */
4798 if (uth_cdir != NULLVP) {
4799 newfdp->fd_cdir = uth_cdir;
4800 }
4801
4802 /*
4803 * For both fd_cdir and fd_rdir make sure we get
4804 * a valid reference... if we can't, than set
4805 * set the pointer(s) to NULL in the child... this
4806 * will keep us from using a non-referenced vp
4807 * and allows us to do the vnode_rele only on
4808 * a properly referenced vp
4809 */
4810 if ((v_dir = newfdp->fd_cdir)) {
4811 if (vnode_getwithref(v_dir) == 0) {
4812 if ((vnode_ref(v_dir))) {
4813 newfdp->fd_cdir = NULL;
4814 }
4815 vnode_put(v_dir);
4816 } else {
4817 newfdp->fd_cdir = NULL;
4818 }
4819 }
4820 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4821 /*
4822 * we couldn't get a new reference on
4823 * the current working directory being
4824 * inherited... we might as well drop
4825 * our reference from the parent also
4826 * since the vnode has gone DEAD making
4827 * it useless... by dropping it we'll
4828 * be that much closer to recycling it
4829 */
4830 vnode_rele(fdp->fd_cdir);
4831 fdp->fd_cdir = NULL;
4832 }
4833
4834 if ((v_dir = newfdp->fd_rdir)) {
4835 if (vnode_getwithref(v_dir) == 0) {
4836 if ((vnode_ref(v_dir))) {
4837 newfdp->fd_rdir = NULL;
4838 }
4839 vnode_put(v_dir);
4840 } else {
4841 newfdp->fd_rdir = NULL;
4842 }
4843 }
4844 /* Coming from a chroot environment and unable to get a reference... */
4845 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4846 proc_fdunlock(p);
4847 /*
4848 * We couldn't get a new reference on
4849 * the chroot directory being
4850 * inherited... this is fatal, since
4851 * otherwise it would constitute an
4852 * escape from a chroot environment by
4853 * the new process.
4854 */
4855 if (newfdp->fd_cdir) {
4856 vnode_rele(newfdp->fd_cdir);
4857 }
4858 zfree(fdp_zone, newfdp);
4859 return NULL;
4860 }
4861
4862 /*
4863 * If the number of open files fits in the internal arrays
4864 * of the open file structure, use them, otherwise allocate
4865 * additional memory for the number of descriptors currently
4866 * in use.
4867 */
4868 if (newfdp->fd_lastfile < NDFILE) {
4869 i = NDFILE;
4870 } else {
4871 /*
4872 * Compute the smallest multiple of NDEXTENT needed
4873 * for the file descriptors currently in use,
4874 * allowing the table to shrink.
4875 */
4876 i = newfdp->fd_nfiles;
4877 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
4878 i /= 2;
4879 }
4880 }
4881 proc_fdunlock(p);
4882
4883 MALLOC(newfdp->fd_ofiles, struct fileproc **,
4884 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4885 if (newfdp->fd_ofiles == NULL) {
4886 if (newfdp->fd_cdir) {
4887 vnode_rele(newfdp->fd_cdir);
4888 }
4889 if (newfdp->fd_rdir) {
4890 vnode_rele(newfdp->fd_rdir);
4891 }
4892
4893 zfree(fdp_zone, newfdp);
4894 return NULL;
4895 }
4896 (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4897 proc_fdlock(p);
4898
4899 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4900 newfdp->fd_nfiles = i;
4901
4902 if (fdp->fd_nfiles > 0) {
4903 struct fileproc **fpp;
4904 char *flags;
4905
4906 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4907 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4908 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4909 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4910
4911 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4912 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4913 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
4914 if ((ofp = *fpp) != NULL &&
4915 0 == (ofp->fp_glob->fg_lflags & FG_CONFINED) &&
4916 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
4917 #if DEBUG
4918 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
4919 panic("complex fileproc");
4920 }
4921 #endif
4922 fp = fileproc_alloc_init(NULL);
4923 if (fp == NULL) {
4924 /*
4925 * XXX no room to copy, unable to
4926 * XXX safely unwind state at present
4927 */
4928 *fpp = NULL;
4929 } else {
4930 fp->fp_flags |=
4931 (ofp->fp_flags & ~FP_TYPEMASK);
4932 fp->fp_glob = ofp->fp_glob;
4933 fg_ref(p, fp->fp_glob);
4934 *fpp = fp;
4935 }
4936 } else {
4937 *fpp = NULL;
4938 *flags = 0;
4939 }
4940 if (*fpp == NULL) {
4941 if (i == newfdp->fd_lastfile && i > 0) {
4942 newfdp->fd_lastfile--;
4943 }
4944 if (i < newfdp->fd_freefile) {
4945 newfdp->fd_freefile = i;
4946 }
4947 }
4948 }
4949 }
4950
4951 proc_fdunlock(p);
4952
4953 /*
4954 * Initialize knote and kqueue tracking structs
4955 */
4956 newfdp->fd_knlist = NULL;
4957 newfdp->fd_knlistsize = 0;
4958 newfdp->fd_knhash = NULL;
4959 newfdp->fd_knhashmask = 0;
4960 newfdp->fd_kqhash = NULL;
4961 newfdp->fd_kqhashmask = 0;
4962 newfdp->fd_wqkqueue = NULL;
4963 lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
4964 lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
4965
4966 return newfdp;
4967 }
4968
4969
4970 /*
4971 * fdfree
4972 *
4973 * Description: Release a filedesc (per process open file table) structure;
4974 * this is done on process exit(), or from forkproc_free() if
4975 * the fork fails for some reason subsequent to a successful
4976 * call to fdcopy()
4977 *
4978 * Parameters: p Pointer to process going away
4979 *
4980 * Returns: void
4981 *
4982 * Locks: This function internally takes and drops proc_fdlock()
4983 */
void
fdfree(proc_t p)
{
	struct filedesc *fdp;
	struct fileproc *fp;
	struct kqworkq *dealloc_kqwq = NULL;
	int i;

	proc_fdlock(p);

	/* Nothing to do for the kernel proc, or if the table is already gone. */
	if (p == kernproc || NULL == (fdp = p->p_fd)) {
		proc_fdunlock(p);
		return;
	}

	extern struct filedesc filedesc0;

	/*
	 * filedesc0 is the statically allocated bootstrap table; it must
	 * never be torn down through this path.
	 */
	if (&filedesc0 == fdp) {
		panic("filedesc0");
	}

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (i = fdp->fd_lastfile; i >= 0; i--) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/*
				 * fp_close_and_unlock() drops the fdlock, so
				 * retake it before examining the next slot.
				 */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
		FREE(fdp->fd_ofiles, M_OFILETABL);
		fdp->fd_ofiles = NULL;
		fdp->fd_nfiles = 0;
	}

	/* Detach the workq kqueue under the lock; free it after dropping it. */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* The deallocations below may block, so they happen unlocked. */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
	if (fdp->fd_cdir) {
		vnode_rele(fdp->fd_cdir);
	}
	if (fdp->fd_rdir) {
		vnode_rele(fdp->fd_rdir);
	}

	/* Detach the filedesc from the proc before final teardown. */
	proc_fdlock_spin(p);
	p->p_fd = NULL;
	proc_fdunlock(p);

	/* knotes_dealloc() above should have emptied every hash chain. */
	if (fdp->fd_kqhash) {
		for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
			assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
		}
		hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
	}

	lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);

	zfree(fdp_zone, fdp);
}
5068
5069 /*
5070 * fileproc_drain
5071 *
5072 * Description: Drain out pending I/O operations
5073 *
5074 * Parameters: p Process closing this file
5075 * fp fileproc struct for the open
5076 * instance on the file
5077 *
5078 * Returns: void
5079 *
5080 * Locks: Assumes the caller holds the proc_fdlock
5081 *
5082 * Notes: For character devices, this occurs on the last close of the
5083 * device; for all other file descriptors, this occurs on each
5084 * close to prevent fd's from being closed out from under
5085 * operations currently in progress and blocked
5086 *
5087 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5088 * regarding their use and interaction with this function.
5089 */
void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	if (!is_current_proc) {
		/*
		 * Draining on behalf of another proc: borrow one of its
		 * threads for the VFS context and hold a reference on it
		 * until the drain is finished.
		 */
		proc_lock(p);
		thread = proc_thread(p); /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/* Loop until this caller holds the only remaining I/O reference. */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&p->p_fdmlock);

		fo_drain(fp, &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			/* Kick any selects blocked on this fp's wait set. */
			if (waitq_wakeup64_all((struct waitq *)fp->fp_wset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->fp_wset, fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			/* Also wake waiters parked on the shared conflict queue. */
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		p->p_fpdrainwait = 1;

		/* Sleep until an in-flight operation drops its iocount. */
		msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
5147
5148
5149 /*
5150 * fp_free
5151 *
5152 * Description: Release the fd and free the fileproc associated with the fd
5153 * in the per process open file table of the specified process;
5154 * these values must correspond.
5155 *
5156 * Parameters: p Process containing fd
5157 * fd fd to be released
5158 * fp fileproc to be freed
5159 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* Release the descriptor slot first, under the fd lock. */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Then free the fileglob and the fileproc.  The extra iocount
	 * reference is dropped before fileproc_free() releases the
	 * final (allocation) reference.
	 */
	fg_free(fp->fp_glob);
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
5171
5172
5173 /*
5174 * sys_flock
5175 *
5176 * Description: Apply an advisory lock on a file descriptor.
5177 *
5178 * Parameters: p Process making request
5179 * uap->fd fd on which the lock is to be
5180 * attempted
5181 * uap->how (Un)Lock bits, including type
5182 * retval Pointer to the call return area
5183 *
5184 * Returns: 0 Success
5185 * fp_getfvp:EBADF Bad file descriptor
5186 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5187 * vnode_getwithref:???
5188 * VNOP_ADVLOCK:???
5189 *
5190 * Implicit returns:
5191 * *retval (modified) Size of dtable
5192 *
5193 * Notes: Just attempt to get a record lock of the requested type on
5194 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5195 */
5196 int
5197 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5198 {
5199 int fd = uap->fd;
5200 int how = uap->how;
5201 struct fileproc *fp;
5202 struct vnode *vp;
5203 struct flock lf;
5204 vfs_context_t ctx = vfs_context_current();
5205 int error = 0;
5206
5207 AUDIT_ARG(fd, uap->fd);
5208 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5209 return error;
5210 }
5211 if ((error = vnode_getwithref(vp))) {
5212 goto out1;
5213 }
5214 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5215
5216 lf.l_whence = SEEK_SET;
5217 lf.l_start = 0;
5218 lf.l_len = 0;
5219 if (how & LOCK_UN) {
5220 lf.l_type = F_UNLCK;
5221 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5222 goto out;
5223 }
5224 if (how & LOCK_EX) {
5225 lf.l_type = F_WRLCK;
5226 } else if (how & LOCK_SH) {
5227 lf.l_type = F_RDLCK;
5228 } else {
5229 error = EBADF;
5230 goto out;
5231 }
5232 #if CONFIG_MACF
5233 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob, F_SETLK, &lf);
5234 if (error) {
5235 goto out;
5236 }
5237 #endif
5238 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5239 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5240 ctx, NULL);
5241 if (!error) {
5242 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5243 }
5244 out:
5245 (void)vnode_put(vp);
5246 out1:
5247 fp_drop(p, fd, fp, 0);
5248 return error;
5249 }
5250
5251 /*
5252 * sys_fileport_makeport
5253 *
5254 * Description: Obtain a Mach send right for a given file descriptor.
5255 *
5256 * Parameters: p Process calling fileport
5257 * uap->fd The fd to reference
5258 * uap->portnamep User address at which to place port name.
5259 *
5260 * Returns: 0 Success.
5261 * EBADF Bad file descriptor.
5262 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5263 * EFAULT Address at which to store port name is not valid.
5264 * EAGAIN Resource shortage.
5265 *
5266 * Implicit returns:
5267 * On success, name of send right is stored at user-specified address.
5268 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	fg = fp->fp_glob;
	/* Some file types cannot be transported in a Mach message. */
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* Guarded descriptors may forbid fileport creation entirely. */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry. Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	/*
	 * If the send right was already placed in the task's IPC space
	 * (copyout of the name failed), take it back.
	 */
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5346
/*
 * Destruction callback for a fileport: drop the fileglob reference taken
 * when the port was created.  There is no owning proc at this point.
 */
void
fileport_releasefg(struct fileglob *fg)
{
	(void)fg_drop(PROC_NULL, fg);
}
5352
5353 /*
5354 * fileport_makefd
5355 *
5356 * Description: Obtain the file descriptor for a given Mach send right.
5357 *
5358 * Returns: 0 Success
5359 * EINVAL Invalid Mach port name, or port is not for a file.
5360 * fdalloc:EMFILE
5361 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5362 *
5363 * Implicit returns:
5364 * *retval (modified) The new descriptor
5365 */
5366 int
5367 fileport_makefd(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5368 {
5369 struct fileglob *fg;
5370 struct fileproc *fp = FILEPROC_NULL;
5371 int fd;
5372 int err;
5373
5374 fg = fileport_port_to_fileglob(port);
5375 if (fg == NULL) {
5376 err = EINVAL;
5377 goto out;
5378 }
5379
5380 fp = fileproc_alloc_init(NULL);
5381 if (fp == FILEPROC_NULL) {
5382 err = ENOMEM;
5383 goto out;
5384 }
5385
5386 proc_fdlock(p);
5387 err = fdalloc(p, 0, &fd);
5388 if (err != 0) {
5389 proc_fdunlock(p);
5390 goto out;
5391 }
5392 if (uf_flags) {
5393 *fdflags(p, fd) |= uf_flags;
5394 }
5395
5396 fp->fp_glob = fg;
5397 fg_ref(p, fg);
5398
5399 procfdtbl_releasefd(p, fd, fp);
5400 proc_fdunlock(p);
5401
5402 *retval = fd;
5403 err = 0;
5404 out:
5405 if ((fp != NULL) && (0 != err)) {
5406 fileproc_free(fp);
5407 }
5408
5409 return err;
5410 }
5411
5412 /*
5413 * sys_fileport_makefd
5414 *
5415 * Description: Obtain the file descriptor for a given Mach send right.
5416 *
5417 * Parameters: p Process calling fileport
5418 * uap->port Name of send right to file port.
5419 *
5420 * Returns: 0 Success
5421 * EINVAL Invalid Mach port name, or port is not for a file.
5422 * fdalloc:EMFILE
5423 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5424 *
5425 * Implicit returns:
5426 * *retval (modified) The new descriptor
5427 */
5428 int
5429 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5430 {
5431 ipc_port_t port = IPC_PORT_NULL;
5432 mach_port_name_t send = uap->port;
5433 kern_return_t res;
5434 int err;
5435
5436 res = ipc_object_copyin(get_task_ipcspace(p->task),
5437 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5438
5439 if (res == KERN_SUCCESS) {
5440 err = fileport_makefd(p, port, UF_EXCLOSE, retval);
5441 } else {
5442 err = EINVAL;
5443 }
5444
5445 if (IPC_PORT_NULL != port) {
5446 ipc_port_release_send(port);
5447 }
5448
5449 return err;
5450 }
5451
5452
5453 /*
5454 * dupfdopen
5455 *
5456 * Description: Duplicate the specified descriptor to a free descriptor;
5457 * this is the second half of fdopen(), above.
5458 *
5459 * Parameters: fdp filedesc pointer to fill in
5460 * indx fd to dup to
5461 * dfd fd to dup from
5462 * mode mode to set on new fd
5463 * error command code
5464 *
5465 * Returns: 0 Success
5466 * EBADF Source fd is bad
5467 * EACCES Requested mode not allowed
5468 * !0 'error', if not ENODEV or
5469 * ENXIO
5470 *
5471 * Notes: XXX This is not thread safe; see fdopen() above
5472 */
5473 int
5474 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5475 {
5476 struct fileproc *wfp;
5477 struct fileproc *fp;
5478 #if CONFIG_MACF
5479 int myerror;
5480 #endif
5481 proc_t p = current_proc();
5482
5483 /*
5484 * If the to-be-dup'd fd number is greater than the allowed number
5485 * of file descriptors, or the fd to be dup'd has already been
5486 * closed, reject. Note, check for new == old is necessary as
5487 * falloc could allocate an already closed to-be-dup'd descriptor
5488 * as the new descriptor.
5489 */
5490 proc_fdlock(p);
5491
5492 fp = fdp->fd_ofiles[indx];
5493 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5494 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5495 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5496 proc_fdunlock(p);
5497 return EBADF;
5498 }
5499 #if CONFIG_MACF
5500 myerror = mac_file_check_dup(proc_ucred(p), wfp->fp_glob, dfd);
5501 if (myerror) {
5502 proc_fdunlock(p);
5503 return myerror;
5504 }
5505 #endif
5506 /*
5507 * There are two cases of interest here.
5508 *
5509 * For ENODEV simply dup (dfd) to file descriptor
5510 * (indx) and return.
5511 *
5512 * For ENXIO steal away the file structure from (dfd) and
5513 * store it in (indx). (dfd) is effectively closed by
5514 * this operation.
5515 *
5516 * Any other error code is just returned.
5517 */
5518 switch (error) {
5519 case ENODEV:
5520 if (fp_isguarded(wfp, GUARD_DUP)) {
5521 proc_fdunlock(p);
5522 return EPERM;
5523 }
5524
5525 /*
5526 * Check that the mode the file is being opened for is a
5527 * subset of the mode of the existing descriptor.
5528 */
5529 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5530 proc_fdunlock(p);
5531 return EACCES;
5532 }
5533 if (indx > fdp->fd_lastfile) {
5534 fdp->fd_lastfile = indx;
5535 }
5536
5537 if (fp->fp_glob) {
5538 fg_free(fp->fp_glob);
5539 }
5540 fg_ref(p, wfp->fp_glob);
5541 fp->fp_glob = wfp->fp_glob;
5542
5543 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5544 (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5545
5546 proc_fdunlock(p);
5547 return 0;
5548
5549 default:
5550 proc_fdunlock(p);
5551 return error;
5552 }
5553 /* NOTREACHED */
5554 }
5555
5556
5557 /*
5558 * fo_read
5559 *
5560 * Description: Generic fileops read indirected through the fileops pointer
5561 * in the fileproc structure
5562 *
5563 * Parameters: fp fileproc structure pointer
5564 * uio user I/O structure pointer
5565 * flags FOF_ flags
5566 * ctx VFS context for operation
5567 *
5568 * Returns: 0 Success
5569 * !0 Errno from read
5570 */
5571 int
5572 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5573 {
5574 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5575 }
5576
5577 int
5578 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5579 {
5580 #pragma unused(fp, uio, flags, ctx)
5581 return ENXIO;
5582 }
5583
5584
5585 /*
5586 * fo_write
5587 *
5588 * Description: Generic fileops write indirected through the fileops pointer
5589 * in the fileproc structure
5590 *
5591 * Parameters: fp fileproc structure pointer
5592 * uio user I/O structure pointer
5593 * flags FOF_ flags
5594 * ctx VFS context for operation
5595 *
5596 * Returns: 0 Success
5597 * !0 Errno from write
5598 */
5599 int
5600 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5601 {
5602 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5603 }
5604
5605 int
5606 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5607 {
5608 #pragma unused(fp, uio, flags, ctx)
5609 return ENXIO;
5610 }
5611
5612
5613 /*
5614 * fo_ioctl
5615 *
5616 * Description: Generic fileops ioctl indirected through the fileops pointer
5617 * in the fileproc structure
5618 *
5619 * Parameters: fp fileproc structure pointer
5620 * com ioctl command
5621 * data pointer to internalized copy
5622 * of user space ioctl command
5623 * parameter data in kernel space
5624 * ctx VFS context for operation
5625 *
5626 * Returns: 0 Success
5627 * !0 Errno from ioctl
5628 *
5629 * Locks: The caller is assumed to have held the proc_fdlock; this
5630 * function releases and reacquires this lock. If the caller
5631 * accesses data protected by this lock prior to calling this
5632 * function, it will need to revalidate/reacquire any cached
5633 * protected data obtained prior to the call.
5634 */
5635 int
5636 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5637 {
5638 int error;
5639
5640 proc_fdunlock(vfs_context_proc(ctx));
5641 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5642 proc_fdlock(vfs_context_proc(ctx));
5643 return error;
5644 }
5645
5646 int
5647 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5648 {
5649 #pragma unused(fp, com, data, ctx)
5650 return ENOTTY;
5651 }
5652
5653
5654 /*
5655 * fo_select
5656 *
5657 * Description: Generic fileops select indirected through the fileops pointer
5658 * in the fileproc structure
5659 *
5660 * Parameters: fp fileproc structure pointer
5661 * which select which
5662 * wql pointer to wait queue list
5663 * ctx VFS context for operation
5664 *
5665 * Returns: 0 Success
5666 * !0 Errno from select
5667 */
5668 int
5669 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5670 {
5671 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5672 }
5673
5674 int
5675 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5676 {
5677 #pragma unused(fp, which, wql, ctx)
5678 return ENOTSUP;
5679 }
5680
5681
5682 /*
5683 * fo_close
5684 *
 * Description: Generic fileops close indirected through the fileops pointer
 *		in the fileglob structure
 *
 * Parameters:	fg		fileglob structure pointer for
 *				file to close
 *		ctx		VFS context for operation
5691 *
5692 * Returns: 0 Success
5693 * !0 Errno from close
5694 */
5695 int
5696 fo_close(struct fileglob *fg, vfs_context_t ctx)
5697 {
5698 return (*fg->fg_ops->fo_close)(fg, ctx);
5699 }
5700
5701
5702 /*
5703 * fo_drain
5704 *
 * Description: Generic fileops drain indirected through the fileops
 *		pointer in the fileproc structure
5707 *
5708 * Parameters: fp fileproc structure pointer
5709 * ctx VFS context for operation
5710 *
5711 * Returns: 0 Success
5712 * !0 errno from drain
5713 */
5714 int
5715 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5716 {
5717 return (*fp->f_ops->fo_drain)(fp, ctx);
5718 }
5719
5720 int
5721 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5722 {
5723 #pragma unused(fp, ctx)
5724 return ENOTSUP;
5725 }
5726
5727
5728 /*
5729 * fo_kqfilter
5730 *
5731 * Description: Generic fileops kqueue filter indirected through the fileops
5732 * pointer in the fileproc structure
5733 *
5734 * Parameters: fp fileproc structure pointer
5735 * kn pointer to knote to filter on
5736 *
5737 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5738 * 0 Filter is not active
5739 * !0 Filter is active
5740 */
5741 int
5742 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5743 {
5744 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5745 }
5746
5747 int
5748 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5749 {
5750 #pragma unused(fp, kev)
5751 knote_set_error(kn, ENOTSUP);
5752 return 0;
5753 }
5754
5755
5756 struct fileproc *
5757 fileproc_alloc_init(__unused void *arg)
5758 {
5759 struct fileproc *fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO);
5760
5761 os_ref_init(&fp->fp_iocount, &f_refgrp);
5762 return fp;
5763 }
5764
5765
5766 void
5767 fileproc_free(struct fileproc *fp)
5768 {
5769 os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
5770 #if DEVELOPMENT || DEBUG
5771 if (0 != refc) {
5772 panic("%s: pid %d refc: %u != 0",
5773 __func__, proc_pid(current_proc()), refc);
5774 }
5775 #endif
5776 switch (FILEPROC_TYPE(fp)) {
5777 case FTYPE_SIMPLE:
5778 zfree(fp_zone, fp);
5779 break;
5780 case FTYPE_GUARDED:
5781 guarded_fileproc_free(fp);
5782 break;
5783 default:
5784 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->fp_flags);
5785 }
5786 }
5787
5788 void
5789 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
5790 {
5791 if (clearflags) {
5792 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
5793 } else {
5794 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
5795 }
5796 }
5797
5798 fileproc_vflags_t
5799 fileproc_get_vflags(struct fileproc *fp)
5800 {
5801 return os_atomic_load(&fp->fp_vflags, relaxed);
5802 }