1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102 #include <sys/mount_internal.h>
103 #include <sys/kdebug.h>
104 #include <sys/sysproto.h>
105 #include <sys/pipe.h>
106 #include <sys/spawn.h>
107 #include <sys/cprotect.h>
108 #include <sys/ubc_internal.h>
109
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/waitq.h>
113 #include <kern/ipc_misc.h>
114
115 #include <vm/vm_protos.h>
116 #include <mach/mach_port.h>
117
118 #include <security/audit/audit.h>
119 #if CONFIG_MACF
120 #include <security/mac_framework.h>
121 #endif
122
123 #include <stdbool.h>
124 #include <os/atomic_private.h>
125 #include <IOKit/IOBSD.h>
126
127 #define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
128 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
129 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
130 void ipc_port_release_send(ipc_port_t);
131
132 static void fileproc_drain(proc_t, struct fileproc *);
133 static int finishdup(proc_t p,
134 struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
135
136 void fileport_releasefg(struct fileglob *fg);
137
138 /* flags for fp_close_and_unlock */
139 #define FD_DUP2RESV 1
140
141 /* We don't want these exported */
142
143 __private_extern__
144 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
145
146 static void fdrelse(struct proc * p, int fd);
147
148 extern void file_lock_init(void);
149
150 extern kauth_scope_t kauth_scope_fileop;
151
152 /* Conflict wait queue for when selects collide (opaque type) */
153 extern struct waitq select_conflict_queue;
154
155 #ifndef HFS_GET_BOOT_INFO
156 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
157 #endif
158
159 #ifndef HFS_SET_BOOT_INFO
160 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
161 #endif
162
163 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
164 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
165 #endif
166
167 #define f_flag fp_glob->fg_flag
168 #define f_type fp_glob->fg_ops->fo_type
169 #define f_cred fp_glob->fg_cred
170 #define f_ops fp_glob->fg_ops
171 #define f_offset fp_glob->fg_offset
172 #define f_data fp_glob->fg_data
173 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
174 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
175 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
176 ? 1 : 0)
177
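/*
 * Illustrative note on the macro above: it flags signed 64-bit addition
 * overflow without performing the addition. For example, with
 * x = LLONG_MAX - 5 and y = 10, both operands are positive and
 * x > LLONG_MAX - y, so the macro evaluates to 1; with x = 5 and y = 10
 * it evaluates to 0 because the sum fits in an int64_t.
 */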
178 ZONE_DECLARE(fg_zone, "fileglob",
179 sizeof(struct fileglob), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
180 ZONE_DECLARE(fp_zone, "fileproc",
181 sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
182 ZONE_DECLARE(fdp_zone, "filedesc",
183 sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
184
185 /*
186 * Descriptor management.
187 */
188 int nfiles; /* actual number of open files */
189 /*
190 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
191 */
192 static const struct fileops uninitops;
193
194 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
195 lck_grp_attr_t * file_lck_grp_attr;
196 lck_grp_t * file_lck_grp;
197 lck_attr_t * file_lck_attr;
198
199 #pragma mark fileglobs
200
201 /*!
202 * @function fg_free
203 *
204 * @brief
205 * Free a file structure.
206 */
207 static void
208 fg_free(struct fileglob *fg)
209 {
210 os_atomic_dec(&nfiles, relaxed);
211
212 if (fg->fg_vn_data) {
213 fg_vn_data_free(fg->fg_vn_data);
214 fg->fg_vn_data = NULL;
215 }
216
217 if (IS_VALID_CRED(fg->fg_cred)) {
218 kauth_cred_unref(&fg->fg_cred);
219 }
220 lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
221
222 #if CONFIG_MACF
223 mac_file_label_destroy(fg);
224 #endif
225 zfree(fg_zone, fg);
226 }
227
228 OS_ALWAYS_INLINE
229 void
230 fg_ref(struct fileglob *fg)
231 {
232 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
233 }
234
235 int
236 fg_drop(proc_t p, struct fileglob *fg)
237 {
238 struct vnode *vp;
239 struct vfs_context context;
240 int error = 0;
241
242 if (fg == NULL) {
243 return 0;
244 }
245
246 /* Set up context with cred stashed in fg */
247 if (p == current_proc()) {
248 context.vc_thread = current_thread();
249 } else {
250 context.vc_thread = NULL;
251 }
252 context.vc_ucred = fg->fg_cred;
253
254 /*
255 * POSIX record locking dictates that any close releases ALL
256 * locks owned by this process. This is handled by setting
257 * a flag in the unlock to free ONLY locks obeying POSIX
258 * semantics, and not to free BSD-style file locks.
259 * If the descriptor was in a message, POSIX-style locks
260 * aren't passed with the descriptor.
261 */
262 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
263 (p->p_ladvflag & P_LADVLOCK)) {
264 struct flock lf = {
265 .l_whence = SEEK_SET,
266 .l_type = F_UNLCK,
267 };
268
269 vp = (struct vnode *)fg->fg_data;
270 if ((error = vnode_getwithref(vp)) == 0) {
271 (void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
272 (void)vnode_put(vp);
273 }
274 }
275
276 if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
277 /*
278 * Since we ensure that fg->fg_ops is always initialized,
279 * it is safe to invoke fo_close on the fg
280 */
281 error = fo_close(fg, &context);
282
283 fg_free(fg);
284 }
285
286 return error;
287 }
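
/*
 * Usage sketch for illustration only: a subsystem that stashes a fileglob
 * pointer takes a reference with fg_ref() and later balances it with
 * fg_drop(), which invokes fo_close() and frees the fileglob once the last
 * reference is gone. The function and variable names below are hypothetical,
 * not xnu interfaces.
 */
#if 0
static void
stash_fileglob_example(proc_t p, struct fileproc *fp, struct fileglob **slot)
{
	struct fileglob *fg = fp->fp_glob;

	fg_ref(fg);                      /* keep the fileglob alive past this fileproc */
	*slot = fg;

	/* ... later, when the stashed reference is no longer needed ... */
	(void)fg_drop(p, *slot);         /* may close the file on the last reference */
	*slot = NULL;
}
#endif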
288
289 /*
290 * fg_get_vnode
291 *
292 * Description: Return vnode associated with the file structure, if
293 * any. The lifetime of the returned vnode is bound to
294 * the lifetime of the file structure.
295 *
296 * Parameters: fg Pointer to fileglob to
297 * inspect
298 *
299 * Returns: vnode_t
300 */
301 vnode_t
302 fg_get_vnode(struct fileglob *fg)
303 {
304 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
305 return (vnode_t)fg->fg_data;
306 } else {
307 return NULL;
308 }
309 }
310
311 bool
312 fg_sendable(struct fileglob *fg)
313 {
314 switch (FILEGLOB_DTYPE(fg)) {
315 case DTYPE_VNODE:
316 case DTYPE_SOCKET:
317 case DTYPE_PIPE:
318 case DTYPE_PSXSHM:
319 case DTYPE_NETPOLICY:
320 return (fg->fg_lflags & FG_CONFINED) == 0;
321
322 default:
323 return false;
324 }
325 }
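
/*
 * Illustrative sketch only: a caller that wants to hand a fileglob to
 * another process (e.g. over a fileport) would reject types that
 * fg_sendable() refuses, and can use fg_get_vnode() to peek at the backing
 * vnode when one exists. The helper name below is hypothetical.
 */
#if 0
static int
can_transfer_example(struct fileglob *fg)
{
	if (!fg_sendable(fg)) {
		return EPERM;            /* confined or unsupported descriptor type */
	}
	if (fg_get_vnode(fg) != NULL) {
		/* vnode-backed: additional policy checks could go here */
	}
	return 0;
}
#endif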
326
327
328 #pragma mark fileprocs
329
330 /*
331 * check_file_seek_range
332 *
333 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
334 *
335 * Parameters: fl Flock structure.
336 * cur_file_offset Current offset in the file.
337 *
338 * Returns: 0 on Success.
339 * EOVERFLOW on overflow.
340 * EINVAL on offset less than zero.
341 */
342
343 static int
344 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
345 {
346 if (fl->l_whence == SEEK_CUR) {
347 /* Check if the start marker is beyond LLONG_MAX. */
348 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
349 /* Check if start marker is negative */
350 if (fl->l_start < 0) {
351 return EINVAL;
352 }
353 return EOVERFLOW;
354 }
355 /* Check if the start marker is negative. */
356 if (fl->l_start + cur_file_offset < 0) {
357 return EINVAL;
358 }
359 /* Check if end marker is beyond LLONG_MAX. */
360 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
361 cur_file_offset, fl->l_len - 1))) {
362 return EOVERFLOW;
363 }
364 /* Check if the end marker is negative. */
365 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
366 fl->l_len < 0)) {
367 return EINVAL;
368 }
369 } else if (fl->l_whence == SEEK_SET) {
370 /* Check if the start marker is negative. */
371 if (fl->l_start < 0) {
372 return EINVAL;
373 }
374 /* Check if the end marker is beyond LLONG_MAX. */
375 if ((fl->l_len > 0) &&
376 CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
377 return EOVERFLOW;
378 }
379 /* Check if the end marker is negative. */
380 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
381 return EINVAL;
382 }
383 }
384 return 0;
385 }
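
/*
 * Worked example for the checks above: with l_whence = SEEK_CUR,
 * l_start = LLONG_MAX - 10 and a current file offset of 100, the start
 * marker overflows and the routine returns EOVERFLOW; with l_start = -200
 * and an offset of 100 the effective start is negative, so it returns
 * EINVAL. A zero-length SEEK_SET lock starting at offset 0 passes all checks.
 */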
386
387
388 /*
389 * file_lock_init
390 *
391 * Description: Initialize the file lock group and the uipc and flist locks
392 *
393 * Parameters: (void)
394 *
395 * Returns: void
396 *
397 * Notes: Called at system startup from bsd_init().
398 */
399 void
400 file_lock_init(void)
401 {
402 /* allocate file lock group attribute and group */
403 file_lck_grp_attr = lck_grp_attr_alloc_init();
404
405 file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
406
407 /* Allocate file lock attribute */
408 file_lck_attr = lck_attr_alloc_init();
409 }
410
411
412 void
413 proc_dirs_lock_shared(proc_t p)
414 {
415 lck_rw_lock_shared(&p->p_dirs_lock);
416 }
417
418 void
419 proc_dirs_unlock_shared(proc_t p)
420 {
421 lck_rw_unlock_shared(&p->p_dirs_lock);
422 }
423
424 void
425 proc_dirs_lock_exclusive(proc_t p)
426 {
427 lck_rw_lock_exclusive(&p->p_dirs_lock);
428 }
429
430 void
431 proc_dirs_unlock_exclusive(proc_t p)
432 {
433 lck_rw_unlock_exclusive(&p->p_dirs_lock);
434 }
435
436 /*
437 * proc_fdlock, proc_fdlock_spin
438 *
439 * Description: Lock to control access to the per process struct fileproc
440 * and struct filedesc
441 *
442 * Parameters: p Process to take the lock on
443 *
444 * Returns: void
445 *
446 * Notes: The lock is initialized in forkproc() and destroyed in
447 * reap_child_process().
448 */
449 void
450 proc_fdlock(proc_t p)
451 {
452 lck_mtx_lock(&p->p_fdmlock);
453 }
454
455 void
456 proc_fdlock_spin(proc_t p)
457 {
458 lck_mtx_lock_spin(&p->p_fdmlock);
459 }
460
461 void
462 proc_fdlock_assert(proc_t p, int assertflags)
463 {
464 lck_mtx_assert(&p->p_fdmlock, assertflags);
465 }
466
467
468 /*
469 * proc_fdunlock
470 *
471 * Description: Unlock the lock previously locked by a call to proc_fdlock()
472 *
473 * Parameters: p Process to drop the lock on
474 *
475 * Returns: void
476 */
477 void
478 proc_fdunlock(proc_t p)
479 {
480 lck_mtx_unlock(&p->p_fdmlock);
481 }
482
483 struct fdt_iterator
484 fdt_next(proc_t p, int fd, bool only_settled)
485 {
486 struct fdt_iterator it;
487 struct filedesc *fdp = p->p_fd;
488 struct fileproc *fp;
489 int nfds = min(fdp->fd_lastfile + 1, fdp->fd_nfiles);
490
491 while (++fd < nfds) {
492 fp = fdp->fd_ofiles[fd];
493 if (fp == NULL || fp->fp_glob == NULL) {
494 continue;
495 }
496 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
497 continue;
498 }
499 it.fdti_fd = fd;
500 it.fdti_fp = fp;
501 return it;
502 }
503
504 it.fdti_fd = nfds;
505 it.fdti_fp = NULL;
506 return it;
507 }
508
509 struct fdt_iterator
510 fdt_prev(proc_t p, int fd, bool only_settled)
511 {
512 struct fdt_iterator it;
513 struct filedesc *fdp = p->p_fd;
514 struct fileproc *fp;
515
516 while (--fd >= 0) {
517 fp = fdp->fd_ofiles[fd];
518 if (fp == NULL || fp->fp_glob == NULL) {
519 continue;
520 }
521 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
522 continue;
523 }
524 it.fdti_fd = fd;
525 it.fdti_fp = fp;
526 return it;
527 }
528
529 it.fdti_fd = -1;
530 it.fdti_fp = NULL;
531 return it;
532 }
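
/*
 * Illustrative iteration sketch: fdt_next() is meant to be called with the
 * proc_fdlock held, starting from fd -1, until it reports a NULL fileproc.
 * The function name below is hypothetical.
 */
#if 0
static void
visit_open_files_example(proc_t p)
{
	struct fdt_iterator it;

	proc_fdlock(p);
	it = fdt_next(p, -1, true);
	while (it.fdti_fp != NULL) {
		/* inspect it.fdti_fd / it.fdti_fp here */
		it = fdt_next(p, it.fdti_fd, true);
	}
	proc_fdunlock(p);
}
#endif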
533
534 /*
535 * System calls on descriptors.
536 */
537
538
539 /*
540 * sys_getdtablesize
541 *
542 * Description: Returns the per process maximum size of the descriptor table
543 *
544 * Parameters: p Process being queried
545 * retval Pointer to the call return area
546 *
547 * Returns: 0 Success
548 *
549 * Implicit returns:
550 * *retval (modified) Size of dtable
551 */
552 int
553 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
554 {
555 *retval = (int32_t)MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc);
556
557 return 0;
558 }
559
560
561 static void
562 procfdtbl_reservefd(struct proc * p, int fd)
563 {
564 p->p_fd->fd_ofiles[fd] = NULL;
565 p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
566 }
567
568 void
569 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
570 {
571 if (fp != NULL) {
572 p->p_fd->fd_ofiles[fd] = fp;
573 }
574 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
575 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
576 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
577 wakeup(&p->p_fd);
578 }
579 }
580
581 static void
582 procfdtbl_waitfd(struct proc * p, int fd)
583 {
584 p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
585 msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
586 }
587
588 static void
589 procfdtbl_clearfd(struct proc * p, int fd)
590 {
591 int waiting;
592
593 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
594 p->p_fd->fd_ofiles[fd] = NULL;
595 p->p_fd->fd_ofileflags[fd] = 0;
596 if (waiting == UF_RESVWAIT) {
597 wakeup(&p->p_fd);
598 }
599 }
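
/*
 * Illustrative note: the helpers above implement the UF_RESERVED slot
 * protocol. A typical allocation reserves a slot under the proc_fdlock,
 * publishes the fileproc, and wakes any thread parked in procfdtbl_waitfd().
 * The sketch below is hypothetical and assumes fdalloc() hands back a
 * reserved slot, as its callers later in this file expect.
 */
#if 0
static int
install_fp_example(proc_t p, struct fileproc *fp, int *fd_out)
{
	int fd, error;

	proc_fdlock(p);
	error = fdalloc(p, 0, &fd);              /* slot comes back reserved */
	if (error == 0) {
		procfdtbl_releasefd(p, fd, fp);  /* publish and clear UF_RESERVED */
		*fd_out = fd;
	}
	proc_fdunlock(p);
	return error;
}
#endif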
600
601 /*
602 * fdrelse
603 *
604 * Description: Inline utility function to free an fd in a filedesc
605 *
606 * Parameters: p Process owning the filedesc that
607 * the fd lies in
608 * fd fd to free
609 *
610 * Returns: void
611 *
612 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
613 * the caller
614 */
615 static void
616 fdrelse(struct proc * p, int fd)
617 {
618 struct filedesc *fdp = p->p_fd;
619 int nfd = 0;
620
621 if (fd < fdp->fd_freefile) {
622 fdp->fd_freefile = fd;
623 }
624 #if DIAGNOSTIC
625 if (fd > fdp->fd_lastfile) {
626 panic("fdrelse: fd_lastfile inconsistent");
627 }
628 #endif
629 procfdtbl_clearfd(p, fd);
630
631 while ((nfd = fdp->fd_lastfile) > 0 &&
632 fdp->fd_ofiles[nfd] == NULL &&
633 !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
634 /* JMM - What about files with lingering EV_VANISHED knotes? */
635 fdp->fd_lastfile--;
636 }
637 }
638
639
640 int
641 fd_rdwr(
642 int fd,
643 enum uio_rw rw,
644 uint64_t base,
645 int64_t len,
646 enum uio_seg segflg,
647 off_t offset,
648 int io_flg,
649 int64_t *aresid)
650 {
651 struct fileproc *fp;
652 proc_t p;
653 int error = 0;
654 int flags = 0;
655 int spacetype;
656 uio_t auio = NULL;
657 char uio_buf[UIO_SIZEOF(1)];
658 struct vfs_context context = *(vfs_context_current());
659
660 p = current_proc();
661
662 error = fp_lookup(p, fd, &fp, 0);
663 if (error) {
664 return error;
665 }
666
667 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
668 case DTYPE_VNODE:
669 case DTYPE_PIPE:
670 case DTYPE_SOCKET:
671 break;
672 default:
673 error = EINVAL;
674 goto out;
675 }
676 if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
677 error = EBADF;
678 goto out;
679 }
680
681 if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
682 error = EBADF;
683 goto out;
684 }
685
686 context.vc_ucred = fp->fp_glob->fg_cred;
687
688 if (UIO_SEG_IS_USER_SPACE(segflg)) {
689 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
690 } else {
691 spacetype = UIO_SYSSPACE;
692 }
693
694 auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
695
696 uio_addiov(auio, (user_addr_t)base, (user_size_t)len);
697
698 if (!(io_flg & IO_APPEND)) {
699 flags = FOF_OFFSET;
700 }
701
702 if (rw == UIO_WRITE) {
703 user_ssize_t orig_resid = uio_resid(auio);
704 error = fo_write(fp, auio, flags, &context);
705 if (uio_resid(auio) < orig_resid) {
706 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
707 }
708 } else {
709 error = fo_read(fp, auio, flags, &context);
710 }
711
712 if (aresid) {
713 *aresid = uio_resid(auio);
714 } else if (uio_resid(auio) && error == 0) {
715 error = EIO;
716 }
717 out:
718 fp_drop(p, fd, fp, 0);
719 return error;
720 }
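
/*
 * Illustrative sketch only: fd_rdwr() gives in-kernel callers uio-style I/O
 * against an already-open descriptor. The example reads into a
 * caller-supplied kernel buffer at an explicit offset; the wrapper name is
 * hypothetical.
 */
#if 0
static int
read_at_offset_example(int fd, void *buf, int64_t len, off_t off,
    int64_t *resid)
{
	return fd_rdwr(fd, UIO_READ, (uint64_t)(uintptr_t)buf, len,
	    UIO_SYSSPACE, off, 0, resid);
}
#endif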
721
722
723
724 /*
725 * sys_dup
726 *
727 * Description: Duplicate a file descriptor.
728 *
729 * Parameters: p Process performing the dup
730 * uap->fd The fd to dup
731 * retval Pointer to the call return area
732 *
733 * Returns: 0 Success
734 * !0 Errno
735 *
736 * Implicit returns:
737 * *retval (modified) The new descriptor
738 */
739 int
740 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
741 {
742 struct filedesc *fdp = p->p_fd;
743 int old = uap->fd;
744 int new, error;
745 struct fileproc *fp;
746
747 proc_fdlock(p);
748 if ((error = fp_lookup(p, old, &fp, 1))) {
749 proc_fdunlock(p);
750 return error;
751 }
752 if (FP_ISGUARDED(fp, GUARD_DUP)) {
753 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
754 (void) fp_drop(p, old, fp, 1);
755 proc_fdunlock(p);
756 return error;
757 }
758 if ((error = fdalloc(p, 0, &new))) {
759 fp_drop(p, old, fp, 1);
760 proc_fdunlock(p);
761 return error;
762 }
763 error = finishdup(p, fdp, old, new, 0, retval);
764 fp_drop(p, old, fp, 1);
765 proc_fdunlock(p);
766
767 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
768 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
769 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
770 }
771
772 return error;
773 }
774
775 /*
776 * sys_dup2
777 *
778 * Description: Duplicate a file descriptor to a particular value.
779 *
780 * Parameters: p Process performing the dup
781 * uap->from The fd to dup
782 * uap->to The fd to dup it to
783 * retval Pointer to the call return area
784 *
785 * Returns: 0 Success
786 * !0 Errno
787 *
788 * Implicit returns:
789 * *retval (modified) The new descriptor
790 */
791 int
792 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
793 {
794 return dup2(p, uap->from, uap->to, retval);
795 }
796
797 int
798 dup2(proc_t p, int old, int new, int *retval)
799 {
800 struct filedesc *fdp = p->p_fd;
801 struct fileproc *fp, *nfp;
802 int i, error;
803 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
804
805 proc_fdlock(p);
806
807 startover:
808 if ((error = fp_lookup(p, old, &fp, 1))) {
809 proc_fdunlock(p);
810 return error;
811 }
812 if (FP_ISGUARDED(fp, GUARD_DUP)) {
813 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
814 (void) fp_drop(p, old, fp, 1);
815 proc_fdunlock(p);
816 return error;
817 }
818 if (new < 0 ||
819 (rlim_t)new >= nofile ||
820 new >= maxfilesperproc) {
821 fp_drop(p, old, fp, 1);
822 proc_fdunlock(p);
823 return EBADF;
824 }
825 if (old == new) {
826 fp_drop(p, old, fp, 1);
827 *retval = new;
828 proc_fdunlock(p);
829 return 0;
830 }
831 if (new < 0 || new >= fdp->fd_nfiles) {
832 if ((error = fdalloc(p, new, &i))) {
833 fp_drop(p, old, fp, 1);
834 proc_fdunlock(p);
835 return error;
836 }
837 if (new != i) {
838 fdrelse(p, i);
839 goto closeit;
840 }
841 } else {
842 closeit:
843 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
844 fp_drop(p, old, fp, 1);
845 procfdtbl_waitfd(p, new);
846 #if DIAGNOSTIC
847 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
848 #endif
849 goto startover;
850 }
851
852 if ((nfp = fdp->fd_ofiles[new]) != NULL) {
853 if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
854 fp_drop(p, old, fp, 1);
855 error = fp_guard_exception(p,
856 new, nfp, kGUARD_EXC_CLOSE);
857 proc_fdunlock(p);
858 return error;
859 }
860 (void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
861 proc_fdlock(p);
862 assert(fdp->fd_ofileflags[new] & UF_RESERVED);
863 } else {
864 #if DIAGNOSTIC
865 if (fdp->fd_ofiles[new] != NULL) {
866 panic("dup2: no ref on fileproc %d", new);
867 }
868 #endif
869 procfdtbl_reservefd(p, new);
870 }
871 }
872 #if DIAGNOSTIC
873 if (fdp->fd_ofiles[new] != 0) {
874 panic("dup2: overwriting fd_ofiles with new %d", new);
875 }
876 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
877 panic("dup2: unreserved fileflags with new %d", new);
878 }
879 #endif
880 error = finishdup(p, fdp, old, new, 0, retval);
881 fp_drop(p, old, fp, 1);
882 proc_fdunlock(p);
883
884 return error;
885 }
886
887
888 /*
889 * fcntl
890 *
891 * Description: The file control system call.
892 *
893 * Parameters: p Process performing the fcntl
894 * uap->fd The fd to operate against
895 * uap->cmd The command to perform
896 * uap->arg Pointer to the command argument
897 * retval Pointer to the call return area
898 *
899 * Returns: 0 Success
900 * !0 Errno (see fcntl_nocancel)
901 *
902 * Implicit returns:
903 * *retval (modified) fcntl return value (if any)
904 *
905 * Notes: This system call differs from fcntl_nocancel() in that it
906 * tests for cancellation prior to performing a potentially
907 * blocking operation.
908 */
909 int
910 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
911 {
912 __pthread_testcancel(1);
913 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
914 }
915
916 #define ACCOUNT_OPENFROM_ENTITLEMENT \
917 "com.apple.private.vfs.role-account-openfrom"
918
919 /*
920 * sys_fcntl_nocancel
921 *
922 * Description: A non-cancel-testing file control system call.
923 *
924 * Parameters: p Process performing the fcntl
925 * uap->fd The fd to operate against
926 * uap->cmd The command to perform
927 * uap->arg Pointer to the command argument
928 * retval Pointer to the call return area
929 *
930 * Returns: 0 Success
931 * EINVAL
932 * fp_lookup:EBADF Bad file descriptor
933 * [F_DUPFD]
934 * fdalloc:EMFILE
935 * fdalloc:ENOMEM
936 * finishdup:EBADF
937 * finishdup:ENOMEM
938 * [F_SETOWN]
939 * ESRCH
940 * [F_SETLK]
941 * EBADF
942 * EOVERFLOW
943 * copyin:EFAULT
944 * vnode_getwithref:???
945 * VNOP_ADVLOCK:???
946 * msleep:ETIMEDOUT
947 * [F_GETLK]
948 * EBADF
949 * EOVERFLOW
950 * copyin:EFAULT
951 * copyout:EFAULT
952 * vnode_getwithref:???
953 * VNOP_ADVLOCK:???
954 * [F_PREALLOCATE]
955 * EBADF
956 * EINVAL
957 * copyin:EFAULT
958 * copyout:EFAULT
959 * vnode_getwithref:???
960 * VNOP_ALLOCATE:???
961 * [F_SETSIZE,F_RDADVISE]
962 * EBADF
963 * EINVAL
964 * copyin:EFAULT
965 * vnode_getwithref:???
966 * [F_RDAHEAD,F_NOCACHE]
967 * EBADF
968 * vnode_getwithref:???
969 * [???]
970 *
971 * Implicit returns:
972 * *retval (modified) fcntl return value (if any)
973 */
974 int
975 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
976 {
977 int fd = uap->fd;
978 struct filedesc *fdp = p->p_fd;
979 struct fileproc *fp;
980 char *pop;
981 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
982 unsigned int oflags, nflags;
983 int i, tmp, error, error2, flg = 0;
984 struct flock fl = {};
985 struct flocktimeout fltimeout;
986 struct timespec *timeout = NULL;
987 struct vfs_context context;
988 off_t offset;
989 int newmin;
990 daddr64_t lbn, bn;
991 unsigned int fflag;
992 user_addr_t argp;
993 boolean_t is64bit;
994 rlim_t nofile;
995 int has_entitlement = 0;
996
997 AUDIT_ARG(fd, uap->fd);
998 AUDIT_ARG(cmd, uap->cmd);
999
1000 nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
1001
1002 proc_fdlock(p);
1003 if ((error = fp_lookup(p, fd, &fp, 1))) {
1004 proc_fdunlock(p);
1005 return error;
1006 }
1007 context.vc_thread = current_thread();
1008 context.vc_ucred = fp->f_cred;
1009
1010 is64bit = proc_is64bit(p);
1011 if (is64bit) {
1012 argp = uap->arg;
1013 } else {
1014 /*
1015 * Since the arg parameter is defined as a long but may be
1016 * either a long or a pointer we must take care to handle
1017 * sign extension issues. Our sys call munger will sign
1018 * extend a long when we are called from a 32-bit process.
1019 * Since we can never have an address greater than 32-bits
1020 * from a 32-bit process we lop off the top 32-bits to avoid
1021 * getting the wrong address
1022 */
1023 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
1024 }
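/*
 * Worked example for the cast above: a 32-bit caller passing the pointer
 * 0xFFFF1000 arrives here sign-extended to 0xFFFFFFFFFFFF1000; the
 * (uint32_t) cast truncates it back to 0xFFFF1000 before it is re-widened
 * into a user_addr_t.
 */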
1025
1026 #if CONFIG_MACF
1027 error = mac_file_check_fcntl(proc_ucred(p), fp->fp_glob, uap->cmd,
1028 uap->arg);
1029 if (error) {
1030 goto out;
1031 }
1032 #endif
1033
1034 pop = &fdp->fd_ofileflags[fd];
1035
1036 switch (uap->cmd) {
1037 case F_DUPFD:
1038 case F_DUPFD_CLOEXEC:
1039 if (FP_ISGUARDED(fp, GUARD_DUP)) {
1040 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
1041 goto out;
1042 }
1043 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1044 AUDIT_ARG(value32, newmin);
1045 if ((rlim_t)newmin >= nofile ||
1046 newmin >= maxfilesperproc) {
1047 error = EINVAL;
1048 goto out;
1049 }
1050 if ((error = fdalloc(p, newmin, &i))) {
1051 goto out;
1052 }
1053 error = finishdup(p, fdp, fd, i,
1054 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
1055 goto out;
1056
1057 case F_GETFD:
1058 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
1059 error = 0;
1060 goto out;
1061
1062 case F_SETFD:
1063 AUDIT_ARG(value32, (uint32_t)uap->arg);
1064 if (uap->arg & FD_CLOEXEC) {
1065 *pop |= UF_EXCLOSE;
1066 } else {
1067 if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
1068 error = fp_guard_exception(p,
1069 fd, fp, kGUARD_EXC_NOCLOEXEC);
1070 goto out;
1071 }
1072 *pop &= ~UF_EXCLOSE;
1073 }
1074 error = 0;
1075 goto out;
1076
1077 case F_GETFL:
1078 *retval = OFLAGS(fp->f_flag);
1079 error = 0;
1080 goto out;
1081
1082 case F_SETFL:
1083 // FIXME (rdar://54898652)
1084 //
1085 // this code is broken if fcntl(F_SETFL), ioctl() are
1086 // called concurrently for the same fileglob.
1087
1088 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1089 AUDIT_ARG(value32, tmp);
1090
1091 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
1092 nflags = oflags & ~FCNTLFLAGS;
1093 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
1094 });
1095 tmp = nflags & FNONBLOCK;
1096 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1097 if (error) {
1098 goto out;
1099 }
1100 tmp = nflags & FASYNC;
1101 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1102 if (!error) {
1103 goto out;
1104 }
1105 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1106 tmp = 0;
1107 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1108 goto out;
1109
1110 case F_GETOWN:
1111 if (fp->f_type == DTYPE_SOCKET) {
1112 *retval = ((struct socket *)fp->f_data)->so_pgid;
1113 error = 0;
1114 goto out;
1115 }
1116 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
1117 *retval = -*retval;
1118 goto out;
1119
1120 case F_SETOWN:
1121 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
1122 AUDIT_ARG(value32, tmp);
1123 if (fp->f_type == DTYPE_SOCKET) {
1124 ((struct socket *)fp->f_data)->so_pgid = tmp;
1125 error = 0;
1126 goto out;
1127 }
1128 if (fp->f_type == DTYPE_PIPE) {
1129 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1130 goto out;
1131 }
1132
1133 if (tmp <= 0) {
1134 tmp = -tmp;
1135 } else {
1136 proc_t p1 = proc_find(tmp);
1137 if (p1 == 0) {
1138 error = ESRCH;
1139 goto out;
1140 }
1141 tmp = (int)p1->p_pgrpid;
1142 proc_rele(p1);
1143 }
1144 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1145 goto out;
1146
1147 case F_SETNOSIGPIPE:
1148 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
1149 if (fp->f_type == DTYPE_SOCKET) {
1150 #if SOCKETS
1151 error = sock_setsockopt((struct socket *)fp->f_data,
1152 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
1153 #else
1154 error = EINVAL;
1155 #endif
1156 } else {
1157 struct fileglob *fg = fp->fp_glob;
1158
1159 lck_mtx_lock_spin(&fg->fg_lock);
1160 if (tmp) {
1161 fg->fg_lflags |= FG_NOSIGPIPE;
1162 } else {
1163 fg->fg_lflags &= ~FG_NOSIGPIPE;
1164 }
1165 lck_mtx_unlock(&fg->fg_lock);
1166 error = 0;
1167 }
1168 goto out;
1169
1170 case F_GETNOSIGPIPE:
1171 if (fp->f_type == DTYPE_SOCKET) {
1172 #if SOCKETS
1173 int retsize = sizeof(*retval);
1174 error = sock_getsockopt((struct socket *)fp->f_data,
1175 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
1176 #else
1177 error = EINVAL;
1178 #endif
1179 } else {
1180 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
1181 1 : 0;
1182 error = 0;
1183 }
1184 goto out;
1185
1186 case F_SETCONFINED:
1187 /*
1188 * If this is the only reference to this fglob in the process
1189 * and it's already marked as close-on-fork then mark it as
1190 * (immutably) "confined" i.e. any fd that points to it will
1191 * forever be close-on-fork, and attempts to use an IPC
1192 * mechanism to move the descriptor elsewhere will fail.
1193 */
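/*
 * Illustrative example: a descriptor opened with O_CLOFORK (so UF_FORKCLOSE
 * is set) and not yet dup'd can be confined with fcntl(fd, F_SETCONFINED, 1);
 * a second reference to the fileglob yields EAGAIN below, and any later
 * attempt to clear the flag yields EPERM.
 */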
1194 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1195 struct fileglob *fg = fp->fp_glob;
1196
1197 lck_mtx_lock_spin(&fg->fg_lock);
1198 if (fg->fg_lflags & FG_CONFINED) {
1199 error = 0;
1200 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
1201 error = EAGAIN; /* go close the dup .. */
1202 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1203 fg->fg_lflags |= FG_CONFINED;
1204 error = 0;
1205 } else {
1206 error = EBADF; /* open without O_CLOFORK? */
1207 }
1208 lck_mtx_unlock(&fg->fg_lock);
1209 } else {
1210 /*
1211 * Other subsystems may have built on the immutability
1212 * of FG_CONFINED; clearing it may be tricky.
1213 */
1214 error = EPERM; /* immutable */
1215 }
1216 goto out;
1217
1218 case F_GETCONFINED:
1219 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
1220 error = 0;
1221 goto out;
1222
1223 case F_SETLKWTIMEOUT:
1224 case F_SETLKW:
1225 case F_OFD_SETLKWTIMEOUT:
1226 case F_OFD_SETLKW:
1227 flg |= F_WAIT;
1228 OS_FALLTHROUGH;
1229
1230 case F_SETLK:
1231 case F_OFD_SETLK:
1232 if (fp->f_type != DTYPE_VNODE) {
1233 error = EBADF;
1234 goto out;
1235 }
1236 vp = (struct vnode *)fp->f_data;
1237
1238 fflag = fp->f_flag;
1239 offset = fp->f_offset;
1240 proc_fdunlock(p);
1241
1242 /* Copy in the lock structure */
1243 if (F_SETLKWTIMEOUT == uap->cmd ||
1244 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1245 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1246 if (error) {
1247 goto outdrop;
1248 }
1249 fl = fltimeout.fl;
1250 timeout = &fltimeout.timeout;
1251 } else {
1252 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1253 if (error) {
1254 goto outdrop;
1255 }
1256 }
1257
1258 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1259 /* and ending byte for EOVERFLOW in SEEK_SET */
1260 error = check_file_seek_range(&fl, offset);
1261 if (error) {
1262 goto outdrop;
1263 }
1264
1265 if ((error = vnode_getwithref(vp))) {
1266 goto outdrop;
1267 }
1268 if (fl.l_whence == SEEK_CUR) {
1269 fl.l_start += offset;
1270 }
1271
1272 #if CONFIG_MACF
1273 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1274 F_SETLK, &fl);
1275 if (error) {
1276 (void)vnode_put(vp);
1277 goto outdrop;
1278 }
1279 #endif
1280 switch (uap->cmd) {
1281 case F_OFD_SETLK:
1282 case F_OFD_SETLKW:
1283 case F_OFD_SETLKWTIMEOUT:
1284 flg |= F_OFD_LOCK;
1285 switch (fl.l_type) {
1286 case F_RDLCK:
1287 if ((fflag & FREAD) == 0) {
1288 error = EBADF;
1289 break;
1290 }
1291 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1292 F_SETLK, &fl, flg, &context, timeout);
1293 break;
1294 case F_WRLCK:
1295 if ((fflag & FWRITE) == 0) {
1296 error = EBADF;
1297 break;
1298 }
1299 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1300 F_SETLK, &fl, flg, &context, timeout);
1301 break;
1302 case F_UNLCK:
1303 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1304 F_UNLCK, &fl, F_OFD_LOCK, &context,
1305 timeout);
1306 break;
1307 default:
1308 error = EINVAL;
1309 break;
1310 }
1311 if (0 == error &&
1312 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1313 struct fileglob *fg = fp->fp_glob;
1314
1315 /*
1316 * arrange F_UNLCK on last close (once
1317 * set, FG_HAS_OFDLOCK is immutable)
1318 */
1319 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1320 lck_mtx_lock_spin(&fg->fg_lock);
1321 fg->fg_lflags |= FG_HAS_OFDLOCK;
1322 lck_mtx_unlock(&fg->fg_lock);
1323 }
1324 }
1325 break;
1326 default:
1327 flg |= F_POSIX;
1328 switch (fl.l_type) {
1329 case F_RDLCK:
1330 if ((fflag & FREAD) == 0) {
1331 error = EBADF;
1332 break;
1333 }
1334 // XXX UInt32 unsafe for LP64 kernel
1335 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1336 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1337 F_SETLK, &fl, flg, &context, timeout);
1338 break;
1339 case F_WRLCK:
1340 if ((fflag & FWRITE) == 0) {
1341 error = EBADF;
1342 break;
1343 }
1344 // XXX UInt32 unsafe for LP64 kernel
1345 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1346 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1347 F_SETLK, &fl, flg, &context, timeout);
1348 break;
1349 case F_UNLCK:
1350 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1351 F_UNLCK, &fl, F_POSIX, &context, timeout);
1352 break;
1353 default:
1354 error = EINVAL;
1355 break;
1356 }
1357 break;
1358 }
1359 (void) vnode_put(vp);
1360 goto outdrop;
1361
1362 case F_GETLK:
1363 case F_OFD_GETLK:
1364 case F_GETLKPID:
1365 case F_OFD_GETLKPID:
1366 if (fp->f_type != DTYPE_VNODE) {
1367 error = EBADF;
1368 goto out;
1369 }
1370 vp = (struct vnode *)fp->f_data;
1371
1372 offset = fp->f_offset;
1373 proc_fdunlock(p);
1374
1375 /* Copy in the lock structure */
1376 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1377 if (error) {
1378 goto outdrop;
1379 }
1380
1381 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1382 /* and ending byte for EOVERFLOW in SEEK_SET */
1383 error = check_file_seek_range(&fl, offset);
1384 if (error) {
1385 goto outdrop;
1386 }
1387
1388 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1389 error = EINVAL;
1390 goto outdrop;
1391 }
1392
1393 switch (fl.l_type) {
1394 case F_RDLCK:
1395 case F_UNLCK:
1396 case F_WRLCK:
1397 break;
1398 default:
1399 error = EINVAL;
1400 goto outdrop;
1401 }
1402
1403 switch (fl.l_whence) {
1404 case SEEK_CUR:
1405 case SEEK_SET:
1406 case SEEK_END:
1407 break;
1408 default:
1409 error = EINVAL;
1410 goto outdrop;
1411 }
1412
1413 if ((error = vnode_getwithref(vp)) == 0) {
1414 if (fl.l_whence == SEEK_CUR) {
1415 fl.l_start += offset;
1416 }
1417
1418 #if CONFIG_MACF
1419 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1420 uap->cmd, &fl);
1421 if (error == 0)
1422 #endif
1423 switch (uap->cmd) {
1424 case F_OFD_GETLK:
1425 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1426 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1427 break;
1428 case F_OFD_GETLKPID:
1429 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1430 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1431 break;
1432 default:
1433 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1434 uap->cmd, &fl, F_POSIX, &context, NULL);
1435 break;
1436 }
1437
1438 (void)vnode_put(vp);
1439
1440 if (error == 0) {
1441 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1442 }
1443 }
1444 goto outdrop;
1445
1446 case F_PREALLOCATE: {
1447 fstore_t alloc_struct; /* structure for allocate command */
1448 u_int32_t alloc_flags = 0;
1449
1450 if (fp->f_type != DTYPE_VNODE) {
1451 error = EBADF;
1452 goto out;
1453 }
1454
1455 vp = (struct vnode *)fp->f_data;
1456 proc_fdunlock(p);
1457
1458 /* make sure that we have write permission */
1459 if ((fp->f_flag & FWRITE) == 0) {
1460 error = EBADF;
1461 goto outdrop;
1462 }
1463
1464 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1465 if (error) {
1466 goto outdrop;
1467 }
1468
1469 /* now set the space allocated to 0 */
1470 alloc_struct.fst_bytesalloc = 0;
1471
1472 /*
1473 * Do some simple parameter checking
1474 */
1475
1476 /* set up the flags */
1477
1478 alloc_flags |= PREALLOCATE;
1479
1480 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1481 alloc_flags |= ALLOCATECONTIG;
1482 }
1483
1484 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1485 alloc_flags |= ALLOCATEALL;
1486 }
1487
1488 /*
1489 * Do any position mode specific stuff. The only
1490 * position mode supported now is PEOFPOSMODE
1491 */
1492
1493 switch (alloc_struct.fst_posmode) {
1494 case F_PEOFPOSMODE:
1495 if (alloc_struct.fst_offset != 0) {
1496 error = EINVAL;
1497 goto outdrop;
1498 }
1499
1500 alloc_flags |= ALLOCATEFROMPEOF;
1501 break;
1502
1503 case F_VOLPOSMODE:
1504 if (alloc_struct.fst_offset <= 0) {
1505 error = EINVAL;
1506 goto outdrop;
1507 }
1508
1509 alloc_flags |= ALLOCATEFROMVOL;
1510 break;
1511
1512 default: {
1513 error = EINVAL;
1514 goto outdrop;
1515 }
1516 }
1517 if ((error = vnode_getwithref(vp)) == 0) {
1518 /*
1519 * call allocate to get the space
1520 */
1521 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1522 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1523 &context);
1524 (void)vnode_put(vp);
1525
1526 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1527
1528 if (error == 0) {
1529 error = error2;
1530 }
1531 }
1532 goto outdrop;
1533 }
1534 case F_PUNCHHOLE: {
1535 fpunchhole_t args;
1536
1537 if (fp->f_type != DTYPE_VNODE) {
1538 error = EBADF;
1539 goto out;
1540 }
1541
1542 vp = (struct vnode *)fp->f_data;
1543 proc_fdunlock(p);
1544
1545 /* need write permissions */
1546 if ((fp->f_flag & FWRITE) == 0) {
1547 error = EPERM;
1548 goto outdrop;
1549 }
1550
1551 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1552 goto outdrop;
1553 }
1554
1555 if ((error = vnode_getwithref(vp))) {
1556 goto outdrop;
1557 }
1558
1559 #if CONFIG_MACF
1560 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
1561 (void)vnode_put(vp);
1562 goto outdrop;
1563 }
1564 #endif
1565
1566 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1567 (void)vnode_put(vp);
1568
1569 goto outdrop;
1570 }
1571 case F_TRIM_ACTIVE_FILE: {
1572 ftrimactivefile_t args;
1573
1574 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1575 error = EACCES;
1576 goto out;
1577 }
1578
1579 if (fp->f_type != DTYPE_VNODE) {
1580 error = EBADF;
1581 goto out;
1582 }
1583
1584 vp = (struct vnode *)fp->f_data;
1585 proc_fdunlock(p);
1586
1587 /* need write permissions */
1588 if ((fp->f_flag & FWRITE) == 0) {
1589 error = EPERM;
1590 goto outdrop;
1591 }
1592
1593 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1594 goto outdrop;
1595 }
1596
1597 if ((error = vnode_getwithref(vp))) {
1598 goto outdrop;
1599 }
1600
1601 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1602 (void)vnode_put(vp);
1603
1604 goto outdrop;
1605 }
1606 case F_SPECULATIVE_READ: {
1607 fspecread_t args;
1608
1609 if (fp->f_type != DTYPE_VNODE) {
1610 error = EBADF;
1611 goto out;
1612 }
1613
1614 vp = (struct vnode *)fp->f_data;
1615 proc_fdunlock(p);
1616
1617 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1618 goto outdrop;
1619 }
1620
1621 /* Discard invalid offsets or lengths */
1622 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1623 error = EINVAL;
1624 goto outdrop;
1625 }
1626
1627 /*
1628 * Round the file offset down to a page-size boundary (or to 0).
1629 * The filesystem will need to round the length up to the end of the page boundary
1630 * or to the EOF of the file.
1631 */
1632 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1633 uint64_t foff_delta = args.fsr_offset - foff;
1634 args.fsr_offset = (off_t) foff;
1635
1636 /*
1637 * Now add in the delta to the supplied length. Since we may have adjusted the
1638 * offset, increase it by the amount that we adjusted.
1639 */
1640 args.fsr_length += foff_delta;
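/*
 * Worked example (assuming 4 KiB pages): a request with fsr_offset = 0x1234
 * and fsr_length = 0x100 is rewritten above to fsr_offset = 0x1000 and
 * fsr_length = 0x334, so the original byte range stays covered after the
 * page alignment.
 */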
1641
1642 if ((error = vnode_getwithref(vp))) {
1643 goto outdrop;
1644 }
1645 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1646 (void)vnode_put(vp);
1647
1648 goto outdrop;
1649 }
1650 case F_SETSIZE:
1651 if (fp->f_type != DTYPE_VNODE) {
1652 error = EBADF;
1653 goto out;
1654 }
1655 vp = (struct vnode *)fp->f_data;
1656 proc_fdunlock(p);
1657
1658 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1659 if (error) {
1660 goto outdrop;
1661 }
1662 AUDIT_ARG(value64, offset);
1663
1664 error = vnode_getwithref(vp);
1665 if (error) {
1666 goto outdrop;
1667 }
1668
1669 #if CONFIG_MACF
1670 error = mac_vnode_check_truncate(&context,
1671 fp->fp_glob->fg_cred, vp);
1672 if (error) {
1673 (void)vnode_put(vp);
1674 goto outdrop;
1675 }
1676 #endif
1677 /*
1678 * Make sure that we are root. Growing a file
1679 * without zero filling the data is a security hole.
1680 */
1681 if (!kauth_cred_issuser(kauth_cred_get())) {
1682 error = EACCES;
1683 } else {
1684 /*
1685 * Require privilege to change file size without zerofill,
1686 * else will change the file size and zerofill it.
1687 */
1688 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
1689 if (error == 0) {
1690 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
1691 } else {
1692 error = vnode_setsize(vp, offset, 0, &context);
1693 }
1694
1695 #if CONFIG_MACF
1696 if (error == 0) {
1697 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
1698 }
1699 #endif
1700 }
1701
1702 (void)vnode_put(vp);
1703 goto outdrop;
1704
1705 case F_RDAHEAD:
1706 if (fp->f_type != DTYPE_VNODE) {
1707 error = EBADF;
1708 goto out;
1709 }
1710 if (uap->arg) {
1711 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1712 } else {
1713 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1714 }
1715 goto out;
1716
1717 case F_NOCACHE:
1718 if (fp->f_type != DTYPE_VNODE) {
1719 error = EBADF;
1720 goto out;
1721 }
1722 if (uap->arg) {
1723 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1724 } else {
1725 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1726 }
1727 goto out;
1728
1729 case F_NODIRECT:
1730 if (fp->f_type != DTYPE_VNODE) {
1731 error = EBADF;
1732 goto out;
1733 }
1734 if (uap->arg) {
1735 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1736 } else {
1737 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1738 }
1739 goto out;
1740
1741 case F_SINGLE_WRITER:
1742 if (fp->f_type != DTYPE_VNODE) {
1743 error = EBADF;
1744 goto out;
1745 }
1746 if (uap->arg) {
1747 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1748 } else {
1749 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1750 }
1751 goto out;
1752
1753 case F_GLOBAL_NOCACHE:
1754 if (fp->f_type != DTYPE_VNODE) {
1755 error = EBADF;
1756 goto out;
1757 }
1758 vp = (struct vnode *)fp->f_data;
1759 proc_fdunlock(p);
1760
1761 if ((error = vnode_getwithref(vp)) == 0) {
1762 *retval = vnode_isnocache(vp);
1763
1764 if (uap->arg) {
1765 vnode_setnocache(vp);
1766 } else {
1767 vnode_clearnocache(vp);
1768 }
1769
1770 (void)vnode_put(vp);
1771 }
1772 goto outdrop;
1773
1774 case F_CHECK_OPENEVT:
1775 if (fp->f_type != DTYPE_VNODE) {
1776 error = EBADF;
1777 goto out;
1778 }
1779 vp = (struct vnode *)fp->f_data;
1780 proc_fdunlock(p);
1781
1782 if ((error = vnode_getwithref(vp)) == 0) {
1783 *retval = vnode_is_openevt(vp);
1784
1785 if (uap->arg) {
1786 vnode_set_openevt(vp);
1787 } else {
1788 vnode_clear_openevt(vp);
1789 }
1790
1791 (void)vnode_put(vp);
1792 }
1793 goto outdrop;
1794
1795 case F_RDADVISE: {
1796 struct radvisory ra_struct;
1797
1798 if (fp->f_type != DTYPE_VNODE) {
1799 error = EBADF;
1800 goto out;
1801 }
1802 vp = (struct vnode *)fp->f_data;
1803 proc_fdunlock(p);
1804
1805 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1806 goto outdrop;
1807 }
1808 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
1809 error = EINVAL;
1810 goto outdrop;
1811 }
1812 if ((error = vnode_getwithref(vp)) == 0) {
1813 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1814
1815 (void)vnode_put(vp);
1816 }
1817 goto outdrop;
1818 }
1819
1820 case F_FLUSH_DATA:
1821
1822 if (fp->f_type != DTYPE_VNODE) {
1823 error = EBADF;
1824 goto out;
1825 }
1826 vp = (struct vnode *)fp->f_data;
1827 proc_fdunlock(p);
1828
1829 if ((error = vnode_getwithref(vp)) == 0) {
1830 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1831
1832 (void)vnode_put(vp);
1833 }
1834 goto outdrop;
1835
1836 case F_LOG2PHYS:
1837 case F_LOG2PHYS_EXT: {
1838 struct log2phys l2p_struct = {}; /* structure for allocate command */
1839 int devBlockSize;
1840
1841 off_t file_offset = 0;
1842 size_t a_size = 0;
1843 size_t run = 0;
1844
1845 if (uap->cmd == F_LOG2PHYS_EXT) {
1846 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1847 if (error) {
1848 goto out;
1849 }
1850 file_offset = l2p_struct.l2p_devoffset;
1851 } else {
1852 file_offset = fp->f_offset;
1853 }
1854 if (fp->f_type != DTYPE_VNODE) {
1855 error = EBADF;
1856 goto out;
1857 }
1858 vp = (struct vnode *)fp->f_data;
1859 proc_fdunlock(p);
1860 if ((error = vnode_getwithref(vp))) {
1861 goto outdrop;
1862 }
1863 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1864 if (error) {
1865 (void)vnode_put(vp);
1866 goto outdrop;
1867 }
1868 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1869 if (error) {
1870 (void)vnode_put(vp);
1871 goto outdrop;
1872 }
1873 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1874 if (uap->cmd == F_LOG2PHYS_EXT) {
1875 if (l2p_struct.l2p_contigbytes < 0) {
1876 vnode_put(vp);
1877 error = EINVAL;
1878 goto outdrop;
1879 }
1880
1881 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1882 } else {
1883 a_size = devBlockSize;
1884 }
1885
1886 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1887
1888 (void)vnode_put(vp);
1889
1890 if (!error) {
1891 l2p_struct.l2p_flags = 0; /* for now */
1892 if (uap->cmd == F_LOG2PHYS_EXT) {
1893 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1894 } else {
1895 l2p_struct.l2p_contigbytes = 0; /* for now */
1896 }
1897
1898 /*
1899 * The block number being -1 suggests that the file offset is not backed
1900 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1901 */
1902 if (bn == -1) {
1903 /* Don't multiply it by the block size */
1904 l2p_struct.l2p_devoffset = bn;
1905 } else {
1906 l2p_struct.l2p_devoffset = bn * devBlockSize;
1907 l2p_struct.l2p_devoffset += file_offset - offset;
1908 }
1909 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1910 }
1911 goto outdrop;
1912 }
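/*
 * Worked example for the case above (illustrative): with a 4096-byte device
 * block size, a file offset of 10000 maps to logical block 2 (block offset
 * 8192). If VNOP_BLOCKMAP reports physical block 555, the returned
 * l2p_devoffset is 555 * 4096 + (10000 - 8192) = 2275088.
 */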
1913 case F_GETPATH:
1914 case F_GETPATH_NOFIRMLINK: {
1915 char *pathbufp;
1916 int pathlen;
1917
1918 if (fp->f_type != DTYPE_VNODE) {
1919 error = EBADF;
1920 goto out;
1921 }
1922 vp = (struct vnode *)fp->f_data;
1923 proc_fdunlock(p);
1924
1925 pathlen = MAXPATHLEN;
1926 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1927 if (pathbufp == NULL) {
1928 error = ENOMEM;
1929 goto outdrop;
1930 }
1931 if ((error = vnode_getwithref(vp)) == 0) {
1932 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1933 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1934 } else {
1935 error = vn_getpath(vp, pathbufp, &pathlen);
1936 }
1937 (void)vnode_put(vp);
1938
1939 if (error == 0) {
1940 error = copyout((caddr_t)pathbufp, argp, pathlen);
1941 }
1942 }
1943 FREE(pathbufp, M_TEMP);
1944 goto outdrop;
1945 }
1946
1947 case F_PATHPKG_CHECK: {
1948 char *pathbufp;
1949 size_t pathlen;
1950
1951 if (fp->f_type != DTYPE_VNODE) {
1952 error = EBADF;
1953 goto out;
1954 }
1955 vp = (struct vnode *)fp->f_data;
1956 proc_fdunlock(p);
1957
1958 pathlen = MAXPATHLEN;
1959 pathbufp = zalloc(ZV_NAMEI);
1960
1961 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1962 if ((error = vnode_getwithref(vp)) == 0) {
1963 AUDIT_ARG(text, pathbufp);
1964 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
1965
1966 (void)vnode_put(vp);
1967 }
1968 }
1969 zfree(ZV_NAMEI, pathbufp);
1970 goto outdrop;
1971 }
1972
1973 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1974 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1975 case F_BARRIERFSYNC: // fsync + barrier
1976 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1977 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1978 if (fp->f_type != DTYPE_VNODE) {
1979 error = EBADF;
1980 goto out;
1981 }
1982 vp = (struct vnode *)fp->f_data;
1983 proc_fdunlock(p);
1984
1985 if ((error = vnode_getwithref(vp)) == 0) {
1986 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1987
1988 (void)vnode_put(vp);
1989 }
1990 break;
1991 }
1992
1993 /*
1994 * SPI (private) for opening a file starting from a dir fd
1995 */
1996 case F_OPENFROM: {
1997 struct user_fopenfrom fopen;
1998 struct vnode_attr va;
1999 struct nameidata nd;
2000 int cmode;
2001
2002 /* Check if this isn't a valid file descriptor */
2003 if ((fp->f_type != DTYPE_VNODE) ||
2004 (fp->f_flag & FREAD) == 0) {
2005 error = EBADF;
2006 goto out;
2007 }
2008 vp = (struct vnode *)fp->f_data;
2009 proc_fdunlock(p);
2010
2011 if (vnode_getwithref(vp)) {
2012 error = ENOENT;
2013 goto outdrop;
2014 }
2015
2016 /* Only valid for directories */
2017 if (vp->v_type != VDIR) {
2018 vnode_put(vp);
2019 error = ENOTDIR;
2020 goto outdrop;
2021 }
2022
2023 /*
2024 * Only entitled apps may use the credentials of the thread
2025 * that opened the file descriptor.
2026 * Non-entitled threads will use their own context.
2027 */
2028 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2029 has_entitlement = 1;
2030 }
2031
2032 /* Get flags, mode and pathname arguments. */
2033 if (IS_64BIT_PROCESS(p)) {
2034 error = copyin(argp, &fopen, sizeof(fopen));
2035 } else {
2036 struct user32_fopenfrom fopen32;
2037
2038 error = copyin(argp, &fopen32, sizeof(fopen32));
2039 fopen.o_flags = fopen32.o_flags;
2040 fopen.o_mode = fopen32.o_mode;
2041 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2042 }
2043 if (error) {
2044 vnode_put(vp);
2045 goto outdrop;
2046 }
2047 AUDIT_ARG(fflags, fopen.o_flags);
2048 AUDIT_ARG(mode, fopen.o_mode);
2049 VATTR_INIT(&va);
2050 /* Mask off all but regular access permissions */
2051 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2052 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2053
2054 /* Start the lookup relative to the file descriptor's vnode. */
2055 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2056 fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2057 nd.ni_dvp = vp;
2058
2059 error = open1(has_entitlement ? &context : vfs_context_current(),
2060 &nd, fopen.o_flags, &va, fileproc_alloc_init, NULL, retval);
2061
2062 vnode_put(vp);
2063 break;
2064 }
2065 /*
2066 * SPI (private) for unlinking a file starting from a dir fd
2067 */
2068 case F_UNLINKFROM: {
2069 user_addr_t pathname;
2070
2071 /* Check if this isn't a valid file descriptor */
2072 if ((fp->f_type != DTYPE_VNODE) ||
2073 (fp->f_flag & FREAD) == 0) {
2074 error = EBADF;
2075 goto out;
2076 }
2077 vp = (struct vnode *)fp->f_data;
2078 proc_fdunlock(p);
2079
2080 if (vnode_getwithref(vp)) {
2081 error = ENOENT;
2082 goto outdrop;
2083 }
2084
2085 /* Only valid for directories */
2086 if (vp->v_type != VDIR) {
2087 vnode_put(vp);
2088 error = ENOTDIR;
2089 goto outdrop;
2090 }
2091
2092 /*
2093 * Only entitled apps may use the credentials of the thread
2094 * that opened the file descriptor.
2095 * Non-entitled threads will use their own context.
2096 */
2097 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2098 has_entitlement = 1;
2099 }
2100
2101 /* Get flags, mode and pathname arguments. */
2102 if (IS_64BIT_PROCESS(p)) {
2103 pathname = (user_addr_t)argp;
2104 } else {
2105 pathname = CAST_USER_ADDR_T(argp);
2106 }
2107
2108 /* Start the lookup relative to the file descriptor's vnode. */
2109 error = unlink1(has_entitlement ? &context : vfs_context_current(),
2110 vp, pathname, UIO_USERSPACE, 0);
2111
2112 vnode_put(vp);
2113 break;
2114 }
2115
2116 case F_ADDSIGS:
2117 case F_ADDFILESIGS:
2118 case F_ADDFILESIGS_FOR_DYLD_SIM:
2119 case F_ADDFILESIGS_RETURN:
2120 case F_ADDFILESIGS_INFO:
2121 {
2122 struct cs_blob *blob = NULL;
2123 struct user_fsignatures fs;
2124 kern_return_t kr;
2125 vm_offset_t kernel_blob_addr;
2126 vm_size_t kernel_blob_size;
2127 int blob_add_flags = 0;
2128 const size_t sizeof_fs = (uap->cmd == F_ADDFILESIGS_INFO ?
2129 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
2130 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
2131
2132 if (fp->f_type != DTYPE_VNODE) {
2133 error = EBADF;
2134 goto out;
2135 }
2136 vp = (struct vnode *)fp->f_data;
2137 proc_fdunlock(p);
2138
2139 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2140 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
2141 if ((p->p_csflags & CS_KILL) == 0) {
2142 proc_lock(p);
2143 p->p_csflags |= CS_KILL;
2144 proc_unlock(p);
2145 }
2146 }
2147
2148 error = vnode_getwithref(vp);
2149 if (error) {
2150 goto outdrop;
2151 }
2152
2153 if (IS_64BIT_PROCESS(p)) {
2154 error = copyin(argp, &fs, sizeof_fs);
2155 } else {
2156 if (uap->cmd == F_ADDFILESIGS_INFO) {
2157 error = EINVAL;
2158 vnode_put(vp);
2159 goto outdrop;
2160 }
2161
2162 struct user32_fsignatures fs32;
2163
2164 error = copyin(argp, &fs32, sizeof(fs32));
2165 fs.fs_file_start = fs32.fs_file_start;
2166 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
2167 fs.fs_blob_size = fs32.fs_blob_size;
2168 }
2169
2170 if (error) {
2171 vnode_put(vp);
2172 goto outdrop;
2173 }
2174
2175 /*
2176 * First check if we have something loaded at this offset
2177 */
2178 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
2179 if (blob != NULL) {
2180 /* If this is for dyld_sim revalidate the blob */
2181 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2182 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
2183 if (error) {
2184 blob = NULL;
2185 if (error != EAGAIN) {
2186 vnode_put(vp);
2187 goto outdrop;
2188 }
2189 }
2190 }
2191 }
2192
2193 if (blob == NULL) {
2194 /*
2195 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
2196 * our use cases for the immediate future, but note that at the time of this commit, some
2197 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
2198 *
2199 * We should consider how we can manage this more effectively; the above means that some
2200 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
2201 * threshold considered ridiculous at the time of this change.
2202 */
2203 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
2204 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2205 error = E2BIG;
2206 vnode_put(vp);
2207 goto outdrop;
2208 }
2209
2210 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2211 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2212 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2213 error = ENOMEM;
2214 vnode_put(vp);
2215 goto outdrop;
2216 }
2217
2218 if (uap->cmd == F_ADDSIGS) {
2219 error = copyin(fs.fs_blob_start,
2220 (void *) kernel_blob_addr,
2221 fs.fs_blob_size);
2222 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
2223 int resid;
2224
2225 error = vn_rdwr(UIO_READ,
2226 vp,
2227 (caddr_t) kernel_blob_addr,
2228 (int)kernel_blob_size,
2229 fs.fs_file_start + fs.fs_blob_start,
2230 UIO_SYSSPACE,
2231 0,
2232 kauth_cred_get(),
2233 &resid,
2234 p);
2235 if ((error == 0) && resid) {
2236 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2237 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2238 }
2239 }
2240
2241 if (error) {
2242 ubc_cs_blob_deallocate(kernel_blob_addr,
2243 kernel_blob_size);
2244 vnode_put(vp);
2245 goto outdrop;
2246 }
2247
2248 blob = NULL;
2249 error = ubc_cs_blob_add(vp,
2250 proc_platform(p),
2251 CPU_TYPE_ANY, /* not for a specific architecture */
2252 CPU_SUBTYPE_ANY,
2253 fs.fs_file_start,
2254 &kernel_blob_addr,
2255 kernel_blob_size,
2256 NULL,
2257 blob_add_flags,
2258 &blob);
2259
2260 /* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is now zero */
2261 if (error) {
2262 if (kernel_blob_addr) {
2263 ubc_cs_blob_deallocate(kernel_blob_addr,
2264 kernel_blob_size);
2265 }
2266 vnode_put(vp);
2267 goto outdrop;
2268 } else {
2269 #if CHECK_CS_VALIDATION_BITMAP
2270 ubc_cs_validation_bitmap_allocate( vp );
2271 #endif
2272 }
2273 }
2274
2275 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
2276 uap->cmd == F_ADDFILESIGS_INFO) {
2277 /*
2278 * The first element of the structure is an
2279 * off_t, which happens to have the same size on
2280 * all architectures. Overwrite it with the end offset.
2281 */
2282 off_t end_offset = 0;
2283 if (blob) {
2284 end_offset = blob->csb_end_offset;
2285 }
2286 error = copyout(&end_offset, argp, sizeof(end_offset));
2287
2288 if (error) {
2289 vnode_put(vp);
2290 goto outdrop;
2291 }
2292 }
2293
2294 if (uap->cmd == F_ADDFILESIGS_INFO) {
2295 /* Return information. What we copy out depends on the size of the
2296 * passed in structure, to keep binary compatibility. */
2297
2298 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
2299 // enough room for fs_cdhash[20]+fs_hash_type
2300
2301 if (blob != NULL) {
2302 error = copyout(blob->csb_cdhash,
2303 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
2304 USER_FSIGNATURES_CDHASH_LEN);
2305 if (error) {
2306 vnode_put(vp);
2307 goto outdrop;
2308 }
2309 int hashtype = cs_hash_type(blob->csb_hashtype);
2310 error = copyout(&hashtype,
2311 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
2312 sizeof(int));
2313 if (error) {
2314 vnode_put(vp);
2315 goto outdrop;
2316 }
2317 }
2318 }
2319 }
2320
2321 (void) vnode_put(vp);
2322 break;
2323 }
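/*
 * Illustrative sketch (not part of the original source): how a loader such
 * as dyld might hand a code-signature load command to the kernel with
 * F_ADDFILESIGS_RETURN.  Field names follow the user_fsignatures copyin
 * above; the offsets shown are hypothetical placeholders.
 *
 *	fsignatures_t siginfo;
 *	siginfo.fs_file_start = slice_offset;               // start of the Mach-O slice in the file
 *	siginfo.fs_blob_start = (void *)(uintptr_t)cs_dataoff; // LC_CODE_SIGNATURE dataoff, relative to the slice
 *	siginfo.fs_blob_size  = cs_datasize;
 *	if (fcntl(fd, F_ADDFILESIGS_RETURN, &siginfo) != -1) {
 *		// on success the kernel overwrites the first element (fs_file_start)
 *		// with the end offset covered by the signature (csb_end_offset)
 *		off_t covered_end = siginfo.fs_file_start;
 *	}
 */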
2324 #if CONFIG_SUPPLEMENTAL_SIGNATURES
2325 case F_ADDFILESUPPL:
2326 {
2327 struct vnode *ivp;
2328 struct cs_blob *blob = NULL;
2329 struct user_fsupplement fs;
2330 int orig_fd;
2331 struct fileproc* orig_fp = NULL;
2332 kern_return_t kr;
2333 vm_offset_t kernel_blob_addr;
2334 vm_size_t kernel_blob_size;
2335
2336 if (!IS_64BIT_PROCESS(p)) {
2337 error = EINVAL;
2338 goto out; // drop fp and unlock fds
2339 }
2340
2341 if (fp->f_type != DTYPE_VNODE) {
2342 error = EBADF;
2343 goto out;
2344 }
2345
2346 error = copyin(argp, &fs, sizeof(fs));
2347 if (error) {
2348 goto out;
2349 }
2350
2351 orig_fd = fs.fs_orig_fd;
2352 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
2353 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
2354 goto out;
2355 }
2356
2357 if (orig_fp->f_type != DTYPE_VNODE) {
2358 error = EBADF;
2359 fp_drop(p, orig_fd, orig_fp, 1);
2360 goto out;
2361 }
2362
2363 ivp = (struct vnode *)orig_fp->f_data;
2364
2365 vp = (struct vnode *)fp->f_data;
2366
2367 proc_fdunlock(p);
2368
2369 error = vnode_getwithref(ivp);
2370 if (error) {
2371 fp_drop(p, orig_fd, orig_fp, 0);
2372 goto outdrop; //drop fp
2373 }
2374
2375 error = vnode_getwithref(vp);
2376 if (error) {
2377 vnode_put(ivp);
2378 fp_drop(p, orig_fd, orig_fp, 0);
2379 goto outdrop;
2380 }
2381
2382 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2383 error = E2BIG;
2384 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
2385 }
2386
2387 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2388 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2389 if (kr != KERN_SUCCESS) {
2390 error = ENOMEM;
2391 goto dropboth;
2392 }
2393
2394 int resid;
2395 error = vn_rdwr(UIO_READ, vp,
2396 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
2397 fs.fs_file_start + fs.fs_blob_start,
2398 UIO_SYSSPACE, 0,
2399 kauth_cred_get(), &resid, p);
2400 if ((error == 0) && resid) {
2401 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2402 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2403 }
2404
2405 if (error) {
2406 ubc_cs_blob_deallocate(kernel_blob_addr,
2407 kernel_blob_size);
2408 goto dropboth;
2409 }
2410
2411 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
2412 &kernel_blob_addr, kernel_blob_size, &blob);
2413
2414 /* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
2415 if (error) {
2416 if (kernel_blob_addr) {
2417 ubc_cs_blob_deallocate(kernel_blob_addr,
2418 kernel_blob_size);
2419 }
2420 goto dropboth;
2421 }
2422 vnode_put(ivp);
2423 vnode_put(vp);
2424 fp_drop(p, orig_fd, orig_fp, 0);
2425 break;
2426
2427 dropboth:
2428 vnode_put(ivp);
2429 vnode_put(vp);
2430 fp_drop(p, orig_fd, orig_fp, 0);
2431 goto outdrop;
2432 }
2433 #endif
2434 case F_GETCODEDIR:
2435 case F_FINDSIGS: {
2436 error = ENOTSUP;
2437 goto out;
2438 }
2439 case F_CHECK_LV: {
2440 struct fileglob *fg;
2441 fchecklv_t lv = {};
2442
2443 if (fp->f_type != DTYPE_VNODE) {
2444 error = EBADF;
2445 goto out;
2446 }
2447 fg = fp->fp_glob;
2448 proc_fdunlock(p);
2449
2450 if (IS_64BIT_PROCESS(p)) {
2451 error = copyin(argp, &lv, sizeof(lv));
2452 } else {
2453 struct user32_fchecklv lv32 = {};
2454
2455 error = copyin(argp, &lv32, sizeof(lv32));
2456 lv.lv_file_start = lv32.lv_file_start;
2457 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2458 lv.lv_error_message_size = lv32.lv_error_message_size;
2459 }
2460 if (error) {
2461 goto outdrop;
2462 }
2463
2464 #if CONFIG_MACF
2465 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2466 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2467 #endif
2468
2469 break;
2470 }
2471 case F_GETSIGSINFO: {
2472 struct cs_blob *blob = NULL;
2473 fgetsigsinfo_t sigsinfo = {};
2474
2475 if (fp->f_type != DTYPE_VNODE) {
2476 error = EBADF;
2477 goto out;
2478 }
2479 vp = (struct vnode *)fp->f_data;
2480 proc_fdunlock(p);
2481
2482 error = vnode_getwithref(vp);
2483 if (error) {
2484 goto outdrop;
2485 }
2486
2487 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
2488 if (error) {
2489 vnode_put(vp);
2490 goto outdrop;
2491 }
2492
2493 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
2494 if (blob == NULL) {
2495 error = ENOENT;
2496 vnode_put(vp);
2497 goto outdrop;
2498 }
2499 switch (sigsinfo.fg_info_request) {
2500 case GETSIGSINFO_PLATFORM_BINARY:
2501 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
2502 error = copyout(&sigsinfo.fg_sig_is_platform,
2503 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
2504 sizeof(sigsinfo.fg_sig_is_platform));
2505 if (error) {
2506 vnode_put(vp);
2507 goto outdrop;
2508 }
2509 break;
2510 default:
2511 error = EINVAL;
2512 vnode_put(vp);
2513 goto outdrop;
2514 }
2515 vnode_put(vp);
2516 break;
2517 }
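/*
 * Illustrative sketch (not part of the original source): querying whether
 * the signature attached at a given slice offset is a platform signature.
 * Field names follow the fgetsigsinfo copyin/copyout above; the offset is a
 * hypothetical placeholder.
 *
 *	fgetsigsinfo_t info = {
 *		.fg_file_start      = slice_offset,
 *		.fg_info_request    = GETSIGSINFO_PLATFORM_BINARY,
 *		.fg_sig_is_platform = 0,
 *	};
 *	if (fcntl(fd, F_GETSIGSINFO, &info) == 0) {
 *		// info.fg_sig_is_platform is non-zero for platform binaries
 *	}
 */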
2518 #if CONFIG_PROTECT
2519 case F_GETPROTECTIONCLASS: {
2520 if (fp->f_type != DTYPE_VNODE) {
2521 error = EBADF;
2522 goto out;
2523 }
2524 vp = (struct vnode *)fp->f_data;
2525
2526 proc_fdunlock(p);
2527
2528 if (vnode_getwithref(vp)) {
2529 error = ENOENT;
2530 goto outdrop;
2531 }
2532
2533 struct vnode_attr va;
2534
2535 VATTR_INIT(&va);
2536 VATTR_WANTED(&va, va_dataprotect_class);
2537 error = VNOP_GETATTR(vp, &va, &context);
2538 if (!error) {
2539 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2540 *retval = va.va_dataprotect_class;
2541 } else {
2542 error = ENOTSUP;
2543 }
2544 }
2545
2546 vnode_put(vp);
2547 break;
2548 }
2549
2550 case F_SETPROTECTIONCLASS: {
2551 /* tmp must be a valid PROTECTION_CLASS_* */
2552 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2553
2554 if (fp->f_type != DTYPE_VNODE) {
2555 error = EBADF;
2556 goto out;
2557 }
2558 vp = (struct vnode *)fp->f_data;
2559
2560 proc_fdunlock(p);
2561
2562 if (vnode_getwithref(vp)) {
2563 error = ENOENT;
2564 goto outdrop;
2565 }
2566
2567 /* Only go forward if you have write access */
2568 vfs_context_t ctx = vfs_context_current();
2569 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2570 vnode_put(vp);
2571 error = EBADF;
2572 goto outdrop;
2573 }
2574
2575 struct vnode_attr va;
2576
2577 VATTR_INIT(&va);
2578 VATTR_SET(&va, va_dataprotect_class, tmp);
2579
2580 error = VNOP_SETATTR(vp, &va, ctx);
2581
2582 vnode_put(vp);
2583 break;
2584 }
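/*
 * Illustrative sketch (not part of the original source): reading and then
 * changing a file's data-protection class from userspace.  The class value
 * must be one of the PROTECTION_CLASS_* constants mentioned above; the
 * variable names are hypothetical.
 *
 *	int cur = fcntl(fd, F_GETPROTECTIONCLASS);    // returns the class, or -1/errno
 *	if (cur != -1 && cur != wanted_class) {
 *		if (fcntl(fd, F_SETPROTECTIONCLASS, wanted_class) == -1) {
 *			perror("F_SETPROTECTIONCLASS");  // EBADF without write access, etc.
 *		}
 *	}
 */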
2585
2586 case F_TRANSCODEKEY: {
2587 if (fp->f_type != DTYPE_VNODE) {
2588 error = EBADF;
2589 goto out;
2590 }
2591
2592 vp = (struct vnode *)fp->f_data;
2593 proc_fdunlock(p);
2594
2595 if (vnode_getwithref(vp)) {
2596 error = ENOENT;
2597 goto outdrop;
2598 }
2599
2600 cp_key_t k = {
2601 .len = CP_MAX_WRAPPEDKEYSIZE,
2602 };
2603
2604 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2605
2606 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2607
2608 vnode_put(vp);
2609
2610 if (error == 0) {
2611 error = copyout(k.key, argp, k.len);
2612 *retval = k.len;
2613 }
2614
2615 FREE(k.key, M_TEMP);
2616
2617 break;
2618 }
2619
2620 case F_GETPROTECTIONLEVEL: {
2621 if (fp->f_type != DTYPE_VNODE) {
2622 error = EBADF;
2623 goto out;
2624 }
2625
2626 vp = (struct vnode*) fp->f_data;
2627 proc_fdunlock(p);
2628
2629 if (vnode_getwithref(vp)) {
2630 error = ENOENT;
2631 goto outdrop;
2632 }
2633
2634 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2635
2636 vnode_put(vp);
2637 break;
2638 }
2639
2640 case F_GETDEFAULTPROTLEVEL: {
2641 if (fp->f_type != DTYPE_VNODE) {
2642 error = EBADF;
2643 goto out;
2644 }
2645
2646 vp = (struct vnode*) fp->f_data;
2647 proc_fdunlock(p);
2648
2649 if (vnode_getwithref(vp)) {
2650 error = ENOENT;
2651 goto outdrop;
2652 }
2653
2654 /*
2655 * if cp_get_major_vers fails, error will be set to proper errno
2656 * and cp_version will still be 0.
2657 */
2658
2659 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2660
2661 vnode_put(vp);
2662 break;
2663 }
2664
2665 #endif /* CONFIG_PROTECT */
2666
2667 case F_MOVEDATAEXTENTS: {
2668 struct fileproc *fp2 = NULL;
2669 struct vnode *src_vp = NULLVP;
2670 struct vnode *dst_vp = NULLVP;
2671 /* We need to grab the 2nd FD out of the arguments before moving on. */
2672 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2673
2674 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2675 if (error) {
2676 goto out;
2677 }
2678
2679 if (fp->f_type != DTYPE_VNODE) {
2680 error = EBADF;
2681 goto out;
2682 }
2683
2684 /*
2685 * For now, special case HFS+ and APFS only, since this
2686 * is SPI.
2687 */
2688 src_vp = (struct vnode *)fp->f_data;
2689 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2690 error = ENOTSUP;
2691 goto out;
2692 }
2693
2694 /*
2695 * Get the references before we start acquiring iocounts on the vnodes,
2696 * while we still hold the proc fd lock
2697 */
2698 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2699 error = EBADF;
2700 goto out;
2701 }
2702 if (fp2->f_type != DTYPE_VNODE) {
2703 fp_drop(p, fd2, fp2, 1);
2704 error = EBADF;
2705 goto out;
2706 }
2707 dst_vp = (struct vnode *)fp2->f_data;
2708 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2709 fp_drop(p, fd2, fp2, 1);
2710 error = ENOTSUP;
2711 goto out;
2712 }
2713
2714 #if CONFIG_MACF
2715 /* Re-do MAC checks against the new FD, pass in a fake argument */
2716 error = mac_file_check_fcntl(proc_ucred(p), fp2->fp_glob, uap->cmd, 0);
2717 if (error) {
2718 fp_drop(p, fd2, fp2, 1);
2719 goto out;
2720 }
2721 #endif
2722 /* Audit the 2nd FD */
2723 AUDIT_ARG(fd, fd2);
2724
2725 proc_fdunlock(p);
2726
2727 if (vnode_getwithref(src_vp)) {
2728 fp_drop(p, fd2, fp2, 0);
2729 error = ENOENT;
2730 goto outdrop;
2731 }
2732 if (vnode_getwithref(dst_vp)) {
2733 vnode_put(src_vp);
2734 fp_drop(p, fd2, fp2, 0);
2735 error = ENOENT;
2736 goto outdrop;
2737 }
2738
2739 /*
2740 * Basic asserts; validate they are not the same and that
2741 * both live on the same filesystem.
2742 */
2743 if (dst_vp == src_vp) {
2744 vnode_put(src_vp);
2745 vnode_put(dst_vp);
2746 fp_drop(p, fd2, fp2, 0);
2747 error = EINVAL;
2748 goto outdrop;
2749 }
2750
2751 if (dst_vp->v_mount != src_vp->v_mount) {
2752 vnode_put(src_vp);
2753 vnode_put(dst_vp);
2754 fp_drop(p, fd2, fp2, 0);
2755 error = EXDEV;
2756 goto outdrop;
2757 }
2758
2759 /* Now we have a legit pair of FDs. Go to work */
2760
2761 /* Now check for write access to the target files */
2762 if (vnode_authorize(src_vp, NULLVP,
2763 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2764 vnode_put(src_vp);
2765 vnode_put(dst_vp);
2766 fp_drop(p, fd2, fp2, 0);
2767 error = EBADF;
2768 goto outdrop;
2769 }
2770
2771 if (vnode_authorize(dst_vp, NULLVP,
2772 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2773 vnode_put(src_vp);
2774 vnode_put(dst_vp);
2775 fp_drop(p, fd2, fp2, 0);
2776 error = EBADF;
2777 goto outdrop;
2778 }
2779
2780 /* Verify that both vps point to files and not directories */
2781 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2782 error = EINVAL;
2783 vnode_put(src_vp);
2784 vnode_put(dst_vp);
2785 fp_drop(p, fd2, fp2, 0);
2786 goto outdrop;
2787 }
2788
2789 /*
2790 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2791 * We'll pass in our special bit indicating that the new behavior is expected
2792 */
2793
2794 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2795
2796 vnode_put(src_vp);
2797 vnode_put(dst_vp);
2798 fp_drop(p, fd2, fp2, 0);
2799 break;
2800 }
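/*
 * Illustrative sketch (not part of the original source): swapping the data
 * extents of two regular files on the same HFS+/APFS volume.  The caller
 * needs the PRIV_VFS_MOVE_DATA_EXTENTS privilege and write access to both
 * files; the file names are hypothetical.
 *
 *	int src = open("live.db",    O_RDWR);
 *	int dst = open("rebuilt.db", O_RDWR);
 *	if (src >= 0 && dst >= 0 &&
 *	    fcntl(src, F_MOVEDATAEXTENTS, dst) == -1) {
 *		perror("F_MOVEDATAEXTENTS");   // EXDEV if on different mounts, etc.
 *	}
 */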
2801
2802 /*
2803 * SPI for making a file compressed.
2804 */
2805 case F_MAKECOMPRESSED: {
2806 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2807
2808 if (fp->f_type != DTYPE_VNODE) {
2809 error = EBADF;
2810 goto out;
2811 }
2812
2813 vp = (struct vnode*) fp->f_data;
2814 proc_fdunlock(p);
2815
2816 /* get the vnode */
2817 if (vnode_getwithref(vp)) {
2818 error = ENOENT;
2819 goto outdrop;
2820 }
2821
2822 /* Is it a file? */
2823 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2824 vnode_put(vp);
2825 error = EBADF;
2826 goto outdrop;
2827 }
2828
2829 /* invoke ioctl to pass off to FS */
2830 /* Only go forward if you have write access */
2831 vfs_context_t ctx = vfs_context_current();
2832 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2833 vnode_put(vp);
2834 error = EBADF;
2835 goto outdrop;
2836 }
2837
2838 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2839
2840 vnode_put(vp);
2841 break;
2842 }
2843
2844 /*
2845 * SPI (private) for indicating to a filesystem that subsequent writes to
2846 * the open FD will be written to the Fastflow.
2847 */
2848 case F_SET_GREEDY_MODE:
2849 /* Intentionally drop through to the same handler as F_SETSTATICCONTENT;
2850 * both fcntls pass their argument and selector into VNOP_IOCTL.
2851 */
2852
2853 /*
2854 * SPI (private) for indicating to a filesystem that subsequent writes to
2855 * the open FD will represent static content.
2856 */
2857 case F_SETSTATICCONTENT: {
2858 caddr_t ioctl_arg = NULL;
2859
2860 if (uap->arg) {
2861 ioctl_arg = (caddr_t) 1;
2862 }
2863
2864 if (fp->f_type != DTYPE_VNODE) {
2865 error = EBADF;
2866 goto out;
2867 }
2868 vp = (struct vnode *)fp->f_data;
2869 proc_fdunlock(p);
2870
2871 error = vnode_getwithref(vp);
2872 if (error) {
2873 error = ENOENT;
2874 goto outdrop;
2875 }
2876
2877 /* Only go forward if you have write access */
2878 vfs_context_t ctx = vfs_context_current();
2879 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2880 vnode_put(vp);
2881 error = EBADF;
2882 goto outdrop;
2883 }
2884
2885 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2886 (void)vnode_put(vp);
2887
2888 break;
2889 }
2890
2891 /*
2892 * SPI (private) for indicating to the lower level storage driver that the
2893 * subsequent writes should be of a particular IO type (burst, greedy, static),
2894 * or other flavors that may be necessary.
2895 */
2896 case F_SETIOTYPE: {
2897 caddr_t param_ptr;
2898 uint32_t param;
2899
2900 if (uap->arg) {
2901 /* extract 32 bits of flags from userland */
2902 param_ptr = (caddr_t) uap->arg;
2903 param = (uint32_t) param_ptr;
2904 } else {
2905 /* If no argument is specified, error out */
2906 error = EINVAL;
2907 goto out;
2908 }
2909
2910 /*
2911 * Validate the different types of flags that can be specified:
2912 * all of them are mutually exclusive for now.
2913 */
2914 switch (param) {
2915 case F_IOTYPE_ISOCHRONOUS:
2916 break;
2917
2918 default:
2919 error = EINVAL;
2920 goto out;
2921 }
2922
2923
2924 if (fp->f_type != DTYPE_VNODE) {
2925 error = EBADF;
2926 goto out;
2927 }
2928 vp = (struct vnode *)fp->f_data;
2929 proc_fdunlock(p);
2930
2931 error = vnode_getwithref(vp);
2932 if (error) {
2933 error = ENOENT;
2934 goto outdrop;
2935 }
2936
2937 /* Only go forward if you have write access */
2938 vfs_context_t ctx = vfs_context_current();
2939 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2940 vnode_put(vp);
2941 error = EBADF;
2942 goto outdrop;
2943 }
2944
2945 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2946 (void)vnode_put(vp);
2947
2948 break;
2949 }
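/*
 * Illustrative sketch (not part of the original source): tagging subsequent
 * writes on an fd as isochronous I/O.  Per the validation above,
 * F_IOTYPE_ISOCHRONOUS is currently the only accepted flag, and it is
 * passed by value rather than by pointer.
 *
 *	if (fcntl(fd, F_SETIOTYPE, F_IOTYPE_ISOCHRONOUS) == -1) {
 *		perror("F_SETIOTYPE");   // EINVAL for any other flag value
 *	}
 */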
2950
2951 /*
2952 * Set the vnode pointed to by 'fd'
2953 * and tag it as the (potentially future) backing store
2954 * for another filesystem
2955 */
2956 case F_SETBACKINGSTORE: {
2957 if (fp->f_type != DTYPE_VNODE) {
2958 error = EBADF;
2959 goto out;
2960 }
2961
2962 vp = (struct vnode *)fp->f_data;
2963
2964 if (vp->v_tag != VT_HFS) {
2965 error = EINVAL;
2966 goto out;
2967 }
2968 proc_fdunlock(p);
2969
2970 if (vnode_getwithref(vp)) {
2971 error = ENOENT;
2972 goto outdrop;
2973 }
2974
2975 /* only proceed if you have write access */
2976 vfs_context_t ctx = vfs_context_current();
2977 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2978 vnode_put(vp);
2979 error = EBADF;
2980 goto outdrop;
2981 }
2982
2983
2984 /* If arg != 0, set, otherwise unset */
2985 if (uap->arg) {
2986 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2987 } else {
2988 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
2989 }
2990
2991 vnode_put(vp);
2992 break;
2993 }
2994
2995 /*
2996 * like F_GETPATH, but special semantics for
2997 * the mobile time machine handler.
2998 */
2999 case F_GETPATH_MTMINFO: {
3000 char *pathbufp;
3001 int pathlen;
3002
3003 if (fp->f_type != DTYPE_VNODE) {
3004 error = EBADF;
3005 goto out;
3006 }
3007 vp = (struct vnode *)fp->f_data;
3008 proc_fdunlock(p);
3009
3010 pathlen = MAXPATHLEN;
3011 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
3012 if (pathbufp == NULL) {
3013 error = ENOMEM;
3014 goto outdrop;
3015 }
3016 if ((error = vnode_getwithref(vp)) == 0) {
3017 int backingstore = 0;
3018
3019 /* Check for error from vn_getpath before moving on */
3020 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
3021 if (vp->v_tag == VT_HFS) {
3022 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
3023 }
3024 (void)vnode_put(vp);
3025
3026 if (error == 0) {
3027 error = copyout((caddr_t)pathbufp, argp, pathlen);
3028 }
3029 if (error == 0) {
3030 /*
3031 * If the copyout was successful, now check to ensure
3032 * that this vnode is not a BACKINGSTORE vnode. mtmd
3033 * wants the path regardless.
3034 */
3035 if (backingstore) {
3036 error = EBUSY;
3037 }
3038 }
3039 } else {
3040 (void)vnode_put(vp);
3041 }
3042 }
3043 FREE(pathbufp, M_TEMP);
3044 goto outdrop;
3045 }
3046
3047 #if DEBUG || DEVELOPMENT
3048 case F_RECYCLE:
3049 if (fp->f_type != DTYPE_VNODE) {
3050 error = EBADF;
3051 goto out;
3052 }
3053 vp = (struct vnode *)fp->f_data;
3054 proc_fdunlock(p);
3055
3056 vnode_recycle(vp);
3057 break;
3058 #endif
3059
3060 default:
3061 /*
3062 * This is an fcntl() that we do not recognize at this level;
3063 * if this is a vnode, we send it down into the VNOP_IOCTL
3064 * for this vnode; this can include special devices, and will
3065 * effectively overload fcntl() to send ioctl()'s.
3066 */
3067 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
3068 error = EINVAL;
3069 goto out;
3070 }
3071
3072 /* Catch any now-invalid fcntl() selectors */
3073 switch (uap->cmd) {
3074 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
3075 case (int)FSIOC_FIOSEEKHOLE:
3076 case (int)FSIOC_FIOSEEKDATA:
3077 case (int)FSIOC_CAS_BSDFLAGS:
3078 case HFS_GET_BOOT_INFO:
3079 case HFS_SET_BOOT_INFO:
3080 case FIOPINSWAP:
3081 case F_MARKDEPENDENCY:
3082 case TIOCREVOKE:
3083 case TIOCREVOKECLEAR:
3084 error = EINVAL;
3085 goto out;
3086 default:
3087 break;
3088 }
3089
3090 if (fp->f_type != DTYPE_VNODE) {
3091 error = EBADF;
3092 goto out;
3093 }
3094 vp = (struct vnode *)fp->f_data;
3095 proc_fdunlock(p);
3096
3097 if ((error = vnode_getwithref(vp)) == 0) {
3098 #define STK_PARAMS 128
3099 char stkbuf[STK_PARAMS] = {0};
3100 unsigned int size;
3101 caddr_t data, memp;
3102 /*
3103 * For this to work properly, we have to copy in the
3104 * ioctl() cmd argument if there is one; we must also
3105 * check that a command parameter, if present, does
3106 * not exceed the maximum command length dictated by
3107 * the number of bits we have available in the command
3108 * to represent a structure length. Finally, we have
3109 * to copy the results back out, if it is that type of
3110 * ioctl().
3111 */
3112 size = IOCPARM_LEN(uap->cmd);
3113 if (size > IOCPARM_MAX) {
3114 (void)vnode_put(vp);
3115 error = EINVAL;
3116 break;
3117 }
3118
3119 memp = NULL;
3120 if (size > sizeof(stkbuf)) {
3121 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
3122 if (memp == 0) {
3123 (void)vnode_put(vp);
3124 error = ENOMEM;
3125 goto outdrop;
3126 }
3127 data = memp;
3128 } else {
3129 data = &stkbuf[0];
3130 }
3131
3132 if (uap->cmd & IOC_IN) {
3133 if (size) {
3134 /* structure */
3135 error = copyin(argp, data, size);
3136 if (error) {
3137 (void)vnode_put(vp);
3138 if (memp) {
3139 kheap_free(KHEAP_TEMP, memp, size);
3140 }
3141 goto outdrop;
3142 }
3143
3144 /* Bzero the section beyond that which was needed */
3145 if (size <= sizeof(stkbuf)) {
3146 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
3147 }
3148 } else {
3149 /* int */
3150 if (is64bit) {
3151 *(user_addr_t *)data = argp;
3152 } else {
3153 *(uint32_t *)data = (uint32_t)argp;
3154 }
3155 }
3156 } else if ((uap->cmd & IOC_OUT) && size) {
3157 /*
3158 * Zero the buffer so the user always
3159 * gets back something deterministic.
3160 */
3161 bzero(data, size);
3162 } else if (uap->cmd & IOC_VOID) {
3163 if (is64bit) {
3164 *(user_addr_t *)data = argp;
3165 } else {
3166 *(uint32_t *)data = (uint32_t)argp;
3167 }
3168 }
3169
3170 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
3171
3172 (void)vnode_put(vp);
3173
3174 /* Copy any output data to user */
3175 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
3176 error = copyout(data, argp, size);
3177 }
3178 if (memp) {
3179 kheap_free(KHEAP_TEMP, memp, size);
3180 }
3181 }
3182 break;
3183 }
3184
3185 outdrop:
3186 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3187 fp_drop(p, fd, fp, 0);
3188 return error;
3189 out:
3190 fp_drop(p, fd, fp, 1);
3191 proc_fdunlock(p);
3192 return error;
3193 }
3194
3195
3196 /*
3197 * finishdup
3198 *
3199 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
3200 *
3201 * Parameters: p Process performing the dup
3202 * old The fd to dup
3203 * new The fd to dup it to
3204 * fd_flags Flags to augment the new fd
3205 * retval Pointer to the call return area
3206 *
3207 * Returns: 0 Success
3208 * EBADF
3209 * ENOMEM
3210 *
3211 * Implicit returns:
3212 * *retval (modified) The new descriptor
3213 *
3214 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
3215 * the caller
3216 *
3217 * Notes: This function may drop and reacquire this lock; it is unsafe
3218 * for a caller to assume that other state protected by the lock
3219 * has not been subsequently changed out from under it.
3220 */
3221 int
3222 finishdup(proc_t p,
3223 struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
3224 {
3225 struct fileproc *nfp;
3226 struct fileproc *ofp;
3227 #if CONFIG_MACF
3228 int error;
3229 #endif
3230
3231 #if DIAGNOSTIC
3232 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3233 #endif
3234 if ((ofp = fdp->fd_ofiles[old]) == NULL ||
3235 (fdp->fd_ofileflags[old] & UF_RESERVED)) {
3236 fdrelse(p, new);
3237 return EBADF;
3238 }
3239
3240 #if CONFIG_MACF
3241 error = mac_file_check_dup(proc_ucred(p), ofp->fp_glob, new);
3242 if (error) {
3243 fdrelse(p, new);
3244 return error;
3245 }
3246 #endif
3247
3248 proc_fdunlock(p);
3249
3250 nfp = fileproc_alloc_init(NULL);
3251
3252 proc_fdlock(p);
3253
3254 if (nfp == NULL) {
3255 fdrelse(p, new);
3256 return ENOMEM;
3257 }
3258
3259 fg_ref(ofp->fp_glob);
3260 nfp->fp_glob = ofp->fp_glob;
3261
3262 #if DIAGNOSTIC
3263 if (fdp->fd_ofiles[new] != 0) {
3264 panic("finishdup: overwriting fd_ofiles with new %d", new);
3265 }
3266 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
3267 panic("finishdup: unreserved fileflags with new %d", new);
3268 }
3269 #endif
3270
3271 if (new > fdp->fd_lastfile) {
3272 fdp->fd_lastfile = new;
3273 }
3274 *fdflags(p, new) |= fd_flags;
3275 procfdtbl_releasefd(p, new, nfp);
3276 *retval = new;
3277 return 0;
3278 }
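/*
 * Illustrative sketch (not part of the original source): the userspace entry
 * points that funnel into finishdup() are dup(2), dup2(2), and fcntl(2) with
 * F_DUPFD / F_DUPFD_CLOEXEC.  All of the resulting descriptors share one
 * fileglob, so the file offset and status flags are common to them.
 *
 *	int a = dup(fd);                        // lowest free fd
 *	int b = fcntl(fd, F_DUPFD, 10);         // lowest free fd >= 10
 *	int c = fcntl(fd, F_DUPFD_CLOEXEC, 0);  // as above, with FD_CLOEXEC set
 *	                                        // (maps to the fd_flags argument here)
 */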
3279
3280
3281 /*
3282 * sys_close
3283 *
3284 * Description: The implementation of the close(2) system call
3285 *
3286 * Parameters: p Process in whose per process file table
3287 * the close is to occur
3288 * uap->fd fd to be closed
3289 * retval <unused>
3290 *
3291 * Returns: 0 Success
3292 * fp_lookup:EBADF Bad file descriptor
3293 * fp_guard_exception:??? Guarded file descriptor
3294 * close_internal:EBADF
3295 * close_internal:??? Anything returnable by a per-fileops
3296 * close function
3297 */
3298 int
3299 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
3300 {
3301 __pthread_testcancel(1);
3302 return close_nocancel(p, uap->fd);
3303 }
3304
3305 int
3306 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
3307 {
3308 return close_nocancel(p, uap->fd);
3309 }
3310
3311 int
3312 close_nocancel(proc_t p, int fd)
3313 {
3314 struct fileproc *fp;
3315
3316 AUDIT_SYSCLOSE(p, fd);
3317
3318 proc_fdlock(p);
3319 if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
3320 proc_fdunlock(p);
3321 return EBADF;
3322 }
3323
3324 if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
3325 int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
3326 proc_fdunlock(p);
3327 return error;
3328 }
3329
3330 return fp_close_and_unlock(p, fd, fp, 0);
3331 }
3332
3333
3334 int
3335 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
3336 {
3337 struct filedesc *fdp = p->p_fd;
3338 struct fileglob *fg = fp->fp_glob;
3339
3340 #if DIAGNOSTIC
3341 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3342 #endif
3343
3344 /*
3345 * Keep most people from finding the filedesc while we are closing it.
3346 *
3347 * Callers are:
3348 *
3349 * - dup2() which always waits for UF_RESERVED to clear
3350 *
3351 * - close/guarded_close/... who will fail the fileproc lookup if
3352 * UF_RESERVED is set,
3353 *
3354 * - fdexec()/fdfree() who only run once all threads in the proc
3355 * are properly canceled, hence no fileproc in this proc should
3356 * be in flux.
3357 *
3358 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
3359 *
3360 * Callers of fp_get_noref_locked_with_iocount() can still find
3361 * this entry so that they can drop their I/O reference despite
3362 * not having remembered the fileproc pointer (namely select() and
3363 * file_drop()).
3364 */
3365 if (p->p_fd->fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
3366 panic("%s: called with fileproc in flux (%d/:%p)",
3367 __func__, fd, fp);
3368 }
3369 p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
3370
3371 if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
3372 proc_fdunlock(p);
3373
3374 if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
3375 /*
3376 * call out to allow 3rd party notification of close.
3377 * Ignore result of kauth_authorize_fileop call.
3378 */
3379 if (vnode_getwithref((vnode_t)fg->fg_data) == 0) {
3380 u_int fileop_flags = 0;
3381 if (fg->fg_flag & FWASWRITTEN) {
3382 fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
3383 }
3384 kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
3385 (uintptr_t)fg->fg_data, (uintptr_t)fileop_flags);
3386 #if CONFIG_MACF
3387 mac_file_notify_close(proc_ucred(p), fp->fp_glob);
3388 #endif
3389 vnode_put((vnode_t)fg->fg_data);
3390 }
3391 }
3392 if (fp->fp_flags & FP_AIOISSUED) {
3393 /*
3394 * cancel all async IO requests that can be cancelled.
3395 */
3396 _aio_close( p, fd );
3397 }
3398
3399 proc_fdlock(p);
3400 }
3401
3402 if (fd < fdp->fd_knlistsize) {
3403 knote_fdclose(p, fd);
3404 }
3405
3406 fileproc_drain(p, fp);
3407
3408 if (flags & FD_DUP2RESV) {
3409 fdp->fd_ofiles[fd] = NULL;
3410 fdp->fd_ofileflags[fd] &= ~(UF_CLOSING | UF_EXCLOSE | UF_FORKCLOSE);
3411 } else {
3412 fdrelse(p, fd);
3413 }
3414
3415 proc_fdunlock(p);
3416
3417 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
3418 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
3419 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg->fg_data));
3420 }
3421
3422 fileproc_free(fp);
3423
3424 return fg_drop(p, fg);
3425 }
3426
3427
3428 /*
3429 * fstat
3430 *
3431 * Description: Return status information about a file descriptor.
3432 *
3433 * Parameters: p The process doing the fstat
3434 * fd The fd to stat
3435 * ub The user stat buffer
3436 * xsecurity The user extended security
3437 * buffer, or 0 if none
3438 * xsecurity_size The size of xsecurity, or 0
3439 * if no xsecurity
3440 * isstat64 Flag to indicate 64 bit version
3441 * for inode size, etc.
3442 *
3443 * Returns: 0 Success
3444 * EBADF
3445 * EFAULT
3446 * fp_lookup:EBADF Bad file descriptor
3447 * vnode_getwithref:???
3448 * copyout:EFAULT
3449 * vnode_getwithref:???
3450 * vn_stat:???
3451 * soo_stat:???
3452 * pipe_stat:???
3453 * pshm_stat:???
3454 * kqueue_stat:???
3455 *
3456 * Notes: Internal implementation for all other fstat() related
3457 * functions
3458 *
3459 * XXX switch on node type is bogus; need a stat in struct
3460 * XXX fileops instead.
3461 */
3462 static int
3463 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3464 {
3465 struct fileproc *fp;
3466 union {
3467 struct stat sb;
3468 struct stat64 sb64;
3469 } source;
3470 union {
3471 struct user64_stat user64_sb;
3472 struct user32_stat user32_sb;
3473 struct user64_stat64 user64_sb64;
3474 struct user32_stat64 user32_sb64;
3475 } dest;
3476 int error, my_size;
3477 file_type_t type;
3478 caddr_t data;
3479 kauth_filesec_t fsec;
3480 user_size_t xsecurity_bufsize;
3481 vfs_context_t ctx = vfs_context_current();
3482 void * sbptr;
3483
3484
3485 AUDIT_ARG(fd, fd);
3486
3487 if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
3488 return error;
3489 }
3490 type = fp->f_type;
3491 data = fp->f_data;
3492 fsec = KAUTH_FILESEC_NONE;
3493
3494 sbptr = (void *)&source;
3495
3496 switch (type) {
3497 case DTYPE_VNODE:
3498 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
3499 /*
3500 * If the caller has the file open, and is not
3501 * requesting extended security information, we are
3502 * going to let them get the basic stat information.
3503 */
3504 if (xsecurity == USER_ADDR_NULL) {
3505 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
3506 fp->fp_glob->fg_cred);
3507 } else {
3508 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
3509 }
3510
3511 AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3512 (void)vnode_put((vnode_t)data);
3513 }
3514 break;
3515
3516 #if SOCKETS
3517 case DTYPE_SOCKET:
3518 error = soo_stat((struct socket *)data, sbptr, isstat64);
3519 break;
3520 #endif /* SOCKETS */
3521
3522 case DTYPE_PIPE:
3523 error = pipe_stat((void *)data, sbptr, isstat64);
3524 break;
3525
3526 case DTYPE_PSXSHM:
3527 error = pshm_stat((void *)data, sbptr, isstat64);
3528 break;
3529
3530 case DTYPE_KQUEUE:
3531 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3532 break;
3533
3534 default:
3535 error = EBADF;
3536 goto out;
3537 }
3538 if (error == 0) {
3539 caddr_t sbp;
3540
3541 if (isstat64 != 0) {
3542 source.sb64.st_lspare = 0;
3543 source.sb64.st_qspare[0] = 0LL;
3544 source.sb64.st_qspare[1] = 0LL;
3545
3546 if (IS_64BIT_PROCESS(current_proc())) {
3547 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3548 my_size = sizeof(dest.user64_sb64);
3549 sbp = (caddr_t)&dest.user64_sb64;
3550 } else {
3551 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3552 my_size = sizeof(dest.user32_sb64);
3553 sbp = (caddr_t)&dest.user32_sb64;
3554 }
3555 } else {
3556 source.sb.st_lspare = 0;
3557 source.sb.st_qspare[0] = 0LL;
3558 source.sb.st_qspare[1] = 0LL;
3559 if (IS_64BIT_PROCESS(current_proc())) {
3560 munge_user64_stat(&source.sb, &dest.user64_sb);
3561 my_size = sizeof(dest.user64_sb);
3562 sbp = (caddr_t)&dest.user64_sb;
3563 } else {
3564 munge_user32_stat(&source.sb, &dest.user32_sb);
3565 my_size = sizeof(dest.user32_sb);
3566 sbp = (caddr_t)&dest.user32_sb;
3567 }
3568 }
3569
3570 error = copyout(sbp, ub, my_size);
3571 }
3572
3573 /* caller wants extended security information? */
3574 if (xsecurity != USER_ADDR_NULL) {
3575 /* did we get any? */
3576 if (fsec == KAUTH_FILESEC_NONE) {
3577 if (susize(xsecurity_size, 0) != 0) {
3578 error = EFAULT;
3579 goto out;
3580 }
3581 } else {
3582 /* find the user buffer size */
3583 xsecurity_bufsize = fusize(xsecurity_size);
3584
3585 /* copy out the actual data size */
3586 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3587 error = EFAULT;
3588 goto out;
3589 }
3590
3591 /* if the caller supplied enough room, copy out to it */
3592 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
3593 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3594 }
3595 }
3596 }
3597 out:
3598 fp_drop(p, fd, fp, 0);
3599 if (fsec != NULL) {
3600 kauth_filesec_free(fsec);
3601 }
3602 return error;
3603 }
3604
3605
3606 /*
3607 * sys_fstat_extended
3608 *
3609 * Description: Extended version of fstat supporting returning extended
3610 * security information
3611 *
3612 * Parameters: p The process doing the fstat
3613 * uap->fd The fd to stat
3614 * uap->ub The user stat buffer
3615 * uap->xsecurity The user extended security
3616 * buffer, or 0 if none
3617 * uap->xsecurity_size The size of xsecurity, or 0
3618 *
3619 * Returns: 0 Success
3620 * !0 Errno (see fstat)
3621 */
3622 int
3623 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3624 {
3625 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3626 }
3627
3628
3629 /*
3630 * sys_fstat
3631 *
3632 * Description: Get file status for the file associated with fd
3633 *
3634 * Parameters: p The process doing the fstat
3635 * uap->fd The fd to stat
3636 * uap->ub The user stat buffer
3637 *
3638 * Returns: 0 Success
3639 * !0 Errno (see fstat)
3640 */
3641 int
3642 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3643 {
3644 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
3645 }
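/*
 * Illustrative sketch (not part of the original source): the plain userspace
 * view of this call.  fstat(2) fills a struct stat for any descriptor type
 * handled by the switch in fstat() above (vnode, socket, pipe, POSIX shm,
 * kqueue).
 *
 *	struct stat sb;
 *	if (fstat(fd, &sb) == 0) {
 *		printf("size=%lld mode=%o\n",
 *		    (long long)sb.st_size, sb.st_mode & 07777);
 *	}
 */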
3646
3647
3648 /*
3649 * sys_fstat64_extended
3650 *
3651 * Description: Extended version of fstat64 supporting returning extended
3652 * security information
3653 *
3654 * Parameters: p The process doing the fstat
3655 * uap->fd The fd to stat
3656 * uap->ub The user stat buffer
3657 * uap->xsecurity The user extended security
3658 * buffer, or 0 if none
3659 * uap->xsecurity_size The size of xsecurity, or 0
3660 *
3661 * Returns: 0 Success
3662 * !0 Errno (see fstat)
3663 */
3664 int
3665 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3666 {
3667 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3668 }
3669
3670
3671 /*
3672 * sys_fstat64
3673 *
3674 * Description: Get 64 bit version of the file status for the file associated
3675 * with fd
3676 *
3677 * Parameters: p The process doing the fstat
3678 * uap->fd The fd to stat
3679 * uap->ub The user stat buffer
3680 *
3681 * Returns: 0 Success
3682 * !0 Errno (see fstat)
3683 */
3684 int
3685 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3686 {
3687 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
3688 }
3689
3690
3691 /*
3692 * sys_fpathconf
3693 *
3694 * Description: Return pathconf information about a file descriptor.
3695 *
3696 * Parameters: p Process making the request
3697 * uap->fd fd to get information about
3698 * uap->name Name of information desired
3699 * retval Pointer to the call return area
3700 *
3701 * Returns: 0 Success
3702 * EINVAL
3703 * fp_lookup:EBADF Bad file descriptor
3704 * vnode_getwithref:???
3705 * vn_pathconf:???
3706 *
3707 * Implicit returns:
3708 * *retval (modified) Returned information (numeric)
3709 */
3710 int
3711 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3712 {
3713 int fd = uap->fd;
3714 struct fileproc *fp;
3715 struct vnode *vp;
3716 int error = 0;
3717 file_type_t type;
3718 caddr_t data;
3719
3720
3721 AUDIT_ARG(fd, uap->fd);
3722 if ((error = fp_lookup(p, fd, &fp, 0))) {
3723 return error;
3724 }
3725 type = fp->f_type;
3726 data = fp->f_data;
3727
3728 switch (type) {
3729 case DTYPE_SOCKET:
3730 if (uap->name != _PC_PIPE_BUF) {
3731 error = EINVAL;
3732 goto out;
3733 }
3734 *retval = PIPE_BUF;
3735 error = 0;
3736 goto out;
3737
3738 case DTYPE_PIPE:
3739 if (uap->name != _PC_PIPE_BUF) {
3740 error = EINVAL;
3741 goto out;
3742 }
3743 *retval = PIPE_BUF;
3744 error = 0;
3745 goto out;
3746
3747 case DTYPE_VNODE:
3748 vp = (struct vnode *)data;
3749
3750 if ((error = vnode_getwithref(vp)) == 0) {
3751 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3752
3753 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3754
3755 (void)vnode_put(vp);
3756 }
3757 goto out;
3758
3759 default:
3760 error = EINVAL;
3761 goto out;
3762 }
3763 /*NOTREACHED*/
3764 out:
3765 fp_drop(p, fd, fp, 0);
3766 return error;
3767 }
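/*
 * Illustrative sketch (not part of the original source): userspace use of
 * fpathconf(2).  For sockets and pipes only _PC_PIPE_BUF is accepted, per
 * the switch above; vnode-backed descriptors are forwarded to vn_pathconf().
 *
 *	long name_max = fpathconf(filefd, _PC_NAME_MAX);
 *	long pipe_buf = fpathconf(pipefd, _PC_PIPE_BUF);
 *	if (name_max == -1) {
 *		// EINVAL for an unsupported name, EBADF for a bad descriptor, ...
 *	}
 */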
3768
3769 /*
3770 * Statistics counter for the number of times a process calling fdalloc()
3771 * has resulted in an expansion of the per process open file table.
3772 *
3773 * XXX This would likely be of more use if it were per process
3774 */
3775 int fdexpand;
3776
3777
3778 /*
3779 * fdalloc
3780 *
3781 * Description: Allocate a file descriptor for the process.
3782 *
3783 * Parameters: p Process to allocate the fd in
3784 * want The fd we would prefer to get
3785 * result Pointer to fd we got
3786 *
3787 * Returns: 0 Success
3788 * EMFILE
3789 * ENOMEM
3790 *
3791 * Implicit returns:
3792 * *result (modified) The fd which was allocated
3793 */
3794 int
3795 fdalloc(proc_t p, int want, int *result)
3796 {
3797 struct filedesc *fdp = p->p_fd;
3798 int i;
3799 int last, numfiles, oldnfiles;
3800 struct fileproc **newofiles, **ofiles;
3801 char *newofileflags;
3802 rlim_t lim;
3803 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3804
3805 nofile = MIN(nofile, INT_MAX);
3806
3807 /*
3808 * Search for a free descriptor starting at the higher
3809 * of want or fd_freefile. If that fails, consider
3810 * expanding the ofile array.
3811 */
3812 #if DIAGNOSTIC
3813 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3814 #endif
3815
3816 lim = MIN(nofile, maxfilesperproc);
3817 for (;;) {
3818 last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
3819 if ((i = want) < fdp->fd_freefile) {
3820 i = fdp->fd_freefile;
3821 }
3822 for (; i < last; i++) {
3823 if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3824 procfdtbl_reservefd(p, i);
3825 if (i > fdp->fd_lastfile) {
3826 fdp->fd_lastfile = i;
3827 }
3828 if (want <= fdp->fd_freefile) {
3829 fdp->fd_freefile = i;
3830 }
3831 *result = i;
3832 return 0;
3833 }
3834 }
3835
3836 /*
3837 * No space in current array. Expand?
3838 */
3839 if ((rlim_t)fdp->fd_nfiles >= lim) {
3840 return EMFILE;
3841 }
3842 if (fdp->fd_nfiles < NDEXTENT) {
3843 numfiles = NDEXTENT;
3844 } else {
3845 numfiles = 2 * fdp->fd_nfiles;
3846 }
3847 /* Enforce lim */
3848 if ((rlim_t)numfiles > lim) {
3849 numfiles = (int)lim;
3850 }
3851 proc_fdunlock(p);
3852 MALLOC(newofiles, struct fileproc **,
3853 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3854 proc_fdlock(p);
3855 if (newofiles == NULL) {
3856 return ENOMEM;
3857 }
3858 if (fdp->fd_nfiles >= numfiles) {
3859 FREE(newofiles, M_OFILETABL);
3860 continue;
3861 }
3862 newofileflags = (char *) &newofiles[numfiles];
3863 /*
3864 * Copy the existing ofile and ofileflags arrays
3865 * and zero the new portion of each array.
3866 */
3867 oldnfiles = fdp->fd_nfiles;
3868 (void) memcpy(newofiles, fdp->fd_ofiles,
3869 oldnfiles * sizeof(*fdp->fd_ofiles));
3870 (void) memset(&newofiles[oldnfiles], 0,
3871 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3872
3873 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3874 oldnfiles * sizeof(*fdp->fd_ofileflags));
3875 (void) memset(&newofileflags[oldnfiles], 0,
3876 (numfiles - oldnfiles) *
3877 sizeof(*fdp->fd_ofileflags));
3878 ofiles = fdp->fd_ofiles;
3879 fdp->fd_ofiles = newofiles;
3880 fdp->fd_ofileflags = newofileflags;
3881 fdp->fd_nfiles = numfiles;
3882 FREE(ofiles, M_OFILETABL);
3883 fdexpand++;
3884 }
3885 }
3886
3887
3888 /*
3889 * fdavail
3890 *
3891 * Description: Check to see whether n user file descriptors are available
3892 * to the process p.
3893 *
3894 * Parameters: p Process to check in
3895 * n The number of fd's desired
3896 *
3897 * Returns: 0 No
3898 * 1 Yes
3899 *
3900 * Locks: Assumes proc_fdlock for process is held by the caller
3901 *
3902 * Notes: The answer only remains valid so long as the proc_fdlock is
3903 * held by the caller.
3904 */
3905 int
3906 fdavail(proc_t p, int n)
3907 {
3908 struct filedesc *fdp = p->p_fd;
3909 struct fileproc **fpp;
3910 char *flags;
3911 int i;
3912 int lim;
3913 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3914
3915 lim = (int)MIN(nofile, maxfilesperproc);
3916 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
3917 return 1;
3918 }
3919 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3920 flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3921 for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
3922 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
3923 return 1;
3924 }
3925 }
3926 return 0;
3927 }
3928
3929
3930 struct fileproc *
3931 fp_get_noref_locked(proc_t p, int fd)
3932 {
3933 struct filedesc *fdp = p->p_fd;
3934 struct fileproc *fp;
3935
3936 if (fd < 0 || fd >= fdp->fd_nfiles ||
3937 (fp = fdp->fd_ofiles[fd]) == NULL ||
3938 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3939 return NULL;
3940 }
3941 return fp;
3942 }
3943
3944 struct fileproc *
3945 fp_get_noref_locked_with_iocount(proc_t p, int fd)
3946 {
3947 struct filedesc *fdp = p->p_fd;
3948 struct fileproc *fp = NULL;
3949
3950 if (fd < 0 || fd >= fdp->fd_nfiles ||
3951 (fp = fdp->fd_ofiles[fd]) == NULL ||
3952 os_ref_get_count(&fp->fp_iocount) <= 1 ||
3953 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3954 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
3955 panic("%s: caller without an iocount on fileproc (%d/:%p)",
3956 __func__, fd, fp);
3957 }
3958
3959 return fp;
3960 }
3961
3962 int
3963 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
3964 {
3965 struct filedesc *fdp = p->p_fd;
3966 struct fileproc *fp;
3967
3968 proc_fdlock_spin(p);
3969 if (fd < 0 || fd >= fdp->fd_nfiles ||
3970 (fp = fdp->fd_ofiles[fd]) == NULL ||
3971 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3972 proc_fdunlock(p);
3973 return EBADF;
3974 }
3975
3976 if (fp->f_type != ftype) {
3977 proc_fdunlock(p);
3978 return err;
3979 }
3980
3981 os_ref_retain_locked(&fp->fp_iocount);
3982 proc_fdunlock(p);
3983
3984 *fpp = fp;
3985 return 0;
3986 }
3987
3988
3989 /*
3990 * fp_getfvp
3991 *
3992 * Description: Get fileproc and vnode pointer for a given fd from the per
3993 * process open file table of the specified process, and if
3994 * successful, increment the fp_iocount
3995 *
3996 * Parameters: p Process in which fd lives
3997 * fd fd to get information for
3998 * resultfp Pointer to result fileproc
3999 * pointer area, or 0 if none
4000 * resultvp Pointer to result vnode pointer
4001 * area, or 0 if none
4002 *
4003 * Returns: 0 Success
4004 * EBADF Bad file descriptor
4005 * ENOTSUP fd does not refer to a vnode
4006 *
4007 * Implicit returns:
4008 * *resultfp (modified) Fileproc pointer
4009 * *resultvp (modified) vnode pointer
4010 *
4011 * Notes: The resultfp and resultvp fields are optional, and may be
4012 * independently specified as NULL to skip returning information
4013 *
4014 * Locks: Internally takes and releases proc_fdlock
4015 */
4016 int
4017 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
4018 {
4019 struct fileproc *fp;
4020 int error;
4021
4022 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
4023 if (error == 0) {
4024 if (resultfp) {
4025 *resultfp = fp;
4026 }
4027 if (resultvp) {
4028 *resultvp = (struct vnode *)fp->f_data;
4029 }
4030 }
4031
4032 return error;
4033 }
4034
4035
4036 /*
4037 * fp_get_pipe_id
4038 *
4039 * Description: Get pipe id for a given fd from the per process open file table
4040 * of the specified process.
4041 *
4042 * Parameters: p Process in which fd lives
4043 * fd fd to get information for
4044 * result_pipe_id Pointer to result pipe id
4045 *
4046 * Returns: 0 Success
4047 * EINVAL NULL pointer arguments passed
4048 * fp_lookup:EBADF Bad file descriptor
4049 * ENOTSUP fd does not refer to a pipe
4050 *
4051 * Implicit returns:
4052 * *result_pipe_id (modified) pipe id
4053 *
4054 * Locks: Internally takes and releases proc_fdlock
4055 */
4056 int
4057 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
4058 {
4059 struct fileproc *fp = FILEPROC_NULL;
4060 struct fileglob *fg = NULL;
4061 int error = 0;
4062
4063 if (p == NULL || result_pipe_id == NULL) {
4064 return EINVAL;
4065 }
4066
4067 proc_fdlock(p);
4068 if ((error = fp_lookup(p, fd, &fp, 1))) {
4069 proc_fdunlock(p);
4070 return error;
4071 }
4072 fg = fp->fp_glob;
4073
4074 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
4075 *result_pipe_id = pipe_id((struct pipe*)fg->fg_data);
4076 } else {
4077 error = ENOTSUP;
4078 }
4079
4080 fp_drop(p, fd, fp, 1);
4081 proc_fdunlock(p);
4082 return error;
4083 }
4084
4085
4086 /*
4087 * fp_lookup
4088 *
4089 * Description: Get fileproc pointer for a given fd from the per process
4090 * open file table of the specified process and if successful,
4091 * increment the fp_iocount
4092 *
4093 * Parameters: p Process in which fd lives
4094 * fd fd to get information for
4095 * resultfp Pointer to result fileproc
4096 * pointer area, or 0 if none
4097 * locked !0 if the caller holds the
4098 * proc_fdlock, 0 otherwise
4099 *
4100 * Returns: 0 Success
4101 * EBADF Bad file descriptor
4102 *
4103 * Implicit returns:
4104 * *resultfp (modified) Fileproc pointer
4105 *
4106 * Locks: If the argument 'locked' is non-zero, then the caller is
4107 * expected to have taken and held the proc_fdlock; if it is
4108 * zero, then this routine internally takes and drops this lock.
4109 */
4110 int
4111 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
4112 {
4113 struct filedesc *fdp = p->p_fd;
4114 struct fileproc *fp;
4115
4116 if (!locked) {
4117 proc_fdlock_spin(p);
4118 }
4119 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
4120 (fp = fdp->fd_ofiles[fd]) == NULL ||
4121 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4122 if (!locked) {
4123 proc_fdunlock(p);
4124 }
4125 return EBADF;
4126 }
4127 os_ref_retain_locked(&fp->fp_iocount);
4128
4129 if (resultfp) {
4130 *resultfp = fp;
4131 }
4132 if (!locked) {
4133 proc_fdunlock(p);
4134 }
4135
4136 return 0;
4137 }
4138
4139
4140 /*
4141 * fp_tryswap
4142 *
4143 * Description: Swap the fileproc pointer for a given fd with a new
4144 * fileproc pointer in the per-process open file table of
4145 * the specified process. The fdlock must be held at entry.
4146 * Iff the swap is successful, the old fileproc pointer is freed.
4147 *
4148 * Parameters: p Process containing the fd
4149 * fd The fd of interest
4150 * nfp Pointer to the newfp
4151 *
4152 * Returns: 0 Success
4153 * EBADF Bad file descriptor
4154 * EINTR Interrupted
4155 * EKEEPLOOKING Other references were active, try again.
4156 */
4157 int
4158 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
4159 {
4160 struct fileproc *fp;
4161 int error;
4162
4163 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
4164
4165 if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
4166 return error;
4167 }
4168 /*
4169 * At this point, our caller (change_guardedfd_np) has
4170 * one fp_iocount reference, and we just took another
4171 * one to begin the replacement.
4172 * fp and nfp have a +1 reference from allocation.
4173 * Thus if no-one else is looking, fp_iocount should be 3.
4174 */
4175 if (os_ref_get_count(&fp->fp_iocount) < 3 ||
4176 1 != os_ref_get_count(&nfp->fp_iocount)) {
4177 panic("%s: fp_iocount", __func__);
4178 } else if (3 == os_ref_get_count(&fp->fp_iocount)) {
4179 /* Copy the contents of *fp, preserving the "type" of *nfp */
4180
4181 nfp->fp_flags = (nfp->fp_flags & FP_TYPEMASK) |
4182 (fp->fp_flags & ~FP_TYPEMASK);
4183 os_ref_retain_locked(&nfp->fp_iocount);
4184 os_ref_retain_locked(&nfp->fp_iocount);
4185 nfp->fp_glob = fp->fp_glob;
4186 nfp->fp_wset = fp->fp_wset;
4187
4188 p->p_fd->fd_ofiles[fd] = nfp;
4189 fp_drop(p, fd, nfp, 1);
4190
4191 os_ref_release_live(&fp->fp_iocount);
4192 os_ref_release_live(&fp->fp_iocount);
4193 fileproc_free(fp);
4194 } else {
4195 /*
4196 * Wait for all other active references to evaporate.
4197 */
4198 p->p_fpdrainwait = 1;
4199 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
4200 PRIBIO | PCATCH, "tryswap fpdrain", NULL);
4201 if (0 == error) {
4202 /*
4203 * Return an "internal" errno to trigger a full
4204 * reevaluation of the change-guard attempt.
4205 */
4206 error = EKEEPLOOKING;
4207 }
4208 (void) fp_drop(p, fd, fp, 1);
4209 }
4210 return error;
4211 }
4212
4213
4214 /*
4215 * fp_drop
4216 *
4217 * Description: Drop the I/O reference previously taken by calling fp_lookup
4218 * et al.
4219 *
4220 * Parameters: p Process in which the fd lives
4221 * fd fd associated with the fileproc
4222 * fp fileproc on which to set the
4223 * flag and drop the reference
4224 * locked flag to internally take and
4225 * drop proc_fdlock if it is not
4226 * already held by the caller
4227 *
4228 * Returns: 0 Success
4229 * EBADF Bad file descriptor
4230 *
4231 * Locks: This function internally takes and drops the proc_fdlock for
4232 * the supplied process if 'locked' is zero, and assumes that
4233 * the caller already holds this lock if 'locked' is non-zero.
4234 *
4235 * Notes: The fileproc must correspond to the fd in the supplied proc
4236 */
4237 int
4238 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4239 {
4240 struct filedesc *fdp = p->p_fd;
4241 int needwakeup = 0;
4242
4243 if (!locked) {
4244 proc_fdlock_spin(p);
4245 }
4246 if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4247 (fp = fdp->fd_ofiles[fd]) == NULL ||
4248 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4249 !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4250 if (!locked) {
4251 proc_fdunlock(p);
4252 }
4253 return EBADF;
4254 }
4255
4256 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4257 if (fp->fp_flags & FP_SELCONFLICT) {
4258 fp->fp_flags &= ~FP_SELCONFLICT;
4259 }
4260
4261 if (p->p_fpdrainwait) {
4262 p->p_fpdrainwait = 0;
4263 needwakeup = 1;
4264 }
4265 }
4266 if (!locked) {
4267 proc_fdunlock(p);
4268 }
4269 if (needwakeup) {
4270 wakeup(&p->p_fpdrainwait);
4271 }
4272
4273 return 0;
4274 }
4275
4276
4277 /*
4278 * file_vnode
4279 *
4280 * Description: Given an fd, look it up in the current process's per process
4281 * open file table, and return its internal vnode pointer.
4282 *
4283 * Parameters: fd fd to obtain vnode from
4284 * vpp pointer to vnode return area
4285 *
4286 * Returns: 0 Success
4287 * EINVAL The fd does not refer to a
4288 * vnode fileproc entry
4289 * fp_lookup:EBADF Bad file descriptor
4290 *
4291 * Implicit returns:
4292 * *vpp (modified) Returned vnode pointer
4293 *
4294 * Locks: This function internally takes and drops the proc_fdlock for
4295 * the current process
4296 *
4297 * Notes: If successful, this function increments the fp_iocount on the
4298 * fd's corresponding fileproc.
4299 *
4300 * The fileproc referenced is not returned; because of this, care
4301 * must be taken to not drop the last reference (e.g. by closing
4302 * the file). This is inherently unsafe, since the reference may
4303 * not be recoverable from the vnode, if there is a subsequent
4304 * close that destroys the associated fileproc. The caller should
4305 * therefore retain their own reference on the fileproc so that
4306 * the fp_iocount can be dropped subsequently. Failure to do this
4307 * can result in the returned pointer immediately becoming invalid
4308 * following the call.
4309 *
4310 * Use of this function is discouraged.
4311 */
4312 int
4313 file_vnode(int fd, struct vnode **vpp)
4314 {
4315 return file_vnode_withvid(fd, vpp, NULL);
4316 }
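/*
 * Illustrative sketch (not part of the original source): the usage pattern
 * the notes above describe, as it might appear in kernel code that is handed
 * a file descriptor.  Taking a vnode iocount before issuing VNOPs is the
 * caller's responsibility; file_drop() releases the fp_iocount taken here.
 *
 *	struct vnode *vp;
 *	if (file_vnode(fd, &vp) == 0) {
 *		if (vnode_getwithref(vp) == 0) {
 *			// ... operate on vp ...
 *			vnode_put(vp);
 *		}
 *		file_drop(fd);
 *	}
 */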
4317
4318 /*
4319 * file_vnode_withvid
4320 *
4321 * Description: Given an fd, look it up in the current process's per process
4322 * open file table, and return its internal vnode pointer.
4323 *
4324 * Parameters: fd fd to obtain vnode from
4325 * vpp pointer to vnode return area
4326 * vidp pointer to vid of the returned vnode
4327 *
4328 * Returns: 0 Success
4329 * EINVAL The fd does not refer to a
4330 * vnode fileproc entry
4331 * fp_lookup:EBADF Bad file descriptor
4332 *
4333 * Implicit returns:
4334 * *vpp (modified) Returned vnode pointer
4335 *
4336 * Locks: This function internally takes and drops the proc_fdlock for
4337 * the current process
4338 *
4339 * Notes: If successful, this function increments the fp_iocount on the
4340 * fd's corresponding fileproc.
4341 *
4342 * The fileproc referenced is not returned; because of this, care
4343 * must be taken to not drop the last reference (e.g. by closing
4344 * the file). This is inherently unsafe, since the reference may
4345 * not be recoverable from the vnode, if there is a subsequent
4346 * close that destroys the associated fileproc. The caller should
4347 * therefore retain their own reference on the fileproc so that
4348 * the fp_iocount can be dropped subsequently. Failure to do this
4349 * can result in the returned pointer immediately becoming invalid
4350 * following the call.
4351 *
4352 * Use of this function is discouraged.
4353 */
4354 int
4355 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
4356 {
4357 struct fileproc *fp;
4358 int error;
4359
4360 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
4361 if (error == 0) {
4362 if (vpp) {
4363 *vpp = fp->f_data;
4364 }
4365 if (vidp) {
4366 *vidp = vnode_vid(fp->f_data);
4367 }
4368 }
4369 return error;
4370 }
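
/*
 * Example (illustrative only, not part of the original file): a sketch
 * of the intended file_vnode_withvid()/file_drop() pairing.  The fd
 * value and the use_the_vnode() helper are hypothetical; only the
 * lookup, the vid-checked vnode reference, and the matching
 * file_drop() reflect interfaces defined in this file.
 *
 *	vnode_t vp;
 *	uint32_t vid;
 *
 *	if (file_vnode_withvid(fd, &vp, &vid) == 0) {
 *		if (vnode_getwithvid(vp, vid) == 0) {
 *			use_the_vnode(vp);	// hypothetical consumer
 *			vnode_put(vp);
 *		}
 *		file_drop(fd);	// balances the fp_iocount taken above
 *	}
 */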
4371
4372 /*
4373 * file_socket
4374 *
4375 * Description: Given an fd, look it up in the current process's per process
4376 * open file table, and return its internal socket pointer.
4377 *
4378 * Parameters: fd fd to obtain socket from
4379 * sp pointer to socket return area
4380 *
4381 * Returns: 0 Success
4382 * ENOTSOCK Not a socket
4383 * fp_lookup:EBADF Bad file descriptor
4384 *
4385 * Implicit returns:
4386 * *sp (modified) Returned socket pointer
4387 *
4388 * Locks: This function internally takes and drops the proc_fdlock for
4389 * the current process
4390 *
4391 * Notes: If successful, this function increments the fp_iocount on the
4392 * fd's corresponding fileproc.
4393 *
4394 * The fileproc referenced is not returned; because of this, care
4395 * must be taken to not drop the last reference (e.g. by closing
4396 * the file). This is inherently unsafe, since the reference may
4397 * not be recoverable from the socket if there is a subsequent
4398 * close that destroys the associated fileproc. The caller should
4399 * therefore retain their own reference on the fileproc so that
4400 * the fp_iocount can be dropped subsequently. Failure to do this
4401 * can result in the returned pointer immediately becoming invalid
4402 * following the call.
4403 *
4404 * Use of this function is discouraged.
4405 */
4406 int
4407 file_socket(int fd, struct socket **sp)
4408 {
4409 struct fileproc *fp;
4410 int error;
4411
4412 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
4413 if (error == 0) {
4414 if (sp) {
4415 *sp = (struct socket *)fp->f_data;
4416 }
4417 }
4418 return error;
4419 }
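
/*
 * Example (illustrative only): the same pattern applies to
 * file_socket().  The fd value and use_the_socket() are hypothetical;
 * the point is that any use of the returned pointer must be bracketed
 * by the implicit iocount and a matching file_drop().
 *
 *	struct socket *so;
 *
 *	if (file_socket(fd, &so) == 0) {
 *		use_the_socket(so);	// hypothetical consumer
 *		file_drop(fd);
 *	}
 */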
4420
4421
4422 /*
4423 * file_flags
4424 *
4425 * Description: Given an fd, look it up in the current process's per process
4426 * open file table, and return its fileproc's flags field.
4427 *
4428 * Parameters: fd fd whose flags are to be
4429 * retrieved
4430 * flags pointer to flags data area
4431 *
4432 * Returns: 0 Success
4433 * EBADF Bad file descriptor
4435 *
4436 * Implicit returns:
4437 * *flags (modified) Returned flags field
4438 *
4439 * Locks: This function internally takes and drops the proc_fdlock for
4440 * the current process
4441 */
4442 int
4443 file_flags(int fd, int *flags)
4444 {
4445 proc_t p = current_proc();
4446 struct fileproc *fp;
4447 int error = EBADF;
4448
4449 proc_fdlock_spin(p);
4450 fp = fp_get_noref_locked(p, fd);
4451 if (fp) {
4452 *flags = (int)fp->f_flag;
4453 error = 0;
4454 }
4455 proc_fdunlock(p);
4456
4457 return error;
4458 }
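
/*
 * Example (illustrative only): checking whether a hypothetical fd was
 * opened with non-blocking semantics, using the f_flag value returned
 * here.
 *
 *	int flags;
 *
 *	if (file_flags(fd, &flags) == 0 && (flags & FNONBLOCK)) {
 *		// the descriptor behaves as if opened with O_NONBLOCK
 *	}
 */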
4459
4460
4461 /*
4462 * file_drop
4463 *
4464 * Description: Drop an iocount reference on an fd, and wake up any waiters
4465 * for draining (i.e. blocked in fileproc_drain() called during
4466 * the last attempt to close a file).
4467 *
4468 * Parameters: fd fd on which an ioreference is
4469 * to be dropped
4470 *
4471 * Returns: 0 Success
4472 *
4473 * Description: Given an fd, look it up in the current process's per process
4474 * open file table, and drop its fileproc's fp_iocount by one
4475 *
4476 * Notes: This is intended as the corresponding release operation for
4477 * the file_vnode() and file_socket() functions above.
4478 *
4479 * If the caller cannot possibly hold an I/O reference,
4480 * this function will panic the kernel rather than allow
4481 * memory corruption. Callers must therefore only call this
4482 * function after having acquired an I/O reference on the file.
4483 *
4484 * Use of this function is discouraged.
4485 */
4486 int
4487 file_drop(int fd)
4488 {
4489 struct fileproc *fp;
4490 proc_t p = current_proc();
4491 int needwakeup = 0;
4492
4493 proc_fdlock_spin(p);
4494 fp = fp_get_noref_locked_with_iocount(p, fd);
4495
4496 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4497 if (fp->fp_flags & FP_SELCONFLICT) {
4498 fp->fp_flags &= ~FP_SELCONFLICT;
4499 }
4500
4501 if (p->p_fpdrainwait) {
4502 p->p_fpdrainwait = 0;
4503 needwakeup = 1;
4504 }
4505 }
4506 proc_fdunlock(p);
4507
4508 if (needwakeup) {
4509 wakeup(&p->p_fpdrainwait);
4510 }
4511 return 0;
4512 }
4513
4514
4515
4516 /*
4517 * falloc_withalloc
4518 *
4519 * Create a new open file structure and allocate
4520 * a file descriptor for the process that refers to it.
4521 *
4522 * Returns: 0 Success
4523 *
4524 * Description: Allocate an entry in the per process open file table and
4525 * return the corresponding fileproc and fd.
4526 *
4527 * Parameters: p The process in whose open file
4528 * table the fd is to be allocated
4529 * resultfp Pointer to fileproc pointer
4530 * return area
4531 * resultfd Pointer to fd return area
4532 * ctx VFS context
4533 * fp_zalloc fileproc allocator to use
4534 * crarg allocator args
4535 *
4536 * Returns: 0 Success
4537 * ENFILE Too many open files in system
4538 * fdalloc:EMFILE Too many open files in process
4539 * fdalloc:ENOMEM M_OFILETABL zone exhausted
4540 * ENOMEM fp_zone or fg_zone zone
4541 * exhausted
4542 *
4543 * Implicit returns:
4544 * *resultfp (modified) Returned fileproc pointer
4545 * *resultfd (modified) Returned fd
4546 *
4547 * Notes: This function takes separate process and context arguments
4548 * solely to support kern_exec.c; otherwise, it would take
4549 * neither, and use the vfs_context_current() routine internally.
4550 */
4551 int
4552 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4553 vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg)
4554 {
4555 struct fileproc *fp;
4556 struct fileglob *fg;
4557 int error, nfd;
4558
4559 /* Make sure we don't go beyond the system-wide limit */
4560 if (nfiles >= maxfiles) {
4561 tablefull("file");
4562 return ENFILE;
4563 }
4564
4565 proc_fdlock(p);
4566
4567 /* fdalloc will make sure the process stays below per-process limit */
4568 if ((error = fdalloc(p, 0, &nfd))) {
4569 proc_fdunlock(p);
4570 return error;
4571 }
4572
4573 #if CONFIG_MACF
4574 error = mac_file_check_create(proc_ucred(p));
4575 if (error) {
4576 proc_fdunlock(p);
4577 return error;
4578 }
4579 #endif
4580
4581 /*
4582 * Allocate the fileproc and fileglob for the new descriptor.
4583 * The fd slot itself has already been reserved by fdalloc()
4584 * above, so these allocations can be performed without
4585 * holding the proc_fdlock.
4586 */
4587 proc_fdunlock(p);
4588
4589 fp = (*fp_zalloc)(crarg);
4590 if (fp == NULL) {
4591 return ENOMEM;
4592 }
4593 fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
4594 lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4595
4596 os_ref_retain_locked(&fp->fp_iocount);
4597 os_ref_init_raw(&fg->fg_count, &f_refgrp);
4598 fg->fg_ops = &uninitops;
4599 fp->fp_glob = fg;
4600 #if CONFIG_MACF
4601 mac_file_label_init(fg);
4602 #endif
4603
4604 kauth_cred_ref(ctx->vc_ucred);
4605
4606 fp->f_cred = ctx->vc_ucred;
4607
4608 #if CONFIG_MACF
4609 mac_file_label_associate(fp->f_cred, fg);
4610 #endif
4611
4612 os_atomic_inc(&nfiles, relaxed);
4613
4614 proc_fdlock(p);
4615
4616 p->p_fd->fd_ofiles[nfd] = fp;
4617
4618 proc_fdunlock(p);
4619
4620 if (resultfp) {
4621 *resultfp = fp;
4622 }
4623 if (resultfd) {
4624 *resultfd = nfd;
4625 }
4626
4627 return 0;
4628 }
4629
4630 int
4631 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4632 {
4633 return falloc_withalloc(p, resultfp, resultfd, ctx,
4634 fileproc_alloc_init, NULL);
4635 }
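
/*
 * Example (illustrative only): the usual falloc() consumer pattern,
 * loosely modeled on how pipe-style objects are wired up elsewhere in
 * the kernel.  The myops fileops table, the my_object state and
 * init_my_object() are placeholders; fp_free() (defined later in this
 * file) is shown as the unwind path when initialization fails after
 * the descriptor has been reserved.
 *
 *	struct fileproc *fp;
 *	int fd, error;
 *
 *	error = falloc(p, &fp, &fd, vfs_context_current());
 *	if (error) {
 *		return error;
 *	}
 *	if (init_my_object(&my_object) != 0) {	// hypothetical setup step
 *		fp_free(p, fd, fp);
 *		return ENOMEM;
 *	}
 *	fp->f_flag = FREAD | FWRITE;
 *	fp->f_ops = &myops;
 *	fp->f_data = (caddr_t)my_object;
 *	proc_fdlock(p);
 *	procfdtbl_releasefd(p, fd, NULL);
 *	fp_drop(p, fd, fp, 1);
 *	proc_fdunlock(p);
 *	*retval = fd;
 */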
4636
4637 /*
4638 * fdexec
4639 *
4640 * Description: Perform close-on-exec processing for all files in a process
4641 * that are either marked as close-on-exec, or which were in the
4642 * process of being opened at the time of the execve
4643 *
4644 * Also handles the case (via posix_spawn()) where -all-
4645 * files except those marked with "inherit" are treated as
4646 * close-on-exec.
4647 *
4648 * Parameters: p Pointer to process calling
4649 * execve
4650 *
4651 * Returns: void
4652 *
4653 * Locks: This function internally takes and drops proc_fdlock()
4654 * But assumes tables don't grow/change while unlocked.
4655 *
4656 */
4657 void
4658 fdexec(proc_t p, short flags, int self_exec)
4659 {
4660 struct filedesc *fdp = p->p_fd;
4661 int i;
4662 boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4663 thread_t self = current_thread();
4664 struct uthread *ut = get_bsdthread_info(self);
4665 struct kqworkq *dealloc_kqwq = NULL;
4666
4667 /*
4668 * If the current thread is bound as a workq/workloop
4669 * servicing thread, we need to unbind it first.
4670 */
4671 if (ut->uu_kqr_bound && self_exec) {
4672 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4673 }
4674
4675 proc_fdlock(p);
4676
4677 /*
4678 * Deallocate the knotes for this process
4679 * and mark the tables non-existent so
4680 * subsequent kqueue closes go faster.
4681 */
4682 knotes_dealloc(p);
4683 assert(fdp->fd_knlistsize == 0);
4684 assert(fdp->fd_knhashmask == 0);
4685
4686 for (i = fdp->fd_lastfile; i >= 0; i--) {
4687 struct fileproc *fp = fdp->fd_ofiles[i];
4688 char *flagp = &fdp->fd_ofileflags[i];
4689
4690 if (fp && cloexec_default) {
4691 /*
4692 * Reverse the usual semantics of file descriptor
4693 * inheritance - all of them should be closed
4694 * except files marked explicitly as "inherit" and
4695 * not marked close-on-exec.
4696 */
4697 if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
4698 *flagp |= UF_EXCLOSE;
4699 }
4700 *flagp &= ~UF_INHERIT;
4701 }
4702
4703 if (
4704 ((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
4705 #if CONFIG_MACF
4706 || (fp && mac_file_check_inherit(proc_ucred(p), fp->fp_glob))
4707 #endif
4708 ) {
4709 fp_close_and_unlock(p, i, fp, 0);
4710 proc_fdlock(p);
4711 }
4712 }
4713
4714 /* release the per-process workq kq */
4715 if (fdp->fd_wqkqueue) {
4716 dealloc_kqwq = fdp->fd_wqkqueue;
4717 fdp->fd_wqkqueue = NULL;
4718 }
4719
4720 proc_fdunlock(p);
4721
4722 /* Anything to free? */
4723 if (dealloc_kqwq) {
4724 kqworkq_dealloc(dealloc_kqwq);
4725 }
4726 }
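
/*
 * Example (illustrative only, user space): the posix_spawn() attribute
 * and file-action pair that exercise the cloexec_default path above.
 * With POSIX_SPAWN_CLOEXEC_DEFAULT set, every descriptor is treated as
 * close-on-exec in the child except those explicitly marked "inherit";
 * pid, path, argv and envp are elided here.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawn_file_actions_t actions;
 *	short flags = 0;
 *
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_getflags(&attr, &flags);
 *	posix_spawnattr_setflags(&attr, flags | POSIX_SPAWN_CLOEXEC_DEFAULT);
 *	posix_spawn_file_actions_init(&actions);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDIN_FILENO);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDOUT_FILENO);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDERR_FILENO);
 *	posix_spawn(&pid, path, &actions, &attr, argv, envp);
 */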
4727
4728
4729 /*
4730 * fdcopy
4731 *
4732 * Description: Copy a filedesc structure. This is normally used as part of
4733 * forkproc() when forking a new process, to copy the per process
4734 * open file table over to the new process.
4735 *
4736 * Parameters: p Process whose open file table
4737 * is to be copied (parent)
4738 * uth_cdir Per thread current working
4739 * directory, or NULL
4740 *
4741 * Returns: NULL Copy failed
4742 * !NULL Pointer to new struct filedesc
4743 *
4744 * Locks: This function internally takes and drops proc_fdlock()
4745 *
4746 * Notes: Files are copied directly, ignoring the new resource limits
4747 * for the process that's being copied into. Since the descriptor
4748 * references are just additional references, this does not count
4749 * against the number of open files on the system.
4750 *
4751 * The struct filedesc includes the current working directory,
4752 * and the current root directory, if the process is chroot'ed.
4753 *
4754 * If the fork was called by a thread using a per thread current
4755 * working directory, we inherit the working directory from the
4756 * thread making the call, rather than from the process.
4757 *
4758 * In the case of a failure to obtain a reference, for most cases,
4759 * the file entry will be silently dropped. There's an exception
4760 * for the case of a chroot dir, since a failure to obtain a
4761 * reference there would constitute an "escape" from the chroot
4762 * environment, which must not be allowed. In that case, we will
4763 * deny the fork() operation, rather than allowing the escape.
4764 */
4765 struct filedesc *
4766 fdcopy(proc_t p, vnode_t uth_cdir)
4767 {
4768 struct filedesc *newfdp, *fdp = p->p_fd;
4769 int i;
4770 struct fileproc *ofp, *fp;
4771 vnode_t v_dir;
4772
4773 newfdp = zalloc(fdp_zone);
4774
4775 proc_fdlock(p);
4776
4777 /*
4778 * the FD_CHROOT flag will be inherited via this copy
4779 */
4780 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4781
4782 /*
4783 * If we are running with per-thread current working directories,
4784 * inherit the new current working directory from the current thread
4785 * instead, before we take our references.
4786 */
4787 if (uth_cdir != NULLVP) {
4788 newfdp->fd_cdir = uth_cdir;
4789 }
4790
4791 /*
4792 * For both fd_cdir and fd_rdir make sure we get
4793 * a valid reference... if we can't, then set
4794 * the pointer(s) to NULL in the child... this
4795 * will keep us from using a non-referenced vp
4796 * and allows us to do the vnode_rele only on
4797 * a properly referenced vp
4798 */
4799 if ((v_dir = newfdp->fd_cdir)) {
4800 if (vnode_getwithref(v_dir) == 0) {
4801 if ((vnode_ref(v_dir))) {
4802 newfdp->fd_cdir = NULL;
4803 }
4804 vnode_put(v_dir);
4805 } else {
4806 newfdp->fd_cdir = NULL;
4807 }
4808 }
4809 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4810 /*
4811 * we couldn't get a new reference on
4812 * the current working directory being
4813 * inherited... we might as well drop
4814 * our reference from the parent also
4815 * since the vnode has gone DEAD making
4816 * it useless... by dropping it we'll
4817 * be that much closer to recycling it
4818 */
4819 vnode_rele(fdp->fd_cdir);
4820 fdp->fd_cdir = NULL;
4821 }
4822
4823 if ((v_dir = newfdp->fd_rdir)) {
4824 if (vnode_getwithref(v_dir) == 0) {
4825 if ((vnode_ref(v_dir))) {
4826 newfdp->fd_rdir = NULL;
4827 }
4828 vnode_put(v_dir);
4829 } else {
4830 newfdp->fd_rdir = NULL;
4831 }
4832 }
4833 /* Coming from a chroot environment and unable to get a reference... */
4834 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4835 proc_fdunlock(p);
4836 /*
4837 * We couldn't get a new reference on
4838 * the chroot directory being
4839 * inherited... this is fatal, since
4840 * otherwise it would constitute an
4841 * escape from a chroot environment by
4842 * the new process.
4843 */
4844 if (newfdp->fd_cdir) {
4845 vnode_rele(newfdp->fd_cdir);
4846 }
4847 zfree(fdp_zone, newfdp);
4848 return NULL;
4849 }
4850
4851 /*
4852 * If the number of open files fits in the internal arrays
4853 * of the open file structure, use them, otherwise allocate
4854 * additional memory for the number of descriptors currently
4855 * in use.
4856 */
4857 if (newfdp->fd_lastfile < NDFILE) {
4858 i = NDFILE;
4859 } else {
4860 /*
4861 * Compute the smallest multiple of NDEXTENT needed
4862 * for the file descriptors currently in use,
4863 * allowing the table to shrink.
4864 */
4865 i = newfdp->fd_nfiles;
4866 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
4867 i /= 2;
4868 }
4869 }
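/*
 * Worked example (assuming the historical NDFILE/NDEXTENT values
 * of 25 and 50): with fd_nfiles == 400 and fd_lastfile == 30, the
 * loop above halves 400 -> 200 -> 100 and then stops, since 100
 * is no longer greater than 1 + 2 * NDEXTENT (101); the child's
 * table is sized at 100 slots instead of the parent's 400.
 */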
4870 proc_fdunlock(p);
4871
4872 MALLOC(newfdp->fd_ofiles, struct fileproc **,
4873 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4874 if (newfdp->fd_ofiles == NULL) {
4875 if (newfdp->fd_cdir) {
4876 vnode_rele(newfdp->fd_cdir);
4877 }
4878 if (newfdp->fd_rdir) {
4879 vnode_rele(newfdp->fd_rdir);
4880 }
4881
4882 zfree(fdp_zone, newfdp);
4883 return NULL;
4884 }
4885 (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4886 proc_fdlock(p);
4887
4888 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4889 newfdp->fd_nfiles = i;
4890
4891 if (fdp->fd_nfiles > 0) {
4892 struct fileproc **fpp;
4893 char *flags;
4894
4895 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4896 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4897 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4898 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4899
4900 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4901 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4902 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
4903 if ((ofp = *fpp) != NULL &&
4904 0 == (ofp->fp_glob->fg_lflags & FG_CONFINED) &&
4905 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
4906 #if DEBUG
4907 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
4908 panic("complex fileproc");
4909 }
4910 #endif
4911 fp = fileproc_alloc_init(NULL);
4912 if (fp == NULL) {
4913 /*
4914 * XXX no room to copy, unable to
4915 * XXX safely unwind state at present
4916 */
4917 *fpp = NULL;
4918 } else {
4919 fp->fp_flags |=
4920 (ofp->fp_flags & ~FP_TYPEMASK);
4921 fp->fp_glob = ofp->fp_glob;
4922 fg_ref(fp->fp_glob);
4923 *fpp = fp;
4924 }
4925 } else {
4926 *fpp = NULL;
4927 *flags = 0;
4928 }
4929 if (*fpp == NULL) {
4930 if (i == newfdp->fd_lastfile && i > 0) {
4931 newfdp->fd_lastfile--;
4932 }
4933 if (i < newfdp->fd_freefile) {
4934 newfdp->fd_freefile = i;
4935 }
4936 }
4937 }
4938 }
4939
4940 proc_fdunlock(p);
4941
4942 /*
4943 * Initialize knote and kqueue tracking structs
4944 */
4945 newfdp->fd_knlist = NULL;
4946 newfdp->fd_knlistsize = 0;
4947 newfdp->fd_knhash = NULL;
4948 newfdp->fd_knhashmask = 0;
4949 newfdp->fd_kqhash = NULL;
4950 newfdp->fd_kqhashmask = 0;
4951 newfdp->fd_wqkqueue = NULL;
4952 lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
4953 lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
4954
4955 return newfdp;
4956 }
4957
4958
4959 /*
4960 * fdfree
4961 *
4962 * Description: Release a filedesc (per process open file table) structure;
4963 * this is done on process exit(), or from forkproc_free() if
4964 * the fork fails for some reason subsequent to a successful
4965 * call to fdcopy()
4966 *
4967 * Parameters: p Pointer to process going away
4968 *
4969 * Returns: void
4970 *
4971 * Locks: This function internally takes and drops proc_fdlock()
4972 */
4973 void
4974 fdfree(proc_t p)
4975 {
4976 struct filedesc *fdp;
4977 struct fileproc *fp;
4978 struct kqworkq *dealloc_kqwq = NULL;
4979 int i;
4980
4981 proc_fdlock(p);
4982
4983 if (p == kernproc || NULL == (fdp = p->p_fd)) {
4984 proc_fdunlock(p);
4985 return;
4986 }
4987
4988 extern struct filedesc filedesc0;
4989
4990 if (&filedesc0 == fdp) {
4991 panic("filedesc0");
4992 }
4993
4994 /*
4995 * deallocate all the knotes up front and claim empty
4996 * tables to make any subsequent kqueue closes faster.
4997 */
4998 knotes_dealloc(p);
4999 assert(fdp->fd_knlistsize == 0);
5000 assert(fdp->fd_knhashmask == 0);
5001
5002 /*
5003 * dealloc all workloops that have outstanding retains
5004 * when created with scheduling parameters.
5005 */
5006 kqworkloops_dealloc(p);
5007
5008 /* close file descriptors */
5009 if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
5010 for (i = fdp->fd_lastfile; i >= 0; i--) {
5011 if ((fp = fdp->fd_ofiles[i]) != NULL) {
5012 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
5013 panic("fdfree: found fp with UF_RESERVED");
5014 }
5015 fp_close_and_unlock(p, i, fp, 0);
5016 proc_fdlock(p);
5017 }
5018 }
5019 FREE(fdp->fd_ofiles, M_OFILETABL);
5020 fdp->fd_ofiles = NULL;
5021 fdp->fd_nfiles = 0;
5022 }
5023
5024 if (fdp->fd_wqkqueue) {
5025 dealloc_kqwq = fdp->fd_wqkqueue;
5026 fdp->fd_wqkqueue = NULL;
5027 }
5028
5029 proc_fdunlock(p);
5030
5031 if (dealloc_kqwq) {
5032 kqworkq_dealloc(dealloc_kqwq);
5033 }
5034 if (fdp->fd_cdir) {
5035 vnode_rele(fdp->fd_cdir);
5036 }
5037 if (fdp->fd_rdir) {
5038 vnode_rele(fdp->fd_rdir);
5039 }
5040
5041 proc_fdlock_spin(p);
5042 p->p_fd = NULL;
5043 proc_fdunlock(p);
5044
5045 if (fdp->fd_kqhash) {
5046 for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
5047 assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
5048 }
5049 hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
5050 }
5051
5052 lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
5053 lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
5054
5055 zfree(fdp_zone, fdp);
5056 }
5057
5058 /*
5059 * fileproc_drain
5060 *
5061 * Description: Drain out pending I/O operations
5062 *
5063 * Parameters: p Process closing this file
5064 * fp fileproc struct for the open
5065 * instance on the file
5066 *
5067 * Returns: void
5068 *
5069 * Locks: Assumes the caller holds the proc_fdlock
5070 *
5071 * Notes: For character devices, this occurs on the last close of the
5072 * device; for all other file descriptors, this occurs on each
5073 * close to prevent fd's from being closed out from under
5074 * operations currently in progress and blocked
5075 *
5076 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5077 * regarding their use and interaction with this function.
5078 */
5079 void
5080 fileproc_drain(proc_t p, struct fileproc * fp)
5081 {
5082 struct vfs_context context;
5083 thread_t thread;
5084 bool is_current_proc;
5085
5086 is_current_proc = (p == current_proc());
5087
5088 if (!is_current_proc) {
5089 proc_lock(p);
5090 thread = proc_thread(p); /* XXX */
5091 thread_reference(thread);
5092 proc_unlock(p);
5093 } else {
5094 thread = current_thread();
5095 }
5096
5097 context.vc_thread = thread;
5098 context.vc_ucred = fp->fp_glob->fg_cred;
5099
5100 /* Set the vflag for drain */
5101 fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
5102
5103 while (os_ref_get_count(&fp->fp_iocount) > 1) {
5104 lck_mtx_convert_spin(&p->p_fdmlock);
5105
5106 fo_drain(fp, &context);
5107 if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
5108 if (waitq_wakeup64_all((struct waitq *)fp->fp_wset, NO_EVENT64,
5109 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5110 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->fp_wset, fp);
5111 }
5112 }
5113 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5114 if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5115 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5116 panic("bad select_conflict_queue");
5117 }
5118 }
5119 p->p_fpdrainwait = 1;
5120
5121 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5122 }
5123 #if DIAGNOSTIC
5124 if ((fp->fp_flags & FP_INSELECT) != 0) {
5125 panic("FP_INSELECT set on drained fp");
5126 }
5127 #endif
5128 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5129 fp->fp_flags &= ~FP_SELCONFLICT;
5130 }
5131
5132 if (!is_current_proc) {
5133 thread_deallocate(thread);
5134 }
5135 }
5136
5137
5138 /*
5139 * fp_free
5140 *
5141 * Description: Release the fd and free the fileproc associated with the fd
5142 * in the per process open file table of the specified process;
5143 * these values must correspond.
5144 *
5145 * Parameters: p Process containing fd
5146 * fd fd to be released
5147 * fp fileproc to be freed
5148 */
5149 void
5150 fp_free(proc_t p, int fd, struct fileproc * fp)
5151 {
5152 proc_fdlock_spin(p);
5153 fdrelse(p, fd);
5154 proc_fdunlock(p);
5155
5156 fg_free(fp->fp_glob);
5157 os_ref_release_live(&fp->fp_iocount);
5158 fileproc_free(fp);
5159 }
5160
5161
5162 /*
5163 * sys_flock
5164 *
5165 * Description: Apply an advisory lock on a file descriptor.
5166 *
5167 * Parameters: p Process making request
5168 * uap->fd fd on which the lock is to be
5169 * attempted
5170 * uap->how (Un)Lock bits, including type
5171 * retval Pointer to the call return area
5172 *
5173 * Returns: 0 Success
5174 * fp_getfvp:EBADF Bad file descriptor
5175 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5176 * vnode_getwithref:???
5177 * VNOP_ADVLOCK:???
5178 *
5179 * Implicit returns:
5180 * None; *retval is not used by this system call
5181 *
5182 * Notes: Just attempt to get a record lock of the requested type on
5183 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5184 */
5185 int
5186 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5187 {
5188 int fd = uap->fd;
5189 int how = uap->how;
5190 struct fileproc *fp;
5191 struct vnode *vp;
5192 struct flock lf;
5193 vfs_context_t ctx = vfs_context_current();
5194 int error = 0;
5195
5196 AUDIT_ARG(fd, uap->fd);
5197 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5198 return error;
5199 }
5200 if ((error = vnode_getwithref(vp))) {
5201 goto out1;
5202 }
5203 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5204
5205 lf.l_whence = SEEK_SET;
5206 lf.l_start = 0;
5207 lf.l_len = 0;
5208 if (how & LOCK_UN) {
5209 lf.l_type = F_UNLCK;
5210 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5211 goto out;
5212 }
5213 if (how & LOCK_EX) {
5214 lf.l_type = F_WRLCK;
5215 } else if (how & LOCK_SH) {
5216 lf.l_type = F_RDLCK;
5217 } else {
5218 error = EBADF;
5219 goto out;
5220 }
5221 #if CONFIG_MACF
5222 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob, F_SETLK, &lf);
5223 if (error) {
5224 goto out;
5225 }
5226 #endif
5227 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5228 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5229 ctx, NULL);
5230 if (!error) {
5231 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5232 }
5233 out:
5234 (void)vnode_put(vp);
5235 out1:
5236 fp_drop(p, fd, fp, 0);
5237 return error;
5238 }
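
/*
 * Example (illustrative only, user space): the flock() usage that ends
 * up in this handler.  LOCK_NB corresponds to the absence of F_WAIT
 * above, so the non-blocking attempt fails with EWOULDBLOCK while the
 * lock is held by someone else; the path name is hypothetical.
 *
 *	int fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0644);
 *
 *	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
 *		// another descriptor holds the lock
 *	}
 *	// ... critical section ...
 *	flock(fd, LOCK_UN);
 */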
5239
5240 /*
5241 * sys_fileport_makeport
5242 *
5243 * Description: Obtain a Mach send right for a given file descriptor.
5244 *
5245 * Parameters: p Process calling fileport
5246 * uap->fd The fd to reference
5247 * uap->portnamep User address at which to place port name.
5248 *
5249 * Returns: 0 Success.
5250 * EBADF Bad file descriptor.
5251 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5252 * EFAULT Address at which to store port name is not valid.
5253 * EAGAIN Resource shortage.
5254 *
5255 * Implicit returns:
5256 * On success, name of send right is stored at user-specified address.
5257 */
5258 int
5259 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5260 __unused int *retval)
5261 {
5262 int err;
5263 int fd = uap->fd;
5264 user_addr_t user_portaddr = uap->portnamep;
5265 struct fileproc *fp = FILEPROC_NULL;
5266 struct fileglob *fg = NULL;
5267 ipc_port_t fileport;
5268 mach_port_name_t name = MACH_PORT_NULL;
5269
5270 proc_fdlock(p);
5271 err = fp_lookup(p, fd, &fp, 1);
5272 if (err != 0) {
5273 goto out_unlock;
5274 }
5275
5276 fg = fp->fp_glob;
5277 if (!fg_sendable(fg)) {
5278 err = EINVAL;
5279 goto out_unlock;
5280 }
5281
5282 if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5283 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5284 goto out_unlock;
5285 }
5286
5287 proc_fdunlock(p);
5288
5289 /* Allocate and initialize a port */
5290 fileport = fileport_alloc(fg);
5291 if (fileport == IPC_PORT_NULL) {
5292 err = EAGAIN;
5293 goto out;
5294 }
5295
5296 /* Dropped when port is deallocated */
5297 fg_ref(fg);
5298
5299 /* Add an entry. Deallocates port on failure. */
5300 name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5301 if (!MACH_PORT_VALID(name)) {
5302 err = EINVAL;
5303 goto out;
5304 }
5305
5306 err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5307 if (err != 0) {
5308 goto out;
5309 }
5310
5311 /* Tag the fileglob for debugging purposes */
5312 lck_mtx_lock_spin(&fg->fg_lock);
5313 fg->fg_lflags |= FG_PORTMADE;
5314 lck_mtx_unlock(&fg->fg_lock);
5315
5316 fp_drop(p, fd, fp, 0);
5317
5318 return 0;
5319
5320 out_unlock:
5321 proc_fdunlock(p);
5322 out:
5323 if (MACH_PORT_VALID(name)) {
5324 /* Don't care if another thread races us to deallocate the entry */
5325 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5326 }
5327
5328 if (fp != FILEPROC_NULL) {
5329 fp_drop(p, fd, fp, 0);
5330 }
5331
5332 return err;
5333 }
5334
5335 void
5336 fileport_releasefg(struct fileglob *fg)
5337 {
5338 (void)fg_drop(PROC_NULL, fg);
5339 }
5340
5341 /*
5342 * fileport_makefd
5343 *
5344 * Description: Obtain the file descriptor for a given Mach send right.
5345 *
5346 * Returns: 0 Success
5347 * EINVAL Invalid Mach port name, or port is not for a file.
5348 * fdalloc:EMFILE
5349 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5350 *
5351 * Implicit returns:
5352 * *retval (modified) The new descriptor
5353 */
5354 int
5355 fileport_makefd(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5356 {
5357 struct fileglob *fg;
5358 struct fileproc *fp = FILEPROC_NULL;
5359 int fd;
5360 int err;
5361
5362 fg = fileport_port_to_fileglob(port);
5363 if (fg == NULL) {
5364 err = EINVAL;
5365 goto out;
5366 }
5367
5368 fp = fileproc_alloc_init(NULL);
5369 if (fp == FILEPROC_NULL) {
5370 err = ENOMEM;
5371 goto out;
5372 }
5373
5374 proc_fdlock(p);
5375 err = fdalloc(p, 0, &fd);
5376 if (err != 0) {
5377 proc_fdunlock(p);
5378 goto out;
5379 }
5380 if (uf_flags) {
5381 *fdflags(p, fd) |= uf_flags;
5382 }
5383
5384 fp->fp_glob = fg;
5385 fg_ref(fg);
5386
5387 procfdtbl_releasefd(p, fd, fp);
5388 proc_fdunlock(p);
5389
5390 *retval = fd;
5391 err = 0;
5392 out:
5393 if ((fp != NULL) && (0 != err)) {
5394 fileproc_free(fp);
5395 }
5396
5397 return err;
5398 }
5399
5400 /*
5401 * sys_fileport_makefd
5402 *
5403 * Description: Obtain the file descriptor for a given Mach send right.
5404 *
5405 * Parameters: p Process calling fileport
5406 * uap->port Name of send right to file port.
5407 *
5408 * Returns: 0 Success
5409 * EINVAL Invalid Mach port name, or port is not for a file.
5410 * fdalloc:EMFILE
5411 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5412 *
5413 * Implicit returns:
5414 * *retval (modified) The new descriptor
5415 */
5416 int
5417 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5418 {
5419 ipc_port_t port = IPC_PORT_NULL;
5420 mach_port_name_t send = uap->port;
5421 kern_return_t res;
5422 int err;
5423
5424 res = ipc_object_copyin(get_task_ipcspace(p->task),
5425 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5426
5427 if (res == KERN_SUCCESS) {
5428 err = fileport_makefd(p, port, UF_EXCLOSE, retval);
5429 } else {
5430 err = EINVAL;
5431 }
5432
5433 if (IPC_PORT_NULL != port) {
5434 ipc_port_release_send(port);
5435 }
5436
5437 return err;
5438 }
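
/*
 * Example (illustrative only, user space): the fileport round trip
 * implemented by these two system calls.  The Libsystem wrappers
 * fileport_makeport() and fileport_makefd() are assumed here; in
 * practice the port is handed to another process (e.g. over Mach IPC
 * or XPC) before being converted back into a descriptor.
 *
 *	mach_port_t port = MACH_PORT_NULL;
 *	int newfd;
 *
 *	if (fileport_makeport(fd, &port) == 0) {
 *		// ... transfer "port" to the receiving process ...
 *		newfd = fileport_makefd(port);	// in the receiver
 *		mach_port_deallocate(mach_task_self(), port);
 *	}
 */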
5439
5440
5441 /*
5442 * dupfdopen
5443 *
5444 * Description: Duplicate the specified descriptor to a free descriptor;
5445 * this is the second half of fdopen(), above.
5446 *
5447 * Parameters: fdp filedesc pointer to fill in
5448 * indx fd to dup to
5449 * dfd fd to dup from
5450 * flags mode bits to set on new fd
5451 * error command code
5452 *
5453 * Returns: 0 Success
5454 * EBADF Source fd is bad
5455 * EACCES Requested mode not allowed
5456 * EPERM Source fd is guarded against dup
5457 * !0 'error', if not ENODEV
5458 *
5459 * Notes: XXX This is not thread safe; see fdopen() above
5460 */
5461 int
5462 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5463 {
5464 struct fileproc *wfp;
5465 struct fileproc *fp;
5466 #if CONFIG_MACF
5467 int myerror;
5468 #endif
5469 proc_t p = current_proc();
5470
5471 /*
5472 * If the to-be-dup'd fd number is greater than the allowed number
5473 * of file descriptors, or the fd to be dup'd has already been
5474 * closed, reject. Note, check for new == old is necessary as
5475 * falloc could allocate an already closed to-be-dup'd descriptor
5476 * as the new descriptor.
5477 */
5478 proc_fdlock(p);
5479
5480 fp = fdp->fd_ofiles[indx];
5481 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5482 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5483 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5484 proc_fdunlock(p);
5485 return EBADF;
5486 }
5487 #if CONFIG_MACF
5488 myerror = mac_file_check_dup(proc_ucred(p), wfp->fp_glob, dfd);
5489 if (myerror) {
5490 proc_fdunlock(p);
5491 return myerror;
5492 }
5493 #endif
5494 /*
5495 * Only one case is of interest here:
5496 *
5497 * For ENODEV simply dup (dfd) to file descriptor
5498 * (indx) and return.
5499 *
5500 * (The historical ENXIO case, which stole the file structure
5501 * away from (dfd) and stored it in (indx), is no longer
5502 * handled here.)
5503 *
5504 * Any other error code is just returned.
5505 */
5506 switch (error) {
5507 case ENODEV:
5508 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5509 proc_fdunlock(p);
5510 return EPERM;
5511 }
5512
5513 /*
5514 * Check that the mode the file is being opened for is a
5515 * subset of the mode of the existing descriptor.
5516 */
5517 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5518 proc_fdunlock(p);
5519 return EACCES;
5520 }
5521 if (indx > fdp->fd_lastfile) {
5522 fdp->fd_lastfile = indx;
5523 }
5524
5525 if (fp->fp_glob) {
5526 fg_free(fp->fp_glob);
5527 }
5528 fg_ref(wfp->fp_glob);
5529 fp->fp_glob = wfp->fp_glob;
5530
5531 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5532 ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0);
5533
5534 proc_fdunlock(p);
5535 return 0;
5536
5537 default:
5538 proc_fdunlock(p);
5539 return error;
5540 }
5541 /* NOTREACHED */
5542 }
5543
5544
5545 /*
5546 * fo_read
5547 *
5548 * Description: Generic fileops read indirected through the fileops pointer
5549 * in the fileproc structure
5550 *
5551 * Parameters: fp fileproc structure pointer
5552 * uio user I/O structure pointer
5553 * flags FOF_ flags
5554 * ctx VFS context for operation
5555 *
5556 * Returns: 0 Success
5557 * !0 Errno from read
5558 */
5559 int
5560 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5561 {
5562 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5563 }
5564
5565 int
5566 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5567 {
5568 #pragma unused(fp, uio, flags, ctx)
5569 return ENXIO;
5570 }
5571
5572
5573 /*
5574 * fo_write
5575 *
5576 * Description: Generic fileops write indirected through the fileops pointer
5577 * in the fileproc structure
5578 *
5579 * Parameters: fp fileproc structure pointer
5580 * uio user I/O structure pointer
5581 * flags FOF_ flags
5582 * ctx VFS context for operation
5583 *
5584 * Returns: 0 Success
5585 * !0 Errno from write
5586 */
5587 int
5588 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5589 {
5590 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5591 }
5592
5593 int
5594 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5595 {
5596 #pragma unused(fp, uio, flags, ctx)
5597 return ENXIO;
5598 }
5599
5600
5601 /*
5602 * fo_ioctl
5603 *
5604 * Description: Generic fileops ioctl indirected through the fileops pointer
5605 * in the fileproc structure
5606 *
5607 * Parameters: fp fileproc structure pointer
5608 * com ioctl command
5609 * data pointer to internalized copy
5610 * of user space ioctl command
5611 * parameter data in kernel space
5612 * ctx VFS context for operation
5613 *
5614 * Returns: 0 Success
5615 * !0 Errno from ioctl
5616 *
5617 * Locks: The caller is assumed to have held the proc_fdlock; this
5618 * function releases and reacquires this lock. If the caller
5619 * accesses data protected by this lock prior to calling this
5620 * function, it will need to revalidate/reacquire any cached
5621 * protected data obtained prior to the call.
5622 */
5623 int
5624 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5625 {
5626 int error;
5627
5628 proc_fdunlock(vfs_context_proc(ctx));
5629 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5630 proc_fdlock(vfs_context_proc(ctx));
5631 return error;
5632 }
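
/*
 * Example (illustrative only): because fo_ioctl() drops and reacquires
 * the proc_fdlock, a caller that cached lock-protected state must
 * revalidate it after the call.  fp_lookup() and fp_drop() are the
 * real interfaces from this file; fd, com, data and ctx are assumed.
 *
 *	proc_fdlock(p);
 *	if (fp_lookup(p, fd, &fp, 1) == 0) {	// locked lookup, takes an iocount
 *		error = fo_ioctl(fp, com, data, ctx);
 *		// the fdlock was dropped and retaken inside fo_ioctl(), so
 *		// any filedesc state read before the call is now stale
 *		fp_drop(p, fd, fp, 1);
 *	}
 *	proc_fdunlock(p);
 */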
5633
5634 int
5635 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5636 {
5637 #pragma unused(fp, com, data, ctx)
5638 return ENOTTY;
5639 }
5640
5641
5642 /*
5643 * fo_select
5644 *
5645 * Description: Generic fileops select indirected through the fileops pointer
5646 * in the fileproc structure
5647 *
5648 * Parameters: fp fileproc structure pointer
5649 * which select which
5650 * wql pointer to wait queue list
5651 * ctx VFS context for operation
5652 *
5653 * Returns: 0 Success
5654 * !0 Errno from select
5655 */
5656 int
5657 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5658 {
5659 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5660 }
5661
5662 int
5663 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5664 {
5665 #pragma unused(fp, which, wql, ctx)
5666 return ENOTSUP;
5667 }
5668
5669
5670 /*
5671 * fo_close
5672 *
5673 * Description: Generic fileops close indirected through the fileops pointer
5674 * in the fileproc structure
5675 *
5676 * Parameters: fg fileglob structure pointer for
5677 * file to close
5678 * ctx VFS context for operation
5679 *
5680 * Returns: 0 Success
5681 * !0 Errno from close
5682 */
5683 int
5684 fo_close(struct fileglob *fg, vfs_context_t ctx)
5685 {
5686 return (*fg->fg_ops->fo_close)(fg, ctx);
5687 }
5688
5689
5690 /*
5691 * fo_drain
5692 *
5693 * Description: Generic fileops drain indirected through the fileops
5694 * pointer in the fileproc structure
5695 *
5696 * Parameters: fp fileproc structure pointer
5697 * ctx VFS context for operation
5698 *
5699 * Returns: 0 Success
5700 * !0 errno from drain
5701 */
5702 int
5703 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5704 {
5705 return (*fp->f_ops->fo_drain)(fp, ctx);
5706 }
5707
5708 int
5709 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5710 {
5711 #pragma unused(fp, ctx)
5712 return ENOTSUP;
5713 }
5714
5715
5716 /*
5717 * fo_kqfilter
5718 *
5719 * Description: Generic fileops kqueue filter indirected through the fileops
5720 * pointer in the fileproc structure
5721 *
5722 * Parameters: fp fileproc structure pointer
5723 * kn pointer to knote to filter on
5724 *
5725 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5726 * 0 Filter is not active
5727 * !0 Filter is active
5728 */
5729 int
5730 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5731 {
5732 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5733 }
5734
5735 int
5736 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5737 {
5738 #pragma unused(fp, kev)
5739 knote_set_error(kn, ENOTSUP);
5740 return 0;
5741 }
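
/*
 * Example (illustrative only): a minimal fileops table that uses the
 * fo_no_*() defaults above for operations a hypothetical descriptor
 * type does not support.  The field names assume the struct fileops
 * layout in sys/file_internal.h; myfile_read, myfile_close and the
 * DTYPE_ value are placeholders.
 *
 *	static const struct fileops myfileops = {
 *		.fo_type     = DTYPE_PIPE,	// placeholder type
 *		.fo_read     = myfile_read,
 *		.fo_write    = fo_no_write,
 *		.fo_ioctl    = fo_no_ioctl,
 *		.fo_select   = fo_no_select,
 *		.fo_close    = myfile_close,
 *		.fo_drain    = fo_no_drain,
 *		.fo_kqfilter = fo_no_kqfilter,
 *	};
 */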
5742
5743
5744 struct fileproc *
5745 fileproc_alloc_init(__unused void *arg)
5746 {
5747 struct fileproc *fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO);
5748
5749 os_ref_init(&fp->fp_iocount, &f_refgrp);
5750 return fp;
5751 }
5752
5753
5754 void
5755 fileproc_free(struct fileproc *fp)
5756 {
5757 os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
5758 #if DEVELOPMENT || DEBUG
5759 if (0 != refc) {
5760 panic("%s: pid %d refc: %u != 0",
5761 __func__, proc_pid(current_proc()), refc);
5762 }
5763 #endif
5764 switch (FILEPROC_TYPE(fp)) {
5765 case FTYPE_SIMPLE:
5766 zfree(fp_zone, fp);
5767 break;
5768 case FTYPE_GUARDED:
5769 guarded_fileproc_free(fp);
5770 break;
5771 default:
5772 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->fp_flags);
5773 }
5774 }
5775
5776 void
5777 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
5778 {
5779 if (clearflags) {
5780 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
5781 } else {
5782 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
5783 }
5784 }
5785
5786 fileproc_vflags_t
5787 fileproc_get_vflags(struct fileproc *fp)
5788 {
5789 return os_atomic_load(&fp->fp_vflags, relaxed);
5790 }