1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102 #include <sys/mount_internal.h>
103 #include <sys/kdebug.h>
104 #include <sys/sysproto.h>
105 #include <sys/pipe.h>
106 #include <sys/spawn.h>
107 #include <sys/cprotect.h>
108 #include <sys/ubc_internal.h>
109
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/waitq.h>
113 #include <kern/ipc_misc.h>
114
115 #include <vm/vm_protos.h>
116 #include <mach/mach_port.h>
117
118 #include <security/audit/audit.h>
119 #if CONFIG_MACF
120 #include <security/mac_framework.h>
121 #endif
122
123 #include <stdbool.h>
124 #include <os/atomic_private.h>
125 #include <IOKit/IOBSD.h>
126
127 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
128 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
129 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
130 void ipc_port_release_send(ipc_port_t);
131
132 static void fileproc_drain(proc_t, struct fileproc *);
133 static int finishdup(proc_t p,
134 struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
135
136 void fileport_releasefg(struct fileglob *fg);
137
138 /* flags for fp_close_and_unlock */
139 #define FD_DUP2RESV 1
140
141 /* We don't want these exported */
142
143 __private_extern__
144 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
145
146 static void fdrelse(struct proc * p, int fd);
147
148 extern kauth_scope_t kauth_scope_fileop;
149
150 /* Conflict wait queue for when selects collide (opaque type) */
151 extern struct waitq select_conflict_queue;
152
153 #ifndef HFS_GET_BOOT_INFO
154 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
155 #endif
156
157 #ifndef HFS_SET_BOOT_INFO
158 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
159 #endif
160
161 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
162 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
163 #endif
164
165 #define f_flag fp_glob->fg_flag
166 #define f_type fp_glob->fg_ops->fo_type
167 #define f_cred fp_glob->fg_cred
168 #define f_ops fp_glob->fg_ops
169 #define f_offset fp_glob->fg_offset
170 #define f_data fp_glob->fg_data
171 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
172 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
173 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
174 ? 1 : 0)
175
176 ZONE_DECLARE(fg_zone, "fileglob",
177 sizeof(struct fileglob), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
178 ZONE_DECLARE(fp_zone, "fileproc",
179 sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
180 ZONE_DECLARE(fdp_zone, "filedesc",
181 sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
182 /*
183 * If you need accounting for KM_OFILETABL consider using
184 * KALLOC_HEAP_DEFINE to define a view.
185 */
186 #define KM_OFILETABL KHEAP_DEFAULT
187
188 /*
189 * Descriptor management.
190 */
191 int nfiles; /* actual number of open files */
192 /*
193 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
194 */
195 static const struct fileops uninitops;
196
197 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
198 static LCK_GRP_DECLARE(file_lck_grp, "file");
199
200 #pragma mark fileglobs
201
202 /*!
203 * @function fg_free
204 *
205 * @brief
206 * Free a file structure.
207 */
208 static void
209 fg_free(struct fileglob *fg)
210 {
211 os_atomic_dec(&nfiles, relaxed);
212
213 if (fg->fg_vn_data) {
214 fg_vn_data_free(fg->fg_vn_data);
215 fg->fg_vn_data = NULL;
216 }
217
218 if (IS_VALID_CRED(fg->fg_cred)) {
219 kauth_cred_unref(&fg->fg_cred);
220 }
221 lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
222
223 #if CONFIG_MACF
224 mac_file_label_destroy(fg);
225 #endif
226 zfree(fg_zone, fg);
227 }
228
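/*!
 * @function fg_ref
 *
 * @brief
 * Acquire an additional reference on a fileglob.
 *
 * @discussion
 * The caller is expected to hold the proc_fdlock of @c p; this is
 * asserted on DEBUG and DEVELOPMENT kernels.
 */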
229 OS_ALWAYS_INLINE
230 void
231 fg_ref(proc_t p, struct fileglob *fg)
232 {
233 #if DEBUG || DEVELOPMENT
234 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
235 #else
236 (void)p;
237 #endif
238 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
239 }
240
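/*!
 * @function fg_drop_live
 *
 * @brief
 * Drop a fileglob reference that is known not to be the last one.
 */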
241 void
242 fg_drop_live(struct fileglob *fg)
243 {
244 os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
245 }
246
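/*!
 * @function fg_drop
 *
 * @brief
 * Drop a reference on a fileglob.
 *
 * @discussion
 * If @c p holds POSIX advisory locks and the fileglob is a vnode,
 * those locks are released first (POSIX close semantics).  When the
 * last reference is dropped, the fileglob's fo_close operation is
 * invoked and the structure is freed.
 */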
247 int
248 fg_drop(proc_t p, struct fileglob *fg)
249 {
250 struct vnode *vp;
251 struct vfs_context context;
252 int error = 0;
253
254 if (fg == NULL) {
255 return 0;
256 }
257
258 /* Set up context with cred stashed in fg */
259 if (p == current_proc()) {
260 context.vc_thread = current_thread();
261 } else {
262 context.vc_thread = NULL;
263 }
264 context.vc_ucred = fg->fg_cred;
265
266 /*
267 * POSIX record locking dictates that any close releases ALL
268 * locks owned by this process. This is handled by setting
269 * a flag in the unlock to free ONLY locks obeying POSIX
270 * semantics, and not to free BSD-style file locks.
271 * If the descriptor was in a message, POSIX-style locks
272 * aren't passed with the descriptor.
273 */
274 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
275 (p->p_ladvflag & P_LADVLOCK)) {
276 struct flock lf = {
277 .l_whence = SEEK_SET,
278 .l_type = F_UNLCK,
279 };
280
281 vp = (struct vnode *)fg->fg_data;
282 if ((error = vnode_getwithref(vp)) == 0) {
283 (void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
284 (void)vnode_put(vp);
285 }
286 }
287
288 if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
289 /*
290 * Since we ensure that fg->fg_ops is always initialized,
291 * it is safe to invoke fo_close on the fg
292 */
293 error = fo_close(fg, &context);
294
295 fg_free(fg);
296 }
297
298 return error;
299 }
300
301 /*
302 * fg_get_vnode
303 *
304 * Description: Return vnode associated with the file structure, if
305 * any. The lifetime of the returned vnode is bound to
306 * the lifetime of the file structure.
307 *
308 * Parameters: fg Pointer to fileglob to
309 * inspect
310 *
311 * Returns: vnode_t
312 */
313 vnode_t
314 fg_get_vnode(struct fileglob *fg)
315 {
316 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
317 return (vnode_t)fg->fg_data;
318 } else {
319 return NULL;
320 }
321 }
322
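/*!
 * @function fg_sendable
 *
 * @brief
 * Return whether a fileglob may be transported to another process
 * (e.g. via a fileport or socket message): only vnodes, sockets,
 * pipes, POSIX shared memory and network policy descriptors that are
 * not marked FG_CONFINED qualify.
 */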
323 bool
324 fg_sendable(struct fileglob *fg)
325 {
326 switch (FILEGLOB_DTYPE(fg)) {
327 case DTYPE_VNODE:
328 case DTYPE_SOCKET:
329 case DTYPE_PIPE:
330 case DTYPE_PSXSHM:
331 case DTYPE_NETPOLICY:
332 return (fg->fg_lflags & FG_CONFINED) == 0;
333
334 default:
335 return false;
336 }
337 }
338
339
340 #pragma mark fileprocs
341
342 /*
343 * check_file_seek_range
344 *
345 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
346 *
347 * Parameters: fl Flock structure.
348 * cur_file_offset Current offset in the file.
349 *
350 * Returns: 0 on Success.
351 * EOVERFLOW on overflow.
352 * EINVAL on offset less than zero.
353 */
354
355 static int
356 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
357 {
358 if (fl->l_whence == SEEK_CUR) {
359 /* Check if the start marker is beyond LLONG_MAX. */
360 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
361 /* Check if start marker is negative */
362 if (fl->l_start < 0) {
363 return EINVAL;
364 }
365 return EOVERFLOW;
366 }
367 /* Check if the start marker is negative. */
368 if (fl->l_start + cur_file_offset < 0) {
369 return EINVAL;
370 }
371 /* Check if end marker is beyond LLONG_MAX. */
372 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
373 cur_file_offset, fl->l_len - 1))) {
374 return EOVERFLOW;
375 }
376 /* Check if the end marker is negative. */
377 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
378 fl->l_len < 0)) {
379 return EINVAL;
380 }
381 } else if (fl->l_whence == SEEK_SET) {
382 /* Check if the start marker is negative. */
383 if (fl->l_start < 0) {
384 return EINVAL;
385 }
386 /* Check if the end marker is beyond LLONG_MAX. */
387 if ((fl->l_len > 0) &&
388 CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
389 return EOVERFLOW;
390 }
391 /* Check if the end marker is negative. */
392 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
393 return EINVAL;
394 }
395 }
396 return 0;
397 }
398
399
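/*
 * proc_dirs_lock_shared / proc_dirs_unlock_shared
 * proc_dirs_lock_exclusive / proc_dirs_unlock_exclusive
 *
 * Description: Acquire/release the per-process p_dirs_lock, the
 *		reader/writer lock that serializes changes to the
 *		process's current and root directory state.
 *
 * Parameters:	p		Process to take or drop the lock on
 *
 * Returns:	void
 */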
400 void
401 proc_dirs_lock_shared(proc_t p)
402 {
403 lck_rw_lock_shared(&p->p_dirs_lock);
404 }
405
406 void
407 proc_dirs_unlock_shared(proc_t p)
408 {
409 lck_rw_unlock_shared(&p->p_dirs_lock);
410 }
411
412 void
413 proc_dirs_lock_exclusive(proc_t p)
414 {
415 lck_rw_lock_exclusive(&p->p_dirs_lock);
416 }
417
418 void
419 proc_dirs_unlock_exclusive(proc_t p)
420 {
421 lck_rw_unlock_exclusive(&p->p_dirs_lock);
422 }
423
424 /*
425 * proc_fdlock, proc_fdlock_spin
426 *
427 * Description: Lock to control access to the per process struct fileproc
428 * and struct filedesc
429 *
430 * Parameters: p Process to take the lock on
431 *
432 * Returns: void
433 *
434 * Notes: The lock is initialized in forkproc() and destroyed in
435 * reap_child_process().
436 */
437 void
438 proc_fdlock(proc_t p)
439 {
440 lck_mtx_lock(&p->p_fdmlock);
441 }
442
443 void
444 proc_fdlock_spin(proc_t p)
445 {
446 lck_mtx_lock_spin(&p->p_fdmlock);
447 }
448
449 void
450 proc_fdlock_assert(proc_t p, int assertflags)
451 {
452 lck_mtx_assert(&p->p_fdmlock, assertflags);
453 }
454
455
456 /*
457 * proc_fdunlock
458 *
459 * Description: Unlock the lock previously locked by a call to proc_fdlock()
460 *
461 * Parameters: p Process to drop the lock on
462 *
463 * Returns: void
464 */
465 void
466 proc_fdunlock(proc_t p)
467 {
468 lck_mtx_unlock(&p->p_fdmlock);
469 }
470
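/*
 * fdt_next
 *
 * Description: Advance a file descriptor table iterator: return the
 *		first descriptor after 'fd' that has a fileproc with a
 *		backing fileglob.
 *
 * Parameters:	p		Process whose table is being walked
 *		fd		Current position; iteration resumes at fd + 1
 *		only_settled	When true, skip slots marked UF_RESERVED
 *
 * Returns:	An iterator whose fdti_fp is NULL once the end of the
 *		table has been reached.
 */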
471 struct fdt_iterator
472 fdt_next(proc_t p, int fd, bool only_settled)
473 {
474 struct fdt_iterator it;
475 struct filedesc *fdp = p->p_fd;
476 struct fileproc *fp;
477 int nfds = min(fdp->fd_lastfile + 1, fdp->fd_nfiles);
478
479 while (++fd < nfds) {
480 fp = fdp->fd_ofiles[fd];
481 if (fp == NULL || fp->fp_glob == NULL) {
482 continue;
483 }
484 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
485 continue;
486 }
487 it.fdti_fd = fd;
488 it.fdti_fp = fp;
489 return it;
490 }
491
492 it.fdti_fd = nfds;
493 it.fdti_fp = NULL;
494 return it;
495 }
496
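/*
 * fdt_prev
 *
 * Description: Reverse counterpart of fdt_next(): return the last
 *		descriptor before 'fd' that has a backing fileglob.
 *
 * Returns:	An iterator whose fdti_fp is NULL (and fdti_fd is -1)
 *		once the start of the table has been passed.
 */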
497 struct fdt_iterator
498 fdt_prev(proc_t p, int fd, bool only_settled)
499 {
500 struct fdt_iterator it;
501 struct filedesc *fdp = p->p_fd;
502 struct fileproc *fp;
503
504 while (--fd >= 0) {
505 fp = fdp->fd_ofiles[fd];
506 if (fp == NULL || fp->fp_glob == NULL) {
507 continue;
508 }
509 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
510 continue;
511 }
512 it.fdti_fd = fd;
513 it.fdti_fp = fp;
514 return it;
515 }
516
517 it.fdti_fd = -1;
518 it.fdti_fp = NULL;
519 return it;
520 }
521
522 /*
523 * System calls on descriptors.
524 */
525
526
527 /*
528 * sys_getdtablesize
529 *
530 * Description: Returns the per process maximum size of the descriptor table
531 *
532 * Parameters: p Process being queried
533 * retval Pointer to the call return area
534 *
535 * Returns: 0 Success
536 *
537 * Implicit returns:
538 * *retval (modified) Size of dtable
539 */
540 int
541 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
542 {
543 *retval = (int32_t)MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc);
544
545 return 0;
546 }
547
548
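/*
 * procfdtbl_reservefd
 *
 * Description: Mark a descriptor slot as reserved (UF_RESERVED) with no
 *		fileproc installed yet; called with the proc_fdlock held.
 */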
549 static void
550 procfdtbl_reservefd(struct proc * p, int fd)
551 {
552 p->p_fd->fd_ofiles[fd] = NULL;
553 p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
554 }
555
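/*
 * procfdtbl_releasefd
 *
 * Description: Install fp (when non-NULL) in a reserved slot, clear
 *		UF_RESERVED, and wake up any thread waiting on the
 *		reservation (UF_RESVWAIT).
 */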
556 void
557 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
558 {
559 if (fp != NULL) {
560 p->p_fd->fd_ofiles[fd] = fp;
561 }
562 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
563 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
564 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
565 wakeup(&p->p_fd);
566 }
567 }
568
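/*
 * procfdtbl_waitfd
 *
 * Description: Sleep until a reserved descriptor slot is released; the
 *		proc_fdlock is dropped and reacquired across the msleep().
 */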
569 static void
570 procfdtbl_waitfd(struct proc * p, int fd)
571 {
572 p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
573 msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
574 }
575
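/*
 * procfdtbl_clearfd
 *
 * Description: Clear a descriptor slot and its flags, waking any thread
 *		that was waiting on the reservation.
 */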
576 static void
577 procfdtbl_clearfd(struct proc * p, int fd)
578 {
579 int waiting;
580
581 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
582 p->p_fd->fd_ofiles[fd] = NULL;
583 p->p_fd->fd_ofileflags[fd] = 0;
584 if (waiting == UF_RESVWAIT) {
585 wakeup(&p->p_fd);
586 }
587 }
588
589 /*
590 * fdrelse
591 *
592 * Description: Inline utility function to free an fd in a filedesc
593 *
594 * Parameters: p Process whose filedesc table
595 * the fd lies in
596 * fd fd to free
597 *
598 * Returns: void
599 *
600 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
601 * the caller
602 */
603 static void
604 fdrelse(struct proc * p, int fd)
605 {
606 struct filedesc *fdp = p->p_fd;
607 int nfd = 0;
608
609 if (fd < fdp->fd_freefile) {
610 fdp->fd_freefile = fd;
611 }
612 #if DIAGNOSTIC
613 if (fd > fdp->fd_lastfile) {
614 panic("fdrelse: fd_lastfile inconsistent");
615 }
616 #endif
617 procfdtbl_clearfd(p, fd);
618
619 while ((nfd = fdp->fd_lastfile) > 0 &&
620 fdp->fd_ofiles[nfd] == NULL &&
621 !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
622 /* JMM - What about files with lingering EV_VANISHED knotes? */
623 fdp->fd_lastfile--;
624 }
625 }
626
627
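/*
 * fd_rdwr
 *
 * Description: Perform a kernel-initiated read or write against the file
 *		backing a descriptor, honoring the descriptor's FREAD/FWRITE
 *		mode.  Only vnodes, pipes and sockets are supported.
 *
 * Parameters:	fd		Descriptor to do I/O on
 *		rw		UIO_READ or UIO_WRITE
 *		base		Buffer address
 *		len		Buffer length
 *		segflg		Whether 'base' is a user or kernel address
 *		offset		Explicit offset, used unless IO_APPEND is set
 *		io_flg		I/O flags (only IO_APPEND is interpreted here)
 *		aresid		If non-NULL, receives the residual count;
 *				otherwise a short transfer returns EIO
 *
 * Returns:	0		Success
 *		EINVAL		Unsupported file type
 *		EBADF		fd not open for the requested direction
 *		fp_lookup:EBADF
 *
 * Illustrative call (the names 'buf', 'size' and 'resid' are the
 * caller's own):
 *
 *	error = fd_rdwr(fd, UIO_READ, (uint64_t)(uintptr_t)buf, size,
 *	    UIO_SYSSPACE, 0, 0, &resid);
 */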
628 int
629 fd_rdwr(
630 int fd,
631 enum uio_rw rw,
632 uint64_t base,
633 int64_t len,
634 enum uio_seg segflg,
635 off_t offset,
636 int io_flg,
637 int64_t *aresid)
638 {
639 struct fileproc *fp;
640 proc_t p;
641 int error = 0;
642 int flags = 0;
643 int spacetype;
644 uio_t auio = NULL;
645 char uio_buf[UIO_SIZEOF(1)];
646 struct vfs_context context = *(vfs_context_current());
647
648 p = current_proc();
649
650 error = fp_lookup(p, fd, &fp, 0);
651 if (error) {
652 return error;
653 }
654
655 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
656 case DTYPE_VNODE:
657 case DTYPE_PIPE:
658 case DTYPE_SOCKET:
659 break;
660 default:
661 error = EINVAL;
662 goto out;
663 }
664 if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
665 error = EBADF;
666 goto out;
667 }
668
669 if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
670 error = EBADF;
671 goto out;
672 }
673
674 context.vc_ucred = fp->fp_glob->fg_cred;
675
676 if (UIO_SEG_IS_USER_SPACE(segflg)) {
677 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
678 } else {
679 spacetype = UIO_SYSSPACE;
680 }
681
682 auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
683
684 uio_addiov(auio, (user_addr_t)base, (user_size_t)len);
685
686 if (!(io_flg & IO_APPEND)) {
687 flags = FOF_OFFSET;
688 }
689
690 if (rw == UIO_WRITE) {
691 user_ssize_t orig_resid = uio_resid(auio);
692 error = fo_write(fp, auio, flags, &context);
693 if (uio_resid(auio) < orig_resid) {
694 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
695 }
696 } else {
697 error = fo_read(fp, auio, flags, &context);
698 }
699
700 if (aresid) {
701 *aresid = uio_resid(auio);
702 } else if (uio_resid(auio) && error == 0) {
703 error = EIO;
704 }
705 out:
706 fp_drop(p, fd, fp, 0);
707 return error;
708 }
709
710
711
712 /*
713 * sys_dup
714 *
715 * Description: Duplicate a file descriptor.
716 *
717 * Parameters: p Process performing the dup
718 * uap->fd The fd to dup
719 * retval Pointer to the call return area
720 *
721 * Returns: 0 Success
722 * !0 Errno
723 *
724 * Implicit returns:
725 * *retval (modified) The new descriptor
726 */
727 int
728 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
729 {
730 struct filedesc *fdp = p->p_fd;
731 int old = uap->fd;
732 int new, error;
733 struct fileproc *fp;
734
735 proc_fdlock(p);
736 if ((error = fp_lookup(p, old, &fp, 1))) {
737 proc_fdunlock(p);
738 return error;
739 }
740 if (fp_isguarded(fp, GUARD_DUP)) {
741 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
742 (void) fp_drop(p, old, fp, 1);
743 proc_fdunlock(p);
744 return error;
745 }
746 if ((error = fdalloc(p, 0, &new))) {
747 fp_drop(p, old, fp, 1);
748 proc_fdunlock(p);
749 return error;
750 }
751 error = finishdup(p, fdp, old, new, 0, retval);
752 fp_drop(p, old, fp, 1);
753 proc_fdunlock(p);
754
755 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
756 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
757 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
758 }
759
760 return error;
761 }
762
763 /*
764 * sys_dup2
765 *
766 * Description: Duplicate a file descriptor to a particular value.
767 *
768 * Parameters: p Process performing the dup
769 * uap->from The fd to dup
770 * uap->to The fd to dup it to
771 * retval Pointer to the call return area
772 *
773 * Returns: 0 Success
774 * !0 Errno
775 *
776 * Implicit returns:
777 * *retval (modified) The new descriptor
778 */
779 int
780 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
781 {
782 return dup2(p, uap->from, uap->to, retval);
783 }
784
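/*
 * dup2
 *
 * Description: In-kernel implementation of dup2(2); duplicates descriptor
 *		'old' onto 'new', closing whatever was previously open at
 *		'new'.
 *
 * Parameters:	p		Process performing the dup
 *		old		The fd to dup
 *		new		The fd to dup it to
 *		retval		Pointer to the call return area
 *
 * Returns:	0		Success
 *		EBADF		new is negative or beyond the rlimit
 *		fp_lookup:EBADF
 *		fdalloc:EMFILE
 *		fdalloc:ENOMEM
 *		finishdup:EBADF
 *		finishdup:ENOMEM
 *
 * Locks: Assumes the caller does NOT hold the proc_fdlock; it is taken
 *		and dropped here.
 */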
785 int
786 dup2(proc_t p, int old, int new, int *retval)
787 {
788 struct filedesc *fdp = p->p_fd;
789 struct fileproc *fp, *nfp;
790 int i, error;
791 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
792
793 proc_fdlock(p);
794
795 startover:
796 if ((error = fp_lookup(p, old, &fp, 1))) {
797 proc_fdunlock(p);
798 return error;
799 }
800 if (fp_isguarded(fp, GUARD_DUP)) {
801 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
802 (void) fp_drop(p, old, fp, 1);
803 proc_fdunlock(p);
804 return error;
805 }
806 if (new < 0 ||
807 (rlim_t)new >= nofile ||
808 new >= maxfilesperproc) {
809 fp_drop(p, old, fp, 1);
810 proc_fdunlock(p);
811 return EBADF;
812 }
813 if (old == new) {
814 fp_drop(p, old, fp, 1);
815 *retval = new;
816 proc_fdunlock(p);
817 return 0;
818 }
819 if (new < 0 || new >= fdp->fd_nfiles) {
820 if ((error = fdalloc(p, new, &i))) {
821 fp_drop(p, old, fp, 1);
822 proc_fdunlock(p);
823 return error;
824 }
825 if (new != i) {
826 fdrelse(p, i);
827 goto closeit;
828 }
829 } else {
830 closeit:
831 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
832 fp_drop(p, old, fp, 1);
833 procfdtbl_waitfd(p, new);
834 #if DIAGNOSTIC
835 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
836 #endif
837 goto startover;
838 }
839
840 if ((nfp = fdp->fd_ofiles[new]) != NULL) {
841 if (fp_isguarded(nfp, GUARD_CLOSE)) {
842 fp_drop(p, old, fp, 1);
843 error = fp_guard_exception(p,
844 new, nfp, kGUARD_EXC_CLOSE);
845 proc_fdunlock(p);
846 return error;
847 }
848 (void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
849 proc_fdlock(p);
850 assert(fdp->fd_ofileflags[new] & UF_RESERVED);
851 } else {
852 #if DIAGNOSTIC
853 if (fdp->fd_ofiles[new] != NULL) {
854 panic("dup2: no ref on fileproc %d", new);
855 }
856 #endif
857 procfdtbl_reservefd(p, new);
858 }
859 }
860 #if DIAGNOSTIC
861 if (fdp->fd_ofiles[new] != 0) {
862 panic("dup2: overwriting fd_ofiles with new %d", new);
863 }
864 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
865 panic("dup2: unreserved fileflags with new %d", new);
866 }
867 #endif
868 error = finishdup(p, fdp, old, new, 0, retval);
869 fp_drop(p, old, fp, 1);
870 proc_fdunlock(p);
871
872 return error;
873 }
874
875
876 /*
877 * fcntl
878 *
879 * Description: The file control system call.
880 *
881 * Parameters: p Process performing the fcntl
882 * uap->fd The fd to operate against
883 * uap->cmd The command to perform
884 * uap->arg Pointer to the command argument
885 * retval Pointer to the call return area
886 *
887 * Returns: 0 Success
888 * !0 Errno (see fcntl_nocancel)
889 *
890 * Implicit returns:
891 * *retval (modified) fcntl return value (if any)
892 *
893 * Notes: This system call differs from fcntl_nocancel() in that it
894 * tests for cancellation prior to performing a potentially
895 * blocking operation.
896 */
897 int
898 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
899 {
900 __pthread_testcancel(1);
901 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
902 }
903
904 #define ACCOUNT_OPENFROM_ENTITLEMENT \
905 "com.apple.private.vfs.role-account-openfrom"
906
907 /*
908 * sys_fcntl_nocancel
909 *
910 * Description: A non-cancel-testing file control system call.
911 *
912 * Parameters: p Process performing the fcntl
913 * uap->fd The fd to operate against
914 * uap->cmd The command to perform
915 * uap->arg Pointer to the command argument
916 * retval Pointer to the call return area
917 *
918 * Returns: 0 Success
919 * EINVAL
920 * fp_lookup:EBADF Bad file descriptor
921 * [F_DUPFD]
922 * fdalloc:EMFILE
923 * fdalloc:ENOMEM
924 * finishdup:EBADF
925 * finishdup:ENOMEM
926 * [F_SETOWN]
927 * ESRCH
928 * [F_SETLK]
929 * EBADF
930 * EOVERFLOW
931 * copyin:EFAULT
932 * vnode_getwithref:???
933 * VNOP_ADVLOCK:???
934 * msleep:ETIMEDOUT
935 * [F_GETLK]
936 * EBADF
937 * EOVERFLOW
938 * copyin:EFAULT
939 * copyout:EFAULT
940 * vnode_getwithref:???
941 * VNOP_ADVLOCK:???
942 * [F_PREALLOCATE]
943 * EBADF
944 * EINVAL
945 * copyin:EFAULT
946 * copyout:EFAULT
947 * vnode_getwithref:???
948 * VNOP_ALLOCATE:???
949 * [F_SETSIZE,F_RDADVISE]
950 * EBADF
951 * EINVAL
952 * copyin:EFAULT
953 * vnode_getwithref:???
954 * [F_RDAHEAD,F_NOCACHE]
955 * EBADF
956 * vnode_getwithref:???
957 * [???]
958 *
959 * Implicit returns:
960 * *retval (modified) fcntl return value (if any)
961 */
962 int
963 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
964 {
965 int fd = uap->fd;
966 struct filedesc *fdp = p->p_fd;
967 struct fileproc *fp;
968 char *pop;
969 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
970 unsigned int oflags, nflags;
971 int i, tmp, error, error2, flg = 0;
972 struct flock fl = {};
973 struct flocktimeout fltimeout;
974 struct timespec *timeout = NULL;
975 struct vfs_context context;
976 off_t offset;
977 int newmin;
978 daddr64_t lbn, bn;
979 unsigned int fflag;
980 user_addr_t argp;
981 boolean_t is64bit;
982 rlim_t nofile;
983 int has_entitlement = 0;
984
985 AUDIT_ARG(fd, uap->fd);
986 AUDIT_ARG(cmd, uap->cmd);
987
988 nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
989
990 proc_fdlock(p);
991 if ((error = fp_lookup(p, fd, &fp, 1))) {
992 proc_fdunlock(p);
993 return error;
994 }
995 context.vc_thread = current_thread();
996 context.vc_ucred = fp->f_cred;
997
998 is64bit = proc_is64bit(p);
999 if (is64bit) {
1000 argp = uap->arg;
1001 } else {
1002 /*
1003 * Since the arg parameter is defined as a long but may be
1004 * either a long or a pointer we must take care to handle
1005 * sign extension issues. Our sys call munger will sign
1006 * extend a long when we are called from a 32-bit process.
1007 * Since we can never have an address greater than 32-bits
1008 * from a 32-bit process we lop off the top 32-bits to avoid
1009 * getting the wrong address
1010 */
1011 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
1012 }
1013
1014 #if CONFIG_MACF
1015 error = mac_file_check_fcntl(proc_ucred(p), fp->fp_glob, uap->cmd,
1016 uap->arg);
1017 if (error) {
1018 goto out;
1019 }
1020 #endif
1021
1022 pop = &fdp->fd_ofileflags[fd];
1023
1024 switch (uap->cmd) {
1025 case F_DUPFD:
1026 case F_DUPFD_CLOEXEC:
1027 if (fp_isguarded(fp, GUARD_DUP)) {
1028 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
1029 goto out;
1030 }
1031 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1032 AUDIT_ARG(value32, newmin);
1033 if ((rlim_t)newmin >= nofile ||
1034 newmin >= maxfilesperproc) {
1035 error = EINVAL;
1036 goto out;
1037 }
1038 if ((error = fdalloc(p, newmin, &i))) {
1039 goto out;
1040 }
1041 error = finishdup(p, fdp, fd, i,
1042 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
1043 goto out;
1044
1045 case F_GETFD:
1046 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
1047 error = 0;
1048 goto out;
1049
1050 case F_SETFD:
1051 AUDIT_ARG(value32, (uint32_t)uap->arg);
1052 if (uap->arg & FD_CLOEXEC) {
1053 *pop |= UF_EXCLOSE;
1054 } else {
1055 if (fp_isguarded(fp, 0)) {
1056 error = fp_guard_exception(p,
1057 fd, fp, kGUARD_EXC_NOCLOEXEC);
1058 goto out;
1059 }
1060 *pop &= ~UF_EXCLOSE;
1061 }
1062 error = 0;
1063 goto out;
1064
1065 case F_GETFL:
1066 *retval = OFLAGS(fp->f_flag);
1067 error = 0;
1068 goto out;
1069
1070 case F_SETFL:
1071 // FIXME (rdar://54898652)
1072 //
1073 // this code is broken if fcntl(F_SETFL), ioctl() are
1074 // called concurrently for the same fileglob.
1075
1076 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1077 AUDIT_ARG(value32, tmp);
1078
1079 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
1080 nflags = oflags & ~FCNTLFLAGS;
1081 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
1082 });
1083 tmp = nflags & FNONBLOCK;
1084 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1085 if (error) {
1086 goto out;
1087 }
1088 tmp = nflags & FASYNC;
1089 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1090 if (!error) {
1091 goto out;
1092 }
1093 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1094 tmp = 0;
1095 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1096 goto out;
1097
1098 case F_GETOWN:
1099 if (fp->f_type == DTYPE_SOCKET) {
1100 *retval = ((struct socket *)fp->f_data)->so_pgid;
1101 error = 0;
1102 goto out;
1103 }
1104 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
1105 *retval = -*retval;
1106 goto out;
1107
1108 case F_SETOWN:
1109 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
1110 AUDIT_ARG(value32, tmp);
1111 if (fp->f_type == DTYPE_SOCKET) {
1112 ((struct socket *)fp->f_data)->so_pgid = tmp;
1113 error = 0;
1114 goto out;
1115 }
1116 if (fp->f_type == DTYPE_PIPE) {
1117 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1118 goto out;
1119 }
1120
1121 if (tmp <= 0) {
1122 tmp = -tmp;
1123 } else {
1124 proc_t p1 = proc_find(tmp);
1125 if (p1 == 0) {
1126 error = ESRCH;
1127 goto out;
1128 }
1129 tmp = (int)p1->p_pgrpid;
1130 proc_rele(p1);
1131 }
1132 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1133 goto out;
1134
1135 case F_SETNOSIGPIPE:
1136 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
1137 if (fp->f_type == DTYPE_SOCKET) {
1138 #if SOCKETS
1139 error = sock_setsockopt((struct socket *)fp->f_data,
1140 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
1141 #else
1142 error = EINVAL;
1143 #endif
1144 } else {
1145 struct fileglob *fg = fp->fp_glob;
1146
1147 lck_mtx_lock_spin(&fg->fg_lock);
1148 if (tmp) {
1149 fg->fg_lflags |= FG_NOSIGPIPE;
1150 } else {
1151 fg->fg_lflags &= ~FG_NOSIGPIPE;
1152 }
1153 lck_mtx_unlock(&fg->fg_lock);
1154 error = 0;
1155 }
1156 goto out;
1157
1158 case F_GETNOSIGPIPE:
1159 if (fp->f_type == DTYPE_SOCKET) {
1160 #if SOCKETS
1161 int retsize = sizeof(*retval);
1162 error = sock_getsockopt((struct socket *)fp->f_data,
1163 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
1164 #else
1165 error = EINVAL;
1166 #endif
1167 } else {
1168 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
1169 1 : 0;
1170 error = 0;
1171 }
1172 goto out;
1173
1174 case F_SETCONFINED:
1175 /*
1176 * If this is the only reference to this fglob in the process
1177 * and it's already marked as close-on-fork then mark it as
1178 * (immutably) "confined" i.e. any fd that points to it will
1179 * forever be close-on-fork, and attempts to use an IPC
1180 * mechanism to move the descriptor elsewhere will fail.
1181 */
1182 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1183 struct fileglob *fg = fp->fp_glob;
1184
1185 lck_mtx_lock_spin(&fg->fg_lock);
1186 if (fg->fg_lflags & FG_CONFINED) {
1187 error = 0;
1188 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
1189 error = EAGAIN; /* go close the dup .. */
1190 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1191 fg->fg_lflags |= FG_CONFINED;
1192 error = 0;
1193 } else {
1194 error = EBADF; /* open without O_CLOFORK? */
1195 }
1196 lck_mtx_unlock(&fg->fg_lock);
1197 } else {
1198 /*
1199 * Other subsystems may have built on the immutability
1200 * of FG_CONFINED; clearing it may be tricky.
1201 */
1202 error = EPERM; /* immutable */
1203 }
1204 goto out;
1205
1206 case F_GETCONFINED:
1207 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
1208 error = 0;
1209 goto out;
1210
1211 case F_SETLKWTIMEOUT:
1212 case F_SETLKW:
1213 case F_OFD_SETLKWTIMEOUT:
1214 case F_OFD_SETLKW:
1215 flg |= F_WAIT;
1216 OS_FALLTHROUGH;
1217
1218 case F_SETLK:
1219 case F_OFD_SETLK:
1220 if (fp->f_type != DTYPE_VNODE) {
1221 error = EBADF;
1222 goto out;
1223 }
1224 vp = (struct vnode *)fp->f_data;
1225
1226 fflag = fp->f_flag;
1227 offset = fp->f_offset;
1228 proc_fdunlock(p);
1229
1230 /* Copy in the lock structure */
1231 if (F_SETLKWTIMEOUT == uap->cmd ||
1232 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1233 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1234 if (error) {
1235 goto outdrop;
1236 }
1237 fl = fltimeout.fl;
1238 timeout = &fltimeout.timeout;
1239 } else {
1240 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1241 if (error) {
1242 goto outdrop;
1243 }
1244 }
1245
1246 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1247 /* and ending byte for EOVERFLOW in SEEK_SET */
1248 error = check_file_seek_range(&fl, offset);
1249 if (error) {
1250 goto outdrop;
1251 }
1252
1253 if ((error = vnode_getwithref(vp))) {
1254 goto outdrop;
1255 }
1256 if (fl.l_whence == SEEK_CUR) {
1257 fl.l_start += offset;
1258 }
1259
1260 #if CONFIG_MACF
1261 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1262 F_SETLK, &fl);
1263 if (error) {
1264 (void)vnode_put(vp);
1265 goto outdrop;
1266 }
1267 #endif
1268 switch (uap->cmd) {
1269 case F_OFD_SETLK:
1270 case F_OFD_SETLKW:
1271 case F_OFD_SETLKWTIMEOUT:
1272 flg |= F_OFD_LOCK;
1273 switch (fl.l_type) {
1274 case F_RDLCK:
1275 if ((fflag & FREAD) == 0) {
1276 error = EBADF;
1277 break;
1278 }
1279 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1280 F_SETLK, &fl, flg, &context, timeout);
1281 break;
1282 case F_WRLCK:
1283 if ((fflag & FWRITE) == 0) {
1284 error = EBADF;
1285 break;
1286 }
1287 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1288 F_SETLK, &fl, flg, &context, timeout);
1289 break;
1290 case F_UNLCK:
1291 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1292 F_UNLCK, &fl, F_OFD_LOCK, &context,
1293 timeout);
1294 break;
1295 default:
1296 error = EINVAL;
1297 break;
1298 }
1299 if (0 == error &&
1300 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1301 struct fileglob *fg = fp->fp_glob;
1302
1303 /*
1304 * arrange F_UNLCK on last close (once
1305 * set, FG_HAS_OFDLOCK is immutable)
1306 */
1307 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1308 lck_mtx_lock_spin(&fg->fg_lock);
1309 fg->fg_lflags |= FG_HAS_OFDLOCK;
1310 lck_mtx_unlock(&fg->fg_lock);
1311 }
1312 }
1313 break;
1314 default:
1315 flg |= F_POSIX;
1316 switch (fl.l_type) {
1317 case F_RDLCK:
1318 if ((fflag & FREAD) == 0) {
1319 error = EBADF;
1320 break;
1321 }
1322 // XXX UInt32 unsafe for LP64 kernel
1323 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1324 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1325 F_SETLK, &fl, flg, &context, timeout);
1326 break;
1327 case F_WRLCK:
1328 if ((fflag & FWRITE) == 0) {
1329 error = EBADF;
1330 break;
1331 }
1332 // XXX UInt32 unsafe for LP64 kernel
1333 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1334 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1335 F_SETLK, &fl, flg, &context, timeout);
1336 break;
1337 case F_UNLCK:
1338 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1339 F_UNLCK, &fl, F_POSIX, &context, timeout);
1340 break;
1341 default:
1342 error = EINVAL;
1343 break;
1344 }
1345 break;
1346 }
1347 (void) vnode_put(vp);
1348 goto outdrop;
1349
1350 case F_GETLK:
1351 case F_OFD_GETLK:
1352 case F_GETLKPID:
1353 case F_OFD_GETLKPID:
1354 if (fp->f_type != DTYPE_VNODE) {
1355 error = EBADF;
1356 goto out;
1357 }
1358 vp = (struct vnode *)fp->f_data;
1359
1360 offset = fp->f_offset;
1361 proc_fdunlock(p);
1362
1363 /* Copy in the lock structure */
1364 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1365 if (error) {
1366 goto outdrop;
1367 }
1368
1369 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1370 /* and ending byte for EOVERFLOW in SEEK_SET */
1371 error = check_file_seek_range(&fl, offset);
1372 if (error) {
1373 goto outdrop;
1374 }
1375
1376 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1377 error = EINVAL;
1378 goto outdrop;
1379 }
1380
1381 switch (fl.l_type) {
1382 case F_RDLCK:
1383 case F_UNLCK:
1384 case F_WRLCK:
1385 break;
1386 default:
1387 error = EINVAL;
1388 goto outdrop;
1389 }
1390
1391 switch (fl.l_whence) {
1392 case SEEK_CUR:
1393 case SEEK_SET:
1394 case SEEK_END:
1395 break;
1396 default:
1397 error = EINVAL;
1398 goto outdrop;
1399 }
1400
1401 if ((error = vnode_getwithref(vp)) == 0) {
1402 if (fl.l_whence == SEEK_CUR) {
1403 fl.l_start += offset;
1404 }
1405
1406 #if CONFIG_MACF
1407 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1408 uap->cmd, &fl);
1409 if (error == 0)
1410 #endif
1411 switch (uap->cmd) {
1412 case F_OFD_GETLK:
1413 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1414 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1415 break;
1416 case F_OFD_GETLKPID:
1417 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1418 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1419 break;
1420 default:
1421 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1422 uap->cmd, &fl, F_POSIX, &context, NULL);
1423 break;
1424 }
1425
1426 (void)vnode_put(vp);
1427
1428 if (error == 0) {
1429 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1430 }
1431 }
1432 goto outdrop;
1433
1434 case F_PREALLOCATE: {
1435 fstore_t alloc_struct; /* structure for allocate command */
1436 u_int32_t alloc_flags = 0;
1437
1438 if (fp->f_type != DTYPE_VNODE) {
1439 error = EBADF;
1440 goto out;
1441 }
1442
1443 vp = (struct vnode *)fp->f_data;
1444 proc_fdunlock(p);
1445
1446 /* make sure that we have write permission */
1447 if ((fp->f_flag & FWRITE) == 0) {
1448 error = EBADF;
1449 goto outdrop;
1450 }
1451
1452 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1453 if (error) {
1454 goto outdrop;
1455 }
1456
1457 /* now set the space allocated to 0 */
1458 alloc_struct.fst_bytesalloc = 0;
1459
1460 /*
1461 * Do some simple parameter checking
1462 */
1463
1464 /* set up the flags */
1465
1466 alloc_flags |= PREALLOCATE;
1467
1468 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1469 alloc_flags |= ALLOCATECONTIG;
1470 }
1471
1472 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1473 alloc_flags |= ALLOCATEALL;
1474 }
1475
1476 /*
1477 * Do any position mode specific stuff. The only
1478 * position mode supported now is PEOFPOSMODE
1479 */
1480
1481 switch (alloc_struct.fst_posmode) {
1482 case F_PEOFPOSMODE:
1483 if (alloc_struct.fst_offset != 0) {
1484 error = EINVAL;
1485 goto outdrop;
1486 }
1487
1488 alloc_flags |= ALLOCATEFROMPEOF;
1489 break;
1490
1491 case F_VOLPOSMODE:
1492 if (alloc_struct.fst_offset <= 0) {
1493 error = EINVAL;
1494 goto outdrop;
1495 }
1496
1497 alloc_flags |= ALLOCATEFROMVOL;
1498 break;
1499
1500 default: {
1501 error = EINVAL;
1502 goto outdrop;
1503 }
1504 }
1505 if ((error = vnode_getwithref(vp)) == 0) {
1506 /*
1507 * call allocate to get the space
1508 */
1509 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1510 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1511 &context);
1512 (void)vnode_put(vp);
1513
1514 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1515
1516 if (error == 0) {
1517 error = error2;
1518 }
1519 }
1520 goto outdrop;
1521 }
1522 case F_PUNCHHOLE: {
1523 fpunchhole_t args;
1524
1525 if (fp->f_type != DTYPE_VNODE) {
1526 error = EBADF;
1527 goto out;
1528 }
1529
1530 vp = (struct vnode *)fp->f_data;
1531 proc_fdunlock(p);
1532
1533 /* need write permissions */
1534 if ((fp->f_flag & FWRITE) == 0) {
1535 error = EPERM;
1536 goto outdrop;
1537 }
1538
1539 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1540 goto outdrop;
1541 }
1542
1543 if ((error = vnode_getwithref(vp))) {
1544 goto outdrop;
1545 }
1546
1547 #if CONFIG_MACF
1548 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
1549 (void)vnode_put(vp);
1550 goto outdrop;
1551 }
1552 #endif
1553
1554 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1555 (void)vnode_put(vp);
1556
1557 goto outdrop;
1558 }
1559 case F_TRIM_ACTIVE_FILE: {
1560 ftrimactivefile_t args;
1561
1562 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1563 error = EACCES;
1564 goto out;
1565 }
1566
1567 if (fp->f_type != DTYPE_VNODE) {
1568 error = EBADF;
1569 goto out;
1570 }
1571
1572 vp = (struct vnode *)fp->f_data;
1573 proc_fdunlock(p);
1574
1575 /* need write permissions */
1576 if ((fp->f_flag & FWRITE) == 0) {
1577 error = EPERM;
1578 goto outdrop;
1579 }
1580
1581 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1582 goto outdrop;
1583 }
1584
1585 if ((error = vnode_getwithref(vp))) {
1586 goto outdrop;
1587 }
1588
1589 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1590 (void)vnode_put(vp);
1591
1592 goto outdrop;
1593 }
1594 case F_SPECULATIVE_READ: {
1595 fspecread_t args;
1596
1597 if (fp->f_type != DTYPE_VNODE) {
1598 error = EBADF;
1599 goto out;
1600 }
1601
1602 vp = (struct vnode *)fp->f_data;
1603 proc_fdunlock(p);
1604
1605 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1606 goto outdrop;
1607 }
1608
1609 /* Discard invalid offsets or lengths */
1610 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1611 error = EINVAL;
1612 goto outdrop;
1613 }
1614
1615 /*
1616 * Round the file offset down to a page-size boundary (or to 0).
1617 * The filesystem will need to round the length up to the end of the page boundary
1618 * or to the EOF of the file.
1619 */
1620 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1621 uint64_t foff_delta = args.fsr_offset - foff;
1622 args.fsr_offset = (off_t) foff;
1623
1624 /*
1625 * Now add in the delta to the supplied length. Since we may have adjusted the
1626 * offset, increase it by the amount that we adjusted.
1627 */
1628 args.fsr_length += foff_delta;
1629
1630 if ((error = vnode_getwithref(vp))) {
1631 goto outdrop;
1632 }
1633 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1634 (void)vnode_put(vp);
1635
1636 goto outdrop;
1637 }
1638 case F_SETSIZE:
1639 if (fp->f_type != DTYPE_VNODE) {
1640 error = EBADF;
1641 goto out;
1642 }
1643 vp = (struct vnode *)fp->f_data;
1644 proc_fdunlock(p);
1645
1646 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1647 if (error) {
1648 goto outdrop;
1649 }
1650 AUDIT_ARG(value64, offset);
1651
1652 error = vnode_getwithref(vp);
1653 if (error) {
1654 goto outdrop;
1655 }
1656
1657 #if CONFIG_MACF
1658 error = mac_vnode_check_truncate(&context,
1659 fp->fp_glob->fg_cred, vp);
1660 if (error) {
1661 (void)vnode_put(vp);
1662 goto outdrop;
1663 }
1664 #endif
1665 /*
1666 * Make sure that we are root. Growing a file
1667 * without zero filling the data is a security hole.
1668 */
1669 if (!kauth_cred_issuser(kauth_cred_get())) {
1670 error = EACCES;
1671 } else {
1672 /*
1673 * Require privilege to change file size without zerofill,
1674 * else will change the file size and zerofill it.
1675 */
1676 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
1677 if (error == 0) {
1678 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
1679 } else {
1680 error = vnode_setsize(vp, offset, 0, &context);
1681 }
1682
1683 #if CONFIG_MACF
1684 if (error == 0) {
1685 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
1686 }
1687 #endif
1688 }
1689
1690 (void)vnode_put(vp);
1691 goto outdrop;
1692
1693 case F_RDAHEAD:
1694 if (fp->f_type != DTYPE_VNODE) {
1695 error = EBADF;
1696 goto out;
1697 }
1698 if (uap->arg) {
1699 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1700 } else {
1701 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1702 }
1703 goto out;
1704
1705 case F_NOCACHE:
1706 if (fp->f_type != DTYPE_VNODE) {
1707 error = EBADF;
1708 goto out;
1709 }
1710 if (uap->arg) {
1711 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1712 } else {
1713 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1714 }
1715 goto out;
1716
1717 case F_NODIRECT:
1718 if (fp->f_type != DTYPE_VNODE) {
1719 error = EBADF;
1720 goto out;
1721 }
1722 if (uap->arg) {
1723 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1724 } else {
1725 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1726 }
1727 goto out;
1728
1729 case F_SINGLE_WRITER:
1730 if (fp->f_type != DTYPE_VNODE) {
1731 error = EBADF;
1732 goto out;
1733 }
1734 if (uap->arg) {
1735 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1736 } else {
1737 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1738 }
1739 goto out;
1740
1741 case F_GLOBAL_NOCACHE:
1742 if (fp->f_type != DTYPE_VNODE) {
1743 error = EBADF;
1744 goto out;
1745 }
1746 vp = (struct vnode *)fp->f_data;
1747 proc_fdunlock(p);
1748
1749 if ((error = vnode_getwithref(vp)) == 0) {
1750 *retval = vnode_isnocache(vp);
1751
1752 if (uap->arg) {
1753 vnode_setnocache(vp);
1754 } else {
1755 vnode_clearnocache(vp);
1756 }
1757
1758 (void)vnode_put(vp);
1759 }
1760 goto outdrop;
1761
1762 case F_CHECK_OPENEVT:
1763 if (fp->f_type != DTYPE_VNODE) {
1764 error = EBADF;
1765 goto out;
1766 }
1767 vp = (struct vnode *)fp->f_data;
1768 proc_fdunlock(p);
1769
1770 if ((error = vnode_getwithref(vp)) == 0) {
1771 *retval = vnode_is_openevt(vp);
1772
1773 if (uap->arg) {
1774 vnode_set_openevt(vp);
1775 } else {
1776 vnode_clear_openevt(vp);
1777 }
1778
1779 (void)vnode_put(vp);
1780 }
1781 goto outdrop;
1782
1783 case F_RDADVISE: {
1784 struct radvisory ra_struct;
1785
1786 if (fp->f_type != DTYPE_VNODE) {
1787 error = EBADF;
1788 goto out;
1789 }
1790 vp = (struct vnode *)fp->f_data;
1791 proc_fdunlock(p);
1792
1793 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1794 goto outdrop;
1795 }
1796 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
1797 error = EINVAL;
1798 goto outdrop;
1799 }
1800 if ((error = vnode_getwithref(vp)) == 0) {
1801 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1802
1803 (void)vnode_put(vp);
1804 }
1805 goto outdrop;
1806 }
1807
1808 case F_FLUSH_DATA:
1809
1810 if (fp->f_type != DTYPE_VNODE) {
1811 error = EBADF;
1812 goto out;
1813 }
1814 vp = (struct vnode *)fp->f_data;
1815 proc_fdunlock(p);
1816
1817 if ((error = vnode_getwithref(vp)) == 0) {
1818 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1819
1820 (void)vnode_put(vp);
1821 }
1822 goto outdrop;
1823
1824 case F_LOG2PHYS:
1825 case F_LOG2PHYS_EXT: {
1826 struct log2phys l2p_struct = {}; /* structure for allocate command */
1827 int devBlockSize;
1828
1829 off_t file_offset = 0;
1830 size_t a_size = 0;
1831 size_t run = 0;
1832
1833 if (uap->cmd == F_LOG2PHYS_EXT) {
1834 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1835 if (error) {
1836 goto out;
1837 }
1838 file_offset = l2p_struct.l2p_devoffset;
1839 } else {
1840 file_offset = fp->f_offset;
1841 }
1842 if (fp->f_type != DTYPE_VNODE) {
1843 error = EBADF;
1844 goto out;
1845 }
1846 vp = (struct vnode *)fp->f_data;
1847 proc_fdunlock(p);
1848 if ((error = vnode_getwithref(vp))) {
1849 goto outdrop;
1850 }
1851 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1852 if (error) {
1853 (void)vnode_put(vp);
1854 goto outdrop;
1855 }
1856 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1857 if (error) {
1858 (void)vnode_put(vp);
1859 goto outdrop;
1860 }
1861 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1862 if (uap->cmd == F_LOG2PHYS_EXT) {
1863 if (l2p_struct.l2p_contigbytes < 0) {
1864 vnode_put(vp);
1865 error = EINVAL;
1866 goto outdrop;
1867 }
1868
1869 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1870 } else {
1871 a_size = devBlockSize;
1872 }
1873
1874 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1875
1876 (void)vnode_put(vp);
1877
1878 if (!error) {
1879 l2p_struct.l2p_flags = 0; /* for now */
1880 if (uap->cmd == F_LOG2PHYS_EXT) {
1881 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1882 } else {
1883 l2p_struct.l2p_contigbytes = 0; /* for now */
1884 }
1885
1886 /*
1887 * The block number being -1 suggests that the file offset is not backed
1888 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1889 */
1890 if (bn == -1) {
1891 /* Don't multiply it by the block size */
1892 l2p_struct.l2p_devoffset = bn;
1893 } else {
1894 l2p_struct.l2p_devoffset = bn * devBlockSize;
1895 l2p_struct.l2p_devoffset += file_offset - offset;
1896 }
1897 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1898 }
1899 goto outdrop;
1900 }
1901 case F_GETPATH:
1902 case F_GETPATH_NOFIRMLINK: {
1903 char *pathbufp;
1904 int pathlen;
1905
1906 if (fp->f_type != DTYPE_VNODE) {
1907 error = EBADF;
1908 goto out;
1909 }
1910 vp = (struct vnode *)fp->f_data;
1911 proc_fdunlock(p);
1912
1913 pathlen = MAXPATHLEN;
1914 pathbufp = zalloc(ZV_NAMEI);
1915
1916 if ((error = vnode_getwithref(vp)) == 0) {
1917 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1918 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1919 } else {
1920 error = vn_getpath(vp, pathbufp, &pathlen);
1921 }
1922 (void)vnode_put(vp);
1923
1924 if (error == 0) {
1925 error = copyout((caddr_t)pathbufp, argp, pathlen);
1926 }
1927 }
1928 zfree(ZV_NAMEI, pathbufp);
1929 goto outdrop;
1930 }
1931
1932 case F_PATHPKG_CHECK: {
1933 char *pathbufp;
1934 size_t pathlen;
1935
1936 if (fp->f_type != DTYPE_VNODE) {
1937 error = EBADF;
1938 goto out;
1939 }
1940 vp = (struct vnode *)fp->f_data;
1941 proc_fdunlock(p);
1942
1943 pathlen = MAXPATHLEN;
1944 pathbufp = zalloc(ZV_NAMEI);
1945
1946 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1947 if ((error = vnode_getwithref(vp)) == 0) {
1948 AUDIT_ARG(text, pathbufp);
1949 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
1950
1951 (void)vnode_put(vp);
1952 }
1953 }
1954 zfree(ZV_NAMEI, pathbufp);
1955 goto outdrop;
1956 }
1957
1958 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1959 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1960 case F_BARRIERFSYNC: // fsync + barrier
1961 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1962 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1963 if (fp->f_type != DTYPE_VNODE) {
1964 error = EBADF;
1965 goto out;
1966 }
1967 vp = (struct vnode *)fp->f_data;
1968 proc_fdunlock(p);
1969
1970 if ((error = vnode_getwithref(vp)) == 0) {
1971 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1972
1973 (void)vnode_put(vp);
1974 }
1975 break;
1976 }
1977
1978 /*
1979 * SPI (private) for opening a file starting from a dir fd
1980 */
1981 case F_OPENFROM: {
1982 struct user_fopenfrom fopen;
1983 struct vnode_attr va;
1984 struct nameidata nd;
1985 int cmode;
1986
1987 /* Check if this isn't a valid file descriptor */
1988 if ((fp->f_type != DTYPE_VNODE) ||
1989 (fp->f_flag & FREAD) == 0) {
1990 error = EBADF;
1991 goto out;
1992 }
1993 vp = (struct vnode *)fp->f_data;
1994 proc_fdunlock(p);
1995
1996 if (vnode_getwithref(vp)) {
1997 error = ENOENT;
1998 goto outdrop;
1999 }
2000
2001 /* Only valid for directories */
2002 if (vp->v_type != VDIR) {
2003 vnode_put(vp);
2004 error = ENOTDIR;
2005 goto outdrop;
2006 }
2007
2008 /*
2009 * Only entitled apps may use the credentials of the thread
2010 * that opened the file descriptor.
2011 * Non-entitled threads will use their own context.
2012 */
2013 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2014 has_entitlement = 1;
2015 }
2016
2017 /* Get flags, mode and pathname arguments. */
2018 if (IS_64BIT_PROCESS(p)) {
2019 error = copyin(argp, &fopen, sizeof(fopen));
2020 } else {
2021 struct user32_fopenfrom fopen32;
2022
2023 error = copyin(argp, &fopen32, sizeof(fopen32));
2024 fopen.o_flags = fopen32.o_flags;
2025 fopen.o_mode = fopen32.o_mode;
2026 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2027 }
2028 if (error) {
2029 vnode_put(vp);
2030 goto outdrop;
2031 }
2032 AUDIT_ARG(fflags, fopen.o_flags);
2033 AUDIT_ARG(mode, fopen.o_mode);
2034 VATTR_INIT(&va);
2035 /* Mask off all but regular access permissions */
2036 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2037 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2038
2039 /* Start the lookup relative to the file descriptor's vnode. */
2040 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2041 fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2042 nd.ni_dvp = vp;
2043
2044 error = open1(has_entitlement ? &context : vfs_context_current(),
2045 &nd, fopen.o_flags, &va, fileproc_alloc_init, NULL, retval);
2046
2047 vnode_put(vp);
2048 break;
2049 }
2050 /*
2051 * SPI (private) for unlinking a file starting from a dir fd
2052 */
2053 case F_UNLINKFROM: {
2054 user_addr_t pathname;
2055
2056 /* Check if this isn't a valid file descriptor */
2057 if ((fp->f_type != DTYPE_VNODE) ||
2058 (fp->f_flag & FREAD) == 0) {
2059 error = EBADF;
2060 goto out;
2061 }
2062 vp = (struct vnode *)fp->f_data;
2063 proc_fdunlock(p);
2064
2065 if (vnode_getwithref(vp)) {
2066 error = ENOENT;
2067 goto outdrop;
2068 }
2069
2070 /* Only valid for directories */
2071 if (vp->v_type != VDIR) {
2072 vnode_put(vp);
2073 error = ENOTDIR;
2074 goto outdrop;
2075 }
2076
2077 /*
2078 * Only entitled apps may use the credentials of the thread
2079 * that opened the file descriptor.
2080 * Non-entitled threads will use their own context.
2081 */
2082 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2083 has_entitlement = 1;
2084 }
2085
2086 /* Get flags, mode and pathname arguments. */
2087 if (IS_64BIT_PROCESS(p)) {
2088 pathname = (user_addr_t)argp;
2089 } else {
2090 pathname = CAST_USER_ADDR_T(argp);
2091 }
2092
2093 /* Start the lookup relative to the file descriptor's vnode. */
2094 error = unlink1(has_entitlement ? &context : vfs_context_current(),
2095 vp, pathname, UIO_USERSPACE, 0);
2096
2097 vnode_put(vp);
2098 break;
2099 }
2100
2101 case F_ADDSIGS:
2102 case F_ADDFILESIGS:
2103 case F_ADDFILESIGS_FOR_DYLD_SIM:
2104 case F_ADDFILESIGS_RETURN:
2105 case F_ADDFILESIGS_INFO:
2106 {
2107 struct cs_blob *blob = NULL;
2108 struct user_fsignatures fs;
2109 kern_return_t kr;
2110 vm_offset_t kernel_blob_addr;
2111 vm_size_t kernel_blob_size;
2112 int blob_add_flags = 0;
2113 const size_t sizeof_fs = (uap->cmd == F_ADDFILESIGS_INFO ?
2114 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
2115 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
2116
2117 if (fp->f_type != DTYPE_VNODE) {
2118 error = EBADF;
2119 goto out;
2120 }
2121 vp = (struct vnode *)fp->f_data;
2122 proc_fdunlock(p);
2123
2124 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2125 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
2126 if ((p->p_csflags & CS_KILL) == 0) {
2127 proc_lock(p);
2128 p->p_csflags |= CS_KILL;
2129 proc_unlock(p);
2130 }
2131 }
2132
2133 error = vnode_getwithref(vp);
2134 if (error) {
2135 goto outdrop;
2136 }
2137
2138 if (IS_64BIT_PROCESS(p)) {
2139 error = copyin(argp, &fs, sizeof_fs);
2140 } else {
2141 if (uap->cmd == F_ADDFILESIGS_INFO) {
2142 error = EINVAL;
2143 vnode_put(vp);
2144 goto outdrop;
2145 }
2146
2147 struct user32_fsignatures fs32;
2148
2149 error = copyin(argp, &fs32, sizeof(fs32));
2150 fs.fs_file_start = fs32.fs_file_start;
2151 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
2152 fs.fs_blob_size = fs32.fs_blob_size;
2153 }
2154
2155 if (error) {
2156 vnode_put(vp);
2157 goto outdrop;
2158 }
2159
2160 /*
2161 * First check if we have something loaded at this offset
2162 */
2163 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
2164 if (blob != NULL) {
2165 /* If this is for dyld_sim revalidate the blob */
2166 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2167 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
2168 if (error) {
2169 blob = NULL;
2170 if (error != EAGAIN) {
2171 vnode_put(vp);
2172 goto outdrop;
2173 }
2174 }
2175 }
2176 }
2177
2178 if (blob == NULL) {
2179 /*
2180 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
2181 * our use cases for the immediate future, but note that at the time of this commit, some
2182 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
2183 *
2184 * We should consider how we can manage this more effectively; the above means that some
2185 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
2186 * threshold considered ridiculous at the time of this change.
2187 */
2188 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
2189 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2190 error = E2BIG;
2191 vnode_put(vp);
2192 goto outdrop;
2193 }
2194
2195 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2196 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2197 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2198 error = ENOMEM;
2199 vnode_put(vp);
2200 goto outdrop;
2201 }
2202
2203 if (uap->cmd == F_ADDSIGS) {
2204 error = copyin(fs.fs_blob_start,
2205 (void *) kernel_blob_addr,
2206 fs.fs_blob_size);
2207 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
2208 int resid;
2209
2210 error = vn_rdwr(UIO_READ,
2211 vp,
2212 (caddr_t) kernel_blob_addr,
2213 (int)kernel_blob_size,
2214 fs.fs_file_start + fs.fs_blob_start,
2215 UIO_SYSSPACE,
2216 0,
2217 kauth_cred_get(),
2218 &resid,
2219 p);
2220 if ((error == 0) && resid) {
2221 /* kernel_blob_size is rounded up to a page size, but the signature may end at EOF; zero the unread tail */
2222 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2223 }
2224 }
2225
2226 if (error) {
2227 ubc_cs_blob_deallocate(kernel_blob_addr,
2228 kernel_blob_size);
2229 vnode_put(vp);
2230 goto outdrop;
2231 }
2232
2233 blob = NULL;
2234 error = ubc_cs_blob_add(vp,
2235 proc_platform(p),
2236 CPU_TYPE_ANY, /* not for a specific architecture */
2237 CPU_SUBTYPE_ANY,
2238 fs.fs_file_start,
2239 &kernel_blob_addr,
2240 kernel_blob_size,
2241 NULL,
2242 blob_add_flags,
2243 &blob);
2244
2245 /* ubc_cs_blob_add() zeroes "kernel_blob_addr" when it has consumed the blob */
2246 if (error) {
2247 if (kernel_blob_addr) {
2248 ubc_cs_blob_deallocate(kernel_blob_addr,
2249 kernel_blob_size);
2250 }
2251 vnode_put(vp);
2252 goto outdrop;
2253 } else {
2254 #if CHECK_CS_VALIDATION_BITMAP
2255 ubc_cs_validation_bitmap_allocate( vp );
2256 #endif
2257 }
2258 }
2259
2260 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
2261 uap->cmd == F_ADDFILESIGS_INFO) {
2262 /*
2263 * The first element of the structure is an
2264 * off_t that happens to have the same size on
2265 * all architectures. Let's overwrite that.
2266 */
2267 off_t end_offset = 0;
2268 if (blob) {
2269 end_offset = blob->csb_end_offset;
2270 }
2271 error = copyout(&end_offset, argp, sizeof(end_offset));
2272
2273 if (error) {
2274 vnode_put(vp);
2275 goto outdrop;
2276 }
2277 }
2278
2279 if (uap->cmd == F_ADDFILESIGS_INFO) {
2280 /* Return information. What we copy out depends on the size of the
2281 * passed in structure, to keep binary compatibility. */
2282
2283 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
2284 // enough room for fs_cdhash[20]+fs_hash_type
2285
2286 if (blob != NULL) {
2287 error = copyout(blob->csb_cdhash,
2288 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
2289 USER_FSIGNATURES_CDHASH_LEN);
2290 if (error) {
2291 vnode_put(vp);
2292 goto outdrop;
2293 }
2294 int hashtype = cs_hash_type(blob->csb_hashtype);
2295 error = copyout(&hashtype,
2296 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
2297 sizeof(int));
2298 if (error) {
2299 vnode_put(vp);
2300 goto outdrop;
2301 }
2302 }
2303 }
2304 }
2305
2306 (void) vnode_put(vp);
2307 break;
2308 }
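	/*
	 * Illustrative userland sketch (an assumption for exposition, not part of
	 * this file): attaching a signature that is embedded in the file itself via
	 * F_ADDFILESIGS_RETURN.  The struct fsignatures from <sys/fcntl.h> is
	 * assumed to mirror the user_fsignatures layout consumed above; on success
	 * the handler overwrites fs_file_start with the first byte not covered by
	 * the blob, as implemented in the copyout above.
	 */
#if 0
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

static int
add_embedded_signature(int fd, off_t slice_offset, off_t sig_offset_in_slice,
    size_t sig_size)
{
	struct fsignatures sigs = {
		.fs_file_start = slice_offset,                  /* start of the Mach-O slice */
		.fs_blob_start = (void *)(uintptr_t)sig_offset_in_slice, /* LC_CODE_SIGNATURE dataoff */
		.fs_blob_size  = sig_size,                      /* LC_CODE_SIGNATURE datasize */
	};

	if (fcntl(fd, F_ADDFILESIGS_RETURN, &sigs) == -1) {
		return -1;
	}
	/* On return, fs_file_start holds the end offset covered by the signature. */
	printf("signature covers bytes up to %lld\n", (long long)sigs.fs_file_start);
	return 0;
}
#endif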
2309 #if CONFIG_SUPPLEMENTAL_SIGNATURES
2310 case F_ADDFILESUPPL:
2311 {
2312 struct vnode *ivp;
2313 struct cs_blob *blob = NULL;
2314 struct user_fsupplement fs;
2315 int orig_fd;
2316 struct fileproc* orig_fp = NULL;
2317 kern_return_t kr;
2318 vm_offset_t kernel_blob_addr;
2319 vm_size_t kernel_blob_size;
2320
2321 if (!IS_64BIT_PROCESS(p)) {
2322 error = EINVAL;
2323 goto out; // drop fp and unlock fds
2324 }
2325
2326 if (fp->f_type != DTYPE_VNODE) {
2327 error = EBADF;
2328 goto out;
2329 }
2330
2331 error = copyin(argp, &fs, sizeof(fs));
2332 if (error) {
2333 goto out;
2334 }
2335
2336 orig_fd = fs.fs_orig_fd;
2337 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
2338 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
2339 goto out;
2340 }
2341
2342 if (orig_fp->f_type != DTYPE_VNODE) {
2343 error = EBADF;
2344 fp_drop(p, orig_fd, orig_fp, 1);
2345 goto out;
2346 }
2347
2348 ivp = (struct vnode *)orig_fp->f_data;
2349
2350 vp = (struct vnode *)fp->f_data;
2351
2352 proc_fdunlock(p);
2353
2354 error = vnode_getwithref(ivp);
2355 if (error) {
2356 fp_drop(p, orig_fd, orig_fp, 0);
2357 goto outdrop; //drop fp
2358 }
2359
2360 error = vnode_getwithref(vp);
2361 if (error) {
2362 vnode_put(ivp);
2363 fp_drop(p, orig_fd, orig_fp, 0);
2364 goto outdrop;
2365 }
2366
2367 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2368 error = E2BIG;
2369 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
2370 }
2371
2372 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2373 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2374 if (kr != KERN_SUCCESS) {
2375 error = ENOMEM;
2376 goto dropboth;
2377 }
2378
2379 int resid;
2380 error = vn_rdwr(UIO_READ, vp,
2381 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
2382 fs.fs_file_start + fs.fs_blob_start,
2383 UIO_SYSSPACE, 0,
2384 kauth_cred_get(), &resid, p);
2385 if ((error == 0) && resid) {
2386 /* kernel_blob_size is rounded up to a page size, but the signature may end at EOF; zero the unread tail */
2387 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2388 }
2389
2390 if (error) {
2391 ubc_cs_blob_deallocate(kernel_blob_addr,
2392 kernel_blob_size);
2393 goto dropboth;
2394 }
2395
2396 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
2397 &kernel_blob_addr, kernel_blob_size, &blob);
2398
2399 /* ubc_cs_blob_add_supplement() zeroes kernel_blob_addr when it has consumed the blob */
2400 if (error) {
2401 if (kernel_blob_addr) {
2402 ubc_cs_blob_deallocate(kernel_blob_addr,
2403 kernel_blob_size);
2404 }
2405 goto dropboth;
2406 }
2407 vnode_put(ivp);
2408 vnode_put(vp);
2409 fp_drop(p, orig_fd, orig_fp, 0);
2410 break;
2411
2412 dropboth:
2413 vnode_put(ivp);
2414 vnode_put(vp);
2415 fp_drop(p, orig_fd, orig_fp, 0);
2416 goto outdrop;
2417 }
2418 #endif
2419 case F_GETCODEDIR:
2420 case F_FINDSIGS: {
2421 error = ENOTSUP;
2422 goto out;
2423 }
2424 case F_CHECK_LV: {
2425 struct fileglob *fg;
2426 fchecklv_t lv = {};
2427
2428 if (fp->f_type != DTYPE_VNODE) {
2429 error = EBADF;
2430 goto out;
2431 }
2432 fg = fp->fp_glob;
2433 proc_fdunlock(p);
2434
2435 if (IS_64BIT_PROCESS(p)) {
2436 error = copyin(argp, &lv, sizeof(lv));
2437 } else {
2438 struct user32_fchecklv lv32 = {};
2439
2440 error = copyin(argp, &lv32, sizeof(lv32));
2441 lv.lv_file_start = lv32.lv_file_start;
2442 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2443 lv.lv_error_message_size = lv32.lv_error_message_size;
2444 }
2445 if (error) {
2446 goto outdrop;
2447 }
2448
2449 #if CONFIG_MACF
2450 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2451 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2452 #endif
2453
2454 break;
2455 }
2456 case F_GETSIGSINFO: {
2457 struct cs_blob *blob = NULL;
2458 fgetsigsinfo_t sigsinfo = {};
2459
2460 if (fp->f_type != DTYPE_VNODE) {
2461 error = EBADF;
2462 goto out;
2463 }
2464 vp = (struct vnode *)fp->f_data;
2465 proc_fdunlock(p);
2466
2467 error = vnode_getwithref(vp);
2468 if (error) {
2469 goto outdrop;
2470 }
2471
2472 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
2473 if (error) {
2474 vnode_put(vp);
2475 goto outdrop;
2476 }
2477
2478 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
2479 if (blob == NULL) {
2480 error = ENOENT;
2481 vnode_put(vp);
2482 goto outdrop;
2483 }
2484 switch (sigsinfo.fg_info_request) {
2485 case GETSIGSINFO_PLATFORM_BINARY:
2486 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
2487 error = copyout(&sigsinfo.fg_sig_is_platform,
2488 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
2489 sizeof(sigsinfo.fg_sig_is_platform));
2490 if (error) {
2491 vnode_put(vp);
2492 goto outdrop;
2493 }
2494 break;
2495 default:
2496 error = EINVAL;
2497 vnode_put(vp);
2498 goto outdrop;
2499 }
2500 vnode_put(vp);
2501 break;
2502 }
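	/*
	 * Illustrative userland sketch (an assumption for exposition, not part of
	 * this file): querying whether the blob loaded at a slice offset belongs to
	 * a platform binary.  The request constant and field names match the
	 * kernel-side handling above; the userland struct fgetsigsinfo from
	 * <sys/fcntl.h> is assumed to share that layout.
	 */
#if 0
#include <fcntl.h>

static int
is_platform_binary(int fd, off_t slice_offset, int *is_platform)
{
	struct fgetsigsinfo info = {
		.fg_file_start = slice_offset,
		.fg_info_request = GETSIGSINFO_PLATFORM_BINARY,
	};

	if (fcntl(fd, F_GETSIGSINFO, &info) == -1) {
		return -1;      /* errno is ENOENT when no blob is loaded at that offset */
	}
	*is_platform = info.fg_sig_is_platform;
	return 0;
}
#endif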
2503 #if CONFIG_PROTECT
2504 case F_GETPROTECTIONCLASS: {
2505 if (fp->f_type != DTYPE_VNODE) {
2506 error = EBADF;
2507 goto out;
2508 }
2509 vp = (struct vnode *)fp->f_data;
2510
2511 proc_fdunlock(p);
2512
2513 if (vnode_getwithref(vp)) {
2514 error = ENOENT;
2515 goto outdrop;
2516 }
2517
2518 struct vnode_attr va;
2519
2520 VATTR_INIT(&va);
2521 VATTR_WANTED(&va, va_dataprotect_class);
2522 error = VNOP_GETATTR(vp, &va, &context);
2523 if (!error) {
2524 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2525 *retval = va.va_dataprotect_class;
2526 } else {
2527 error = ENOTSUP;
2528 }
2529 }
2530
2531 vnode_put(vp);
2532 break;
2533 }
2534
2535 case F_SETPROTECTIONCLASS: {
2536 /* tmp must be a valid PROTECTION_CLASS_* */
2537 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2538
2539 if (fp->f_type != DTYPE_VNODE) {
2540 error = EBADF;
2541 goto out;
2542 }
2543 vp = (struct vnode *)fp->f_data;
2544
2545 proc_fdunlock(p);
2546
2547 if (vnode_getwithref(vp)) {
2548 error = ENOENT;
2549 goto outdrop;
2550 }
2551
2552 /* Only go forward if you have write access */
2553 vfs_context_t ctx = vfs_context_current();
2554 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2555 vnode_put(vp);
2556 error = EBADF;
2557 goto outdrop;
2558 }
2559
2560 struct vnode_attr va;
2561
2562 VATTR_INIT(&va);
2563 VATTR_SET(&va, va_dataprotect_class, tmp);
2564
2565 error = VNOP_SETATTR(vp, &va, ctx);
2566
2567 vnode_put(vp);
2568 break;
2569 }
2570
2571 case F_TRANSCODEKEY: {
2572 if (fp->f_type != DTYPE_VNODE) {
2573 error = EBADF;
2574 goto out;
2575 }
2576
2577 vp = (struct vnode *)fp->f_data;
2578 proc_fdunlock(p);
2579
2580 if (vnode_getwithref(vp)) {
2581 error = ENOENT;
2582 goto outdrop;
2583 }
2584
2585 cp_key_t k = {
2586 .len = CP_MAX_WRAPPEDKEYSIZE,
2587 };
2588
2589 k.key = kheap_alloc(KHEAP_TEMP, CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
2590 if (k.key == NULL) {
2591 error = ENOMEM;
2592 } else {
2593 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2594 }
2595
2596 vnode_put(vp);
2597
2598 if (error == 0) {
2599 error = copyout(k.key, argp, k.len);
2600 *retval = k.len;
2601 }
2602
2603 kheap_free(KHEAP_TEMP, k.key, CP_MAX_WRAPPEDKEYSIZE);
2604
2605 break;
2606 }
2607
2608 case F_GETPROTECTIONLEVEL: {
2609 if (fp->f_type != DTYPE_VNODE) {
2610 error = EBADF;
2611 goto out;
2612 }
2613
2614 vp = (struct vnode*) fp->f_data;
2615 proc_fdunlock(p);
2616
2617 if (vnode_getwithref(vp)) {
2618 error = ENOENT;
2619 goto outdrop;
2620 }
2621
2622 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2623
2624 vnode_put(vp);
2625 break;
2626 }
2627
2628 case F_GETDEFAULTPROTLEVEL: {
2629 if (fp->f_type != DTYPE_VNODE) {
2630 error = EBADF;
2631 goto out;
2632 }
2633
2634 vp = (struct vnode*) fp->f_data;
2635 proc_fdunlock(p);
2636
2637 if (vnode_getwithref(vp)) {
2638 error = ENOENT;
2639 goto outdrop;
2640 }
2641
2642 /*
2643 * if cp_get_major_vers fails, error will be set to proper errno
2644 * and cp_version will still be 0.
2645 */
2646
2647 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2648
2649 vnode_put(vp);
2650 break;
2651 }
2652
2653 #endif /* CONFIG_PROTECT */
2654
2655 case F_MOVEDATAEXTENTS: {
2656 struct fileproc *fp2 = NULL;
2657 struct vnode *src_vp = NULLVP;
2658 struct vnode *dst_vp = NULLVP;
2659 /* We need to grab the 2nd FD out of the arguments before moving on. */
2660 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2661
2662 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2663 if (error) {
2664 goto out;
2665 }
2666
2667 if (fp->f_type != DTYPE_VNODE) {
2668 error = EBADF;
2669 goto out;
2670 }
2671
2672 /*
2673 * For now, special case HFS+ and APFS only, since this
2674 * is SPI.
2675 */
2676 src_vp = (struct vnode *)fp->f_data;
2677 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2678 error = ENOTSUP;
2679 goto out;
2680 }
2681
2682 /*
2683 * Get the references before we start acquiring iocounts on the vnodes,
2684 * while we still hold the proc fd lock
2685 */
2686 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2687 error = EBADF;
2688 goto out;
2689 }
2690 if (fp2->f_type != DTYPE_VNODE) {
2691 fp_drop(p, fd2, fp2, 1);
2692 error = EBADF;
2693 goto out;
2694 }
2695 dst_vp = (struct vnode *)fp2->f_data;
2696 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2697 fp_drop(p, fd2, fp2, 1);
2698 error = ENOTSUP;
2699 goto out;
2700 }
2701
2702 #if CONFIG_MACF
2703 /* Re-do MAC checks against the new FD, pass in a fake argument */
2704 error = mac_file_check_fcntl(proc_ucred(p), fp2->fp_glob, uap->cmd, 0);
2705 if (error) {
2706 fp_drop(p, fd2, fp2, 1);
2707 goto out;
2708 }
2709 #endif
2710 /* Audit the 2nd FD */
2711 AUDIT_ARG(fd, fd2);
2712
2713 proc_fdunlock(p);
2714
2715 if (vnode_getwithref(src_vp)) {
2716 fp_drop(p, fd2, fp2, 0);
2717 error = ENOENT;
2718 goto outdrop;
2719 }
2720 if (vnode_getwithref(dst_vp)) {
2721 vnode_put(src_vp);
2722 fp_drop(p, fd2, fp2, 0);
2723 error = ENOENT;
2724 goto outdrop;
2725 }
2726
2727 /*
2728 * Basic sanity checks: validate that the two vnodes are not the same and that
2729 * both live on the same filesystem.
2730 */
2731 if (dst_vp == src_vp) {
2732 vnode_put(src_vp);
2733 vnode_put(dst_vp);
2734 fp_drop(p, fd2, fp2, 0);
2735 error = EINVAL;
2736 goto outdrop;
2737 }
2738
2739 if (dst_vp->v_mount != src_vp->v_mount) {
2740 vnode_put(src_vp);
2741 vnode_put(dst_vp);
2742 fp_drop(p, fd2, fp2, 0);
2743 error = EXDEV;
2744 goto outdrop;
2745 }
2746
2747 /* Now we have a legit pair of FDs. Go to work */
2748
2749 /* Now check for write access to the target files */
2750 if (vnode_authorize(src_vp, NULLVP,
2751 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2752 vnode_put(src_vp);
2753 vnode_put(dst_vp);
2754 fp_drop(p, fd2, fp2, 0);
2755 error = EBADF;
2756 goto outdrop;
2757 }
2758
2759 if (vnode_authorize(dst_vp, NULLVP,
2760 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2761 vnode_put(src_vp);
2762 vnode_put(dst_vp);
2763 fp_drop(p, fd2, fp2, 0);
2764 error = EBADF;
2765 goto outdrop;
2766 }
2767
2768 /* Verify that both vps point to files and not directories */
2769 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2770 error = EINVAL;
2771 vnode_put(src_vp);
2772 vnode_put(dst_vp);
2773 fp_drop(p, fd2, fp2, 0);
2774 goto outdrop;
2775 }
2776
2777 /*
2778 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2779 * We'll pass in our special bit indicating that the new behavior is expected
2780 */
2781
2782 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2783
2784 vnode_put(src_vp);
2785 vnode_put(dst_vp);
2786 fp_drop(p, fd2, fp2, 0);
2787 break;
2788 }
2789
2790 /*
2791 * SPI for making a file compressed.
2792 */
2793 case F_MAKECOMPRESSED: {
2794 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2795
2796 if (fp->f_type != DTYPE_VNODE) {
2797 error = EBADF;
2798 goto out;
2799 }
2800
2801 vp = (struct vnode*) fp->f_data;
2802 proc_fdunlock(p);
2803
2804 /* get the vnode */
2805 if (vnode_getwithref(vp)) {
2806 error = ENOENT;
2807 goto outdrop;
2808 }
2809
2810 /* Is it a file? */
2811 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2812 vnode_put(vp);
2813 error = EBADF;
2814 goto outdrop;
2815 }
2816
2817 /* invoke ioctl to pass off to FS */
2818 /* Only go forward if you have write access */
2819 vfs_context_t ctx = vfs_context_current();
2820 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2821 vnode_put(vp);
2822 error = EBADF;
2823 goto outdrop;
2824 }
2825
2826 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2827
2828 vnode_put(vp);
2829 break;
2830 }
2831
2832 /*
2833 * SPI (private) for indicating to a filesystem that subsequent writes to
2834 * the open FD will be written to the Fastflow.
2835 */
2836 case F_SET_GREEDY_MODE:
2837 /* Intentionally fall through to the same handler as F_SETSTATICCONTENT;
2838 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2839 */
2840
2841 /*
2842 * SPI (private) for indicating to a filesystem that subsequent writes to
2843 * the open FD will represent static content.
2844 */
2845 case F_SETSTATICCONTENT: {
2846 caddr_t ioctl_arg = NULL;
2847
2848 if (uap->arg) {
2849 ioctl_arg = (caddr_t) 1;
2850 }
2851
2852 if (fp->f_type != DTYPE_VNODE) {
2853 error = EBADF;
2854 goto out;
2855 }
2856 vp = (struct vnode *)fp->f_data;
2857 proc_fdunlock(p);
2858
2859 error = vnode_getwithref(vp);
2860 if (error) {
2861 error = ENOENT;
2862 goto outdrop;
2863 }
2864
2865 /* Only go forward if you have write access */
2866 vfs_context_t ctx = vfs_context_current();
2867 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2868 vnode_put(vp);
2869 error = EBADF;
2870 goto outdrop;
2871 }
2872
2873 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2874 (void)vnode_put(vp);
2875
2876 break;
2877 }
2878
2879 /*
2880 * SPI (private) for indicating to the lower level storage driver that the
2881 * subsequent writes should be of a particular IO type (burst, greedy, static),
2882 * or other flavors that may be necessary.
2883 */
2884 case F_SETIOTYPE: {
2885 caddr_t param_ptr;
2886 uint32_t param;
2887
2888 if (uap->arg) {
2889 /* extract 32 bits of flags from userland */
2890 param_ptr = (caddr_t) uap->arg;
2891 param = (uint32_t) param_ptr;
2892 } else {
2893 /* If no argument is specified, error out */
2894 error = EINVAL;
2895 goto out;
2896 }
2897
2898 /*
2899 * Validate the different types of flags that can be specified:
2900 * all of them are mutually exclusive for now.
2901 */
2902 switch (param) {
2903 case F_IOTYPE_ISOCHRONOUS:
2904 break;
2905
2906 default:
2907 error = EINVAL;
2908 goto out;
2909 }
2910
2911
2912 if (fp->f_type != DTYPE_VNODE) {
2913 error = EBADF;
2914 goto out;
2915 }
2916 vp = (struct vnode *)fp->f_data;
2917 proc_fdunlock(p);
2918
2919 error = vnode_getwithref(vp);
2920 if (error) {
2921 error = ENOENT;
2922 goto outdrop;
2923 }
2924
2925 /* Only go forward if you have write access */
2926 vfs_context_t ctx = vfs_context_current();
2927 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2928 vnode_put(vp);
2929 error = EBADF;
2930 goto outdrop;
2931 }
2932
2933 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2934 (void)vnode_put(vp);
2935
2936 break;
2937 }
2938
2939 /*
2940 * Set the vnode pointed to by 'fd'
2941 * and tag it as the (potentially future) backing store
2942 * for another filesystem
2943 */
2944 case F_SETBACKINGSTORE: {
2945 if (fp->f_type != DTYPE_VNODE) {
2946 error = EBADF;
2947 goto out;
2948 }
2949
2950 vp = (struct vnode *)fp->f_data;
2951
2952 if (vp->v_tag != VT_HFS) {
2953 error = EINVAL;
2954 goto out;
2955 }
2956 proc_fdunlock(p);
2957
2958 if (vnode_getwithref(vp)) {
2959 error = ENOENT;
2960 goto outdrop;
2961 }
2962
2963 /* only proceed if you have write access */
2964 vfs_context_t ctx = vfs_context_current();
2965 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2966 vnode_put(vp);
2967 error = EBADF;
2968 goto outdrop;
2969 }
2970
2971
2972 /* If arg != 0, set, otherwise unset */
2973 if (uap->arg) {
2974 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2975 } else {
2976 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
2977 }
2978
2979 vnode_put(vp);
2980 break;
2981 }
2982
2983 /*
2984 * like F_GETPATH, but special semantics for
2985 * the mobile time machine handler.
2986 */
2987 case F_GETPATH_MTMINFO: {
2988 char *pathbufp;
2989 int pathlen;
2990
2991 if (fp->f_type != DTYPE_VNODE) {
2992 error = EBADF;
2993 goto out;
2994 }
2995 vp = (struct vnode *)fp->f_data;
2996 proc_fdunlock(p);
2997
2998 pathlen = MAXPATHLEN;
2999 pathbufp = zalloc(ZV_NAMEI);
3000
3001 if ((error = vnode_getwithref(vp)) == 0) {
3002 int backingstore = 0;
3003
3004 /* Check for error from vn_getpath before moving on */
3005 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
3006 if (vp->v_tag == VT_HFS) {
3007 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
3008 }
3009 (void)vnode_put(vp);
3010
3011 if (error == 0) {
3012 error = copyout((caddr_t)pathbufp, argp, pathlen);
3013 }
3014 if (error == 0) {
3015 /*
3016 * If the copyout was successful, now check to ensure
3017 * that this vnode is not a BACKINGSTORE vnode. mtmd
3018 * wants the path regardless.
3019 */
3020 if (backingstore) {
3021 error = EBUSY;
3022 }
3023 }
3024 } else {
3025 (void)vnode_put(vp);
3026 }
3027 }
3028
3029 zfree(ZV_NAMEI, pathbufp);
3030 goto outdrop;
3031 }
3032
3033 #if DEBUG || DEVELOPMENT
3034 case F_RECYCLE:
3035 if (fp->f_type != DTYPE_VNODE) {
3036 error = EBADF;
3037 goto out;
3038 }
3039 vp = (struct vnode *)fp->f_data;
3040 proc_fdunlock(p);
3041
3042 vnode_recycle(vp);
3043 break;
3044 #endif
3045
3046 default:
3047 /*
3048 * This is an fcntl() that we do not recognize at this level;
3049 * if this is a vnode, we send it down into the VNOP_IOCTL
3050 * for this vnode; this can include special devices, and will
3051 * effectively overload fcntl() to send ioctl()'s.
3052 */
3053 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
3054 error = EINVAL;
3055 goto out;
3056 }
3057
3058 /* Catch any now-invalid fcntl() selectors */
3059 switch (uap->cmd) {
3060 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
3061 case (int)FSIOC_FIOSEEKHOLE:
3062 case (int)FSIOC_FIOSEEKDATA:
3063 case (int)FSIOC_CAS_BSDFLAGS:
3064 case HFS_GET_BOOT_INFO:
3065 case HFS_SET_BOOT_INFO:
3066 case FIOPINSWAP:
3067 case F_MARKDEPENDENCY:
3068 case TIOCREVOKE:
3069 case TIOCREVOKECLEAR:
3070 error = EINVAL;
3071 goto out;
3072 default:
3073 break;
3074 }
3075
3076 if (fp->f_type != DTYPE_VNODE) {
3077 error = EBADF;
3078 goto out;
3079 }
3080 vp = (struct vnode *)fp->f_data;
3081 proc_fdunlock(p);
3082
3083 if ((error = vnode_getwithref(vp)) == 0) {
3084 #define STK_PARAMS 128
3085 char stkbuf[STK_PARAMS] = {0};
3086 unsigned int size;
3087 caddr_t data, memp;
3088 /*
3089 * For this to work properly, we have to copy in the
3090 * ioctl() cmd argument if there is one; we must also
3091 * check that a command parameter, if present, does
3092 * not exceed the maximum command length dictated by
3093 * the number of bits we have available in the command
3094 * to represent a structure length. Finally, we have
3095 * to copy the results back out, if it is that type of
3096 * ioctl().
3097 */
3098 size = IOCPARM_LEN(uap->cmd);
3099 if (size > IOCPARM_MAX) {
3100 (void)vnode_put(vp);
3101 error = EINVAL;
3102 break;
3103 }
3104
3105 memp = NULL;
3106 if (size > sizeof(stkbuf)) {
3107 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
3108 if (memp == 0) {
3109 (void)vnode_put(vp);
3110 error = ENOMEM;
3111 goto outdrop;
3112 }
3113 data = memp;
3114 } else {
3115 data = &stkbuf[0];
3116 }
3117
3118 if (uap->cmd & IOC_IN) {
3119 if (size) {
3120 /* structure */
3121 error = copyin(argp, data, size);
3122 if (error) {
3123 (void)vnode_put(vp);
3124 if (memp) {
3125 kheap_free(KHEAP_TEMP, memp, size);
3126 }
3127 goto outdrop;
3128 }
3129
3130 /* Bzero the section beyond that which was needed */
3131 if (size <= sizeof(stkbuf)) {
3132 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
3133 }
3134 } else {
3135 /* int */
3136 if (is64bit) {
3137 *(user_addr_t *)data = argp;
3138 } else {
3139 *(uint32_t *)data = (uint32_t)argp;
3140 }
3141 };
3142 } else if ((uap->cmd & IOC_OUT) && size) {
3143 /*
3144 * Zero the buffer so the user always
3145 * gets back something deterministic.
3146 */
3147 bzero(data, size);
3148 } else if (uap->cmd & IOC_VOID) {
3149 if (is64bit) {
3150 *(user_addr_t *)data = argp;
3151 } else {
3152 *(uint32_t *)data = (uint32_t)argp;
3153 }
3154 }
3155
3156 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
3157
3158 (void)vnode_put(vp);
3159
3160 /* Copy any output data to user */
3161 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
3162 error = copyout(data, argp, size);
3163 }
3164 if (memp) {
3165 kheap_free(KHEAP_TEMP, memp, size);
3166 }
3167 }
3168 break;
3169 }
3170
3171 outdrop:
3172 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3173 fp_drop(p, fd, fp, 0);
3174 return error;
3175 out:
3176 fp_drop(p, fd, fp, 1);
3177 proc_fdunlock(p);
3178 return error;
3179 }
3180
3181
3182 /*
3183 * finishdup
3184 *
3185 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
3186 *
3187 * Parameters: p Process performing the dup
3188 * old The fd to dup
3189 * new The fd to dup it to
3190 * fd_flags Flags to augment the new fd
3191 * retval Pointer to the call return area
3192 *
3193 * Returns: 0 Success
3194 * EBADF
3195 * ENOMEM
3196 *
3197 * Implicit returns:
3198 * *retval (modified) The new descriptor
3199 *
3200 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
3201 * the caller
3202 *
3203 * Notes: This function may drop and reacquire this lock; it is unsafe
3204 * for a caller to assume that other state protected by the lock
3205 * has not been subsequently changed out from under it.
3206 */
3207 int
3208 finishdup(proc_t p,
3209 struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
3210 {
3211 struct fileproc *nfp;
3212 struct fileproc *ofp;
3213 #if CONFIG_MACF
3214 int error;
3215 #endif
3216
3217 #if DIAGNOSTIC
3218 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3219 #endif
3220 if ((ofp = fdp->fd_ofiles[old]) == NULL ||
3221 (fdp->fd_ofileflags[old] & UF_RESERVED)) {
3222 fdrelse(p, new);
3223 return EBADF;
3224 }
3225
3226 #if CONFIG_MACF
3227 error = mac_file_check_dup(proc_ucred(p), ofp->fp_glob, new);
3228 if (error) {
3229 fdrelse(p, new);
3230 return error;
3231 }
3232 #endif
3233
3234 proc_fdunlock(p);
3235
3236 nfp = fileproc_alloc_init(NULL);
3237
3238 proc_fdlock(p);
3239
3240 if (nfp == NULL) {
3241 fdrelse(p, new);
3242 return ENOMEM;
3243 }
3244
3245 fg_ref(p, ofp->fp_glob);
3246 nfp->fp_glob = ofp->fp_glob;
3247
3248 #if DIAGNOSTIC
3249 if (fdp->fd_ofiles[new] != 0) {
3250 panic("finishdup: overwriting fd_ofiles with new %d", new);
3251 }
3252 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
3253 panic("finishdup: unreserved fileflags with new %d", new);
3254 }
3255 #endif
3256
3257 if (new > fdp->fd_lastfile) {
3258 fdp->fd_lastfile = new;
3259 }
3260 *fdflags(p, new) |= fd_flags;
3261 procfdtbl_releasefd(p, new, nfp);
3262 *retval = new;
3263 return 0;
3264 }
3265
3266
3267 /*
3268 * sys_close
3269 *
3270 * Description: The implementation of the close(2) system call
3271 *
3272 * Parameters: p Process in whose per process file table
3273 * the close is to occur
3274 * uap->fd fd to be closed
3275 * retval <unused>
3276 *
3277 * Returns: 0 Success
3278 * fp_lookup:EBADF Bad file descriptor
3279 * fp_guard_exception:??? Guarded file descriptor
3280 * close_internal:EBADF
3281 * close_internal:??? Anything returnable by a per-fileops
3282 * close function
3283 */
3284 int
3285 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
3286 {
3287 __pthread_testcancel(1);
3288 return close_nocancel(p, uap->fd);
3289 }
3290
3291 int
3292 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
3293 {
3294 return close_nocancel(p, uap->fd);
3295 }
3296
3297 int
3298 close_nocancel(proc_t p, int fd)
3299 {
3300 struct fileproc *fp;
3301
3302 AUDIT_SYSCLOSE(p, fd);
3303
3304 proc_fdlock(p);
3305 if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
3306 proc_fdunlock(p);
3307 return EBADF;
3308 }
3309
3310 if (fp_isguarded(fp, GUARD_CLOSE)) {
3311 int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
3312 proc_fdunlock(p);
3313 return error;
3314 }
3315
3316 return fp_close_and_unlock(p, fd, fp, 0);
3317 }
3318
3319
3320 int
3321 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
3322 {
3323 struct filedesc *fdp = p->p_fd;
3324 struct fileglob *fg = fp->fp_glob;
3325
3326 #if DIAGNOSTIC
3327 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3328 #endif
3329
3330 /*
3331 * Keep most people from finding the filedesc while we are closing it.
3332 *
3333 * Callers are:
3334 *
3335 * - dup2() which always waits for UF_RESERVED to clear
3336 *
3337 * - close/guarded_close/... who will fail the fileproc lookup if
3338 * UF_RESERVED is set,
3339 *
3340 * - fdexec()/fdfree() who only run once all threads in the proc
3341 * are properly canceled, hence no fileproc in this proc should
3342 * be in flux.
3343 *
3344 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
3345 *
3346 * Callers of fp_get_noref_locked_with_iocount() can still find
3347 * this entry so that they can drop their I/O reference despite
3348 * not having remembered the fileproc pointer (namely select() and
3349 * file_drop()).
3350 */
3351 if (p->p_fd->fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
3352 panic("%s: called with fileproc in flux (%d/:%p)",
3353 __func__, fd, fp);
3354 }
3355 p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
3356
3357 if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
3358 proc_fdunlock(p);
3359
3360 if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
3361 /*
3362 * call out to allow 3rd party notification of close.
3363 * Ignore result of kauth_authorize_fileop call.
3364 */
3365 if (vnode_getwithref((vnode_t)fg->fg_data) == 0) {
3366 u_int fileop_flags = 0;
3367 if (fg->fg_flag & FWASWRITTEN) {
3368 fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
3369 }
3370 kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
3371 (uintptr_t)fg->fg_data, (uintptr_t)fileop_flags);
3372 #if CONFIG_MACF
3373 mac_file_notify_close(proc_ucred(p), fp->fp_glob);
3374 #endif
3375 vnode_put((vnode_t)fg->fg_data);
3376 }
3377 }
3378 if (fp->fp_flags & FP_AIOISSUED) {
3379 /*
3380 * cancel all async IO requests that can be cancelled.
3381 */
3382 _aio_close( p, fd );
3383 }
3384
3385 proc_fdlock(p);
3386 }
3387
3388 if (fd < fdp->fd_knlistsize) {
3389 knote_fdclose(p, fd);
3390 }
3391
3392 fileproc_drain(p, fp);
3393
3394 if (flags & FD_DUP2RESV) {
3395 fdp->fd_ofiles[fd] = NULL;
3396 fdp->fd_ofileflags[fd] &= ~(UF_CLOSING | UF_EXCLOSE | UF_FORKCLOSE);
3397 } else {
3398 fdrelse(p, fd);
3399 }
3400
3401 proc_fdunlock(p);
3402
3403 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
3404 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
3405 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg->fg_data));
3406 }
3407
3408 fileproc_free(fp);
3409
3410 return fg_drop(p, fg);
3411 }
3412
3413
3414 /*
3415 * fstat
3416 *
3417 * Description: Return status information about a file descriptor.
3418 *
3419 * Parameters: p The process doing the fstat
3420 * fd The fd to stat
3421 * ub The user stat buffer
3422 * xsecurity The user extended security
3423 * buffer, or 0 if none
3424 * xsecurity_size The size of xsecurity, or 0
3425 * if no xsecurity
3426 * isstat64 Flag to indicate 64 bit version
3427 * for inode size, etc.
3428 *
3429 * Returns: 0 Success
3430 * EBADF
3431 * EFAULT
3432 * fp_lookup:EBADF Bad file descriptor
3433 * vnode_getwithref:???
3434 * copyout:EFAULT
3435 * vnode_getwithref:???
3436 * vn_stat:???
3437 * soo_stat:???
3438 * pipe_stat:???
3439 * pshm_stat:???
3440 * kqueue_stat:???
3441 *
3442 * Notes: Internal implementation for all other fstat() related
3443 * functions
3444 *
3445 * XXX switch on node type is bogus; need a stat in struct
3446 * XXX fileops instead.
3447 */
3448 static int
3449 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3450 {
3451 struct fileproc *fp;
3452 union {
3453 struct stat sb;
3454 struct stat64 sb64;
3455 } source;
3456 union {
3457 struct user64_stat user64_sb;
3458 struct user32_stat user32_sb;
3459 struct user64_stat64 user64_sb64;
3460 struct user32_stat64 user32_sb64;
3461 } dest;
3462 int error, my_size;
3463 file_type_t type;
3464 caddr_t data;
3465 kauth_filesec_t fsec;
3466 user_size_t xsecurity_bufsize;
3467 vfs_context_t ctx = vfs_context_current();
3468 void * sbptr;
3469
3470
3471 AUDIT_ARG(fd, fd);
3472
3473 if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
3474 return error;
3475 }
3476 type = fp->f_type;
3477 data = fp->f_data;
3478 fsec = KAUTH_FILESEC_NONE;
3479
3480 sbptr = (void *)&source;
3481
3482 switch (type) {
3483 case DTYPE_VNODE:
3484 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
3485 /*
3486 * If the caller has the file open, and is not
3487 * requesting extended security information, we are
3488 * going to let them get the basic stat information.
3489 */
3490 if (xsecurity == USER_ADDR_NULL) {
3491 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
3492 fp->fp_glob->fg_cred);
3493 } else {
3494 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
3495 }
3496
3497 AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3498 (void)vnode_put((vnode_t)data);
3499 }
3500 break;
3501
3502 #if SOCKETS
3503 case DTYPE_SOCKET:
3504 error = soo_stat((struct socket *)data, sbptr, isstat64);
3505 break;
3506 #endif /* SOCKETS */
3507
3508 case DTYPE_PIPE:
3509 error = pipe_stat((void *)data, sbptr, isstat64);
3510 break;
3511
3512 case DTYPE_PSXSHM:
3513 error = pshm_stat((void *)data, sbptr, isstat64);
3514 break;
3515
3516 case DTYPE_KQUEUE:
3517 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3518 break;
3519
3520 default:
3521 error = EBADF;
3522 goto out;
3523 }
3524 if (error == 0) {
3525 caddr_t sbp;
3526
3527 if (isstat64 != 0) {
3528 source.sb64.st_lspare = 0;
3529 source.sb64.st_qspare[0] = 0LL;
3530 source.sb64.st_qspare[1] = 0LL;
3531
3532 if (IS_64BIT_PROCESS(current_proc())) {
3533 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3534 my_size = sizeof(dest.user64_sb64);
3535 sbp = (caddr_t)&dest.user64_sb64;
3536 } else {
3537 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3538 my_size = sizeof(dest.user32_sb64);
3539 sbp = (caddr_t)&dest.user32_sb64;
3540 }
3541 } else {
3542 source.sb.st_lspare = 0;
3543 source.sb.st_qspare[0] = 0LL;
3544 source.sb.st_qspare[1] = 0LL;
3545 if (IS_64BIT_PROCESS(current_proc())) {
3546 munge_user64_stat(&source.sb, &dest.user64_sb);
3547 my_size = sizeof(dest.user64_sb);
3548 sbp = (caddr_t)&dest.user64_sb;
3549 } else {
3550 munge_user32_stat(&source.sb, &dest.user32_sb);
3551 my_size = sizeof(dest.user32_sb);
3552 sbp = (caddr_t)&dest.user32_sb;
3553 }
3554 }
3555
3556 error = copyout(sbp, ub, my_size);
3557 }
3558
3559 /* caller wants extended security information? */
3560 if (xsecurity != USER_ADDR_NULL) {
3561 /* did we get any? */
3562 if (fsec == KAUTH_FILESEC_NONE) {
3563 if (susize(xsecurity_size, 0) != 0) {
3564 error = EFAULT;
3565 goto out;
3566 }
3567 } else {
3568 /* find the user buffer size */
3569 xsecurity_bufsize = fusize(xsecurity_size);
3570
3571 /* copy out the actual data size */
3572 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3573 error = EFAULT;
3574 goto out;
3575 }
3576
3577 /* if the caller supplied enough room, copy out to it */
3578 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
3579 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3580 }
3581 }
3582 }
3583 out:
3584 fp_drop(p, fd, fp, 0);
3585 if (fsec != NULL) {
3586 kauth_filesec_free(fsec);
3587 }
3588 return error;
3589 }
3590
3591
3592 /*
3593 * sys_fstat_extended
3594 *
3595 * Description: Extended version of fstat supporting returning extended
3596 * security information
3597 *
3598 * Parameters: p The process doing the fstat
3599 * uap->fd The fd to stat
3600 * uap->ub The user stat buffer
3601 * uap->xsecurity The user extended security
3602 * buffer, or 0 if none
3603 * uap->xsecurity_size The size of xsecurity, or 0
3604 *
3605 * Returns: 0 Success
3606 * !0 Errno (see fstat)
3607 */
3608 int
3609 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3610 {
3611 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3612 }
3613
3614
3615 /*
3616 * sys_fstat
3617 *
3618 * Description: Get file status for the file associated with fd
3619 *
3620 * Parameters: p The process doing the fstat
3621 * uap->fd The fd to stat
3622 * uap->ub The user stat buffer
3623 *
3624 * Returns: 0 Success
3625 * !0 Errno (see fstat)
3626 */
3627 int
3628 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3629 {
3630 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
3631 }
3632
3633
3634 /*
3635 * sys_fstat64_extended
3636 *
3637 * Description: Extended version of fstat64 supporting returning extended
3638 * security information
3639 *
3640 * Parameters: p The process doing the fstat
3641 * uap->fd The fd to stat
3642 * uap->ub The user stat buffer
3643 * uap->xsecurity The user extended security
3644 * buffer, or 0 if none
3645 * uap->xsecurity_size The size of xsecurity, or 0
3646 *
3647 * Returns: 0 Success
3648 * !0 Errno (see fstat)
3649 */
3650 int
3651 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3652 {
3653 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3654 }
3655
3656
3657 /*
3658 * sys_fstat64
3659 *
3660 * Description: Get 64 bit version of the file status for the file associated
3661 * with fd
3662 *
3663 * Parameters: p The process doing the fstat
3664 * uap->fd The fd to stat
3665 * uap->ub The user stat buffer
3666 *
3667 * Returns: 0 Success
3668 * !0 Errno (see fstat)
3669 */
3670 int
3671 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3672 {
3673 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
3674 }
3675
3676
3677 /*
3678 * sys_fpathconf
3679 *
3680 * Description: Return pathconf information about a file descriptor.
3681 *
3682 * Parameters: p Process making the request
3683 * uap->fd fd to get information about
3684 * uap->name Name of information desired
3685 * retval Pointer to the call return area
3686 *
3687 * Returns: 0 Success
3688 * EINVAL
3689 * fp_lookup:EBADF Bad file descriptor
3690 * vnode_getwithref:???
3691 * vn_pathconf:???
3692 *
3693 * Implicit returns:
3694 * *retval (modified) Returned information (numeric)
3695 */
3696 int
3697 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3698 {
3699 int fd = uap->fd;
3700 struct fileproc *fp;
3701 struct vnode *vp;
3702 int error = 0;
3703 file_type_t type;
3704 caddr_t data;
3705
3706
3707 AUDIT_ARG(fd, uap->fd);
3708 if ((error = fp_lookup(p, fd, &fp, 0))) {
3709 return error;
3710 }
3711 type = fp->f_type;
3712 data = fp->f_data;
3713
3714 switch (type) {
3715 case DTYPE_SOCKET:
3716 if (uap->name != _PC_PIPE_BUF) {
3717 error = EINVAL;
3718 goto out;
3719 }
3720 *retval = PIPE_BUF;
3721 error = 0;
3722 goto out;
3723
3724 case DTYPE_PIPE:
3725 if (uap->name != _PC_PIPE_BUF) {
3726 error = EINVAL;
3727 goto out;
3728 }
3729 *retval = PIPE_BUF;
3730 error = 0;
3731 goto out;
3732
3733 case DTYPE_VNODE:
3734 vp = (struct vnode *)data;
3735
3736 if ((error = vnode_getwithref(vp)) == 0) {
3737 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3738
3739 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3740
3741 (void)vnode_put(vp);
3742 }
3743 goto out;
3744
3745 default:
3746 error = EINVAL;
3747 goto out;
3748 }
3749 /*NOTREACHED*/
3750 out:
3751 fp_drop(p, fd, fp, 0);
3752 return error;
3753 }
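/*
 * Illustrative userland sketch (an assumption for exposition, not part of this
 * file): the fpathconf(2) behavior implemented above.  For sockets and pipes
 * only _PC_PIPE_BUF is honored and PIPE_BUF is returned; any other name on
 * those descriptor types fails with EINVAL.
 */
#if 0
#include <unistd.h>
#include <errno.h>

static long
pipe_atomic_write_limit(int fd)
{
	errno = 0;
	long limit = fpathconf(fd, _PC_PIPE_BUF);
	if (limit == -1 && errno != 0) {
		/* EINVAL for an unsupported name, EBADF for a bad descriptor, ... */
		return -1;
	}
	return limit;
}
#endif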
3754
3755 /*
3756 * Statistics counter for the number of times a process calling fdalloc()
3757 * has resulted in an expansion of the per process open file table.
3758 *
3759 * XXX This would likely be of more use if it were per process
3760 */
3761 int fdexpand;
3762
3763
3764 /*
3765 * fdalloc
3766 *
3767 * Description: Allocate a file descriptor for the process.
3768 *
3769 * Parameters: p Process to allocate the fd in
3770 * want The fd we would prefer to get
3771 * result Pointer to fd we got
3772 *
3773 * Returns: 0 Success
3774 * EMFILE
3775 * ENOMEM
3776 *
3777 * Implicit returns:
3778 * *result (modified) The fd which was allocated
3779 */
3780 int
3781 fdalloc(proc_t p, int want, int *result)
3782 {
3783 struct filedesc *fdp = p->p_fd;
3784 int i;
3785 int last, numfiles, oldnfiles;
3786 struct fileproc **newofiles, **ofiles;
3787 char *newofileflags;
3788 rlim_t lim;
3789 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3790
3791 nofile = MIN(nofile, INT_MAX);
3792
3793 /*
3794 * Search for a free descriptor starting at the higher
3795 * of want or fd_freefile. If that fails, consider
3796 * expanding the ofile array.
3797 */
3798 #if DIAGNOSTIC
3799 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3800 #endif
3801
3802 lim = MIN(nofile, maxfilesperproc);
3803 for (;;) {
3804 last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
3805 if ((i = want) < fdp->fd_freefile) {
3806 i = fdp->fd_freefile;
3807 }
3808 for (; i < last; i++) {
3809 if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3810 procfdtbl_reservefd(p, i);
3811 if (i > fdp->fd_lastfile) {
3812 fdp->fd_lastfile = i;
3813 }
3814 if (want <= fdp->fd_freefile) {
3815 fdp->fd_freefile = i;
3816 }
3817 *result = i;
3818 return 0;
3819 }
3820 }
3821
3822 /*
3823 * No space in current array. Expand?
3824 */
3825 if ((rlim_t)fdp->fd_nfiles >= lim) {
3826 return EMFILE;
3827 }
3828 if (fdp->fd_nfiles < NDEXTENT) {
3829 numfiles = NDEXTENT;
3830 } else {
3831 numfiles = 2 * fdp->fd_nfiles;
3832 }
3833 /* Enforce lim */
3834 if ((rlim_t)numfiles > lim) {
3835 numfiles = (int)lim;
3836 }
3837 proc_fdunlock(p);
3838 newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
3839 Z_WAITOK);
3840 proc_fdlock(p);
3841 if (newofiles == NULL) {
3842 return ENOMEM;
3843 }
3844 if (fdp->fd_nfiles >= numfiles) {
3845 kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
3846 continue;
3847 }
3848 newofileflags = (char *) &newofiles[numfiles];
3849 /*
3850 * Copy the existing ofile and ofileflags arrays
3851 * and zero the new portion of each array.
3852 */
3853 oldnfiles = fdp->fd_nfiles;
3854 (void) memcpy(newofiles, fdp->fd_ofiles,
3855 oldnfiles * sizeof(*fdp->fd_ofiles));
3856 (void) memset(&newofiles[oldnfiles], 0,
3857 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3858
3859 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3860 oldnfiles * sizeof(*fdp->fd_ofileflags));
3861 (void) memset(&newofileflags[oldnfiles], 0,
3862 (numfiles - oldnfiles) *
3863 sizeof(*fdp->fd_ofileflags));
3864 ofiles = fdp->fd_ofiles;
3865 fdp->fd_ofiles = newofiles;
3866 fdp->fd_ofileflags = newofileflags;
3867 fdp->fd_nfiles = numfiles;
3868 kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
3869 fdexpand++;
3870 }
3871 }
3872
3873
3874 /*
3875 * fdavail
3876 *
3877 * Description: Check to see whether n user file descriptors are available
3878 * to the process p.
3879 *
3880 * Parameters: p Process to check in
3881 * n The number of fd's desired
3882 *
3883 * Returns: 0 No
3884 * 1 Yes
3885 *
3886 * Locks: Assumes proc_fdlock for process is held by the caller
3887 *
3888 * Notes: The answer only remains valid so long as the proc_fdlock is
3889 * held by the caller.
3890 */
3891 int
3892 fdavail(proc_t p, int n)
3893 {
3894 struct filedesc *fdp = p->p_fd;
3895 struct fileproc **fpp;
3896 char *flags;
3897 int i;
3898 int lim;
3899 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3900
3901 lim = (int)MIN(nofile, maxfilesperproc);
3902 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
3903 return 1;
3904 }
3905 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3906 flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3907 for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
3908 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
3909 return 1;
3910 }
3911 }
3912 return 0;
3913 }
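/*
 * Illustrative in-kernel sketch (an assumption for exposition, not part of
 * this file): a caller that needs two descriptors checks fdavail() and then
 * reserves them with fdalloc(), all under the proc_fdlock, since the answer
 * from fdavail() is only valid while that lock remains held.
 */
#if 0
static int
example_reserve_two_fds(proc_t p, int *fd1, int *fd2)
{
	int error;

	proc_fdlock(p);
	if (!fdavail(p, 2)) {
		proc_fdunlock(p);
		return EMFILE;
	}
	if ((error = fdalloc(p, 0, fd1)) != 0) {
		proc_fdunlock(p);
		return error;
	}
	if ((error = fdalloc(p, 0, fd2)) != 0) {
		fdrelse(p, *fd1);               /* give back the first reservation */
		proc_fdunlock(p);
		return error;
	}
	proc_fdunlock(p);
	return 0;
}
#endif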
3914
3915
3916 struct fileproc *
3917 fp_get_noref_locked(proc_t p, int fd)
3918 {
3919 struct filedesc *fdp = p->p_fd;
3920 struct fileproc *fp;
3921
3922 if (fd < 0 || fd >= fdp->fd_nfiles ||
3923 (fp = fdp->fd_ofiles[fd]) == NULL ||
3924 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3925 return NULL;
3926 }
3927 return fp;
3928 }
3929
3930 struct fileproc *
3931 fp_get_noref_locked_with_iocount(proc_t p, int fd)
3932 {
3933 struct filedesc *fdp = p->p_fd;
3934 struct fileproc *fp = NULL;
3935
3936 if (fd < 0 || fd >= fdp->fd_nfiles ||
3937 (fp = fdp->fd_ofiles[fd]) == NULL ||
3938 os_ref_get_count(&fp->fp_iocount) <= 1 ||
3939 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3940 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
3941 panic("%s: caller without an ioccount on fileproc (%d/:%p)",
3942 __func__, fd, fp);
3943 }
3944
3945 return fp;
3946 }
3947
3948 int
3949 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
3950 {
3951 struct filedesc *fdp = p->p_fd;
3952 struct fileproc *fp;
3953
3954 proc_fdlock_spin(p);
3955 if (fd < 0 || fd >= fdp->fd_nfiles ||
3956 (fp = fdp->fd_ofiles[fd]) == NULL ||
3957 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3958 proc_fdunlock(p);
3959 return EBADF;
3960 }
3961
3962 if (fp->f_type != ftype) {
3963 proc_fdunlock(p);
3964 return err;
3965 }
3966
3967 os_ref_retain_locked(&fp->fp_iocount);
3968 proc_fdunlock(p);
3969
3970 *fpp = fp;
3971 return 0;
3972 }
3973
3974
3975 /*
3976 * fp_getfvp
3977 *
3978 * Description: Get fileproc and vnode pointer for a given fd from the per
3979 * process open file table of the specified process, and if
3980 * successful, increment the fp_iocount
3981 *
3982 * Parameters: p Process in which fd lives
3983 * fd fd to get information for
3984 * resultfp Pointer to result fileproc
3985 * pointer area, or 0 if none
3986 * resultvp Pointer to result vnode pointer
3987 * area, or 0 if none
3988 *
3989 * Returns: 0 Success
3990 * EBADF Bad file descriptor
3991 * ENOTSUP fd does not refer to a vnode
3992 *
3993 * Implicit returns:
3994 * *resultfp (modified) Fileproc pointer
3995 * *resultvp (modified) vnode pointer
3996 *
3997 * Notes: The resultfp and resultvp fields are optional, and may be
3998 * independently specified as NULL to skip returning information
3999 *
4000 * Locks: Internally takes and releases proc_fdlock
4001 */
4002 int
4003 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
4004 {
4005 struct fileproc *fp;
4006 int error;
4007
4008 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
4009 if (error == 0) {
4010 if (resultfp) {
4011 *resultfp = fp;
4012 }
4013 if (resultvp) {
4014 *resultvp = (struct vnode *)fp->f_data;
4015 }
4016 }
4017
4018 return error;
4019 }
4020
4021
4022 /*
4023 * fp_get_pipe_id
4024 *
4025 * Description: Get pipe id for a given fd from the per process open file table
4026 * of the specified process.
4027 *
4028 * Parameters: p Process in which fd lives
4029 * fd fd to get information for
4030 * result_pipe_id Pointer to result pipe id
4031 *
4032 * Returns: 0 Success
4033 * EINVAL NULL pointer arguments passed
4034 * fp_lookup:EBADF Bad file descriptor
4035 * ENOTSUP fd does not refer to a pipe
4036 *
4037 * Implicit returns:
4038 * *result_pipe_id (modified) pipe id
4039 *
4040 * Locks: Internally takes and releases proc_fdlock
4041 */
4042 int
4043 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
4044 {
4045 struct fileproc *fp = FILEPROC_NULL;
4046 struct fileglob *fg = NULL;
4047 int error = 0;
4048
4049 if (p == NULL || result_pipe_id == NULL) {
4050 return EINVAL;
4051 }
4052
4053 proc_fdlock(p);
4054 if ((error = fp_lookup(p, fd, &fp, 1))) {
4055 proc_fdunlock(p);
4056 return error;
4057 }
4058 fg = fp->fp_glob;
4059
4060 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
4061 *result_pipe_id = pipe_id((struct pipe*)fg->fg_data);
4062 } else {
4063 error = ENOTSUP;
4064 }
4065
4066 fp_drop(p, fd, fp, 1);
4067 proc_fdunlock(p);
4068 return error;
4069 }
4070
4071
4072 /*
4073 * fp_lookup
4074 *
4075 * Description: Get fileproc pointer for a given fd from the per process
4076 * open file table of the specified process and if successful,
4077 * increment the fp_iocount
4078 *
4079 * Parameters: p Process in which fd lives
4080 * fd fd to get information for
4081 * resultfp Pointer to result fileproc
4082 * pointer area, or 0 if none
4083 * locked !0 if the caller holds the
4084 * proc_fdlock, 0 otherwise
4085 *
4086 * Returns: 0 Success
4087 * EBADF Bad file descriptor
4088 *
4089 * Implicit returns:
4090 * *resultfp (modified) Fileproc pointer
4091 *
4092 * Locks: If the argument 'locked' is non-zero, then the caller is
4093 * expected to have taken and held the proc_fdlock; if it is
4094 * zero, then this routine internally takes and drops this lock.
4095 */
4096 int
4097 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
4098 {
4099 struct filedesc *fdp = p->p_fd;
4100 struct fileproc *fp;
4101
4102 if (!locked) {
4103 proc_fdlock_spin(p);
4104 }
4105 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
4106 (fp = fdp->fd_ofiles[fd]) == NULL ||
4107 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4108 if (!locked) {
4109 proc_fdunlock(p);
4110 }
4111 return EBADF;
4112 }
4113 os_ref_retain_locked(&fp->fp_iocount);
4114
4115 if (resultfp) {
4116 *resultfp = fp;
4117 }
4118 if (!locked) {
4119 proc_fdunlock(p);
4120 }
4121
4122 return 0;
4123 }
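/*
 * Illustrative in-kernel sketch (an assumption for exposition, not part of
 * this file): the canonical pairing of fp_lookup() with fp_drop().  The
 * lookup takes an fp_iocount reference that keeps the fileproc valid while
 * it is examined; the matching fp_drop() releases that reference and wakes
 * any thread waiting to drain the descriptor for close.
 */
#if 0
static int
example_fd_is_vnode(proc_t p, int fd, boolean_t *is_vnode)
{
	struct fileproc *fp;
	int error;

	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;                   /* EBADF */
	}
	*is_vnode = (fp->f_type == DTYPE_VNODE);
	fp_drop(p, fd, fp, 0);
	return 0;
}
#endif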
4124
4125
4126 /*
4127 * fp_tryswap
4128 *
4129 * Description: Swap the fileproc pointer for a given fd with a new
4130 * fileproc pointer in the per-process open file table of
4131 * the specified process. The fdlock must be held at entry.
4132 * Iff the swap is successful, the old fileproc pointer is freed.
4133 *
4134 * Parameters: p Process containing the fd
4135 * fd The fd of interest
4136 * nfp Pointer to the newfp
4137 *
4138 * Returns: 0 Success
4139 * EBADF Bad file descriptor
4140 * EINTR Interrupted
4141 * EKEEPLOOKING Other references were active, try again.
4142 */
4143 int
4144 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
4145 {
4146 struct fileproc *fp;
4147 int error;
4148
4149 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
4150
4151 if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
4152 return error;
4153 }
4154 /*
4155 * At this point, our caller (change_guardedfd_np) has
4156 * one fp_iocount reference, and we just took another
4157 * one to begin the replacement.
4158 * fp and nfp have a +1 reference from allocation.
4159 * Thus if no-one else is looking, fp_iocount should be 3.
4160 */
4161 if (os_ref_get_count(&fp->fp_iocount) < 3 ||
4162 1 != os_ref_get_count(&nfp->fp_iocount)) {
4163 panic("%s: fp_iocount", __func__);
4164 } else if (3 == os_ref_get_count(&fp->fp_iocount)) {
4165 /* Copy the contents of *fp, preserving the "type" of *nfp */
4166
4167 nfp->fp_flags = (nfp->fp_flags & FP_TYPEMASK) |
4168 (fp->fp_flags & ~FP_TYPEMASK);
4169 os_ref_retain_locked(&nfp->fp_iocount);
4170 os_ref_retain_locked(&nfp->fp_iocount);
4171 nfp->fp_glob = fp->fp_glob;
4172 nfp->fp_wset = fp->fp_wset;
4173
4174 p->p_fd->fd_ofiles[fd] = nfp;
4175 fp_drop(p, fd, nfp, 1);
4176
4177 os_ref_release_live(&fp->fp_iocount);
4178 os_ref_release_live(&fp->fp_iocount);
4179 fileproc_free(fp);
4180 } else {
4181 /*
4182 * Wait for all other active references to evaporate.
4183 */
4184 p->p_fpdrainwait = 1;
4185 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
4186 PRIBIO | PCATCH, "tryswap fpdrain", NULL);
4187 if (0 == error) {
4188 /*
4189 * Return an "internal" errno to trigger a full
4190 * reevaluation of the change-guard attempt.
4191 */
4192 error = EKEEPLOOKING;
4193 }
4194 (void) fp_drop(p, fd, fp, 1);
4195 }
4196 return error;
4197 }
4198
4199
4200 /*
4201 * fp_drop
4202 *
4203 * Description: Drop the I/O reference previously taken by calling fp_lookup
4204 * et al.
4205 *
4206 * Parameters: p Process in which the fd lives
4207 * fd fd associated with the fileproc
4208 * fp fileproc on which to set the
4209 * flag and drop the reference
4210 * locked flag to internally take and
4211 * drop proc_fdlock if it is not
4212 * already held by the caller
4213 *
4214 * Returns: 0 Success
4215 * EBADF Bad file descriptor
4216 *
4217 * Locks: This function internally takes and drops the proc_fdlock for
4218 * the supplied process if 'locked' is zero, and assumes that
4219 * the caller already holds this lock if 'locked' is non-zero.
4220 *
4221 * Notes: The fileproc must correspond to the fd in the supplied proc
4222 */
4223 int
4224 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4225 {
4226 struct filedesc *fdp = p->p_fd;
4227 int needwakeup = 0;
4228
4229 if (!locked) {
4230 proc_fdlock_spin(p);
4231 }
4232 if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4233 (fp = fdp->fd_ofiles[fd]) == NULL ||
4234 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4235 !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4236 if (!locked) {
4237 proc_fdunlock(p);
4238 }
4239 return EBADF;
4240 }
4241
4242 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4243 if (fp->fp_flags & FP_SELCONFLICT) {
4244 fp->fp_flags &= ~FP_SELCONFLICT;
4245 }
4246
4247 if (p->p_fpdrainwait) {
4248 p->p_fpdrainwait = 0;
4249 needwakeup = 1;
4250 }
4251 }
4252 if (!locked) {
4253 proc_fdunlock(p);
4254 }
4255 if (needwakeup) {
4256 wakeup(&p->p_fpdrainwait);
4257 }
4258
4259 return 0;
4260 }
4261
4262
4263 /*
4264 * file_vnode
4265 *
4266 * Description: Given an fd, look it up in the current process's per process
4267 * open file table, and return its internal vnode pointer.
4268 *
4269 * Parameters: fd fd to obtain vnode from
4270 * vpp pointer to vnode return area
4271 *
4272 * Returns: 0 Success
4273 * EINVAL The fd does not refer to a
4274 * vnode fileproc entry
4275 * fp_lookup:EBADF Bad file descriptor
4276 *
4277 * Implicit returns:
4278 * *vpp (modified) Returned vnode pointer
4279 *
4280 * Locks: This function internally takes and drops the proc_fdlock for
4281 * the current process
4282 *
4283 * Notes: If successful, this function increments the fp_iocount on the
4284 * fd's corresponding fileproc.
4285 *
4286 * The fileproc referenced is not returned; because of this, care
4287 * must be taken to not drop the last reference (e.g. by closing
4288 * the file). This is inherently unsafe, since the reference may
4289 * not be recoverable from the vnode, if there is a subsequent
4290 * close that destroys the associated fileproc. The caller should
4291 * therefore retain their own reference on the fileproc so that
4292 * the fp_iocount can be dropped subsequently. Failure to do this
4293 * can result in the returned pointer immediately becoming invalid
4294 * following the call.
4295 *
4296 * Use of this function is discouraged.
4297 */
4298 int
4299 file_vnode(int fd, struct vnode **vpp)
4300 {
4301 return file_vnode_withvid(fd, vpp, NULL);
4302 }
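/*
 * Illustrative in-kernel sketch (an assumption for exposition, not part of
 * this file): pairing file_vnode() with file_drop(), as the notes above
 * require.  The implicit fp_iocount taken by file_vnode() keeps the fileproc
 * (and hence the returned vnode pointer) valid only until file_drop(); using
 * the vnode beyond that point would require a vnode iocount of its own.
 */
#if 0
static int
example_use_fd_vnode(int fd)
{
	struct vnode *vp = NULL;
	int error;

	if ((error = file_vnode(fd, &vp)) != 0) {
		return error;                   /* EBADF, or EINVAL if not a vnode */
	}
	/* ... use vp here, while the I/O reference is still held ... */
	file_drop(fd);
	return 0;
}
#endif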
4303
4304 /*
4305 * file_vnode_withvid
4306 *
4307 * Description: Given an fd, look it up in the current process's per process
4308 * open file table, and return its internal vnode pointer.
4309 *
4310 * Parameters: fd fd to obtain vnode from
4311 * vpp pointer to vnode return area
4312 * vidp pointer to vid of the returned vnode
4313 *
4314 * Returns: 0 Success
4315 * EINVAL The fd does not refer to a
4316 * vnode fileproc entry
4317 * fp_lookup:EBADF Bad file descriptor
4318 *
4319 * Implicit returns:
4320 * *vpp (modified) Returned vnode pointer
4321 *
4322 * Locks: This function internally takes and drops the proc_fdlock for
4323 * the current process
4324 *
4325 * Notes: If successful, this function increments the fp_iocount on the
4326 * fd's corresponding fileproc.
4327 *
4328 * The fileproc referenced is not returned; because of this, care
4329 * must be taken to not drop the last reference (e.g. by closing
4330 * the file). This is inherently unsafe, since the reference may
4331 * not be recoverable from the vnode, if there is a subsequent
4332 * close that destroys the associated fileproc. The caller should
4333 * therefore retain their own reference on the fileproc so that
4334 * the fp_iocount can be dropped subsequently. Failure to do this
4335 * can result in the returned pointer immediately becoming invalid
4336 * following the call.
4337 *
4338 * Use of this function is discouraged.
4339 */
4340 int
4341 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
4342 {
4343 struct fileproc *fp;
4344 int error;
4345
4346 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
4347 if (error == 0) {
4348 if (vpp) {
4349 *vpp = fp->f_data;
4350 }
4351 if (vidp) {
4352 *vidp = vnode_vid(fp->f_data);
4353 }
4354 }
4355 return error;
4356 }
4357
4358 /*
4359 * file_socket
4360 *
4361 * Description: Given an fd, look it up in the current process's per process
4362 * open file table, and return its internal socket pointer.
4363 *
4364 * Parameters: fd fd to obtain socket from
4365 * sp pointer to socket return area
4366 *
4367 * Returns: 0 Success
4368 * ENOTSOCK Not a socket
4369 * fp_lookup:EBADF Bad file descriptor
4370 *
4371 * Implicit returns:
4372 * *sp (modified) Returned socket pointer
4373 *
4374 * Locks: This function internally takes and drops the proc_fdlock for
4375 * the current process
4376 *
4377 * Notes: If successful, this function increments the fp_iocount on the
4378 * fd's corresponding fileproc.
4379 *
4380 * The fileproc referenced is not returned; because of this, care
4381 * must be taken to not drop the last reference (e.g. by closing
4382 * the file). This is inherently unsafe, since the reference may
4383 * not be recoverable from the socket, if there is a subsequent
4384 * close that destroys the associated fileproc. The caller should
4385 * therefore retain their own reference on the fileproc so that
4386 * the fp_iocount can be dropped subsequently. Failure to do this
4387 * can result in the returned pointer immediately becoming invalid
4388 * following the call.
4389 *
4390 * Use of this function is discouraged.
4391 */
4392 int
4393 file_socket(int fd, struct socket **sp)
4394 {
4395 struct fileproc *fp;
4396 int error;
4397
4398 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
4399 if (error == 0) {
4400 if (sp) {
4401 *sp = (struct socket *)fp->f_data;
4402 }
4403 }
4404 return error;
4405 }
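
/*
 * Illustrative sketch (editor's addition): the typical use of file_socket()
 * paired with file_drop().  "my_fd" is a hypothetical descriptor; the socket
 * pointer is only safe to use while the fp_iocount taken by file_socket()
 * is still held.
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = file_socket(my_fd, &so);
 *	if (error == 0) {
 *		// ... use "so" here, before the iocount is dropped ...
 *		file_drop(my_fd);
 *	}
 */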
4406
4407
4408 /*
4409 * file_flags
4410 *
4411 * Description: Given an fd, look it up in the current process's per process
4412 * open file table, and return its fileproc's flags field.
4413 *
4414 * Parameters: fd fd whose flags are to be
4415 * retrieved
4416 * flags pointer to flags data area
4417 *
4418 * Returns: 0 Success
4419 * EBADF Bad file descriptor
4421 *
4422 * Implicit returns:
4423 * *flags (modified) Returned flags field
4424 *
4425 * Locks: This function internally takes and drops the proc_fdlock for
4426 * the current process
4427 */
4428 int
4429 file_flags(int fd, int *flags)
4430 {
4431 proc_t p = current_proc();
4432 struct fileproc *fp;
4433 int error = EBADF;
4434
4435 proc_fdlock_spin(p);
4436 fp = fp_get_noref_locked(p, fd);
4437 if (fp) {
4438 *flags = (int)fp->f_flag;
4439 error = 0;
4440 }
4441 proc_fdunlock(p);
4442
4443 return error;
4444 }
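
/*
 * Illustrative sketch (editor's addition): file_flags() returns the
 * fcntl-style open flags (FREAD, FWRITE, FNONBLOCK, ...) of the descriptor's
 * backing file; "my_fd" is a hypothetical descriptor.
 *
 *	int flags;
 *
 *	if (file_flags(my_fd, &flags) == 0 && (flags & FNONBLOCK)) {
 *		// descriptor is in non-blocking mode
 *	}
 */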
4445
4446
4447 /*
4448 * file_drop
4449 *
4450 * Description: Drop an iocount reference on an fd, and wake up any waiters
4451 * for draining (i.e. blocked in fileproc_drain() called during
4452 * the last attempt to close a file).
4453 *
4454 * Parameters: fd fd on which an iocount reference is
4455 * to be dropped
4456 *
4457 * Returns: 0 Success
4458 *
4459 * Description: Given an fd, look it up in the current process's per process
4460 * open file table, and drop its fileproc's fp_iocount by one
4461 *
4462 * Notes: This is intended as the corresponding operation to the
4463 * file_vnode() and file_socket() functions.
4464 *
4465 * If the caller can't possibly hold an I/O reference,
4466 * this function will panic the kernel rather than allowing
4467 * for memory corruption. Callers must therefore only call this
4468 * function if they previously acquired an I/O reference on this file.
4469 *
4470 * Use of this function is discouraged.
4471 */
4472 int
4473 file_drop(int fd)
4474 {
4475 struct fileproc *fp;
4476 proc_t p = current_proc();
4477 int needwakeup = 0;
4478
4479 proc_fdlock_spin(p);
4480 fp = fp_get_noref_locked_with_iocount(p, fd);
4481
4482 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4483 if (fp->fp_flags & FP_SELCONFLICT) {
4484 fp->fp_flags &= ~FP_SELCONFLICT;
4485 }
4486
4487 if (p->p_fpdrainwait) {
4488 p->p_fpdrainwait = 0;
4489 needwakeup = 1;
4490 }
4491 }
4492 proc_fdunlock(p);
4493
4494 if (needwakeup) {
4495 wakeup(&p->p_fpdrainwait);
4496 }
4497 return 0;
4498 }
4499
4500
4501
4502 /*
4503 * falloc_withalloc
4504 *
4505 * Create a new open file structure and allocate
4506 * a file descriptor for the process that refers to it.
4507 *
4510 * Description: Allocate an entry in the per process open file table and
4511 * return the corresponding fileproc and fd.
4512 *
4513 * Parameters: p The process in whose open file
4514 * table the fd is to be allocated
4515 * resultfp Pointer to fileproc pointer
4516 * return area
4517 * resultfd Pointer to fd return area
4518 * ctx VFS context
4519 * fp_zalloc fileproc allocator to use
4520 * crarg allocator args
4521 *
4522 * Returns: 0 Success
4523 * ENFILE Too many open files in system
4524 * fdalloc:EMFILE Too many open files in process
4525 * fdalloc:ENOMEM M_OFILETABL zone exhausted
4526 * ENOMEM fp_zone or fg_zone zone
4527 * exhausted
4528 *
4529 * Implicit returns:
4530 * *resultfp (modified) Returned fileproc pointer
4531 * *resultfd (modified) Returned fd
4532 *
4533 * Notes: This function takes separate process and context arguments
4534 * solely to support kern_exec.c; otherwise, it would take
4535 * neither, and use the vfs_context_current() routine internally.
4536 */
4537 int
4538 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4539 vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg)
4540 {
4541 struct fileproc *fp;
4542 struct fileglob *fg;
4543 int error, nfd;
4544
4545 /* Make sure we don't go beyond the system-wide limit */
4546 if (nfiles >= maxfiles) {
4547 tablefull("file");
4548 return ENFILE;
4549 }
4550
4551 proc_fdlock(p);
4552
4553 /* fdalloc will make sure the process stays below per-process limit */
4554 if ((error = fdalloc(p, 0, &nfd))) {
4555 proc_fdunlock(p);
4556 return error;
4557 }
4558
4559 #if CONFIG_MACF
4560 error = mac_file_check_create(proc_ucred(p));
4561 if (error) {
4562 proc_fdunlock(p);
4563 return error;
4564 }
4565 #endif
4566
4567 /*
4568 * Allocate and initialize the fileproc and fileglob with the
4569 * proc_fdlock dropped, then install the new fileproc in the
4570 * reserved descriptor slot.
4571 */
4573 proc_fdunlock(p);
4574
4575 fp = (*fp_zalloc)(crarg);
4576 if (fp == NULL) {
4577 return ENOMEM;
4578 }
4579 fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
4580 lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
4581
4582 os_ref_retain_locked(&fp->fp_iocount);
4583 os_ref_init_raw(&fg->fg_count, &f_refgrp);
4584 fg->fg_ops = &uninitops;
4585 fp->fp_glob = fg;
4586 #if CONFIG_MACF
4587 mac_file_label_init(fg);
4588 #endif
4589
4590 kauth_cred_ref(ctx->vc_ucred);
4591
4592 fp->f_cred = ctx->vc_ucred;
4593
4594 #if CONFIG_MACF
4595 mac_file_label_associate(fp->f_cred, fg);
4596 #endif
4597
4598 os_atomic_inc(&nfiles, relaxed);
4599
4600 proc_fdlock(p);
4601
4602 p->p_fd->fd_ofiles[nfd] = fp;
4603
4604 proc_fdunlock(p);
4605
4606 if (resultfp) {
4607 *resultfp = fp;
4608 }
4609 if (resultfd) {
4610 *resultfd = nfd;
4611 }
4612
4613 return 0;
4614 }
4615
4616 int
4617 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4618 {
4619 return falloc_withalloc(p, resultfp, resultfd, ctx,
4620 fileproc_alloc_init, NULL);
4621 }
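
/*
 * Illustrative sketch (editor's addition): the usual calling pattern for
 * falloc().  After a successful return the descriptor slot is still marked
 * UF_RESERVED; the caller initializes the fileglob, publishes the fd with
 * procfdtbl_releasefd(), and drops the extra fp_iocount with fp_drop().
 * "my_ops" and "my_obj" are hypothetical.
 *
 *	struct fileproc *fp;
 *	int fd, error;
 *
 *	error = falloc(p, &fp, &fd, vfs_context_current());
 *	if (error) {
 *		return error;
 *	}
 *	fp->f_flag = FREAD | FWRITE;
 *	fp->f_ops = &my_ops;		// struct fileops for this file type
 *	fp->f_data = (caddr_t)my_obj;	// per-open state
 *
 *	proc_fdlock(p);
 *	procfdtbl_releasefd(p, fd, NULL);
 *	fp_drop(p, fd, fp, 1);
 *	proc_fdunlock(p);
 */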
4622
4623 /*
4624 * fdexec
4625 *
4626 * Description: Perform close-on-exec processing for all files in a process
4627 * that are either marked as close-on-exec, or which were in the
4628 * process of being opened at the time of the execve
4629 *
4630 * Also handles the case (via posix_spawn()) where -all-
4631 * files except those marked with "inherit" are treated as
4632 * close-on-exec.
4633 *
4634 * Parameters: p Pointer to process calling
4635 * execve
4636 *
4637 * Returns: void
4638 *
4639 * Locks: This function internally takes and drops proc_fdlock()
4640 * but assumes the tables don't grow/change while unlocked.
4641 *
4642 */
4643 void
4644 fdexec(proc_t p, short flags, int self_exec)
4645 {
4646 struct filedesc *fdp = p->p_fd;
4647 int i;
4648 boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4649 thread_t self = current_thread();
4650 struct uthread *ut = get_bsdthread_info(self);
4651 struct kqworkq *dealloc_kqwq = NULL;
4652
4653 /*
4654 * If the current thread is bound as a workq/workloop
4655 * servicing thread, we need to unbind it first.
4656 */
4657 if (ut->uu_kqr_bound && self_exec) {
4658 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4659 }
4660
4661 proc_fdlock(p);
4662
4663 /*
4664 * Deallocate the knotes for this process
4665 * and mark the tables non-existent so
4666 * subsequent kqueue closes go faster.
4667 */
4668 knotes_dealloc(p);
4669 assert(fdp->fd_knlistsize == 0);
4670 assert(fdp->fd_knhashmask == 0);
4671
4672 for (i = fdp->fd_lastfile; i >= 0; i--) {
4673 struct fileproc *fp = fdp->fd_ofiles[i];
4674 char *flagp = &fdp->fd_ofileflags[i];
4675
4676 if (fp && cloexec_default) {
4677 /*
4678 * Reverse the usual semantics of file descriptor
4679 * inheritance - all of them should be closed
4680 * except files marked explicitly as "inherit" and
4681 * not marked close-on-exec.
4682 */
4683 if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
4684 *flagp |= UF_EXCLOSE;
4685 }
4686 *flagp &= ~UF_INHERIT;
4687 }
4688
4689 if (
4690 ((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
4691 #if CONFIG_MACF
4692 || (fp && mac_file_check_inherit(proc_ucred(p), fp->fp_glob))
4693 #endif
4694 ) {
4695 fp_close_and_unlock(p, i, fp, 0);
4696 proc_fdlock(p);
4697 }
4698 }
4699
4700 /* release the per-process workq kq */
4701 if (fdp->fd_wqkqueue) {
4702 dealloc_kqwq = fdp->fd_wqkqueue;
4703 fdp->fd_wqkqueue = NULL;
4704 }
4705
4706 proc_fdunlock(p);
4707
4708 /* Anything to free? */
4709 if (dealloc_kqwq) {
4710 kqworkq_dealloc(dealloc_kqwq);
4711 }
4712 }
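
/*
 * Illustrative sketch (editor's addition): the user-visible posix_spawn()
 * behaviour the cloexec_default path above implements, assuming the
 * Apple-specific POSIX_SPAWN_CLOEXEC_DEFAULT flag from <spawn.h>.
 * Descriptors the child should keep are marked "inherit" via posix_spawn
 * file actions (e.g. posix_spawn_file_actions_adddup2() with the same
 * source and target fd).
 *
 *	posix_spawnattr_t attr;
 *
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
 *	// all other open descriptors are closed across this exec
 *	posix_spawn(&pid, path, &file_actions, &attr, argv, envp);
 */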
4713
4714
4715 /*
4716 * fdcopy
4717 *
4718 * Description: Copy a filedesc structure. This is normally used as part of
4719 * forkproc() when forking a new process, to copy the per process
4720 * open file table over to the new process.
4721 *
4722 * Parameters: p Process whose open file table
4723 * is to be copied (parent)
4724 * uth_cdir Per thread current working
4725 * directory, or NULL
4726 *
4727 * Returns: NULL Copy failed
4728 * !NULL Pointer to new struct filedesc
4729 *
4730 * Locks: This function internally takes and drops proc_fdlock()
4731 *
4732 * Notes: Files are copied directly, ignoring the new resource limits
4733 * for the process that's being copied into. Since the descriptor
4734 * references are just additional references, this does not count
4735 * against the number of open files on the system.
4736 *
4737 * The struct filedesc includes the current working directory,
4738 * and the current root directory, if the process is chroot'ed.
4739 *
4740 * If the exec was called by a thread using a per thread current
4741 * working directory, we inherit the working directory from the
4742 * thread making the call, rather than from the process.
4743 *
4744 * In the case of a failure to obtain a reference, for most cases,
4745 * the file entry will be silently dropped. There's an exception
4746 * for the case of a chroot dir, since a failure to obtain a
4747 * reference there would constitute an "escape" from the chroot
4748 * environment, which must not be allowed. In that case, we will
4749 * deny the execve() operation, rather than allowing the escape.
4750 */
4751 struct filedesc *
4752 fdcopy(proc_t p, vnode_t uth_cdir)
4753 {
4754 struct filedesc *newfdp, *fdp = p->p_fd;
4755 int i;
4756 struct fileproc *ofp, *fp;
4757 vnode_t v_dir;
4758
4759 newfdp = zalloc(fdp_zone);
4760
4761 proc_fdlock(p);
4762
4763 /*
4764 * the FD_CHROOT flag will be inherited via this copy
4765 */
4766 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4767
4768 /*
4769 * If we are running with per-thread current working directories,
4770 * inherit the new current working directory from the current thread
4771 * instead, before we take our references.
4772 */
4773 if (uth_cdir != NULLVP) {
4774 newfdp->fd_cdir = uth_cdir;
4775 }
4776
4777 /*
4778 * For both fd_cdir and fd_rdir make sure we get
4779 * a valid reference... if we can't, then set
4780 * the pointer(s) to NULL in the child... this
4781 * will keep us from using a non-referenced vp
4782 * and allows us to do the vnode_rele only on
4783 * a properly referenced vp
4784 */
4785 if ((v_dir = newfdp->fd_cdir)) {
4786 if (vnode_getwithref(v_dir) == 0) {
4787 if ((vnode_ref(v_dir))) {
4788 newfdp->fd_cdir = NULL;
4789 }
4790 vnode_put(v_dir);
4791 } else {
4792 newfdp->fd_cdir = NULL;
4793 }
4794 }
4795 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4796 /*
4797 * we couldn't get a new reference on
4798 * the current working directory being
4799 * inherited... we might as well drop
4800 * our reference from the parent also
4801 * since the vnode has gone DEAD making
4802 * it useless... by dropping it we'll
4803 * be that much closer to recycling it
4804 */
4805 vnode_rele(fdp->fd_cdir);
4806 fdp->fd_cdir = NULL;
4807 }
4808
4809 if ((v_dir = newfdp->fd_rdir)) {
4810 if (vnode_getwithref(v_dir) == 0) {
4811 if ((vnode_ref(v_dir))) {
4812 newfdp->fd_rdir = NULL;
4813 }
4814 vnode_put(v_dir);
4815 } else {
4816 newfdp->fd_rdir = NULL;
4817 }
4818 }
4819 /* Coming from a chroot environment and unable to get a reference... */
4820 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4821 proc_fdunlock(p);
4822 /*
4823 * We couldn't get a new reference on
4824 * the chroot directory being
4825 * inherited... this is fatal, since
4826 * otherwise it would constitute an
4827 * escape from a chroot environment by
4828 * the new process.
4829 */
4830 if (newfdp->fd_cdir) {
4831 vnode_rele(newfdp->fd_cdir);
4832 }
4833 zfree(fdp_zone, newfdp);
4834 return NULL;
4835 }
4836
4837 /*
4838 * If the number of open files fits in the internal arrays
4839 * of the open file structure, use them, otherwise allocate
4840 * additional memory for the number of descriptors currently
4841 * in use.
4842 */
4843 if (newfdp->fd_lastfile < NDFILE) {
4844 i = NDFILE;
4845 } else {
4846 /*
4847 * Compute the smallest multiple of NDEXTENT needed
4848 * for the file descriptors currently in use,
4849 * allowing the table to shrink.
4850 */
4851 i = newfdp->fd_nfiles;
4852 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
4853 i /= 2;
4854 }
4855 }
4856 proc_fdunlock(p);
4857
4858 newfdp->fd_ofiles = kheap_alloc(KM_OFILETABL, i * OFILESIZE,
4859 Z_WAITOK | Z_ZERO);
4860 if (newfdp->fd_ofiles == NULL) {
4861 if (newfdp->fd_cdir) {
4862 vnode_rele(newfdp->fd_cdir);
4863 }
4864 if (newfdp->fd_rdir) {
4865 vnode_rele(newfdp->fd_rdir);
4866 }
4867
4868 zfree(fdp_zone, newfdp);
4869 return NULL;
4870 }
4871 proc_fdlock(p);
4872
4873 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4874 newfdp->fd_nfiles = i;
4875
4876 if (fdp->fd_nfiles > 0) {
4877 struct fileproc **fpp;
4878 char *flags;
4879
4880 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4881 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4882 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4883 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4884
4885 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4886 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4887 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
4888 if ((ofp = *fpp) != NULL &&
4889 0 == (ofp->fp_glob->fg_lflags & FG_CONFINED) &&
4890 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
4891 #if DEBUG
4892 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
4893 panic("complex fileproc");
4894 }
4895 #endif
4896 fp = fileproc_alloc_init(NULL);
4897 if (fp == NULL) {
4898 /*
4899 * XXX no room to copy, unable to
4900 * XXX safely unwind state at present
4901 */
4902 *fpp = NULL;
4903 } else {
4904 fp->fp_flags |=
4905 (ofp->fp_flags & ~FP_TYPEMASK);
4906 fp->fp_glob = ofp->fp_glob;
4907 fg_ref(p, fp->fp_glob);
4908 *fpp = fp;
4909 }
4910 } else {
4911 *fpp = NULL;
4912 *flags = 0;
4913 }
4914 if (*fpp == NULL) {
4915 if (i == newfdp->fd_lastfile && i > 0) {
4916 newfdp->fd_lastfile--;
4917 }
4918 if (i < newfdp->fd_freefile) {
4919 newfdp->fd_freefile = i;
4920 }
4921 }
4922 }
4923 }
4924
4925 proc_fdunlock(p);
4926
4927 /*
4928 * Initialize knote and kqueue tracking structs
4929 */
4930 newfdp->fd_knlist = NULL;
4931 newfdp->fd_knlistsize = 0;
4932 newfdp->fd_knhash = NULL;
4933 newfdp->fd_knhashmask = 0;
4934 newfdp->fd_kqhash = NULL;
4935 newfdp->fd_kqhashmask = 0;
4936 newfdp->fd_wqkqueue = NULL;
4937 lck_mtx_init(&newfdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
4938 lck_mtx_init(&newfdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
4939
4940 return newfdp;
4941 }
4942
4943
4944 /*
4945 * fdfree
4946 *
4947 * Description: Release a filedesc (per process open file table) structure;
4948 * this is done on process exit(), or from forkproc_free() if
4949 * the fork fails for some reason subsequent to a successful
4950 * call to fdcopy()
4951 *
4952 * Parameters: p Pointer to process going away
4953 *
4954 * Returns: void
4955 *
4956 * Locks: This function internally takes and drops proc_fdlock()
4957 */
4958 void
4959 fdfree(proc_t p)
4960 {
4961 struct filedesc *fdp;
4962 struct fileproc *fp;
4963 struct kqworkq *dealloc_kqwq = NULL;
4964 int i;
4965
4966 proc_fdlock(p);
4967
4968 if (p == kernproc || NULL == (fdp = p->p_fd)) {
4969 proc_fdunlock(p);
4970 return;
4971 }
4972
4973 extern struct filedesc filedesc0;
4974
4975 if (&filedesc0 == fdp) {
4976 panic("filedesc0");
4977 }
4978
4979 /*
4980 * deallocate all the knotes up front and claim empty
4981 * tables to make any subsequent kqueue closes faster.
4982 */
4983 knotes_dealloc(p);
4984 assert(fdp->fd_knlistsize == 0);
4985 assert(fdp->fd_knhashmask == 0);
4986
4987 /*
4988 * dealloc all workloops that have outstanding retains
4989 * when created with scheduling parameters.
4990 */
4991 kqworkloops_dealloc(p);
4992
4993 /* close file descriptors */
4994 if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
4995 for (i = fdp->fd_lastfile; i >= 0; i--) {
4996 if ((fp = fdp->fd_ofiles[i]) != NULL) {
4997 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
4998 panic("fdfree: found fp with UF_RESERVED");
4999 }
5000 fp_close_and_unlock(p, i, fp, 0);
5001 proc_fdlock(p);
5002 }
5003 }
5004 kheap_free(KM_OFILETABL, fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE);
5005 fdp->fd_ofiles = NULL;
5006 fdp->fd_nfiles = 0;
5007 }
5008
5009 if (fdp->fd_wqkqueue) {
5010 dealloc_kqwq = fdp->fd_wqkqueue;
5011 fdp->fd_wqkqueue = NULL;
5012 }
5013
5014 proc_fdunlock(p);
5015
5016 if (dealloc_kqwq) {
5017 kqworkq_dealloc(dealloc_kqwq);
5018 }
5019 if (fdp->fd_cdir) {
5020 vnode_rele(fdp->fd_cdir);
5021 }
5022 if (fdp->fd_rdir) {
5023 vnode_rele(fdp->fd_rdir);
5024 }
5025
5026 proc_fdlock_spin(p);
5027 p->p_fd = NULL;
5028 proc_fdunlock(p);
5029
5030 if (fdp->fd_kqhash) {
5031 for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
5032 assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
5033 }
5034 hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
5035 }
5036
5037 lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
5038 lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
5039
5040 zfree(fdp_zone, fdp);
5041 }
5042
5043 /*
5044 * fileproc_drain
5045 *
5046 * Description: Drain out pending I/O operations
5047 *
5048 * Parameters: p Process closing this file
5049 * fp fileproc struct for the open
5050 * instance on the file
5051 *
5052 * Returns: void
5053 *
5054 * Locks: Assumes the caller holds the proc_fdlock
5055 *
5056 * Notes: For character devices, this occurs on the last close of the
5057 * device; for all other file descriptors, this occurs on each
5058 * close to prevent fds from being closed out from under
5059 * operations currently in progress and blocked
5060 *
5061 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5062 * regarding their use and interaction with this function.
5063 */
5064 void
5065 fileproc_drain(proc_t p, struct fileproc * fp)
5066 {
5067 struct vfs_context context;
5068 thread_t thread;
5069 bool is_current_proc;
5070
5071 is_current_proc = (p == current_proc());
5072
5073 if (!is_current_proc) {
5074 proc_lock(p);
5075 thread = proc_thread(p); /* XXX */
5076 thread_reference(thread);
5077 proc_unlock(p);
5078 } else {
5079 thread = current_thread();
5080 }
5081
5082 context.vc_thread = thread;
5083 context.vc_ucred = fp->fp_glob->fg_cred;
5084
5085 /* Set the vflag for drain */
5086 fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
5087
5088 while (os_ref_get_count(&fp->fp_iocount) > 1) {
5089 lck_mtx_convert_spin(&p->p_fdmlock);
5090
5091 fo_drain(fp, &context);
5092 if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
5093 if (waitq_wakeup64_all((struct waitq *)fp->fp_wset, NO_EVENT64,
5094 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5095 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->fp_wset, fp);
5096 }
5097 }
5098 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5099 if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5100 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5101 panic("bad select_conflict_queue");
5102 }
5103 }
5104 p->p_fpdrainwait = 1;
5105
5106 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5107 }
5108 #if DIAGNOSTIC
5109 if ((fp->fp_flags & FP_INSELECT) != 0) {
5110 panic("FP_INSELECT set on drained fp");
5111 }
5112 #endif
5113 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5114 fp->fp_flags &= ~FP_SELCONFLICT;
5115 }
5116
5117 if (!is_current_proc) {
5118 thread_deallocate(thread);
5119 }
5120 }
5121
5122
5123 /*
5124 * fp_free
5125 *
5126 * Description: Release the fd and free the fileproc associated with the fd
5127 * in the per process open file table of the specified process;
5128 * these values must correspond.
5129 *
5130 * Parameters: p Process containing fd
5131 * fd fd to be released
5132 * fp fileproc to be freed
5133 */
5134 void
5135 fp_free(proc_t p, int fd, struct fileproc * fp)
5136 {
5137 proc_fdlock_spin(p);
5138 fdrelse(p, fd);
5139 proc_fdunlock(p);
5140
5141 fg_free(fp->fp_glob);
5142 os_ref_release_live(&fp->fp_iocount);
5143 fileproc_free(fp);
5144 }
5145
5146
5147 /*
5148 * sys_flock
5149 *
5150 * Description: Apply an advisory lock on a file descriptor.
5151 *
5152 * Parameters: p Process making request
5153 * uap->fd fd on which the lock is to be
5154 * attempted
5155 * uap->how (Un)Lock bits, including type
5156 * retval Pointer to the call return area
5157 *
5158 * Returns: 0 Success
5159 * fp_getfvp:EBADF Bad file descriptor
5160 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5161 * vnode_getwithref:???
5162 * VNOP_ADVLOCK:???
5163 *
5167 * Notes: Just attempt to get a record lock of the requested type on
5168 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5169 */
5170 int
5171 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5172 {
5173 int fd = uap->fd;
5174 int how = uap->how;
5175 struct fileproc *fp;
5176 struct vnode *vp;
5177 struct flock lf;
5178 vfs_context_t ctx = vfs_context_current();
5179 int error = 0;
5180
5181 AUDIT_ARG(fd, uap->fd);
5182 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5183 return error;
5184 }
5185 if ((error = vnode_getwithref(vp))) {
5186 goto out1;
5187 }
5188 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5189
5190 lf.l_whence = SEEK_SET;
5191 lf.l_start = 0;
5192 lf.l_len = 0;
5193 if (how & LOCK_UN) {
5194 lf.l_type = F_UNLCK;
5195 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5196 goto out;
5197 }
5198 if (how & LOCK_EX) {
5199 lf.l_type = F_WRLCK;
5200 } else if (how & LOCK_SH) {
5201 lf.l_type = F_RDLCK;
5202 } else {
5203 error = EBADF;
5204 goto out;
5205 }
5206 #if CONFIG_MACF
5207 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob, F_SETLK, &lf);
5208 if (error) {
5209 goto out;
5210 }
5211 #endif
5212 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5213 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5214 ctx, NULL);
5215 if (!error) {
5216 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5217 }
5218 out:
5219 (void)vnode_put(vp);
5220 out1:
5221 fp_drop(p, fd, fp, 0);
5222 return error;
5223 }
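
/*
 * Illustrative sketch (editor's addition): the user-space view of this
 * syscall.  flock() maps LOCK_SH/LOCK_EX/LOCK_UN onto whole-file advisory
 * record locks as described above; LOCK_NB makes the request non-blocking.
 *
 *	#include <sys/file.h>
 *
 *	int fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0644);
 *	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
 *		// EWOULDBLOCK: another descriptor holds a conflicting lock
 *	}
 *	// ... critical section ...
 *	flock(fd, LOCK_UN);
 */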
5224
5225 /*
5226 * sys_fileport_makeport
5227 *
5228 * Description: Obtain a Mach send right for a given file descriptor.
5229 *
5230 * Parameters: p Process calling fileport
5231 * uap->fd The fd to reference
5232 * uap->portnamep User address at which to place port name.
5233 *
5234 * Returns: 0 Success.
5235 * EBADF Bad file descriptor.
5236 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5237 * EFAULT Address at which to store port name is not valid.
5238 * EAGAIN Resource shortage.
5239 *
5240 * Implicit returns:
5241 * On success, name of send right is stored at user-specified address.
5242 */
5243 int
5244 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5245 __unused int *retval)
5246 {
5247 int err;
5248 int fd = uap->fd;
5249 user_addr_t user_portaddr = uap->portnamep;
5250 struct fileproc *fp = FILEPROC_NULL;
5251 struct fileglob *fg = NULL;
5252 ipc_port_t fileport;
5253 mach_port_name_t name = MACH_PORT_NULL;
5254
5255 proc_fdlock(p);
5256 err = fp_lookup(p, fd, &fp, 1);
5257 if (err != 0) {
5258 goto out_unlock;
5259 }
5260
5261 fg = fp->fp_glob;
5262 if (!fg_sendable(fg)) {
5263 err = EINVAL;
5264 goto out_unlock;
5265 }
5266
5267 if (fp_isguarded(fp, GUARD_FILEPORT)) {
5268 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5269 goto out_unlock;
5270 }
5271
5272 /* Dropped when port is deallocated */
5273 fg_ref(p, fg);
5274
5275 proc_fdunlock(p);
5276
5277 /* Allocate and initialize a port */
5278 fileport = fileport_alloc(fg);
5279 if (fileport == IPC_PORT_NULL) {
5280 fg_drop_live(fg);
5281 err = EAGAIN;
5282 goto out;
5283 }
5284
5285 /* Add an entry. Deallocates port on failure. */
5286 name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5287 if (!MACH_PORT_VALID(name)) {
5288 err = EINVAL;
5289 goto out;
5290 }
5291
5292 err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5293 if (err != 0) {
5294 goto out;
5295 }
5296
5297 /* Tag the fileglob for debugging purposes */
5298 lck_mtx_lock_spin(&fg->fg_lock);
5299 fg->fg_lflags |= FG_PORTMADE;
5300 lck_mtx_unlock(&fg->fg_lock);
5301
5302 fp_drop(p, fd, fp, 0);
5303
5304 return 0;
5305
5306 out_unlock:
5307 proc_fdunlock(p);
5308 out:
5309 if (MACH_PORT_VALID(name)) {
5310 /* Don't care if another thread races us to deallocate the entry */
5311 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5312 }
5313
5314 if (fp != FILEPROC_NULL) {
5315 fp_drop(p, fd, fp, 0);
5316 }
5317
5318 return err;
5319 }
5320
5321 void
5322 fileport_releasefg(struct fileglob *fg)
5323 {
5324 (void)fg_drop(PROC_NULL, fg);
5325 }
5326
5327 /*
5328 * fileport_makefd
5329 *
5330 * Description: Obtain the file descriptor for a given Mach send right.
5331 *
5332 * Returns: 0 Success
5333 * EINVAL Invalid Mach port name, or port is not for a file.
5334 * fdalloc:EMFILE
5335 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5336 *
5337 * Implicit returns:
5338 * *retval (modified) The new descriptor
5339 */
5340 int
5341 fileport_makefd(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5342 {
5343 struct fileglob *fg;
5344 struct fileproc *fp = FILEPROC_NULL;
5345 int fd;
5346 int err;
5347
5348 fg = fileport_port_to_fileglob(port);
5349 if (fg == NULL) {
5350 err = EINVAL;
5351 goto out;
5352 }
5353
5354 fp = fileproc_alloc_init(NULL);
5355 if (fp == FILEPROC_NULL) {
5356 err = ENOMEM;
5357 goto out;
5358 }
5359
5360 proc_fdlock(p);
5361 err = fdalloc(p, 0, &fd);
5362 if (err != 0) {
5363 proc_fdunlock(p);
5364 goto out;
5365 }
5366 if (uf_flags) {
5367 *fdflags(p, fd) |= uf_flags;
5368 }
5369
5370 fp->fp_glob = fg;
5371 fg_ref(p, fg);
5372
5373 procfdtbl_releasefd(p, fd, fp);
5374 proc_fdunlock(p);
5375
5376 *retval = fd;
5377 err = 0;
5378 out:
5379 if ((fp != NULL) && (0 != err)) {
5380 fileproc_free(fp);
5381 }
5382
5383 return err;
5384 }
5385
5386 /*
5387 * sys_fileport_makefd
5388 *
5389 * Description: Obtain the file descriptor for a given Mach send right.
5390 *
5391 * Parameters: p Process calling fileport
5392 * uap->port Name of send right to file port.
5393 *
5394 * Returns: 0 Success
5395 * EINVAL Invalid Mach port name, or port is not for a file.
5396 * fdalloc:EMFILE
5397 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5398 *
5399 * Implicit returns:
5400 * *retval (modified) The new descriptor
5401 */
5402 int
5403 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5404 {
5405 ipc_port_t port = IPC_PORT_NULL;
5406 mach_port_name_t send = uap->port;
5407 kern_return_t res;
5408 int err;
5409
5410 res = ipc_object_copyin(get_task_ipcspace(p->task),
5411 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5412
5413 if (res == KERN_SUCCESS) {
5414 err = fileport_makefd(p, port, UF_EXCLOSE, retval);
5415 } else {
5416 err = EINVAL;
5417 }
5418
5419 if (IPC_PORT_NULL != port) {
5420 ipc_port_release_send(port);
5421 }
5422
5423 return err;
5424 }
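
/*
 * Illustrative sketch (editor's addition): the user-space wrappers for the
 * two fileport syscalls above, assuming the declarations in <sys/fileport.h>.
 * A descriptor is converted into a Mach send right, which can be carried in
 * a Mach message to another process and converted back into a descriptor.
 *
 *	#include <sys/fileport.h>
 *	#include <mach/mach.h>
 *
 *	mach_port_t port = MACH_PORT_NULL;
 *
 *	if (fileport_makeport(fd, &port) == 0) {
 *		// ... transfer "port" in a Mach message ...
 *		int newfd = fileport_makefd(port);	// receiving side
 *		mach_port_deallocate(mach_task_self(), port);
 *	}
 */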
5425
5426
5427 /*
5428 * dupfdopen
5429 *
5430 * Description: Duplicate the specified descriptor to a free descriptor;
5431 * this is the second half of fdopen(), above.
5432 *
5433 * Parameters: fdp filedesc pointer to fill in
5434 * indx fd to dup to
5435 * dfd fd to dup from
5436 * flags open flags for the new fd
5437 * error command code
5438 *
5439 * Returns: 0 Success
5440 * EBADF Source fd is bad
5441 * EACCES Requested mode not allowed
5442 * !0 'error', if not ENODEV or
5443 * ENXIO
5444 *
5445 * Notes: XXX This is not thread safe; see fdopen() above
5446 */
5447 int
5448 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5449 {
5450 struct fileproc *wfp;
5451 struct fileproc *fp;
5452 #if CONFIG_MACF
5453 int myerror;
5454 #endif
5455 proc_t p = current_proc();
5456
5457 /*
5458 * If the to-be-dup'd fd number is greater than the allowed number
5459 * of file descriptors, or the fd to be dup'd has already been
5460 * closed, reject. Note, check for new == old is necessary as
5461 * falloc could allocate an already closed to-be-dup'd descriptor
5462 * as the new descriptor.
5463 */
5464 proc_fdlock(p);
5465
5466 fp = fdp->fd_ofiles[indx];
5467 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5468 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5469 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5470 proc_fdunlock(p);
5471 return EBADF;
5472 }
5473 #if CONFIG_MACF
5474 myerror = mac_file_check_dup(proc_ucred(p), wfp->fp_glob, dfd);
5475 if (myerror) {
5476 proc_fdunlock(p);
5477 return myerror;
5478 }
5479 #endif
5480 /*
5481 * There are two cases of interest here.
5482 *
5483 * For ENODEV simply dup (dfd) to file descriptor
5484 * (indx) and return.
5485 *
5486 * For ENXIO steal away the file structure from (dfd) and
5487 * store it in (indx). (dfd) is effectively closed by
5488 * this operation.
5489 *
5490 * Any other error code is just returned.
5491 */
5492 switch (error) {
5493 case ENODEV:
5494 if (fp_isguarded(wfp, GUARD_DUP)) {
5495 proc_fdunlock(p);
5496 return EPERM;
5497 }
5498
5499 /*
5500 * Check that the mode the file is being opened for is a
5501 * subset of the mode of the existing descriptor.
5502 */
5503 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5504 proc_fdunlock(p);
5505 return EACCES;
5506 }
5507 if (indx > fdp->fd_lastfile) {
5508 fdp->fd_lastfile = indx;
5509 }
5510
5511 if (fp->fp_glob) {
5512 fg_free(fp->fp_glob);
5513 }
5514 fg_ref(p, wfp->fp_glob);
5515 fp->fp_glob = wfp->fp_glob;
5516
5517 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5518 ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0);
5519
5520 proc_fdunlock(p);
5521 return 0;
5522
5523 default:
5524 proc_fdunlock(p);
5525 return error;
5526 }
5527 /* NOTREACHED */
5528 }
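
/*
 * Illustrative sketch (editor's addition): the user-visible effect of the
 * ENODEV path above.  Opening /dev/fd/N effectively dup()s descriptor N,
 * provided the requested access mode is a subset of the original one.
 *
 *	int fd = open("/tmp/data", O_RDONLY);
 *	char path[32];
 *
 *	snprintf(path, sizeof(path), "/dev/fd/%d", fd);
 *	int dupfd = open(path, O_RDONLY);	// refers to the same open file
 *	// open(path, O_RDWR) would fail with EACCES, per the check above
 */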
5529
5530
5531 /*
5532 * fo_read
5533 *
5534 * Description: Generic fileops read indirected through the fileops pointer
5535 * in the fileproc structure
5536 *
5537 * Parameters: fp fileproc structure pointer
5538 * uio user I/O structure pointer
5539 * flags FOF_ flags
5540 * ctx VFS context for operation
5541 *
5542 * Returns: 0 Success
5543 * !0 Errno from read
5544 */
5545 int
5546 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5547 {
5548 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5549 }
5550
5551 int
5552 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5553 {
5554 #pragma unused(fp, uio, flags, ctx)
5555 return ENXIO;
5556 }
5557
5558
5559 /*
5560 * fo_write
5561 *
5562 * Description: Generic fileops write indirected through the fileops pointer
5563 * in the fileproc structure
5564 *
5565 * Parameters: fp fileproc structure pointer
5566 * uio user I/O structure pointer
5567 * flags FOF_ flags
5568 * ctx VFS context for operation
5569 *
5570 * Returns: 0 Success
5571 * !0 Errno from write
5572 */
5573 int
5574 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5575 {
5576 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5577 }
5578
5579 int
5580 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5581 {
5582 #pragma unused(fp, uio, flags, ctx)
5583 return ENXIO;
5584 }
5585
5586
5587 /*
5588 * fo_ioctl
5589 *
5590 * Description: Generic fileops ioctl indirected through the fileops pointer
5591 * in the fileproc structure
5592 *
5593 * Parameters: fp fileproc structure pointer
5594 * com ioctl command
5595 * data pointer to internalized copy
5596 * of user space ioctl command
5597 * parameter data in kernel space
5598 * ctx VFS context for operation
5599 *
5600 * Returns: 0 Success
5601 * !0 Errno from ioctl
5602 *
5603 * Locks: The caller is assumed to have held the proc_fdlock; this
5604 * function releases and reacquires this lock. If the caller
5605 * accesses data protected by this lock prior to calling this
5606 * function, it will need to revalidate/reacquire any cached
5607 * protected data obtained prior to the call.
5608 */
5609 int
5610 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5611 {
5612 int error;
5613
5614 proc_fdunlock(vfs_context_proc(ctx));
5615 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5616 proc_fdlock(vfs_context_proc(ctx));
5617 return error;
5618 }
5619
5620 int
5621 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5622 {
5623 #pragma unused(fp, com, data, ctx)
5624 return ENOTTY;
5625 }
5626
5627
5628 /*
5629 * fo_select
5630 *
5631 * Description: Generic fileops select indirected through the fileops pointer
5632 * in the fileproc structure
5633 *
5634 * Parameters: fp fileproc structure pointer
5635 * which select which
5636 * wql pointer to wait queue list
5637 * ctx VFS context for operation
5638 *
5639 * Returns: 0 Success
5640 * !0 Errno from select
5641 */
5642 int
5643 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5644 {
5645 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5646 }
5647
5648 int
5649 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5650 {
5651 #pragma unused(fp, which, wql, ctx)
5652 return ENOTSUP;
5653 }
5654
5655
5656 /*
5657 * fo_close
5658 *
5659 * Description: Generic fileops close indirected through the fileops pointer
5660 * in the fileproc structure
5661 *
5662 * Parameters: fp fileproc structure pointer for
5663 * file to close
5664 * ctx VFS context for operation
5665 *
5666 * Returns: 0 Success
5667 * !0 Errno from close
5668 */
5669 int
5670 fo_close(struct fileglob *fg, vfs_context_t ctx)
5671 {
5672 return (*fg->fg_ops->fo_close)(fg, ctx);
5673 }
5674
5675
5676 /*
5677 * fo_drain
5678 *
5679 * Description: Generic fileops drain indirected through the fileops
5680 * pointer in the fileproc structure
5681 *
5682 * Parameters: fp fileproc structure pointer
5683 * ctx VFS context for operation
5684 *
5685 * Returns: 0 Success
5686 * !0 errno from drain
5687 */
5688 int
5689 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5690 {
5691 return (*fp->f_ops->fo_drain)(fp, ctx);
5692 }
5693
5694 int
5695 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5696 {
5697 #pragma unused(fp, ctx)
5698 return ENOTSUP;
5699 }
5700
5701
5702 /*
5703 * fo_kqfilter
5704 *
5705 * Description: Generic fileops kqueue filter indirected through the fileops
5706 * pointer in the fileproc structure
5707 *
5708 * Parameters: fp fileproc structure pointer
5709 * kn pointer to knote to filter on
5710 *
5711 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5712 * 0 Filter is not active
5713 * !0 Filter is active
5714 */
5715 int
5716 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5717 {
5718 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5719 }
5720
5721 int
5722 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5723 {
5724 #pragma unused(fp, kev)
5725 knote_set_error(kn, ENOTSUP);
5726 return 0;
5727 }
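
/*
 * Illustrative sketch (editor's addition): how a subsystem typically wires
 * up the indirection used by the fo_*() wrappers above.  Operations the
 * file type does not support are pointed at the fo_no_*() stubs; the names
 * prefixed with "my_" are hypothetical.
 *
 *	static const struct fileops my_example_ops = {
 *		.fo_type     = DTYPE_PSXSHM,	// or another DTYPE_* value
 *		.fo_read     = fo_no_read,
 *		.fo_write    = fo_no_write,
 *		.fo_ioctl    = my_ioctl,
 *		.fo_select   = fo_no_select,
 *		.fo_close    = my_closefile,
 *		.fo_drain    = fo_no_drain,
 *		.fo_kqfilter = fo_no_kqfilter,
 *	};
 */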
5728
5729
5730 struct fileproc *
5731 fileproc_alloc_init(__unused void *arg)
5732 {
5733 struct fileproc *fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO);
5734
5735 os_ref_init(&fp->fp_iocount, &f_refgrp);
5736 return fp;
5737 }
5738
5739
5740 void
5741 fileproc_free(struct fileproc *fp)
5742 {
5743 os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
5744 #if DEVELOPMENT || DEBUG
5745 if (0 != refc) {
5746 panic("%s: pid %d refc: %u != 0",
5747 __func__, proc_pid(current_proc()), refc);
5748 }
5749 #endif
5750 switch (FILEPROC_TYPE(fp)) {
5751 case FTYPE_SIMPLE:
5752 zfree(fp_zone, fp);
5753 break;
5754 case FTYPE_GUARDED:
5755 guarded_fileproc_free(fp);
5756 break;
5757 default:
5758 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->fp_flags);
5759 }
5760 }
5761
5762 void
5763 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
5764 {
5765 if (clearflags) {
5766 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
5767 } else {
5768 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
5769 }
5770 }
5771
5772 fileproc_vflags_t
5773 fileproc_get_vflags(struct fileproc *fp)
5774 {
5775 return os_atomic_load(&fp->fp_vflags, relaxed);
5776 }