2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
112 #include <vfs/vfs_disk_conditioner.h>
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
136 #include <nfs/nfs_conf.h>
139 #include <miscfs/routefs/routefs.h>
143 #include <security/mac.h>
144 #include <security/mac_framework.h>
148 #define GET_PATH(x) \
149 (x) = get_pathbuff();
150 #define RELEASE_PATH(x) \
153 #define GET_PATH(x) \
154 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
155 #define RELEASE_PATH(x) \
156 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
157 #endif /* CONFIG_FSE */
159 #ifndef HFS_GET_BOOT_INFO
160 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
163 #ifndef HFS_SET_BOOT_INFO
164 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
167 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
168 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
171 extern void disk_conditioner_unmount(mount_t mp
);
173 /* struct for checkdirs iteration */
178 /* callback for checkdirs iteration */
179 static int checkdirs_callback(proc_t p
, void * arg
);
181 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
182 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
183 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
184 static int getfsstat_callback(mount_t mp
, void * arg
);
185 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
186 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
187 static int sync_callback(mount_t
, void *);
188 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
189 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
190 boolean_t partial_copy
);
191 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
192 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
193 struct componentname
*cnp
, user_addr_t fsmountargs
,
194 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
196 void vfs_notify_mount(vnode_t pdvp
);
198 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
200 struct fd_vn_data
* fg_vn_data_alloc(void);
203 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
204 * Concurrent lookups (or lookups by ids) on hard links can cause the
205 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
206 * does) to return ENOENT as the path cannot be returned from the name cache
207 * alone. We have no option but to retry and hope to get one namei->reverse path
208 * generation done without an intervening lookup, lookup by id on the hard link
209 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
210 * which currently are the MAC hooks for rename, unlink and rmdir.
212 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
214 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
,
217 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *);
219 #ifdef CONFIG_IMGSRC_ACCESS
220 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
221 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
222 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
223 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
224 static void mount_end_update(mount_t mp
);
225 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
226 #endif /* CONFIG_IMGSRC_ACCESS */
228 #if CONFIG_LOCKERBOOT
229 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
230 const char *pbdevpath
);
234 #if CONFIG_MNT_ROOTSNAP
235 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
237 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
240 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
243 int sync_internal(void);
246 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
248 extern lck_grp_t
*fd_vn_lck_grp
;
249 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
250 extern lck_attr_t
*fd_vn_lck_attr
;
253 * incremented each time a mount or unmount operation occurs
254 * used to invalidate the cached value of the rootvp in the
255 * mount structure utilized by cache_lookup_path
257 uint32_t mount_generation
= 0;
259 /* counts number of mount and unmount operations */
260 unsigned int vfs_nummntops
= 0;
262 extern const struct fileops vnops
;
263 #if CONFIG_APPLEDOUBLE
264 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
265 #endif /* CONFIG_APPLEDOUBLE */
268 * Virtual File System System Calls
271 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
273 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount
 *
 * Test whether the MNTK_KERNEL_MOUNT bit is set in a mount's kernel flags.
 *
 * Parameters:	mp	mount structure to inspect (dereferenced
 *			unconditionally, so it must not be NULL)
 *
 * Returns:	TRUE	MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag
 *		FALSE	the bit is clear
 */
277 vfs_iskernelmount(mount_t mp
)
279 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
/*
 * kernel_mount
 *
 * In-kernel mount entry point: builds a nameidata for the mount-on path and
 * hands the request to mount_common() with kernelmount == TRUE and no MAC
 * label string.
 *
 * Parameters:	fstype		file system type name, passed through to
 *				mount_common()
 *		pvp		parent of the vnode to be covered
 *		vp		vnode to be covered
 *		path		kernel-space path of the mount point
 *				(the lookup is initialised with UIO_SYSSPACE)
 *		data		file system specific mount arguments; cast
 *				to a user_addr_t before being passed on
 *		datalen		unused
 *		syscall_flags	generic MNT_* mount flags
 *		kern_flags	KERNEL_MOUNT_* internal flags
 *		ctx		caller's context
 *
 * Returns:	the value produced by mount_common() on the path visible here.
 *		NOTE(review): the lookup call, the error exits and the cleanup
 *		code are not visible in this view - confirm against the
 *		complete function.
 */
284 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
285 void *data
, __unused
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
)
/* Prepare a parent-returning, symlink-following lookup of the kernel path. */
291 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
292 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
295 * Get the vnode to be covered if it's not supplied
/* Log the mount-on path for snapshot, VM-volume and data-volume mounts. */
300 if (kern_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VMVOL
| KERNEL_MOUNT_DATAVOL
)) {
301 printf("failed to locate mount-on path: %s ", path
);
/*
 * Point the component name directly at the path buffer owned by the caller
 * (length includes the terminating NUL).
 * NOTE(review): presumably the branch taken when vp was supplied - confirm.
 */
309 char *pnbuf
= CAST_DOWN(char *, path
);
311 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
312 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
/* labelstr is NULL and kernelmount is TRUE for in-kernel mounts. */
316 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
317 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
327 #endif /* CONFIG_NFS_CLIENT || DEVFS || ROUTEFS */
330 * Mount a file system.
/*
 * mount
 *
 * Path-based mount entry point without a MAC label.  Repackages the caller's
 * arguments into a struct __mac_mount_args and forwards to __mac_mount().
 *
 * Parameters:	p		process requesting the mount
 *		uap		user argument descriptor (see below)
 *		retval		forwarded to __mac_mount() unchanged
 *
 * Indirect:	uap->type	file system type
 *		uap->path	path to mount on
 *		uap->flags	mount flags
 *		uap->data	file system specific mount arguments
 *
 * Returns:	whatever __mac_mount() returns
 */
334 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
336 struct __mac_mount_args muap
;
/* Copy the four caller-supplied fields across one for one. */
338 muap
.type
= uap
->type
;
339 muap
.path
= uap
->path
;
340 muap
.flags
= uap
->flags
;
341 muap
.data
= uap
->data
;
/* This entry point carries no MAC label, so pass a null user address. */
342 muap
.mac_p
= USER_ADDR_NULL
;
343 return __mac_mount(p
, &muap
, retval
);
/*
 * fmount
 *
 * Mount a file system on the vnode referenced by an open file descriptor
 * instead of on a looked-up path.  The covered vnode comes from uap->fd, its
 * path is regenerated with vn_getpath() into a temporary MAXPATHLEN buffer,
 * and the request is handed to mount_common() as a non-kernel mount.
 *
 * Parameters:	p		unused
 *		uap		user argument descriptor (see below)
 *		retval		unused
 *
 * Indirect:	uap->type	user address of the file system type name
 *		uap->fd		descriptor of the vnode to be covered
 *		uap->flags	mount flags
 *		uap->data	file system specific mount arguments
 *
 * Returns:	NOTE(review): the return statements and error exits are not
 *		visible in this view; the visible code stores the results of
 *		copyinstr(), file_vnode(), vnode_getwithref(), vn_getpath() and
 *		mount_common() in error - confirm how each is propagated.
 */
347 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
349 struct componentname cn
;
350 vfs_context_t ctx
= vfs_context_current();
353 int flags
= uap
->flags
;
354 char fstypename
[MFSNAMELEN
];
355 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
359 AUDIT_ARG(fd
, uap
->fd
);
360 AUDIT_ARG(fflags
, flags
);
361 /* fstypename will get audited by mount_common */
363 /* Sanity check the flags */
/* Image-source-by-index and root file system flags get special handling. */
364 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
/* Union mount requests are singled out as well. */
368 if (flags
& MNT_UNION
) {
/* Fetch the file system type name from user space (at most MFSNAMELEN bytes). */
372 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
/* Resolve the descriptor to its vnode ... */
377 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
/* ... and obtain a reference on it via vnode_getwithref(). */
381 if ((error
= vnode_getwithref(vp
)) != 0) {
/* mount_common() also wants the parent of the covered vnode. */
386 pvp
= vnode_getparent(vp
);
/* Build a zeroed component name backed by a temporary MAXPATHLEN buffer. */
393 memset(&cn
, 0, sizeof(struct componentname
));
394 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
395 cn
.cn_pnlen
= MAXPATHLEN
;
/* Regenerate the path of the covered vnode; release the buffer on failure. */
397 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
398 FREE(cn
.cn_pnbuf
, M_TEMP
);
/* internal_flags is 0 and kernelmount is FALSE for this entry point. */
405 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
/* The path buffer is released after mount_common() returns. */
407 FREE(cn
.cn_pnbuf
, M_TEMP
);
/*
 * vfs_notify_mount
 *
 * Announce a mount: signal a VQ_MOUNT VFS event with no fsid and no data
 * (both NULL), then post NOTE_WRITE on the supplied vnode through
 * lock_vnode_and_post().
 *
 * Parameters:	pdvp	vnode on which NOTE_WRITE is posted
 */
416 vfs_notify_mount(vnode_t pdvp
)
418 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
419 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
424 * Mount a file system taking into account MAC label behavior.
425 * See mount(2) man page for more information
427 * Parameters: p Process requesting the mount
428 * uap User argument descriptor (see below)
431 * Indirect: uap->type Filesystem type
432 * uap->path Path to mount
433 * uap->data Mount arguments
434 * uap->mac_p MAC info
435 * uap->flags Mount flags
441 boolean_t root_fs_upgrade_try
= FALSE
;
444 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
448 int need_nameidone
= 0;
449 vfs_context_t ctx
= vfs_context_current();
450 char fstypename
[MFSNAMELEN
];
453 char *labelstr
= NULL
;
454 int flags
= uap
->flags
;
456 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
457 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
462 * Get the fs type name from user space
464 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
470 * Get the vnode to be covered
472 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
473 UIO_USERSPACE
, uap
->path
, ctx
);
482 #ifdef CONFIG_IMGSRC_ACCESS
483 /* Mounting image source cannot be batched with other operations */
484 if (flags
== MNT_IMGSRC_BY_INDEX
) {
485 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
486 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
489 #endif /* CONFIG_IMGSRC_ACCESS */
493 * Get the label string (if any) from user space
495 if (uap
->mac_p
!= USER_ADDR_NULL
) {
500 struct user64_mac mac64
;
501 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
502 mac
.m_buflen
= mac64
.m_buflen
;
503 mac
.m_string
= mac64
.m_string
;
505 struct user32_mac mac32
;
506 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
507 mac
.m_buflen
= mac32
.m_buflen
;
508 mac
.m_string
= mac32
.m_string
;
513 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
514 (mac
.m_buflen
< 2)) {
518 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
519 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
523 AUDIT_ARG(mac_string
, labelstr
);
525 #endif /* CONFIG_MACF */
527 AUDIT_ARG(fflags
, flags
);
530 if (flags
& MNT_UNION
) {
531 /* No union mounts on release kernels */
537 if ((vp
->v_flag
& VROOT
) &&
538 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
539 if (!(flags
& MNT_UNION
)) {
543 * For a union mount on '/', treat it as fresh
544 * mount instead of update.
545 * Otherwise, union mounting on '/' used to panic the
546 * system before, since mnt_vnodecovered was found to
547 * be NULL for '/' which is required for unionlookup
548 * after it gets ENOENT on union mount.
550 flags
= (flags
& ~(MNT_UPDATE
));
554 if ((flags
& MNT_RDONLY
) == 0) {
555 /* Release kernels are not allowed to mount "/" as rw */
561 * See 7392553 for more details on why this check exists.
562 * Suffice to say: If this check is ON and something tries
563 * to mount the rootFS RW, we'll turn off the codesign
564 * bitmap optimization.
566 #if CHECK_CS_VALIDATION_BITMAP
567 if ((flags
& MNT_RDONLY
) == 0) {
568 root_fs_upgrade_try
= TRUE
;
573 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
574 labelstr
, FALSE
, ctx
);
580 FREE(labelstr
, M_MACTEMP
);
582 #endif /* CONFIG_MACF */
590 if (need_nameidone
) {
598 * common mount implementation (final stage of mounting)
601 * fstypename file system type (i.e. its vfs name)
602 * pvp parent of covered vnode
604 * cnp component name (ie path) of covered vnode
605 * flags generic mount flags
606 * fsmountargs file system specific data
607 * labelstr optional MAC label
608 * kernelmount TRUE for mounts initiated from inside the kernel
609 * ctx caller's context
612 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
613 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
614 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
617 #pragma unused(labelstr)
619 struct vnode
*devvp
= NULLVP
;
620 struct vnode
*device_vnode
= NULLVP
;
625 struct vfstable
*vfsp
= (struct vfstable
*)0;
626 struct proc
*p
= vfs_context_proc(ctx
);
628 user_addr_t devpath
= USER_ADDR_NULL
;
631 boolean_t vfsp_ref
= FALSE
;
632 boolean_t is_rwlock_locked
= FALSE
;
633 boolean_t did_rele
= FALSE
;
634 boolean_t have_usecount
= FALSE
;
636 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
637 /* Check for mutually-exclusive flag bits */
638 uint32_t checkflags
= (internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
));
640 while (checkflags
!= 0) {
641 checkflags
&= (checkflags
- 1);
646 //not allowed to request multiple mount-by-role flags
653 * Process an update for an existing mount
655 if (flags
& MNT_UPDATE
) {
656 if ((vp
->v_flag
& VROOT
) == 0) {
662 /* unmount in progress return error */
664 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
670 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
671 is_rwlock_locked
= TRUE
;
673 * We only allow the filesystem to be reloaded if it
674 * is currently mounted read-only.
676 if ((flags
& MNT_RELOAD
) &&
677 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
683 * If content protection is enabled, update mounts are not
684 * allowed to turn it off.
686 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
687 ((flags
& MNT_CPROTECT
) == 0)) {
693 * can't turn off MNT_REMOVABLE either but it may be an unexpected
694 * failure to return an error for this so we'll just silently
695 * add it if it is not passed in.
697 if ((mp
->mnt_flag
& MNT_REMOVABLE
) &&
698 ((flags
& MNT_REMOVABLE
) == 0)) {
699 flags
|= MNT_REMOVABLE
;
702 #ifdef CONFIG_IMGSRC_ACCESS
703 /* Can't downgrade the backer of the root FS */
704 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
705 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
709 #endif /* CONFIG_IMGSRC_ACCESS */
712 * Only root, or the user that did the original mount is
713 * permitted to update it.
715 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
716 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
720 error
= mac_mount_check_remount(ctx
, mp
);
726 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
727 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
729 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
730 flags
|= MNT_NOSUID
| MNT_NODEV
;
731 if (mp
->mnt_flag
& MNT_NOEXEC
) {
739 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
741 vfsp
= mp
->mnt_vtable
;
746 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
747 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
749 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
750 flags
|= MNT_NOSUID
| MNT_NODEV
;
751 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
756 /* XXXAUDIT: Should we capture the type on the error path as well? */
757 AUDIT_ARG(text
, fstypename
);
759 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
760 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
761 vfsp
->vfc_refcount
++;
773 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
774 * except in ROSV configs.
776 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) &&
777 ((internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
)) == 0)) {
778 error
= EINVAL
; /* unsupported request */
782 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
788 * Allocate and initialize the filesystem (mount_t)
790 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
792 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
795 /* Initialize the default IO constraints */
796 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
797 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
798 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
799 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
800 mp
->mnt_devblocksize
= DEV_BSIZE
;
801 mp
->mnt_alignmentmask
= PAGE_MASK
;
802 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
805 mp
->mnt_realrootvp
= NULLVP
;
806 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
808 TAILQ_INIT(&mp
->mnt_vnodelist
);
809 TAILQ_INIT(&mp
->mnt_workerqueue
);
810 TAILQ_INIT(&mp
->mnt_newvnodes
);
812 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
813 is_rwlock_locked
= TRUE
;
814 mp
->mnt_op
= vfsp
->vfc_vfsops
;
815 mp
->mnt_vtable
= vfsp
;
816 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
817 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
818 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
820 int pathlen
= MAXPATHLEN
;
822 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) {
823 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
826 mp
->mnt_vnodecovered
= vp
;
827 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
828 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
829 mp
->mnt_devbsdunit
= 0;
831 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
832 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
834 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
836 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
838 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
839 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
841 #endif /* CONFIG_NFS_CLIENT || DEVFS || ROUTEFS */
846 * Set the mount level flags.
848 if (flags
& MNT_RDONLY
) {
849 mp
->mnt_flag
|= MNT_RDONLY
;
850 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
851 // disallow read/write upgrades of file systems that
852 // had the TYPENAME_OVERRIDE feature set.
853 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
857 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
859 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
860 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
861 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
862 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
863 MNT_QUARANTINE
| MNT_CPROTECT
);
868 * On release builds of iOS based platforms, always enforce NOSUID on
869 * all mounts. We do this here because we can catch update mounts as well as
870 * non-update mounts in this case.
872 mp
->mnt_flag
|= (MNT_NOSUID
);
876 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
877 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
878 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
879 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
880 MNT_QUARANTINE
| MNT_CPROTECT
);
883 if (flags
& MNT_MULTILABEL
) {
884 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
888 mp
->mnt_flag
|= MNT_MULTILABEL
;
892 * Process device path for local file systems if requested
894 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
895 !(internal_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
))) {
896 //snapshot, vm, datavolume mounts are special
897 if (vfs_context_is64bit(ctx
)) {
898 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
901 fsmountargs
+= sizeof(devpath
);
904 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
907 /* munge into LP64 addr */
908 devpath
= CAST_USER_ADDR_T(tmp
);
909 fsmountargs
+= sizeof(tmp
);
912 /* Lookup device and authorize access to it */
916 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
917 if ((error
= namei(&nd
))) {
921 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
926 if (devvp
->v_type
!= VBLK
) {
930 if (major(devvp
->v_rdev
) >= nblkdev
) {
935 * If mount by non-root, then verify that user has necessary
936 * permissions on the device.
938 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
939 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
941 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
942 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
944 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
949 /* On first mount, preflight and open device */
950 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
951 if ((error
= vnode_ref(devvp
))) {
955 * Disallow multiple mounts of the same device.
956 * Disallow mounting of a device that is currently in use
957 * (except for root, which might share swap device for miniroot).
958 * Flush out any old buffers remaining from a previous use.
960 if ((error
= vfs_mountedon(devvp
))) {
964 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
968 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
972 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
976 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
978 error
= mac_vnode_check_open(ctx
,
980 ronly
? FREAD
: FREAD
| FWRITE
);
985 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
989 mp
->mnt_devvp
= devvp
;
990 device_vnode
= devvp
;
991 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
992 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
993 (device_vnode
= mp
->mnt_devvp
)) {
997 * If upgrade to read-write by non-root, then verify
998 * that user has necessary permissions on the device.
1000 vnode_getalways(device_vnode
);
1002 if (suser(vfs_context_ucred(ctx
), NULL
) &&
1003 (error
= vnode_authorize(device_vnode
, NULL
,
1004 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
1006 vnode_put(device_vnode
);
1010 /* Tell the device that we're upgrading */
1011 dev
= (dev_t
)device_vnode
->v_rdev
;
1014 if ((u_int
)maj
>= (u_int
)nblkdev
) {
1015 panic("Volume mounted on a device with invalid major number.");
1018 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
1019 vnode_put(device_vnode
);
1020 device_vnode
= NULLVP
;
1025 } // localargs && !(snapshot | data | vm)
1028 if ((flags
& MNT_UPDATE
) == 0) {
1029 mac_mount_label_init(mp
);
1030 mac_mount_label_associate(ctx
, mp
);
1033 if ((flags
& MNT_UPDATE
) != 0) {
1034 error
= mac_mount_check_label_update(ctx
, mp
);
1042 * Mount the filesystem. We already asserted that internal_flags
1043 * cannot have more than one mount-by-role bit set.
1045 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
1046 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
1047 (caddr_t
)fsmountargs
, 0, ctx
);
1048 } else if (internal_flags
& KERNEL_MOUNT_DATAVOL
) {
1049 #if CONFIG_ROSV_STARTUP
1050 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1051 fs_role_mount_args_t frma
= {origin_mp
, VFS_DATA_ROLE
};
1052 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1054 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
);
1056 /* Mark volume associated with system volume */
1057 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
1059 /* Attempt to acquire the mnt_devvp and set it up */
1060 struct vnode
*mp_devvp
= NULL
;
1061 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1062 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1063 0, &mp_devvp
, vfs_context_kernel());
1065 mp
->mnt_devvp
= mp_devvp
;
1066 //vnode_lookup took an iocount, need to drop it.
1067 vnode_put(mp_devvp
);
1068 // now set `device_vnode` to the devvp that was acquired.
1069 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1070 // note that though the iocount above was dropped, the mount acquires
1071 // an implicit reference against the device.
1072 device_vnode
= mp_devvp
;
1079 } else if (internal_flags
& KERNEL_MOUNT_VMVOL
) {
1081 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1082 fs_role_mount_args_t frma
= {origin_mp
, VFS_VM_ROLE
};
1083 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1085 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
);
1087 /* Mark volume associated with system volume and a swap mount */
1088 mp
->mnt_kern_flag
|= (MNTK_SYSTEM
| MNTK_SWAP_MOUNT
);
1089 /* Attempt to acquire the mnt_devvp and set it up */
1090 struct vnode
*mp_devvp
= NULL
;
1091 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1092 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1093 0, &mp_devvp
, vfs_context_kernel());
1095 mp
->mnt_devvp
= mp_devvp
;
1096 //vnode_lookup took an iocount, need to drop it.
1097 vnode_put(mp_devvp
);
1099 // now set `device_vnode` to the devvp that was acquired.
1100 // note that though the iocount above was dropped, the mount acquires
1101 // an implicit reference against the device.
1102 device_vnode
= mp_devvp
;
1110 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
1113 if (flags
& MNT_UPDATE
) {
1114 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
1115 mp
->mnt_flag
&= ~MNT_RDONLY
;
1118 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1119 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1121 mp
->mnt_flag
= flag
; /* restore flag value */
1123 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1124 lck_rw_done(&mp
->mnt_rwlock
);
1125 is_rwlock_locked
= FALSE
;
1127 enablequotas(mp
, ctx
);
1133 * Put the new filesystem on the mount list after root.
1136 struct vfs_attr vfsattr
;
1138 error
= mac_mount_check_mount_late(ctx
, mp
);
1143 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1144 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1146 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1149 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1151 * drop reference provided by VFS_ROOT
1161 vnode_lock_spin(vp
);
1162 CLR(vp
->v_flag
, VMOUNT
);
1163 vp
->v_mountedhere
= mp
;
1167 * taking the name_cache_lock exclusively will
1168 * ensure that everyone is out of the fast path who
1169 * might be trying to use a now stale copy of
1170 * vp->v_mountedhere->mnt_realrootvp
1171 * bumping mount_generation causes the cached values
1176 name_cache_unlock();
1178 error
= vnode_ref(vp
);
1183 have_usecount
= TRUE
;
1185 error
= checkdirs(vp
, ctx
);
1187 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1191 * there is no cleanup code here so I have made it void
1192 * we need to revisit this
1194 (void)VFS_START(mp
, 0, ctx
);
1196 if (mount_list_add(mp
) != 0) {
1198 * The system is shutting down trying to umount
1199 * everything, so fail with a plausible errno.
1204 lck_rw_done(&mp
->mnt_rwlock
);
1205 is_rwlock_locked
= FALSE
;
1207 /* Check if this mounted file system supports EAs or named streams. */
1208 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1209 VFSATTR_INIT(&vfsattr
);
1210 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1211 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1212 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1213 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1214 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1215 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1216 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1219 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1220 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1221 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1224 /* Check if this file system supports path from id lookups. */
1225 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1226 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1227 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1228 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1229 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1230 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1233 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1234 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1235 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1238 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1239 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1241 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1242 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1244 /* increment the operations count */
1245 OSAddAtomic(1, &vfs_nummntops
);
1246 enablequotas(mp
, ctx
);
1249 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1252 * cache the IO attributes for the underlying physical media...
1253 * an error return indicates the underlying driver doesn't
1254 * support all the queries necessary... however, reasonable
1255 * defaults will have been set, so no reason to bail or care
1257 vfs_init_io_attributes(device_vnode
, mp
);
1260 /* Now that mount is setup, notify the listeners */
1261 vfs_notify_mount(pvp
);
1262 IOBSDMountChange(mp
, kIOMountChangeMount
);
1264 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1265 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1266 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1267 mp
->mnt_vtable
->vfc_name
, error
);
1270 vnode_lock_spin(vp
);
1271 CLR(vp
->v_flag
, VMOUNT
);
1274 mp
->mnt_vtable
->vfc_refcount
--;
1275 mount_list_unlock();
1278 vnode_rele(device_vnode
);
1279 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1281 lck_rw_done(&mp
->mnt_rwlock
);
1282 is_rwlock_locked
= FALSE
;
1285 * if we get here, we have a mount structure that needs to be freed,
1286 * but since the coveredvp hasn't yet been updated to point at it,
1287 * no need to worry about other threads holding a crossref on this mp
1288 * so it's ok to just free it
1290 mount_lock_destroy(mp
);
1292 mac_mount_label_destroy(mp
);
1294 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1298 * drop I/O count on the device vp if there was one
1300 if (devpath
&& devvp
) {
1306 /* Error condition exits */
1308 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1311 * If the mount has been placed on the covered vp,
1312 * it may have been discovered by now, so we have
1313 * to treat this just like an unmount
1315 mount_lock_spin(mp
);
1316 mp
->mnt_lflag
|= MNT_LDEAD
;
1319 if (device_vnode
!= NULLVP
) {
1320 vnode_rele(device_vnode
);
1321 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1326 vnode_lock_spin(vp
);
1329 vp
->v_mountedhere
= (mount_t
) 0;
1333 if (have_usecount
) {
1337 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1341 if (devpath
&& devvp
) {
1345 /* Release mnt_rwlock only when it was taken */
1346 if (is_rwlock_locked
== TRUE
) {
1347 lck_rw_done(&mp
->mnt_rwlock
);
1351 if (mp
->mnt_crossref
) {
1352 mount_dropcrossref(mp
, vp
, 0);
1354 mount_lock_destroy(mp
);
1356 mac_mount_label_destroy(mp
);
1358 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1363 vfsp
->vfc_refcount
--;
1364 mount_list_unlock();
1371 * Flush in-core data, check for competing mount attempts,
1375 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1378 #pragma unused(cnp,fsname)
1380 struct vnode_attr va
;
1385 * If the user is not root, ensure that they own the directory
1386 * onto which we are attempting to mount.
1389 VATTR_WANTED(&va
, va_uid
);
1390 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1391 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1392 (!vfs_context_issuser(ctx
)))) {
1398 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1402 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
1406 if (vp
->v_type
!= VDIR
) {
1411 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1417 error
= mac_mount_check_mount(ctx
, vp
,
1424 vnode_lock_spin(vp
);
1425 SET(vp
->v_flag
, VMOUNT
);
1432 #if CONFIG_IMGSRC_ACCESS
1434 #define DEBUG_IMGSRC 0
1437 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1439 #define IMGSRC_DEBUG(args...) do { } while(0)
1443 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1445 struct nameidata nd
;
1446 vnode_t vp
, realdevvp
;
1449 enum uio_seg uio
= UIO_USERSPACE
;
1451 if (ctx
== vfs_context_kernel()) {
1455 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
);
1456 if ((error
= namei(&nd
))) {
1457 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1463 if (!vnode_isblk(vp
)) {
1464 IMGSRC_DEBUG("Not block device.\n");
1469 realdevvp
= mp
->mnt_devvp
;
1470 if (realdevvp
== NULLVP
) {
1471 IMGSRC_DEBUG("No device backs the mount.\n");
1476 error
= vnode_getwithref(realdevvp
);
1478 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1482 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1483 IMGSRC_DEBUG("Wrong dev_t.\n");
1488 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1491 * If mount by non-root, then verify that user has necessary
1492 * permissions on the device.
1494 if (!vfs_context_issuser(ctx
)) {
1495 accessmode
= KAUTH_VNODE_READ_DATA
;
1496 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1497 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1499 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1500 IMGSRC_DEBUG("Access denied.\n");
1508 vnode_put(realdevvp
);
1521 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1522 * and call checkdirs()
1525 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1529 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1531 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1532 mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
));
1534 vnode_lock_spin(vp
);
1535 CLR(vp
->v_flag
, VMOUNT
);
1536 vp
->v_mountedhere
= mp
;
1540 * taking the name_cache_lock exclusively will
1541 * ensure that everyone is out of the fast path who
1542 * might be trying to use a now stale copy of
1543 * vp->v_mountedhere->mnt_realrootvp
1544 * bumping mount_generation causes the cached values
1549 name_cache_unlock();
1551 error
= vnode_ref(vp
);
1556 error
= checkdirs(vp
, ctx
);
1558 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1565 mp
->mnt_vnodecovered
= NULLVP
;
1571 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1574 vnode_lock_spin(vp
);
1575 vp
->v_mountedhere
= (mount_t
)NULL
;
1578 mp
->mnt_vnodecovered
= NULLVP
;
1582 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1586 /* unmount in progress return error */
1587 mount_lock_spin(mp
);
1588 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1593 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1596 * We only allow the filesystem to be reloaded if it
1597 * is currently mounted read-only.
1599 if ((flags
& MNT_RELOAD
) &&
1600 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1606 * Only root, or the user that did the original mount is
1607 * permitted to update it.
1609 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1610 (!vfs_context_issuser(ctx
))) {
1615 error
= mac_mount_check_remount(ctx
, mp
);
1623 lck_rw_done(&mp
->mnt_rwlock
);
1630 mount_end_update(mount_t mp
)
1632 lck_rw_done(&mp
->mnt_rwlock
);
1636 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1640 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1644 vp
= imgsrc_rootvnodes
[height
];
1645 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1654 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
,
1655 struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
,
1656 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1660 boolean_t placed
= FALSE
;
1661 struct vfstable
*vfsp
;
1662 user_addr_t devpath
;
1663 char *old_mntonname
;
1669 /* If we didn't imageboot, nothing to move */
1670 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1674 /* Only root can do this */
1675 if (!vfs_context_issuser(ctx
)) {
1679 IMGSRC_DEBUG("looking for root vnode.\n");
1682 * Get root vnode of filesystem we're moving.
1686 struct user64_mnt_imgsrc_args mia64
;
1687 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1689 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1693 height
= mia64
.mi_height
;
1694 flags
= mia64
.mi_flags
;
1695 devpath
= mia64
.mi_devpath
;
1697 struct user32_mnt_imgsrc_args mia32
;
1698 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1700 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1704 height
= mia32
.mi_height
;
1705 flags
= mia32
.mi_flags
;
1706 devpath
= mia32
.mi_devpath
;
1710 * For binary compatibility--assumes one level of nesting.
1713 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1718 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1722 /* munge into LP64 addr */
1723 devpath
= CAST_USER_ADDR_T(tmp
);
1731 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1735 error
= get_imgsrc_rootvnode(height
, &rvp
);
1737 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
);
1741 IMGSRC_DEBUG("got old root vnode\n");
1743 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1745 /* Can only move once */
1746 mp
= vnode_mount(rvp
);
1747 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1748 IMGSRC_DEBUG("Already moved.\n");
1753 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
);
1754 IMGSRC_DEBUG("Starting updated.\n");
1756 /* Get exclusive rwlock on mount, authorize update on mp */
1757 error
= mount_begin_update(mp
, ctx
, 0);
1759 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1764 * It can only be moved once. Flag is set under the rwlock,
1765 * so we're now safe to proceed.
1767 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1768 IMGSRC_DEBUG("Already moved [2]\n");
1772 IMGSRC_DEBUG("Preparing coveredvp.\n");
1774 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1775 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1777 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1781 IMGSRC_DEBUG("Covered vp OK.\n");
1783 /* Sanity check the name caller has provided */
1784 vfsp
= mp
->mnt_vtable
;
1785 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1786 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1787 vfsp
->vfc_name
, fsname
);
1792 /* Check the device vnode and update mount-from name, for local filesystems */
1793 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1794 IMGSRC_DEBUG("Local, doing device validation.\n");
1796 if (devpath
!= USER_ADDR_NULL
) {
1797 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1799 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1808 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1809 * and increment the name cache's mount generation
1812 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1813 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1820 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1821 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1823 /* Forbid future moves */
1825 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1828 /* Finally, add to mount list, completely ready to go */
1829 if (mount_list_add(mp
) != 0) {
1831 * The system is shutting down trying to umount
1832 * everything, so fail with a plausible errno.
1838 mount_end_update(mp
);
1840 FREE(old_mntonname
, M_TEMP
);
1842 vfs_notify_mount(pvp
);
1846 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1849 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1854 * Placing the mp on the vnode clears VMOUNT,
1855 * so cleanup is different after that point
1858 /* Rele the vp, clear VMOUNT and v_mountedhere */
1859 undo_place_on_covered_vp(mp
, vp
);
1861 vnode_lock_spin(vp
);
1862 CLR(vp
->v_flag
, VMOUNT
);
1866 mount_end_update(mp
);
1870 FREE(old_mntonname
, M_TEMP
);
1874 #if CONFIG_LOCKERBOOT
1877 mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
1878 const char *pbdevpath
)
1881 struct nameidata nd
;
1882 boolean_t cleanup_nd
= FALSE
;
1883 vfs_context_t ctx
= vfs_context_kernel();
1884 boolean_t is64
= TRUE
;
1885 boolean_t by_index
= TRUE
;
1886 struct user64_mnt_imgsrc_args mia64
= {
1889 .mi_devpath
= CAST_USER_ADDR_T(pbdevpath
),
1891 user_addr_t mia64addr
= CAST_USER_ADDR_T(&mia64
);
1893 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
1894 UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
);
1897 IMGSRC_DEBUG("namei: %d\n", error
);
1902 error
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
,
1903 &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
);
1907 int stashed
= error
;
1909 error
= vnode_put(nd
.ni_vp
);
1911 panic("vnode_put() returned non-zero: %d", error
);
1915 error
= vnode_put(nd
.ni_dvp
);
1917 panic("vnode_put() returned non-zero: %d", error
);
1926 #endif /* CONFIG_LOCKERBOOT */
1927 #endif /* CONFIG_IMGSRC_ACCESS */
1930 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1932 struct nameidata qnd
;
1934 char qfpath
[MAXPATHLEN
];
1935 const char *qfname
= QUOTAFILENAME
;
1936 const char *qfopsname
= QUOTAOPSNAME
;
1937 const char *qfextension
[] = INITQFNAMES
;
1939 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1940 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
1944 * Enable filesystem disk quotas if necessary.
1945 * We ignore errors as this should not interfere with final mount
1947 for (type
= 0; type
< MAXQUOTAS
; type
++) {
1948 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1949 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1950 CAST_USER_ADDR_T(qfpath
), ctx
);
1951 if (namei(&qnd
) != 0) {
1952 continue; /* option file to trigger quotas is not present */
1954 vnode_put(qnd
.ni_vp
);
1956 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1958 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1965 checkdirs_callback(proc_t p
, void * arg
)
1967 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
1968 vnode_t olddp
= cdrp
->olddp
;
1969 vnode_t newdp
= cdrp
->newdp
;
1970 struct filedesc
*fdp
;
1971 vnode_t new_cvp
= newdp
;
1972 vnode_t new_rvp
= newdp
;
1973 vnode_t old_cvp
= NULL
;
1974 vnode_t old_rvp
= NULL
;
1977 * XXX Also needs to iterate each thread in the process to see if it
1978 * XXX is using a per-thread current working directory, and, if so,
1979 * XXX update that as well.
1983 * First, with the proc_fdlock held, check to see if we will need
1984 * to do any work. If not, we will get out fast.
1989 (fdp
->fd_cdir
!= olddp
&& fdp
->fd_rdir
!= olddp
)) {
1991 return PROC_RETURNED
;
1996 * Ok, we will have to do some work. Always take two refs
1997 * because we might need that many. We'll dispose of whatever
1998 * we ended up not using.
2000 if (vnode_ref(newdp
) != 0) {
2001 return PROC_RETURNED
;
2003 if (vnode_ref(newdp
) != 0) {
2005 return PROC_RETURNED
;
2009 * Now do the work. Note: we dropped the proc_fdlock, so we
2010 * have to do all of the checks again.
2015 if (fdp
->fd_cdir
== olddp
) {
2017 fdp
->fd_cdir
= newdp
;
2020 if (fdp
->fd_rdir
== olddp
) {
2022 fdp
->fd_rdir
= newdp
;
2029 * Dispose of any references that are no longer needed.
2031 if (old_cvp
!= NULL
) {
2032 vnode_rele(old_cvp
);
2034 if (old_rvp
!= NULL
) {
2035 vnode_rele(old_rvp
);
2037 if (new_cvp
!= NULL
) {
2038 vnode_rele(new_cvp
);
2040 if (new_rvp
!= NULL
) {
2041 vnode_rele(new_rvp
);
2044 return PROC_RETURNED
;
2050 * Scan all active processes to see if any of them have a current
2051 * or root directory onto which the new filesystem has just been
2052 * mounted. If so, replace them with the new mount point.
2055 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
2060 struct cdirargs cdr
;
2062 if (olddp
->v_usecount
== 1) {
2065 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
2069 panic("mount: lost mount: error %d", err
);
2076 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2077 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
2079 if (rootvnode
== olddp
) {
2091 * Unmount a file system.
2093 * Note: unmount takes a path to the vnode mounted on as argument,
2094 * not special file (as before).
2098 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
2103 struct nameidata nd
;
2104 vfs_context_t ctx
= vfs_context_current();
2106 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
2107 UIO_USERSPACE
, uap
->path
, ctx
);
2117 error
= mac_mount_check_umount(ctx
, mp
);
2124 * Must be the root of the filesystem
2126 if ((vp
->v_flag
& VROOT
) == 0) {
2132 /* safedounmount consumes the mount ref */
2133 return safedounmount(mp
, uap
->flags
, ctx
);
2137 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
2141 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
2142 if (mp
== (mount_t
)0) {
2147 /* safedounmount consumes the mount ref */
2148 return safedounmount(mp
, flags
, ctx
);
2153 * The mount struct comes with a mount ref which will be consumed.
2154 * Do the actual file system unmount, prevent some common foot shooting.
2157 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2160 proc_t p
= vfs_context_proc(ctx
);
2163 * If the file system is not responding and MNT_NOBLOCK
2164 * is set and not a forced unmount then return EBUSY.
2166 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
2167 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
2173 * Skip authorization if the mount is tagged as permissive and
2174 * this is not a forced-unmount attempt.
2176 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
2178 * Only root, or the user that did the original mount is
2179 * permitted to unmount this filesystem.
2181 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
2182 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
2187 * Don't allow unmounting the root file system (or the associated VM or DATA mounts).
2189 if ((mp
->mnt_flag
& MNT_ROOTFS
) || (mp
->mnt_kern_flag
& MNTK_SYSTEM
)) {
2190 error
= EBUSY
; /* the root (or associated volumes) is always busy */
2194 #ifdef CONFIG_IMGSRC_ACCESS
2195 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
2199 #endif /* CONFIG_IMGSRC_ACCESS */
2201 return dounmount(mp
, flags
, 1, ctx
);
2209 * Do the actual file system unmount.
2212 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
2214 vnode_t coveredvp
= (vnode_t
)0;
2217 int forcedunmount
= 0;
2219 struct vnode
*devvp
= NULLVP
;
2221 proc_t p
= vfs_context_proc(ctx
);
2223 int pflags_save
= 0;
2224 #endif /* CONFIG_TRIGGERS */
2227 if (!(flags
& MNT_FORCE
)) {
2228 fsevent_unmount(mp
, ctx
); /* has to come first! */
2235 * If already an unmount in progress just return EBUSY.
2236 * Even a forced unmount cannot override.
2238 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
2246 if (flags
& MNT_FORCE
) {
2248 mp
->mnt_lflag
|= MNT_LFORCE
;
2252 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2253 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2257 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2258 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2259 mp
->mnt_flag
&= ~MNT_ASYNC
;
2261 * anyone currently in the fast path that
2262 * trips over the cached rootvp will be
2263 * dumped out and forced into the slow path
2264 * to regenerate a new cached value
2266 mp
->mnt_realrootvp
= NULLVP
;
2269 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2271 * Force unmount any mounts in this filesystem.
2272 * If any unmounts fail - just leave them dangling.
2275 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2279 * taking the name_cache_lock exclusively will
2280 * ensure that everyone is out of the fast path who
2281 * might be trying to use a now stale copy of
2282 * vp->v_mountedhere->mnt_realrootvp
2283 * bumping mount_generation causes the cached values
2288 name_cache_unlock();
2291 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2296 if (forcedunmount
== 0) {
2297 ubc_umount(mp
); /* release cached vnodes */
2298 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2299 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2302 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2303 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2304 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2310 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2313 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2316 if (forcedunmount
) {
2317 lflags
|= FORCECLOSE
;
2319 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2320 if ((forcedunmount
== 0) && error
) {
2322 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2323 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2324 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2328 /* make sure there is no one in the mount iterations or lookup */
2329 mount_iterdrain(mp
);
2331 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2333 mount_iterreset(mp
);
2335 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2336 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2337 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2341 /* increment the operations count */
2343 OSAddAtomic(1, &vfs_nummntops
);
2346 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2347 /* hold an io reference and drop the usecount before close */
2348 devvp
= mp
->mnt_devvp
;
2349 vnode_getalways(devvp
);
2351 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2353 vnode_clearmountedon(devvp
);
2356 lck_rw_done(&mp
->mnt_rwlock
);
2357 mount_list_remove(mp
);
2358 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2360 /* mark the mount point hook in the vp but not drop the ref yet */
2361 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2363 * The covered vnode needs special handling. Trying to get an
2364 * iocount must not block here as this may lead to deadlocks
2365 * if the Filesystem to which the covered vnode belongs is
2366 * undergoing forced unmounts. Since we hold a usecount, the
2367 * vnode cannot be reused (it can, however, still be terminated)
2369 vnode_getalways(coveredvp
);
2370 vnode_lock_spin(coveredvp
);
2373 coveredvp
->v_mountedhere
= (struct mount
*)0;
2374 CLR(coveredvp
->v_flag
, VMOUNT
);
2376 vnode_unlock(coveredvp
);
2377 vnode_put(coveredvp
);
2381 mp
->mnt_vtable
->vfc_refcount
--;
2382 mount_list_unlock();
2384 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2385 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2387 mp
->mnt_lflag
|= MNT_LDEAD
;
2389 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2391 * do the wakeup here
2392 * in case we block in mount_refdrain
2393 * which will drop the mount lock
2394 * and allow anyone blocked in vfs_busy
2395 * to wakeup and see the LDEAD state
2397 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2398 wakeup((caddr_t
)mp
);
2402 /* free disk_conditioner_info structure for this mount */
2403 disk_conditioner_unmount(mp
);
2406 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2407 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2412 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2413 // Restore P_NOREMOTEHANG bit to its previous value
2414 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2415 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2420 * Callback and context are set together under the mount lock, and
2421 * never cleared, so we're safe to examine them here, drop the lock,
2424 if (mp
->mnt_triggercallback
!= NULL
) {
2427 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2428 } else if (did_vflush
) {
2429 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2436 #endif /* CONFIG_TRIGGERS */
2438 lck_rw_done(&mp
->mnt_rwlock
);
2441 wakeup((caddr_t
)mp
);
2445 if ((coveredvp
!= NULLVP
)) {
2446 vnode_t pvp
= NULLVP
;
2449 * The covered vnode needs special handling. Trying to
2450 * get an iocount must not block here as this may lead
2451 * to deadlocks if the Filesystem to which the covered
2452 * vnode belongs is undergoing forced unmounts. Since we
2453 * hold a usecount, the vnode cannot be reused
2454 * (it can, however, still be terminated).
2456 vnode_getalways(coveredvp
);
2458 mount_dropcrossref(mp
, coveredvp
, 0);
2460 * We'll _try_ to detect if this really needs to be
2461 * done. The coveredvp can only be in termination (or
2462 * terminated) if the coveredvp's mount point is in a
2463 * forced unmount (or has been) since we still hold the
2466 if (!vnode_isrecycled(coveredvp
)) {
2467 pvp
= vnode_getparent(coveredvp
);
2469 if (coveredvp
->v_resolve
) {
2470 vnode_trigger_rearm(coveredvp
, ctx
);
2475 vnode_rele(coveredvp
);
2476 vnode_put(coveredvp
);
2480 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2483 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2484 mount_lock_destroy(mp
);
2486 mac_mount_label_destroy(mp
);
2488 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2490 panic("dounmount: no coveredvp");
2497 * Unmount any mounts in this filesystem.
2500 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2503 fsid_t
*fsids
, fsid
;
2505 int count
= 0, i
, m
= 0;
2510 // Get an array to hold the submounts fsids.
2511 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2513 fsids_sz
= count
* sizeof(fsid_t
);
2514 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2515 if (fsids
== NULL
) {
2516 mount_list_unlock();
2519 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2522 * Fill the array with submount fsids.
2523 * Since mounts are always added to the tail of the mount list, the
2524 * list is always in mount order.
2525 * For each mount check if the mounted-on vnode belongs to a
2526 * mount that's already added to our array of mounts to be unmounted.
2528 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2529 vp
= smp
->mnt_vnodecovered
;
2533 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2534 for (i
= 0; i
<= m
; i
++) {
2535 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2536 fsids
[i
].val
[1] == fsid
.val
[1]) {
2537 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2542 mount_list_unlock();
2544 // Unmount the submounts in reverse order. Ignore errors.
2545 for (i
= m
; i
> 0; i
--) {
2546 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2549 mount_iterdrop(smp
);
2550 (void) dounmount(smp
, flags
, 1, ctx
);
2555 FREE(fsids
, M_TEMP
);
2560 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2565 if (mp
->mnt_crossref
< 0) {
2566 panic("mount cross refs -ve");
2569 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2571 vnode_put_locked(dp
);
2575 mount_lock_destroy(mp
);
2577 mac_mount_label_destroy(mp
);
2579 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2583 vnode_put_locked(dp
);
2590 * Sync each mounted filesystem.
2596 int print_vmpage_stat
= 0;
2599 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2600 * mounted read-write with the passed waitfor value.
2602 * Parameters: mp mount-point descriptor per mounted file-system instance.
2603 * arg user argument (please see below)
2605 * User argument is a pointer to 32 bit unsigned integer which describes the
2606 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2607 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2610 * Returns: VFS_RETURNED
2613 sync_callback(mount_t mp
, void *arg
)
2615 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2616 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2617 unsigned waitfor
= MNT_NOWAIT
;
2620 waitfor
= *(uint32_t*)arg
;
2623 /* Sanity check for flags - these are the only valid combinations for the flag bits*/
2624 if (waitfor
!= MNT_WAIT
&&
2625 waitfor
!= (MNT_WAIT
| MNT_VOLUME
) &&
2626 waitfor
!= MNT_NOWAIT
&&
2627 waitfor
!= (MNT_NOWAIT
| MNT_VOLUME
) &&
2628 waitfor
!= MNT_DWAIT
&&
2629 waitfor
!= (MNT_DWAIT
| MNT_VOLUME
)) {
2630 panic("Passed inappropriate waitfor %u to "
2631 "sync_callback()", waitfor
);
2634 mp
->mnt_flag
&= ~MNT_ASYNC
;
2635 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel());
2637 mp
->mnt_flag
|= MNT_ASYNC
;
2641 return VFS_RETURNED
;
2646 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2648 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2650 if (print_vmpage_stat
) {
2651 vm_countdirtypages();
2658 #endif /* DIAGNOSTIC */
2664 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2665 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
2669 sync_internal_callback(mount_t mp
, void *arg
)
2672 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2673 (mp
->mnt_flag
& MNT_LOCAL
);
2674 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2676 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2677 return VFS_RETURNED
;
2678 } else if ((sync_type
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2679 return VFS_RETURNED
;
2683 (void)sync_callback(mp
, NULL
);
2685 return VFS_RETURNED
;
2688 int sync_thread_state
= 0;
2689 int sync_timeout_seconds
= 5;
2691 #define SYNC_THREAD_RUN 0x0001
2692 #define SYNC_THREAD_RUNNING 0x0002
2695 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2697 sync_type_t sync_type
;
2699 lck_mtx_lock(sync_mtx_lck
);
2700 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2701 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2702 lck_mtx_unlock(sync_mtx_lck
);
2704 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2705 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2706 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2707 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2709 lck_mtx_lock(sync_mtx_lck
);
2712 * This wakeup _has_ to be issued before the lock is released otherwise
2713 * we may end up waking up a thread in sync_internal which is
2714 * expecting a wakeup from a thread it just created and not from this
2715 * thread which is about to exit.
2717 wakeup(&sync_thread_state
);
2718 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2719 lck_mtx_unlock(sync_mtx_lck
);
2721 if (print_vmpage_stat
) {
2722 vm_countdirtypages();
2729 #endif /* DIAGNOSTIC */
2732 struct timeval sync_timeout_last_print
= {.tv_sec
= 0, .tv_usec
= 0};
2735 * An in-kernel sync for power management to call.
2736 * This function always returns within sync_timeout seconds.
2738 __private_extern__
int
2743 int thread_created
= FALSE
;
2744 struct timespec ts
= {.tv_sec
= sync_timeout_seconds
, .tv_nsec
= 0};
2746 lck_mtx_lock(sync_mtx_lck
);
2747 sync_thread_state
|= SYNC_THREAD_RUN
;
2748 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2751 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2752 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2753 if (kr
!= KERN_SUCCESS
) {
2754 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2755 lck_mtx_unlock(sync_mtx_lck
);
2756 printf("sync_thread failed\n");
2759 thread_created
= TRUE
;
2762 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2763 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2768 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2769 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2770 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2774 if (thread_created
) {
2775 thread_deallocate(thd
);
2779 } /* end of sync_internal call */
2782 * Change filesystem quotas.
2786 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2789 int error
, quota_cmd
, quota_status
= 0;
2792 struct nameidata nd
;
2793 vfs_context_t ctx
= vfs_context_current();
2794 struct dqblk my_dqblk
= {};
2796 AUDIT_ARG(uid
, uap
->uid
);
2797 AUDIT_ARG(cmd
, uap
->cmd
);
2798 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2804 mp
= nd
.ni_vp
->v_mount
;
2806 vnode_put(nd
.ni_vp
);
2809 /* copyin any data we will need for downstream code */
2810 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2812 switch (quota_cmd
) {
2814 /* uap->arg specifies a file from which to take the quotas */
2815 fnamelen
= MAXPATHLEN
;
2816 datap
= kalloc(MAXPATHLEN
);
2817 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2820 /* uap->arg is a pointer to a dqblk structure. */
2821 datap
= (caddr_t
) &my_dqblk
;
2825 /* uap->arg is a pointer to a dqblk structure. */
2826 datap
= (caddr_t
) &my_dqblk
;
2827 if (proc_is64bit(p
)) {
2828 struct user_dqblk my_dqblk64
;
2829 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2831 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2834 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2838 /* uap->arg is a pointer to an integer */
2839 datap
= (caddr_t
) "a_status
;
2847 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2850 switch (quota_cmd
) {
2852 if (datap
!= NULL
) {
2853 kfree(datap
, MAXPATHLEN
);
2857 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2859 if (proc_is64bit(p
)) {
2860 struct user_dqblk my_dqblk64
;
2862 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2863 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2864 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
2866 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
2871 /* uap->arg is a pointer to an integer */
2873 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2885 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2892 * Get filesystem statistics.
2894 * Returns: 0 Success
2896 * vfs_update_vfsstat:???
2897 * munge_statfs:EFAULT
2901 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2904 struct vfsstatfs
*sp
;
2906 struct nameidata nd
;
2907 vfs_context_t ctx
= vfs_context_current();
2910 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2911 UIO_USERSPACE
, uap
->path
, ctx
);
2918 sp
= &mp
->mnt_vfsstat
;
2922 error
= mac_mount_check_stat(ctx
, mp
);
2929 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2935 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2941 * Get filesystem statistics.
2945 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2949 struct vfsstatfs
*sp
;
2952 AUDIT_ARG(fd
, uap
->fd
);
2954 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2958 error
= vnode_getwithref(vp
);
2964 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2973 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2979 sp
= &mp
->mnt_vfsstat
;
2980 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2984 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
/*
 * vfs_get_statfs64: fill the caller-supplied statfs64 record from a mount.
 *
 * Parameters:  mp   Mount to describe; most values come from its cached
 *                   mnt_vfsstat, a few from the mount structure itself
 *              sfs  Output record; zeroed with bzero() before being filled
 *
 * No refresh of mnt_vfsstat is visible in this routine; the callers visible
 * in this file invoke vfs_update_vfsstat() before calling it.
 */
2994 vfs_get_statfs64(struct mount
*mp
, struct statfs64
*sfs
)
2996 struct vfsstatfs
*vsfs
= &mp
->mnt_vfsstat
;
/* Start from an all-zero record so any field not set below reads as 0. */
2998 bzero(sfs
, sizeof(*sfs
));
/* Straight copies from the cached vfsstatfs. */
3000 sfs
->f_bsize
= vsfs
->f_bsize
;
/* f_iosize is narrowed to int32_t by the explicit cast. */
3001 sfs
->f_iosize
= (int32_t)vsfs
->f_iosize
;
3002 sfs
->f_blocks
= vsfs
->f_blocks
;
3003 sfs
->f_bfree
= vsfs
->f_bfree
;
3004 sfs
->f_bavail
= vsfs
->f_bavail
;
3005 sfs
->f_files
= vsfs
->f_files
;
3006 sfs
->f_ffree
= vsfs
->f_ffree
;
3007 sfs
->f_fsid
= vsfs
->f_fsid
;
3008 sfs
->f_owner
= vsfs
->f_owner
;
/* f_type and f_flags come from the mount itself, not from the cache. */
3009 sfs
->f_type
= mp
->mnt_vtable
->vfc_typenum
;
/* Only the bits selected by MNT_VISFLAGMASK are reported. */
3010 sfs
->f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
3011 sfs
->f_fssubtype
= vsfs
->f_fssubtype
;
/*
 * f_flags_ext is MNT_EXT_ROOT_DATA_VOL when MNTK_SYSTEM is set and both
 * MNTK_SWAP_MOUNT and MNT_ROOTFS are clear; otherwise 0.
 */
3012 sfs
->f_flags_ext
= ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
)) ? MNT_EXT_ROOT_DATA_VOL
: 0;
/* With MNTK_TYPENAME_OVERRIDE set, report the mount's override type name. */
3013 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
3014 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
/*
 * Cached type name. Presumably the else branch of the test above; the
 * `else` line itself is not visible in this view.
 */
3016 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
);
/* Mount-on and mount-from names, each copy bounded by MAXPATHLEN. */
3018 strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
);
3019 strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
);
3023 * Get file system statistics in 64-bit mode
3026 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
3030 struct nameidata nd
;
3031 struct statfs64 sfs
;
3032 vfs_context_t ctxp
= vfs_context_current();
3035 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3036 UIO_USERSPACE
, uap
->path
, ctxp
);
3046 error
= mac_mount_check_stat(ctxp
, mp
);
3053 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
3059 vfs_get_statfs64(mp
, &sfs
);
3060 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3061 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3062 /* This process does not want to see a separate data volume mountpoint */
3063 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3065 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3072 * Get file system statistics in 64-bit mode
3075 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
3079 struct statfs64 sfs
;
3082 AUDIT_ARG(fd
, uap
->fd
);
3084 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3088 error
= vnode_getwithref(vp
);
3094 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3103 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3109 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3113 vfs_get_statfs64(mp
, &sfs
);
3114 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3115 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3116 /* This process does not want to see a separate data volume mountpoint */
3117 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3119 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3128 struct getfsstat_struct
{
3139 getfsstat_callback(mount_t mp
, void * arg
)
3141 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3142 struct vfsstatfs
*sp
;
3144 vfs_context_t ctx
= vfs_context_current();
3146 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3148 error
= mac_mount_check_stat(ctx
, mp
);
3150 fstp
->error
= error
;
3151 return VFS_RETURNED_DONE
;
3154 sp
= &mp
->mnt_vfsstat
;
3156 * If MNT_NOWAIT is specified, do not refresh the
3157 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3159 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3160 (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3161 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3162 (error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) {
3163 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3164 return VFS_RETURNED
;
3168 * Need to handle LP64 version of struct statfs
3170 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
3172 fstp
->error
= error
;
3173 return VFS_RETURNED_DONE
;
3175 fstp
->sfsp
+= my_size
;
3179 error
= mac_mount_label_get(mp
, *fstp
->mp
);
3181 fstp
->error
= error
;
3182 return VFS_RETURNED_DONE
;
3189 return VFS_RETURNED
;
3193 * Get statistics on all filesystems.
/*
 * getfsstat: wrapper that repackages its arguments as a
 * struct __mac_getfsstat_args with no MAC array (mac = USER_ADDR_NULL)
 * and returns whatever __mac_getfsstat() returns.
 *
 * `p` is marked __unused in the signature but is still forwarded below.
 */
3196 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
3198 struct __mac_getfsstat_args muap
;
/* buf, bufsize and flags are copied through unchanged. */
3200 muap
.buf
= uap
->buf
;
3201 muap
.bufsize
= uap
->bufsize
;
3202 muap
.mac
= USER_ADDR_NULL
;
/*
 * NOTE(review): no visible line assigns muap.macsize, yet __mac_getfsstat()
 * reads uap->macsize. Confirm it is initialized on a line not shown here.
 */
3204 muap
.flags
= uap
->flags
;
3206 return __mac_getfsstat(p
, &muap
, retval
);
3210 * __mac_getfsstat: Get MAC-related file system statistics
3212 * Parameters: p (ignored)
3213 * uap User argument descriptor (see below)
3214 * retval Count of file system statistics (N stats)
3216 * Indirect: uap->bufsize Buffer size
3217 * uap->macsize MAC info size
3218 * uap->buf Buffer where information will be returned
3220 * uap->flags File system flags
3223 * Returns: 0 Success
3228 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
3232 size_t count
, maxcount
, bufsize
, macsize
;
3233 struct getfsstat_struct fst
;
3235 if ((unsigned)uap
->bufsize
> INT_MAX
|| (unsigned)uap
->macsize
> INT_MAX
) {
3239 bufsize
= (size_t) uap
->bufsize
;
3240 macsize
= (size_t) uap
->macsize
;
3242 if (IS_64BIT_PROCESS(p
)) {
3243 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
3245 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
3253 if (uap
->mac
!= USER_ADDR_NULL
) {
3258 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
3259 if (count
!= maxcount
) {
3263 /* Copy in the array */
3264 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
3269 error
= copyin(uap
->mac
, mp0
, macsize
);
3271 FREE(mp0
, M_MACTEMP
);
3275 /* Normalize to an array of user_addr_t */
3276 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
3278 FREE(mp0
, M_MACTEMP
);
3282 for (i
= 0; i
< count
; i
++) {
3283 if (IS_64BIT_PROCESS(p
)) {
3284 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3286 mp
[i
] = (user_addr_t
)mp0
[i
];
3289 FREE(mp0
, M_MACTEMP
);
3296 fst
.flags
= uap
->flags
;
3299 fst
.maxcount
= maxcount
;
3302 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
);
3305 FREE(mp
, M_MACTEMP
);
3309 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3313 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3314 *retval
= fst
.maxcount
;
3316 *retval
= fst
.count
;
3322 getfsstat64_callback(mount_t mp
, void * arg
)
3324 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3325 struct vfsstatfs
*sp
;
3326 struct statfs64 sfs
;
3329 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3331 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3333 fstp
->error
= error
;
3334 return VFS_RETURNED_DONE
;
3337 sp
= &mp
->mnt_vfsstat
;
3339 * If MNT_NOWAIT is specified, do not refresh the fsstat
3340 * cache. MNT_WAIT overrides MNT_NOWAIT.
3342 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3343 * getfsstat, since the constants are out of the same
3346 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3347 ((((fstp
->flags
& MNT_NOWAIT
) == 0) || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3348 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3349 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) {
3350 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3351 return VFS_RETURNED
;
3354 vfs_get_statfs64(mp
, &sfs
);
3355 error
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
));
3357 fstp
->error
= error
;
3358 return VFS_RETURNED_DONE
;
3360 fstp
->sfsp
+= sizeof(sfs
);
3363 return VFS_RETURNED
;
3367 * Get statistics on all file systems in 64 bit mode.
3370 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3373 int count
, maxcount
;
3374 struct getfsstat_struct fst
;
3376 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3382 fst
.flags
= uap
->flags
;
3385 fst
.maxcount
= maxcount
;
3387 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
);
3390 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3394 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3395 *retval
= fst
.maxcount
;
3397 *retval
= fst
.count
;
3404 * gets the associated vnode with the file descriptor passed.
3408 * ctx - vfs context of caller
3409 * fd - file descriptor for which vnode is required.
3410 * vpp - Pointer to pointer to vnode to be returned.
3412 * The vnode is returned with an iocount so any vnode obtained
3413 * by this call needs a vnode_put
3417 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3421 struct fileproc
*fp
;
3422 proc_t p
= vfs_context_proc(ctx
);
3426 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3431 error
= vnode_getwithref(vp
);
3433 (void)fp_drop(p
, fd
, fp
, 0);
3437 (void)fp_drop(p
, fd
, fp
, 0);
3443 * Wrapper function around namei to start lookup from a directory
3444 * specified by a file descriptor ni_dirfd.
3446 * In addition to all the errors returned by namei, this call can
3447 * return ENOTDIR if the file descriptor does not refer to a directory,
3448 * and EBADF if the file descriptor is not valid.
3451 nameiat(struct nameidata
*ndp
, int dirfd
)
3453 if ((dirfd
!= AT_FDCWD
) &&
3454 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3455 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3459 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3460 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3465 c
= *((char *)(ndp
->ni_dirp
));
3471 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3477 if (vnode_vtype(dvp_at
) != VDIR
) {
3482 ndp
->ni_dvp
= dvp_at
;
3483 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3485 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3495 * Change current working directory to a given file descriptor.
3499 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3501 struct filedesc
*fdp
= p
->p_fd
;
3507 vfs_context_t ctx
= vfs_context_current();
3509 AUDIT_ARG(fd
, uap
->fd
);
3510 if (per_thread
&& uap
->fd
== -1) {
3512 * Switching back from per-thread to per process CWD; verify we
3513 * in fact have one before proceeding. The only success case
3514 * for this code path is to return 0 preemptively after zapping
3515 * the thread structure contents.
3517 thread_t th
= vfs_context_thread(ctx
);
3519 uthread_t uth
= get_bsdthread_info(th
);
3521 uth
->uu_cdir
= NULLVP
;
3522 if (tvp
!= NULLVP
) {
3530 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3533 if ((error
= vnode_getwithref(vp
))) {
3538 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3540 if (vp
->v_type
!= VDIR
) {
3546 error
= mac_vnode_check_chdir(ctx
, vp
);
3551 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3556 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3557 if (vfs_busy(mp
, LK_NOWAIT
)) {
3561 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3572 if ((error
= vnode_ref(vp
))) {
3578 thread_t th
= vfs_context_thread(ctx
);
3580 uthread_t uth
= get_bsdthread_info(th
);
3583 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
/*
 * fchdir: forwards to common_fchdir() with per_thread = 0.
 * retval is unused.
 */
3609 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3611 return common_fchdir(p
, uap
, 0);
/*
 * __pthread_fchdir: forwards to common_fchdir() with per_thread = 1.
 * uap is passed through a (void *) cast because common_fchdir() is
 * declared to take a struct fchdir_args *. retval is unused.
 */
3615 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3617 return common_fchdir(p
, (void *)uap
, 1);
3622 * Change current working directory (".").
3624 * Returns: 0 Success
3625 * change_dir:ENOTDIR
3627 * vnode_ref:ENOENT No such file or directory
3631 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata
*ndp
, int per_thread
)
3633 struct filedesc
*fdp
= p
->p_fd
;
3637 error
= change_dir(ndp
, ctx
);
3641 if ((error
= vnode_ref(ndp
->ni_vp
))) {
3642 vnode_put(ndp
->ni_vp
);
3646 * drop the iocount we picked up in change_dir
3648 vnode_put(ndp
->ni_vp
);
3651 thread_t th
= vfs_context_thread(ctx
);
3653 uthread_t uth
= get_bsdthread_info(th
);
3655 uth
->uu_cdir
= ndp
->ni_vp
;
3656 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3658 vnode_rele(ndp
->ni_vp
);
3664 fdp
->fd_cdir
= ndp
->ni_vp
;
3677 * Change current working directory (".").
3679 * Returns: 0 Success
3680 * chdir_internal:ENOTDIR
3681 * chdir_internal:ENOENT No such file or directory
3682 * chdir_internal:???
/*
 * common_chdir: shared body of chdir() and __pthread_chdir().
 *
 * Sets up a LOOKUP nameidata for the user-space path in uap->path
 * (OP_CHDIR, FOLLOW | AUDITVNPATH1) under the current VFS context and
 * hands it to chdir_internal(). per_thread is passed through unchanged,
 * and chdir_internal()'s result is returned as-is.
 */
3686 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3688 struct nameidata nd
;
3689 vfs_context_t ctx
= vfs_context_current();
/* uap->path is a user-space address, hence UIO_USERSPACE. */
3691 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3692 UIO_USERSPACE
, uap
->path
, ctx
);
3694 return chdir_internal(p
, ctx
, &nd
, per_thread
);
3701 * Change current working directory (".") for the entire process
3703 * Parameters: p Process requesting the call
3704 * uap User argument descriptor (see below)
3707 * Indirect parameters: uap->path Directory path
3709 * Returns: 0 Success
3710 * common_chdir: ENOTDIR
3711 * common_chdir: ENOENT No such file or directory
/*
 * chdir: forwards to common_chdir() with per_thread = 0.
 * retval is unused.
 */
3716 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3718 return common_chdir(p
, (void *)uap
, 0);
3724 * Change current working directory (".") for a single thread
3726 * Parameters: p Process requesting the call
3727 * uap User argument descriptor (see below)
3730 * Indirect parameters: uap->path Directory path
3732 * Returns: 0 Success
3733 * common_chdir: ENOTDIR
3734 * common_chdir: ENOENT No such file or directory
/*
 * __pthread_chdir: forwards to common_chdir() with per_thread = 1.
 * uap is passed through a (void *) cast because common_chdir() is
 * declared to take a struct chdir_args *. retval is unused.
 */
3739 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3741 return common_chdir(p
, (void *)uap
, 1);
3746 * Change notion of root (``/'') directory.
3750 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3752 struct filedesc
*fdp
= p
->p_fd
;
3754 struct nameidata nd
;
3756 vfs_context_t ctx
= vfs_context_current();
3758 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3762 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3763 UIO_USERSPACE
, uap
->path
, ctx
);
3764 error
= change_dir(&nd
, ctx
);
3770 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3773 vnode_put(nd
.ni_vp
);
3778 if ((error
= vnode_ref(nd
.ni_vp
))) {
3779 vnode_put(nd
.ni_vp
);
3782 vnode_put(nd
.ni_vp
);
3786 fdp
->fd_rdir
= nd
.ni_vp
;
3787 fdp
->fd_flags
|= FD_CHROOT
;
3798 * Common routine for chroot and chdir.
3800 * Returns: 0 Success
3801 * ENOTDIR Not a directory
3802 * namei:??? [anything namei can return]
3803 * vnode_authorize:??? [anything vnode_authorize can return]
3806 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3811 if ((error
= namei(ndp
))) {
3817 if (vp
->v_type
!= VDIR
) {
3823 error
= mac_vnode_check_chdir(ctx
, vp
);
3830 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3840 * Allocate the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_alloc: allocate a struct fd_vn_data and initialize its
 * fv_lock mutex.
 *
 * The return statement is not visible in this view; presumably the new
 * record is handed back to the caller -- confirm against the full source.
 */
3843 fg_vn_data_alloc(void)
3845 struct fd_vn_data
*fvdata
;
3847 /* Allocate per fd vnode data */
/* M_ZERO is requested, so the record starts zero-filled. */
3848 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3849 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
/* Same lock group that fg_vn_data_free() passes to lck_mtx_destroy(). */
3850 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3855 * Free the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_free: tear down a struct fd_vn_data.
 *
 * Parameters:  fgvndata  Pointer to the struct fd_vn_data to release. It is
 *                        dereferenced without a NULL check, so callers
 *                        must pass a valid record.
 *
 * Order: free fv_buf if set, destroy fv_lock, then free the record itself.
 */
3858 fg_vn_data_free(void *fgvndata
)
3860 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
/* fv_buf is released only when it is non-NULL. */
3862 if (fvdata
->fv_buf
) {
3863 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
/* The mutex is destroyed before the memory containing it is freed. */
3865 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3866 FREE(fvdata
, M_FD_VN_DATA
);
3870 * Check permissions, allocate an open file structure,
3871 * and call the device open routine if any.
3873 * Returns: 0 Success
3884 * XXX Need to implement uid, gid
3887 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3888 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3891 proc_t p
= vfs_context_proc(ctx
);
3892 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3893 struct fileproc
*fp
;
3896 int type
, indx
, error
;
3898 struct vfs_context context
;
3902 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
3906 flags
= FFLAGS(uflags
);
3907 CLR(flags
, FENCRYPTED
);
3908 CLR(flags
, FUNENCRYPTED
);
3910 AUDIT_ARG(fflags
, oflags
);
3911 AUDIT_ARG(mode
, vap
->va_mode
);
3913 if ((error
= falloc_withalloc(p
,
3914 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3917 uu
->uu_dupfd
= -indx
- 1;
3919 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3920 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
3921 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3922 fp_drop(p
, indx
, NULL
, 0);
3927 if (error
== ERESTART
) {
3930 fp_free(p
, indx
, fp
);
3936 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3937 fp
->f_fglob
->fg_ops
= &vnops
;
3938 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3940 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3941 lf
.l_whence
= SEEK_SET
;
3944 if (flags
& O_EXLOCK
) {
3945 lf
.l_type
= F_WRLCK
;
3947 lf
.l_type
= F_RDLCK
;
3950 if ((flags
& FNONBLOCK
) == 0) {
3954 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3960 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
3963 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3966 /* try to truncate by setting the size attribute */
3967 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
3972 * For directories we hold some additional information in the fd.
3974 if (vnode_vtype(vp
) == VDIR
) {
3975 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3977 fp
->f_fglob
->fg_vn_data
= NULL
;
3983 * The first terminal open (without a O_NOCTTY) by a session leader
3984 * results in it being set as the controlling terminal.
3986 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3987 !(flags
& O_NOCTTY
)) {
3990 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3991 (caddr_t
)&tmp
, ctx
);
3995 if (flags
& O_CLOEXEC
) {
3996 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3998 if (flags
& O_CLOFORK
) {
3999 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
4001 procfdtbl_releasefd(p
, indx
, NULL
);
4003 #if CONFIG_SECLUDED_MEMORY
4004 if (secluded_for_filecache
&&
4005 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
4006 vnode_vtype(vp
) == VREG
) {
4007 memory_object_control_t moc
;
4009 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
4011 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
4012 /* nothing to do... */
4013 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
4014 /* writable -> no longer eligible for secluded pages */
4015 memory_object_mark_eligible_for_secluded(moc
,
4017 } else if (secluded_for_filecache
== 1) {
4018 char pathname
[32] = { 0, };
4020 /* XXX FBDP: better way to detect /Applications/ ? */
4021 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4022 (void)copyinstr(ndp
->ni_dirp
,
4027 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
4032 pathname
[sizeof(pathname
) - 1] = '\0';
4033 if (strncmp(pathname
,
4035 strlen("/Applications/")) == 0 &&
4037 "/Applications/Camera.app/",
4038 strlen("/Applications/Camera.app/")) != 0) {
4041 * AND from "/Applications/"
4042 * AND not from "/Applications/Camera.app/"
4043 * ==> eligible for secluded
4045 memory_object_mark_eligible_for_secluded(moc
,
4048 } else if (secluded_for_filecache
== 2) {
4050 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4052 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4054 /* not implemented... */
4056 size_t len
= strlen(vp
->v_name
);
4057 if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) ||
4058 !strncmp(vp
->v_name
, "dyld", len
) ||
4059 !strncmp(vp
->v_name
, "launchd", len
) ||
4060 !strncmp(vp
->v_name
, "Camera", len
) ||
4061 !strncmp(vp
->v_name
, "mediaserverd", len
) ||
4062 !strncmp(vp
->v_name
, "SpringBoard", len
) ||
4063 !strncmp(vp
->v_name
, "backboardd", len
)) {
4065 * This file matters when launching Camera:
4066 * do not store its contents in the secluded
4067 * pool that will be drained on Camera launch.
4069 memory_object_mark_eligible_for_secluded(moc
,
4074 #endif /* CONFIG_SECLUDED_MEMORY */
4076 fp_drop(p
, indx
, fp
, 1);
4083 context
= *vfs_context_current();
4084 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
4086 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
4087 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
4088 lf
.l_whence
= SEEK_SET
;
4091 lf
.l_type
= F_UNLCK
;
4094 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
4097 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
4099 fp_free(p
, indx
, fp
);
4105 * While most of the *at syscall handlers can call nameiat() which
4106 * is a wrapper around namei, the use of namei and initialisation
4107 * of nameidata are far removed and in different functions - namei
4108 * gets called in vn_open_auth for open1. So we'll just do here what
4112 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4113 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
4116 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
4120 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4121 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
4126 c
= *((char *)(ndp
->ni_dirp
));
4132 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
4138 if (vnode_vtype(dvp_at
) != VDIR
) {
4143 ndp
->ni_dvp
= dvp_at
;
4144 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
4145 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
4152 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
4156 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4158 * Parameters: p Process requesting the open
4159 * uap User argument descriptor (see below)
4160 * retval Pointer to an area to receive the
4161 * return value from the system call
4163 * Indirect: uap->path Path to open (same as 'open')
4164 * uap->flags Flags to open (same as 'open')
4165 * uap->uid UID to set, if creating
4166 * uap->gid GID to set, if creating
4167 * uap->mode File mode, if creating (same as 'open')
4168 * uap->xsecurity ACL to set, if creating
4170 * Returns: 0 Success
4173 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4175 * XXX: We should enumerate the possible errno values here, and where
4176 * in the code they originated.
4179 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
4181 struct filedesc
*fdp
= p
->p_fd
;
4183 kauth_filesec_t xsecdst
;
4184 struct vnode_attr va
;
4185 struct nameidata nd
;
4188 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4191 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
4192 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
4197 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4198 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4199 if (uap
->uid
!= KAUTH_UID_NONE
) {
4200 VATTR_SET(&va
, va_uid
, uap
->uid
);
4202 if (uap
->gid
!= KAUTH_GID_NONE
) {
4203 VATTR_SET(&va
, va_gid
, uap
->gid
);
4205 if (xsecdst
!= NULL
) {
4206 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4209 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4210 uap
->path
, vfs_context_current());
4212 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4213 fileproc_alloc_init
, NULL
, retval
);
4214 if (xsecdst
!= NULL
) {
4215 kauth_filesec_free(xsecdst
);
4222 * Go through the data-protected atomically controlled open (2)
4224 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4227 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
4229 int flags
= uap
->flags
;
4230 int class = uap
->class;
4231 int dpflags
= uap
->dpflags
;
4234 * Follow the same path as normal open(2)
4235 * Look up the item if it exists, and acquire the vnode.
4237 struct filedesc
*fdp
= p
->p_fd
;
4238 struct vnode_attr va
;
4239 struct nameidata nd
;
4244 /* Mask off all but regular access permissions */
4245 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4246 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4248 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4249 uap
->path
, vfs_context_current());
4252 * Initialize the extra fields in vnode_attr to pass down our
4254 * 1. target cprotect class.
4255 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4257 if (flags
& O_CREAT
) {
4258 /* lower level kernel code validates that the class is valid before applying it. */
4259 if (class != PROTECTION_CLASS_DEFAULT
) {
4261 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4262 * file behave the same as open (2)
4264 VATTR_SET(&va
, va_dataprotect_class
, class);
4268 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4269 if (flags
& (O_RDWR
| O_WRONLY
)) {
4270 /* Not allowed to write raw encrypted bytes */
4273 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4274 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4276 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4277 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4281 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4282 fileproc_alloc_init
, NULL
, retval
);
4288 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4289 int fd
, enum uio_seg segflg
, int *retval
)
4291 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4292 struct vnode_attr va
;
4293 struct nameidata nd
;
4297 /* Mask off all but regular access permissions */
4298 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4299 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4301 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4304 return open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
/*
 * open: calls __pthread_testcancel(1), then delegates to open_nocancel().
 * uap is reinterpreted as a struct open_nocancel_args * by the cast.
 */
4309 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4311 __pthread_testcancel(1);
4312 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
/*
 * open_nocancel: forwards uap->path, uap->flags and uap->mode to
 * openat_internal() under the current VFS context.
 *
 * AT_FDCWD is passed as the `fd` argument and the path is tagged
 * UIO_USERSPACE. The tail of the parameter list (where retval is
 * declared) is on a line not visible in this view.
 */
4316 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4319 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4320 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
/*
 * openat_nocancel: same forwarding as open_nocancel(), except that the
 * caller-supplied uap->fd is passed as openat_internal()'s `fd` argument
 * instead of AT_FDCWD.
 *
 * The tail of the parameter list (where retval is declared) is on a line
 * not visible in this view.
 */
4324 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4327 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4328 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
/*
 * openat: calls __pthread_testcancel(1), then delegates to
 * openat_nocancel(). uap is reinterpreted as a
 * struct openat_nocancel_args * by the cast.
 */
4332 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4334 __pthread_testcancel(1);
4335 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4339 * openbyid_np: open a file given a file system id and a file system object id
4340 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4341 * for file systems that don't support object ids it is a node id (uint64_t).
4343 * Parameters: p Process requesting the open
4344 * uap User argument descriptor (see below)
4345 * retval Pointer to an area to receive the
4346 * return value from the system call
4348 * Indirect: uap->path Path to open (same as 'open')
4350 * uap->fsid id of target file system
4351 * uap->objid id of target file system object
4352 * uap->oflags Flags to open (same as 'open')
4354 * Returns: 0 Success
4358 * XXX: We should enumerate the possible errno values here, and where
4359 * in the code they originated.
4362 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4368 int buflen
= MAXPATHLEN
;
4370 vfs_context_t ctx
= vfs_context_current();
4372 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4376 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4380 /*uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4381 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4385 AUDIT_ARG(value32
, fsid
.val
[0]);
4386 AUDIT_ARG(value64
, objid
);
4388 /*resolve path from fsid, objid*/
4390 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4395 error
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
,
4396 buf
, FSOPT_ISREALFSID
, &pathlen
);
4402 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4410 error
= openat_internal(
4411 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4420 * Create a special file.
4422 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4425 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4427 struct vnode_attr va
;
4428 vfs_context_t ctx
= vfs_context_current();
4430 struct nameidata nd
;
4434 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4435 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4437 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4438 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4439 return mkfifo1(ctx
, uap
->path
, &va
);
4442 AUDIT_ARG(mode
, uap
->mode
);
4443 AUDIT_ARG(value32
, uap
->dev
);
4445 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4448 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4449 UIO_USERSPACE
, uap
->path
, ctx
);
4462 switch (uap
->mode
& S_IFMT
) {
4464 VATTR_SET(&va
, va_type
, VCHR
);
4467 VATTR_SET(&va
, va_type
, VBLK
);
4475 error
= mac_vnode_check_create(ctx
,
4476 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4482 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4486 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4491 int update_flags
= 0;
4493 // Make sure the name & parent pointers are hooked up
4494 if (vp
->v_name
== NULL
) {
4495 update_flags
|= VNODE_UPDATE_NAME
;
4497 if (vp
->v_parent
== NULLVP
) {
4498 update_flags
|= VNODE_UPDATE_PARENT
;
4502 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4506 add_fsevent(FSE_CREATE_FILE
, ctx
,
4514 * nameidone has to happen before we vnode_put(dvp)
4515 * since it may need to release the fs_nodelock on the dvp
4528 * Create a named pipe.
4530 * Returns: 0 Success
4533 * vnode_authorize:???
4537 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4541 struct nameidata nd
;
4543 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4544 UIO_USERSPACE
, upath
, ctx
);
4552 /* check that this is a new file and authorize addition */
4557 VATTR_SET(vap
, va_type
, VFIFO
);
4559 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4563 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4566 * nameidone has to happen before we vnode_put(dvp)
4567 * since it may need to release the fs_nodelock on the dvp
4581 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4583 * Parameters: p Process requesting the open
4584 * uap User argument descriptor (see below)
4587 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4588 * uap->uid UID to set
4589 * uap->gid GID to set
4590 * uap->mode File mode to set (same as 'mkfifo')
4591 * uap->xsecurity ACL to set, if creating
4593 * Returns: 0 Success
4596 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4598 * XXX: We should enumerate the possible errno values here, and where
4599 * in the code they originated.
4602 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4605 kauth_filesec_t xsecdst
;
4606 struct vnode_attr va
;
4608 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4610 xsecdst
= KAUTH_FILESEC_NONE
;
4611 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4612 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4618 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4619 if (uap
->uid
!= KAUTH_UID_NONE
) {
4620 VATTR_SET(&va
, va_uid
, uap
->uid
);
4622 if (uap
->gid
!= KAUTH_GID_NONE
) {
4623 VATTR_SET(&va
, va_gid
, uap
->gid
);
4625 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4626 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4629 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4631 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4632 kauth_filesec_free(xsecdst
);
4639 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4641 struct vnode_attr va
;
4644 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4646 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
4651 my_strrchr(char *p
, int ch
)
4655 for (save
= NULL
;; ++p
) {
4666 extern int safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
);
4667 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4668 extern int safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4671 safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
)
4673 int ret
, len
= _len
;
4675 *truncated_path
= 0;
4678 ret
= vn_getpath(dvp
, path
, &len
);
4680 ret
= vn_getpath_no_firmlink(dvp
, path
, &len
);
4682 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4684 path
[len
- 1] = '/';
4685 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
4686 if (len
> MAXPATHLEN
) {
4689 // the string got truncated!
4690 *truncated_path
= 1;
4691 ptr
= my_strrchr(path
, '/');
4693 *ptr
= '\0'; // chop off the string at the last directory component
4695 len
= strlen(path
) + 1;
4698 } else if (ret
== 0) {
4699 *truncated_path
= 1;
4700 } else if (ret
!= 0) {
4701 struct vnode
*mydvp
= dvp
;
4703 if (ret
!= ENOSPC
) {
4704 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4705 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4707 *truncated_path
= 1;
4710 if (mydvp
->v_parent
!= NULL
) {
4711 mydvp
= mydvp
->v_parent
;
4712 } else if (mydvp
->v_mount
) {
4713 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4716 // no parent and no mount point? only thing is to punt and say "/" changed
4717 strlcpy(path
, "/", _len
);
4722 if (mydvp
== NULL
) {
4728 ret
= vn_getpath(mydvp
, path
, &len
);
4730 ret
= vn_getpath_no_firmlink(mydvp
, path
, &len
);
4732 } while (ret
== ENOSPC
);
4739 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4741 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1);
4745 safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4747 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0);
4751 * Make a hard file link.
4753 * Returns: 0 Success
4758 * vnode_authorize:???
4763 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4764 user_addr_t link
, int flag
, enum uio_seg segflg
)
4766 vnode_t vp
, pvp
, dvp
, lvp
;
4767 struct nameidata nd
;
4773 int need_event
, has_listeners
, need_kpath2
;
4774 char *target_path
= NULL
;
4777 vp
= dvp
= lvp
= NULLVP
;
4779 /* look up the object we are linking to */
4780 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4781 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4784 error
= nameiat(&nd
, fd1
);
4786 if (error
== EPERM
) {
4787 printf("XXX 54841485: nameiat() src EPERM\n");
4796 * Normally, linking to directories is not supported.
4797 * However, some file systems may have limited support.
4799 if (vp
->v_type
== VDIR
) {
4800 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4801 error
= EPERM
; /* POSIX */
4802 printf("XXX 54841485: VDIR EPERM\n");
4806 /* Linking to a directory requires ownership. */
4807 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4808 struct vnode_attr dva
;
4811 VATTR_WANTED(&dva
, va_uid
);
4812 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4813 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4814 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4821 /* lookup the target node */
4825 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4826 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4828 error
= nameiat(&nd
, fd2
);
4830 if (error
== EPERM
) {
4831 printf("XXX 54841485: nameiat() dst EPERM\n");
4839 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
4840 if (error
== EPERM
) {
4841 printf("XXX 54841485: mac_vnode_check_link() EPERM\n");
4847 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4848 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
4849 if (error
== EPERM
) {
4850 printf("XXX 54841485: vnode_authorize() LINKTARGET EPERM\n");
4855 /* target node must not exist */
4856 if (lvp
!= NULLVP
) {
4860 /* cannot link across mountpoints */
4861 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4866 /* authorize creation of the target node */
4867 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4868 if (error
== EPERM
) {
4869 printf("XXX 54841485: vnode_authorize() ADD_FILE EPERM\n");
4874 /* and finally make the link */
4875 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4877 if (error
== EPERM
) {
4878 printf("XXX 54841485: VNOP_LINK() EPERM\n");
4884 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4888 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4892 has_listeners
= kauth_authorize_fileop_has_listeners();
4896 if (AUDIT_RECORD_EXISTS()) {
4901 if (need_event
|| has_listeners
|| need_kpath2
) {
4902 char *link_to_path
= NULL
;
4903 int len
, link_name_len
;
4905 /* build the path to the new link file */
4906 GET_PATH(target_path
);
4907 if (target_path
== NULL
) {
4912 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4914 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
4916 if (has_listeners
) {
4917 /* build the path to file we are linking to */
4918 GET_PATH(link_to_path
);
4919 if (link_to_path
== NULL
) {
4924 link_name_len
= MAXPATHLEN
;
4925 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4927 * Call out to allow 3rd party notification of link.
4928 * Ignore result of kauth_authorize_fileop call.
4930 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4931 (uintptr_t)link_to_path
,
4932 (uintptr_t)target_path
);
4934 if (link_to_path
!= NULL
) {
4935 RELEASE_PATH(link_to_path
);
4940 /* construct fsevent */
4941 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4943 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4946 // build the path to the destination of the link
4947 add_fsevent(FSE_CREATE_FILE
, ctx
,
4948 FSE_ARG_STRING
, len
, target_path
,
4949 FSE_ARG_FINFO
, &finfo
,
4954 // need an iocount on pvp in this case
4955 if (pvp
&& pvp
!= dvp
) {
4956 error
= vnode_get(pvp
);
4963 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4964 FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
);
4966 if (pvp
&& pvp
!= dvp
) {
4974 * nameidone has to happen before we vnode_put(dvp)
4975 * since it may need to release the fs_nodelock on the dvp
4978 if (target_path
!= NULL
) {
4979 RELEASE_PATH(target_path
);
4993 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4995 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4996 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
5000 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
5002 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
5006 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
5007 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
5011 * Make a symbolic link.
5013 * We could add support for ACLs here too...
5017 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
5018 user_addr_t link
, enum uio_seg segflg
)
5020 struct vnode_attr va
;
5023 struct nameidata nd
;
5029 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
5030 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
5031 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
5033 path
= (char *)path_data
;
5038 AUDIT_ARG(text
, path
); /* This is the link string */
5040 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
5043 error
= nameiat(&nd
, fd
);
5050 p
= vfs_context_proc(ctx
);
5052 VATTR_SET(&va
, va_type
, VLNK
);
5053 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
5056 error
= mac_vnode_check_create(ctx
,
5057 dvp
, &nd
.ni_cnd
, &va
);
5070 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
5072 /* get default ownership, etc. */
5074 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
5077 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
5080 /* do fallback attribute handling */
5081 if (error
== 0 && vp
) {
5082 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
5086 if (error
== 0 && vp
) {
5087 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
5092 int update_flags
= 0;
5094 /*check if a new vnode was created, else try to get one*/
5096 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
5098 nd
.ni_op
= OP_LOOKUP
;
5100 nd
.ni_cnd
.cn_flags
= 0;
5101 error
= nameiat(&nd
, fd
);
5109 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5110 /* call out to allow 3rd party notification of symlink.
5111 * Ignore result of kauth_authorize_fileop call.
5113 if (kauth_authorize_fileop_has_listeners() &&
5115 char *new_link_path
= NULL
;
5118 /* build the path to the new link file */
5119 new_link_path
= get_pathbuff();
5121 vn_getpath(dvp
, new_link_path
, &len
);
5122 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
5123 new_link_path
[len
- 1] = '/';
5124 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
5127 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
5128 (uintptr_t)path
, (uintptr_t)new_link_path
);
5129 if (new_link_path
!= NULL
) {
5130 release_pathbuff(new_link_path
);
5134 // Make sure the name & parent pointers are hooked up
5135 if (vp
->v_name
== NULL
) {
5136 update_flags
|= VNODE_UPDATE_NAME
;
5138 if (vp
->v_parent
== NULLVP
) {
5139 update_flags
|= VNODE_UPDATE_PARENT
;
5143 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
5147 add_fsevent(FSE_CREATE_FILE
, ctx
,
5155 * nameidone has to happen before we vnode_put(dvp)
5156 * since it may need to release the fs_nodelock on the dvp
5165 if (path
&& (path
!= (char *)path_data
)) {
5166 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
5173 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
5175 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
5176 uap
->link
, UIO_USERSPACE
);
5180 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
5181 __unused
int32_t *retval
)
5183 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
5184 uap
->path2
, UIO_USERSPACE
);
5188 * Delete a whiteout from the filesystem.
5189 * No longer supported.
5192 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
5198 * Delete a name from the filesystem.
5202 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
5203 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
5205 struct nameidata nd
;
5208 struct componentname
*cnp
;
5210 char *no_firmlink_path
= NULL
;
5212 int len_no_firmlink_path
= 0;
5215 struct vnode_attr va
;
5221 int truncated_no_firmlink_path
;
5223 struct vnode_attr
*vap
;
5225 int retry_count
= 0;
5228 cn_flags
= LOCKPARENT
;
5229 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
5230 cn_flags
|= AUDITVNPATH1
;
5232 /* If a starting dvp is passed, it trumps any fd passed. */
5238 /* unlink or delete is allowed on rsrc forks and named streams */
5239 cn_flags
|= CN_ALLOWRSRCFORK
;
5248 truncated_no_firmlink_path
= 0;
5251 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
5253 nd
.ni_dvp
= start_dvp
;
5254 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
5258 error
= nameiat(&nd
, fd
);
5267 /* With Carbon delete semantics, busy files cannot be deleted */
5268 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
5269 flags
|= VNODE_REMOVE_NODELETEBUSY
;
5272 /* Skip any potential upcalls if told to. */
5273 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
5274 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
5278 batched
= vnode_compound_remove_available(vp
);
5280 * The root of a mounted filesystem cannot be deleted.
5282 if ((vp
->v_flag
& VROOT
) || (dvp
->v_mount
!= vp
->v_mount
)) {
5287 #if DEVELOPMENT || DEBUG
5289 * XXX VSWAP: Check for entitlements or special flag here
5290 * so we can restrict access appropriately.
5292 #else /* DEVELOPMENT || DEBUG */
5294 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
5298 #endif /* DEVELOPMENT || DEBUG */
5301 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
5303 if (error
== ENOENT
) {
5304 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5315 if (!vnode_compound_remove_available(dvp
)) {
5316 panic("No vp, but no compound remove?");
5321 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5324 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5325 /* XXX need to get these data in batched VNOP */
5326 get_fse_info(vp
, &finfo
, ctx
);
5329 error
= vfs_get_notify_attributes(&va
);
5338 has_listeners
= kauth_authorize_fileop_has_listeners();
5339 if (need_event
|| has_listeners
) {
5347 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5348 if (no_firmlink_path
== NULL
) {
5349 GET_PATH(no_firmlink_path
);
5350 if (no_firmlink_path
== NULL
) {
5355 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
5359 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5360 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5364 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5366 if (error
== EKEEPLOOKING
) {
5368 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5371 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5372 panic("EKEEPLOOKING, but continue flag not set?");
5375 if (vnode_isdir(vp
)) {
5379 goto continue_lookup
;
5380 } else if (error
== ENOENT
&& batched
) {
5381 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5383 * For compound VNOPs, the authorization callback may
5384 * return ENOENT in case of racing hardlink lookups
5385 * hitting the name cache, redrive the lookup.
5395 * Call out to allow 3rd party notification of delete.
5396 * Ignore result of kauth_authorize_fileop call.
5399 if (has_listeners
) {
5400 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5401 KAUTH_FILEOP_DELETE
,
5406 if (vp
->v_flag
& VISHARDLINK
) {
5408 // if a hardlink gets deleted we want to blow away the
5409 // v_parent link because the path that got us to this
5410 // instance of the link is no longer valid. this will
5411 // force the next call to get the path to ask the file
5412 // system instead of just following the v_parent link.
5414 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5419 if (vp
->v_flag
& VISHARDLINK
) {
5420 get_fse_info(vp
, &finfo
, ctx
);
5422 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
5424 if (truncated_path
) {
5425 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5427 add_fsevent(FSE_DELETE
, ctx
,
5428 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
5429 FSE_ARG_FINFO
, &finfo
,
5441 if (no_firmlink_path
!= NULL
) {
5442 RELEASE_PATH(no_firmlink_path
);
5443 no_firmlink_path
= NULL
;
5446 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5447 * will cause its shadow file to go away if necessary.
5449 if (vp
&& (vnode_isnamedstream(vp
)) &&
5450 (vp
->v_parent
!= NULLVP
) &&
5451 vnode_isshadow(vp
)) {
5456 * nameidone has to happen before we vnode_put(dvp)
5457 * since it may need to release the fs_nodelock on the dvp
5473 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5474 enum uio_seg segflg
, int unlink_flags
)
5476 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5481 * Delete a name from the filesystem using Carbon semantics.
5484 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5486 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5487 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5491 * Delete a name from the filesystem using POSIX semantics.
5494 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5496 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5497 uap
->path
, UIO_USERSPACE
, 0);
5501 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5503 if (uap
->flag
& ~(AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5507 if (uap
->flag
& (AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5508 int unlink_flags
= 0;
5510 if (uap
->flag
& AT_REMOVEDIR_DATALESS
) {
5511 unlink_flags
|= VNODE_REMOVE_DATALESS_DIR
;
5513 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5514 uap
->path
, UIO_USERSPACE
, unlink_flags
);
5516 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5517 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5522 * Reposition read/write file offset.
5525 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5527 struct fileproc
*fp
;
5529 struct vfs_context
*ctx
;
5530 off_t offset
= uap
->offset
, file_size
;
5533 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5534 if (error
== ENOTSUP
) {
5539 if (vnode_isfifo(vp
)) {
5545 ctx
= vfs_context_current();
5547 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5548 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5551 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5559 if ((error
= vnode_getwithref(vp
))) {
5564 switch (uap
->whence
) {
5566 offset
+= fp
->f_fglob
->fg_offset
;
5569 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5572 offset
+= file_size
;
5577 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5580 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5586 if (uap
->offset
> 0 && offset
< 0) {
5587 /* Incremented/relative move past max size */
5591 * Allow negative offsets on character devices, per
5592 * POSIX 1003.1-2001. Most likely for writing disk
5595 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5596 /* Decremented/relative move before start */
5600 fp
->f_fglob
->fg_offset
= offset
;
5601 *retval
= fp
->f_fglob
->fg_offset
;
5607 * An lseek can affect whether data is "available to read." Use
5608 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5610 post_event_if_success(vp
, error
, NOTE_NONE
);
5611 (void)vnode_put(vp
);
5618 * Check access permissions.
5620 * Returns: 0 Success
5621 * vnode_authorize:???
5624 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5626 kauth_action_t action
;
5630 * If just the regular access bits, convert them to something
5631 * that vnode_authorize will understand.
5633 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5635 if (uflags
& R_OK
) {
5636 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5638 if (uflags
& W_OK
) {
5639 if (vnode_isdir(vp
)) {
5640 action
|= KAUTH_VNODE_ADD_FILE
|
5641 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5642 /* might want delete rights here too */
5644 action
|= KAUTH_VNODE_WRITE_DATA
;
5647 if (uflags
& X_OK
) {
5648 if (vnode_isdir(vp
)) {
5649 action
|= KAUTH_VNODE_SEARCH
;
5651 action
|= KAUTH_VNODE_EXECUTE
;
5655 /* take advantage of definition of uflags */
5656 action
= uflags
>> 8;
5660 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5666 /* action == 0 means only check for existence */
5668 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5679 * access_extended: Check access permissions in bulk.
5681 * Description: uap->entries Pointer to an array of accessx
5682 * descriptor structs, plus one or
5683 * more NULL terminated strings (see
5684 * "Notes" section below).
5685 * uap->size Size of the area pointed to by
5687 * uap->results Pointer to the results array.
5689 * Returns: 0 Success
5690 * ENOMEM Insufficient memory
5691 * EINVAL Invalid arguments
5692 * namei:EFAULT Bad address
5693 * namei:ENAMETOOLONG Filename too long
5694 * namei:ENOENT No such file or directory
5695 * namei:ELOOP Too many levels of symbolic links
5696 * namei:EBADF Bad file descriptor
5697 * namei:ENOTDIR Not a directory
5702 * uap->results Array contents modified
5704 * Notes: The uap->entries are structured as an arbitrary length array
5705 * of accessx descriptors, followed by one or more NULL terminated
5708 * struct accessx_descriptor[0]
5710 * struct accessx_descriptor[n]
5711 * char name_data[0];
5713 * We determine the entry count by walking the buffer containing
5714 * the uap->entries argument descriptor. For each descriptor we
5715 * see, the valid values for the offset ad_name_offset will be
5716 * in the byte range:
5718 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5720 * [ uap->entries + uap->size - 2 ]
5722 * since we must have at least one string, and the string must
5723 * be at least one character plus the NULL terminator in length.
5725 * XXX: Need to support the check-as uid argument
5728 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5730 struct accessx_descriptor
*input
= NULL
;
5731 errno_t
*result
= NULL
;
5734 unsigned int desc_max
, desc_actual
, i
, j
;
5735 struct vfs_context context
;
5736 struct nameidata nd
;
5740 #define ACCESSX_MAX_DESCR_ON_STACK 10
5741 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5743 context
.vc_ucred
= NULL
;
5746 * Validate parameters; if valid, copy the descriptor array and string
5747 * arguments into local memory. Before proceeding, the following
5748 * conditions must have been met:
5750 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5751 * o There must be sufficient room in the request for at least one
5752 * descriptor and a one byte NUL-terminated string.
5753 * o The allocation of local storage must not fail.
5755 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
5758 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
5761 if (uap
->size
<= sizeof(stack_input
)) {
5762 input
= stack_input
;
5764 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5765 if (input
== NULL
) {
5770 error
= copyin(uap
->entries
, input
, uap
->size
);
5775 AUDIT_ARG(opaque
, input
, uap
->size
);
5778 * Force NUL termination of the copyin buffer to avoid namei() running
5779 * off the end. If the caller passes us bogus data, they may get a
5782 ((char *)input
)[uap
->size
- 1] = 0;
5785 * Access is defined as checking against the process' real identity,
5786 * even if operations are checking the effective identity. This
5787 * requires that we use a local vfs context.
5789 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5790 context
.vc_thread
= current_thread();
5793 * Find out how many entries we have, so we can allocate the result
5794 * array by walking the list and adjusting the count downward by the
5795 * earliest string offset we see.
5797 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5798 desc_actual
= desc_max
;
5799 for (i
= 0; i
< desc_actual
; i
++) {
5801 * Take the offset to the name string for this entry and
5802 * convert to an input array index, which would be one off
5803 * the end of the array if this entry was the lowest-addressed
5806 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5809 * An offset greater than the max allowable offset is an error.
5810 * It is also an error for any valid entry to point
5811 * to a location prior to the end of the current entry, if
5812 * it's not a reference to the string of the previous entry.
5814 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5819 /* Also do not let ad_name_offset point to something beyond the size of the input */
5820 if (input
[i
].ad_name_offset
>= uap
->size
) {
5826 * An offset of 0 means use the previous descriptor's offset;
5827 * this is used to chain multiple requests for the same file
5828 * to avoid multiple lookups.
5831 /* This is not valid for the first entry */
5840 * If the offset of the string for this descriptor is before
5841 * what we believe is the current actual last descriptor,
5842 * then we need to adjust our estimate downward; this permits
5843 * the string table following the last descriptor to be out
5844 * of order relative to the descriptor list.
5846 if (j
< desc_actual
) {
5852 * We limit the actual number of descriptors we are willing to process
5853 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5854 * requested does not exceed this limit,
5856 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5860 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5861 if (result
== NULL
) {
5867 * Do the work by iterating over the descriptor entries we know to
5868 * at least appear to contain valid data.
5871 for (i
= 0; i
< desc_actual
; i
++) {
5873 * If the ad_name_offset is 0, then we use the previous
5874 * results to make the check; otherwise, we are looking up
5877 if (input
[i
].ad_name_offset
!= 0) {
5878 /* discard old vnodes */
5889 * Scan forward in the descriptor list to see if we
5890 * need the parent vnode. We will need it if we are
5891 * deleting, since we must have rights to remove
5892 * entries in the parent directory, as well as the
5893 * rights to delete the object itself.
5895 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5896 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
5897 if (input
[j
].ad_flags
& _DELETE_OK
) {
5902 niopts
= FOLLOW
| AUDITVNPATH1
;
5904 /* need parent for vnode_authorize for deletion test */
5906 niopts
|= WANTPARENT
;
5910 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5911 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5924 * Handle lookup errors.
5934 /* run this access check */
5935 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5938 /* fatal lookup error */
5944 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5946 /* copy out results */
5947 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5950 if (input
&& input
!= stack_input
) {
5951 FREE(input
, M_TEMP
);
5954 FREE(result
, M_TEMP
);
5962 if (IS_VALID_CRED(context
.vc_ucred
)) {
5963 kauth_cred_unref(&context
.vc_ucred
);
5970 * Returns: 0 Success
5971 * namei:EFAULT Bad address
5972 * namei:ENAMETOOLONG Filename too long
5973 * namei:ENOENT No such file or directory
5974 * namei:ELOOP Too many levels of symbolic links
5975 * namei:EBADF Bad file descriptor
5976 * namei:ENOTDIR Not a directory
5981 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5982 int flag
, enum uio_seg segflg
)
5985 struct nameidata nd
;
5987 struct vfs_context context
;
5989 int is_namedstream
= 0;
5993 * Unless the AT_EACCESS option is used, Access is defined as checking
5994 * against the process' real identity, even if operations are checking
5995 * the effective identity. So we need to tweak the credential
5996 * in the context for that case.
5998 if (!(flag
& AT_EACCESS
)) {
5999 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
6001 context
.vc_ucred
= ctx
->vc_ucred
;
6003 context
.vc_thread
= ctx
->vc_thread
;
6006 niopts
= (flag
& AT_SYMLINK_NOFOLLOW
? NOFOLLOW
: FOLLOW
) | AUDITVNPATH1
;
6007 /* need parent for vnode_authorize for deletion test */
6008 if (amode
& _DELETE_OK
) {
6009 niopts
|= WANTPARENT
;
6011 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
6015 /* access(F_OK) calls are allowed for resource forks. */
6016 if (amode
== F_OK
) {
6017 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6020 error
= nameiat(&nd
, fd
);
6026 /* Grab reference on the shadow stream file vnode to
6027 * force an inactive on release which will mark it
6030 if (vnode_isnamedstream(nd
.ni_vp
) &&
6031 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6032 vnode_isshadow(nd
.ni_vp
)) {
6034 vnode_ref(nd
.ni_vp
);
6038 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
6041 if (is_namedstream
) {
6042 vnode_rele(nd
.ni_vp
);
6046 vnode_put(nd
.ni_vp
);
6047 if (amode
& _DELETE_OK
) {
6048 vnode_put(nd
.ni_dvp
);
6053 if (!(flag
& AT_EACCESS
)) {
6054 kauth_cred_unref(&context
.vc_ucred
);
6060 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
6062 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
6063 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
6067 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
6068 __unused
int32_t *retval
)
6070 if (uap
->flag
& ~(AT_EACCESS
| AT_SYMLINK_NOFOLLOW
)) {
6074 return faccessat_internal(vfs_context_current(), uap
->fd
,
6075 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
6079 * Returns: 0 Success
6086 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
6087 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
6088 enum uio_seg segflg
, int fd
, int flag
)
6090 struct nameidata nd
;
6097 struct user64_stat user64_sb
;
6098 struct user32_stat user32_sb
;
6099 struct user64_stat64 user64_sb64
;
6100 struct user32_stat64 user32_sb64
;
6104 kauth_filesec_t fsec
;
6105 size_t xsecurity_bufsize
;
6107 struct fileproc
*fp
= NULL
;
6108 int needsrealdev
= 0;
6110 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6111 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
6115 int is_namedstream
= 0;
6116 /* stat calls are allowed for resource forks. */
6117 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6120 if (flag
& AT_FDONLY
) {
6123 error
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
);
6127 if ((error
= vnode_getwithref(fvp
))) {
6133 error
= nameiat(&nd
, fd
);
6138 fsec
= KAUTH_FILESEC_NONE
;
6140 statptr
= (void *)&source
;
6143 /* Grab reference on the shadow stream file vnode to
6144 * force an inactive on release which will mark it
6147 if (vnode_isnamedstream(nd
.ni_vp
) &&
6148 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6149 vnode_isshadow(nd
.ni_vp
)) {
6151 vnode_ref(nd
.ni_vp
);
6155 needsrealdev
= flag
& AT_REALDEV
? 1 : 0;
6156 if (fp
&& (xsecurity
== USER_ADDR_NULL
)) {
6158 * If the caller has the file open, and is not
6159 * requesting extended security information, we are
6160 * going to let them get the basic stat information.
6162 error
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
,
6163 fp
->f_fglob
->fg_cred
);
6165 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
),
6166 isstat64
, needsrealdev
, ctx
);
6170 if (is_namedstream
) {
6171 vnode_rele(nd
.ni_vp
);
6174 vnode_put(nd
.ni_vp
);
6184 /* Zap spare fields */
6185 if (isstat64
!= 0) {
6186 source
.sb64
.st_lspare
= 0;
6187 source
.sb64
.st_qspare
[0] = 0LL;
6188 source
.sb64
.st_qspare
[1] = 0LL;
6189 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6190 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
6191 my_size
= sizeof(dest
.user64_sb64
);
6192 sbp
= (caddr_t
)&dest
.user64_sb64
;
6194 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
6195 my_size
= sizeof(dest
.user32_sb64
);
6196 sbp
= (caddr_t
)&dest
.user32_sb64
;
6199 * Check if we raced (post lookup) against the last unlink of a file.
6201 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
6202 source
.sb64
.st_nlink
= 1;
6205 source
.sb
.st_lspare
= 0;
6206 source
.sb
.st_qspare
[0] = 0LL;
6207 source
.sb
.st_qspare
[1] = 0LL;
6208 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6209 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
6210 my_size
= sizeof(dest
.user64_sb
);
6211 sbp
= (caddr_t
)&dest
.user64_sb
;
6213 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
6214 my_size
= sizeof(dest
.user32_sb
);
6215 sbp
= (caddr_t
)&dest
.user32_sb
;
6219 * Check if we raced (post lookup) against the last unlink of a file.
6221 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
6222 source
.sb
.st_nlink
= 1;
6225 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
6229 /* caller wants extended security information? */
6230 if (xsecurity
!= USER_ADDR_NULL
) {
6231 /* did we get any? */
6232 if (fsec
== KAUTH_FILESEC_NONE
) {
6233 if (susize(xsecurity_size
, 0) != 0) {
6238 /* find the user buffer size */
6239 xsecurity_bufsize
= fusize(xsecurity_size
);
6241 /* copy out the actual data size */
6242 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
6247 /* if the caller supplied enough room, copy out to it */
6248 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
6249 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
6254 if (fsec
!= KAUTH_FILESEC_NONE
) {
6255 kauth_filesec_free(fsec
);
6261 * stat_extended: Get file status; with extended security (ACL).
6263 * Parameters: p (ignored)
6264 * uap User argument descriptor (see below)
6267 * Indirect: uap->path Path of file to get status from
6268 * uap->ub User buffer (holds file status info)
6269 * uap->xsecurity ACL to get (extended security)
6270 * uap->xsecurity_size Size of ACL
6272 * Returns: 0 Success
6277 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
6278 __unused
int32_t *retval
)
6280 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6281 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6286 * Returns: 0 Success
6287 * fstatat_internal:??? [see fstatat_internal() in this file]
6290 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
6292 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6293 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
6297 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
6299 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6300 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
6304 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6306 * Parameters: p (ignored)
6307 * uap User argument descriptor (see below)
6310 * Indirect: uap->path Path of file to get status from
6311 * uap->ub User buffer (holds file status info)
6312 * uap->xsecurity ACL to get (extended security)
6313 * uap->xsecurity_size Size of ACL
6315 * Returns: 0 Success
6320 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
6322 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6323 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6328 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6330 * Parameters: p (ignored)
6331 * uap User argument descriptor (see below)
6334 * Indirect: uap->path Path of file to get status from
6335 * uap->ub User buffer (holds file status info)
6336 * uap->xsecurity ACL to get (extended security)
6337 * uap->xsecurity_size Size of ACL
6339 * Returns: 0 Success
6344 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
6346 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6347 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6348 AT_SYMLINK_NOFOLLOW
);
6352 * Get file status; this version does not follow links.
6355 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
6357 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6358 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6362 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6364 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6365 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6369 * lstat64_extended: Get file status; can handle large inode numbers; does not
6370 * follow links; with extended security (ACL).
6372 * Parameters: p (ignored)
6373 * uap User argument descriptor (see below)
6376 * Indirect: uap->path Path of file to get status from
6377 * uap->ub User buffer (holds file status info)
6378 * uap->xsecurity ACL to get (extended security)
6379 * uap->xsecurity_size Size of ACL
6381 * Returns: 0 Success
6386 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6388 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6389 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6390 AT_SYMLINK_NOFOLLOW
);
6394 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
6396 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6400 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6401 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6405 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6406 __unused
int32_t *retval
)
6408 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6412 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6413 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6417 * Get configurable pathname variables.
6419 * Returns: 0 Success
6423 * Notes: Global implementation constants are intended to be
6424 * implemented in this function directly; all other constants
6425 * are per-FS implementation, and therefore must be handled in
6426 * each respective FS, instead.
6428 * XXX We implement some things globally right now that should actually be
6429 * XXX per-FS; we will need to deal with this at some point.
6433 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6436 struct nameidata nd
;
6437 vfs_context_t ctx
= vfs_context_current();
6439 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6440 UIO_USERSPACE
, uap
->path
, ctx
);
6446 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
6448 vnode_put(nd
.ni_vp
);
6454 * Return target name of a symbolic link.
6458 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6459 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6465 struct nameidata nd
;
6466 char uio_buf
[UIO_SIZEOF(1)];
6468 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6471 error
= nameiat(&nd
, fd
);
6479 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6480 &uio_buf
[0], sizeof(uio_buf
));
6481 uio_addiov(auio
, buf
, bufsize
);
6482 if (vp
->v_type
!= VLNK
) {
6486 error
= mac_vnode_check_readlink(ctx
, vp
);
6489 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6493 error
= VNOP_READLINK(vp
, auio
, ctx
);
6498 *retval
= bufsize
- (int)uio_resid(auio
);
6503 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6505 enum uio_seg procseg
;
6507 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6508 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6509 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6510 uap
->count
, procseg
, retval
);
6514 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6516 enum uio_seg procseg
;
6518 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6519 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6520 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6524 * Change file flags, the deep inner layer.
6527 chflags0(vnode_t vp
, struct vnode_attr
*va
,
6528 int (*setattr
)(vnode_t
, void *, vfs_context_t
),
6529 void *arg
, vfs_context_t ctx
)
6531 kauth_action_t action
= 0;
6535 error
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
);
6541 /* request authorisation, disregard immutability */
6542 if ((error
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) {
6546 * Request that the auth layer disregard those file flags it's allowed to when
6547 * authorizing this operation; we need to do this in order to be able to
6548 * clear immutable flags.
6550 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6553 error
= (*setattr
)(vp
, arg
, ctx
);
6557 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
);
6566 * Change file flags.
6568 * NOTE: this will vnode_put() `vp'
6571 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6573 struct vnode_attr va
;
6577 VATTR_SET(&va
, va_flags
, flags
);
6579 error
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
);
6582 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6590 * Change flags of a file given a path name.
6594 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6597 vfs_context_t ctx
= vfs_context_current();
6599 struct nameidata nd
;
6601 AUDIT_ARG(fflags
, uap
->flags
);
6602 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6603 UIO_USERSPACE
, uap
->path
, ctx
);
6611 /* we don't vnode_put() here because chflags1 does internally */
6612 error
= chflags1(vp
, uap
->flags
, ctx
);
6618 * Change flags of a file given a file descriptor.
6622 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6627 AUDIT_ARG(fd
, uap
->fd
);
6628 AUDIT_ARG(fflags
, uap
->flags
);
6629 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6633 if ((error
= vnode_getwithref(vp
))) {
6638 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6640 /* we don't vnode_put() here because chflags1 does internally */
6641 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6648 * Change security information on a filesystem object.
6650 * Returns: 0 Success
6651 * EPERM Operation not permitted
6652 * vnode_authattr:??? [anything vnode_authattr can return]
6653 * vnode_authorize:??? [anything vnode_authorize can return]
6654 * vnode_setattr:??? [anything vnode_setattr can return]
6656 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6657 * translated to EPERM before being returned.
6660 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6662 kauth_action_t action
;
6665 AUDIT_ARG(mode
, vap
->va_mode
);
6666 /* XXX audit new args */
6669 /* chmod calls are not allowed for resource forks. */
6670 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6676 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6677 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
6681 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6682 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6683 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6684 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6689 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6690 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6695 /* make sure that the caller is allowed to set this security information */
6696 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6697 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6698 if (error
== EACCES
) {
6704 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
6709 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6710 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6713 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6714 mac_vnode_notify_setowner(ctx
, vp
,
6715 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6716 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6719 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6720 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6729 * Change mode of a file given a path name.
6731 * Returns: 0 Success
6732 * namei:??? [anything namei can return]
6733 * chmod_vnode:??? [anything chmod_vnode can return]
6736 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6737 int fd
, int flag
, enum uio_seg segflg
)
6739 struct nameidata nd
;
6742 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6743 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6745 if ((error
= nameiat(&nd
, fd
))) {
6748 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6749 vnode_put(nd
.ni_vp
);
6755 * chmod_extended: Change the mode of a file given a path name; with extended
6756 * argument list (including extended security (ACL)).
6758 * Parameters: p Process requesting the open
6759 * uap User argument descriptor (see below)
6762 * Indirect: uap->path Path to object (same as 'chmod')
6763 * uap->uid UID to set
6764 * uap->gid GID to set
6765 * uap->mode File mode to set (same as 'chmod')
6766 * uap->xsecurity ACL to set (or delete)
6768 * Returns: 0 Success
6771 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6773 * XXX: We should enummerate the possible errno values here, and where
6774 * in the code they originated.
6777 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6780 struct vnode_attr va
;
6781 kauth_filesec_t xsecdst
;
6783 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6786 if (uap
->mode
!= -1) {
6787 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6789 if (uap
->uid
!= KAUTH_UID_NONE
) {
6790 VATTR_SET(&va
, va_uid
, uap
->uid
);
6792 if (uap
->gid
!= KAUTH_GID_NONE
) {
6793 VATTR_SET(&va
, va_gid
, uap
->gid
);
6797 switch (uap
->xsecurity
) {
6798 /* explicit remove request */
6799 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6800 VATTR_SET(&va
, va_acl
, NULL
);
6803 case USER_ADDR_NULL
:
6806 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6809 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6810 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6813 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6816 if (xsecdst
!= NULL
) {
6817 kauth_filesec_free(xsecdst
);
6823 * Returns: 0 Success
6824 * chmodat:??? [anything chmodat can return]
6827 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6828 int flag
, enum uio_seg segflg
)
6830 struct vnode_attr va
;
6833 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6835 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
6839 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6841 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6842 AT_FDCWD
, 0, UIO_USERSPACE
);
6846 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6848 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6852 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6853 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
6857 * Change mode of a file given a file descriptor.
6860 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6867 if ((error
= file_vnode(fd
, &vp
)) != 0) {
6870 if ((error
= vnode_getwithref(vp
)) != 0) {
6874 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6876 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6877 (void)vnode_put(vp
);
6884 * fchmod_extended: Change mode of a file given a file descriptor; with
6885 * extended argument list (including extended security (ACL)).
6887 * Parameters: p Process requesting to change file mode
6888 * uap User argument descriptor (see below)
6891 * Indirect: uap->mode File mode to set (same as 'chmod')
6892 * uap->uid UID to set
6893 * uap->gid GID to set
6894 * uap->xsecurity ACL to set (or delete)
6895 * uap->fd File descriptor of file to change mode
6897 * Returns: 0 Success
6902 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6905 struct vnode_attr va
;
6906 kauth_filesec_t xsecdst
;
6908 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6911 if (uap
->mode
!= -1) {
6912 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6914 if (uap
->uid
!= KAUTH_UID_NONE
) {
6915 VATTR_SET(&va
, va_uid
, uap
->uid
);
6917 if (uap
->gid
!= KAUTH_GID_NONE
) {
6918 VATTR_SET(&va
, va_gid
, uap
->gid
);
6922 switch (uap
->xsecurity
) {
6923 case USER_ADDR_NULL
:
6924 VATTR_SET(&va
, va_acl
, NULL
);
6926 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6927 VATTR_SET(&va
, va_acl
, NULL
);
6930 case CAST_USER_ADDR_T(-1):
6933 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6936 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6939 error
= fchmod1(p
, uap
->fd
, &va
);
6942 switch (uap
->xsecurity
) {
6943 case USER_ADDR_NULL
:
6944 case CAST_USER_ADDR_T(-1):
6947 if (xsecdst
!= NULL
) {
6948 kauth_filesec_free(xsecdst
);
6955 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6957 struct vnode_attr va
;
6960 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6962 return fchmod1(p
, uap
->fd
, &va
);
6967 * Set ownership given a path name.
6971 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6972 gid_t gid
, int flag
, enum uio_seg segflg
)
6975 struct vnode_attr va
;
6977 struct nameidata nd
;
6979 kauth_action_t action
;
6981 AUDIT_ARG(owner
, uid
, gid
);
6983 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6984 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6986 error
= nameiat(&nd
, fd
);
6995 if (uid
!= (uid_t
)VNOVAL
) {
6996 VATTR_SET(&va
, va_uid
, uid
);
6998 if (gid
!= (gid_t
)VNOVAL
) {
6999 VATTR_SET(&va
, va_gid
, gid
);
7003 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
7009 /* preflight and authorize attribute changes */
7010 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7013 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7016 error
= vnode_setattr(vp
, &va
, ctx
);
7020 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
7026 * EACCES is only allowed from namei(); permissions failure should
7027 * return EPERM, so we need to translate the error code.
7029 if (error
== EACCES
) {
7038 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
7040 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7041 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
7045 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
7047 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7048 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
7052 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
7054 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7058 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
7059 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
7063 * Set ownership given a file descriptor.
7067 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
7069 struct vnode_attr va
;
7070 vfs_context_t ctx
= vfs_context_current();
7073 kauth_action_t action
;
7075 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7076 AUDIT_ARG(fd
, uap
->fd
);
7078 if ((error
= file_vnode(uap
->fd
, &vp
))) {
7082 if ((error
= vnode_getwithref(vp
))) {
7086 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7089 if (uap
->uid
!= VNOVAL
) {
7090 VATTR_SET(&va
, va_uid
, uap
->uid
);
7092 if (uap
->gid
!= VNOVAL
) {
7093 VATTR_SET(&va
, va_gid
, uap
->gid
);
7097 /* chown calls are not allowed for resource forks. */
7098 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7105 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7111 /* preflight and authorize attribute changes */
7112 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7115 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7116 if (error
== EACCES
) {
7121 error
= vnode_setattr(vp
, &va
, ctx
);
7125 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7130 (void)vnode_put(vp
);
7136 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
7140 if (usrtvp
== USER_ADDR_NULL
) {
7141 struct timeval old_tv
;
7142 /* XXX Y2038 bug because of microtime argument */
7144 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
7147 if (IS_64BIT_PROCESS(current_proc())) {
7148 struct user64_timeval tv
[2];
7149 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7153 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7154 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7156 struct user32_timeval tv
[2];
7157 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7161 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7162 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7169 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
7173 struct vnode_attr va
;
7174 kauth_action_t action
;
7176 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7179 VATTR_SET(&va
, va_access_time
, ts
[0]);
7180 VATTR_SET(&va
, va_modify_time
, ts
[1]);
7182 va
.va_vaflags
|= VA_UTIMES_NULL
;
7186 /* utimes calls are not allowed for resource forks. */
7187 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7194 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7199 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7200 if (!nullflag
&& error
== EACCES
) {
7206 /* since we may not need to auth anything, check here */
7207 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7208 if (!nullflag
&& error
== EACCES
) {
7213 error
= vnode_setattr(vp
, &va
, ctx
);
7217 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7226 * Set the access and modification times of a file.
7230 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
7232 struct timespec ts
[2];
7235 struct nameidata nd
;
7236 vfs_context_t ctx
= vfs_context_current();
7239 * AUDIT: Needed to change the order of operations to do the
7240 * name lookup first because auditing wants the path.
7242 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
7243 UIO_USERSPACE
, uap
->path
, ctx
);
7251 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7252 * the current time instead.
7255 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7259 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
7262 vnode_put(nd
.ni_vp
);
7267 * Set the access and modification times of a file.
7271 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
7273 struct timespec ts
[2];
7278 AUDIT_ARG(fd
, uap
->fd
);
7280 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7283 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
7286 if ((error
= vnode_getwithref(vp
))) {
7291 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
7298 * Truncate a file given its path name.
7302 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
7305 struct vnode_attr va
;
7306 vfs_context_t ctx
= vfs_context_current();
7308 struct nameidata nd
;
7309 kauth_action_t action
;
7311 if (uap
->length
< 0) {
7314 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
7315 UIO_USERSPACE
, uap
->path
, ctx
);
7316 if ((error
= namei(&nd
))) {
7324 VATTR_SET(&va
, va_data_size
, uap
->length
);
7327 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
7333 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7336 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7339 error
= vnode_setattr(vp
, &va
, ctx
);
7343 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
7353 * Truncate a file given a file descriptor.
7357 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
7359 vfs_context_t ctx
= vfs_context_current();
7360 struct vnode_attr va
;
7362 struct fileproc
*fp
;
7366 AUDIT_ARG(fd
, uap
->fd
);
7367 if (uap
->length
< 0) {
7371 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
7375 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
7377 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7386 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
7388 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
7389 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7394 if ((error
= vnode_getwithref(vp
)) != 0) {
7398 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7401 error
= mac_vnode_check_truncate(ctx
,
7402 fp
->f_fglob
->fg_cred
, vp
);
7404 (void)vnode_put(vp
);
7409 VATTR_SET(&va
, va_data_size
, uap
->length
);
7410 error
= vnode_setattr(vp
, &va
, ctx
);
7414 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
7418 (void)vnode_put(vp
);
7426 * Sync an open file with synchronized I/O _file_ integrity completion
7430 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7432 __pthread_testcancel(1);
7433 return fsync_common(p
, uap
, MNT_WAIT
);
7438 * Sync an open file with synchronized I/O _file_ integrity completion
7440 * Notes: This is a legacy support function that does not test for
7441 * thread cancellation points.
7445 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7447 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7452 * Sync an open file with synchronized I/O _data_ integrity completion
7456 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7458 __pthread_testcancel(1);
7459 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7466 * Common fsync code to support both synchronized I/O file integrity completion
7467 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7469 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7470 * will only guarantee that the file data contents are retrievable. If
7471 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
7472 * includes additional metadata unnecessary for retrieving the file data
7473 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7476 * Parameters: p The process
7477 * uap->fd The descriptor to synchronize
7478 * flags The data integrity flags
7480 * Returns: int Success
7481 * fp_getfvp:EBADF Bad file descriptor
7482 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7483 * VNOP_FSYNC:??? unspecified
7485 * Notes: We use struct fsync_args because it is a short name, and all
7486 * caller argument structures are otherwise identical.
7489 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7492 struct fileproc
*fp
;
7493 vfs_context_t ctx
= vfs_context_current();
7496 AUDIT_ARG(fd
, uap
->fd
);
7498 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7501 if ((error
= vnode_getwithref(vp
))) {
7506 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7508 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7511 /* Sync resource fork shadow file if necessary. */
7513 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7514 (vp
->v_parent
!= NULLVP
) &&
7515 vnode_isshadow(vp
) &&
7516 (fp
->f_flags
& FP_WRITTEN
)) {
7517 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7521 (void)vnode_put(vp
);
7527 * Duplicate files. Source must be a file, target must be a file or
7530 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7531 * perform inheritance correctly.
7535 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7537 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7538 struct nameidata fromnd
, tond
;
7540 vfs_context_t ctx
= vfs_context_current();
7542 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7543 struct vnode_attr va
;
7546 /* Check that the flags are valid. */
7548 if (uap
->flags
& ~CPF_MASK
) {
7552 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7553 UIO_USERSPACE
, uap
->from
, ctx
);
7554 if ((error
= namei(&fromnd
))) {
7559 NDINIT(&tond
, CREATE
, OP_LINK
,
7560 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7561 UIO_USERSPACE
, uap
->to
, ctx
);
7562 if ((error
= namei(&tond
))) {
7569 if (!(uap
->flags
& CPF_OVERWRITE
)) {
7575 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7580 /* This calls existing MAC hooks for open */
7581 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7588 * See unlinkat_internal for an explanation of the potential
7589 * ENOENT from the MAC hook but the gist is that the MAC hook
7590 * can fail because vn_getpath isn't able to return the full
7591 * path. We choose to ignore this failure.
7593 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7594 if (error
&& error
!= ENOENT
) {
7602 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7603 /* Mask off all but regular access permissions */
7604 VATTR_SET(&va
, va_mode
,
7605 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7606 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7610 #endif /* CONFIG_MACF */
7612 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7620 * If source is the same as the destination (that is the
7621 * same inode number) then there is nothing to do.
7622 * (fixed to have POSIX semantics - CSM 3/2/98)
7628 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
7631 sdvp
= tond
.ni_startdir
;
7633 * nameidone has to happen before we vnode_put(tdvp)
7634 * since it may need to release the fs_nodelock on the tdvp
7654 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7657 * Helper function for doing clones. The caller is expected to provide an
7658 * iocounted source vnode and release it.
7661 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7662 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7665 struct nameidata tond
;
7668 boolean_t free_src_acl
;
7669 boolean_t attr_cleanup
;
7671 kauth_action_t action
;
7672 struct componentname
*cnp
;
7674 struct vnode_attr va
;
7675 struct vnode_attr nva
;
7676 uint32_t vnop_flags
;
7678 v_type
= vnode_vtype(fvp
);
7683 action
= KAUTH_VNODE_ADD_FILE
;
7686 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7687 fvp
->v_mountedhere
) {
7690 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7696 AUDIT_ARG(fd2
, dst_dirfd
);
7697 AUDIT_ARG(value32
, flags
);
7699 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7700 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7701 UIO_USERSPACE
, dst
, ctx
);
7702 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7709 free_src_acl
= FALSE
;
7710 attr_cleanup
= FALSE
;
7717 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7723 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7727 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
7731 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7732 if (data_read_authorised
) {
7733 action
&= ~KAUTH_VNODE_READ_DATA
;
7735 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
7740 * certain attributes may need to be changed from the source, we ask for
7744 VATTR_WANTED(&va
, va_uid
);
7745 VATTR_WANTED(&va
, va_gid
);
7746 VATTR_WANTED(&va
, va_mode
);
7747 VATTR_WANTED(&va
, va_flags
);
7748 VATTR_WANTED(&va
, va_acl
);
7750 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
7755 VATTR_SET(&nva
, va_type
, v_type
);
7756 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7757 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7758 free_src_acl
= TRUE
;
7761 /* Handle ACL inheritance, initialize vap. */
7762 if (v_type
== VLNK
) {
7763 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7765 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7769 attr_cleanup
= TRUE
;
7772 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7774 * We've got initial values for all security parameters,
7775 * If we are superuser, then we can change owners to be the
7776 * same as the source. Both superuser and the owner have default
7777 * WRITE_SECURITY privileges so all other fields can be taken
7778 * from source as well.
7780 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7781 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
7782 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7784 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
7785 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7788 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7791 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
7792 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
7794 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7795 VATTR_SET(&nva
, va_flags
,
7796 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7797 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7800 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7802 if (!error
&& tvp
) {
7803 int update_flags
= 0;
7806 #endif /* CONFIG_FSE */
7809 * If some of the requested attributes weren't handled by the
7810 * VNOP, use our fallback code.
7812 if (!VATTR_ALL_SUPPORTED(&va
)) {
7813 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7817 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7818 VNODE_LABEL_CREATE
, ctx
);
7821 // Make sure the name & parent pointers are hooked up
7822 if (tvp
->v_name
== NULL
) {
7823 update_flags
|= VNODE_UPDATE_NAME
;
7825 if (tvp
->v_parent
== NULLVP
) {
7826 update_flags
|= VNODE_UPDATE_PARENT
;
7830 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7831 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7835 switch (vnode_vtype(tvp
)) {
7839 fsevent
= FSE_CREATE_FILE
;
7842 fsevent
= FSE_CREATE_DIR
;
7848 if (need_fsevent(fsevent
, tvp
)) {
7850 * The following is a sequence of three explicit events.
7851 * A pair of FSE_CLONE events representing the source and destination
7852 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7853 * fseventsd may coalesce the destination clone and create events
7854 * into a single event resulting in the following sequence for a client
7856 * FSE_CLONE | FSE_CREATE (dst)
7858 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7860 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7863 #endif /* CONFIG_FSE */
7868 vn_attribute_cleanup(&nva
, defaulted
);
7870 if (free_src_acl
&& va
.va_acl
) {
7871 kauth_acl_free(va
.va_acl
);
7882 * clone files or directories, target must not exist.
7886 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7887 __unused
int32_t *retval
)
7890 struct nameidata fromnd
;
7893 vfs_context_t ctx
= vfs_context_current();
7895 /* Check that the flags are valid. */
7896 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7900 AUDIT_ARG(fd
, uap
->src_dirfd
);
7902 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7903 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7904 UIO_USERSPACE
, uap
->src
, ctx
);
7905 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
7912 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
7920 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7921 __unused
int32_t *retval
)
7924 struct fileproc
*fp
;
7926 vfs_context_t ctx
= vfs_context_current();
7928 /* Check that the flags are valid. */
7929 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7933 AUDIT_ARG(fd
, uap
->src_fd
);
7934 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7939 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7940 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7945 if ((error
= vnode_getwithref(fvp
))) {
7949 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7951 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7956 file_drop(uap
->src_fd
);
7961 rename_submounts_callback(mount_t mp
, void *arg
)
7964 mount_t pmp
= (mount_t
)arg
;
7965 int prefix_len
= strlen(pmp
->mnt_vfsstat
.f_mntonname
);
7967 if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) {
7971 if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') {
7975 if ((error
= vfs_busy(mp
, LK_NOWAIT
))) {
7976 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7980 int pathlen
= MAXPATHLEN
;
7981 if ((error
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) {
7982 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7991 * Rename files. Source and destination must either both be directories,
7992 * or both not be directories. If target is a directory, it must be empty.
7996 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7997 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7999 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
8003 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
8009 struct nameidata
*fromnd
, *tond
;
8017 const char *oname
= NULL
;
8018 char *from_name
= NULL
, *to_name
= NULL
;
8019 char *from_name_no_firmlink
= NULL
, *to_name_no_firmlink
= NULL
;
8020 int from_len
= 0, to_len
= 0;
8021 int from_len_no_firmlink
= 0, to_len_no_firmlink
= 0;
8022 int holding_mntlock
;
8023 mount_t locked_mp
= NULL
;
8024 vnode_t oparent
= NULLVP
;
8026 fse_info from_finfo
, to_finfo
;
8028 int from_truncated
= 0, to_truncated
= 0;
8029 int from_truncated_no_firmlink
= 0, to_truncated_no_firmlink
= 0;
8031 struct vnode_attr
*fvap
, *tvap
;
8033 /* carving out a chunk for structs that are too big to be on stack. */
8035 struct nameidata from_node
, to_node
;
8036 struct vnode_attr fv_attr
, tv_attr
;
8038 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
8039 fromnd
= &__rename_data
->from_node
;
8040 tond
= &__rename_data
->to_node
;
8042 holding_mntlock
= 0;
8051 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
8053 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8055 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
8057 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8060 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8061 if ((error
= nameiat(fromnd
, fromfd
))) {
8064 fdvp
= fromnd
->ni_dvp
;
8065 fvp
= fromnd
->ni_vp
;
8067 if (fvp
&& fvp
->v_type
== VDIR
) {
8068 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
8072 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8073 if ((error
= nameiat(tond
, tofd
))) {
8075 * Translate error code for rename("dir1", "dir2/.").
8077 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
8082 tdvp
= tond
->ni_dvp
;
8086 #if DEVELOPMENT || DEBUG
8088 * XXX VSWAP: Check for entitlements or special flag here
8089 * so we can restrict access appropriately.
8091 #else /* DEVELOPMENT || DEBUG */
8093 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8098 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8102 #endif /* DEVELOPMENT || DEBUG */
8104 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
8109 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
8114 batched
= vnode_compound_rename_available(fdvp
);
8117 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
8120 get_fse_info(fvp
, &from_finfo
, ctx
);
8122 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
8127 fvap
= &__rename_data
->fv_attr
;
8131 get_fse_info(tvp
, &to_finfo
, ctx
);
8132 } else if (batched
) {
8133 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
8138 tvap
= &__rename_data
->tv_attr
;
8143 #endif /* CONFIG_FSE */
8145 has_listeners
= kauth_authorize_fileop_has_listeners();
8149 if (AUDIT_RECORD_EXISTS()) {
8154 if (need_event
|| has_listeners
) {
8155 if (from_name
== NULL
) {
8156 GET_PATH(from_name
);
8157 if (from_name
== NULL
) {
8163 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
8165 if (from_name_no_firmlink
== NULL
) {
8166 GET_PATH(from_name_no_firmlink
);
8167 if (from_name_no_firmlink
== NULL
) {
8173 from_len_no_firmlink
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
);
8176 if (need_event
|| need_kpath2
|| has_listeners
) {
8177 if (to_name
== NULL
) {
8179 if (to_name
== NULL
) {
8185 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
8187 if (to_name_no_firmlink
== NULL
) {
8188 GET_PATH(to_name_no_firmlink
);
8189 if (to_name_no_firmlink
== NULL
) {
8195 to_len_no_firmlink
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
);
8196 if (to_name
&& need_kpath2
) {
8197 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
8202 * Claim: this check will never reject a valid rename.
8203 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8204 * Suppose fdvp and tdvp are not on the same mount.
8205 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8206 * then you can't move it to within another dir on the same mountpoint.
8207 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8209 * If this check passes, then we are safe to pass these vnodes to the same FS.
8211 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
8215 goto skipped_lookup
;
8219 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
8221 if (error
== ENOENT
) {
8222 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8224 * We encountered a race where after doing the namei, tvp stops
8225 * being valid. If so, simply re-drive the rename call from the
8237 * If the source and destination are the same (i.e. they're
8238 * links to the same vnode) and the target file system is
8239 * case sensitive, then there is nothing to do.
8241 * XXX Come back to this.
8247 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8248 * then assume that this file system is case sensitive.
8250 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
8251 pathconf_val
!= 0) {
8257 * Allow the renaming of mount points.
8258 * - target must not exist
8259 * - target must reside in the same directory as source
8260 * - union mounts cannot be renamed
8261 * - "/" cannot be renamed
8263 * XXX Handle this in VFS after a continued lookup (if we missed
8264 * in the cache to start off)
8266 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8267 * we'll skip past here. The file system is responsible for
8268 * checking that @tvp is not a descendent of @fvp and vice versa
8269 * so it should always return EINVAL if either @tvp or @fvp is the
8272 if ((fvp
->v_flag
& VROOT
) &&
8273 (fvp
->v_type
== VDIR
) &&
8275 (fvp
->v_mountedhere
== NULL
) &&
8277 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
8278 ((fvp
->v_mount
->mnt_kern_flag
& MNTK_SYSTEM
) == 0) &&
8279 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
8282 /* switch fvp to the covered vnode */
8283 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
8284 if ((vnode_getwithref(coveredvp
))) {
8294 * Check for cross-device rename.
8296 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
8297 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
8303 * If source is the same as the destination (that is the
8304 * same inode number) then there is nothing to do...
8305 * EXCEPT if the underlying file system supports case
8306 * insensitivity and is case preserving. In this case
8307 * the file system needs to handle the special case of
8308 * getting the same vnode as source (fvp) and target (tvp).
8310 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8311 * and _PC_CASE_PRESERVING can have this exception, and they need to
8312 * handle the special case of getting the same vnode as target and
8313 * source. NOTE: Then the target is unlocked going into vnop_rename,
8314 * so not to cause locking problems. There is a single reference on tvp.
8316 * NOTE - that fvp == tvp also occurs if they are hard linked and
8317 * that correct behaviour then is just to return success without doing
8320 * XXX filesystem should take care of this itself, perhaps...
8322 if (fvp
== tvp
&& fdvp
== tdvp
) {
8323 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
8324 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
8325 fromnd
->ni_cnd
.cn_namelen
)) {
8330 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
8332 * we're holding a reference and lock
8333 * on locked_mp, but it no longer matches
8334 * what we want to do... so drop our hold
8336 mount_unlock_renames(locked_mp
);
8337 mount_drop(locked_mp
, 0);
8338 holding_mntlock
= 0;
8340 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
8342 * serialize renames that re-shape
8343 * the tree... if holding_mntlock is
8344 * set, then we're ready to go...
8346 * first need to drop the iocounts
8347 * we picked up, second take the
8348 * lock to serialize the access,
8349 * then finally start the lookup
8350 * process over with the lock held
8352 if (!holding_mntlock
) {
8354 * need to grab a reference on
8355 * the mount point before we
8356 * drop all the iocounts... once
8357 * the iocounts are gone, the mount
8360 locked_mp
= fvp
->v_mount
;
8361 mount_ref(locked_mp
, 0);
8364 * nameidone has to happen before we vnode_put(tvp)
8365 * since it may need to release the fs_nodelock on the tvp
8375 * nameidone has to happen before we vnode_put(fdvp)
8376 * since it may need to release the fs_nodelock on the fvp
8383 mount_lock_renames(locked_mp
);
8384 holding_mntlock
= 1;
8390 * when we dropped the iocounts to take
8391 * the lock, we allowed the identity of
8392 * the various vnodes to change... if they did,
8393 * we may no longer be dealing with a rename
8394 * that reshapes the tree... once we're holding
8395 * the iocounts, the vnodes can't change type
8396 * so we're free to drop the lock at this point
8399 if (holding_mntlock
) {
8400 mount_unlock_renames(locked_mp
);
8401 mount_drop(locked_mp
, 0);
8402 holding_mntlock
= 0;
8406 // save these off so we can later verify that fvp is the same
8407 oname
= fvp
->v_name
;
8408 oparent
= fvp
->v_parent
;
8411 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
8412 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
8415 if (holding_mntlock
) {
8417 * we can drop our serialization
8420 mount_unlock_renames(locked_mp
);
8421 mount_drop(locked_mp
, 0);
8422 holding_mntlock
= 0;
8425 if (error
== EDATALESS
) {
8427 * If we've been here before, something has gone
8428 * horribly wrong and we should just get out lest
8429 * we spiral around the drain forever.
8431 if (flags
& VFS_RENAME_DATALESS
) {
8437 * The object we're renaming is dataless (or has a
8438 * dataless descendent) and requires materialization
8439 * before the rename occurs. But we're holding the
8440 * mount point's rename lock, so it's not safe to
8443 * In this case, we release the lock, perform the
8444 * materialization, and start the whole thing over.
8446 error
= vnode_materialize_dataless_file(fvp
,
8447 NAMESPACE_HANDLER_RENAME_OP
);
8451 * The next time around we need to tell the
8452 * file system that the materialization has
8455 flags
|= VFS_RENAME_DATALESS
;
8460 if (error
== EKEEPLOOKING
) {
8461 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8462 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8463 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8467 fromnd
->ni_vp
= fvp
;
8470 goto continue_lookup
;
8474 * We may encounter a race in the VNOP where the destination didn't
8475 * exist when we did the namei, but it does by the time we go and
8476 * try to create the entry. In this case, we should re-drive this rename
8477 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8478 * but other filesystems susceptible to this race could return it, too.
8480 if (error
== ERECYCLE
) {
8485 * For compound VNOPs, the authorization callback may return
8486 * ENOENT in case of racing hardlink lookups hitting the name
8487 * cache, redrive the lookup.
8489 if (batched
&& error
== ENOENT
) {
8490 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8499 /* call out to allow 3rd party notification of rename.
8500 * Ignore result of kauth_authorize_fileop call.
8502 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8503 KAUTH_FILEOP_RENAME
,
8504 (uintptr_t)from_name
, (uintptr_t)to_name
);
8505 if (flags
& VFS_RENAME_SWAP
) {
8506 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8507 KAUTH_FILEOP_RENAME
,
8508 (uintptr_t)to_name
, (uintptr_t)from_name
);
8512 if (from_name
!= NULL
&& to_name
!= NULL
) {
8513 if (from_truncated
|| to_truncated
) {
8514 // set it here since only the from_finfo gets reported up to user space
8515 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8519 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8522 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8526 add_fsevent(FSE_RENAME
, ctx
,
8527 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8528 FSE_ARG_FINFO
, &from_finfo
,
8529 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8530 FSE_ARG_FINFO
, &to_finfo
,
8532 if (flags
& VFS_RENAME_SWAP
) {
8534 * Strictly speaking, swap is the equivalent of
8535 * *three* renames. FSEvents clients should only take
8536 * the events as a hint, so we only bother reporting
8539 add_fsevent(FSE_RENAME
, ctx
,
8540 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8541 FSE_ARG_FINFO
, &to_finfo
,
8542 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8543 FSE_ARG_FINFO
, &from_finfo
,
8547 add_fsevent(FSE_RENAME
, ctx
,
8548 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8549 FSE_ARG_FINFO
, &from_finfo
,
8550 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8554 #endif /* CONFIG_FSE */
8557 * update filesystem's mount point data
8560 char *cp
, *pathend
, *mpname
;
8566 mp
= fvp
->v_mountedhere
;
8568 if (vfs_busy(mp
, LK_NOWAIT
)) {
8572 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
8574 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8575 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8577 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8580 /* find current mount point prefix */
8581 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8582 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8587 /* find last component of target name */
8588 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8594 /* Update f_mntonname of sub mounts */
8595 vfs_iterate(0, rename_submounts_callback
, (void *)mp
);
8597 /* append name to prefix */
8598 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8599 bzero(pathend
, maxlen
);
8601 strlcpy(pathend
, mpname
, maxlen
);
8603 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
8607 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
8610 * fix up name & parent pointers. note that we first
8611 * check that fvp has the same name/parent pointers it
8612 * had before the rename call... this is a 'weak' check
8615 * XXX oparent and oname may not be set in the compound vnop case
8617 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8620 update_flags
= VNODE_UPDATE_NAME
;
8623 update_flags
|= VNODE_UPDATE_PARENT
;
8626 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8629 if (to_name
!= NULL
) {
8630 RELEASE_PATH(to_name
);
8633 if (to_name_no_firmlink
!= NULL
) {
8634 RELEASE_PATH(to_name_no_firmlink
);
8635 to_name_no_firmlink
= NULL
;
8637 if (from_name
!= NULL
) {
8638 RELEASE_PATH(from_name
);
8641 if (from_name_no_firmlink
!= NULL
) {
8642 RELEASE_PATH(from_name_no_firmlink
);
8643 from_name_no_firmlink
= NULL
;
8645 if (holding_mntlock
) {
8646 mount_unlock_renames(locked_mp
);
8647 mount_drop(locked_mp
, 0);
8648 holding_mntlock
= 0;
8652 * nameidone has to happen before we vnode_put(tdvp)
8653 * since it may need to release the fs_nodelock on the tdvp
8664 * nameidone has to happen before we vnode_put(fdvp)
8665 * since it may need to release the fs_nodelock on the fdvp
8676 * If things changed after we did the namei, then we will re-drive
8677 * this rename call from the top.
8684 FREE(__rename_data
, M_TEMP
);
8689 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
8691 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
8692 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
8696 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
8698 return renameat_internal(
8699 vfs_context_current(),
8700 uap
->fromfd
, uap
->from
,
8702 UIO_USERSPACE
, uap
->flags
);
8706 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
8708 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
8709 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
8713 * Make a directory file.
8715 * Returns: 0 Success
8718 * vnode_authorize:???
8723 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
8724 enum uio_seg segflg
)
8728 int update_flags
= 0;
8730 struct nameidata nd
;
8732 AUDIT_ARG(mode
, vap
->va_mode
);
8733 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
8735 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
8736 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
8739 error
= nameiat(&nd
, fd
);
8751 batched
= vnode_compound_mkdir_available(dvp
);
8753 VATTR_SET(vap
, va_type
, VDIR
);
8757 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8758 * only get EEXIST or EISDIR for existing path components, and not that it could see
8759 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8760 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8762 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
8763 if (error
== EACCES
|| error
== EPERM
) {
8771 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8772 * rather than EACCES if the target exists.
8774 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
8776 error2
= nameiat(&nd
, fd
);
8790 * make the directory
8792 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
8793 if (error
== EKEEPLOOKING
) {
8795 goto continue_lookup
;
8801 // Make sure the name & parent pointers are hooked up
8802 if (vp
->v_name
== NULL
) {
8803 update_flags
|= VNODE_UPDATE_NAME
;
8805 if (vp
->v_parent
== NULLVP
) {
8806 update_flags
|= VNODE_UPDATE_PARENT
;
8810 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8814 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8819 * nameidone has to happen before we vnode_put(dvp)
8820 * since it may need to release the fs_nodelock on the dvp
8835 * mkdir_extended: Create a directory; with extended security (ACL).
8837 * Parameters: p Process requesting to create the directory
8838 * uap User argument descriptor (see below)
8841 * Indirect: uap->path Path of directory to create
8842 * uap->mode Access permissions to set
8843 * uap->xsecurity ACL to set
8845 * Returns: 0 Success
8850 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8853 kauth_filesec_t xsecdst
;
8854 struct vnode_attr va
;
8856 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8859 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8860 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
8865 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8866 if (xsecdst
!= NULL
) {
8867 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8870 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8872 if (xsecdst
!= NULL
) {
8873 kauth_filesec_free(xsecdst
);
8879 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8881 struct vnode_attr va
;
8884 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8886 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8891 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8893 struct vnode_attr va
;
8896 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8898 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
8903 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8904 enum uio_seg segflg
, int unlink_flags
)
8908 struct nameidata nd
;
8910 char *no_firmlink_path
= NULL
;
8912 int len_no_firmlink_path
= 0;
8913 int has_listeners
= 0;
8915 int truncated_path
= 0;
8916 int truncated_no_firmlink_path
= 0;
8918 struct vnode_attr va
;
8919 #endif /* CONFIG_FSE */
8920 struct vnode_attr
*vap
= NULL
;
8921 int restart_count
= 0;
8927 * This loop exists to restart rmdir in the unlikely case that two
8928 * processes are simultaneously trying to remove the same directory
8929 * containing orphaned appleDouble files.
8932 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8933 segflg
, dirpath
, ctx
);
8934 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8939 error
= nameiat(&nd
, fd
);
8948 batched
= vnode_compound_rmdir_available(vp
);
8950 if (vp
->v_flag
& VROOT
) {
8952 * The root of a mounted filesystem cannot be deleted.
8958 #if DEVELOPMENT || DEBUG
8960 * XXX VSWAP: Check for entitlements or special flag here
8961 * so we can restrict access appropriately.
8963 #else /* DEVELOPMENT || DEBUG */
8965 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8969 #endif /* DEVELOPMENT || DEBUG */
8972 * Removed a check here; we used to abort if vp's vid
8973 * was not the same as what we'd seen the last time around.
8974 * I do not think that check was valid, because if we retry
8975 * and all dirents are gone, the directory could legitimately
8976 * be recycled but still be present in a situation where we would
8977 * have had permission to delete. Therefore, we won't make
8978 * an effort to preserve that check now that we may not have a
8983 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
8985 if (error
== ENOENT
) {
8986 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8997 if (!vnode_compound_rmdir_available(dvp
)) {
8998 panic("No error, but no compound rmdir?");
9005 need_event
= need_fsevent(FSE_DELETE
, dvp
);
9008 get_fse_info(vp
, &finfo
, ctx
);
9010 error
= vfs_get_notify_attributes(&va
);
9019 has_listeners
= kauth_authorize_fileop_has_listeners();
9020 if (need_event
|| has_listeners
) {
9029 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
9031 if (no_firmlink_path
== NULL
) {
9032 GET_PATH(no_firmlink_path
);
9033 if (no_firmlink_path
== NULL
) {
9039 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
9041 if (truncated_no_firmlink_path
) {
9042 finfo
.mode
|= FSE_TRUNCATED_PATH
;
9047 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9050 /* Couldn't find a vnode */
9054 if (error
== EKEEPLOOKING
) {
9055 goto continue_lookup
;
9056 } else if (batched
&& error
== ENOENT
) {
9057 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9059 * For compound VNOPs, the authorization callback
9060 * may return ENOENT in case of racing hard link lookups
9061 * redrive the lookup.
9070 * XXX There's no provision for passing flags
9071 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9072 * because it's not empty, then we try again
9073 * with VNOP_REMOVE(), passing in a special
9074 * flag that clever file systems will know
9077 if (error
== ENOTEMPTY
&&
9078 (unlink_flags
& VNODE_REMOVE_DATALESS_DIR
) != 0) {
9080 * If this fails, we want to keep the original
9083 if (vn_remove(dvp
, &vp
, &nd
,
9084 VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) {
9089 #if CONFIG_APPLEDOUBLE
9091 * Special case to remove orphaned AppleDouble
9092 * files. I don't like putting this in the kernel,
9093 * but carbon does not like putting this in carbon either,
9096 if (error
== ENOTEMPTY
) {
9097 int ad_error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
9098 if (ad_error
== EBUSY
) {
9105 * Assuming everything went well, we will try the RMDIR again
9108 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9111 #endif /* CONFIG_APPLEDOUBLE */
9113 * Call out to allow 3rd party notification of delete.
9114 * Ignore result of kauth_authorize_fileop call.
9117 if (has_listeners
) {
9118 kauth_authorize_fileop(vfs_context_ucred(ctx
),
9119 KAUTH_FILEOP_DELETE
,
9124 if (vp
->v_flag
& VISHARDLINK
) {
9125 // see the comment in unlink1() about why we update
9126 // the parent of a hard link when it is removed
9127 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
9133 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
9135 add_fsevent(FSE_DELETE
, ctx
,
9136 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
9137 FSE_ARG_FINFO
, &finfo
,
9149 if (no_firmlink_path
!= NULL
) {
9150 RELEASE_PATH(no_firmlink_path
);
9151 no_firmlink_path
= NULL
;
9155 * nameidone has to happen before we vnode_put(dvp)
9156 * since it may need to release the fs_nodelock on the dvp
9165 if (restart_flag
== 0) {
9166 wakeup_one((caddr_t
)vp
);
9169 tsleep(vp
, PVFS
, "rm AD", 1);
9170 } while (restart_flag
!= 0);
9176 * Remove a directory file.
9180 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
9182 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
9183 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0);
/*
 * Record-length helpers for directory entries.
 *
 * All three are function-like macros, so their arguments are substituted
 * textually; every use of a parameter is parenthesized so that an expression
 * argument binds as a single unit.
 */

/*
 * DIRENT64_LEN(namlen): length of a struct direntry record whose name is
 * `namlen` bytes long, rounded up to a multiple of 8.
 *
 * (MAXPATHLEN - 1) is subtracted from sizeof(struct direntry) before the name
 * length is added back.
 * NOTE(review): presumably this removes the unused part of a fixed-size
 * d_name[MAXPATHLEN] tail while keeping one byte for the terminator --
 * confirm against the struct direntry definition.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * DIRENT_LEN(namelen): length of a struct dirent record whose name is
 * `namelen` bytes long plus one extra byte, rounded up to a multiple of 4.
 *
 * (__DARWIN_MAXNAMLEN + 1) is subtracted from sizeof(struct dirent) before
 * the (namelen + 1) bytes are added back.
 * NOTE(review): the extra byte is presumably the terminating NUL and the
 * subtracted term the full size of the d_name array -- confirm against the
 * struct dirent definition.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/*
 * DIRENT_END(dep): address of the last byte of the record that `dep` points
 * to, i.e. d_reclen - 1 bytes past its start.  `dep` is evaluated twice, so
 * it must not have side effects.
 */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
9199 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
9200 int *numdirent
, vfs_context_t ctxp
)
9202 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9203 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
9204 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
9205 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
9210 struct direntry
*entry64
;
9216 * We're here because the underlying file system does not
9217 * support direntries or we mounted denying support so we must
9218 * fall back to dirents and convert them to direntries.
9220 * Our kernel buffer needs to be smaller since re-packing will
9221 * expand each dirent. The worst case (when the name length
9222 * is 3 or less) corresponds to a struct direntry size of 32
9223 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9224 * (4-byte aligned). So having a buffer that is 3/8 the size
9225 * will prevent us from reading more than we can pack.
9227 * Since this buffer is wired memory, we will limit the
9228 * buffer size to a maximum of 32K. We would really like to
9229 * use 32K in the MIN(), but we use magic number 87371 to
9230 * prevent uio_resid() * 3 / 8 from overflowing.
9232 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
9233 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
9234 if (bufptr
== NULL
) {
9238 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
9239 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
9240 auio
->uio_offset
= uio
->uio_offset
;
9242 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
9244 dep
= (struct dirent
*)bufptr
;
9245 bytesread
= bufsize
- uio_resid(auio
);
9247 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
9250 * Convert all the entries and copy them out to user's buffer.
9252 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
9253 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
9255 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
9256 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
9257 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
9258 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
9259 vp
->v_name
? vp
->v_name
: "<unknown>");
9264 bzero(entry64
, enbufsize
);
9265 /* Convert a dirent to a dirent64. */
9266 entry64
->d_ino
= dep
->d_ino
;
9267 entry64
->d_seekoff
= 0;
9268 entry64
->d_reclen
= enbufsize
;
9269 entry64
->d_namlen
= dep
->d_namlen
;
9270 entry64
->d_type
= dep
->d_type
;
9271 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
9273 /* Move to next entry. */
9274 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
9276 /* Copy entry64 to user's buffer. */
9277 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
9280 /* Update the real offset using the offset we got from VNOP_READDIR. */
9282 uio
->uio_offset
= auio
->uio_offset
;
9285 FREE(bufptr
, M_TEMP
);
9286 FREE(entry64
, M_TEMP
);
9291 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9294 * Read a block of directory entries in a file system independent format.
9297 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
9298 off_t
*offset
, int *eofflag
, int flags
)
9301 struct vfs_context context
= *vfs_context_current(); /* local copy */
9302 struct fileproc
*fp
;
9304 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9306 int error
, numdirent
;
9307 char uio_buf
[UIO_SIZEOF(1)];
9309 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
9313 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9314 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9319 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
9320 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
9324 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
9329 if ((error
= vnode_getwithref(vp
))) {
9332 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9335 if (vp
->v_type
!= VDIR
) {
9336 (void)vnode_put(vp
);
9342 error
= mac_vnode_check_readdir(&context
, vp
);
9344 (void)vnode_put(vp
);
9349 loff
= fp
->f_fglob
->fg_offset
;
9350 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9351 uio_addiov(auio
, bufp
, bufsize
);
9353 if (flags
& VNODE_READDIR_EXTENDED
) {
9354 error
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
);
9355 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9357 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
);
9358 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9361 (void)vnode_put(vp
);
9365 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
9366 if (union_dircheckp
) {
9367 error
= union_dircheckp(&vp
, fp
, &context
);
9372 (void)vnode_put(vp
);
9377 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
9378 struct vnode
*tvp
= vp
;
9379 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
9381 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9382 fp
->f_fglob
->fg_offset
= 0;
9396 *bytesread
= bufsize
- uio_resid(auio
);
9404 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
9410 AUDIT_ARG(fd
, uap
->fd
);
9411 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
,
9412 &bytesread
, &offset
, &eofflag
, 0);
9415 if (proc_is64bit(p
)) {
9416 user64_long_t base
= (user64_long_t
)offset
;
9417 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
9419 user32_long_t base
= (user32_long_t
)offset
;
9420 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
9422 *retval
= bytesread
;
9428 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
9433 user_size_t bufsize
;
9435 AUDIT_ARG(fd
, uap
->fd
);
9438 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9439 * then the kernel carves out the last 4 bytes to return extended
9440 * information to userspace (namely whether we reached EOF with this call).
9442 if (uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9443 bufsize
= uap
->bufsize
- sizeof(getdirentries64_flags_t
);
9445 bufsize
= uap
->bufsize
;
9448 error
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
,
9449 &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
);
9452 *retval
= bytesread
;
9453 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
9455 if (error
== 0 && uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9456 getdirentries64_flags_t flags
= 0;
9458 flags
|= GETDIRENTRIES64_EOF
;
9460 error
= copyout(&flags
, (user_addr_t
)uap
->buf
+ bufsize
,
9469 * Set the mode mask for creation of filesystem nodes.
9470 * XXX implement xsecurity
9472 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9474 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
9476 struct filedesc
*fdp
;
9478 AUDIT_ARG(mask
, newmask
);
9481 *retval
= fdp
->fd_cmask
;
9482 fdp
->fd_cmask
= newmask
& ALLPERMS
;
9488 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9490 * Parameters: p Process requesting to set the umask
9491 * uap User argument descriptor (see below)
9492 * retval umask of the process (parameter p)
9494 * Indirect: uap->newmask umask to set
9495 * uap->xsecurity ACL to set
9497 * Returns: 0 Success
9502 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
9505 kauth_filesec_t xsecdst
;
9507 xsecdst
= KAUTH_FILESEC_NONE
;
9508 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
9509 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
9513 xsecdst
= KAUTH_FILESEC_NONE
;
9516 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
9518 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
9519 kauth_filesec_free(xsecdst
);
9525 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
9527 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
9531 * Void all references to file by ripping underlying filesystem
9536 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
9539 struct vnode_attr va
;
9540 vfs_context_t ctx
= vfs_context_current();
9542 struct nameidata nd
;
9544 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9554 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9559 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9565 error
= mac_vnode_check_revoke(ctx
, vp
);
9572 VATTR_WANTED(&va
, va_uid
);
9573 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9576 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9577 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9580 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9581 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9590 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9591 * The following system calls are designed to support features
9592 * which are specific to the HFS & HFS Plus volume formats
9597 * Obtain attribute information on objects in a directory while enumerating
9602 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9605 struct fileproc
*fp
;
9607 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9608 uint32_t count
= 0, savecount
= 0;
9609 uint32_t newstate
= 0;
9612 struct attrlist attributelist
;
9613 vfs_context_t ctx
= vfs_context_current();
9615 char uio_buf
[UIO_SIZEOF(1)];
9616 kauth_action_t action
;
9620 /* Get the attributes into kernel space */
9621 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9624 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9628 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9631 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9632 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9639 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9647 if ((error
= vnode_getwithref(vp
))) {
9651 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9654 if (vp
->v_type
!= VDIR
) {
9655 (void)vnode_put(vp
);
9661 error
= mac_vnode_check_readdir(ctx
, vp
);
9663 (void)vnode_put(vp
);
9668 /* set up the uio structure which will contain the users return buffer */
9669 loff
= fp
->f_fglob
->fg_offset
;
9670 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9671 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9674 * If the only item requested is file names, we can let that past with
9675 * just LIST_DIRECTORY. If they want any other attributes, that means
9676 * they need SEARCH as well.
9678 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9679 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9680 attributelist
.fileattr
|| attributelist
.dirattr
) {
9681 action
|= KAUTH_VNODE_SEARCH
;
9684 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9685 /* Believe it or not, uap->options only has 32-bits of valid
9686 * info, so truncate before extending again */
9688 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9689 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9693 (void) vnode_put(vp
);
9698 * If we've got the last entry of a directory in a union mount
9699 * then reset the eofflag and pretend there's still more to come.
9700 * The next call will again set eofflag and the buffer will be empty,
9701 * so traverse to the underlying directory and do the directory
9704 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
9705 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
9707 } else { // Empty buffer
9708 struct vnode
*tvp
= vp
;
9709 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
9710 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
9711 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9712 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
9714 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
9722 (void)vnode_put(vp
);
9727 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
9729 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
9732 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
9735 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
9739 *retval
= eofflag
; /* similar to getdirentries */
9743 return error
; /* return error earlier, an retval of 0 or 1 now */
9744 } /* end of getdirentriesattr system call */
9747 * Exchange data between two files
9752 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
9754 struct nameidata fnd
, snd
;
9755 vfs_context_t ctx
= vfs_context_current();
9759 u_int32_t nameiflags
;
9762 int flen
= 0, slen
= 0;
9763 int from_truncated
= 0, to_truncated
= 0;
9765 fse_info f_finfo
, s_finfo
;
9769 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9770 nameiflags
|= FOLLOW
;
9773 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
9774 UIO_USERSPACE
, uap
->path1
, ctx
);
9776 error
= namei(&fnd
);
9784 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
9785 UIO_USERSPACE
, uap
->path2
, ctx
);
9787 error
= namei(&snd
);
9796 * if the files are the same, return an inval error
9804 * if the files are on different volumes, return an error
9806 if (svp
->v_mount
!= fvp
->v_mount
) {
9811 /* If they're not files, return an error */
9812 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
9818 error
= mac_vnode_check_exchangedata(ctx
,
9824 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
9825 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
9831 need_fsevent(FSE_EXCHANGE
, fvp
) ||
9833 kauth_authorize_fileop_has_listeners()) {
9836 if (fpath
== NULL
|| spath
== NULL
) {
9841 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
9842 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
9845 get_fse_info(fvp
, &f_finfo
, ctx
);
9846 get_fse_info(svp
, &s_finfo
, ctx
);
9847 if (from_truncated
|| to_truncated
) {
9848 // set it here since only the f_finfo gets reported up to user space
9849 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
9853 /* Ok, make the call */
9854 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
9857 const char *tmpname
;
9859 if (fpath
!= NULL
&& spath
!= NULL
) {
9860 /* call out to allow 3rd party notification of exchangedata.
9861 * Ignore result of kauth_authorize_fileop call.
9863 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
9864 (uintptr_t)fpath
, (uintptr_t)spath
);
9868 tmpname
= fvp
->v_name
;
9869 fvp
->v_name
= svp
->v_name
;
9870 svp
->v_name
= tmpname
;
9872 if (fvp
->v_parent
!= svp
->v_parent
) {
9875 tmp
= fvp
->v_parent
;
9876 fvp
->v_parent
= svp
->v_parent
;
9877 svp
->v_parent
= tmp
;
9879 name_cache_unlock();
9882 if (fpath
!= NULL
&& spath
!= NULL
) {
9883 add_fsevent(FSE_EXCHANGE
, ctx
,
9884 FSE_ARG_STRING
, flen
, fpath
,
9885 FSE_ARG_FINFO
, &f_finfo
,
9886 FSE_ARG_STRING
, slen
, spath
,
9887 FSE_ARG_FINFO
, &s_finfo
,
9894 if (fpath
!= NULL
) {
9895 RELEASE_PATH(fpath
);
9897 if (spath
!= NULL
) {
9898 RELEASE_PATH(spath
);
9907 * Return (in MB) the amount of freespace on the given vnode's volume.
9909 uint32_t freespace_mb(vnode_t vp
);
9912 freespace_mb(vnode_t vp
)
9914 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9915 return ((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9916 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20;
9924 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9929 struct nameidata nd
;
9930 struct user64_fssearchblock searchblock
;
9931 struct searchstate
*state
;
9932 struct attrlist
*returnattrs
;
9933 struct timeval timelimit
;
9934 void *searchparams1
, *searchparams2
;
9936 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9937 uint32_t nummatches
;
9939 uint32_t nameiflags
;
9940 vfs_context_t ctx
= vfs_context_current();
9941 char uio_buf
[UIO_SIZEOF(1)];
9943 /* Start by copying in fsearchblock parameter list */
9944 if (IS_64BIT_PROCESS(p
)) {
9945 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9946 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9947 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9949 struct user32_fssearchblock tmp_searchblock
;
9951 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9952 // munge into 64-bit version
9953 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9954 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9955 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9956 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9958 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9959 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9961 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9962 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9963 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9964 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9965 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9966 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9967 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9973 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9975 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9976 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
9980 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9981 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9982 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9985 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9986 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9987 /* assumes the size is still 556 bytes it will continue to work */
9989 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9990 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
9992 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9994 /* Now set up the various pointers to the correct place in our newly allocated memory */
9996 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9997 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9998 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
10000 /* Now copy in the stuff given our local variables. */
10002 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
10006 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
10010 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
10014 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
10019 * When searching a union mount, need to set the
10020 * start flag at the first call on each layer to
10021 * reset state for the new volume.
10023 if (uap
->options
& SRCHFS_START
) {
10024 state
->ss_union_layer
= 0;
10026 uap
->options
|= state
->ss_union_flags
;
10028 state
->ss_union_flags
= 0;
10031 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10032 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10033 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10034 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10035 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10038 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
10039 attrreference_t
* string_ref
;
10040 u_int32_t
* start_length
;
10041 user64_size_t param_length
;
10043 /* validate searchparams1 */
10044 param_length
= searchblock
.sizeofsearchparams1
;
10045 /* skip the word that specifies length of the buffer */
10046 start_length
= (u_int32_t
*) searchparams1
;
10047 start_length
= start_length
+ 1;
10048 string_ref
= (attrreference_t
*) start_length
;
10050 /* ensure no negative offsets or too big offsets */
10051 if (string_ref
->attr_dataoffset
< 0) {
10055 if (string_ref
->attr_length
> MAXPATHLEN
) {
10060 /* Check for pointer overflow in the string ref */
10061 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
10066 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
10070 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
10076 /* set up the uio structure which will contain the users return buffer */
10077 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10078 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
10081 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10082 nameiflags
|= FOLLOW
;
10084 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
10085 UIO_USERSPACE
, uap
->path
, ctx
);
10087 error
= namei(&nd
);
10095 * Switch to the root vnode for the volume
10097 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
10105 * If it's a union mount, the path lookup takes
10106 * us to the top layer. But we may need to descend
10107 * to a lower layer. For non-union mounts the layer
10110 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
10111 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
10115 vp
= vp
->v_mount
->mnt_vnodecovered
;
10121 error
= vnode_getwithref(vp
);
10129 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
10138 * If searchblock.maxmatches == 0, then skip the search. This has happened
10139 * before and sometimes the underlying code doesn't deal with it well.
10141 if (searchblock
.maxmatches
== 0) {
10147 * All right, we have everything we need, so let's make that call.
10149 * We keep special track of the return value from the file system:
10150 * EAGAIN is an acceptable error condition that shouldn't keep us
10151 * from copying out any results...
10154 fserror
= VNOP_SEARCHFS(vp
,
10157 &searchblock
.searchattrs
,
10158 (u_long
)searchblock
.maxmatches
,
10162 (u_long
)uap
->scriptcode
,
10163 (u_long
)uap
->options
,
10165 (struct searchstate
*) &state
->ss_fsstate
,
10169 * If it's a union mount we need to be called again
10170 * to search the mounted-on filesystem.
10172 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
10173 state
->ss_union_flags
= SRCHFS_START
;
10174 state
->ss_union_layer
++; // search next layer down
10182 /* Now copy out the stuff that needs copying out. That means the number of matches, the
10183 * search state. Everything was already put into the return buffer by the vop call. */
10185 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
10189 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
10197 FREE(searchparams1
, M_TEMP
);
10200 } /* end of searchfs system call */
10202 #else /* CONFIG_SEARCHFS */
10205 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
10210 #endif /* CONFIG_SEARCHFS */
10213 #if CONFIG_DATALESS_FILES
10216 * === Namespace Resolver Up-call Mechanism ===
10218 * When I/O is performed to a dataless file or directory (read, write,
10219 * lookup-in, etc.), the file system performs an upcall to the namespace
10220 * resolver (filecoordinationd) to materialize the object.
10222 * We need multiple up-calls to be in flight at once, and we need these
10223 * up-calls to be interruptible, thus the following implementation:
10225 * => The nspace_resolver_request represents the in-kernel request state.
10226 * It contains a request ID, storage space for the errno code returned
10227 * by filecoordinationd, and flags.
10229 * => The request ID is simply a global monotonically incrementing 32-bit
10230 * number. Outstanding requests are stored in a hash table, and the
10231 * hash function is extremely simple.
10233 * => When an upcall is to be made to filecoordinationd, a request structure
10234 * is allocated on the stack (it is small, and needs to live only during
10235 * the duration of the call to resolve_nspace_item_ext()). It is
10236 * initialized and inserted into the table. Some backpressure from
10237 * filecoordinationd is applied by limiting the number of entries that
10238 * can be inserted into the table (and thus limiting the number of
10239 * outstanding requests issued to filecoordinationd); waiting for an
10240 * available slot is interruptible.
10242 * => Once the request has been inserted into the table, the up-call is made
10243 * to filecoordinationd via a MiG-generated stub. The up-call returns
10244 * immediately and filecoordinationd processes the request asynchronously.
10246 * => The caller now waits for the request to complete. This is achieved by
10247 * sleeping on the address of the request structure and waiting for
10248 * filecoordinationd to mark the request structure as complete. This
10249 * is an interruptible sleep call; if interrupted, the request structure
10250 * is removed from the table and EINTR is returned to the caller. If
10251 * this occurs, an advisory up-call is made to filecoordinationd with
10252 * the request ID to indicate that the request can be aborted or
10253 * de-prioritized at the discretion of filecoordinationd.
10255 * => When filecoordinationd has completed the request, it signals completion
10256 * by writing to the vfs.nspace.complete sysctl node. Only a process
10257 * decorated as a namespace resolver can write to this sysctl node. The
10258 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10259 * The request ID is looked up in the table, and if the request is found,
10260 * the error code is stored in the request structure and a wakeup()
10261 * issued on the address of the request structure. If the request is not
10262 * found, we simply drop the completion notification, assuming that the
10263 * caller was interrupted.
10265 * => When the waiting thread wakes up, it extracts the error code from the
10266 * request structure, removes the request from the table, and returns the
10267 * error code to the calling function. Fini!
10270 struct nspace_resolver_request
{
10271 LIST_ENTRY(nspace_resolver_request
) r_hashlink
;
10273 int r_resolver_error
;
10277 #define RRF_COMPLETE 0x0001
10280 next_nspace_req_id(void)
10282 static uint32_t next_req_id
;
10284 return OSAddAtomic(1, &next_req_id
);
10287 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10288 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10290 static LIST_HEAD(nspace_resolver_requesthead
,
10291 nspace_resolver_request
) * nspace_resolver_request_hashtbl
;
10292 static u_long nspace_resolver_request_hashmask
;
10293 static u_int nspace_resolver_request_count
;
10294 static bool nspace_resolver_request_wait_slot
;
10295 static lck_grp_t
*nspace_resolver_request_lck_grp
;
10296 static lck_mtx_t nspace_resolver_request_hash_mutex
;
10298 #define NSPACE_REQ_LOCK() \
10299 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10300 #define NSPACE_REQ_UNLOCK() \
10301 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10303 #define NSPACE_RESOLVER_HASH(req_id) \
10304 (&nspace_resolver_request_hashtbl[(req_id) & \
10305 nspace_resolver_request_hashmask])
10307 static struct nspace_resolver_request
*
10308 nspace_resolver_req_lookup(uint32_t req_id
)
10310 struct nspace_resolver_requesthead
*bucket
;
10311 struct nspace_resolver_request
*req
;
10313 bucket
= NSPACE_RESOLVER_HASH(req_id
);
10314 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10315 if (req
->r_req_id
== req_id
) {
10324 nspace_resolver_req_add(struct nspace_resolver_request
*req
)
10326 struct nspace_resolver_requesthead
*bucket
;
10329 while (nspace_resolver_request_count
>=
10330 NSPACE_RESOLVER_MAX_OUTSTANDING
) {
10331 nspace_resolver_request_wait_slot
= true;
10332 error
= msleep(&nspace_resolver_request_count
,
10333 &nspace_resolver_request_hash_mutex
,
10334 PVFS
| PCATCH
, "nspacerq", NULL
);
10340 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10342 assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
);
10343 #endif /* DIAGNOSTIC */
10344 LIST_INSERT_HEAD(bucket
, req
, r_hashlink
);
10345 nspace_resolver_request_count
++;
10351 nspace_resolver_req_remove(struct nspace_resolver_request
*req
)
10353 struct nspace_resolver_requesthead
*bucket
;
10355 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10357 assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
);
10358 #endif /* DIAGNOSTIC */
10359 LIST_REMOVE(req
, r_hashlink
);
10360 nspace_resolver_request_count
--;
10362 if (nspace_resolver_request_wait_slot
) {
10363 nspace_resolver_request_wait_slot
= false;
10364 wakeup(&nspace_resolver_request_count
);
10369 nspace_resolver_req_cancel(uint32_t req_id
)
10374 // Failures here aren't fatal -- the cancellation message
10375 // sent to the resolver is merely advisory.
10377 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10378 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10382 kr
= send_nspace_resolve_cancel(mp
, req_id
);
10383 if (kr
!= KERN_SUCCESS
) {
10384 os_log_error(OS_LOG_DEFAULT
,
10385 "NSPACE send_nspace_resolve_cancel failure: %d", kr
);
10388 ipc_port_release_send(mp
);
10392 nspace_resolver_req_wait(struct nspace_resolver_request
*req
)
10394 bool send_cancel_message
= false;
10399 while ((req
->r_flags
& RRF_COMPLETE
) == 0) {
10400 error
= msleep(req
, &nspace_resolver_request_hash_mutex
,
10401 PVFS
| PCATCH
, "nspace", NULL
);
10402 if (error
&& error
!= ERESTART
) {
10403 req
->r_resolver_error
= (error
== EINTR
) ? EINTR
:
10405 send_cancel_message
= true;
10410 nspace_resolver_req_remove(req
);
10412 NSPACE_REQ_UNLOCK();
10414 if (send_cancel_message
) {
10415 nspace_resolver_req_cancel(req
->r_req_id
);
10418 return req
->r_resolver_error
;
10422 nspace_resolver_req_mark_complete(
10423 struct nspace_resolver_request
*req
,
10424 int resolver_error
)
10426 req
->r_resolver_error
= resolver_error
;
10427 req
->r_flags
|= RRF_COMPLETE
;
10432 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
)
10434 struct nspace_resolver_request
*req
;
10438 // If we don't find the request corresponding to our req_id,
10439 // just drop the completion signal on the floor; it's likely
10440 // that the requester interrupted with a signal.
10442 req
= nspace_resolver_req_lookup(req_id
);
10444 nspace_resolver_req_mark_complete(req
, resolver_error
);
10447 NSPACE_REQ_UNLOCK();
10450 static struct proc
*nspace_resolver_proc
;
10453 nspace_resolver_get_proc_state(struct proc
*p
, int *is_resolver
)
10455 *is_resolver
= ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10456 p
== nspace_resolver_proc
) ? 1 : 0;
10461 nspace_resolver_set_proc_state(struct proc
*p
, int is_resolver
)
10463 vfs_context_t ctx
= vfs_context_current();
10467 // The system filecoordinationd runs as uid == 0. This also
10468 // has the nice side-effect of filtering out filecoordinationd
10469 // running in the simulator.
10471 if (!vfs_context_issuser(ctx
)) {
10475 error
= priv_check_cred(vfs_context_ucred(ctx
),
10476 PRIV_VFS_DATALESS_RESOLVER
, 0);
10484 if (nspace_resolver_proc
== NULL
) {
10486 p
->p_lflag
|= P_LNSPACE_RESOLVER
;
10488 nspace_resolver_proc
= p
;
10493 NSPACE_REQ_UNLOCK();
10495 // This is basically just like the exit case.
10496 // nspace_resolver_exited() will verify that the
10497 // process is the resolver, and will clear the
10499 nspace_resolver_exited(p
);
10506 nspace_materialization_get_proc_state(struct proc
*p
, int *is_prevented
)
10508 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) != 0 ||
10509 (p
->p_vfs_iopolicy
&
10510 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) {
10519 nspace_materialization_set_proc_state(struct proc
*p
, int is_prevented
)
10521 if (p
->p_lflag
& P_LNSPACE_RESOLVER
) {
10522 return is_prevented
? 0 : EBUSY
;
10525 if (is_prevented
) {
10526 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
);
10528 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
);
10534 nspace_materialization_get_thread_state(int *is_prevented
)
10536 uthread_t ut
= get_bsdthread_info(current_thread());
10538 *is_prevented
= (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0;
10543 nspace_materialization_set_thread_state(int is_prevented
)
10545 uthread_t ut
= get_bsdthread_info(current_thread());
10547 if (is_prevented
) {
10548 ut
->uu_flag
|= UT_NSPACE_NODATALESSFAULTS
;
10550 ut
->uu_flag
&= ~UT_NSPACE_NODATALESSFAULTS
;
10556 nspace_materialization_is_prevented(void)
10558 proc_t p
= current_proc();
10559 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
10560 vfs_context_t ctx
= vfs_context_current();
10563 * Kernel context ==> return EDEADLK, as we would with any random
10564 * process decorated as no-materialize.
10566 if (ctx
== vfs_context_kernel()) {
10571 * If the process has the dataless-manipulation entitlement,
10572 * materialization is prevented, and depending on the kind
10573 * of file system operation, things get to proceed as if the
10574 * object is not dataless.
10576 if (vfs_context_is_dataless_manipulator(ctx
)) {
10577 return EJUSTRETURN
;
10581 * Per-thread decorations override any process-wide decorations.
10582 * (Foundation uses this, and this overrides even the dataless-
10583 * manipulation entitlement so as to make API contracts consistent.)
10586 if (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) {
10589 if (ut
->uu_flag
& UT_NSPACE_FORCEDATALESSFAULTS
) {
10595 * If the process's iopolicy specifies that dataless files
10596 * can be materialized, then we let it go ahead.
10598 if (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) {
10603 * The default behavior is to not materialize dataless files;
10604 * return to the caller that deadlock was detected.
10609 /* the vfs.nspace branch */
10610 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge");
10613 sysctl_nspace_resolver(__unused
struct sysctl_oid
*oidp
,
10614 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10616 struct proc
*p
= req
->p
;
10617 int new_value
, old_value
, changed
= 0;
10620 error
= nspace_resolver_get_proc_state(p
, &old_value
);
10625 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10627 if (error
== 0 && changed
) {
10628 error
= nspace_resolver_set_proc_state(p
, new_value
);
10633 /* decorate this process as the dataless file resolver */
10634 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
,
10635 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10636 0, 0, sysctl_nspace_resolver
, "I", "");
10639 sysctl_nspace_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10640 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10642 struct proc
*p
= req
->p
;
10643 int new_value
, old_value
, changed
= 0;
10646 error
= nspace_materialization_get_proc_state(p
, &old_value
);
10651 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10653 if (error
== 0 && changed
) {
10654 error
= nspace_materialization_set_proc_state(p
, new_value
);
10659 /* decorate this process as not wanting to materialize dataless files */
10660 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
,
10661 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10662 0, 0, sysctl_nspace_prevent_materialization
, "I", "");
10665 sysctl_nspace_thread_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10666 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10668 int new_value
, old_value
, changed
= 0;
10671 error
= nspace_materialization_get_thread_state(&old_value
);
10676 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10678 if (error
== 0 && changed
) {
10679 error
= nspace_materialization_set_thread_state(new_value
);
10684 /* decorate this thread as not wanting to materialize dataless files */
10685 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
,
10686 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10687 0, 0, sysctl_nspace_thread_prevent_materialization
, "I", "");
10690 sysctl_nspace_complete(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
,
10691 __unused
int arg2
, struct sysctl_req
*req
)
10693 struct proc
*p
= req
->p
;
10694 uint32_t req_status
[2] = { 0, 0 };
10695 int error
, is_resolver
, changed
= 0;
10697 error
= nspace_resolver_get_proc_state(p
, &is_resolver
);
10702 if (!is_resolver
) {
10706 error
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
),
10713 * req_status[0] is the req_id
10715 * req_status[1] is the errno
10717 if (error
== 0 && changed
) {
10718 nspace_resolver_req_completed(req_status
[0],
10719 (int)req_status
[1]);
10724 /* Resolver reports completed reqs here. */
10725 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
,
10726 CTLTYPE_OPAQUE
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10727 0, 0, sysctl_nspace_complete
, "-", "");
10729 #endif /* CONFIG_DATALESS_FILES */
10731 #if CONFIG_DATALESS_FILES
10732 #define __no_dataless_unused /* nothing */
10734 #define __no_dataless_unused __unused
10738 nspace_resolver_init(void)
10740 #if CONFIG_DATALESS_FILES
10741 nspace_resolver_request_lck_grp
=
10742 lck_grp_alloc_init("file namespace resolver", NULL
);
10744 lck_mtx_init(&nspace_resolver_request_hash_mutex
,
10745 nspace_resolver_request_lck_grp
, NULL
);
10747 nspace_resolver_request_hashtbl
=
10748 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
,
10749 M_VNODE
/* XXX */, &nspace_resolver_request_hashmask
);
10750 #endif /* CONFIG_DATALESS_FILES */
10754 nspace_resolver_exited(struct proc
*p __no_dataless_unused
)
10756 #if CONFIG_DATALESS_FILES
10757 struct nspace_resolver_requesthead
*bucket
;
10758 struct nspace_resolver_request
*req
;
10763 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10764 p
== nspace_resolver_proc
) {
10765 for (idx
= 0; idx
<= nspace_resolver_request_hashmask
; idx
++) {
10766 bucket
= &nspace_resolver_request_hashtbl
[idx
];
10767 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10768 nspace_resolver_req_mark_complete(req
,
10772 nspace_resolver_proc
= NULL
;
10775 NSPACE_REQ_UNLOCK();
10776 #endif /* CONFIG_DATALESS_FILES */
10780 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
10782 return resolve_nspace_item_ext(vp
, op
, NULL
);
10785 #define DATALESS_RESOLVER_ENTITLEMENT \
10786 "com.apple.private.vfs.dataless-resolver"
10787 #define DATALESS_MANIPULATION_ENTITLEMENT \
10788 "com.apple.private.vfs.dataless-manipulation"
10791 * Return TRUE if the vfs context is associated with a process entitled
10792 * for dataless manipulation.
10794 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10795 * complication around CONFIG_DATALESS_FILES.
10798 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
)
10800 #if CONFIG_DATALESS_FILES
10801 assert(ctx
->vc_thread
== current_thread());
10802 task_t
const task
= current_task();
10803 return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) ||
10804 IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
);
10807 #endif /* CONFIG_DATALESS_FILES */
10811 resolve_nspace_item_ext(
10812 struct vnode
*vp __no_dataless_unused
,
10813 uint64_t op __no_dataless_unused
,
10814 void *arg __unused
)
10816 #if CONFIG_DATALESS_FILES
10822 struct nspace_resolver_request req
;
10824 // only allow namespace events on regular files, directories and symlinks.
10825 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
10830 // if this is a snapshot event and the vnode is on a
10831 // disk image just pretend nothing happened since any
10832 // change to the disk image will cause the disk image
10833 // itself to get backed up and this avoids multi-way
10834 // deadlocks between the snapshot handler and the ever
10835 // popular diskimages-helper process. the variable
10836 // nspace_allow_virtual_devs allows this behavior to
10837 // be overridden (for use by the Mobile TimeMachine
10838 // testing infrastructure which uses disk images)
10840 if (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) {
10841 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled");
10845 error
= nspace_materialization_is_prevented();
10847 os_log_debug(OS_LOG_DEFAULT
,
10848 "NSPACE process/thread is decorated as no-materialization");
10852 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10853 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10854 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port");
10855 // Treat this like being unable to access the backing
10860 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
10861 if (path
== NULL
) {
10863 goto out_release_port
;
10865 path_len
= MAXPATHLEN
;
10867 error
= vn_getpath(vp
, path
, &path_len
);
10869 int xxx_rdar44371223
; /* XXX Mig bug */
10870 req
.r_req_id
= next_nspace_req_id();
10871 req
.r_resolver_error
= 0;
10875 error
= nspace_resolver_req_add(&req
);
10876 NSPACE_REQ_UNLOCK();
10878 goto out_release_port
;
10881 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call");
10882 kr
= send_nspace_resolve_path(mp
, req
.r_req_id
,
10883 current_proc()->p_pid
, (uint32_t)(op
& 0xffffffff),
10884 path
, &xxx_rdar44371223
);
10885 if (kr
!= KERN_SUCCESS
) {
10886 // Also treat this like being unable to access
10887 // the backing store server.
10888 os_log_error(OS_LOG_DEFAULT
,
10889 "NSPACE resolve_path failure: %d", kr
);
10893 nspace_resolver_req_remove(&req
);
10894 NSPACE_REQ_UNLOCK();
10895 goto out_release_port
;
10898 // Give back the memory we allocated earlier while
10899 // we wait; we no longer need it.
10900 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10903 // Request has been submitted to the resolver.
10904 // Now (interruptibly) wait for completion.
10905 // Upon return, the request will have been removed
10906 // from the lookup table.
10907 error
= nspace_resolver_req_wait(&req
);
10911 if (path
!= NULL
) {
10912 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10914 ipc_port_release_send(mp
);
10919 #endif /* CONFIG_DATALESS_FILES */
10923 nspace_snapshot_event(__unused vnode_t vp
, __unused
time_t ctime
,
10924 __unused
uint64_t op_type
, __unused
void *arg
)
10931 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
10933 struct vnode_attr va
;
10937 VATTR_WANTED(&va
, va_fsid
);
10938 VATTR_WANTED(&va
, va_fileid
);
10940 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
10941 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
10944 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
10952 static unsigned long
10953 fsctl_bogus_command_compat(unsigned long cmd
)
10956 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10957 return FSIOC_SYNC_VOLUME
;
10958 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10959 return FSIOC_ROUTEFS_SETROUTEID
;
10960 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10961 return FSIOC_SET_PACKAGE_EXTS
;
10962 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10963 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
10964 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10965 return DISK_CONDITIONER_IOC_GET
;
10966 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10967 return DISK_CONDITIONER_IOC_SET
;
10968 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10969 return FSIOC_FIOSEEKHOLE
;
10970 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10971 return FSIOC_FIOSEEKDATA
;
10972 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10973 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
10974 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10975 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
10982 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
)
10984 return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
);
10988 * Make a filesystem-specific control call:
10992 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10997 #define STK_PARAMS 128
10998 char stkbuf
[STK_PARAMS
] = {0};
10999 caddr_t data
, memp
;
11000 vnode_t vp
= *arg_vp
;
11002 if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
11006 cmd
= fsctl_bogus_command_compat(cmd
);
11008 size
= IOCPARM_LEN(cmd
);
11009 if (size
> IOCPARM_MAX
) {
11013 is64bit
= proc_is64bit(p
);
11017 if (size
> sizeof(stkbuf
)) {
11018 if ((memp
= (caddr_t
)kalloc(size
)) == 0) {
11026 if (cmd
& IOC_IN
) {
11028 error
= copyin(udata
, data
, size
);
11037 *(user_addr_t
*)data
= udata
;
11039 *(uint32_t *)data
= (uint32_t)udata
;
11042 } else if ((cmd
& IOC_OUT
) && size
) {
11044 * Zero the buffer so the user always
11045 * gets back something deterministic.
11048 } else if (cmd
& IOC_VOID
) {
11050 *(user_addr_t
*)data
= udata
;
11052 *(uint32_t *)data
= (uint32_t)udata
;
11056 /* Check to see if it's a generic command */
11058 case FSIOC_SYNC_VOLUME
: {
11059 struct vfs_attr vfa
;
11060 mount_t mp
= vp
->v_mount
;
11064 /* record vid of vp so we can drop it below. */
11065 uint32_t vvid
= vp
->v_id
;
11068 * Then grab mount_iterref so that we can release the vnode.
11069 * Without this, a thread may call vnode_iterate_prepare then
11070 * get into a deadlock because we've never released the root vp
11072 error
= mount_iterref(mp
, 0);
11079 if (*(uint32_t*)data
& FSCTL_SYNC_WAIT
) {
11084 * If the filesystem supports multiple filesystems in a
11085 * partition (e.g. APFS volumes in a container), it knows
11086 * that the waitfor argument to VFS_SYNC is a set of flags.
11088 VFSATTR_INIT(&vfa
);
11089 VFSATTR_WANTED(&vfa
, f_capabilities
);
11090 if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) &&
11091 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
11092 ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) &&
11093 ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) {
11097 /* issue the sync for this volume */
11098 (void)sync_callback(mp
, &arg
);
11101 * Then release the mount_iterref once we're done syncing; it's not
11102 * needed for the VNOP_IOCTL below
11104 mount_iterdrop(mp
);
11106 if (arg
& FSCTL_SYNC_FULLSYNC
) {
11107 /* re-obtain vnode iocount on the root vp, if possible */
11108 error
= vnode_getwithvid(vp
, vvid
);
11110 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
11114 /* mark the argument VP as having been released */
11119 case FSIOC_ROUTEFS_SETROUTEID
: {
11121 char routepath
[MAXPATHLEN
];
11124 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11127 bzero(routepath
, MAXPATHLEN
);
11128 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
11132 error
= routefs_kernel_mount(routepath
);
11140 case FSIOC_SET_PACKAGE_EXTS
: {
11141 user_addr_t ext_strings
;
11142 uint32_t num_entries
;
11143 uint32_t max_width
;
11145 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
11149 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
11150 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
11151 // either you're 64-bit and passed a 64-bit struct or
11152 // you're 32-bit and passed a 32-bit struct. otherwise
11159 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
11160 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
11161 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
11163 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
11164 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
11165 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
11167 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
11171 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
11173 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11177 mount_lock(vp
->v_mount
);
11178 if (data
[0] != 0) {
11179 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
11180 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
11181 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11182 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
11183 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
11186 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11187 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
11189 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
11190 vp
->v_mount
->fstypename_override
[0] = '\0';
11192 mount_unlock(vp
->v_mount
);
11197 case DISK_CONDITIONER_IOC_GET
: {
11198 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11202 case DISK_CONDITIONER_IOC_SET
: {
11203 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11207 case FSIOC_CAS_BSDFLAGS
: {
11208 struct fsioc_cas_bsdflags
*cas
= (struct fsioc_cas_bsdflags
*)data
;
11209 struct vnode_attr va
;
11212 VATTR_SET(&va
, va_flags
, cas
->new_flags
);
11214 error
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
);
11218 case FSIOC_FD_ONLY_OPEN_ONCE
: {
11219 if (vnode_usecount(vp
) > 1) {
11228 /* other, known commands shouldn't be passed down here */
11231 case F_TRIM_ACTIVE_FILE
:
11233 case F_TRANSCODEKEY
:
11234 case F_GETPROTECTIONLEVEL
:
11235 case F_GETDEFAULTPROTLEVEL
:
11236 case F_MAKECOMPRESSED
:
11237 case F_SET_GREEDY_MODE
:
11238 case F_SETSTATICCONTENT
:
11240 case F_SETBACKINGSTORE
:
11241 case F_GETPATH_MTMINFO
:
11242 case APFSIOC_REVERT_TO_SNAPSHOT
:
11243 case FSIOC_FIOSEEKHOLE
:
11244 case FSIOC_FIOSEEKDATA
:
11245 case HFS_GET_BOOT_INFO
:
11246 case HFS_SET_BOOT_INFO
:
11250 case F_BARRIERFSYNC
:
11256 /* Invoke the filesystem-specific code */
11257 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
11259 } /* end switch stmt */
11262 * if no errors, copy any data to user. Size was
11263 * already set and checked above.
11265 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
11266 error
= copyout(data
, udata
, size
);
11279 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
11282 struct nameidata nd
;
11285 vfs_context_t ctx
= vfs_context_current();
11287 AUDIT_ARG(cmd
, uap
->cmd
);
11288 AUDIT_ARG(value32
, uap
->options
);
11289 /* Get the vnode for the file we are getting info on: */
11292 // if we come through fsctl() then the file is by definition not open.
11293 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11294 // lest the caller mistakenly thinks the only open is their own (but in
11295 // reality it's someone else's).
11297 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
11300 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
11301 nameiflags
|= FOLLOW
;
11303 if (uap
->cmd
== FSIOC_FIRMLINK_CTL
) {
11304 nameiflags
|= (CN_FIRMLINK_NOFOLLOW
| NOCACHE
);
11306 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
11307 UIO_USERSPACE
, uap
->path
, ctx
);
11308 if ((error
= namei(&nd
))) {
11315 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
11321 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11331 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11335 vfs_context_t ctx
= vfs_context_current();
11338 AUDIT_ARG(fd
, uap
->fd
);
11339 AUDIT_ARG(cmd
, uap
->cmd
);
11340 AUDIT_ARG(value32
, uap
->options
);
11342 /* Get the vnode for the file we are getting info on: */
11343 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11347 if ((error
= vnode_getwithref(vp
))) {
11353 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
11360 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11364 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11371 /* end of fsctl system call */
11374 * Retrieve the data of an extended attribute.
11377 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11380 struct nameidata nd
;
11381 char attrname
[XATTR_MAXNAMELEN
+ 1];
11382 vfs_context_t ctx
= vfs_context_current();
11384 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11385 size_t attrsize
= 0;
11387 u_int32_t nameiflags
;
11389 char uio_buf
[UIO_SIZEOF(1)];
11391 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11395 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11396 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11397 if ((error
= namei(&nd
))) {
11403 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11407 if (xattr_protected(attrname
)) {
11408 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
11414 * the specific check for 0xffffffff is a hack to preserve
11415 * binary compatibility in K64 with applications that discovered
11416 * that passing in a buf pointer and a size of -1 resulted in
11417 * just the size of the indicated extended attribute being returned.
11418 * this isn't part of the documented behavior, but because of the
11419 * original implementation's check for "uap->size > 0", this behavior
11420 * was allowed. In K32 that check turned into a signed comparison
11421 * even though uap->size is unsigned... in K64, we blow by that
11422 * check because uap->size is unsigned and doesn't get sign smeared
11423 * in the munger for a 32 bit user app. we also need to add a
11424 * check to limit the maximum size of the buffer being passed in...
11425 * unfortunately, the underlying filesystems seem to just malloc
11426 * the requested size even if the actual extended attribute is tiny.
11427 * because that malloc is for kernel wired memory, we have to put a
11428 * sane limit on it.
11430 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11431 * U64 running on K64 will yield -1 (64 bits wide)
11432 * U32/U64 running on K32 will yield -1 (32 bits wide)
11434 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
11439 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11440 uap
->size
= XATTR_MAXSIZE
;
11443 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11444 &uio_buf
[0], sizeof(uio_buf
));
11445 uio_addiov(auio
, uap
->value
, uap
->size
);
11448 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11453 *retval
= uap
->size
- uio_resid(auio
);
11455 *retval
= (user_ssize_t
)attrsize
;
11462 * Retrieve the data of an extended attribute.
11465 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11468 char attrname
[XATTR_MAXNAMELEN
+ 1];
11470 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11471 size_t attrsize
= 0;
11474 char uio_buf
[UIO_SIZEOF(1)];
11476 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11480 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11483 if ((error
= vnode_getwithref(vp
))) {
11484 file_drop(uap
->fd
);
11487 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11491 if (xattr_protected(attrname
)) {
11495 if (uap
->value
&& uap
->size
> 0) {
11496 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11497 &uio_buf
[0], sizeof(uio_buf
));
11498 uio_addiov(auio
, uap
->value
, uap
->size
);
11501 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11503 (void)vnode_put(vp
);
11504 file_drop(uap
->fd
);
11507 *retval
= uap
->size
- uio_resid(auio
);
11509 *retval
= (user_ssize_t
)attrsize
;
11515 * Set the data of an extended attribute.
11518 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11521 struct nameidata nd
;
11522 char attrname
[XATTR_MAXNAMELEN
+ 1];
11523 vfs_context_t ctx
= vfs_context_current();
11525 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11527 u_int32_t nameiflags
;
11529 char uio_buf
[UIO_SIZEOF(1)];
11531 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11535 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11537 if (error
== EPERM
) {
11538 /* if the string won't fit in attrname, copyinstr emits EPERM */
11539 return ENAMETOOLONG
;
11541 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11544 if (xattr_protected(attrname
)) {
11547 if (uap
->size
!= 0 && uap
->value
== 0) {
11551 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11552 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11553 if ((error
= namei(&nd
))) {
11559 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11560 &uio_buf
[0], sizeof(uio_buf
));
11561 uio_addiov(auio
, uap
->value
, uap
->size
);
11563 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
11566 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11577 * Set the data of an extended attribute.
11580 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
11583 char attrname
[XATTR_MAXNAMELEN
+ 1];
11585 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11588 char uio_buf
[UIO_SIZEOF(1)];
11590 vfs_context_t ctx
= vfs_context_current();
11593 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11597 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11599 if (error
== EPERM
) {
11600 /* if the string won't fit in attrname, copyinstr emits EPERM */
11601 return ENAMETOOLONG
;
11603 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11606 if (xattr_protected(attrname
)) {
11609 if (uap
->size
!= 0 && uap
->value
== 0) {
11612 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11615 if ((error
= vnode_getwithref(vp
))) {
11616 file_drop(uap
->fd
);
11619 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11620 &uio_buf
[0], sizeof(uio_buf
));
11621 uio_addiov(auio
, uap
->value
, uap
->size
);
11623 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
11626 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11632 file_drop(uap
->fd
);
11638 * Remove an extended attribute.
11639 * XXX Code duplication here.
11642 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
11645 struct nameidata nd
;
11646 char attrname
[XATTR_MAXNAMELEN
+ 1];
11647 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11648 vfs_context_t ctx
= vfs_context_current();
11650 u_int32_t nameiflags
;
11653 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11657 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11661 if (xattr_protected(attrname
)) {
11664 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11665 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11666 if ((error
= namei(&nd
))) {
11672 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
11675 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11686 * Remove an extended attribute.
11687 * XXX Code duplication here.
11690 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
11693 char attrname
[XATTR_MAXNAMELEN
+ 1];
11697 vfs_context_t ctx
= vfs_context_current();
11700 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11704 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11708 if (xattr_protected(attrname
)) {
11711 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11714 if ((error
= vnode_getwithref(vp
))) {
11715 file_drop(uap
->fd
);
11719 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
11722 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11728 file_drop(uap
->fd
);
11734 * Retrieve the list of extended attribute names.
11735 * XXX Code duplication here.
11738 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11741 struct nameidata nd
;
11742 vfs_context_t ctx
= vfs_context_current();
11744 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11745 size_t attrsize
= 0;
11746 u_int32_t nameiflags
;
11748 char uio_buf
[UIO_SIZEOF(1)];
11750 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11754 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11755 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11756 if ((error
= namei(&nd
))) {
11761 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11762 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11763 &uio_buf
[0], sizeof(uio_buf
));
11764 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11767 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11771 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11773 *retval
= (user_ssize_t
)attrsize
;
11779 * Retrieve the list of extended attribute names.
11780 * XXX Code duplication here.
11783 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11787 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11788 size_t attrsize
= 0;
11790 char uio_buf
[UIO_SIZEOF(1)];
11792 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11796 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11799 if ((error
= vnode_getwithref(vp
))) {
11800 file_drop(uap
->fd
);
11803 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11804 auio
= uio_createwithbuffer(1, 0, spacetype
,
11805 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11806 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11809 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11812 file_drop(uap
->fd
);
11814 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11816 *retval
= (user_ssize_t
)attrsize
;
11822 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11823 vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
)
11826 struct mount
*mp
= NULL
;
11830 /* maximum number of times to retry build_path */
11831 unsigned int retries
= 0x10;
11833 if (bufsize
> PAGE_SIZE
) {
11842 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11843 error
= ENOTSUP
; /* unexpected failure */
11849 struct vfs_attr vfsattr
;
11850 int use_vfs_root
= TRUE
;
11852 VFSATTR_INIT(&vfsattr
);
11853 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
11854 if (!(options
& FSOPT_ISREALFSID
) &&
11855 vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 &&
11856 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
11857 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) &&
11858 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) {
11859 use_vfs_root
= FALSE
;
11863 if (use_vfs_root
) {
11864 error
= VFS_ROOT(mp
, &vp
, ctx
);
11866 error
= VFS_VGET(mp
, objid
, &vp
, ctx
);
11869 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11872 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11874 * If the fileid isn't found and we're in a union
11875 * mount volume, then see if the fileid is in the
11876 * mounted-on volume.
11878 struct mount
*tmp
= mp
;
11879 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11881 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
11893 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11900 /* Obtain the absolute path to this vnode. */
11901 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11902 if (options
& FSOPT_NOFIRMLINKPATH
) {
11903 bpflags
|= BUILDPATH_NO_FIRMLINK
;
11905 bpflags
|= BUILDPATH_CHECK_MOVED
;
11906 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11910 /* there was a race building the path, try a few more times */
11911 if (error
== EAGAIN
) {
11922 AUDIT_ARG(text
, buf
);
11924 if (kdebug_enable
) {
11925 long dbg_parms
[NUMPARMS
];
11928 dbg_namelen
= (int)sizeof(dbg_parms
);
11930 if (length
< dbg_namelen
) {
11931 memcpy((char *)dbg_parms
, buf
, length
);
11932 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11934 dbg_namelen
= length
;
11936 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11939 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11940 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
11943 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11950 * Obtain the full pathname of a file system object by id.
11953 fsgetpath_extended(user_addr_t buf
, int bufsize
, user_addr_t user_fsid
, uint64_t objid
,
11954 uint32_t options
, user_ssize_t
*retval
)
11956 vfs_context_t ctx
= vfs_context_current();
11962 if (options
& ~(FSOPT_NOFIRMLINKPATH
| FSOPT_ISREALFSID
)) {
11966 if ((error
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11969 AUDIT_ARG(value32
, fsid
.val
[0]);
11970 AUDIT_ARG(value64
, objid
);
11971 /* Restrict output buffer size for now. */
11973 if (bufsize
> PAGE_SIZE
|| bufsize
<= 0) {
11976 MALLOC(realpath
, char *, bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
11977 if (realpath
== NULL
) {
11981 error
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
,
11988 error
= copyout((caddr_t
)realpath
, buf
, length
);
11990 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11993 FREE(realpath
, M_TEMP
);
11999 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
12001 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12006 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args
*uap
, user_ssize_t
*retval
)
12008 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12009 uap
->options
, retval
);
12013 * Common routine to handle various flavors of statfs data heading out
12016 * Returns: 0 Success
12020 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
12021 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
12022 boolean_t partial_copy
)
12025 int my_size
, copy_size
;
12028 struct user64_statfs sfs
;
12029 my_size
= copy_size
= sizeof(sfs
);
12030 bzero(&sfs
, my_size
);
12031 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12032 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12033 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12034 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
12035 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
12036 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
12037 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
12038 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
12039 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
12040 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
12041 sfs
.f_fsid
= sfsp
->f_fsid
;
12042 sfs
.f_owner
= sfsp
->f_owner
;
12043 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12044 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12046 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12048 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12049 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12051 if (partial_copy
) {
12052 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12054 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12056 struct user32_statfs sfs
;
12058 my_size
= copy_size
= sizeof(sfs
);
12059 bzero(&sfs
, my_size
);
12061 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12062 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12063 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12066 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
12067 * have to fudge the numbers here in that case. We inflate the blocksize in order
12068 * to reflect the filesystem size as best we can.
12070 if ((sfsp
->f_blocks
> INT_MAX
)
12071 /* Hack for 4061702 . I think the real fix is for Carbon to
12072 * look for some volume capability and not depend on hidden
12073 * semantics agreed between a FS and carbon.
12074 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12075 * for Carbon to set bNoVolumeSizes volume attribute.
12076 * Without this the webdavfs files cannot be copied onto
12077 * disk as they look huge. This change should not affect
12078 * XSAN as they should not be setting these to -1.
12080 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
12081 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
12082 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
12086 * Work out how far we have to shift the block count down to make it fit.
12087 * Note that it's possible to have to shift so far that the resulting
12088 * blocksize would be unreportably large. At that point, we will clip
12089 * any values that don't fit.
12091 * For safety's sake, we also ensure that f_iosize is never reported as
12092 * being smaller than f_bsize.
12094 for (shift
= 0; shift
< 32; shift
++) {
12095 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
12098 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
12102 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12103 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
12104 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
12105 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
12106 #undef __SHIFT_OR_CLIP
12107 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
12108 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
12110 /* filesystem is small enough to be reported honestly */
12111 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
12112 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
12113 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
12114 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
12115 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
12117 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
12118 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
12119 sfs
.f_fsid
= sfsp
->f_fsid
;
12120 sfs
.f_owner
= sfsp
->f_owner
;
12121 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12122 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12124 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12126 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12127 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12129 if (partial_copy
) {
12130 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12132 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12135 if (sizep
!= NULL
) {
12142 * copy stat structure into user_stat structure.
/*
 * munge_user64_stat: zero *usbp with bzero(), then copy *sbp into it member
 * by member: st_dev, st_ino, st_mode, st_nlink, st_uid, st_gid, st_rdev,
 * the access/modify/change timestamps, st_size, st_blocks, st_blksize,
 * st_flags, st_gen, st_lspare and both st_qspare slots.
 *
 * NOTE(review): two timestamp groups are visible below -- the st_*timespec
 * form right after "#ifndef _POSIX_C_SOURCE" and the st_*time /
 * st_*timensec form. Presumably the second group is the alternative
 * preprocessor branch; the directives separating them are not visible in
 * this listing -- confirm against the full file.
 */
12145 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
12147 bzero(usbp
, sizeof(*usbp
));
12149 usbp
->st_dev
= sbp
->st_dev
;
12150 usbp
->st_ino
= sbp
->st_ino
;
12151 usbp
->st_mode
= sbp
->st_mode
;
12152 usbp
->st_nlink
= sbp
->st_nlink
;
12153 usbp
->st_uid
= sbp
->st_uid
;
12154 usbp
->st_gid
= sbp
->st_gid
;
12155 usbp
->st_rdev
= sbp
->st_rdev
;
12156 #ifndef _POSIX_C_SOURCE
12157 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12158 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12159 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12160 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12161 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12162 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12164 usbp
->st_atime
= sbp
->st_atime
;
12165 usbp
->st_atimensec
= sbp
->st_atimensec
;
12166 usbp
->st_mtime
= sbp
->st_mtime
;
12167 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12168 usbp
->st_ctime
= sbp
->st_ctime
;
12169 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12171 usbp
->st_size
= sbp
->st_size
;
12172 usbp
->st_blocks
= sbp
->st_blocks
;
12173 usbp
->st_blksize
= sbp
->st_blksize
;
12174 usbp
->st_flags
= sbp
->st_flags
;
12175 usbp
->st_gen
= sbp
->st_gen
;
12176 usbp
->st_lspare
= sbp
->st_lspare
;
12177 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12178 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * munge_user32_stat: same member-by-member copy as the user64 variant, but
 * the destination is a struct user32_stat. *usbp is zeroed with bzero()
 * first, then every visible member of *sbp is assigned across.
 *
 * NOTE(review): the assignments are plain '=' with no explicit casts in the
 * visible lines, so any narrowing into the 32-bit layout is implicit --
 * confirm member widths against the struct user32_stat definition.
 * NOTE(review): as in the sibling routine, the st_*timespec group follows
 * "#ifndef _POSIX_C_SOURCE" and the st_*time / st_*timensec group is
 * presumably the alternative branch; the separating directives are not
 * visible in this listing.
 */
12182 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
12184 bzero(usbp
, sizeof(*usbp
));
12186 usbp
->st_dev
= sbp
->st_dev
;
12187 usbp
->st_ino
= sbp
->st_ino
;
12188 usbp
->st_mode
= sbp
->st_mode
;
12189 usbp
->st_nlink
= sbp
->st_nlink
;
12190 usbp
->st_uid
= sbp
->st_uid
;
12191 usbp
->st_gid
= sbp
->st_gid
;
12192 usbp
->st_rdev
= sbp
->st_rdev
;
12193 #ifndef _POSIX_C_SOURCE
12194 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12195 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12196 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12197 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12198 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12199 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12201 usbp
->st_atime
= sbp
->st_atime
;
12202 usbp
->st_atimensec
= sbp
->st_atimensec
;
12203 usbp
->st_mtime
= sbp
->st_mtime
;
12204 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12205 usbp
->st_ctime
= sbp
->st_ctime
;
12206 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12208 usbp
->st_size
= sbp
->st_size
;
12209 usbp
->st_blocks
= sbp
->st_blocks
;
12210 usbp
->st_blksize
= sbp
->st_blksize
;
12211 usbp
->st_flags
= sbp
->st_flags
;
12212 usbp
->st_gen
= sbp
->st_gen
;
12213 usbp
->st_lspare
= sbp
->st_lspare
;
12214 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12215 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12219 * copy stat64 structure into user_stat64 structure.
/*
 * munge_user64_stat64: zero *usbp with bzero(), then copy a struct stat64
 * into a struct user64_stat64 member by member. Compared with the plain
 * stat variants, this one additionally carries the birth time
 * (st_birthtimespec, or st_birthtime / st_birthtimensec).
 *
 * NOTE(review): the st_*timespec group follows "#ifndef _POSIX_C_SOURCE";
 * the st_*time / st_*timensec group is presumably the alternative branch,
 * but the separating directives are not visible in this listing.
 */
12222 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
12224 bzero(usbp
, sizeof(*usbp
));
12226 usbp
->st_dev
= sbp
->st_dev
;
12227 usbp
->st_ino
= sbp
->st_ino
;
12228 usbp
->st_mode
= sbp
->st_mode
;
12229 usbp
->st_nlink
= sbp
->st_nlink
;
12230 usbp
->st_uid
= sbp
->st_uid
;
12231 usbp
->st_gid
= sbp
->st_gid
;
12232 usbp
->st_rdev
= sbp
->st_rdev
;
12233 #ifndef _POSIX_C_SOURCE
12234 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12235 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12236 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12237 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12238 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12239 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12240 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12241 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12243 usbp
->st_atime
= sbp
->st_atime
;
12244 usbp
->st_atimensec
= sbp
->st_atimensec
;
12245 usbp
->st_mtime
= sbp
->st_mtime
;
12246 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12247 usbp
->st_ctime
= sbp
->st_ctime
;
12248 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12249 usbp
->st_birthtime
= sbp
->st_birthtime
;
12250 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12252 usbp
->st_size
= sbp
->st_size
;
12253 usbp
->st_blocks
= sbp
->st_blocks
;
12254 usbp
->st_blksize
= sbp
->st_blksize
;
12255 usbp
->st_flags
= sbp
->st_flags
;
12256 usbp
->st_gen
= sbp
->st_gen
;
12257 usbp
->st_lspare
= sbp
->st_lspare
;
12258 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12259 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * munge_user32_stat64: zero *usbp with bzero(), then copy a struct stat64
 * into a struct user32_stat64 member by member, including the birth time
 * members.
 *
 * NOTE(review): the assignments are plain '=' with no explicit casts in the
 * visible lines, so any narrowing into the 32-bit layout is implicit --
 * confirm member widths against the struct user32_stat64 definition.
 * NOTE(review): the st_*timespec group follows "#ifndef _POSIX_C_SOURCE";
 * the st_*time / st_*timensec group is presumably the alternative branch,
 * but the separating directives are not visible in this listing.
 */
12263 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
12265 bzero(usbp
, sizeof(*usbp
));
12267 usbp
->st_dev
= sbp
->st_dev
;
12268 usbp
->st_ino
= sbp
->st_ino
;
12269 usbp
->st_mode
= sbp
->st_mode
;
12270 usbp
->st_nlink
= sbp
->st_nlink
;
12271 usbp
->st_uid
= sbp
->st_uid
;
12272 usbp
->st_gid
= sbp
->st_gid
;
12273 usbp
->st_rdev
= sbp
->st_rdev
;
12274 #ifndef _POSIX_C_SOURCE
12275 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12276 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12277 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12278 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12279 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12280 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12281 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12282 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12284 usbp
->st_atime
= sbp
->st_atime
;
12285 usbp
->st_atimensec
= sbp
->st_atimensec
;
12286 usbp
->st_mtime
= sbp
->st_mtime
;
12287 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12288 usbp
->st_ctime
= sbp
->st_ctime
;
12289 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12290 usbp
->st_birthtime
= sbp
->st_birthtime
;
12291 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12293 usbp
->st_size
= sbp
->st_size
;
12294 usbp
->st_blocks
= sbp
->st_blocks
;
12295 usbp
->st_blksize
= sbp
->st_blksize
;
12296 usbp
->st_flags
= sbp
->st_flags
;
12297 usbp
->st_gen
= sbp
->st_gen
;
12298 usbp
->st_lspare
= sbp
->st_lspare
;
12299 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12300 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12304 * Purge buffer cache for simulating cold starts
/*
 * vnode_purge_callback: per-vnode callback. Calls ubc_msync() on vp over
 * the range starting at offset 0 with length ubc_getsize(vp), passing
 * UBC_PUSHALL | UBC_INVALIDATE and a NULL residual-offset pointer, then
 * returns VNODE_RETURNED. The ubc_msync() result is not examined in the
 * visible lines. cargs is unused.
 */
12307 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
12309 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
12311 return VNODE_RETURNED
;
/*
 * vfs_purge_callback: per-mount callback. Runs vnode_purge_callback over
 * the vnodes of mp via vnode_iterate() with VNODE_WAIT | VNODE_ITERATE_ALL
 * and a NULL callback argument, then returns VFS_RETURNED. arg is unused.
 */
12315 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
12317 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
12319 return VFS_RETURNED
;
/*
 * vfs_purge: entry point that applies vfs_purge_callback to mounts through
 * vfs_iterate() with flags 0. All three parameters are marked __unused.
 *
 * The credential returned by kauth_cred_get() is tested with
 * kauth_cred_issuser() first.
 * NOTE(review): the body of the non-superuser branch and the function's
 * return statements are not visible in this listing -- presumably the call
 * is rejected for non-superusers; confirm the errno in the full file.
 */
12323 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
12325 if (!kauth_cred_issuser(kauth_cred_get())) {
12329 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
12335 * gets the vnode associated with the (unnamed) snapshot directory
12336 * for a Filesystem. The snapshot directory vnode is returned with
12337 * an iocount on it.
/*
 * vnode_get_snapdir: thin wrapper that returns the result of
 * VFS_VGET_SNAPDIR() for the mount of rvp (vnode_mount(rvp)), passing sdvpp
 * through as the output slot and ctx as the VFS context.
 */
12340 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
12342 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
12346 * Get the snapshot vnode.
12348 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12349 * needs nameidone() on ndp.
12351 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12353 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
/*
 * vnode_get_snapshot: shared lookup helper for the snapshot operations.
 * Visible sequence:
 *   1. vnode_getfromfd(ctx, dirfd, rvpp) resolves dirfd into *rvpp.
 *   2. *rvpp is tested with vnode_isvroot().
 *   3. vfs_getattr() fetches f_capabilities; VOL_CAP_INT_SNAPSHOT must be
 *      set in both the valid[] and capabilities[] words at index
 *      VOL_CAPABILITIES_INTERFACES.
 *   4. vnode_get_snapdir() fills *sdvpp.
 *   5. The name is copied from user space into a MAXPATHLEN M_TEMP buffer
 *      with copyinstr(), then checked for empty, ".", ".." and '/'.
 *   6. For op == CREATE or op == DELETE the matching
 *      mac_mount_check_snapshot_* hook is called.
 *   7. NDINIT() prepares ndp for a UIO_SYSSPACE lookup of name_buf with
 *      USEDVP | NOCACHE | AUDITVNPATH1, ndp->ni_dvp is set to *sdvpp, and
 *      namei(ndp) is run.
 *   8. name_buf is released with FREE().
 *
 * NOTE(review): the statements taken when a check fails (error values,
 * jumps, cleanup), the condition under which the pathop parameter line is
 * compiled, and the remaining parameter/local declarations are not visible
 * in this listing; confirm them against the full file.
 */
12357 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12358 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12359 #if !CONFIG_TRIGGERS
12362 enum path_operation pathop
,
12368 struct vfs_attr vfa
;
12373 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12378 if (!vnode_isvroot(*rvpp
)) {
12383 /* Make sure the filesystem supports snapshots */
12384 VFSATTR_INIT(&vfa
);
12385 VFSATTR_WANTED(&vfa
, f_capabilities
);
12386 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12387 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12388 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12389 VOL_CAP_INT_SNAPSHOT
)) ||
12390 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12391 VOL_CAP_INT_SNAPSHOT
))) {
12396 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
12401 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12402 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12408 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12409 * (the length returned by copyinstr includes the terminating NUL)
12411 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12412 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
12416 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12419 if (i
< (int)name_len
) {
12425 if (op
== CREATE
) {
12426 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12428 } else if (op
== DELETE
) {
12429 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12437 /* Check if the snapshot already exists ... */
12438 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12439 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12440 ndp
->ni_dvp
= *sdvpp
;
12442 error
= namei(ndp
);
12444 FREE(name_buf
, M_TEMP
);
12460 * create a filesystem snapshot (for supporting filesystems)
12462 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12463 * We get to the (unnamed) snapshot directory vnode and create the vnode
12464 * for the snapshot in it.
12468 * a) Passed in name for snapshot cannot have slashes.
12469 * b) name can't be "." or ".."
12471 * Since this requires superuser privileges, vnode_authorize calls are not
/*
 * snapshot_create: look up the snapshot name with vnode_get_snapshot()
 * using op CREATE, then create the entry in the snapshot directory.
 *
 * If the lookup returned a vnode in namend.ni_vp, that vnode is released
 * with vnode_put(). The creation path sets va_type = VREG and va_mode = 0
 * and calls vn_create() on snapdvp with
 * VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT; its result is tested together
 * with vp. Cleanup visible here is nameidone(&namend) and
 * vnode_put(snapdvp).
 *
 * NOTE(review): the error value chosen when the snapshot already exists,
 * the handling of vp after a successful vn_create(), and the release of rvp
 * are not visible in this listing -- confirm against the full file.
 */
12475 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12478 vnode_t rvp
, snapdvp
;
12480 struct nameidata namend
;
12482 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
12488 if (namend
.ni_vp
) {
12489 vnode_put(namend
.ni_vp
);
12492 struct vnode_attr va
;
12493 vnode_t vp
= NULLVP
;
12496 VATTR_SET(&va
, va_type
, VREG
);
12497 VATTR_SET(&va
, va_mode
, 0);
12499 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
12500 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12501 if (!error
&& vp
) {
12506 nameidone(&namend
);
12507 vnode_put(snapdvp
);
12513 * Delete a Filesystem snapshot
12515 * get the vnode for the unnamed snapshot directory and the snapshot and
12516 * delete the snapshot.
/*
 * snapshot_delete: look up the snapshot with vnode_get_snapshot() using op
 * DELETE, then remove it from the snapshot directory with
 * VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
 * VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx).
 *
 * Cleanup visible here: vnode_put(namend.ni_vp), nameidone(&namend) and
 * vnode_put(snapdvp).
 *
 * NOTE(review): the check of the lookup result and the release of rvp are
 * not visible in this listing -- confirm against the full file.
 */
12519 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12522 vnode_t rvp
, snapdvp
;
12524 struct nameidata namend
;
12526 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
12532 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
12533 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12535 vnode_put(namend
.ni_vp
);
12536 nameidone(&namend
);
12537 vnode_put(snapdvp
);
12544 * Revert a filesystem to a snapshot
12546 * Marks the filesystem to revert to the given snapshot on next mount.
/*
 * snapshot_revert: ask the filesystem to revert to the named snapshot.
 * Visible sequence:
 *   1. vnode_getfromfd() resolves dirfd into rvp; mp = vnode_mount(rvp).
 *   2. The name is copied in with copyinstr() into a MAXPATHLEN M_TEMP
 *      buffer.
 *   3. mac_mount_check_snapshot_revert(ctx, mp, name_buf) is consulted.
 *   4. mount_iterref(mp, 0) takes a reference on the mount.
 *   5. A zeroed componentname is filled in to describe name_buf (LOOKUP,
 *      ISLASTCN | HASBUF, length name_len) and handed to the filesystem via
 *      VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, &revert_data, 0, ctx).
 *   6. mount_iterdrop(mp) and FREE(name_buf) undo steps 4 and 2.
 *   7. Fallback: vnode_get_snapshot() with op LOOKUP followed by
 *      VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, NULL, ...),
 *      then vnode_put(namend.ni_vp), nameidone(&namend) and
 *      vnode_put(snapdvp).
 *
 * Each FREE(name_buf, M_TEMP) other than the last sits right after a call
 * that can fail -- presumably inside that call's error branch; the branch
 * conditions are not visible in this listing.
 * NOTE(review): cn_namelen is set to (int)name_len; the sanity-check
 * comment in vnode_get_snapshot says copyinstr's length includes the
 * terminating NUL, so confirm the receiving filesystem expects that.
 */
12549 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12555 struct fs_snapshot_revert_args revert_data
;
12556 struct componentname cnp
;
12560 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12564 mp
= vnode_mount(rvp
);
12566 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12567 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12569 FREE(name_buf
, M_TEMP
);
12575 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
12577 FREE(name_buf
, M_TEMP
);
12584 * Grab mount_iterref so that we can release the vnode,
12585 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12587 error
= mount_iterref(mp
, 0);
12590 FREE(name_buf
, M_TEMP
);
12594 memset(&cnp
, 0, sizeof(cnp
));
12595 cnp
.cn_pnbuf
= (char *)name_buf
;
12596 cnp
.cn_nameiop
= LOOKUP
;
12597 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12598 cnp
.cn_pnlen
= MAXPATHLEN
;
12599 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12600 cnp
.cn_namelen
= (int)name_len
;
12601 revert_data
.sr_cnp
= &cnp
;
12603 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
12604 mount_iterdrop(mp
);
12605 FREE(name_buf
, M_TEMP
);
12608 /* If there was any error, try again using VNOP_IOCTL */
12611 struct nameidata namend
;
12613 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
12620 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
12623 vnode_put(namend
.ni_vp
);
12624 nameidone(&namend
);
12625 vnode_put(snapdvp
);
12633 * rename a Filesystem snapshot
12635 * get the vnode for the unnamed snapshot directory and the snapshot and
12636 * rename the snapshot. This is a very specialised (and simple) case of
12637 * rename(2) (which has to deal with a lot more complications). It differs
12638 * slightly from rename(2) in that EEXIST is returned if the new name exists.
/*
 * snapshot_rename: rename snapshot "old" to "new" inside the snapshot
 * directory. Visible sequence:
 *   1. Both nameidata structures are heap-allocated together in
 *      __rename_data (M_TEMP) rather than placed on the stack.
 *   2. vnode_get_snapshot() looks up the old name with op DELETE;
 *      fvp = fromnd->ni_vp.
 *   3. The new name is copied in with copyinstr() into a MAXPATHLEN buffer
 *      and checked for empty, ".", ".." and '/'.
 *   4. A MAC hook is consulted.
 *   5. NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
 *      UIO_SYSSPACE, ...) with tond->ni_dvp = snapdvp, then namei(tond).
 *      If the lookup found a vnode (tond->ni_vp) it is released with
 *      vnode_put().
 *   6. VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
 *      &tond->ni_cnd, ctx) performs the rename within the one directory.
 *   7. Cleanup: FREE(newname_buf), vnode_put(snapdvp), FREE(__rename_data).
 *
 * NOTE(review): the MAC hook visible here is
 * mac_mount_check_snapshot_create, not a rename-specific hook -- confirm
 * this is intentional.
 * NOTE(review): error assignments, jumps and the release of fvp/rvp are not
 * visible in this listing -- confirm against the full file.
 */
12641 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
12642 __unused
uint32_t flags
, vfs_context_t ctx
)
12644 vnode_t rvp
, snapdvp
;
12646 caddr_t newname_buf
;
12649 struct nameidata
*fromnd
, *tond
;
12650 /* carving out a chunk for structs that are too big to be on stack. */
12652 struct nameidata from_node
;
12653 struct nameidata to_node
;
12656 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
12657 fromnd
= &__rename_data
->from_node
;
12658 tond
= &__rename_data
->to_node
;
12660 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
12665 fvp
= fromnd
->ni_vp
;
12667 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12668 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
12674 * Some sanity checks- new name can't be empty, "." or ".." or have
12676 * (the length returned by copyinstr includes the terminating NUL)
12678 * The FS rename VNOP is suppossed to handle this but we'll pick it
12681 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
12682 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
12686 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
12689 if (i
< (int)name_len
) {
12695 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
12702 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
12703 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
12704 tond
->ni_dvp
= snapdvp
;
12706 error
= namei(tond
);
12709 } else if (tond
->ni_vp
) {
12711 * snapshot rename behaves differently than rename(2) - if the
12712 * new name exists, EEXIST is returned.
12714 vnode_put(tond
->ni_vp
);
12719 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
12720 &tond
->ni_cnd
, ctx
);
12725 FREE(newname_buf
, M_TEMP
);
12727 vnode_put(snapdvp
);
12731 FREE(__rename_data
, M_TEMP
);
12736 * Mount a Filesystem snapshot
12738 * get the vnode for the unnamed snapshot directory and the snapshot and
12739 * mount the snapshot.
/*
 * snapshot_mount: mount the named snapshot on "directory".
 * Visible sequence:
 *   1. Two nameidata structures are heap-allocated together in
 *      __snapshot_mount_data rather than placed on the stack.
 *   2. vnode_get_snapshot() looks up the snapshot with op LOOKUP;
 *      snapvp = snapndp->ni_vp.
 *   3. The mount of rvp is tested for being NULL or dead_mountp.
 *   4. The vnode to be covered is looked up from user space:
 *      NDINIT(dirndp, LOOKUP, OP_MOUNT,
 *      FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_USERSPACE, directory, ctx)
 *      then namei(); vp = ni_vp, pvp = ni_dvp, mp = vnode_mount(rvp).
 *   5. vp is tested for being a VROOT vnode on an MNT_ROOTFS mount.
 *   6. mac_mount_check_snapshot_mount() is consulted.
 *   7. smnt_data carries mp and the snapshot's componentname into
 *      mount_common(), called with the source mount's f_fstypename,
 *      KERNEL_MOUNT_SNAPSHOT and flags masked to MNT_DONTBROWSE.
 *   8. Cleanup visible here: vnode_put(snapdvp), nameidone(snapndp),
 *      FREE(__snapshot_mount_data).
 *
 * NOTE(review): "flags" is annotated __unused in the parameter list but is
 * read in the mount_common() call (flags & MNT_DONTBROWSE); the annotation
 * looks stale.
 * NOTE(review): error assignments, jumps and the release of the other
 * vnodes (rvp, snapvp, vp, pvp) and of dirndp are not visible in this
 * listing -- confirm against the full file.
 */
12742 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
12743 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
12746 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
12747 struct fs_snapshot_mount_args smnt_data
;
12749 struct nameidata
*snapndp
, *dirndp
;
12750 /* carving out a chunk for structs that are too big to be on stack. */
12752 struct nameidata snapnd
;
12753 struct nameidata dirnd
;
12754 } * __snapshot_mount_data
;
12756 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
12758 snapndp
= &__snapshot_mount_data
->snapnd
;
12759 dirndp
= &__snapshot_mount_data
->dirnd
;
12761 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
12767 snapvp
= snapndp
->ni_vp
;
12768 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
12773 /* Get the vnode to be covered */
12774 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
12775 UIO_USERSPACE
, directory
, ctx
);
12776 error
= namei(dirndp
);
12781 vp
= dirndp
->ni_vp
;
12782 pvp
= dirndp
->ni_dvp
;
12783 mp
= vnode_mount(rvp
);
12785 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
12791 error
= mac_mount_check_snapshot_mount(ctx
, rvp
, vp
, &dirndp
->ni_cnd
, snapndp
->ni_cnd
.cn_nameptr
,
12792 mp
->mnt_vfsstat
.f_fstypename
);
12798 smnt_data
.sm_mp
= mp
;
12799 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
12800 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
12801 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
12802 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12810 vnode_put(snapdvp
);
12812 nameidone(snapndp
);
12814 FREE(__snapshot_mount_data
, M_TEMP
);
12819 * Root from a snapshot of the filesystem
12821 * Marks the filesystem to root from the given snapshot on next boot.
/*
 * snapshot_root: ask the filesystem to root from the named snapshot.
 * Visible sequence:
 *   1. vnode_getfromfd() resolves dirfd into rvp; mp = vnode_mount(rvp).
 *   2. The name is copied in with copyinstr() into a MAXPATHLEN M_TEMP
 *      buffer.
 *   3. mount_iterref(mp, 0) takes a reference on the mount.
 *   4. A zeroed componentname is filled in to describe name_buf (LOOKUP,
 *      ISLASTCN | HASBUF, length name_len) and handed to the filesystem via
 *      VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, &root_data, 0, ctx).
 *   5. mount_iterdrop(mp) and FREE(name_buf) undo steps 3 and 2.
 *
 * No MAC hook is called here; the existing "XXX MAC checks ?" marker below
 * records that as an open question.
 * Each FREE(name_buf, M_TEMP) other than the last sits right after a call
 * that can fail -- presumably inside that call's error branch; the branch
 * conditions, return statements and the release of rvp are not visible in
 * this listing.
 */
12824 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12830 struct fs_snapshot_root_args root_data
;
12831 struct componentname cnp
;
12835 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12839 mp
= vnode_mount(rvp
);
12841 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12842 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12844 FREE(name_buf
, M_TEMP
);
12849 // XXX MAC checks ?
12852 * Grab mount_iterref so that we can release the vnode,
12853 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12855 error
= mount_iterref(mp
, 0);
12858 FREE(name_buf
, M_TEMP
);
12862 memset(&cnp
, 0, sizeof(cnp
));
12863 cnp
.cn_pnbuf
= (char *)name_buf
;
12864 cnp
.cn_nameiop
= LOOKUP
;
12865 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12866 cnp
.cn_pnlen
= MAXPATHLEN
;
12867 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12868 cnp
.cn_namelen
= (int)name_len
;
12869 root_data
.sr_cnp
= &cnp
;
12871 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12873 mount_iterdrop(mp
);
12874 FREE(name_buf
, M_TEMP
);
12880 * FS snapshot operations dispatcher
12883 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12884 __unused
int32_t *retval
)
12887 vfs_context_t ctx
= vfs_context_current();
12889 AUDIT_ARG(fd
, uap
->dirfd
);
12890 AUDIT_ARG(value32
, uap
->op
);
12892 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12898 * Enforce user authorization for snapshot modification operations
12900 if ((uap
->op
!= SNAPSHOT_OP_MOUNT
) &&
12901 (uap
->op
!= SNAPSHOT_OP_ROOT
)) {
12902 vnode_t dvp
= NULLVP
;
12903 vnode_t devvp
= NULLVP
;
12906 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
12910 mp
= vnode_mount(dvp
);
12911 devvp
= mp
->mnt_devvp
;
12913 /* get an iocount on devvp */
12914 if (devvp
== NULLVP
) {
12915 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
12916 /* for mounts which arent block devices */
12917 if (error
== ENOENT
) {
12921 error
= vnode_getwithref(devvp
);
12929 if ((vfs_context_issuser(ctx
) == 0) &&
12930 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
12942 case SNAPSHOT_OP_CREATE
:
12943 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12945 case SNAPSHOT_OP_DELETE
:
12946 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12948 case SNAPSHOT_OP_RENAME
:
12949 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12952 case SNAPSHOT_OP_MOUNT
:
12953 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12954 uap
->data
, uap
->flags
, ctx
);
12956 case SNAPSHOT_OP_REVERT
:
12957 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12959 #if CONFIG_MNT_ROOTSNAP
12960 case SNAPSHOT_OP_ROOT
:
12961 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12963 #endif /* CONFIG_MNT_ROOTSNAP */