2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
112 #include <vfs/vfs_disk_conditioner.h>
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
137 #include <miscfs/routefs/routefs.h>
141 #include <security/mac.h>
142 #include <security/mac_framework.h>
/*
 * Path-buffer helper macros and fallback selector definitions.
 *
 * GET_PATH()/RELEASE_PATH() are defined twice below: one variant obtains
 * the buffer with get_pathbuff(), the other allocates MAXPATHLEN bytes with
 * MALLOC_ZONE(..., M_NAMEI, M_WAITOK) and releases them with FREE_ZONE().
 * The closing "#endif CONFIG_FSE" shows the two variants are alternatives
 * selected by CONFIG_FSE.
 *
 * NOTE(review): the #if/#else lines and the body of the first
 * RELEASE_PATH() are not visible in this copy - restore them from a
 * pristine source before building.
 *
 * HFS_GET_BOOT_INFO, HFS_SET_BOOT_INFO and APFSIOC_REVERT_TO_SNAPSHOT are
 * each wrapped in #ifndef, so they are only defined here when no header
 * has already provided them.
 */
146 #define GET_PATH(x) \
147 (x) = get_pathbuff();
148 #define RELEASE_PATH(x) \
151 #define GET_PATH(x) \
152 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
153 #define RELEASE_PATH(x) \
154 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
155 #endif /* CONFIG_FSE */
157 #ifndef HFS_GET_BOOT_INFO
158 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
161 #ifndef HFS_SET_BOOT_INFO
162 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
165 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
166 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
/*
 * Forward declarations for the mount, sync, statfs and path helpers used
 * in this file.
 *
 * Declarations marked `static` are file-local.  The others
 * (disk_conditioner_unmount, enablequotas, vfs_notify_mount,
 * prepare_coveredvp, fg_vn_data_alloc) are declared with external linkage.
 *
 * MAX_AUTHORIZE_ENOENT_RETRIES (1024) is the retry bound described in the
 * text that precedes its definition.
 *
 * NOTE(review): this copy is missing the struct announced by
 * "struct for checkdirs iteration" and the final parameter lines of the
 * mount_common() and rmdirat_internal() prototypes (both end on a dangling
 * comma).  The mount_common() definition later in the file takes a trailing
 * `vfs_context_t ctx`; confirm the prototypes against a pristine source.
 */
169 extern void disk_conditioner_unmount(mount_t mp
);
171 /* struct for checkdirs iteration */
176 /* callback for checkdirs iteration */
177 static int checkdirs_callback(proc_t p
, void * arg
);
179 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
180 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
181 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
182 static int getfsstat_callback(mount_t mp
, void * arg
);
183 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
184 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
185 static int sync_callback(mount_t
, void *);
186 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
187 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
188 boolean_t partial_copy
);
189 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
190 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
191 struct componentname
*cnp
, user_addr_t fsmountargs
,
192 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
194 void vfs_notify_mount(vnode_t pdvp
);
196 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
198 struct fd_vn_data
* fg_vn_data_alloc(void);
201 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
202 * Concurrent lookups (or lookups by ids) on hard links can cause the
203 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
204 * does) to return ENOENT as the path cannot be returned from the name cache
205 * alone. We have no option but to retry and hope to get one namei->reverse path
206 * generation done without an intervening lookup, lookup by id on the hard link
207 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
208 * which currently are the MAC hooks for rename, unlink and rmdir.
210 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
212 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
,
215 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *);
/*
 * Configuration-dependent prototypes, external references and the
 * file-level mount bookkeeping globals.
 *
 *  - CONFIG_IMGSRC_ACCESS: file-local helpers for relocating the image
 *    boot source and for bracketing a mount update.
 *  - CONFIG_LOCKERBOOT: mount_locker_protoboot().
 *  - snapshot_root() is declared twice: once under CONFIG_MNT_ROOTSNAP and
 *    once with __attribute__((unused)).
 *    NOTE(review): the second is presumably the #else arm; the
 *    #else/#endif lines are not visible in this copy - confirm.
 *  - union_dircheckp is a function pointer taking
 *    (struct vnode **, struct fileproc *, vfs_context_t).
 *  - mount_generation and vfs_nummntops both start at 0; the text beside
 *    each describes when it is bumped.
 *  - rmdir_remove_orphaned_appleDouble() is only declared when
 *    CONFIG_APPLEDOUBLE is set.
 */
217 #ifdef CONFIG_IMGSRC_ACCESS
218 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
219 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
220 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
221 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
222 static void mount_end_update(mount_t mp
);
223 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
224 #endif /* CONFIG_IMGSRC_ACCESS */
226 #if CONFIG_LOCKERBOOT
227 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
228 const char *pbdevpath
);
232 #if CONFIG_MNT_ROOTSNAP
233 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
235 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
238 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
241 int sync_internal(void);
244 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
246 extern lck_grp_t
*fd_vn_lck_grp
;
247 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
248 extern lck_attr_t
*fd_vn_lck_attr
;
251 * incremented each time a mount or unmount operation occurs
252 * used to invalidate the cached value of the rootvp in the
253 * mount structure utilized by cache_lookup_path
255 uint32_t mount_generation
= 0;
257 /* counts number of mount and unmount operations */
258 unsigned int vfs_nummntops
= 0;
260 extern const struct fileops vnops
;
261 #if CONFIG_APPLEDOUBLE
262 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
263 #endif /* CONFIG_APPLEDOUBLE */
266 * Virtual File System System Calls
269 #if NFSCLIENT || DEVFS || ROUTEFS
271 * Private in-kernel mounting SPI (not exported); built when NFSCLIENT, DEVFS or ROUTEFS is configured
275 vfs_iskernelmount(mount_t mp
)
277 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
/*
 * kernel_mount
 *
 * Description:	In-kernel mount entry point.  Prepares a nameidata for
 *		the mount-on path and hands the request to mount_common()
 *		with labelstr == NULL and kernelmount == TRUE.
 *
 * Parameters:	fstype			File system type name, passed
 *					through to mount_common()
 *		pvp, vp			Parent and covered vnode, passed
 *					through to mount_common()
 *		path			Mount-on path; used as a
 *					kernel-space (UIO_SYSSPACE) string
 *		data			Mount arguments, cast to a
 *					user_addr_t for mount_common()
 *		datalen			Unused
 *		syscall_flags		Generic mount flags
 *		kern_flags		KERNEL_MOUNT_* internal flags
 *		ctx			Caller's context
 *
 * NOTE(review): this copy is missing the return type, the local
 * declarations (nd, error), the conditions that select between the
 * lookup path and the caller-supplied-vnode path, and the cleanup/return
 * lines.  Do not infer control flow from the statement order below.
 */
282 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
283 void *data
, __unused
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
)
/* Lookup request for the mount-on path; WANTPARENT also asks for the parent vnode. */
289 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
290 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
293 * Get the vnode to be covered if it's not supplied
/* The diagnostic below is only printed for snapshot, VM-volume and data-volume mounts. */
298 if (kern_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VMVOL
| KERNEL_MOUNT_DATAVOL
)) {
299 printf("failed to locate mount-on path: %s ", path
);
/*
 * Point the componentname at the caller's path string directly; the
 * recorded length includes the terminating NUL.
 */
307 char *pnbuf
= CAST_DOWN(char *, path
);
309 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
310 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
/* No MAC label string (NULL); kernelmount is TRUE. */
314 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
315 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
325 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
328 * Mount a file system.
332 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
334 struct __mac_mount_args muap
;
336 muap
.type
= uap
->type
;
337 muap
.path
= uap
->path
;
338 muap
.flags
= uap
->flags
;
339 muap
.data
= uap
->data
;
340 muap
.mac_p
= USER_ADDR_NULL
;
341 return __mac_mount(p
, &muap
, retval
);
/*
 * fmount
 *
 * Description:	Mount a file system on the directory referred to by a file
 *		descriptor rather than by a path.
 *
 * Parameters:	p			Unused
 *		uap			User argument descriptor
 *		retval			Unused
 *
 * Indirect:	uap->type		Filesystem type (copied in from user
 *					space, at most MFSNAMELEN bytes)
 *		uap->fd			Descriptor of the vnode to cover
 *		uap->flags		Mount flags
 *		uap->data		Mount arguments
 *
 * Visible steps: audit fd and flags; test the flags for
 * MNT_IMGSRC_BY_INDEX | MNT_ROOTFS and for MNT_UNION; copy in the type
 * name; resolve the descriptor with file_vnode() and call
 * vnode_getwithref() on the result; fetch the parent with
 * vnode_getparent(); build the covered vnode's path with vn_getpath()
 * into a MAXPATHLEN buffer; call mount_common() with internal_flags 0,
 * labelstr NULL and kernelmount FALSE; free the path buffer.
 *
 * NOTE(review): this copy is missing the return type, several local
 * declarations (error, dummy, vp, pvp), the bodies of every error branch
 * and the reference-dropping/return lines, so the error values and
 * cleanup order cannot be documented from here.
 */
345 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
347 struct componentname cn
;
348 vfs_context_t ctx
= vfs_context_current();
351 int flags
= uap
->flags
;
352 char fstypename
[MFSNAMELEN
];
353 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
357 AUDIT_ARG(fd
, uap
->fd
);
358 AUDIT_ARG(fflags
, flags
);
359 /* fstypename will get audited by mount_common */
361 /* Sanity check the flags */
362 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
366 if (flags
& MNT_UNION
) {
/* Fetch the file system type name from user space. */
370 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
/* Translate the descriptor into the vnode that will be covered. */
375 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
379 if ((error
= vnode_getwithref(vp
)) != 0) {
384 pvp
= vnode_getparent(vp
);
/* Build a componentname whose path buffer holds the covered vnode's path. */
391 memset(&cn
, 0, sizeof(struct componentname
));
392 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
393 cn
.cn_pnlen
= MAXPATHLEN
;
395 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
396 FREE(cn
.cn_pnbuf
, M_TEMP
);
403 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
405 FREE(cn
.cn_pnbuf
, M_TEMP
);
414 vfs_notify_mount(vnode_t pdvp
)
416 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
417 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
422 * Mount a file system taking into account MAC label behavior.
423 * See mount(2) man page for more information
425 * Parameters: p Process requesting the mount
426 * uap User argument descriptor (see below)
429 * Indirect: uap->type Filesystem type
430 * uap->path Path to mount
431 * uap->data Mount arguments
432 * uap->mac_p MAC info
433 * uap->flags Mount flags
439 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount
 *
 * Visible steps, in source order:
 *   1. copy the file system type name in from uap->type (MFSNAMELEN max);
 *   2. initialise a LOOKUP/OP_MOUNT nameidata for the user-space path
 *      uap->path, asking for the parent as well (WANTPARENT);
 *   3. with CONFIG_IMGSRC_ACCESS, when flags is exactly
 *      MNT_IMGSRC_BY_INDEX, hand off to relocate_imageboot_source();
 *   4. when uap->mac_p is not USER_ADDR_NULL, copy in a user64_mac or
 *      user32_mac descriptor, test m_buflen against the range
 *      [2, MAC_MAX_LABEL_BUF_LEN], allocate labelstr and copy the label
 *      string in;
 *   5. audit the flags, test for MNT_UNION, and adjust flags when the
 *      covered vnode is the root (VROOT) of the MNT_ROOTFS mount;
 *   6. call mount_common() with internal_flags 0 and kernelmount FALSE;
 *   7. free labelstr.
 *
 * NOTE(review): this copy is missing the return type, several local
 * declarations (nd, error, dummy, ulen, mac, vp, pvp), the namei() call,
 * every error-branch body, the 32/64-bit selection around the copyin()
 * calls, and the cleanup/return lines.  The block also ends in the middle
 * of the final `if (need_nameidone)` statement.
 */
442 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
446 int need_nameidone
= 0;
447 vfs_context_t ctx
= vfs_context_current();
448 char fstypename
[MFSNAMELEN
];
451 char *labelstr
= NULL
;
452 int flags
= uap
->flags
;
454 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
455 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
460 * Get the fs type name from user space
462 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
468 * Get the vnode to be covered
470 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
471 UIO_USERSPACE
, uap
->path
, ctx
);
480 #ifdef CONFIG_IMGSRC_ACCESS
481 /* Mounting image source cannot be batched with other operations */
/*
 * NOTE(review): the final argument repeats the guarding comparison, so it
 * is always true on this path.
 */
482 if (flags
== MNT_IMGSRC_BY_INDEX
) {
483 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
484 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
487 #endif /* CONFIG_IMGSRC_ACCESS */
491 * Get the label string (if any) from user space
493 if (uap
->mac_p
!= USER_ADDR_NULL
) {
498 struct user64_mac mac64
;
499 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
500 mac
.m_buflen
= mac64
.m_buflen
;
501 mac
.m_string
= mac64
.m_string
;
503 struct user32_mac mac32
;
504 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
505 mac
.m_buflen
= mac32
.m_buflen
;
506 mac
.m_string
= mac32
.m_string
;
/* Bounds test on the user-supplied label length, placed before the allocation. */
511 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
512 (mac
.m_buflen
< 2)) {
516 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
517 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
521 AUDIT_ARG(mac_string
, labelstr
);
523 #endif /* CONFIG_MACF */
525 AUDIT_ARG(fflags
, flags
);
528 if (flags
& MNT_UNION
) {
529 /* No union mounts on release kernels */
/* Special handling when the covered vnode is the root of the root file system. */
535 if ((vp
->v_flag
& VROOT
) &&
536 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
537 if (!(flags
& MNT_UNION
)) {
541 * For a union mount on '/', treat it as fresh
542 * mount instead of update.
543 * Otherwise, union mouting on '/' used to panic the
544 * system before, since mnt_vnodecovered was found to
545 * be NULL for '/' which is required for unionlookup
546 * after it gets ENOENT on union mount.
548 flags
= (flags
& ~(MNT_UPDATE
));
552 if ((flags
& MNT_RDONLY
) == 0) {
553 /* Release kernels are not allowed to mount "/" as rw */
559 * See 7392553 for more details on why this check exists.
560 * Suffice to say: If this check is ON and something tries
561 * to mount the rootFS RW, we'll turn off the codesign
562 * bitmap optimization.
564 #if CHECK_CS_VALIDATION_BITMAP
565 if ((flags
& MNT_RDONLY
) == 0) {
566 root_fs_upgrade_try
= TRUE
;
/* internal_flags is 0 and kernelmount is FALSE. */
571 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
572 labelstr
, FALSE
, ctx
);
578 FREE(labelstr
, M_MACTEMP
);
580 #endif /* CONFIG_MACF */
588 if (need_nameidone
) {
596 * common mount implementation (final stage of mounting)
599 * fstypename file system type (i.e. its VFS name)
600 * pvp parent of covered vnode
602 * cnp component name (ie path) of covered vnode
603 * flags generic mount flags
604 * fsmountargs file system specific data
605 * labelstr optional MAC label
606 * kernelmount TRUE for mounts initiated from inside the kernel
607 * ctx caller's context
610 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
611 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
612 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
615 #pragma unused(labelstr)
617 struct vnode
*devvp
= NULLVP
;
618 struct vnode
*device_vnode
= NULLVP
;
623 struct vfstable
*vfsp
= (struct vfstable
*)0;
624 struct proc
*p
= vfs_context_proc(ctx
);
626 user_addr_t devpath
= USER_ADDR_NULL
;
629 boolean_t vfsp_ref
= FALSE
;
630 boolean_t is_rwlock_locked
= FALSE
;
631 boolean_t did_rele
= FALSE
;
632 boolean_t have_usecount
= FALSE
;
634 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
635 /* Check for mutually-exclusive flag bits */
636 uint32_t checkflags
= (internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
));
638 while (checkflags
!= 0) {
639 checkflags
&= (checkflags
- 1);
644 //not allowed to request multiple mount-by-role flags
651 * Process an update for an existing mount
653 if (flags
& MNT_UPDATE
) {
654 if ((vp
->v_flag
& VROOT
) == 0) {
660 /* unmount in progress return error */
662 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
668 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
669 is_rwlock_locked
= TRUE
;
671 * We only allow the filesystem to be reloaded if it
672 * is currently mounted read-only.
674 if ((flags
& MNT_RELOAD
) &&
675 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
681 * If content protection is enabled, update mounts are not
682 * allowed to turn it off.
684 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
685 ((flags
& MNT_CPROTECT
) == 0)) {
691 * can't turn off MNT_REMOVABLE either but it may be an unexpected
692 * failure to return an error for this so we'll just silently
693 * add it if it is not passed in.
695 if ((mp
->mnt_flag
& MNT_REMOVABLE
) &&
696 ((flags
& MNT_REMOVABLE
) == 0)) {
697 flags
|= MNT_REMOVABLE
;
700 #ifdef CONFIG_IMGSRC_ACCESS
701 /* Can't downgrade the backer of the root FS */
702 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
703 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
707 #endif /* CONFIG_IMGSRC_ACCESS */
710 * Only root, or the user that did the original mount is
711 * permitted to update it.
713 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
714 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
718 error
= mac_mount_check_remount(ctx
, mp
);
724 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
725 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
727 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
728 flags
|= MNT_NOSUID
| MNT_NODEV
;
729 if (mp
->mnt_flag
& MNT_NOEXEC
) {
737 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
739 vfsp
= mp
->mnt_vtable
;
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
745 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
747 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
748 flags
|= MNT_NOSUID
| MNT_NODEV
;
749 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
754 /* XXXAUDIT: Should we capture the type on the error path as well? */
755 AUDIT_ARG(text
, fstypename
);
757 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
758 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
759 vfsp
->vfc_refcount
++;
771 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
772 * except in ROSV configs.
774 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) &&
775 ((internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
)) == 0)) {
776 error
= EINVAL
; /* unsupported request */
780 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
786 * Allocate and initialize the filesystem (mount_t)
788 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
790 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
793 /* Initialize the default IO constraints */
794 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
795 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
796 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
797 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
798 mp
->mnt_devblocksize
= DEV_BSIZE
;
799 mp
->mnt_alignmentmask
= PAGE_MASK
;
800 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
803 mp
->mnt_realrootvp
= NULLVP
;
804 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
806 TAILQ_INIT(&mp
->mnt_vnodelist
);
807 TAILQ_INIT(&mp
->mnt_workerqueue
);
808 TAILQ_INIT(&mp
->mnt_newvnodes
);
810 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
811 is_rwlock_locked
= TRUE
;
812 mp
->mnt_op
= vfsp
->vfc_vfsops
;
813 mp
->mnt_vtable
= vfsp
;
814 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
815 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
816 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
818 int pathlen
= MAXPATHLEN
;
820 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) {
821 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
824 mp
->mnt_vnodecovered
= vp
;
825 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
826 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
827 mp
->mnt_devbsdunit
= 0;
829 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
830 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
832 #if NFSCLIENT || DEVFS || ROUTEFS
834 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
836 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
837 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
839 #endif /* NFSCLIENT || DEVFS */
844 * Set the mount level flags.
846 if (flags
& MNT_RDONLY
) {
847 mp
->mnt_flag
|= MNT_RDONLY
;
848 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
849 // disallow read/write upgrades of file systems that
850 // had the TYPENAME_OVERRIDE feature set.
851 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
855 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
857 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
858 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
859 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
860 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
861 MNT_QUARANTINE
| MNT_CPROTECT
);
866 * On release builds of iOS based platforms, always enforce NOSUID on
867 * all mounts. We do this here because we can catch update mounts as well as
868 * non-update mounts in this case.
870 mp
->mnt_flag
|= (MNT_NOSUID
);
874 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
875 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
876 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
877 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
878 MNT_QUARANTINE
| MNT_CPROTECT
);
881 if (flags
& MNT_MULTILABEL
) {
882 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
886 mp
->mnt_flag
|= MNT_MULTILABEL
;
890 * Process device path for local file systems if requested
892 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
893 !(internal_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
))) {
894 //snapshot, vm, datavolume mounts are special
895 if (vfs_context_is64bit(ctx
)) {
896 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
899 fsmountargs
+= sizeof(devpath
);
902 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
905 /* munge into LP64 addr */
906 devpath
= CAST_USER_ADDR_T(tmp
);
907 fsmountargs
+= sizeof(tmp
);
910 /* Lookup device and authorize access to it */
914 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
915 if ((error
= namei(&nd
))) {
919 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
924 if (devvp
->v_type
!= VBLK
) {
928 if (major(devvp
->v_rdev
) >= nblkdev
) {
933 * If mount by non-root, then verify that user has necessary
934 * permissions on the device.
936 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
937 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
939 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
940 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
942 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
947 /* On first mount, preflight and open device */
948 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
949 if ((error
= vnode_ref(devvp
))) {
953 * Disallow multiple mounts of the same device.
954 * Disallow mounting of a device that is currently in use
955 * (except for root, which might share swap device for miniroot).
956 * Flush out any old buffers remaining from a previous use.
958 if ((error
= vfs_mountedon(devvp
))) {
962 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
966 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
970 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
974 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
976 error
= mac_vnode_check_open(ctx
,
978 ronly
? FREAD
: FREAD
| FWRITE
);
983 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
987 mp
->mnt_devvp
= devvp
;
988 device_vnode
= devvp
;
989 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
990 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
991 (device_vnode
= mp
->mnt_devvp
)) {
995 * If upgrade to read-write by non-root, then verify
996 * that user has necessary permissions on the device.
998 vnode_getalways(device_vnode
);
1000 if (suser(vfs_context_ucred(ctx
), NULL
) &&
1001 (error
= vnode_authorize(device_vnode
, NULL
,
1002 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
1004 vnode_put(device_vnode
);
1008 /* Tell the device that we're upgrading */
1009 dev
= (dev_t
)device_vnode
->v_rdev
;
1012 if ((u_int
)maj
>= (u_int
)nblkdev
) {
1013 panic("Volume mounted on a device with invalid major number.");
1016 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
1017 vnode_put(device_vnode
);
1018 device_vnode
= NULLVP
;
1023 } // localargs && !(snapshot | data | vm)
1026 if ((flags
& MNT_UPDATE
) == 0) {
1027 mac_mount_label_init(mp
);
1028 mac_mount_label_associate(ctx
, mp
);
1031 if ((flags
& MNT_UPDATE
) != 0) {
1032 error
= mac_mount_check_label_update(ctx
, mp
);
1040 * Mount the filesystem. We already asserted that internal_flags
1041 * cannot have more than one mount-by-role bit set.
1043 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
1044 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
1045 (caddr_t
)fsmountargs
, 0, ctx
);
1046 } else if (internal_flags
& KERNEL_MOUNT_DATAVOL
) {
1047 #if CONFIG_ROSV_STARTUP
1048 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1049 fs_role_mount_args_t frma
= {origin_mp
, VFS_DATA_ROLE
};
1050 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1052 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
);
1054 /* Mark volume associated with system volume */
1055 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
1057 /* Attempt to acquire the mnt_devvp and set it up */
1058 struct vnode
*mp_devvp
= NULL
;
1059 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1060 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1061 0, &mp_devvp
, vfs_context_kernel());
1063 mp
->mnt_devvp
= mp_devvp
;
1064 //vnode_lookup took an iocount, need to drop it.
1065 vnode_put(mp_devvp
);
1066 // now set `device_vnode` to the devvp that was acquired.
1067 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1068 // note that though the iocount above was dropped, the mount acquires
1069 // an implicit reference against the device.
1070 device_vnode
= mp_devvp
;
1077 } else if (internal_flags
& KERNEL_MOUNT_VMVOL
) {
1079 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1080 fs_role_mount_args_t frma
= {origin_mp
, VFS_VM_ROLE
};
1081 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1083 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
);
1085 /* Mark volume associated with system volume and a swap mount */
1086 mp
->mnt_kern_flag
|= (MNTK_SYSTEM
| MNTK_SWAP_MOUNT
);
1087 /* Attempt to acquire the mnt_devvp and set it up */
1088 struct vnode
*mp_devvp
= NULL
;
1089 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1090 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1091 0, &mp_devvp
, vfs_context_kernel());
1093 mp
->mnt_devvp
= mp_devvp
;
1094 //vnode_lookup took an iocount, need to drop it.
1095 vnode_put(mp_devvp
);
1097 // now set `device_vnode` to the devvp that was acquired.
1098 // note that though the iocount above was dropped, the mount acquires
1099 // an implicit reference against the device.
1100 device_vnode
= mp_devvp
;
1108 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
1111 if (flags
& MNT_UPDATE
) {
1112 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
1113 mp
->mnt_flag
&= ~MNT_RDONLY
;
1116 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1117 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1119 mp
->mnt_flag
= flag
; /* restore flag value */
1121 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1122 lck_rw_done(&mp
->mnt_rwlock
);
1123 is_rwlock_locked
= FALSE
;
1125 enablequotas(mp
, ctx
);
1131 * Put the new filesystem on the mount list after root.
1134 struct vfs_attr vfsattr
;
1136 error
= mac_mount_check_mount_late(ctx
, mp
);
1141 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1142 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1144 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1147 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1149 * drop reference provided by VFS_ROOT
1159 vnode_lock_spin(vp
);
1160 CLR(vp
->v_flag
, VMOUNT
);
1161 vp
->v_mountedhere
= mp
;
1165 * taking the name_cache_lock exclusively will
1166 * insure that everyone is out of the fast path who
1167 * might be trying to use a now stale copy of
1168 * vp->v_mountedhere->mnt_realrootvp
1169 * bumping mount_generation causes the cached values
1174 name_cache_unlock();
1176 error
= vnode_ref(vp
);
1181 have_usecount
= TRUE
;
1183 error
= checkdirs(vp
, ctx
);
1185 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1189 * there is no cleanup code here so I have made it void
1190 * we need to revisit this
1192 (void)VFS_START(mp
, 0, ctx
);
1194 if (mount_list_add(mp
) != 0) {
1196 * The system is shutting down trying to umount
1197 * everything, so fail with a plausible errno.
1202 lck_rw_done(&mp
->mnt_rwlock
);
1203 is_rwlock_locked
= FALSE
;
1205 /* Check if this mounted file system supports EAs or named streams. */
1206 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1207 VFSATTR_INIT(&vfsattr
);
1208 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1209 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1210 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1211 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1212 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1213 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1214 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1217 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1218 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1219 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1222 /* Check if this file system supports path from id lookups. */
1223 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1224 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1225 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1226 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1227 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1228 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1231 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1232 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1233 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1236 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1237 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1239 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1240 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1242 /* increment the operations count */
1243 OSAddAtomic(1, &vfs_nummntops
);
1244 enablequotas(mp
, ctx
);
1247 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1250 * cache the IO attributes for the underlying physical media...
1251 * an error return indicates the underlying driver doesn't
1252 * support all the queries necessary... however, reasonable
1253 * defaults will have been set, so no reason to bail or care
1255 vfs_init_io_attributes(device_vnode
, mp
);
1258 /* Now that mount is setup, notify the listeners */
1259 vfs_notify_mount(pvp
);
1260 IOBSDMountChange(mp
, kIOMountChangeMount
);
1262 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1263 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1264 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1265 mp
->mnt_vtable
->vfc_name
, error
);
1268 vnode_lock_spin(vp
);
1269 CLR(vp
->v_flag
, VMOUNT
);
1272 mp
->mnt_vtable
->vfc_refcount
--;
1273 mount_list_unlock();
1276 vnode_rele(device_vnode
);
1277 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1279 lck_rw_done(&mp
->mnt_rwlock
);
1280 is_rwlock_locked
= FALSE
;
1283 * if we get here, we have a mount structure that needs to be freed,
1284 * but since the coveredvp hasn't yet been updated to point at it,
1285 * no need to worry about other threads holding a crossref on this mp
1286 * so it's ok to just free it
1288 mount_lock_destroy(mp
);
1290 mac_mount_label_destroy(mp
);
1292 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1296 * drop I/O count on the device vp if there was one
1298 if (devpath
&& devvp
) {
1304 /* Error condition exits */
1306 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1309 * If the mount has been placed on the covered vp,
1310 * it may have been discovered by now, so we have
1311 * to treat this just like an unmount
1313 mount_lock_spin(mp
);
1314 mp
->mnt_lflag
|= MNT_LDEAD
;
1317 if (device_vnode
!= NULLVP
) {
1318 vnode_rele(device_vnode
);
1319 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1324 vnode_lock_spin(vp
);
1327 vp
->v_mountedhere
= (mount_t
) 0;
1331 if (have_usecount
) {
1335 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1339 if (devpath
&& devvp
) {
1343 /* Release mnt_rwlock only when it was taken */
1344 if (is_rwlock_locked
== TRUE
) {
1345 lck_rw_done(&mp
->mnt_rwlock
);
1349 if (mp
->mnt_crossref
) {
1350 mount_dropcrossref(mp
, vp
, 0);
1352 mount_lock_destroy(mp
);
1354 mac_mount_label_destroy(mp
);
1356 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1361 vfsp
->vfc_refcount
--;
1362 mount_list_unlock();
1369 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: vet the vnode vp that a mount is about to cover.
 *
 * Visible steps, in order:
 *   - fetch va_uid for vp and compare it with the uid of the caller
 *     credential; the superuser is exempt from the ownership test
 *   - VNOP_FSYNC(vp, MNT_WAIT) and buf_invalidateblks(vp, BUF_WRITE_DATA)
 *   - test that vp is a directory (VDIR)
 *   - test whether vp already has VMOUNT set with a mount attached
 *   - run mac_mount_check_mount
 *   - set VMOUNT on vp under the vnode spin lock
 *
 * cnp and fsname are declared unused by the pragma below.
 * NOTE(review): presumably skip_auth bypasses the ownership test and the
 * function returns an errno on any failed step - confirm.
 */
1373 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1376 #pragma unused(cnp,fsname)
1378 struct vnode_attr va
;
1383 * If the user is not root, ensure that they own the directory
1384 * onto which we are attempting to mount.
1387 VATTR_WANTED(&va
, va_uid
);
1388 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1389 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1390 (!vfs_context_issuser(ctx
)))) {
/* Push dirty data for the covered vnode out before it is hidden. */
1396 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1400 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
1404 if (vp
->v_type
!= VDIR
) {
/* VMOUNT with a non-NULL v_mountedhere means something is already mounted here. */
1409 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1415 error
= mac_mount_check_mount(ctx
, vp
,
/* Flag the vnode as having a mount in progress. */
1422 vnode_lock_spin(vp
);
1423 SET(vp
->v_flag
, VMOUNT
);
/*
 * Debug logging for the imageboot-source relocation code.  IMGSRC_DEBUG
 * either printfs its arguments behind an imgsrc prefix or expands to an
 * empty statement; DEBUG_IMGSRC (0 here) presumably selects between the two.
 */
1430 #if CONFIG_IMGSRC_ACCESS
1432 #define DEBUG_IMGSRC 0
1435 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1437 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: validate a caller-supplied
 * device path against the device vnode that backs mp.
 *
 * Visible steps: look devpath up with namei (FOLLOW), require the result
 * to be a block device, take an iocount on mp->mnt_devvp, compare the
 * dev_t of the looked-up vnode with that of the real device vnode, copy
 * the looked-up path into mp->mnt_vfsstat.f_mntfromname, and for callers
 * that are not superuser authorize read access (plus write access when
 * the mount is not read-only) on the looked-up vnode.  The iocount on the
 * real device vnode is dropped with vnode_put.
 *
 * NOTE(review): presumably the looked-up vnode is handed back through
 * devvpp on success and the uio segment switches to kernel space for the
 * kernel context - confirm.
 */
1441 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1443 struct nameidata nd
;
1444 vnode_t vp
, realdevvp
;
1447 enum uio_seg uio
= UIO_USERSPACE
;
1449 if (ctx
== vfs_context_kernel()) {
1453 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
);
1454 if ((error
= namei(&nd
))) {
1455 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1461 if (!vnode_isblk(vp
)) {
1462 IMGSRC_DEBUG("Not block device.\n");
1467 realdevvp
= mp
->mnt_devvp
;
1468 if (realdevvp
== NULLVP
) {
1469 IMGSRC_DEBUG("No device backs the mount.\n");
1474 error
= vnode_getwithref(realdevvp
);
1476 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
/* The path must name the same device that the mount already uses. */
1480 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1481 IMGSRC_DEBUG("Wrong dev_t.\n");
1486 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1489 * If mount by non-root, then verify that user has necessary
1490 * permissions on the device.
1492 if (!vfs_context_issuser(ctx
)) {
1493 accessmode
= KAUTH_VNODE_READ_DATA
;
1494 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1495 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1497 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1498 IMGSRC_DEBUG("Access denied.\n");
1506 vnode_put(realdevvp
);
1519 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1520 * and call checkdirs()
/*
 * place_mount_and_checkdirs: attach mp to the covered vnode vp.
 *
 * Records vp in mp->mnt_vnodecovered, clears VMOUNT and sets
 * vp->v_mountedhere under the vnode spin lock, takes a usecount on vp
 * with vnode_ref, then calls checkdirs(vp, ctx).  mnt_vnodecovered is
 * set back to NULLVP at the end, presumably only on the failure path -
 * confirm.
 */
1523 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1527 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1529 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1530 mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
));
/* Publish the mount on the vnode; VMOUNT (mount in progress) is no longer needed. */
1532 vnode_lock_spin(vp
);
1533 CLR(vp
->v_flag
, VMOUNT
);
1534 vp
->v_mountedhere
= mp
;
1538 * taking the name_cache_lock exclusively will
1539 * insure that everyone is out of the fast path who
1540 * might be trying to use a now stale copy of
1541 * vp->v_mountedhere->mnt_realrootvp
1542 * bumping mount_generation causes the cached values
1547 name_cache_unlock();
1549 error
= vnode_ref(vp
);
1554 error
= checkdirs(vp
, ctx
);
1556 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1563 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach mp from the covered vnode vp.
 * Clears vp->v_mountedhere under the vnode spin lock and resets
 * mp->mnt_vnodecovered to NULLVP.
 */
1569 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1572 vnode_lock_spin(vp
);
1573 vp
->v_mountedhere
= (mount_t
)NULL
;
1576 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update: gate and lock mp before an update.
 *
 * Visible steps: test MNT_LUNMOUNT under the mount spin lock, take
 * mp->mnt_rwlock exclusive, allow MNT_RELOAD only when the mount is
 * read-only, require the caller to be the mount owner (f_owner) or
 * superuser, and run mac_mount_check_remount.  The rwlock is released
 * with lck_rw_done at the end, presumably only when a check failed -
 * confirm; on success the caller pairs this with mount_end_update.
 */
1580 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1584 /* unmount in progress return error */
1585 mount_lock_spin(mp
);
1586 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1591 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1594 * We only allow the filesystem to be reloaded if it
1595 * is currently mounted read-only.
1597 if ((flags
& MNT_RELOAD
) &&
1598 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1604 * Only root, or the user that did the original mount is
1605 * permitted to update it.
1607 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1608 (!vfs_context_issuser(ctx
))) {
1613 error
= mac_mount_check_remount(ctx
, mp
);
1621 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update: release mp->mnt_rwlock, the lock that
 * mount_begin_update takes exclusively.
 */
1628 mount_end_update(mount_t mp
)
1630 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode: fetch the imageboot source root vnode recorded
 * for nesting level height.
 *
 * height is range-checked against MAX_IMAGEBOOT_NESTING before it is
 * used to index imgsrc_rootvnodes; a non-null entry is then pinned with
 * vnode_get.  NOTE(review): presumably the vnode is returned through
 * rvpp with that iocount held and an errno is returned otherwise -
 * confirm.
 */
1634 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1638 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1642 vp
= imgsrc_rootvnodes
[height
];
1643 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
/*
 * relocate_imageboot_source: move the filesystem that backs an imageboot
 * root onto a new covered vnode vp (whose parent is pvp).
 *
 * Visible flow:
 *   - bail out when imgsrc_rootvnodes[0] is unset or the caller is not
 *     superuser
 *   - copy in the arguments: a 64-bit or 32-bit mnt_imgsrc_args structure
 *     (height, flags, devpath), or a bare device path pointer for binary
 *     compatibility; NOTE(review): presumably by_index and is64bit select
 *     between these forms - confirm
 *   - get the old root vnode for that height and its mount
 *   - refuse a mount that already has MNTK_HAS_MOVED, checked once
 *     before and once after taking the update lock
 *   - mount_begin_update, prepare_coveredvp, check fsname against the
 *     vfstable name, and for VFC_VFSLOCALARGS filesystems validate the
 *     device path
 *   - place_mount_and_checkdirs, rewrite f_mntonname (the old name is
 *     saved in a MAXPATHLEN temporary), set MNTK_HAS_MOVED, add the mount
 *     to the mount list, end the update and notify listeners on pvp
 *   - the trailing statements restore f_mntonname, clear MNTK_HAS_MOVED,
 *     undo the placement or clear VMOUNT, end the update and free the
 *     temporary; these look like the error unwinding path
 */
1652 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
,
1653 struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
,
1654 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1658 boolean_t placed
= FALSE
;
1659 struct vfstable
*vfsp
;
1660 user_addr_t devpath
;
1661 char *old_mntonname
;
1667 /* If we didn't imageboot, nothing to move */
1668 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1672 /* Only root can do this */
1673 if (!vfs_context_issuser(ctx
)) {
1677 IMGSRC_DEBUG("looking for root vnode.\n");
1680 * Get root vnode of filesystem we're moving.
1684 struct user64_mnt_imgsrc_args mia64
;
1685 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1687 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1691 height
= mia64
.mi_height
;
1692 flags
= mia64
.mi_flags
;
1693 devpath
= mia64
.mi_devpath
;
1695 struct user32_mnt_imgsrc_args mia32
;
1696 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1698 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1702 height
= mia32
.mi_height
;
1703 flags
= mia32
.mi_flags
;
1704 devpath
= mia32
.mi_devpath
;
1708 * For binary compatibility--assumes one level of nesting.
1711 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1716 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1720 /* munge into LP64 addr */
1721 devpath
= CAST_USER_ADDR_T(tmp
);
1729 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1733 error
= get_imgsrc_rootvnode(height
, &rvp
);
1735 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
);
1739 IMGSRC_DEBUG("got old root vnode\n");
1741 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1743 /* Can only move once */
1744 mp
= vnode_mount(rvp
);
1745 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1746 IMGSRC_DEBUG("Already moved.\n");
1751 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
);
1752 IMGSRC_DEBUG("Starting updated.\n");
1754 /* Get exclusive rwlock on mount, authorize update on mp */
1755 error
= mount_begin_update(mp
, ctx
, 0);
1757 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1762 * It can only be moved once. Flag is set under the rwlock,
1763 * so we're now safe to proceed.
1765 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1766 IMGSRC_DEBUG("Already moved [2]\n");
1770 IMGSRC_DEBUG("Preparing coveredvp.\n");
1772 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1773 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1775 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1779 IMGSRC_DEBUG("Covered vp OK.\n");
1781 /* Sanity check the name caller has provided */
1782 vfsp
= mp
->mnt_vtable
;
1783 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1784 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1785 vfsp
->vfc_name
, fsname
);
1790 /* Check the device vnode and update mount-from name, for local filesystems */
1791 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1792 IMGSRC_DEBUG("Local, doing device validation.\n");
1794 if (devpath
!= USER_ADDR_NULL
) {
1795 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1797 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1806 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1807 * and increment the name cache's mount generation
1810 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1811 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
/* Save the old mount-on name so it can be restored if a later step fails. */
1818 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1819 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1821 /* Forbid future moves */
1823 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1826 /* Finally, add to mount list, completely ready to go */
1827 if (mount_list_add(mp
) != 0) {
1829 * The system is shutting down trying to umount
1830 * everything, so fail with a plausible errno.
1836 mount_end_update(mp
);
1838 FREE(old_mntonname
, M_TEMP
);
1840 vfs_notify_mount(pvp
);
1844 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1847 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1852 * Placing the mp on the vnode clears VMOUNT,
1853 * so cleanup is different after that point
1856 /* Rele the vp, clear VMOUNT and v_mountedhere */
1857 undo_place_on_covered_vp(mp
, vp
);
1859 vnode_lock_spin(vp
);
1860 CLR(vp
->v_flag
, VMOUNT
);
1864 mount_end_update(mp
);
1868 FREE(old_mntonname
, M_TEMP
);
1872 #if CONFIG_LOCKERBOOT
/*
 * mount_locker_protoboot: relocate the imageboot source onto mntpoint
 * from inside the kernel.
 *
 * Builds a user64_mnt_imgsrc_args structure in kernel memory whose
 * mi_devpath is pbdevpath, looks mntpoint up in the kernel context
 * (UIO_SYSSPACE, FOLLOW, WANTPARENT) and passes the resulting parent and
 * target vnodes to relocate_imageboot_source with is64 and by_index both
 * TRUE.  Both vnodes are then released with vnode_put; a non-zero
 * vnode_put result panics.  The relocation result is stashed before the
 * vnode_put calls overwrite error, presumably so that it can be returned
 * - confirm.
 */
1875 mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
1876 const char *pbdevpath
)
1879 struct nameidata nd
;
1880 boolean_t cleanup_nd
= FALSE
;
1881 vfs_context_t ctx
= vfs_context_kernel();
1882 boolean_t is64
= TRUE
;
1883 boolean_t by_index
= TRUE
;
1884 struct user64_mnt_imgsrc_args mia64
= {
1887 .mi_devpath
= CAST_USER_ADDR_T(pbdevpath
),
1889 user_addr_t mia64addr
= CAST_USER_ADDR_T(&mia64
);
1891 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
1892 UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
);
1895 IMGSRC_DEBUG("namei: %d\n", error
);
1900 error
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
,
1901 &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
);
/* Keep the relocation result; error is reused by the vnode_put calls below. */
1905 int stashed
= error
;
1907 error
= vnode_put(nd
.ni_vp
);
1909 panic("vnode_put() returned non-zero: %d", error
);
1913 error
= vnode_put(nd
.ni_dvp
);
1915 panic("vnode_put() returned non-zero: %d", error
);
1924 #endif /* CONFIG_LOCKERBOOT */
1925 #endif /* CONFIG_IMGSRC_ACCESS */
/*
 * enablequotas: turn disk quotas on for a freshly mounted filesystem.
 *
 * Only filesystems whose f_fstypename is hfs are considered.  For each
 * quota type below MAXQUOTAS the path
 *     <f_mntonname>/<QUOTAOPSNAME>.<extension for the type>
 * is looked up; when it does not exist the type is skipped.  Otherwise
 * the quota file path
 *     <f_mntonname>/<QUOTAFILENAME>.<extension for the type>
 * is handed to VFS_QUOTACTL with Q_QUOTAON.  The VFS_QUOTACTL result is
 * deliberately discarded so that quota problems never fail the mount.
 */
1928 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1930 struct nameidata qnd
;
1932 char qfpath
[MAXPATHLEN
];
1933 const char *qfname
= QUOTAFILENAME
;
1934 const char *qfopsname
= QUOTAOPSNAME
;
1935 const char *qfextension
[] = INITQFNAMES
;
1937 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1938 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
1942 * Enable filesystem disk quotas if necessary.
1943 * We ignore errors as this should not interfere with final mount
1945 for (type
= 0; type
< MAXQUOTAS
; type
++) {
1946 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1947 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1948 CAST_USER_ADDR_T(qfpath
), ctx
);
1949 if (namei(&qnd
) != 0) {
1950 continue; /* option file to trigger quotas is not present */
/* The option file was only probed for existence; drop its iocount. */
1952 vnode_put(qnd
.ni_vp
);
1954 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1956 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
/*
 * checkdirs_callback: per-process callback used by checkdirs.
 *
 * arg points at a struct cdirargs carrying olddp (the vnode that was just
 * covered) and newdp (its replacement).  When the process has olddp as
 * its current or root directory, the matching fd_cdir / fd_rdir entry is
 * switched to newdp.  Two usecounts are taken on newdp up front because
 * both entries may need one; whichever of the old and new references end
 * up unused are released at the end.  Every visible exit returns
 * PROC_RETURNED.
 */
1963 checkdirs_callback(proc_t p
, void * arg
)
1965 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
1966 vnode_t olddp
= cdrp
->olddp
;
1967 vnode_t newdp
= cdrp
->newdp
;
1968 struct filedesc
*fdp
;
1969 vnode_t new_cvp
= newdp
;
1970 vnode_t new_rvp
= newdp
;
1971 vnode_t old_cvp
= NULL
;
1972 vnode_t old_rvp
= NULL
;
1975 * XXX Also needs to iterate each thread in the process to see if it
1976 * XXX is using a per-thread current working directory, and, if so,
1977 * XXX update that as well.
1981 * First, with the proc_fdlock held, check to see if we will need
1982 * to do any work. If not, we will get out fast.
1987 (fdp
->fd_cdir
!= olddp
&& fdp
->fd_rdir
!= olddp
)) {
1989 return PROC_RETURNED
;
1994 * Ok, we will have to do some work. Always take two refs
1995 * because we might need that many. We'll dispose of whatever
1996 * we ended up not using.
1998 if (vnode_ref(newdp
) != 0) {
1999 return PROC_RETURNED
;
2001 if (vnode_ref(newdp
) != 0) {
2003 return PROC_RETURNED
;
2007 * Now do the work. Note: we dropped the proc_fdlock, so we
2008 * have to do all of the checks again.
2013 if (fdp
->fd_cdir
== olddp
) {
2015 fdp
->fd_cdir
= newdp
;
2018 if (fdp
->fd_rdir
== olddp
) {
2020 fdp
->fd_rdir
= newdp
;
2027 * Dispose of any references that are no longer needed.
2029 if (old_cvp
!= NULL
) {
2030 vnode_rele(old_cvp
);
2032 if (old_rvp
!= NULL
) {
2033 vnode_rele(old_rvp
);
2035 if (new_cvp
!= NULL
) {
2036 vnode_rele(new_cvp
);
2038 if (new_rvp
!= NULL
) {
2039 vnode_rele(new_rvp
);
2042 return PROC_RETURNED
;
2048 * Scan all active processes to see if any of them have a current
2049 * or root directory onto which the new filesystem has just been
2050 * mounted. If so, replace them with the new mount point.
/*
 * checkdirs: after a mount lands on olddp, repoint every process whose
 * current or root directory is olddp.
 *
 * A usecount of exactly 1 on olddp is tested first, presumably as a
 * shortcut meaning that no process holds it - confirm.  The root vnode
 * of the new mount is obtained with VFS_ROOT on olddp->v_mountedhere
 * (failure panics), then checkdirs_callback is run over all processes
 * without waiting for exec/fork transitions.  The global rootvnode is
 * compared against olddp last; NOTE(review): presumably it is replaced
 * by the new root in that case - confirm.
 */
2053 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
2058 struct cdirargs cdr
;
2060 if (olddp
->v_usecount
== 1) {
2063 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
2067 panic("mount: lost mount: error %d", err
);
2074 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2075 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
2077 if (rootvnode
== olddp
) {
2089 * Unmount a file system.
2091 * Note: unmount takes a path to the vnode mounted on as argument,
2092 * not special file (as before).
/*
 * unmount: system call entry point for unmounting by path.
 *
 * Looks uap->path up in the current context (FOLLOW), runs
 * mac_mount_check_umount on the mount, requires the looked-up vnode to be
 * the root of its filesystem (VROOT), and hands the mount to
 * safedounmount together with uap->flags.  safedounmount consumes the
 * mount reference.  p and retval are unused.
 */
2096 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
2101 struct nameidata nd
;
2102 vfs_context_t ctx
= vfs_context_current();
2104 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
2105 UIO_USERSPACE
, uap
->path
, ctx
);
2115 error
= mac_mount_check_umount(ctx
, mp
);
2122 * Must be the root of the filesystem
2124 if ((vp
->v_flag
& VROOT
) == 0) {
2130 /* safedounmount consumes the mount ref */
2131 return safedounmount(mp
, uap
->flags
, ctx
);
/*
 * vfs_unmountbyfsid: unmount the filesystem identified by fsid.
 *
 * The mount is found with mount_list_lookupby_fsid(fsid, 0, 1); a null
 * result is handled before the unmount (presumably by returning an
 * errno - confirm).  The mount is then passed to safedounmount, which
 * consumes the mount reference.
 */
2135 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
2139 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
2140 if (mp
== (mount_t
)0) {
2145 /* safedounmount consumes the mount ref */
2146 return safedounmount(mp
, flags
, ctx
);
2151 * The mount struct comes with a mount ref which will be consumed.
2152 * Do the actual file system unmount, prevent some common foot shooting.
/*
 * safedounmount: policy checks in front of dounmount.
 *
 * Visible checks, in order:
 *   - MNT_LNOTRESP set in mnt_kern_flag together with MNT_NOBLOCK and no
 *     MNT_FORCE
 *   - unless the mount carries MNTK_PERMIT_UNMOUNT and the request is not
 *     forced, the caller must be the mount owner (f_owner) or pass suser
 *   - root filesystem (MNT_ROOTFS) and MNTK_SYSTEM mounts get EBUSY
 *   - with CONFIG_IMGSRC_ACCESS, mounts flagged MNTK_BACKS_ROOT are
 *     tested as well
 * When nothing objects, dounmount(mp, flags, 1, ctx) performs the
 * unmount.
 *
 * NOTE(review): MNT_LNOTRESP has the naming of an mnt_lflag bit but is
 * tested against mnt_kern_flag here; check how the bit is set elsewhere
 * before changing either side.
 */
2155 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2158 proc_t p
= vfs_context_proc(ctx
);
2161 * If the file system is not responding and MNT_NOBLOCK
2162 * is set and not a forced unmount then return EBUSY.
2164 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
2165 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
2171 * Skip authorization if the mount is tagged as permissive and
2172 * this is not a forced-unmount attempt.
2174 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
2176 * Only root, or the user that did the original mount is
2177 * permitted to unmount this filesystem.
2179 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
2180 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
2185 * Don't allow unmounting the root file system (or the associated VM or DATA mounts) .
2187 if ((mp
->mnt_flag
& MNT_ROOTFS
) || (mp
->mnt_kern_flag
& MNTK_SYSTEM
)) {
2188 error
= EBUSY
; /* the root (or associated volumes) is always busy */
2192 #ifdef CONFIG_IMGSRC_ACCESS
2193 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
2197 #endif /* CONFIG_IMGSRC_ACCESS */
2199 return dounmount(mp
, flags
, 1, ctx
);
2207 * Do the actual file system unmount.
/*
 * dounmount: perform the unmount of mp.
 *
 * Visible phases:
 *   1. Unless forced, fsevent_unmount runs first.  An unmount already in
 *      progress (MNT_LUNMOUNT) is detected; MNT_FORCE sets MNT_LFORCE.
 *      With MNT_NOBLOCK and a non-kernel process, P_NOREMOTEHANG is set
 *      on the process and its previous value saved.
 *   2. The mount is marked MNTK_UNMOUNT / MNT_LUNMOUNT, MNT_ASYNC is
 *      cleared and the cached mnt_realrootvp is dropped.  A forced
 *      unmount without MNT_LNOSUB first force-unmounts submounts.
 *   3. mnt_rwlock is taken exclusive.  A non-forced unmount flushes the
 *      UBC and, when writable, calls VFS_SYNC; vflush then runs with
 *      SKIPSWAP | SKIPSYSTEM | SKIPROOT (plus FORCECLOSE when forced).
 *      The unmount flags are cleared again in three places, which look
 *      like the failure paths of VFS_SYNC, vflush and VFS_UNMOUNT.
 *   4. After mount_iterdrain, VFS_UNMOUNT is called.  On the way down
 *      the device vnode of a VFC_VFSLOCALARGS filesystem is closed, the
 *      mount is removed from the mount list, the covered vnode loses
 *      v_mountedhere and VMOUNT, the vfstable refcount drops, the name
 *      cache is purged and VQ_UNMOUNT is signalled.  The mount becomes
 *      MNT_LDEAD and MNT_LWAIT sleepers are woken.
 *   5. P_NOREMOTEHANG is restored, the trigger callback (if any) gets
 *      VTC_RELEASE or VTC_REPLACE, the rwlock is dropped, and the
 *      covered vnode is released (cross reference, usecount, iocount)
 *      with a NOTE_WRITE posted on its parent.  A mount without a
 *      covered vnode is freed directly when it is the root filesystem
 *      and otherwise panics.
 *
 * NOTE(review): presumably withref says that the caller holds a mount
 * reference that this function must drop - confirm.
 */
2210 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
2212 vnode_t coveredvp
= (vnode_t
)0;
2215 int forcedunmount
= 0;
2217 struct vnode
*devvp
= NULLVP
;
2219 proc_t p
= vfs_context_proc(ctx
);
2221 int pflags_save
= 0;
2222 #endif /* CONFIG_TRIGGERS */
2225 if (!(flags
& MNT_FORCE
)) {
2226 fsevent_unmount(mp
, ctx
); /* has to come first! */
2233 * If already an unmount in progress just return EBUSY.
2234 * Even a forced unmount cannot override.
2236 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
2244 if (flags
& MNT_FORCE
) {
2246 mp
->mnt_lflag
|= MNT_LFORCE
;
/* Remember the previous p_flag value so P_NOREMOTEHANG can be restored later. */
2250 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2251 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2255 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2256 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2257 mp
->mnt_flag
&= ~MNT_ASYNC
;
2259 * anyone currently in the fast path that
2260 * trips over the cached rootvp will be
2261 * dumped out and forced into the slow path
2262 * to regenerate a new cached value
2264 mp
->mnt_realrootvp
= NULLVP
;
2267 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2269 * Force unmount any mounts in this filesystem.
2270 * If any unmounts fail - just leave them dangling.
2273 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2277 * taking the name_cache_lock exclusively will
2278 * insure that everyone is out of the fast path who
2279 * might be trying to use a now stale copy of
2280 * vp->v_mountedhere->mnt_realrootvp
2281 * bumping mount_generation causes the cached values
2286 name_cache_unlock();
2289 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2294 if (forcedunmount
== 0) {
2295 ubc_umount(mp
); /* release cached vnodes */
2296 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2297 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2300 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2301 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2302 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2308 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2311 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2314 if (forcedunmount
) {
2315 lflags
|= FORCECLOSE
;
2317 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2318 if ((forcedunmount
== 0) && error
) {
2320 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2321 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2322 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2326 /* make sure no one is in the mount iterations or lookup */
2327 mount_iterdrain(mp
);
2329 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2331 mount_iterreset(mp
);
2333 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2334 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2335 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2339 /* increment the operations count */
2341 OSAddAtomic(1, &vfs_nummntops
);
2344 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2345 /* hold an io reference and drop the usecount before close */
2346 devvp
= mp
->mnt_devvp
;
2347 vnode_getalways(devvp
);
2349 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2351 vnode_clearmountedon(devvp
);
/* The rwlock is dropped around mount_list_remove and taken again afterwards. */
2354 lck_rw_done(&mp
->mnt_rwlock
);
2355 mount_list_remove(mp
);
2356 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2358 /* mark the mount point hook in the vp but not drop the ref yet */
2359 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2361 * The covered vnode needs special handling. Trying to get an
2362 * iocount must not block here as this may lead to deadlocks
2363 * if the Filesystem to which the covered vnode belongs is
2364 * undergoing forced unmounts. Since we hold a usecount, the
2365 * vnode cannot be reused (it can, however, still be terminated)
2367 vnode_getalways(coveredvp
);
2368 vnode_lock_spin(coveredvp
);
2371 coveredvp
->v_mountedhere
= (struct mount
*)0;
2372 CLR(coveredvp
->v_flag
, VMOUNT
);
2374 vnode_unlock(coveredvp
);
2375 vnode_put(coveredvp
);
2379 mp
->mnt_vtable
->vfc_refcount
--;
2380 mount_list_unlock();
2382 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2383 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2385 mp
->mnt_lflag
|= MNT_LDEAD
;
2387 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2389 * do the wakeup here
2390 * in case we block in mount_refdrain
2391 * which will drop the mount lock
2392 * and allow anyone blocked in vfs_busy
2393 * to wakeup and see the LDEAD state
2395 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2396 wakeup((caddr_t
)mp
);
2400 /* free disk_conditioner_info structure for this mount */
2401 disk_conditioner_unmount(mp
);
2404 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2405 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2410 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2411 // Restore P_NOREMOTEHANG bit to its previous value
2412 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2413 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2418 * Callback and context are set together under the mount lock, and
2419 * never cleared, so we're safe to examine them here, drop the lock,
2422 if (mp
->mnt_triggercallback
!= NULL
) {
2425 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2426 } else if (did_vflush
) {
2427 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2434 #endif /* CONFIG_TRIGGERS */
2436 lck_rw_done(&mp
->mnt_rwlock
);
2439 wakeup((caddr_t
)mp
);
2443 if ((coveredvp
!= NULLVP
)) {
2444 vnode_t pvp
= NULLVP
;
2447 * The covered vnode needs special handling. Trying to
2448 * get an iocount must not block here as this may lead
2449 * to deadlocks if the Filesystem to which the covered
2450 * vnode belongs is undergoing forced unmounts. Since we
2451 * hold a usecount, the vnode cannot be reused
2452 * (it can, however, still be terminated).
2454 vnode_getalways(coveredvp
);
2456 mount_dropcrossref(mp
, coveredvp
, 0);
2458 * We'll _try_ to detect if this really needs to be
2459 * done. The coveredvp can only be in termination (or
2460 * terminated) if the coveredvp's mount point is in a
2461 * forced unmount (or has been) since we still hold the
2464 if (!vnode_isrecycled(coveredvp
)) {
2465 pvp
= vnode_getparent(coveredvp
);
2467 if (coveredvp
->v_resolve
) {
2468 vnode_trigger_rearm(coveredvp
, ctx
);
2473 vnode_rele(coveredvp
);
2474 vnode_put(coveredvp
);
2478 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2481 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2482 mount_lock_destroy(mp
);
2484 mac_mount_label_destroy(mp
);
2486 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2488 panic("dounmount: no coveredvp");
2495 * Unmount any mounts in this filesystem.
/*
 * dounmount_submounts: unmount every filesystem mounted somewhere below
 * mp.
 *
 * An fsid array sized from a count of the mount list is allocated with
 * M_NOWAIT; slot 0 holds the fsid of mp itself.  Walking the mount list
 * forward from mp, a mount is appended when the filesystem that owns its
 * covered vnode is already in the array, so nested submounts are picked
 * up transitively.  After the mount list lock is dropped, the collected
 * mounts (slots m down to 1, i.e. excluding mp) are looked up by fsid
 * and unmounted in reverse order with dounmount, ignoring errors.
 */
2498 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2501 fsid_t
*fsids
, fsid
;
2503 int count
= 0, i
, m
= 0;
2508 // Get an array to hold the submounts fsids.
2509 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2511 fsids_sz
= count
* sizeof(fsid_t
);
2512 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2513 if (fsids
== NULL
) {
2514 mount_list_unlock();
2517 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2520 * Fill the array with submount fsids.
2521 * Since mounts are always added to the tail of the mount list, the
2522 * list is always in mount order.
2523 * For each mount check if the mounted-on vnode belongs to a
2524 * mount that's already added to our array of mounts to be unmounted.
2526 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2527 vp
= smp
->mnt_vnodecovered
;
2531 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2532 for (i
= 0; i
<= m
; i
++) {
2533 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2534 fsids
[i
].val
[1] == fsid
.val
[1]) {
2535 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2540 mount_list_unlock();
2542 // Unmount the submounts in reverse order. Ignore errors.
2543 for (i
= m
; i
> 0; i
--) {
2544 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2547 mount_iterdrop(smp
);
2548 (void) dounmount(smp
, flags
, 1, ctx
);
2553 FREE(fsids
, M_TEMP
);
/*
 * mount_dropcrossref: drop one cross reference that vnode dp holds on
 * mount mp.
 *
 * A negative mnt_crossref panics.  When mp is no longer the mount
 * attached to dp (dp->v_mountedhere) and the count has reached zero, the
 * mount structure is torn down: lock destroyed, MAC label destroyed,
 * memory returned to the M_MOUNT zone.  vnode_put_locked(dp) appears on
 * both paths, presumably guarded by need_put - confirm.
 */
2558 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2563 if (mp
->mnt_crossref
< 0) {
2564 panic("mount cross refs -ve");
2567 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2569 vnode_put_locked(dp
);
2573 mount_lock_destroy(mp
);
2575 mac_mount_label_destroy(mp
);
2577 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2581 vnode_put_locked(dp
);
2588 * Sync each mounted filesystem.
/* When non-zero, sync() and sync_thread() call vm_countdirtypages() after syncing. */
2594 int print_vmpage_stat
= 0;
2597 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2598 * mounted read-write with the passed waitfor value.
2600 * Parameters: mp mount-point descriptor per mounted file-system instance.
2601 * arg user argument (please see below)
2603 * User argument is a pointer to 32 bit unsigned integer which describes the
2604 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2605 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2608 * Returns: VFS_RETURNED
/*
 * Implementation notes for sync_callback:
 *   - read-only mounts are skipped entirely
 *   - waitfor defaults to MNT_NOWAIT and is replaced by the uint32_t that
 *     arg points at (presumably only when arg is non-NULL - confirm)
 *   - any waitfor value outside the six accepted combinations panics
 *   - MNT_ASYNC is cleared for the duration of VFS_SYNC and set again
 *     afterwards; asyncflag records whether it was set on entry and
 *     presumably guards the restore - confirm
 *   - the VFS_SYNC result is ignored and VFS_RETURNED is always returned
 */
2611 sync_callback(mount_t mp
, void *arg
)
2613 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2614 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2615 unsigned waitfor
= MNT_NOWAIT
;
2618 waitfor
= *(uint32_t*)arg
;
2621 /* Sanity check for flags - these are the only valid combinations for the flag bits*/
2622 if (waitfor
!= MNT_WAIT
&&
2623 waitfor
!= (MNT_WAIT
| MNT_VOLUME
) &&
2624 waitfor
!= MNT_NOWAIT
&&
2625 waitfor
!= (MNT_NOWAIT
| MNT_VOLUME
) &&
2626 waitfor
!= MNT_DWAIT
&&
2627 waitfor
!= (MNT_DWAIT
| MNT_VOLUME
)) {
2628 panic("Passed inappropriate waitfor %u to "
2629 "sync_callback()", waitfor
);
/* Sync with MNT_ASYNC temporarily removed from the mount flags. */
2632 mp
->mnt_flag
&= ~MNT_ASYNC
;
2633 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel());
2635 mp
->mnt_flag
|= MNT_ASYNC
;
2639 return VFS_RETURNED
;
/*
 * sync: system call entry point that syncs every mounted filesystem.
 *
 * Iterates all mounts with LK_NOWAIT and runs sync_callback on each with
 * a NULL argument, which selects the default MNT_NOWAIT behaviour.  In
 * DIAGNOSTIC builds, dirty VM pages are counted afterwards when
 * print_vmpage_stat is set.  All three parameters are unused.
 */
2644 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2646 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2648 if (print_vmpage_stat
) {
2649 vm_countdirtypages();
2656 #endif /* DIAGNOSTIC */
/*
 * Selector values read by sync_internal_callback: restrict a sync pass
 * to reliable media or to unreliable media.
 */
2662 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2663 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
/*
 * sync_internal_callback: per-mount callback for the two-pass sync done
 * by sync_thread.
 *
 * A mount counts as reliable when it is local (MNT_LOCAL) and not backed
 * by a virtual device (MNTK_VIRTUALDEV).  arg points at a sync_type_t;
 * mounts that do not match the requested class are skipped.  Matching
 * mounts are synced through sync_callback with its default wait mode.
 * VFS_RETURNED is returned in every case.
 */
2667 sync_internal_callback(mount_t mp
, void *arg
)
2670 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2671 (mp
->mnt_flag
& MNT_LOCAL
);
2672 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2674 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2675 return VFS_RETURNED
;
2676 } else if ((sync_type
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2677 return VFS_RETURNED
;
2681 (void)sync_callback(mp
, NULL
);
2683 return VFS_RETURNED
;
/*
 * State shared between sync_internal and sync_thread, accessed with
 * sync_mtx_lck held.  SYNC_THREAD_RUN requests another sync pass;
 * SYNC_THREAD_RUNNING marks that a worker thread exists.
 * sync_timeout_seconds bounds how long sync_internal sleeps waiting for
 * the worker.
 */
2686 int sync_thread_state
= 0;
2687 int sync_timeout_seconds
= 5;
2689 #define SYNC_THREAD_RUN 0x0001
2690 #define SYNC_THREAD_RUNNING 0x0002
/*
 * sync_thread: worker thread body started by sync_internal.
 *
 * While SYNC_THREAD_RUN is set, the flag is cleared, the mutex dropped,
 * and two passes are made over all mounts: reliable media first, then
 * unreliable media.  The mutex is retaken before the flag is tested
 * again, so a request that arrives during a pass triggers another pass.
 * On exit the waiter is woken and SYNC_THREAD_RUNNING is cleared, both
 * before the mutex is released.  Both parameters are unused.
 */
2693 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2695 sync_type_t sync_type
;
2697 lck_mtx_lock(sync_mtx_lck
);
2698 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2699 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2700 lck_mtx_unlock(sync_mtx_lck
);
2702 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2703 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2704 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2705 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2707 lck_mtx_lock(sync_mtx_lck
);
2710 * This wakeup _has_ to be issued before the lock is released otherwise
2711 * we may end up waking up a thread in sync_internal which is
2712 * expecting a wakeup from a thread it just created and not from this
2713 * thread which is about to exit.
2715 wakeup(&sync_thread_state
);
2716 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2717 lck_mtx_unlock(sync_mtx_lck
);
2719 if (print_vmpage_stat
) {
2720 vm_countdirtypages();
2727 #endif /* DIAGNOSTIC */
/*
 * Time at which sync_internal last printed its timeout message; the
 * message is printed again only after more than 120 seconds have passed.
 */
2730 struct timeval sync_timeout_last_print
= {.tv_sec
= 0, .tv_usec
= 0};
2733 * An in-kernel sync for power management to call.
2734 * This function always returns within sync_timeout seconds.
/*
 * Implementation notes for sync_internal:
 *   - SYNC_THREAD_RUN is always set, so a worker that is already running
 *     makes another pass
 *   - a worker thread is started only when SYNC_THREAD_RUNNING is clear;
 *     when kernel_thread_start fails the flag is cleared again, the
 *     mutex released and a message printed
 *   - the caller then sleeps on sync_thread_state for at most
 *     sync_timeout_seconds; PDROP means the mutex is not held afterwards
 *   - the timeout message is rate limited through
 *     sync_timeout_last_print
 *   - the thread reference from kernel_thread_start is released with
 *     thread_deallocate when a thread was created here
 */
__private_extern__
int
2741 int thread_created
= FALSE
;
2742 struct timespec ts
= {.tv_sec
= sync_timeout_seconds
, .tv_nsec
= 0};
2744 lck_mtx_lock(sync_mtx_lck
);
2745 sync_thread_state
|= SYNC_THREAD_RUN
;
2746 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2749 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2750 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2751 if (kr
!= KERN_SUCCESS
) {
2752 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2753 lck_mtx_unlock(sync_mtx_lck
);
2754 printf("sync_thread failed\n");
2757 thread_created
= TRUE
;
2760 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2761 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2766 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2767 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2768 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2772 if (thread_created
) {
2773 thread_deallocate(thd
);
2777 } /* end of sync_internal call */
2780 * Change filesystem quotas.
/*
 * quotactl: system call entry point for manipulating filesystem quotas.
 *
 * The mount is taken from the vnode that uap->path resolves to.  The
 * sub-command is uap->cmd shifted right by SUBCMDSHIFT and decides what
 * is copied in before VFS_QUOTACTL and what is copied out after it:
 *   - quota file name: a MAXPATHLEN buffer is allocated with kalloc,
 *     filled with copyinstr and freed with kfree afterwards
 *   - dqblk in: copied from uap->arg; 64-bit processes supply a
 *     user_dqblk that is converted with munge_dqblk
 *   - dqblk out: copied to uap->arg, converted to user_dqblk for 64-bit
 *     processes
 *   - quota status: an int copied out to uap->arg
 * NOTE(review): the case labels that bind sub-commands to these branches
 * should be confirmed against the Q_* constants.
 */
2784 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2787 int error
, quota_cmd
, quota_status
= 0;
2790 struct nameidata nd
;
2791 vfs_context_t ctx
= vfs_context_current();
2792 struct dqblk my_dqblk
= {};
2794 AUDIT_ARG(uid
, uap
->uid
);
2795 AUDIT_ARG(cmd
, uap
->cmd
);
2796 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2802 mp
= nd
.ni_vp
->v_mount
;
2804 vnode_put(nd
.ni_vp
);
2807 /* copyin any data we will need for downstream code */
2808 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2810 switch (quota_cmd
) {
2812 /* uap->arg specifies a file from which to take the quotas */
2813 fnamelen
= MAXPATHLEN
;
2814 datap
= kalloc(MAXPATHLEN
);
2815 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2818 /* uap->arg is a pointer to a dqblk structure. */
2819 datap
= (caddr_t
) &my_dqblk
;
2823 /* uap->arg is a pointer to a dqblk structure. */
2824 datap
= (caddr_t
) &my_dqblk
;
2825 if (proc_is64bit(p
)) {
2826 struct user_dqblk my_dqblk64
;
2827 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2829 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2832 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2836 /* uap->arg is a pointer to an integer */
/* NOTE(review): the cast operand below looks like a damaged spelling of the address of quota_status; restore it from the upstream file. */
2837 datap
= (caddr_t
) "a_status
;
2845 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2848 switch (quota_cmd
) {
2850 if (datap
!= NULL
) {
2851 kfree(datap
, MAXPATHLEN
);
2855 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2857 if (proc_is64bit(p
)) {
2858 struct user_dqblk my_dqblk64
;
2860 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2861 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2862 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
2864 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
2869 /* uap->arg is a pointer to an integer */
2871 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl: second definition with every parameter marked __unused.
 * NOTE(review): presumably the variant compiled when quota support is
 * configured out - confirm the guarding conditional and the value it
 * returns.
 */
2883 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2890 * Get filesystem statistics.
2892 * Returns: 0 Success
2894 * vfs_update_vfsstat:???
2895 * munge_statfs:EFAULT
2899 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2902 struct vfsstatfs
*sp
;
2904 struct nameidata nd
;
2905 vfs_context_t ctx
= vfs_context_current();
2908 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2909 UIO_USERSPACE
, uap
->path
, ctx
);
2916 sp
= &mp
->mnt_vfsstat
;
2920 error
= mac_mount_check_stat(ctx
, mp
);
2927 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2933 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2939 * Get filesystem statistics.
2943 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2947 struct vfsstatfs
*sp
;
2950 AUDIT_ARG(fd
, uap
->fd
);
2952 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2956 error
= vnode_getwithref(vp
);
2962 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2971 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2977 sp
= &mp
->mnt_vfsstat
;
2978 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2982 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
/*
 * vfs_get_statfs64: fill *sfs from the statistics cached in
 * mp->mnt_vfsstat plus a few fields taken from the mount itself.
 *
 * Parameters:	mp	mount whose statistics are reported
 *		sfs	caller-supplied struct statfs64 to fill in
 *
 * *sfs is zeroed first, so any field not assigned below reads as 0.
 */
2992 vfs_get_statfs64(struct mount
*mp
, struct statfs64
*sfs
)
2994 struct vfsstatfs
*vsfs
= &mp
->mnt_vfsstat
;
2996 bzero(sfs
, sizeof(*sfs
));
2998 sfs
->f_bsize
= vsfs
->f_bsize
;
/* f_iosize is narrowed to int32_t by the explicit cast */
2999 sfs
->f_iosize
= (int32_t)vsfs
->f_iosize
;
3000 sfs
->f_blocks
= vsfs
->f_blocks
;
3001 sfs
->f_bfree
= vsfs
->f_bfree
;
3002 sfs
->f_bavail
= vsfs
->f_bavail
;
3003 sfs
->f_files
= vsfs
->f_files
;
3004 sfs
->f_ffree
= vsfs
->f_ffree
;
3005 sfs
->f_fsid
= vsfs
->f_fsid
;
3006 sfs
->f_owner
= vsfs
->f_owner
;
/* the type number comes from the mount's vfs table entry, not from the cached stats */
3007 sfs
->f_type
= mp
->mnt_vtable
->vfc_typenum
;
/* only the mount flag bits selected by MNT_VISFLAGMASK are reported */
3008 sfs
->f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
3009 sfs
->f_fssubtype
= vsfs
->f_fssubtype
;
/*
 * f_flags_ext is MNT_EXT_ROOT_DATA_VOL when the mount has MNTK_SYSTEM set
 * but is neither a swap mount (MNTK_SWAP_MOUNT) nor the root file system
 * (MNT_ROOTFS); otherwise it is 0.
 */
3010 sfs
->f_flags_ext
= ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
)) ? MNT_EXT_ROOT_DATA_VOL
: 0;
/* a per-mount type-name override, when flagged, is copied instead of the cached name */
3011 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
3012 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
/*
 * NOTE(review): the line separating this strlcpy from the one above is not
 * visible in this excerpt; presumably this is the non-override (else)
 * branch -- confirm.
 */
3014 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
);
/* mount-point and mount-source names are copied with a MAXPATHLEN bound */
3016 strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
);
3017 strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
);
3021 * Get file system statistics in 64-bit mode
3024 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
3028 struct nameidata nd
;
3029 struct statfs64 sfs
;
3030 vfs_context_t ctxp
= vfs_context_current();
3033 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3034 UIO_USERSPACE
, uap
->path
, ctxp
);
3044 error
= mac_mount_check_stat(ctxp
, mp
);
3051 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
3057 vfs_get_statfs64(mp
, &sfs
);
3058 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3059 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3060 /* This process does not want to see a separate data volume mountpoint */
3061 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3063 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3070 * Get file system statistics in 64-bit mode
3073 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
3077 struct statfs64 sfs
;
3080 AUDIT_ARG(fd
, uap
->fd
);
3082 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3086 error
= vnode_getwithref(vp
);
3092 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3101 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3107 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3111 vfs_get_statfs64(mp
, &sfs
);
3112 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3113 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3114 /* This process does not want to see a separate data volume mountpoint */
3115 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3117 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3126 struct getfsstat_struct
{
3137 getfsstat_callback(mount_t mp
, void * arg
)
3139 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3140 struct vfsstatfs
*sp
;
3142 vfs_context_t ctx
= vfs_context_current();
3144 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3146 error
= mac_mount_check_stat(ctx
, mp
);
3148 fstp
->error
= error
;
3149 return VFS_RETURNED_DONE
;
3152 sp
= &mp
->mnt_vfsstat
;
3154 * If MNT_NOWAIT is specified, do not refresh the
3155 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3157 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3158 (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3159 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3160 (error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) {
3161 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3162 return VFS_RETURNED
;
3166 * Need to handle LP64 version of struct statfs
3168 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
3170 fstp
->error
= error
;
3171 return VFS_RETURNED_DONE
;
3173 fstp
->sfsp
+= my_size
;
3177 error
= mac_mount_label_get(mp
, *fstp
->mp
);
3179 fstp
->error
= error
;
3180 return VFS_RETURNED_DONE
;
3187 return VFS_RETURNED
;
3191 * Get statistics on all filesystems.
/*
 * getfsstat: wrapper around __mac_getfsstat().
 *
 * Repackages uap->buf, uap->bufsize and uap->flags into a
 * struct __mac_getfsstat_args and returns whatever __mac_getfsstat()
 * returns. p and retval are forwarded unchanged (p is forwarded even
 * though it is annotated __unused).
 */
3194 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
3196 struct __mac_getfsstat_args muap
;
3198 muap
.buf
= uap
->buf
;
3199 muap
.bufsize
= uap
->bufsize
;
/* no MAC label array is supplied on this path */
3200 muap
.mac
= USER_ADDR_NULL
;
3202 muap
.flags
= uap
->flags
;
3204 return __mac_getfsstat(p
, &muap
, retval
);
3208 * __mac_getfsstat: Get MAC-related file system statistics
3210 * Parameters: p (ignored)
3211 * uap User argument descriptor (see below)
3212 * retval Count of file system statistics (N stats)
3214 * Indirect: uap->bufsize Buffer size
3215 * uap->macsize MAC info size
3216 * uap->buf Buffer where information will be returned
3218 * uap->flags File system flags
3221 * Returns: 0 Success
3226 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
3230 size_t count
, maxcount
, bufsize
, macsize
;
3231 struct getfsstat_struct fst
;
3233 if ((unsigned)uap
->bufsize
> INT_MAX
|| (unsigned)uap
->macsize
> INT_MAX
) {
3237 bufsize
= (size_t) uap
->bufsize
;
3238 macsize
= (size_t) uap
->macsize
;
3240 if (IS_64BIT_PROCESS(p
)) {
3241 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
3243 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
3251 if (uap
->mac
!= USER_ADDR_NULL
) {
3256 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
3257 if (count
!= maxcount
) {
3261 /* Copy in the array */
3262 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
3267 error
= copyin(uap
->mac
, mp0
, macsize
);
3269 FREE(mp0
, M_MACTEMP
);
3273 /* Normalize to an array of user_addr_t */
3274 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
3276 FREE(mp0
, M_MACTEMP
);
3280 for (i
= 0; i
< count
; i
++) {
3281 if (IS_64BIT_PROCESS(p
)) {
3282 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3284 mp
[i
] = (user_addr_t
)mp0
[i
];
3287 FREE(mp0
, M_MACTEMP
);
3294 fst
.flags
= uap
->flags
;
3297 fst
.maxcount
= maxcount
;
3300 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
);
3303 FREE(mp
, M_MACTEMP
);
3307 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3311 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3312 *retval
= fst
.maxcount
;
3314 *retval
= fst
.count
;
3320 getfsstat64_callback(mount_t mp
, void * arg
)
3322 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3323 struct vfsstatfs
*sp
;
3324 struct statfs64 sfs
;
3327 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3329 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3331 fstp
->error
= error
;
3332 return VFS_RETURNED_DONE
;
3335 sp
= &mp
->mnt_vfsstat
;
3337 * If MNT_NOWAIT is specified, do not refresh the fsstat
3338 * cache. MNT_WAIT overrides MNT_NOWAIT.
3340 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3341 * getfsstat, since the constants are out of the same
3344 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3345 ((((fstp
->flags
& MNT_NOWAIT
) == 0) || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3346 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3347 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) {
3348 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3349 return VFS_RETURNED
;
3352 vfs_get_statfs64(mp
, &sfs
);
3353 error
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
));
3355 fstp
->error
= error
;
3356 return VFS_RETURNED_DONE
;
3358 fstp
->sfsp
+= sizeof(sfs
);
3361 return VFS_RETURNED
;
3365 * Get statistics on all file systems in 64 bit mode.
3368 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3371 int count
, maxcount
;
3372 struct getfsstat_struct fst
;
3374 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3380 fst
.flags
= uap
->flags
;
3383 fst
.maxcount
= maxcount
;
3385 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
);
3388 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3392 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3393 *retval
= fst
.maxcount
;
3395 *retval
= fst
.count
;
3402 * Gets the vnode associated with the file descriptor passed.
3406 * ctx - vfs context of caller
3407 * fd - file descriptor for which vnode is required.
3408 * vpp - Pointer to pointer to vnode to be returned.
3410 * The vnode is returned with an iocount so any vnode obtained
3411 * by this call needs a vnode_put
3415 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3419 struct fileproc
*fp
;
3420 proc_t p
= vfs_context_proc(ctx
);
3424 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3429 error
= vnode_getwithref(vp
);
3431 (void)fp_drop(p
, fd
, fp
, 0);
3435 (void)fp_drop(p
, fd
, fp
, 0);
3441 * Wrapper function around namei to start lookup from a directory
3442 * specified by a file descriptor ni_dirfd.
3444 * In addition to all the errors returned by namei, this call can
3445 * return ENOTDIR if the file descriptor does not refer to a directory,
3446 * and EBADF if the file descriptor is not valid.
3449 nameiat(struct nameidata
*ndp
, int dirfd
)
3451 if ((dirfd
!= AT_FDCWD
) &&
3452 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3453 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3457 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3458 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3463 c
= *((char *)(ndp
->ni_dirp
));
3469 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3475 if (vnode_vtype(dvp_at
) != VDIR
) {
3480 ndp
->ni_dvp
= dvp_at
;
3481 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3483 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3493 * Change current working directory to a given file descriptor.
3497 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3499 struct filedesc
*fdp
= p
->p_fd
;
3505 vfs_context_t ctx
= vfs_context_current();
3507 AUDIT_ARG(fd
, uap
->fd
);
3508 if (per_thread
&& uap
->fd
== -1) {
3510 * Switching back from per-thread to per process CWD; verify we
3511 * in fact have one before proceeding. The only success case
3512 * for this code path is to return 0 preemptively after zapping
3513 * the thread structure contents.
3515 thread_t th
= vfs_context_thread(ctx
);
3517 uthread_t uth
= get_bsdthread_info(th
);
3519 uth
->uu_cdir
= NULLVP
;
3520 if (tvp
!= NULLVP
) {
3528 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3531 if ((error
= vnode_getwithref(vp
))) {
3536 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3538 if (vp
->v_type
!= VDIR
) {
3544 error
= mac_vnode_check_chdir(ctx
, vp
);
3549 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3554 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3555 if (vfs_busy(mp
, LK_NOWAIT
)) {
3559 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3570 if ((error
= vnode_ref(vp
))) {
3576 thread_t th
= vfs_context_thread(ctx
);
3578 uthread_t uth
= get_bsdthread_info(th
);
3581 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
/*
 * fchdir: forwards to common_fchdir() with per_thread = 0 and returns
 * its result. retval is not used.
 */
3607 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3609 return common_fchdir(p
, uap
, 0);
/*
 * __pthread_fchdir: forwards to common_fchdir() with per_thread = 1 and
 * returns its result. uap is passed through a void * cast because
 * common_fchdir() is declared to take a struct fchdir_args *.
 * retval is not used.
 */
3613 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3615 return common_fchdir(p
, (void *)uap
, 1);
3620 * Change current working directory (".").
3622 * Returns: 0 Success
3623 * change_dir:ENOTDIR
3625 * vnode_ref:ENOENT No such file or directory
3629 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata
*ndp
, int per_thread
)
3631 struct filedesc
*fdp
= p
->p_fd
;
3635 error
= change_dir(ndp
, ctx
);
3639 if ((error
= vnode_ref(ndp
->ni_vp
))) {
3640 vnode_put(ndp
->ni_vp
);
3644 * drop the iocount we picked up in change_dir
3646 vnode_put(ndp
->ni_vp
);
3649 thread_t th
= vfs_context_thread(ctx
);
3651 uthread_t uth
= get_bsdthread_info(th
);
3653 uth
->uu_cdir
= ndp
->ni_vp
;
3654 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3656 vnode_rele(ndp
->ni_vp
);
3662 fdp
->fd_cdir
= ndp
->ni_vp
;
3675 * Change current working directory (".").
3677 * Returns: 0 Success
3678 * chdir_internal:ENOTDIR
3679 * chdir_internal:ENOENT No such file or directory
3680 * chdir_internal:???
3684 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3686 struct nameidata nd
;
3687 vfs_context_t ctx
= vfs_context_current();
3689 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3690 UIO_USERSPACE
, uap
->path
, ctx
);
3692 return chdir_internal(p
, ctx
, &nd
, per_thread
);
3699 * Change current working directory (".") for the entire process
3701 * Parameters: p Process requesting the call
3702 * uap User argument descriptor (see below)
3705 * Indirect parameters: uap->path Directory path
3707 * Returns: 0 Success
3708 * common_chdir: ENOTDIR
3709 * common_chdir: ENOENT No such file or directory
3714 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3716 return common_chdir(p
, (void *)uap
, 0);
3722 * Change current working directory (".") for a single thread
3724 * Parameters: p Process requesting the call
3725 * uap User argument descriptor (see below)
3728 * Indirect parameters: uap->path Directory path
3730 * Returns: 0 Success
3731 * common_chdir: ENOTDIR
3732 * common_chdir: ENOENT No such file or directory
3737 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3739 return common_chdir(p
, (void *)uap
, 1);
3744 * Change notion of root (``/'') directory.
3748 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3750 struct filedesc
*fdp
= p
->p_fd
;
3752 struct nameidata nd
;
3754 vfs_context_t ctx
= vfs_context_current();
3756 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3760 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3761 UIO_USERSPACE
, uap
->path
, ctx
);
3762 error
= change_dir(&nd
, ctx
);
3768 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3771 vnode_put(nd
.ni_vp
);
3776 if ((error
= vnode_ref(nd
.ni_vp
))) {
3777 vnode_put(nd
.ni_vp
);
3780 vnode_put(nd
.ni_vp
);
3784 fdp
->fd_rdir
= nd
.ni_vp
;
3785 fdp
->fd_flags
|= FD_CHROOT
;
3796 * Common routine for chroot and chdir.
3798 * Returns: 0 Success
3799 * ENOTDIR Not a directory
3800 * namei:??? [anything namei can return]
3801 * vnode_authorize:??? [anything vnode_authorize can return]
3804 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3809 if ((error
= namei(ndp
))) {
3815 if (vp
->v_type
!= VDIR
) {
3821 error
= mac_vnode_check_chdir(ctx
, vp
);
3828 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3838 * Allocate the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_alloc: allocate a struct fd_vn_data from M_FD_VN_DATA
 * (zero-filled via M_ZERO) and initialize its fv_lock mutex.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * open1() stores this function's result in fg_vn_data, so presumably
 * fvdata is returned -- confirm.
 */
3841 fg_vn_data_alloc(void)
3843 struct fd_vn_data
*fvdata
;
3845 /* Allocate per fd vnode data */
3846 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3847 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
/* the lock uses the fd_vn lock group and attributes */
3848 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3853 * Free the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_free: tear down a struct fd_vn_data handed in as an untyped
 * pointer.
 *
 * Frees fv_buf (M_FD_DIRBUF) when it is set, destroys fv_lock, then frees
 * the structure itself (M_FD_VN_DATA).
 * NOTE(review): no NULL check on fgvndata is visible here; it is
 * dereferenced directly.
 */
3856 fg_vn_data_free(void *fgvndata
)
3858 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
/* the buffer is optional: only free it when one is attached */
3860 if (fvdata
->fv_buf
) {
3861 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
/* destroy the mutex before releasing the memory that contains it */
3863 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3864 FREE(fvdata
, M_FD_VN_DATA
);
3868 * Check permissions, allocate an open file structure,
3869 * and call the device open routine if any.
3871 * Returns: 0 Success
3882 * XXX Need to implement uid, gid
3885 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3886 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3889 proc_t p
= vfs_context_proc(ctx
);
3890 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3891 struct fileproc
*fp
;
3894 int type
, indx
, error
;
3896 struct vfs_context context
;
3900 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
3904 flags
= FFLAGS(uflags
);
3905 CLR(flags
, FENCRYPTED
);
3906 CLR(flags
, FUNENCRYPTED
);
3908 AUDIT_ARG(fflags
, oflags
);
3909 AUDIT_ARG(mode
, vap
->va_mode
);
3911 if ((error
= falloc_withalloc(p
,
3912 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3915 uu
->uu_dupfd
= -indx
- 1;
3917 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3918 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
3919 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3920 fp_drop(p
, indx
, NULL
, 0);
3925 if (error
== ERESTART
) {
3928 fp_free(p
, indx
, fp
);
3934 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3935 fp
->f_fglob
->fg_ops
= &vnops
;
3936 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3938 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3939 lf
.l_whence
= SEEK_SET
;
3942 if (flags
& O_EXLOCK
) {
3943 lf
.l_type
= F_WRLCK
;
3945 lf
.l_type
= F_RDLCK
;
3948 if ((flags
& FNONBLOCK
) == 0) {
3952 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3958 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
3961 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3964 /* try to truncate by setting the size attribute */
3965 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
3970 * For directories we hold some additional information in the fd.
3972 if (vnode_vtype(vp
) == VDIR
) {
3973 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3975 fp
->f_fglob
->fg_vn_data
= NULL
;
3981 * The first terminal open (without a O_NOCTTY) by a session leader
3982 * results in it being set as the controlling terminal.
3984 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3985 !(flags
& O_NOCTTY
)) {
3988 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3989 (caddr_t
)&tmp
, ctx
);
3993 if (flags
& O_CLOEXEC
) {
3994 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3996 if (flags
& O_CLOFORK
) {
3997 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3999 procfdtbl_releasefd(p
, indx
, NULL
);
4001 #if CONFIG_SECLUDED_MEMORY
4002 if (secluded_for_filecache
&&
4003 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
4004 vnode_vtype(vp
) == VREG
) {
4005 memory_object_control_t moc
;
4007 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
4009 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
4010 /* nothing to do... */
4011 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
4012 /* writable -> no longer eligible for secluded pages */
4013 memory_object_mark_eligible_for_secluded(moc
,
4015 } else if (secluded_for_filecache
== 1) {
4016 char pathname
[32] = { 0, };
4018 /* XXX FBDP: better way to detect /Applications/ ? */
4019 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4020 (void)copyinstr(ndp
->ni_dirp
,
4025 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
4030 pathname
[sizeof(pathname
) - 1] = '\0';
4031 if (strncmp(pathname
,
4033 strlen("/Applications/")) == 0 &&
4035 "/Applications/Camera.app/",
4036 strlen("/Applications/Camera.app/")) != 0) {
4039 * AND from "/Applications/"
4040 * AND not from "/Applications/Camera.app/"
4041 * ==> eligible for secluded
4043 memory_object_mark_eligible_for_secluded(moc
,
4046 } else if (secluded_for_filecache
== 2) {
4048 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4050 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4052 /* not implemented... */
4054 size_t len
= strlen(vp
->v_name
);
4055 if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) ||
4056 !strncmp(vp
->v_name
, "dyld", len
) ||
4057 !strncmp(vp
->v_name
, "launchd", len
) ||
4058 !strncmp(vp
->v_name
, "Camera", len
) ||
4059 !strncmp(vp
->v_name
, "mediaserverd", len
) ||
4060 !strncmp(vp
->v_name
, "SpringBoard", len
) ||
4061 !strncmp(vp
->v_name
, "backboardd", len
)) {
4063 * This file matters when launching Camera:
4064 * do not store its contents in the secluded
4065 * pool that will be drained on Camera launch.
4067 memory_object_mark_eligible_for_secluded(moc
,
4072 #endif /* CONFIG_SECLUDED_MEMORY */
4074 fp_drop(p
, indx
, fp
, 1);
4081 context
= *vfs_context_current();
4082 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
4084 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
4085 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
4086 lf
.l_whence
= SEEK_SET
;
4089 lf
.l_type
= F_UNLCK
;
4092 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
4095 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
4097 fp_free(p
, indx
, fp
);
4103 * While most of the *at syscall handlers can call nameiat() which
4104 * is a wrapper around namei, the use of namei and initialisation
4105 * of nameidata are far removed and in different functions - namei
4106 * gets called in vn_open_auth for open1. So we'll just do here what
4110 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4111 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
4114 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
4118 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4119 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
4124 c
= *((char *)(ndp
->ni_dirp
));
4130 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
4136 if (vnode_vtype(dvp_at
) != VDIR
) {
4141 ndp
->ni_dvp
= dvp_at
;
4142 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
4143 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
4150 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
4154 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4156 * Parameters: p Process requesting the open
4157 * uap User argument descriptor (see below)
4158 * retval Pointer to an area to receive the
4159 * return value from the system call
4161 * Indirect: uap->path Path to open (same as 'open')
4162 * uap->flags Flags to open (same as 'open')
4163 * uap->uid UID to set, if creating
4164 * uap->gid GID to set, if creating
4165 * uap->mode File mode, if creating (same as 'open')
4166 * uap->xsecurity ACL to set, if creating
4168 * Returns: 0 Success
4171 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4173 * XXX: We should enummerate the possible errno values here, and where
4174 * in the code they originated.
4177 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
4179 struct filedesc
*fdp
= p
->p_fd
;
4181 kauth_filesec_t xsecdst
;
4182 struct vnode_attr va
;
4183 struct nameidata nd
;
4186 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4189 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
4190 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
4195 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4196 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4197 if (uap
->uid
!= KAUTH_UID_NONE
) {
4198 VATTR_SET(&va
, va_uid
, uap
->uid
);
4200 if (uap
->gid
!= KAUTH_GID_NONE
) {
4201 VATTR_SET(&va
, va_gid
, uap
->gid
);
4203 if (xsecdst
!= NULL
) {
4204 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4207 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4208 uap
->path
, vfs_context_current());
4210 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4211 fileproc_alloc_init
, NULL
, retval
);
4212 if (xsecdst
!= NULL
) {
4213 kauth_filesec_free(xsecdst
);
4220 * Go through the data-protected atomically controlled open (2)
4222 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4225 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
4227 int flags
= uap
->flags
;
4228 int class = uap
->class;
4229 int dpflags
= uap
->dpflags
;
4232 * Follow the same path as normal open(2)
4233 * Look up the item if it exists, and acquire the vnode.
4235 struct filedesc
*fdp
= p
->p_fd
;
4236 struct vnode_attr va
;
4237 struct nameidata nd
;
4242 /* Mask off all but regular access permissions */
4243 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4244 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4246 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4247 uap
->path
, vfs_context_current());
4250 * Initialize the extra fields in vnode_attr to pass down our
4252 * 1. target cprotect class.
4253 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4255 if (flags
& O_CREAT
) {
4256 /* lower level kernel code validates that the class is valid before applying it. */
4257 if (class != PROTECTION_CLASS_DEFAULT
) {
4259 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4260 * file behave the same as open (2)
4262 VATTR_SET(&va
, va_dataprotect_class
, class);
4266 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4267 if (flags
& (O_RDWR
| O_WRONLY
)) {
4268 /* Not allowed to write raw encrypted bytes */
4271 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4272 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4274 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4275 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4279 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4280 fileproc_alloc_init
, NULL
, retval
);
4286 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4287 int fd
, enum uio_seg segflg
, int *retval
)
4289 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4290 struct vnode_attr va
;
4291 struct nameidata nd
;
4295 /* Mask off all but regular access permissions */
4296 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4297 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4299 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4302 return open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
/*
 * open: calls __pthread_testcancel(1), then forwards to open_nocancel()
 * and returns its result.
 *
 * uap is reinterpreted as a struct open_nocancel_args *; the cast assumes
 * the two argument structures share a layout -- confirm.
 * NOTE(review): __pthread_testcancel presumably makes this entry point a
 * pthread cancellation point -- confirm against its definition.
 */
4307 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4309 __pthread_testcancel(1);
4310 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
/*
 * open_nocancel: forwards uap->path, uap->flags and uap->mode to
 * openat_internal() in the current VFS context, with AT_FDCWD as the
 * directory fd and UIO_USERSPACE as the path segment flag.
 * p is not used.
 */
4314 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4317 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4318 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
/*
 * openat_nocancel: forwards uap->path, uap->flags and uap->mode to
 * openat_internal() in the current VFS context, with uap->fd as the
 * directory fd and UIO_USERSPACE as the path segment flag.
 * p is not used.
 */
4322 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4325 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4326 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
/*
 * openat: calls __pthread_testcancel(1), then forwards to
 * openat_nocancel() and returns its result.
 *
 * uap is reinterpreted as a struct openat_nocancel_args *; the cast
 * assumes the two argument structures share a layout -- confirm.
 * NOTE(review): __pthread_testcancel presumably makes this entry point a
 * pthread cancellation point -- confirm against its definition.
 */
4330 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4332 __pthread_testcancel(1);
4333 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4337 * openbyid_np: open a file given a file system id and a file system object id
4338 * the hfs file system object id is an fsobj_id_t {uint32, uint32};
4339 * for file systems that don't support object ids it is a node id (uint64_t).
4341 * Parameters: p Process requesting the open
4342 * uap User argument descriptor (see below)
4343 * retval Pointer to an area to receive the
4344 * return value from the system call
4346 * Indirect: uap->path Path to open (same as 'open')
4348 * uap->fsid id of target file system
4349 * uap->objid id of target file system object
4350 * uap->oflags Flags to open (same as 'open')
4352 * Returns: 0 Success
4356 * XXX: We should enumerate the possible errno values here, and where
4357 * in the code they originated.
4360 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4366 int buflen
= MAXPATHLEN
;
4368 vfs_context_t ctx
= vfs_context_current();
4370 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4374 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4378 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4379 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4383 AUDIT_ARG(value32
, fsid
.val
[0]);
4384 AUDIT_ARG(value64
, objid
);
4386 /* resolve path from fsid, objid */
4388 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4393 error
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
,
4394 buf
, FSOPT_ISREALFSID
, &pathlen
);
4400 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4408 error
= openat_internal(
4409 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4418 * Create a special file.
4420 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4423 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4425 struct vnode_attr va
;
4426 vfs_context_t ctx
= vfs_context_current();
4428 struct nameidata nd
;
4432 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4433 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4435 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4436 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4437 return mkfifo1(ctx
, uap
->path
, &va
);
4440 AUDIT_ARG(mode
, uap
->mode
);
4441 AUDIT_ARG(value32
, uap
->dev
);
4443 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4446 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4447 UIO_USERSPACE
, uap
->path
, ctx
);
4460 switch (uap
->mode
& S_IFMT
) {
4462 VATTR_SET(&va
, va_type
, VCHR
);
4465 VATTR_SET(&va
, va_type
, VBLK
);
4473 error
= mac_vnode_check_create(ctx
,
4474 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4480 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4484 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4489 int update_flags
= 0;
4491 // Make sure the name & parent pointers are hooked up
4492 if (vp
->v_name
== NULL
) {
4493 update_flags
|= VNODE_UPDATE_NAME
;
4495 if (vp
->v_parent
== NULLVP
) {
4496 update_flags
|= VNODE_UPDATE_PARENT
;
4500 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4504 add_fsevent(FSE_CREATE_FILE
, ctx
,
4512 * nameidone has to happen before we vnode_put(dvp)
4513 * since it may need to release the fs_nodelock on the dvp
4526 * Create a named pipe.
4528 * Returns: 0 Success
4531 * vnode_authorize:???
4535 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4539 struct nameidata nd
;
4541 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4542 UIO_USERSPACE
, upath
, ctx
);
4550 /* check that this is a new file and authorize addition */
4555 VATTR_SET(vap
, va_type
, VFIFO
);
4557 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4561 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4564 * nameidone has to happen before we vnode_put(dvp)
4565 * since it may need to release the fs_nodelock on the dvp
4579 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4581 * Parameters: p Process requesting the open
4582 * uap User argument descriptor (see below)
4585 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4586 * uap->uid UID to set
4587 * uap->gid GID to set
4588 * uap->mode File mode to set (same as 'mkfifo')
4589 * uap->xsecurity ACL to set, if creating
4591 * Returns: 0 Success
4594 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4596 * XXX: We should enumerate the possible errno values here, and where
4597 * in the code they originated.
/*
 * mkfifo_extended: system-call entry point for creating a fifo with
 * explicit ownership and an optional ACL.
 *
 * Builds a vnode_attr from uap and passes it to mkfifo1():
 *   - va_mode is (uap->mode & ALLPERMS) with the bits of p->p_fd->fd_cmask cleared;
 *   - va_uid / va_gid are set only when uap->uid / uap->gid differ from
 *     KAUTH_UID_NONE / KAUTH_GID_NONE;
 *   - va_acl is set only when a filesec was copied in from uap->xsecurity.
 * A copied-in filesec is released with kauth_filesec_free() after mkfifo1()
 * has returned.
 */
4600 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4603 kauth_filesec_t xsecdst
;
4604 struct vnode_attr va
;
4606 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* Start with no filesec; it is only replaced when the caller supplied one. */
4608 xsecdst
= KAUTH_FILESEC_NONE
;
4609 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4610 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4616 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4617 if (uap
->uid
!= KAUTH_UID_NONE
) {
4618 VATTR_SET(&va
, va_uid
, uap
->uid
);
4620 if (uap
->gid
!= KAUTH_GID_NONE
) {
4621 VATTR_SET(&va
, va_gid
, uap
->gid
);
4623 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4624 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4627 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
/* The copied-in filesec is owned by this function and freed once mkfifo1() is done with it. */
4629 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4630 kauth_filesec_free(xsecdst
);
/*
 * mkfifo: system-call entry point for creating a fifo.
 *
 * Sets va_mode to (uap->mode & ALLPERMS) with the bits of
 * p->p_fd->fd_cmask cleared, then returns the result of mkfifo1() for
 * uap->path under the current VFS context. retval is unused.
 */
4637 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4639 struct vnode_attr va
;
4642 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4644 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
/*
 * my_strrchr: local character-search helper over the string p.
 *
 * The scan walks p forward one character at a time, starting with
 * save = NULL.
 * NOTE(review): presumably returns the last occurrence of ch, as
 * safe_getpath_new() uses the result to chop a path at its last '/';
 * the loop body is not visible here, so confirm before relying on it.
 */
4649 my_strrchr(char *p
, int ch
)
4653 for (save
= NULL
;; ++p
) {
/*
 * Prototypes for the safe_getpath family defined in this file.
 * safe_getpath_new() takes an explicit firmlink flag; safe_getpath() and
 * safe_getpath_no_firmlink() are wrappers that call it with 1 and 0.
 */
4664 extern int safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
);
4665 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4666 extern int safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
/*
 * safe_getpath_new: build "<path of dvp>/<leafname>" into path.
 *
 * dvp             directory vnode whose path is looked up.
 * leafname        final component appended after a '/'.
 * path            output buffer; _len is its size as passed by the caller.
 * truncated_path  out flag: cleared on entry, set to 1 whenever the result
 *                 is not the complete path (leaf did not fit, directory
 *                 path too long, or the directory path lookup failed).
 * firmlink        NOTE(review): presumably selects vn_getpath() versus
 *                 vn_getpath_no_firmlink(); the selecting condition is not
 *                 visible here.
 *
 * Callers in this file store the return value as the length of the path.
 */
4669 safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
)
4671 int ret
, len
= _len
;
4673 *truncated_path
= 0;
4676 ret
= vn_getpath(dvp
, path
, &len
);
4678 ret
= vn_getpath_no_firmlink(dvp
, path
, &len
);
/* The leaf is appended only when the directory path was obtained and is shorter than MAXPATHLEN - 1. */
4680 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
/* NOTE(review): index len - 1 is overwritten with the separator, which assumes len counts the terminator; confirm against vn_getpath(). */
4682 path
[len
- 1] = '/';
4683 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
/* strlcpy() reports the full length of leafname, so a total above MAXPATHLEN means the copy was cut short. */
4684 if (len
> MAXPATHLEN
) {
4687 // the string got truncated!
4688 *truncated_path
= 1;
4689 ptr
= my_strrchr(path
, '/');
4691 *ptr
= '\0'; // chop off the string at the last directory component
4693 len
= strlen(path
) + 1;
4696 } else if (ret
== 0) {
4697 *truncated_path
= 1;
4698 } else if (ret
!= 0) {
4699 struct vnode
*mydvp
= dvp
;
/* ENOSPC is not logged; every other failure is. */
4701 if (ret
!= ENOSPC
) {
4702 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4703 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4705 *truncated_path
= 1;
/* Fall back to an ancestor: step to v_parent when there is one, else use the mount point name, else "/". */
4708 if (mydvp
->v_parent
!= NULL
) {
4709 mydvp
= mydvp
->v_parent
;
4710 } else if (mydvp
->v_mount
) {
4711 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4714 // no parent and no mount point? only thing is to punt and say "/" changed
4715 strlcpy(path
, "/", _len
);
4720 if (mydvp
== NULL
) {
4726 ret
= vn_getpath(mydvp
, path
, &len
);
4728 ret
= vn_getpath_no_firmlink(mydvp
, path
, &len
);
/* The fallback lookup repeats for as long as it keeps failing with ENOSPC. */
4730 } while (ret
== ENOSPC
);
/*
 * safe_getpath: wrapper that calls safe_getpath_new() with firmlink = 1.
 * All other arguments are passed through unchanged.
 */
4737 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4739 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1);
/*
 * safe_getpath_no_firmlink: wrapper that calls safe_getpath_new() with
 * firmlink = 0. All other arguments are passed through unchanged.
 */
4743 safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4745 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0);
4749 * Make a hard file link.
4751 * Returns: 0 Success
4756 * vnode_authorize:???
/*
 * linkat_internal: common implementation behind link() and linkat().
 *
 * ctx     VFS context for lookups, authorization and the link operation.
 * fd1     descriptor the existing object's path is resolved against.
 * path    path of the existing object.
 * fd2     descriptor the new name is resolved against.
 * link    path of the new link to create.
 * flag    AT_SYMLINK_FOLLOW selects FOLLOW for the first lookup,
 *         otherwise NOFOLLOW is used.
 * segflg  address space of the two path arguments.
 *
 * Visible sequence: look up the existing object; restrict directory
 * links; look up the new name with a CREATE operation; run the MAC and
 * kauth checks; call VNOP_LINK(); then notify MAC, audit, kauth file-op
 * listeners and fsevents. target_path is released at the end when it
 * was allocated.
 */
4761 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4762 user_addr_t link
, int flag
, enum uio_seg segflg
)
4764 vnode_t vp
, pvp
, dvp
, lvp
;
4765 struct nameidata nd
;
4771 int need_event
, has_listeners
, need_kpath2
;
4772 char *target_path
= NULL
;
4775 vp
= dvp
= lvp
= NULLVP
;
4777 /* look up the object we are linking to */
4778 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4779 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4782 error
= nameiat(&nd
, fd1
);
4791 * Normally, linking to directories is not supported.
4792 * However, some file systems may have limited support.
4794 if (vp
->v_type
== VDIR
) {
4795 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4796 error
= EPERM
; /* POSIX */
4800 /* Linking to a directory requires ownership. */
4801 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4802 struct vnode_attr dva
;
4805 VATTR_WANTED(&dva
, va_uid
);
4806 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4807 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4808 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4815 /* lookup the target node */
/* The same nameidata is reused for the second lookup, now as a CREATE with LOCKPARENT set. */
4819 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4820 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4822 error
= nameiat(&nd
, fd2
);
4830 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
4835 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4836 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
4840 /* target node must not exist */
4841 if (lvp
!= NULLVP
) {
4845 /* cannot link across mountpoints */
4846 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4851 /* authorize creation of the target note */
4852 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4856 /* and finally make the link */
4857 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4863 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4867 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4871 has_listeners
= kauth_authorize_fileop_has_listeners();
4875 if (AUDIT_RECORD_EXISTS()) {
/* The path of the new link is built only when a consumer exists: fsevents, kauth listeners, or need_kpath2. */
4880 if (need_event
|| has_listeners
|| need_kpath2
) {
4881 char *link_to_path
= NULL
;
4882 int len
, link_name_len
;
4884 /* build the path to the new link file */
4885 GET_PATH(target_path
);
4886 if (target_path
== NULL
) {
4891 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4893 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
4895 if (has_listeners
) {
4896 /* build the path to file we are linking to */
4897 GET_PATH(link_to_path
);
4898 if (link_to_path
== NULL
) {
4903 link_name_len
= MAXPATHLEN
;
4904 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4906 * Call out to allow 3rd party notification of rename.
4907 * Ignore result of kauth_authorize_fileop call.
4909 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4910 (uintptr_t)link_to_path
,
4911 (uintptr_t)target_path
);
4913 if (link_to_path
!= NULL
) {
4914 RELEASE_PATH(link_to_path
);
4919 /* construct fsevent */
4920 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4922 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4925 // build the path to the destination of the link
4926 add_fsevent(FSE_CREATE_FILE
, ctx
,
4927 FSE_ARG_STRING
, len
, target_path
,
4928 FSE_ARG_FINFO
, &finfo
,
4933 // need an iocount on pvp in this case
4934 if (pvp
&& pvp
!= dvp
) {
4935 error
= vnode_get(pvp
);
4942 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4943 FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
);
4945 if (pvp
&& pvp
!= dvp
) {
4953 * nameidone has to happen before we vnode_put(dvp)
4954 * since it may need to release the fs_nodelock on the dvp
4957 if (target_path
!= NULL
) {
4958 RELEASE_PATH(target_path
);
/*
 * link: system-call entry point for creating a hard link.
 *
 * Calls linkat_internal() with both paths resolved against AT_FDCWD,
 * with AT_SYMLINK_FOLLOW as the flag and user-space path arguments.
 * p and retval are unused.
 */
4972 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4974 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4975 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
/*
 * linkat: system-call entry point for the descriptor-relative hard link.
 *
 * Tests uap->flag for any bit other than AT_SYMLINK_FOLLOW first.
 * NOTE(review): the body of that test is not visible here; presumably
 * it rejects the call.
 * Otherwise forwards fd1/path, fd2/link and the flag to linkat_internal()
 * with user-space path arguments. p and retval are unused.
 */
4979 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4981 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
4985 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4986 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
4990 * Make a symbolic link.
4992 * We could add support for ACLs here too...
/*
 * symlinkat_internal: common implementation behind symlink() and
 * symlinkat().
 *
 * ctx        VFS context for the lookup, authorization and creation.
 * path_data  the link contents (audited as "the link string").
 * fd         descriptor the new name is resolved against.
 * link       path of the symbolic link to create.
 * segflg     address space of the arguments.
 *
 * When segflg denotes user space the link contents are copied with
 * copyinstr() into a MAXPATHLEN buffer from MALLOC_ZONE, which is
 * released with FREE_ZONE at the end; otherwise path_data is used in
 * place. The new node is given type VLNK and mode ACCESSPERMS with the
 * bits of p->p_fd->fd_cmask cleared. After mac_vnode_check_create(),
 * vnode_authorize() and vnode_authattr_new(), VNOP_SYMLINK() creates the
 * link; on success the vnode is labelled, fallback attributes are
 * applied, its identity is updated and an FSE_CREATE_FILE event is added.
 */
4996 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4997 user_addr_t link
, enum uio_seg segflg
)
4999 struct vnode_attr va
;
5002 struct nameidata nd
;
5008 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
5009 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
5010 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
5012 path
= (char *)path_data
;
5017 AUDIT_ARG(text
, path
); /* This is the link string */
5019 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
5022 error
= nameiat(&nd
, fd
);
5029 p
= vfs_context_proc(ctx
);
5031 VATTR_SET(&va
, va_type
, VLNK
);
5032 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
5035 error
= mac_vnode_check_create(ctx
,
5036 dvp
, &nd
.ni_cnd
, &va
);
5049 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
5051 /* get default ownership, etc. */
5053 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
5056 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
5060 if (error
== 0 && vp
) {
5061 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
5065 /* do fallback attribute handling */
5066 if (error
== 0 && vp
) {
5067 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
5071 int update_flags
= 0;
5073 /*check if a new vnode was created, else try to get one*/
5075 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
5077 nd
.ni_op
= OP_LOOKUP
;
5079 nd
.ni_cnd
.cn_flags
= 0;
5080 error
= nameiat(&nd
, fd
);
5088 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5089 /* call out to allow 3rd party notification of rename.
5090 * Ignore result of kauth_authorize_fileop call.
5092 if (kauth_authorize_fileop_has_listeners() &&
5094 char *new_link_path
= NULL
;
5097 /* build the path to the new link file */
5098 new_link_path
= get_pathbuff();
5100 vn_getpath(dvp
, new_link_path
, &len
);
5101 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
5102 new_link_path
[len
- 1] = '/';
5103 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
5106 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
5107 (uintptr_t)path
, (uintptr_t)new_link_path
);
5108 if (new_link_path
!= NULL
) {
5109 release_pathbuff(new_link_path
);
5113 // Make sure the name & parent pointers are hooked up
5114 if (vp
->v_name
== NULL
) {
5115 update_flags
|= VNODE_UPDATE_NAME
;
5117 if (vp
->v_parent
== NULLVP
) {
5118 update_flags
|= VNODE_UPDATE_PARENT
;
5122 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
5126 add_fsevent(FSE_CREATE_FILE
, ctx
,
5134 * nameidone has to happen before we vnode_put(dvp)
5135 * since it may need to release the fs_nodelock on the dvp
5144 if (path
&& (path
!= (char *)path_data
)) {
5145 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
/*
 * symlink: system-call entry point for creating a symbolic link.
 *
 * Calls symlinkat_internal() with uap->path as the link contents and
 * uap->link as the new name, resolved against AT_FDCWD, both from user
 * space. p and retval are unused.
 */
5152 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
5154 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
5155 uap
->link
, UIO_USERSPACE
);
/*
 * symlinkat: system-call entry point for the descriptor-relative
 * symbolic link.
 *
 * Calls symlinkat_internal() with uap->path1 as the link contents and
 * uap->path2 as the new name, resolved against uap->fd, both from user
 * space. p and retval are unused.
 */
5159 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
5160 __unused
int32_t *retval
)
5162 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
5163 uap
->path2
, UIO_USERSPACE
);
5167 * Delete a whiteout from the filesystem.
5168 * No longer supported.
/*
 * undelete: system-call entry point retained for whiteout deletion,
 * which is no longer supported. Every argument is marked __unused.
 * NOTE(review): the returned value is not visible here.
 */
5171 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
5177 * Delete a name from the filesystem.
/*
 * unlinkat_internal: common implementation behind unlink1(), delete(),
 * unlink() and unlinkat().
 *
 * ctx           VFS context for the lookup, authorization and removal.
 * fd            descriptor the path is resolved against.
 * start_dvp     optional starting directory; stored in nd.ni_dvp, and
 *               per the comment below it takes precedence over fd.
 * path_arg      path of the name to remove; segflg is its address space.
 * unlink_flags  VNODE_REMOVE_NO_AUDIT_PATH leaves AUDITVNPATH1 out of the
 *               lookup flags; VNODE_REMOVE_NODELETEBUSY and
 *               VNODE_REMOVE_SKIP_NAMESPACE_EVENT are copied into the
 *               flags handed to vn_remove().
 *
 * The lookup is a DELETE/OP_UNLINK with LOCKPARENT and CN_ALLOWRSRCFORK,
 * marked NAMEI_COMPOUNDREMOVE. Paths for notification are built only
 * when fsevents or kauth file-op listeners need them. Removal goes
 * through vnode_removenamedstream() for a resource fork request, and
 * through vn_remove() otherwise. ENOENT from authorization or from a
 * batched removal is compared against MAX_AUTHORIZE_ENOENT_RETRIES.
 */
5181 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
5182 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
5184 struct nameidata nd
;
5187 struct componentname
*cnp
;
5189 char *no_firmlink_path
= NULL
;
5191 int len_no_firmlink_path
= 0;
5194 struct vnode_attr va
;
5200 int truncated_no_firmlink_path
;
5202 struct vnode_attr
*vap
;
5204 int retry_count
= 0;
5207 cn_flags
= LOCKPARENT
;
5208 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
5209 cn_flags
|= AUDITVNPATH1
;
5211 /* If a starting dvp is passed, it trumps any fd passed. */
5217 /* unlink or delete is allowed on rsrc forks and named streams */
5218 cn_flags
|= CN_ALLOWRSRCFORK
;
5227 truncated_no_firmlink_path
= 0;
5230 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
5232 nd
.ni_dvp
= start_dvp
;
5233 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
5237 error
= nameiat(&nd
, fd
);
5246 /* With Carbon delete semantics, busy files cannot be deleted */
5247 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
5248 flags
|= VNODE_REMOVE_NODELETEBUSY
;
5251 /* Skip any potential upcalls if told to. */
5252 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
5253 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
5257 batched
= vnode_compound_remove_available(vp
);
5259 * The root of a mounted filesystem cannot be deleted.
5261 if ((vp
->v_flag
& VROOT
) || (dvp
->v_mount
!= vp
->v_mount
)) {
5266 #if DEVELOPMENT || DEBUG
5268 * XXX VSWAP: Check for entitlements or special flag here
5269 * so we can restrict access appropriately.
5271 #else /* DEVELOPMENT || DEBUG */
5273 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
5277 #endif /* DEVELOPMENT || DEBUG */
5280 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
5282 if (error
== ENOENT
) {
5283 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5294 if (!vnode_compound_remove_available(dvp
)) {
5295 panic("No vp, but no compound remove?");
5300 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5303 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5304 /* XXX need to get these data in batched VNOP */
5305 get_fse_info(vp
, &finfo
, ctx
);
5308 error
= vfs_get_notify_attributes(&va
);
5317 has_listeners
= kauth_authorize_fileop_has_listeners();
5318 if (need_event
|| has_listeners
) {
5326 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5327 if (no_firmlink_path
== NULL
) {
5328 GET_PATH(no_firmlink_path
);
5329 if (no_firmlink_path
== NULL
) {
5334 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
5338 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5339 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5343 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5345 if (error
== EKEEPLOOKING
) {
5347 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5350 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5351 panic("EKEEPLOOKING, but continue flag not set?");
5354 if (vnode_isdir(vp
)) {
5358 goto continue_lookup
;
5359 } else if (error
== ENOENT
&& batched
) {
5360 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5362 * For compound VNOPs, the authorization callback may
5363 * return ENOENT in case of racing hardlink lookups
5364 * hitting the name cache, redrive the lookup.
5374 * Call out to allow 3rd party notification of delete.
5375 * Ignore result of kauth_authorize_fileop call.
5378 if (has_listeners
) {
5379 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5380 KAUTH_FILEOP_DELETE
,
5385 if (vp
->v_flag
& VISHARDLINK
) {
5387 // if a hardlink gets deleted we want to blow away the
5388 // v_parent link because the path that got us to this
5389 // instance of the link is no longer valid. this will
5390 // force the next call to get the path to ask the file
5391 // system instead of just following the v_parent link.
5393 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5398 if (vp
->v_flag
& VISHARDLINK
) {
5399 get_fse_info(vp
, &finfo
, ctx
);
5401 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
5403 if (truncated_path
) {
5404 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5406 add_fsevent(FSE_DELETE
, ctx
,
5407 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
5408 FSE_ARG_FINFO
, &finfo
,
5420 if (no_firmlink_path
!= NULL
) {
5421 RELEASE_PATH(no_firmlink_path
);
5422 no_firmlink_path
= NULL
;
5425 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5426 * will cause its shadow file to go away if necessary.
5428 if (vp
&& (vnode_isnamedstream(vp
)) &&
5429 (vp
->v_parent
!= NULLVP
) &&
5430 vnode_isshadow(vp
)) {
5435 * nameidone has to happen before we vnode_put(dvp)
5436 * since it may need to release the fs_nodelock on the dvp
/*
 * unlink1: in-kernel unlink helper that forwards to unlinkat_internal()
 * with AT_FDCWD as the descriptor and the caller-supplied start_dvp,
 * path_arg and segflg.
 * NOTE(review): the final argument of the call is not visible here;
 * presumably unlink_flags.
 */
5452 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5453 enum uio_seg segflg
, int unlink_flags
)
5455 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5460 * Delete a name from the filesystem using Carbon semantics.
/*
 * delete: system-call entry point for removal with Carbon semantics.
 *
 * Calls unlinkat_internal() for uap->path (user space, resolved against
 * AT_FDCWD, no starting directory) with VNODE_REMOVE_NODELETEBUSY as the
 * unlink flags. p and retval are unused.
 */
5463 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5465 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5466 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5470 * Delete a name from the filesystem using POSIX semantics.
/*
 * unlink: system-call entry point for removal with POSIX semantics.
 *
 * Calls unlinkat_internal() for uap->path (user space, resolved against
 * AT_FDCWD, no starting directory) with no unlink flags. p and retval
 * are unused.
 */
5473 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5475 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5476 uap
->path
, UIO_USERSPACE
, 0);
/*
 * unlinkat: system-call entry point for descriptor-relative removal.
 *
 * Tests uap->flag for any bit other than AT_REMOVEDIR and
 * AT_REMOVEDIR_DATALESS first.
 * NOTE(review): the body of that test is not visible here; presumably
 * it rejects the call.
 * When either of the two flags is set the request goes to
 * rmdirat_internal(), with VNODE_REMOVE_DATALESS_DIR added for
 * AT_REMOVEDIR_DATALESS; otherwise it goes to unlinkat_internal() with
 * no starting directory and no unlink flags.
 */
5480 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5482 if (uap
->flag
& ~(AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5486 if (uap
->flag
& (AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5487 int unlink_flags
= 0;
5489 if (uap
->flag
& AT_REMOVEDIR_DATALESS
) {
5490 unlink_flags
|= VNODE_REMOVE_DATALESS_DIR
;
5492 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5493 uap
->path
, UIO_USERSPACE
, unlink_flags
);
5495 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5496 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5501 * Reposition read/write file offset.
/*
 * lseek: system-call entry point that repositions the offset of uap->fd.
 *
 * p       calling process, used to resolve the descriptor with fp_getfvp().
 * uap     carries fd, offset and whence.
 * retval  receives the resulting offset (copied from fg_offset).
 *
 * The candidate offset starts as uap->offset. Inside the switch on
 * uap->whence it is either advanced by the current fg_offset, advanced by
 * the file size from vnode_size(), or resolved through the
 * FSIOC_FIOSEEKHOLE / FSIOC_FIOSEEKDATA ioctls (the case labels are not
 * visible here). A positive request that produced a negative offset, and
 * a negative offset on anything but a VCHR vnode, are tested before the
 * offset is stored. The MAC hook used depends on whether the call is
 * L_INCR with a zero offset (get) or anything else (change).
 */
5504 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5506 struct fileproc
*fp
;
5508 struct vfs_context
*ctx
;
5509 off_t offset
= uap
->offset
, file_size
;
5512 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5513 if (error
== ENOTSUP
) {
5518 if (vnode_isfifo(vp
)) {
5524 ctx
= vfs_context_current();
5526 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5527 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5530 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5538 if ((error
= vnode_getwithref(vp
))) {
5543 switch (uap
->whence
) {
5545 offset
+= fp
->f_fglob
->fg_offset
;
5548 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5551 offset
+= file_size
;
5556 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5559 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
/* A positive request that ended up negative means the addition wrapped. */
5565 if (uap
->offset
> 0 && offset
< 0) {
5566 /* Incremented/relative move past max size */
5570 * Allow negative offsets on character devices, per
5571 * POSIX 1003.1-2001. Most likely for writing disk
5574 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5575 /* Decremented/relative move before start */
5579 fp
->f_fglob
->fg_offset
= offset
;
5580 *retval
= fp
->f_fglob
->fg_offset
;
5586 * An lseek can affect whether data is "available to read." Use
5587 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5589 post_event_if_success(vp
, error
, NOTE_NONE
);
5590 (void)vnode_put(vp
);
5597 * Check access permissions.
5599 * Returns: 0 Success
5600 * vnode_authorize:???
/*
 * access1: run one access check on vp (with dvp passed as its parent).
 *
 * uflags  access(2)-style request bits.
 * ctx     VFS context carrying the credential to check against.
 *
 * Without any _ACCESS_EXTENDED_MASK bit the classic bits are translated
 * into a kauth action:
 *   R_OK -> KAUTH_VNODE_READ_DATA
 *   W_OK -> KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY for a
 *           directory, KAUTH_VNODE_WRITE_DATA otherwise
 *   X_OK -> KAUTH_VNODE_SEARCH for a directory, KAUTH_VNODE_EXECUTE
 *           otherwise
 * With extended bits present the action is simply uflags >> 8.
 * mac_vnode_check_access() is consulted with the raw uflags, and
 * vnode_authorize() with action | KAUTH_VNODE_ACCESS.
 */
5603 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5605 kauth_action_t action
;
5609 * If just the regular access bits, convert them to something
5610 * that vnode_authorize will understand.
5612 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5614 if (uflags
& R_OK
) {
5615 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5617 if (uflags
& W_OK
) {
5618 if (vnode_isdir(vp
)) {
5619 action
|= KAUTH_VNODE_ADD_FILE
|
5620 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5621 /* might want delete rights here too */
5623 action
|= KAUTH_VNODE_WRITE_DATA
;
5626 if (uflags
& X_OK
) {
5627 if (vnode_isdir(vp
)) {
5628 action
|= KAUTH_VNODE_SEARCH
;
5630 action
|= KAUTH_VNODE_EXECUTE
;
5634 /* take advantage of definition of uflags */
5635 action
= uflags
>> 8;
5639 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5645 /* action == 0 means only check for existence */
5647 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5658 * access_extended: Check access permissions in bulk.
5660 * Description: uap->entries Pointer to an array of accessx
5661 * descriptor structs, plus one or
5662 * more NUL-terminated strings (see
5663 * "Notes" section below).
5664 * uap->size Size of the area pointed to by
5666 * uap->results Pointer to the results array.
5668 * Returns: 0 Success
5669 * ENOMEM Insufficient memory
5670 * EINVAL Invalid arguments
5671 * namei:EFAULT Bad address
5672 * namei:ENAMETOOLONG Filename too long
5673 * namei:ENOENT No such file or directory
5674 * namei:ELOOP Too many levels of symbolic links
5675 * namei:EBADF Bad file descriptor
5676 * namei:ENOTDIR Not a directory
5681 * uap->results Array contents modified
5683 * Notes: The uap->entries are structured as an arbitrary length array
5684 * of accessx descriptors, followed by one or more NUL-terminated
5687 * struct accessx_descriptor[0]
5689 * struct accessx_descriptor[n]
5690 * char name_data[0];
5692 * We determine the entry count by walking the buffer containing
5693 * the uap->entries argument descriptor. For each descriptor we
5694 * see, the valid values for the offset ad_name_offset will be
5695 * in the byte range:
5697 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5699 * [ uap->entries + uap->size - 2 ]
5701 * since we must have at least one string, and the string must
5702 * be at least one character plus the NULL terminator in length.
5704 * XXX: Need to support the check-as uid argument
/*
 * access_extended: system-call entry point that performs a batch of
 * access checks described by a user buffer.
 *
 * uap->entries  user buffer holding accessx descriptors followed by
 *               their name strings; uap->size is its size in bytes.
 * uap->results  user array that receives one errno_t per descriptor.
 *
 * Visible sequence: bound-check uap->size, stage the buffer (on the
 * stack when it fits in stack_input, otherwise via MALLOC), copy it in
 * and force its last byte to 0, switch to a context holding a copy of
 * the real credential, derive the descriptor count from the smallest
 * name offset, allocate the zeroed result array, run access1() for each
 * descriptor, and copy the results out. A heap-staged input buffer, the
 * result array and the credential reference are released at the end.
 */
5707 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5709 struct accessx_descriptor
*input
= NULL
;
5710 errno_t
*result
= NULL
;
5713 unsigned int desc_max
, desc_actual
, i
, j
;
5714 struct vfs_context context
;
5715 struct nameidata nd
;
5719 #define ACCESSX_MAX_DESCR_ON_STACK 10
5720 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5722 context
.vc_ucred
= NULL
;
5725 * Validate parameters; if valid, copy the descriptor array and string
5726 * arguments into local memory. Before proceeding, the following
5727 * conditions must have been met:
5729 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5730 * o There must be sufficient room in the request for at least one
5731 * descriptor and a one yte NUL terminated string.
5732 * o The allocation of local storage must not fail.
5734 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
5737 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
/* Requests no larger than the on-stack array are staged there; bigger ones use MALLOC. */
5740 if (uap
->size
<= sizeof(stack_input
)) {
5741 input
= stack_input
;
5743 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5744 if (input
== NULL
) {
5749 error
= copyin(uap
->entries
, input
, uap
->size
);
5754 AUDIT_ARG(opaque
, input
, uap
->size
);
5757 * Force NUL termination of the copyin buffer to avoid nami() running
5758 * off the end. If the caller passes us bogus data, they may get a
5761 ((char *)input
)[uap
->size
- 1] = 0;
5764 * Access is defined as checking against the process' real identity,
5765 * even if operations are checking the effective identity. This
5766 * requires that we use a local vfs context.
5768 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5769 context
.vc_thread
= current_thread();
5772 * Find out how many entries we have, so we can allocate the result
5773 * array by walking the list and adjusting the count downward by the
5774 * earliest string offset we see.
5776 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5777 desc_actual
= desc_max
;
5778 for (i
= 0; i
< desc_actual
; i
++) {
5780 * Take the offset to the name string for this entry and
5781 * convert to an input array index, which would be one off
5782 * the end of the array if this entry was the lowest-addressed
5785 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5788 * An offset greater than the max allowable offset is an error.
5789 * It is also an error for any valid entry to point
5790 * to a location prior to the end of the current entry, if
5791 * it's not a reference to the string of the previous entry.
5793 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5798 /* Also do not let ad_name_offset point to something beyond the size of the input */
5799 if (input
[i
].ad_name_offset
>= uap
->size
) {
5805 * An offset of 0 means use the previous descriptor's offset;
5806 * this is used to chain multiple requests for the same file
5807 * to avoid multiple lookups.
5810 /* This is not valid for the first entry */
5819 * If the offset of the string for this descriptor is before
5820 * what we believe is the current actual last descriptor,
5821 * then we need to adjust our estimate downward; this permits
5822 * the string table following the last descriptor to be out
5823 * of order relative to the descriptor list.
5825 if (j
< desc_actual
) {
5831 * We limit the actual number of descriptors we are willing to process
5832 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5833 * requested does not exceed this limit,
5835 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5839 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5840 if (result
== NULL
) {
5846 * Do the work by iterating over the descriptor entries we know to
5847 * at least appear to contain valid data.
5850 for (i
= 0; i
< desc_actual
; i
++) {
5852 * If the ad_name_offset is 0, then we use the previous
5853 * results to make the check; otherwise, we are looking up
5856 if (input
[i
].ad_name_offset
!= 0) {
5857 /* discard old vnodes */
5868 * Scan forward in the descriptor list to see if we
5869 * need the parent vnode. We will need it if we are
5870 * deleting, since we must have rights to remove
5871 * entries in the parent directory, as well as the
5872 * rights to delete the object itself.
5874 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5875 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
5876 if (input
[j
].ad_flags
& _DELETE_OK
) {
5881 niopts
= FOLLOW
| AUDITVNPATH1
;
5883 /* need parent for vnode_authorize for deletion test */
5885 niopts
|= WANTPARENT
;
5889 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5890 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5903 * Handle lookup errors.
5913 /* run this access check */
5914 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5917 /* fatal lookup error */
5923 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5925 /* copy out results */
5926 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5929 if (input
&& input
!= stack_input
) {
5930 FREE(input
, M_TEMP
);
5933 FREE(result
, M_TEMP
);
5941 if (IS_VALID_CRED(context
.vc_ucred
)) {
5942 kauth_cred_unref(&context
.vc_ucred
);
5949 * Returns: 0 Success
5950 * namei:EFAULT Bad address
5951 * namei:ENAMETOOLONG Filename too long
5952 * namei:ENOENT No such file or directory
5953 * namei:ELOOP Too many levels of symbolic links
5954 * namei:EBADF Bad file descriptor
5955 * namei:ENOTDIR Not a directory
/*
 * faccessat_internal: common implementation behind access() and
 * faccessat().
 *
 * ctx     caller VFS context; its thread is always used, its credential
 *         only when AT_EACCESS is set.
 * fd      descriptor the path is resolved against.
 * path    path to check; segflg is its address space.
 * amode   requested access bits, passed to access1().
 * flag    AT_EACCESS keeps the credential of ctx; without it a copy of
 *         the real credential is taken with kauth_cred_copy_real() and
 *         dropped with kauth_cred_unref() at the end.
 *         AT_SYMLINK_NOFOLLOW selects NOFOLLOW for the lookup.
 *
 * The lookup asks for the parent (WANTPARENT) when amode contains
 * _DELETE_OK, and the parent is put again afterwards under the same
 * condition. A request of exactly F_OK allows resource forks.
 */
5960 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5961 int flag
, enum uio_seg segflg
)
5964 struct nameidata nd
;
5966 struct vfs_context context
;
5968 int is_namedstream
= 0;
5972 * Unless the AT_EACCESS option is used, Access is defined as checking
5973 * against the process' real identity, even if operations are checking
5974 * the effective identity. So we need to tweak the credential
5975 * in the context for that case.
5977 if (!(flag
& AT_EACCESS
)) {
5978 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5980 context
.vc_ucred
= ctx
->vc_ucred
;
5982 context
.vc_thread
= ctx
->vc_thread
;
5985 niopts
= (flag
& AT_SYMLINK_NOFOLLOW
? NOFOLLOW
: FOLLOW
) | AUDITVNPATH1
;
5986 /* need parent for vnode_authorize for deletion test */
5987 if (amode
& _DELETE_OK
) {
5988 niopts
|= WANTPARENT
;
5990 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5994 /* access(F_OK) calls are allowed for resource forks. */
5995 if (amode
== F_OK
) {
5996 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5999 error
= nameiat(&nd
, fd
);
6005 /* Grab reference on the shadow stream file vnode to
6006 * force an inactive on release which will mark it
6009 if (vnode_isnamedstream(nd
.ni_vp
) &&
6010 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6011 vnode_isshadow(nd
.ni_vp
)) {
6013 vnode_ref(nd
.ni_vp
);
6017 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
6020 if (is_namedstream
) {
6021 vnode_rele(nd
.ni_vp
);
6025 vnode_put(nd
.ni_vp
);
6026 if (amode
& _DELETE_OK
) {
6027 vnode_put(nd
.ni_dvp
);
6032 if (!(flag
& AT_EACCESS
)) {
6033 kauth_cred_unref(&context
.vc_ucred
);
/*
 * access: system-call entry point for the classic access check.
 *
 * Calls faccessat_internal() for uap->path (user space, resolved against
 * AT_FDCWD) with uap->flags as the access mode and 0 as the flag
 * argument. p and retval are unused.
 */
6039 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
6041 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
6042 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
/*
 * faccessat: system-call entry point for the descriptor-relative access
 * check.
 *
 * Tests uap->flag for any bit other than AT_EACCESS and
 * AT_SYMLINK_NOFOLLOW first.
 * NOTE(review): the body of that test is not visible here; presumably
 * it rejects the call.
 * Otherwise forwards fd, path, amode and flag to faccessat_internal()
 * with a user-space path. p and retval are unused.
 */
6046 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
6047 __unused
int32_t *retval
)
6049 if (uap
->flag
& ~(AT_EACCESS
| AT_SYMLINK_NOFOLLOW
)) {
6053 return faccessat_internal(vfs_context_current(), uap
->fd
,
6054 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
6058 * Returns: 0 Success
/*
 * fstatat_internal: common implementation behind the stat family of
 * system calls.
 *
 * ctx             VFS context for the lookup and the attribute fetch.
 * path            path to stat; segflg is its address space.
 * ub              user buffer that receives the stat structure.
 * xsecurity       user buffer for the extended security data, or
 *                 USER_ADDR_NULL when the caller does not want it.
 * xsecurity_size  user address of the size word for that buffer; it is
 *                 read with fusize() and written with susize().
 * isstat64        non-zero selects the stat64 layouts.
 * fd              descriptor the path is resolved against, or the
 *                 descriptor itself when AT_FDONLY is set.
 * flag            AT_SYMLINK_NOFOLLOW, AT_FDONLY and AT_REALDEV are
 *                 examined here.
 *
 * With an open file (fp set) and no extended security request the
 * attributes come from vn_stat_noauth() using the credential of the open
 * file; otherwise vn_stat() is used. The result is converted with the
 * munge_user{32,64}_stat[64]() helper matching isstat64 and the bitness
 * of the process, then copied out. A zero link count on a regular file
 * is reported as 1. A filesec returned by vn_stat() is freed at the end.
 */
6065 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
6066 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
6067 enum uio_seg segflg
, int fd
, int flag
)
6069 struct nameidata nd
;
6076 struct user64_stat user64_sb
;
6077 struct user32_stat user32_sb
;
6078 struct user64_stat64 user64_sb64
;
6079 struct user32_stat64 user32_sb64
;
6083 kauth_filesec_t fsec
;
6084 size_t xsecurity_bufsize
;
6086 struct fileproc
*fp
= NULL
;
6087 int needsrealdev
= 0;
6089 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6090 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
6094 int is_namedstream
= 0;
6095 /* stat calls are allowed for resource forks. */
6096 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
/* AT_FDONLY: the vnode is taken from the descriptor itself via fp_getfvp(). */
6099 if (flag
& AT_FDONLY
) {
6102 error
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
);
6106 if ((error
= vnode_getwithref(fvp
))) {
6112 error
= nameiat(&nd
, fd
);
6117 fsec
= KAUTH_FILESEC_NONE
;
6119 statptr
= (void *)&source
;
6122 /* Grab reference on the shadow stream file vnode to
6123 * force an inactive on release which will mark it
6126 if (vnode_isnamedstream(nd
.ni_vp
) &&
6127 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6128 vnode_isshadow(nd
.ni_vp
)) {
6130 vnode_ref(nd
.ni_vp
);
6134 needsrealdev
= flag
& AT_REALDEV
? 1 : 0;
6135 if (fp
&& (xsecurity
== USER_ADDR_NULL
)) {
6137 * If the caller has the file open, and is not
6138 * requesting extended security information, we are
6139 * going to let them get the basic stat information.
6141 error
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
,
6142 fp
->f_fglob
->fg_cred
);
6144 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
),
6145 isstat64
, needsrealdev
, ctx
);
6149 if (is_namedstream
) {
6150 vnode_rele(nd
.ni_vp
);
6153 vnode_put(nd
.ni_vp
);
6163 /* Zap spare fields */
6164 if (isstat64
!= 0) {
6165 source
.sb64
.st_lspare
= 0;
6166 source
.sb64
.st_qspare
[0] = 0LL;
6167 source
.sb64
.st_qspare
[1] = 0LL;
6168 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6169 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
6170 my_size
= sizeof(dest
.user64_sb64
);
6171 sbp
= (caddr_t
)&dest
.user64_sb64
;
6173 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
6174 my_size
= sizeof(dest
.user32_sb64
);
6175 sbp
= (caddr_t
)&dest
.user32_sb64
;
6178 * Check if we raced (post lookup) against the last unlink of a file.
6180 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
6181 source
.sb64
.st_nlink
= 1;
6184 source
.sb
.st_lspare
= 0;
6185 source
.sb
.st_qspare
[0] = 0LL;
6186 source
.sb
.st_qspare
[1] = 0LL;
6187 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6188 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
6189 my_size
= sizeof(dest
.user64_sb
);
6190 sbp
= (caddr_t
)&dest
.user64_sb
;
6192 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
6193 my_size
= sizeof(dest
.user32_sb
);
6194 sbp
= (caddr_t
)&dest
.user32_sb
;
6198 * Check if we raced (post lookup) against the last unlink of a file.
6200 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
6201 source
.sb
.st_nlink
= 1;
6204 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
6208 /* caller wants extended security information? */
6209 if (xsecurity
!= USER_ADDR_NULL
) {
6210 /* did we get any? */
6211 if (fsec
== KAUTH_FILESEC_NONE
) {
6212 if (susize(xsecurity_size
, 0) != 0) {
6217 /* find the user buffer size */
6218 xsecurity_bufsize
= fusize(xsecurity_size
);
6220 /* copy out the actual data size */
6221 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
6226 /* if the caller supplied enough room, copy out to it */
6227 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
6228 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
6233 if (fsec
!= KAUTH_FILESEC_NONE
) {
6234 kauth_filesec_free(fsec
);
6240 * stat_extended: Get file status; with extended security (ACL).
6242 * Parameters: p (ignored)
6243 * uap User argument descriptor (see below)
6246 * Indirect: uap->path Path of file to get status from
6247 * uap->ub User buffer (holds file status info)
6248 * uap->xsecurity ACL to get (extended security)
6249 * uap->xsecurity_size Size of ACL
6251 * Returns: 0 Success
6256 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
6257 __unused
int32_t *retval
)
6259 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6260 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6265 * Returns: 0 Success
6266 * fstatat_internal:??? [see fstatat_internal() in this file]
6269 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
6271 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6272 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
6276 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
6278 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6279 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
6283 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6285 * Parameters: p (ignored)
6286 * uap User argument descriptor (see below)
6289 * Indirect: uap->path Path of file to get status from
6290 * uap->ub User buffer (holds file status info)
6291 * uap->xsecurity ACL to get (extended security)
6292 * uap->xsecurity_size Size of ACL
6294 * Returns: 0 Success
6299 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
6301 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6302 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6307 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6309 * Parameters: p (ignored)
6310 * uap User argument descriptor (see below)
6313 * Indirect: uap->path Path of file to get status from
6314 * uap->ub User buffer (holds file status info)
6315 * uap->xsecurity ACL to get (extended security)
6316 * uap->xsecurity_size Size of ACL
6318 * Returns: 0 Success
6323 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
6325 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6326 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6327 AT_SYMLINK_NOFOLLOW
);
6331 * Get file status; this version does not follow links.
6334 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
6336 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6337 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6341 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6343 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6344 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6348 * lstat64_extended: Get file status; can handle large inode numbers; does not
6349 * follow links; with extended security (ACL).
6351 * Parameters: p (ignored)
6352 * uap User argument descriptor (see below)
6355 * Indirect: uap->path Path of file to get status from
6356 * uap->ub User buffer (holds file status info)
6357 * uap->xsecurity ACL to get (extended security)
6358 * uap->xsecurity_size Size of ACL
6360 * Returns: 0 Success
6365 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6367 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6368 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6369 AT_SYMLINK_NOFOLLOW
);
/*
 * fstatat: stat uap->path relative to descriptor uap->fd.
 * Forwards to fstatat_internal() with no extended-security buffer,
 * isstat64 == 0 and the caller's flag word.
 */
6373 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
/* Tests for any flag bit outside AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY. */
6375 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6379 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6380 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
/*
 * fstatat64: stat uap->path relative to descriptor uap->fd.
 * Forwards to fstatat_internal() with no extended-security buffer,
 * isstat64 == 1 and the caller's flag word.
 */
6384 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6385 __unused
int32_t *retval
)
/* Tests for any flag bit outside AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY. */
6387 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6391 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6392 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6396 * Get configurable pathname variables.
6398 * Returns: 0 Success
6402 * Notes: Global implementation constants are intended to be
6403 * implemented in this function directly; all other constants
6404 * are per-FS implementation, and therefore must be handled in
6405 * each respective FS, instead.
6407 * XXX We implement some things globally right now that should actually be
6408 * XXX per-FS; we will need to deal with this at some point.
/*
 * pathconf: query the configurable pathname variable uap->name for the
 * object named by uap->path; the value is delivered through retval by
 * vn_pathconf().
 */
6412 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6415 struct nameidata nd
;
6416 vfs_context_t ctx
= vfs_context_current();
/* Lookup is set up with FOLLOW, and AUDITVNPATH1 for auditing. */
6418 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6419 UIO_USERSPACE
, uap
->path
, ctx
);
6425 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
/* Drop the iocount held on the looked-up vnode. */
6427 vnode_put(nd
.ni_vp
);
6433 * Return target name of a symbolic link.
/*
 * readlinkat_internal: common implementation behind readlink() and
 * readlinkat().  Looks up path relative to fd without following the final
 * link (NOFOLLOW), then reads the link target into buf via VNOP_READLINK().
 *
 * On the visible success path *retval is set to the number of bytes
 * transferred (bufsize minus the residual count left in the uio).
 */
6437 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6438 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6444 struct nameidata nd
;
6445 char uio_buf
[UIO_SIZEOF(1)];
6447 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6450 error
= nameiat(&nd
, fd
);
/* Single-iovec uio built in the on-stack uio_buf; no heap allocation here. */
6458 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6459 &uio_buf
[0], sizeof(uio_buf
));
6460 uio_addiov(auio
, buf
, bufsize
);
/* Only symbolic links (VLNK) can be read with readlink. */
6461 if (vp
->v_type
!= VLNK
) {
6465 error
= mac_vnode_check_readlink(ctx
, vp
);
6468 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6472 error
= VNOP_READLINK(vp
, auio
, ctx
);
/* Bytes copied = requested size minus what is still unread in the uio. */
6477 *retval
= bufsize
- (int)uio_resid(auio
);
6482 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6484 enum uio_seg procseg
;
6486 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6487 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6488 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6489 uap
->count
, procseg
, retval
);
6493 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6495 enum uio_seg procseg
;
6497 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6498 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6499 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6503 * Change file flags, the deep inner layer.
/*
 * chflags0: innermost flag-change helper.
 *
 * vp       vnode whose flags are being changed
 * va       attribute set carrying the new va_flags (used for the checks)
 * setattr  callback that actually applies the change; called as
 *          (*setattr)(vp, arg, ctx)
 * arg      opaque argument handed to the callback
 * ctx      caller's VFS context
 *
 * Visible steps: MAC set-flags check, vnode_authattr() preflight,
 * vnode_authorize() with KAUTH_VNODE_NOIMMUTABLE added, the callback, and
 * the MAC set-flags notification.
 */
6506 chflags0(vnode_t vp
, struct vnode_attr
*va
,
6507 int (*setattr
)(vnode_t
, void *, vfs_context_t
),
6508 void *arg
, vfs_context_t ctx
)
6510 kauth_action_t action
= 0;
6514 error
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
);
6520 /* request authorisation, disregard immutability */
6521 if ((error
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) {
6525 * Request that the auth layer disregard those file flags it's allowed to when
6526 * authorizing this operation; we need to do this in order to be able to
6527 * clear immutable flags.
/* vnode_authorize() is only consulted when vnode_authattr() produced a non-zero action. */
6529 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6532 error
= (*setattr
)(vp
, arg
, ctx
);
6536 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
);
6545 * Change file flags.
6547 * NOTE: this will vnode_put() `vp'
/*
 * chflags1: build a vnode_attr holding only va_flags = flags and apply it
 * to vp through chflags0(), using vnode_setattr as the setter callback and
 * the same vnode_attr as its argument.
 */
6550 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6552 struct vnode_attr va
;
6556 VATTR_SET(&va
, va_flags
, flags
);
/*
 * NOTE(review): vnode_setattr is passed through a (void *) cast to a
 * function-pointer parameter — presumably to adapt its second parameter
 * type to chflags0()'s callback signature; confirm.
 */
6558 error
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
);
/* Detects a setattr that reported success without supporting va_flags. */
6561 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6569 * Change flags of a file given a path name.
/*
 * chflags: change flags of the file named by uap->path to uap->flags.
 * The lookup is set up with FOLLOW; the change itself is done by
 * chflags1().
 */
6573 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6576 vfs_context_t ctx
= vfs_context_current();
6578 struct nameidata nd
;
6580 AUDIT_ARG(fflags
, uap
->flags
);
6581 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6582 UIO_USERSPACE
, uap
->path
, ctx
);
6590 /* we don't vnode_put() here because chflags1 does internally */
6591 error
= chflags1(vp
, uap
->flags
, ctx
);
6597 * Change flags of a file given a file descriptor.
/*
 * fchflags: change flags of the file open on descriptor uap->fd to
 * uap->flags.  The vnode is obtained with file_vnode() and an iocount is
 * taken with vnode_getwithref() before handing it to chflags1().
 */
6601 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6606 AUDIT_ARG(fd
, uap
->fd
);
6607 AUDIT_ARG(fflags
, uap
->flags
);
6608 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6612 if ((error
= vnode_getwithref(vp
))) {
6617 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6619 /* we don't vnode_put() here because chflags1 does internally */
6620 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6627 * Change security information on a filesystem object.
6629 * Returns: 0 Success
6630 * EPERM Operation not permitted
6631 * vnode_authattr:??? [anything vnode_authattr can return]
6632 * vnode_authorize:??? [anything vnode_authorize can return]
6633 * vnode_setattr:??? [anything vnode_setattr can return]
6635 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6636 * translated to EPERM before being returned.
/*
 * chmod_vnode: apply the security attributes in *vap (any of va_mode,
 * va_uid, va_gid, va_acl that are active) to vp.
 *
 * Visible steps, in order: audit the mode, test for named streams, run the
 * MAC checks matching the active attributes, preflight with
 * vnode_authattr() and authorize with vnode_authorize(), call
 * vnode_setattr(), then send the matching MAC notifications.
 */
6639 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6641 kauth_action_t action
;
6644 AUDIT_ARG(mode
, vap
->va_mode
);
6645 /* XXX audit new args */
6648 /* chmod calls are not allowed for resource forks. */
6649 if (vp
->v_flag
& VISNAMEDSTREAM
) {
/* MAC pre-checks: one per kind of attribute that is being set. */
6655 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6656 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
/* An owner/group that is not being set is passed to the MAC hook as -1. */
6660 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6661 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6662 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6663 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6668 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6669 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6674 /* make sure that the caller is allowed to set this security information */
6675 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6676 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
/* EACCES from the authorization calls is singled out here for special handling. */
6677 if (error
== EACCES
) {
6683 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
/* MAC post-notifications mirror the pre-checks above. */
6688 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6689 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6692 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6693 mac_vnode_notify_setowner(ctx
, vp
,
6694 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6695 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6698 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6699 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6708 * Change mode of a file given a path name.
6710 * Returns: 0 Success
6711 * namei:??? [anything namei can return]
6712 * chmod_vnode:??? [anything chmod_vnode can return]
/*
 * chmodat: look up path relative to fd and apply *vap to the resulting
 * vnode with chmod_vnode().
 *
 * flag     AT_SYMLINK_NOFOLLOW selects a NOFOLLOW lookup, otherwise FOLLOW
 * segflg   address space of path
 */
6715 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6716 int fd
, int flag
, enum uio_seg segflg
)
6718 struct nameidata nd
;
6721 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6722 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6724 if ((error
= nameiat(&nd
, fd
))) {
6727 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
/* Release the iocount on the looked-up vnode. */
6728 vnode_put(nd
.ni_vp
);
6734 * chmod_extended: Change the mode of a file given a path name; with extended
6735 * argument list (including extended security (ACL)).
6737 * Parameters: p Process requesting the open
6738 * uap User argument descriptor (see below)
6741 * Indirect: uap->path Path to object (same as 'chmod')
6742 * uap->uid UID to set
6743 * uap->gid GID to set
6744 * uap->mode File mode to set (same as 'chmod')
6745 * uap->xsecurity ACL to set (or delete)
6747 * Returns: 0 Success
6750 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6752 * XXX: We should enumerate the possible errno values here, and where
6753 * in the code they originated.
/*
 * chmod_extended: path-based chmod with optional owner, group and ACL.
 *
 * Each of uap->mode, uap->uid and uap->gid is only added to the attribute
 * set when it differs from its "not supplied" sentinel (-1,
 * KAUTH_UID_NONE, KAUTH_GID_NONE).  uap->xsecurity selects the ACL
 * handling; the change is applied with chmodat() relative to AT_FDCWD
 * with flag 0.
 */
6756 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6759 struct vnode_attr va
;
6760 kauth_filesec_t xsecdst
;
6762 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6765 if (uap
->mode
!= -1) {
6766 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6768 if (uap
->uid
!= KAUTH_UID_NONE
) {
6769 VATTR_SET(&va
, va_uid
, uap
->uid
);
6771 if (uap
->gid
!= KAUTH_GID_NONE
) {
6772 VATTR_SET(&va
, va_gid
, uap
->gid
);
6776 switch (uap
->xsecurity
) {
6777 /* explicit remove request */
6778 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6779 VATTR_SET(&va
, va_acl
, NULL
);
6782 case USER_ADDR_NULL
:
/* Any other value is treated as a user pointer to a filesec to copy in. */
6785 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6788 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6789 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6792 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
/* Free the copied-in filesec, if one was allocated. */
6795 if (xsecdst
!= NULL
) {
6796 kauth_filesec_free(xsecdst
);
6802 * Returns: 0 Success
6803 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: mode-only front end to chmodat().  Puts
 * (mode & ALLPERMS) into a vnode_attr as va_mode and forwards the
 * remaining arguments unchanged.
 */
6806 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6807 int flag
, enum uio_seg segflg
)
6809 struct vnode_attr va
;
6812 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6814 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
6818 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6820 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6821 AT_FDCWD
, 0, UIO_USERSPACE
);
/*
 * fchmodat: change the mode of uap->path, resolved relative to uap->fd,
 * through fchmodat_internal().
 */
6825 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
/* Tests for any flag bit other than AT_SYMLINK_NOFOLLOW. */
6827 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6831 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6832 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
6836 * Change mode of a file given a file descriptor.
/*
 * fchmod1: apply *vap to the vnode behind descriptor fd.  Resolves the
 * descriptor with file_vnode(), takes an iocount with
 * vnode_getwithref(), calls chmod_vnode() and drops the iocount again.
 */
6839 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6846 if ((error
= file_vnode(fd
, &vp
)) != 0) {
6849 if ((error
= vnode_getwithref(vp
)) != 0) {
6853 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6855 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6856 (void)vnode_put(vp
);
6863 * fchmod_extended: Change mode of a file given a file descriptor; with
6864 * extended argument list (including extended security (ACL)).
6866 * Parameters: p Process requesting to change file mode
6867 * uap User argument descriptor (see below)
6870 * Indirect: uap->mode File mode to set (same as 'chmod')
6871 * uap->uid UID to set
6872 * uap->gid GID to set
6873 * uap->xsecurity ACL to set (or delete)
6874 * uap->fd File descriptor of file to change mode
6876 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based chmod with optional owner, group and
 * ACL.
 *
 * Each of uap->mode, uap->uid and uap->gid is only added to the attribute
 * set when it differs from its "not supplied" sentinel (-1,
 * KAUTH_UID_NONE, KAUTH_GID_NONE).  uap->xsecurity selects the ACL
 * handling; the change is applied with fchmod1().
 */
6881 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6884 struct vnode_attr va
;
6885 kauth_filesec_t xsecdst
;
6887 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6890 if (uap
->mode
!= -1) {
6891 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6893 if (uap
->uid
!= KAUTH_UID_NONE
) {
6894 VATTR_SET(&va
, va_uid
, uap
->uid
);
6896 if (uap
->gid
!= KAUTH_GID_NONE
) {
6897 VATTR_SET(&va
, va_gid
, uap
->gid
);
/*
 * NOTE(review): here USER_ADDR_NULL sets va_acl to NULL, the same as the
 * explicit _FILESEC_REMOVE_ACL value, and (user_addr_t)-1 is a separate
 * case.  chmod_extended() in this file shows no VATTR_SET under its
 * USER_ADDR_NULL case and has no -1 case — confirm the two entry points
 * are meant to differ.
 */
6901 switch (uap
->xsecurity
) {
6902 case USER_ADDR_NULL
:
6903 VATTR_SET(&va
, va_acl
, NULL
);
6905 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6906 VATTR_SET(&va
, va_acl
, NULL
);
6909 case CAST_USER_ADDR_T(-1):
/* Any other value is treated as a user pointer to a filesec to copy in. */
6912 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6915 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6918 error
= fchmod1(p
, uap
->fd
, &va
);
/* Second switch: decides whether a copied-in filesec has to be freed. */
6921 switch (uap
->xsecurity
) {
6922 case USER_ADDR_NULL
:
6923 case CAST_USER_ADDR_T(-1):
6926 if (xsecdst
!= NULL
) {
6927 kauth_filesec_free(xsecdst
);
/*
 * fchmod: change the mode of the file open on uap->fd.  Puts
 * (uap->mode & ALLPERMS) into a vnode_attr as va_mode and hands it to
 * fchmod1().
 */
6934 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6936 struct vnode_attr va
;
6939 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6941 return fchmod1(p
, uap
->fd
, &va
);
6946 * Set ownership given a path name.
/*
 * fchownat_internal: set owner and/or group of the object named by path
 * (resolved relative to fd).
 *
 * uid, gid   new owner/group; a value equal to VNOVAL (cast to the id
 *            type) means "leave unchanged" and is not put in the
 *            attribute set
 * flag       AT_SYMLINK_NOFOLLOW selects a NOFOLLOW lookup, else FOLLOW
 * segflg     address space of path
 *
 * Visible steps: lookup, MAC set-owner check, vnode_authattr() preflight,
 * vnode_authorize(), vnode_setattr(), MAC set-owner notification.
 */
6950 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6951 gid_t gid
, int flag
, enum uio_seg segflg
)
6954 struct vnode_attr va
;
6956 struct nameidata nd
;
6958 kauth_action_t action
;
6960 AUDIT_ARG(owner
, uid
, gid
);
6962 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6963 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6965 error
= nameiat(&nd
, fd
);
6974 if (uid
!= (uid_t
)VNOVAL
) {
6975 VATTR_SET(&va
, va_uid
, uid
);
6977 if (gid
!= (gid_t
)VNOVAL
) {
6978 VATTR_SET(&va
, va_gid
, gid
);
6982 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6988 /* preflight and authorize attribute changes */
6989 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
/* vnode_authorize() is only consulted when the preflight produced a non-zero action. */
6992 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6995 error
= vnode_setattr(vp
, &va
, ctx
);
6999 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
7005 * EACCES is only allowed from namei(); permissions failure should
7006 * return EPERM, so we need to translate the error code.
7008 if (error
== EACCES
) {
7017 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
7019 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7020 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
7024 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
7026 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7027 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
/*
 * fchownat: set ownership of uap->path, resolved relative to uap->fd,
 * through fchownat_internal().
 */
7031 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
/* Tests for any flag bit other than AT_SYMLINK_NOFOLLOW. */
7033 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7037 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
7038 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
7042 * Set ownership given a file descriptor.
/*
 * fchown: set owner and/or group of the file open on uap->fd.
 *
 * Visible steps: resolve the descriptor (file_vnode + vnode_getwithref),
 * collect the ids that are not VNOVAL, test for named streams, MAC
 * set-owner check, vnode_authattr() preflight, vnode_authorize(),
 * vnode_setattr(), MAC set-owner notification, vnode_put().
 */
7046 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
7048 struct vnode_attr va
;
7049 vfs_context_t ctx
= vfs_context_current();
7052 kauth_action_t action
;
7054 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7055 AUDIT_ARG(fd
, uap
->fd
);
7057 if ((error
= file_vnode(uap
->fd
, &vp
))) {
7061 if ((error
= vnode_getwithref(vp
))) {
7065 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/*
 * NOTE(review): these comparisons use VNOVAL without the (uid_t)/(gid_t)
 * casts that fchownat_internal() applies — presumably equivalent after
 * the usual arithmetic conversions; confirm against the field types.
 */
7068 if (uap
->uid
!= VNOVAL
) {
7069 VATTR_SET(&va
, va_uid
, uap
->uid
);
7071 if (uap
->gid
!= VNOVAL
) {
7072 VATTR_SET(&va
, va_gid
, uap
->gid
);
7076 /* chown calls are not allowed for resource forks. */
7077 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7084 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7090 /* preflight and authorize attribute changes */
7091 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7094 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
/* EACCES from vnode_authorize() is singled out here for special handling. */
7095 if (error
== EACCES
) {
7100 error
= vnode_setattr(vp
, &va
, ctx
);
7104 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7109 (void)vnode_put(vp
);
/*
 * getutimes: fill tsp[] with the timestamps for a utimes-style call.
 *
 * usrtvp   user address of two timevals, or USER_ADDR_NULL
 * tsp      output array of timespecs
 *
 * With USER_ADDR_NULL a kernel timeval is converted into tsp[0].
 * Otherwise two timevals are copied in, using the 64-bit or 32-bit user
 * layout according to the current process, and converted into tsp[0] and
 * tsp[1].
 */
7115 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
7119 if (usrtvp
== USER_ADDR_NULL
) {
7120 struct timeval old_tv
;
7121 /* XXX Y2038 bug because of microtime argument */
7123 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
/* Caller supplied times: the copy-in layout depends on the process ABI. */
7126 if (IS_64BIT_PROCESS(current_proc())) {
7127 struct user64_timeval tv
[2];
7128 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7132 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7133 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7135 struct user32_timeval tv
[2];
7136 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7140 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7141 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: set access time (ts[0]) and modification time (ts[1]) on vp.
 *
 * Visible steps: audit the vnode, build the attribute set, test for named
 * streams, MAC set-utimes check, vnode_authattr() preflight,
 * vnode_authorize(), vnode_setattr(), MAC set-utimes notification.
 * The callers in this file pass "usrtvp was null" as the final argument,
 * which is tested below as nullflag.
 */
7148 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
7152 struct vnode_attr va
;
7153 kauth_action_t action
;
7155 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7158 VATTR_SET(&va
, va_access_time
, ts
[0]);
7159 VATTR_SET(&va
, va_modify_time
, ts
[1]);
7161 va
.va_vaflags
|= VA_UTIMES_NULL
;
7165 /* utimes calls are not allowed for resource forks. */
7166 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7173 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7178 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
/* EACCES is singled out only when the caller supplied explicit times. */
7179 if (!nullflag
&& error
== EACCES
) {
7185 /* since we may not need to auth anything, check here */
7186 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7187 if (!nullflag
&& error
== EACCES
) {
7192 error
= vnode_setattr(vp
, &va
, ctx
);
7196 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7205 * Set the access and modification times of a file.
/*
 * utimes: set the access and modification times of the file named by
 * uap->path.  The lookup is set up with FOLLOW; the timestamps come from
 * getutimes() and are applied by setutimes().
 */
7209 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
7211 struct timespec ts
[2];
7214 struct nameidata nd
;
7215 vfs_context_t ctx
= vfs_context_current();
7218 * AUDIT: Needed to change the order of operations to do the
7219 * name lookup first because auditing wants the path.
7221 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
7222 UIO_USERSPACE
, uap
->path
, ctx
);
7230 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7231 * the current time instead.
7234 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
/* The last argument tells setutimes() whether the caller passed no times. */
7238 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
7241 vnode_put(nd
.ni_vp
);
7246 * Set the access and modification times of a file.
/*
 * futimes: set the access and modification times of the file open on
 * uap->fd.  Timestamps come from getutimes(); the vnode is resolved with
 * file_vnode() and vnode_getwithref() and updated by setutimes().
 */
7250 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
7252 struct timespec ts
[2];
7257 AUDIT_ARG(fd
, uap
->fd
);
7259 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7262 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
7265 if ((error
= vnode_getwithref(vp
))) {
/*
 * NOTE(review): utimes() spells this test "usrtvp == USER_ADDR_NULL";
 * the literal 0 here looks equivalent — consider using the macro for
 * consistency.
 */
7270 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
7277 * Truncate a file given its path name.
/*
 * truncate: set the data size of the file named by uap->path to
 * uap->length.
 *
 * Visible steps: test for a negative length, FOLLOW lookup via namei(),
 * MAC truncate check (with NOCRED), vnode_authattr() preflight,
 * vnode_authorize(), vnode_setattr() with va_data_size, MAC truncate
 * notification.
 */
7281 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
7284 struct vnode_attr va
;
7285 vfs_context_t ctx
= vfs_context_current();
7287 struct nameidata nd
;
7288 kauth_action_t action
;
7290 if (uap
->length
< 0) {
7293 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
7294 UIO_USERSPACE
, uap
->path
, ctx
);
7295 if ((error
= namei(&nd
))) {
7303 VATTR_SET(&va
, va_data_size
, uap
->length
);
7306 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
7312 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
/* vnode_authorize() is only consulted when the preflight produced a non-zero action. */
7315 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7318 error
= vnode_setattr(vp
, &va
, ctx
);
7322 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
7332 * Truncate a file given a file descriptor.
/*
 * ftruncate: set the data size of the object open on uap->fd to
 * uap->length.
 *
 * The descriptor is looked up with fp_lookup() and dispatched on its
 * FILEGLOB_DTYPE: one visible case hands off to pshm_truncate(); the
 * vnode path tests FWRITE on the open file, takes an iocount, runs the
 * MAC truncate check with the file's credential, and sets va_data_size
 * via vnode_setattr().
 */
7336 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
7338 vfs_context_t ctx
= vfs_context_current();
7339 struct vnode_attr va
;
7341 struct fileproc
*fp
;
7345 AUDIT_ARG(fd
, uap
->fd
);
7346 if (uap
->length
< 0) {
7350 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
7354 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
7356 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7365 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
/* Tests whether the descriptor was opened for writing. */
7367 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
7368 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7373 if ((error
= vnode_getwithref(vp
)) != 0) {
7377 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/* Unlike truncate(), the MAC hook is given the open file's credential. */
7380 error
= mac_vnode_check_truncate(ctx
,
7381 fp
->f_fglob
->fg_cred
, vp
);
7383 (void)vnode_put(vp
);
7388 VATTR_SET(&va
, va_data_size
, uap
->length
);
7389 error
= vnode_setattr(vp
, &va
, ctx
);
7393 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
7397 (void)vnode_put(vp
);
7405 * Sync an open file with synchronized I/O _file_ integrity completion
7409 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7411 __pthread_testcancel(1);
7412 return fsync_common(p
, uap
, MNT_WAIT
);
7417 * Sync an open file with synchronized I/O _file_ integrity completion
7419 * Notes: This is a legacy support function that does not test for
7420 * thread cancellation points.
7424 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7426 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7431 * Sync an open file with synchronized I/O _data_ integrity completion
7435 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7437 __pthread_testcancel(1);
7438 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7445 * Common fsync code to support both synchronized I/O file integrity completion
7446 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7448 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7449 * will only guarantee that the file data contents are retrievable. If
7450 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
7451 * includes additional metadata unnecessary for retrieving the file data
7452 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7455 * Parameters: p The process
7456 * uap->fd The descriptor to synchronize
7457 * flags The data integrity flags
7459 * Returns: int Success
7460 * fp_getfvp:EBADF Bad file descriptor
7461 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7462 * VNOP_FSYNC:??? unspecified
7464 * Notes: We use struct fsync_args because it is a short name, and all
7465 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared body of fsync(), fsync_nocancel() and fdatasync().
 *
 * p       calling process
 * uap     argument structure; only uap->fd is read here
 * flags   passed straight to VNOP_FSYNC() (callers in this file use
 *         MNT_WAIT or MNT_DWAIT)
 *
 * Resolves the descriptor to a vnode with fp_getfvp(), takes an iocount,
 * issues VNOP_FSYNC(), and for a written shadow named-stream vnode also
 * flushes the stream back to its parent.
 */
7468 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7471 struct fileproc
*fp
;
7472 vfs_context_t ctx
= vfs_context_current();
7475 AUDIT_ARG(fd
, uap
->fd
);
7477 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7480 if ((error
= vnode_getwithref(vp
))) {
7485 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7487 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7490 /* Sync resource fork shadow file if necessary. */
7492 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7493 (vp
->v_parent
!= NULLVP
) &&
7494 vnode_isshadow(vp
) &&
7495 (fp
->f_flags
& FP_WRITTEN
)) {
/* Result deliberately ignored (cast to void). */
7496 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7500 (void)vnode_put(vp
);
7506 * Duplicate files. Source must be a file, target must be a file or
7509 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7510 * perform inheritance correctly.
/*
 * copyfile: duplicate the file named by uap->from to uap->to through the
 * filesystem's VNOP_COPYFILE().
 *
 * Visible steps: validate uap->flags against CPF_MASK, look up the source,
 * look up the destination for CREATE with its parent locked, test
 * CPF_OVERWRITE, test for directories on either side, authorize opening
 * the source for read, authorize unlinking an existing target
 * (tolerating ENOENT), MAC create check with a mode derived from
 * uap->mode and the process umask, authorize KAUTH_VNODE_ADD_FILE on the
 * target directory, then call VNOP_COPYFILE().
 */
7514 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7516 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7517 struct nameidata fromnd
, tond
;
7519 vfs_context_t ctx
= vfs_context_current();
7521 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7522 struct vnode_attr va
;
7525 /* Check that the flags are valid. */
7527 if (uap
->flags
& ~CPF_MASK
) {
7531 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7532 UIO_USERSPACE
, uap
->from
, ctx
);
7533 if ((error
= namei(&fromnd
))) {
7538 NDINIT(&tond
, CREATE
, OP_LINK
,
7539 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7540 UIO_USERSPACE
, uap
->to
, ctx
);
7541 if ((error
= namei(&tond
))) {
7548 if (!(uap
->flags
& CPF_OVERWRITE
)) {
/* Directories are singled out on either the source or an existing target. */
7554 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7559 /* This calls existing MAC hooks for open */
7560 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7567 * See unlinkat_internal for an explanation of the potential
7568 * ENOENT from the MAC hook but the gist is that the MAC hook
7569 * can fail because vn_getpath isn't able to return the full
7570 * path. We choose to ignore this failure.
7572 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7573 if (error
&& error
!= ENOENT
) {
7581 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7582 /* Mask off all but regular access permissions */
7583 VATTR_SET(&va
, va_mode
,
7584 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7585 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7589 #endif /* CONFIG_MACF */
7591 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7599 * If source is the same as the destination (that is the
7600 * same inode number) then there is nothing to do.
7601 * (fixed to have POSIX semantics - CSM 3/2/98)
7607 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
/* SAVESTART was requested in the lookup above; the start directory is picked up here. */
7610 sdvp
= tond
.ni_startdir
;
7612 * nameidone has to happen before we vnode_put(tdvp)
7613 * since it may need to release the fs_nodelock on the tdvp
7633 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7636 * Helper function for doing clones. The caller is expected to provide an
7637 * iocounted source vnode and release it.
/*
 * clonefile_internal: clone the iocounted source vnode fvp to the path dst
 * (resolved relative to dst_dirfd) through VNOP_CLONEFILE().
 *
 * fvp                    source vnode; per the comment preceding this
 *                        function the caller holds and releases its
 *                        iocount
 * data_read_authorised   when true, KAUTH_VNODE_READ_DATA is removed from
 *                        the rights requested on fvp
 * dst_dirfd, dst         destination directory descriptor and path
 * flags                  CLONE_NOFOLLOW and CLONE_NOOWNERCOPY are
 *                        examined here
 * ctx                    caller's VFS context
 *
 * Visible steps: pick the authorization action from the source type,
 * look up the destination parent, compare mounts, MAC clone check,
 * authorize on the target directory and on the source, read the source's
 * security attributes, prepare the new attribute set, call
 * VNOP_CLONEFILE(), then label the new vnode, apply fallback attributes,
 * fix up its identity and post fsevents.
 */
7640 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7641 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7644 struct nameidata tond
;
7647 boolean_t free_src_acl
;
7648 boolean_t attr_cleanup
;
7650 kauth_action_t action
;
7651 struct componentname
*cnp
;
7653 struct vnode_attr va
;
7654 struct vnode_attr nva
;
7655 uint32_t vnop_flags
;
7657 v_type
= vnode_vtype(fvp
);
7662 action
= KAUTH_VNODE_ADD_FILE
;
/* A source that is a volume root, a mount, or has something mounted on it is singled out. */
7665 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7666 fvp
->v_mountedhere
) {
7669 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7675 AUDIT_ARG(fd2
, dst_dirfd
);
7676 AUDIT_ARG(value32
, flags
);
7678 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7679 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7680 UIO_USERSPACE
, dst
, ctx
);
7681 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7688 free_src_acl
= FALSE
;
7689 attr_cleanup
= FALSE
;
/* Source and destination directory are compared by mount. */
7696 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7702 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7706 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
/* Rights needed on the source; READ_DATA is dropped if the caller already authorized it. */
7710 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7711 if (data_read_authorised
) {
7712 action
&= ~KAUTH_VNODE_READ_DATA
;
7714 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
7719 * certain attributes may need to be changed from the source, we ask for
7723 VATTR_WANTED(&va
, va_uid
);
7724 VATTR_WANTED(&va
, va_gid
);
7725 VATTR_WANTED(&va
, va_mode
);
7726 VATTR_WANTED(&va
, va_flags
);
7727 VATTR_WANTED(&va
, va_acl
);
7729 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
7734 VATTR_SET(&nva
, va_type
, v_type
);
/* The source ACL is shared with nva; free_src_acl records that va.va_acl must be freed later. */
7735 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7736 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7737 free_src_acl
= TRUE
;
7740 /* Handle ACL inheritance, initialize vap. */
7741 if (v_type
== VLNK
) {
7742 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7744 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7748 attr_cleanup
= TRUE
;
7751 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7753 * We've got initial values for all security parameters,
7754 * If we are superuser, then we can change owners to be the
7755 * same as the source. Both superuser and the owner have default
7756 * WRITE_SECURITY privileges so all other fields can be taken
7757 * from source as well.
7759 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7760 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
7761 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7763 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
7764 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7767 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7770 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
7771 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
/* UF_DATAVAULT and SF_RESTRICTED come from nva, never from the source; every other flag comes from the source. */
7773 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7774 VATTR_SET(&nva
, va_flags
,
7775 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7776 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7779 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7781 if (!error
&& tvp
) {
7782 int update_flags
= 0;
7785 #endif /* CONFIG_FSE */
7788 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7789 VNODE_LABEL_CREATE
, ctx
);
7792 * If some of the requested attributes weren't handled by the
7793 * VNOP, use our fallback code.
/*
 * NOTE(review): the test below inspects `va` (the attributes read from
 * the source with vnode_getattr) while the fallback applies `nva` (the
 * attributes handed to VNOP_CLONEFILE).  It looks like `nva` may have
 * been intended in the test — confirm.
 */
7795 if (!VATTR_ALL_SUPPORTED(&va
)) {
7796 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7799 // Make sure the name & parent pointers are hooked up
7800 if (tvp
->v_name
== NULL
) {
7801 update_flags
|= VNODE_UPDATE_NAME
;
7803 if (tvp
->v_parent
== NULLVP
) {
7804 update_flags
|= VNODE_UPDATE_PARENT
;
7808 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7809 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
/* The fsevent type is chosen from the type of the newly created vnode. */
7813 switch (vnode_vtype(tvp
)) {
7817 fsevent
= FSE_CREATE_FILE
;
7820 fsevent
= FSE_CREATE_DIR
;
7826 if (need_fsevent(fsevent
, tvp
)) {
7828 * The following is a sequence of three explicit events.
7829 * A pair of FSE_CLONE events representing the source and destination
7830 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7831 * fseventsd may coalesce the destination clone and create events
7832 * into a single event resulting in the following sequence for a client
7834 * FSE_CLONE | FSE_CREATE (dst)
7836 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7838 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7841 #endif /* CONFIG_FSE */
7846 vn_attribute_cleanup(&nva
, defaulted
);
/* Release the ACL obtained from the source by vnode_getattr(). */
7848 if (free_src_acl
&& va
.va_acl
) {
7849 kauth_acl_free(va
.va_acl
);
7860 * clone files or directories, target must not exist.
/*
 * clonefileat: clone the object named by (uap->src_dirfd, uap->src).
 *
 * Only CLONE_NOFOLLOW and CLONE_NOOWNERCOPY are accepted in uap->flags;
 * the mask is tested before any lookup is done.  CLONE_NOFOLLOW selects
 * NOFOLLOW for the source lookup, otherwise FOLLOW is used.  The source is
 * resolved with nameiat() relative to uap->src_dirfd and the work is then
 * handed to clonefile_internal() together with uap->dst_dirfd and uap->dst.
 * FALSE is passed as the second argument of clonefile_internal()
 * (presumably its data_read_authorised flag -- confirm against the
 * prototype); fclonefileat() passes TRUE there.
 */
7864 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7865 __unused
int32_t *retval
)
7868 struct nameidata fromnd
;
7871 vfs_context_t ctx
= vfs_context_current();
7873 /* Check that the flags are valid. */
7874 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7878 AUDIT_ARG(fd
, uap
->src_dirfd
);
/* Symlink policy for the source lookup is driven by CLONE_NOFOLLOW. */
7880 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7881 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7882 UIO_USERSPACE
, uap
->src
, ctx
);
7883 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
7890 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: variant of clonefileat() whose source is an open file
 * descriptor (uap->src_fd) instead of a path.
 *
 * The same flag mask as in clonefileat() is tested first.  fp_getfvp()
 * yields the fileproc and vnode for uap->src_fd, the descriptor's fg_flag
 * is tested for FREAD, and vnode_getwithref() is called on the vnode
 * before clonefile_internal() is invoked with TRUE as its second argument.
 * file_drop(uap->src_fd) is called at the end (presumably balancing
 * fp_getfvp() -- confirm).
 *
 * NOTE(review): p is declared __unused yet is passed to fp_getfvp().
 */
7898 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7899 __unused
int32_t *retval
)
7902 struct fileproc
*fp
;
7904 vfs_context_t ctx
= vfs_context_current();
7906 /* Check that the flags are valid. */
7907 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7911 AUDIT_ARG(fd
, uap
->src_fd
);
7912 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
/* The descriptor must have been opened for reading. */
7917 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7918 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7923 if ((error
= vnode_getwithref(fvp
))) {
7927 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7929 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7934 file_drop(uap
->src_fd
);
/*
 * rename_submounts_callback: per-mount callback handed to vfs_iterate()
 * by renameat_internal(); refreshes f_mntonname of mounts that live below
 * the mount passed in 'arg'.
 *
 * Parameters:	mp	mount being visited
 *		arg	the parent mount (a mount_t) whose f_mntonname is
 *			used as the prefix
 *
 * Mounts whose f_mntonname does not start with the parent's f_mntonname,
 * or whose character right after that prefix is not '/', take an early
 * branch (presumably they are skipped -- confirm).  For the others,
 * vfs_busy() is attempted with LK_NOWAIT and the path of mnt_vnodecovered
 * is regenerated directly into f_mntonname by vn_getpath_ext(); a failure
 * of either call is reported with printf().
 */
7939 rename_submounts_callback(mount_t mp
, void *arg
)
7942 mount_t pmp
= (mount_t
)arg
;
7943 int prefix_len
= strlen(pmp
->mnt_vfsstat
.f_mntonname
);
7945 if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) {
/* A bare prefix match is not enough: the next character must be '/'. */
7949 if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') {
7953 if ((error
= vfs_busy(mp
, LK_NOWAIT
))) {
7954 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
/* f_mntonname is used as the output buffer, capacity MAXPATHLEN. */
7958 int pathlen
= MAXPATHLEN
;
7959 if ((error
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) {
7960 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7969 * Rename files. Source and destination must either both be directories,
7970 * or both not be directories. If target is a directory, it must be empty.
/*
 * renameat_internal: common implementation behind rename(), renameat()
 * and renameatx_np().
 *
 * Parameters:	ctx	VFS context of the caller
 *		fromfd	directory fd the source lookup is relative to
 *		from	source path
 *		tofd	directory fd the destination lookup is relative to
 *		to	destination path
 *		segflg	address space of the path strings; tested with
 *			UIO_SEG_IS_USER_SPACE() before 'to' is copied in
 *		flags	VFS_RENAME_* flags; bits outside
 *			VFS_RENAME_FLAGS_MASK are tested for first, as is
 *			the VFS_RENAME_SWAP + VFS_RENAME_EXCL combination
 *
 * Both names are looked up with nameiat() (NAMEI_COMPOUNDRENAME),
 * authorization goes through vn_authorize_renamex_with_paths() and the
 * rename itself is done by vn_rename().  Renames that move a directory to
 * a different parent are serialized with mount_lock_renames().  The lookup
 * is re-driven for EKEEPLOOKING, ERECYCLE, ENOENT (bounded by
 * MAX_AUTHORIZE_ENOENT_RETRIES) and, once, for EDATALESS after
 * vnode_materialize_dataless_file().  kauth file-op listeners and
 * FSE_RENAME fsevents are notified, f_mntonname is refreshed when a mount
 * point was renamed, and the vnode's name/parent identity is updated.
 */
7974 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7975 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7977 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
7981 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
7987 struct nameidata
*fromnd
, *tond
;
7995 const char *oname
= NULL
;
7996 char *from_name
= NULL
, *to_name
= NULL
;
7997 char *from_name_no_firmlink
= NULL
, *to_name_no_firmlink
= NULL
;
7998 int from_len
= 0, to_len
= 0;
7999 int from_len_no_firmlink
= 0, to_len_no_firmlink
= 0;
8000 int holding_mntlock
;
8001 mount_t locked_mp
= NULL
;
8002 vnode_t oparent
= NULLVP
;
8004 fse_info from_finfo
, to_finfo
;
8006 int from_truncated
= 0, to_truncated
= 0;
8007 int from_truncated_no_firmlink
= 0, to_truncated_no_firmlink
= 0;
8009 struct vnode_attr
*fvap
, *tvap
;
8011 /* carving out a chunk for structs that are too big to be on stack. */
8013 struct nameidata from_node
, to_node
;
8014 struct vnode_attr fv_attr
, tv_attr
;
8016 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
8017 fromnd
= &__rename_data
->from_node
;
8018 tond
= &__rename_data
->to_node
;
8020 holding_mntlock
= 0;
8029 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
8031 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8033 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
8035 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8038 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8039 if ((error
= nameiat(fromnd
, fromfd
))) {
8042 fdvp
= fromnd
->ni_dvp
;
8043 fvp
= fromnd
->ni_vp
;
8045 if (fvp
&& fvp
->v_type
== VDIR
) {
8046 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
8050 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8051 if ((error
= nameiat(tond
, tofd
))) {
8053 * Translate error code for rename("dir1", "dir2/.").
8055 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
8060 tdvp
= tond
->ni_dvp
;
8064 #if DEVELOPMENT || DEBUG
8066 * XXX VSWAP: Check for entitlements or special flag here
8067 * so we can restrict access appropriately.
8069 #else /* DEVELOPMENT || DEBUG */
8071 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8076 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8080 #endif /* DEVELOPMENT || DEBUG */
8082 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
8087 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
8092 batched
= vnode_compound_rename_available(fdvp
);
8095 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
8098 get_fse_info(fvp
, &from_finfo
, ctx
);
8100 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
8105 fvap
= &__rename_data
->fv_attr
;
8109 get_fse_info(tvp
, &to_finfo
, ctx
);
8110 } else if (batched
) {
8111 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
8116 tvap
= &__rename_data
->tv_attr
;
8121 #endif /* CONFIG_FSE */
8123 has_listeners
= kauth_authorize_fileop_has_listeners();
8127 if (AUDIT_RECORD_EXISTS()) {
8132 if (need_event
|| has_listeners
) {
8133 if (from_name
== NULL
) {
8134 GET_PATH(from_name
);
8135 if (from_name
== NULL
) {
8141 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
8143 if (from_name_no_firmlink
== NULL
) {
8144 GET_PATH(from_name_no_firmlink
);
8145 if (from_name_no_firmlink
== NULL
) {
8151 from_len_no_firmlink
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
);
8154 if (need_event
|| need_kpath2
|| has_listeners
) {
8155 if (to_name
== NULL
) {
8157 if (to_name
== NULL
) {
8163 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
8165 if (to_name_no_firmlink
== NULL
) {
8166 GET_PATH(to_name_no_firmlink
);
8167 if (to_name_no_firmlink
== NULL
) {
8173 to_len_no_firmlink
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
);
8174 if (to_name
&& need_kpath2
) {
8175 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
8180 * Claim: this check will never reject a valid rename.
8181 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8182 * Suppose fdvp and tdvp are not on the same mount.
8183 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8184 * then you can't move it to within another dir on the same mountpoint.
8185 * If fvp sits atop a vnode on the same mount as tdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8187 * If this check passes, then we are safe to pass these vnodes to the same FS.
8189 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
8193 goto skipped_lookup
;
8197 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
8199 if (error
== ENOENT
) {
8200 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8202 * We encountered a race where after doing the namei, tvp stops
8203 * being valid. If so, simply re-drive the rename call from the
8215 * If the source and destination are the same (i.e. they're
8216 * links to the same vnode) and the target file system is
8217 * case sensitive, then there is nothing to do.
8219 * XXX Come back to this.
8225 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8226 * then assume that this file system is case sensitive.
8228 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
8229 pathconf_val
!= 0) {
8235 * Allow the renaming of mount points.
8236 * - target must not exist
8237 * - target must reside in the same directory as source
8238 * - union mounts cannot be renamed
8239 * - "/" cannot be renamed
8241 * XXX Handle this in VFS after a continued lookup (if we missed
8242 * in the cache to start off)
8244 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8245 * we'll skip past here. The file system is responsible for
8246 * checking that @tvp is not a descendent of @fvp and vice versa
8247 * so it should always return EINVAL if either @tvp or @fvp is the
8250 if ((fvp
->v_flag
& VROOT
) &&
8251 (fvp
->v_type
== VDIR
) &&
8253 (fvp
->v_mountedhere
== NULL
) &&
8255 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
8256 ((fvp
->v_mount
->mnt_kern_flag
& MNTK_SYSTEM
) == 0) &&
8257 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
8260 /* switch fvp to the covered vnode */
8261 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
8262 if ((vnode_getwithref(coveredvp
))) {
8272 * Check for cross-device rename.
8274 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
8275 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
8281 * If source is the same as the destination (that is the
8282 * same inode number) then there is nothing to do...
8283 * EXCEPT if the underlying file system supports case
8284 * insensitivity and is case preserving. In this case
8285 * the file system needs to handle the special case of
8286 * getting the same vnode as source (fvp) and target (tvp).
8288 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8289 * and _PC_CASE_PRESERVING can have this exception, and they need to
8290 * handle the special case of getting the same vnode as target and
8291 * source. NOTE: Then the target is unlocked going into vnop_rename,
8292 * so not to cause locking problems. There is a single reference on tvp.
8294 * NOTE - that fvp == tvp also occurs if they are hard linked and
8295 * that correct behaviour then is just to return success without doing
8298 * XXX filesystem should take care of this itself, perhaps...
8300 if (fvp
== tvp
&& fdvp
== tdvp
) {
8301 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
8302 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
8303 fromnd
->ni_cnd
.cn_namelen
)) {
8308 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
8310 * we're holding a reference and lock
8311 * on locked_mp, but it no longer matches
8312 * what we want to do... so drop our hold
8314 mount_unlock_renames(locked_mp
);
8315 mount_drop(locked_mp
, 0);
8316 holding_mntlock
= 0;
8318 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
8320 * serialize renames that re-shape
8321 * the tree... if holding_mntlock is
8322 * set, then we're ready to go...
8324 * first need to drop the iocounts
8325 * we picked up, second take the
8326 * lock to serialize the access,
8327 * then finally start the lookup
8328 * process over with the lock held
8330 if (!holding_mntlock
) {
8332 * need to grab a reference on
8333 * the mount point before we
8334 * drop all the iocounts... once
8335 * the iocounts are gone, the mount
8338 locked_mp
= fvp
->v_mount
;
8339 mount_ref(locked_mp
, 0);
8342 * nameidone has to happen before we vnode_put(tvp)
8343 * since it may need to release the fs_nodelock on the tvp
8353 * nameidone has to happen before we vnode_put(fdvp)
8354 * since it may need to release the fs_nodelock on the fvp
8361 mount_lock_renames(locked_mp
);
8362 holding_mntlock
= 1;
8368 * when we dropped the iocounts to take
8369 * the lock, we allowed the identity of
8370 * the various vnodes to change... if they did,
8371 * we may no longer be dealing with a rename
8372 * that reshapes the tree... once we're holding
8373 * the iocounts, the vnodes can't change type
8374 * so we're free to drop the lock at this point
8377 if (holding_mntlock
) {
8378 mount_unlock_renames(locked_mp
);
8379 mount_drop(locked_mp
, 0);
8380 holding_mntlock
= 0;
8384 // save these off so we can later verify that fvp is the same
8385 oname
= fvp
->v_name
;
8386 oparent
= fvp
->v_parent
;
8389 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
8390 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
8393 if (holding_mntlock
) {
8395 * we can drop our serialization
8398 mount_unlock_renames(locked_mp
);
8399 mount_drop(locked_mp
, 0);
8400 holding_mntlock
= 0;
8403 if (error
== EDATALESS
) {
8405 * If we've been here before, something has gone
8406 * horribly wrong and we should just get out lest
8407 * we spiral around the drain forever.
8409 if (flags
& VFS_RENAME_DATALESS
) {
8415 * The object we're renaming is dataless (or has a
8416 * dataless descendent) and requires materialization
8417 * before the rename occurs. But we're holding the
8418 * mount point's rename lock, so it's not safe to
8421 * In this case, we release the lock, perform the
8422 * materialization, and start the whole thing over.
8424 error
= vnode_materialize_dataless_file(fvp
,
8425 NAMESPACE_HANDLER_RENAME_OP
);
8429 * The next time around we need to tell the
8430 * file system that the materialization has
8433 flags
|= VFS_RENAME_DATALESS
;
8438 if (error
== EKEEPLOOKING
) {
8439 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8440 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8441 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8445 fromnd
->ni_vp
= fvp
;
8448 goto continue_lookup
;
8452 * We may encounter a race in the VNOP where the destination didn't
8453 * exist when we did the namei, but it does by the time we go and
8454 * try to create the entry. In this case, we should re-drive this rename
8455 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8456 * but other filesystems susceptible to this race could return it, too.
8458 if (error
== ERECYCLE
) {
8463 * For compound VNOPs, the authorization callback may return
8464 * ENOENT in case of racing hardlink lookups hitting the name
8465 * cache, redrive the lookup.
8467 if (batched
&& error
== ENOENT
) {
8468 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8477 /* call out to allow 3rd party notification of rename.
8478 * Ignore result of kauth_authorize_fileop call.
8480 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8481 KAUTH_FILEOP_RENAME
,
8482 (uintptr_t)from_name
, (uintptr_t)to_name
);
8483 if (flags
& VFS_RENAME_SWAP
) {
8484 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8485 KAUTH_FILEOP_RENAME
,
8486 (uintptr_t)to_name
, (uintptr_t)from_name
);
8490 if (from_name
!= NULL
&& to_name
!= NULL
) {
8491 if (from_truncated
|| to_truncated
) {
8492 // set it here since only the from_finfo gets reported up to user space
8493 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8497 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8500 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8504 add_fsevent(FSE_RENAME
, ctx
,
8505 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8506 FSE_ARG_FINFO
, &from_finfo
,
8507 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8508 FSE_ARG_FINFO
, &to_finfo
,
8510 if (flags
& VFS_RENAME_SWAP
) {
8512 * Strictly speaking, swap is the equivalent of
8513 * *three* renames. FSEvents clients should only take
8514 * the events as a hint, so we only bother reporting
8517 add_fsevent(FSE_RENAME
, ctx
,
8518 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8519 FSE_ARG_FINFO
, &to_finfo
,
8520 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8521 FSE_ARG_FINFO
, &from_finfo
,
8525 add_fsevent(FSE_RENAME
, ctx
,
8526 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8527 FSE_ARG_FINFO
, &from_finfo
,
8528 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8532 #endif /* CONFIG_FSE */
8535 * update filesystem's mount point data
8538 char *cp
, *pathend
, *mpname
;
8544 mp
= fvp
->v_mountedhere
;
8546 if (vfs_busy(mp
, LK_NOWAIT
)) {
8550 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
8552 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8553 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8555 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8558 /* find current mount point prefix */
8559 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8560 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8565 /* find last component of target name */
8566 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8572 /* Update f_mntonname of sub mounts */
8573 vfs_iterate(0, rename_submounts_callback
, (void *)mp
);
8575 /* append name to prefix */
8576 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8577 bzero(pathend
, maxlen
);
8579 strlcpy(pathend
, mpname
, maxlen
);
8581 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
8585 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
8588 * fix up name & parent pointers. note that we first
8589 * check that fvp has the same name/parent pointers it
8590 * had before the rename call... this is a 'weak' check
8593 * XXX oparent and oname may not be set in the compound vnop case
8595 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8598 update_flags
= VNODE_UPDATE_NAME
;
8601 update_flags
|= VNODE_UPDATE_PARENT
;
8604 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8607 if (to_name
!= NULL
) {
8608 RELEASE_PATH(to_name
);
8611 if (to_name_no_firmlink
!= NULL
) {
8612 RELEASE_PATH(to_name_no_firmlink
);
8613 to_name_no_firmlink
= NULL
;
8615 if (from_name
!= NULL
) {
8616 RELEASE_PATH(from_name
);
8619 if (from_name_no_firmlink
!= NULL
) {
8620 RELEASE_PATH(from_name_no_firmlink
);
8621 from_name_no_firmlink
= NULL
;
8623 if (holding_mntlock
) {
8624 mount_unlock_renames(locked_mp
);
8625 mount_drop(locked_mp
, 0);
8626 holding_mntlock
= 0;
8630 * nameidone has to happen before we vnode_put(tdvp)
8631 * since it may need to release the fs_nodelock on the tdvp
8642 * nameidone has to happen before we vnode_put(fdvp)
8643 * since it may need to release the fs_nodelock on the fdvp
8654 * If things changed after we did the namei, then we will re-drive
8655 * this rename call from the top.
8662 FREE(__rename_data
, M_TEMP
);
/*
 * rename: rename uap->from to uap->to.
 *
 * Thin wrapper around renameat_internal(): both paths are user-space
 * addresses (UIO_USERSPACE) resolved relative to AT_FDCWD, and the flags
 * argument is 0.
 */
8667 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
8669 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
8670 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
/*
 * renameatx_np: rename relative to directory descriptors, with flags.
 *
 * Forwards to renameat_internal() with user-space paths (UIO_USERSPACE)
 * and hands uap->flags through unchanged; the flag checks are done in
 * renameat_internal().
 */
8674 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
8676 return renameat_internal(
8677 vfs_context_current(),
8678 uap
->fromfd
, uap
->from
,
8680 UIO_USERSPACE
, uap
->flags
);
/*
 * renameat: rename uap->from (relative to uap->fromfd) to uap->to
 * (relative to uap->tofd).
 *
 * Calls renameat_internal() with UIO_USERSPACE paths and a flags
 * argument of 0.
 */
8684 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
8686 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
8687 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
8691 * Make a directory file.
8693 * Returns: 0 Success
8696 * vnode_authorize:???
/*
 * mkdir1at: common worker for mkdir(), mkdirat() and mkdir_extended().
 *
 * Parameters:	ctx	VFS context of the caller
 *		path	path of the directory to create
 *		vap	creation attributes; va_mode is audited and va_type
 *			is set to VDIR here
 *		fd	directory fd the lookup is relative to
 *		segflg	address space of 'path' (passed to NDINIT)
 *
 * The name is looked up with nameiat() (CREATE/OP_MKDIR,
 * NAMEI_COMPOUNDMKDIR).  vn_authorize_mkdir() is consulted; when it fails
 * with EACCES or EPERM a second, plain LOOKUP is performed so that an
 * already existing target can be reported instead.  The directory itself
 * is created by vn_create(); EKEEPLOOKING from it jumps back to
 * continue_lookup.  Afterwards the vnode's name/parent identity is filled
 * in where missing and an FSE_CREATE_DIR event may be posted.
 */
8701 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
8702 enum uio_seg segflg
)
8706 int update_flags
= 0;
8708 struct nameidata nd
;
8710 AUDIT_ARG(mode
, vap
->va_mode
);
8711 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
8713 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
8714 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
8717 error
= nameiat(&nd
, fd
);
8729 batched
= vnode_compound_mkdir_available(dvp
);
8731 VATTR_SET(vap
, va_type
, VDIR
);
8735 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8736 * only get EXISTS or EISDIR for existing path components, and not that it could see
8737 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8738 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8740 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
8741 if (error
== EACCES
|| error
== EPERM
) {
8749 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8750 * rather than EACCES if the target exists.
8752 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
8754 error2
= nameiat(&nd
, fd
);
8768 * make the directory
8770 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
8771 if (error
== EKEEPLOOKING
) {
8773 goto continue_lookup
;
8779 // Make sure the name & parent pointers are hooked up
8780 if (vp
->v_name
== NULL
) {
8781 update_flags
|= VNODE_UPDATE_NAME
;
8783 if (vp
->v_parent
== NULLVP
) {
8784 update_flags
|= VNODE_UPDATE_PARENT
;
8788 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8792 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8797 * nameidone has to happen before we vnode_put(dvp)
8798 * since it may need to release the fs_nodelock on the dvp
8813 * mkdir_extended: Create a directory; with extended security (ACL).
8815 * Parameters: p Process requesting to create the directory
8816 * uap User argument descriptor (see below)
8819 * Indirect: uap->path Path of directory to create
8820 * uap->mode Access permissions to set
8821 * uap->xsecurity ACL to set
8823 * Returns: 0 Success
/*
 * Implementation notes: the ACL, if uap->xsecurity is not USER_ADDR_NULL,
 * is copied in with kauth_copyinfilesec() and attached as va_acl.  The
 * mode is uap->mode limited to ACCESSPERMS and masked by the process
 * umask (fd_cmask).  Creation is delegated to mkdir1at() relative to
 * AT_FDCWD, and the copied-in filesec is freed afterwards.
 */
8828 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8831 kauth_filesec_t xsecdst
;
8832 struct vnode_attr va
;
8834 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8837 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8838 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
/* Requested permission bits, with the umask bits cleared. */
8843 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8844 if (xsecdst
!= NULL
) {
8845 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8848 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8850 if (xsecdst
!= NULL
) {
8851 kauth_filesec_free(xsecdst
);
/*
 * mkdir: create the directory named by uap->path.
 *
 * The mode is uap->mode limited to ACCESSPERMS with the process umask
 * (fd_cmask) bits cleared; the creation is done by mkdir1at() relative to
 * AT_FDCWD.
 */
8857 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8859 struct vnode_attr va
;
8862 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8864 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: create the directory named by uap->path, resolved relative to
 * the directory descriptor uap->fd.
 *
 * The mode is computed exactly as in mkdir() (ACCESSPERMS bits of
 * uap->mode with the fd_cmask bits cleared) and the creation is done by
 * mkdir1at().
 */
8869 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8871 struct vnode_attr va
;
8874 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8876 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
/*
 * rmdirat_internal: common implementation of directory removal; rmdir()
 * is a thin wrapper around it.
 *
 * Parameters:	ctx		VFS context of the caller
 *		fd		directory fd the lookup is relative to
 *		dirpath		path of the directory to remove
 *		segflg		address space of 'dirpath' (passed to NDINIT)
 *		unlink_flags	when VNODE_REMOVE_DATALESS_DIR is set, an
 *				ENOTEMPTY result from vn_rmdir() is retried
 *				through vn_remove()
 *
 * The target is looked up with nameiat() (DELETE/OP_RMDIR,
 * NAMEI_COMPOUNDRMDIR).  A vnode flagged VROOT is special-cased (the root
 * of a mounted filesystem cannot be deleted).  vn_authorize_rmdir() is
 * consulted, and an ENOENT from it or from a batched vn_rmdir() re-drives
 * the lookup, bounded by MAX_AUTHORIZE_ENOENT_RETRIES.  With
 * CONFIG_APPLEDOUBLE, ENOTEMPTY triggers removal of orphaned AppleDouble
 * files followed by a second vn_rmdir().  kauth file-op listeners and
 * FSE_DELETE fsevents are notified, and the sequence sits in a do/while
 * loop that repeats while restart_flag is non-zero.
 */
8881 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8882 enum uio_seg segflg
, int unlink_flags
)
8886 struct nameidata nd
;
8888 char *no_firmlink_path
= NULL
;
8890 int len_no_firmlink_path
= 0;
8891 int has_listeners
= 0;
8893 int truncated_path
= 0;
8894 int truncated_no_firmlink_path
= 0;
8896 struct vnode_attr va
;
8897 #endif /* CONFIG_FSE */
8898 struct vnode_attr
*vap
= NULL
;
8899 int restart_count
= 0;
8905 * This loop exists to restart rmdir in the unlikely case that two
8906 * processes are simultaneously trying to remove the same directory
8907 * containing orphaned appleDouble files.
8910 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8911 segflg
, dirpath
, ctx
);
8912 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8917 error
= nameiat(&nd
, fd
);
8926 batched
= vnode_compound_rmdir_available(vp
);
8928 if (vp
->v_flag
& VROOT
) {
8930 * The root of a mounted filesystem cannot be deleted.
8936 #if DEVELOPMENT || DEBUG
8938 * XXX VSWAP: Check for entitlements or special flag here
8939 * so we can restrict access appropriately.
8941 #else /* DEVELOPMENT || DEBUG */
8943 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8947 #endif /* DEVELOPMENT || DEBUG */
8950 * Removed a check here; we used to abort if vp's vid
8951 * was not the same as what we'd seen the last time around.
8952 * I do not think that check was valid, because if we retry
8953 * and all dirents are gone, the directory could legitimately
8954 * be recycled but still be present in a situation where we would
8955 * have had permission to delete. Therefore, we won't make
8956 * an effort to preserve that check now that we may not have a
8961 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
8963 if (error
== ENOENT
) {
8964 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8975 if (!vnode_compound_rmdir_available(dvp
)) {
8976 panic("No error, but no compound rmdir?");
8983 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8986 get_fse_info(vp
, &finfo
, ctx
);
8988 error
= vfs_get_notify_attributes(&va
);
8997 has_listeners
= kauth_authorize_fileop_has_listeners();
8998 if (need_event
|| has_listeners
) {
9007 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
9009 if (no_firmlink_path
== NULL
) {
9010 GET_PATH(no_firmlink_path
);
9011 if (no_firmlink_path
== NULL
) {
9017 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
9019 if (truncated_no_firmlink_path
) {
9020 finfo
.mode
|= FSE_TRUNCATED_PATH
;
9025 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9028 /* Couldn't find a vnode */
9032 if (error
== EKEEPLOOKING
) {
9033 goto continue_lookup
;
9034 } else if (batched
&& error
== ENOENT
) {
9035 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9037 * For compound VNOPs, the authorization callback
9038 * may return ENOENT in case of racing hard link lookups
9039 * redrive the lookup.
9048 * XXX There's no provision for passing flags
9049 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9050 * because it's not empty, then we try again
9051 * with VNOP_REMOVE(), passing in a special
9052 * flag that clever file systems will know
9055 if (error
== ENOTEMPTY
&&
9056 (unlink_flags
& VNODE_REMOVE_DATALESS_DIR
) != 0) {
9058 * If this fails, we want to keep the original
9061 if (vn_remove(dvp
, &vp
, &nd
,
9062 VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) {
9067 #if CONFIG_APPLEDOUBLE
9069 * Special case to remove orphaned AppleDouble
9070 * files. I don't like putting this in the kernel,
9071 * but carbon does not like putting this in carbon either,
9074 if (error
== ENOTEMPTY
) {
9075 int ad_error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
9076 if (ad_error
== EBUSY
) {
9083 * Assuming everything went well, we will try the RMDIR again
9086 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9089 #endif /* CONFIG_APPLEDOUBLE */
9091 * Call out to allow 3rd party notification of delete.
9092 * Ignore result of kauth_authorize_fileop call.
9095 if (has_listeners
) {
9096 kauth_authorize_fileop(vfs_context_ucred(ctx
),
9097 KAUTH_FILEOP_DELETE
,
9102 if (vp
->v_flag
& VISHARDLINK
) {
9103 // see the comment in unlink1() about why we update
9104 // the parent of a hard link when it is removed
9105 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
9111 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
9113 add_fsevent(FSE_DELETE
, ctx
,
9114 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
9115 FSE_ARG_FINFO
, &finfo
,
9127 if (no_firmlink_path
!= NULL
) {
9128 RELEASE_PATH(no_firmlink_path
);
9129 no_firmlink_path
= NULL
;
9133 * nameidone has to happen before we vnode_put(dvp)
9134 * since it may need to release the fs_nodelock on the dvp
9143 if (restart_flag
== 0) {
9144 wakeup_one((caddr_t
)vp
);
9147 tsleep(vp
, PVFS
, "rm AD", 1);
9148 } while (restart_flag
!= 0);
9154 * Remove a directory file.
/*
 * rmdir: remove the directory named by uap->path.
 *
 * Thin wrapper around rmdirat_internal(): the path is a user-space
 * address (UIO_USERSPACE) resolved relative to AT_FDCWD, and the
 * unlink_flags argument is 0.
 */
9158 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
9160 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
9161 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0);
/*
 * Get direntry length padded to 8 byte alignment.
 * NOTE(review): presumably struct direntry ends in a MAXPATHLEN-byte name
 * array, so subtracting (MAXPATHLEN-1) leaves room for the name plus its
 * terminating NUL -- confirm against the struct definition.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 * The argument is parenthesized so that an expression argument (for
 * example a conditional expression) is evaluated as a unit before the
 * "+ 1" is applied.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of its last byte, per d_reclen) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
/*
 * vnode_readdir64: read directory entries in struct direntry format.
 *
 * Parameters:	vp		directory vnode
 *		uio		destination for the entries
 *		flags		passed to VNOP_READDIR() on the native path
 *		eofflag		passed through to VNOP_READDIR()
 *		numdirent	passed through to VNOP_READDIR()
 *		ctxp		VFS context
 *
 * When the file system advertises VFC_VFSREADDIR_EXTENDED and the mount
 * does not have MNTK_DENY_READDIREXT set, VNOP_READDIR() is called
 * directly.  Otherwise entries are read as struct dirent into a temporary
 * kernel buffer (VNOP_READDIR() with flags 0), each record is
 * bounds-checked with DIRENT_END() and DIRENT_LEN(), repacked into a
 * struct direntry and moved to 'uio' with uiomove().  uio->uio_offset is
 * then taken from the offset left in the temporary uio.
 */
9177 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
9178 int *numdirent
, vfs_context_t ctxp
)
9180 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9181 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
9182 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
9183 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
9188 struct direntry
*entry64
;
9194 * We're here because the underlying file system does not
9195 * support direntries or we mounted denying support so we must
9196 * fall back to dirents and convert them to direntries.
9198 * Our kernel buffer needs to be smaller since re-packing will
9199 * expand each dirent. The worst case (when the name length
9200 * is 3 or less) corresponds to a struct direntry size of 32
9201 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9202 * (4-byte aligned). So having a buffer that is 3/8 the size
9203 * will prevent us from reading more than we can pack.
9205 * Since this buffer is wired memory, we will limit the
9206 * buffer size to a maximum of 32K. We would really like to
9207 * use 32K in the MIN(), but we use magic number 87371 to
9208 * prevent uio_resid() * 3 / 8 from overflowing.
9210 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
9211 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
9212 if (bufptr
== NULL
) {
9216 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
9217 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
9218 auio
->uio_offset
= uio
->uio_offset
;
9220 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
9222 dep
= (struct dirent
*)bufptr
;
9223 bytesread
= bufsize
- uio_resid(auio
);
9225 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
9228 * Convert all the entries and copy them out to user's buffer.
9230 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
9231 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
9233 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
9234 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
9235 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
9236 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
9237 vp
->v_name
? vp
->v_name
: "<unknown>");
9242 bzero(entry64
, enbufsize
);
9243 /* Convert a dirent to a dirent64. */
9244 entry64
->d_ino
= dep
->d_ino
;
9245 entry64
->d_seekoff
= 0;
9246 entry64
->d_reclen
= enbufsize
;
9247 entry64
->d_namlen
= dep
->d_namlen
;
9248 entry64
->d_type
= dep
->d_type
;
9249 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
9251 /* Move to next entry. */
9252 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
9254 /* Copy entry64 to user's buffer. */
9255 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
9258 /* Update the real offset using the offset we got from VNOP_READDIR. */
9260 uio
->uio_offset
= auio
->uio_offset
;
9263 FREE(bufptr
, M_TEMP
);
9264 FREE(entry64
, M_TEMP
);
/* Cap (128 MiB) that getdirentries_common() applies to the caller-supplied buffer size. */
9269 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9272 * Read a block of directory entries in a file system independent format.
/*
 * getdirentries_common: shared worker for getdirentries() and
 * getdirentries64().
 *
 * Parameters:	fd		open descriptor of the directory
 *		bufp		user buffer receiving the entries
 *		bufsize		size of that buffer; clamped to
 *				GETDIRENTRIES_MAXBUFSIZE
 *		bytesread	out: bufsize minus the residual count
 *		offset		out: directory offset reported to the caller
 *				(presumably the starting offset -- confirm)
 *		eofflag		out: handed to the readdir call
 *		flags		VNODE_READDIR_EXTENDED selects
 *				vnode_readdir64(); otherwise VNOP_READDIR()
 *				is called with flags 0
 *
 * The descriptor is resolved with fp_getfvp() and must have FREAD set;
 * the vnode must be of type VDIR.  Reading starts at the descriptor's
 * fg_offset, which is updated from the uio afterwards.  When nothing was
 * transferred (residual equals bufsize), union handling is attempted via
 * union_dircheckp or, for MNT_UNION mounts, lookup_traverse_union(), after
 * which fg_data is switched to the new vnode and fg_offset reset to 0.
 */
9275 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
9276 off_t
*offset
, int *eofflag
, int flags
)
9279 struct vfs_context context
= *vfs_context_current(); /* local copy */
9280 struct fileproc
*fp
;
9282 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9284 int error
, numdirent
;
9285 char uio_buf
[UIO_SIZEOF(1)];
9287 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
9291 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9292 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9297 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
9298 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
9302 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
9307 if ((error
= vnode_getwithref(vp
))) {
9310 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9313 if (vp
->v_type
!= VDIR
) {
9314 (void)vnode_put(vp
);
9320 error
= mac_vnode_check_readdir(&context
, vp
);
9322 (void)vnode_put(vp
);
9327 loff
= fp
->f_fglob
->fg_offset
;
9328 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9329 uio_addiov(auio
, bufp
, bufsize
);
9331 if (flags
& VNODE_READDIR_EXTENDED
) {
9332 error
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
);
9333 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9335 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
);
9336 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9339 (void)vnode_put(vp
);
9343 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
9344 if (union_dircheckp
) {
9345 error
= union_dircheckp(&vp
, fp
, &context
);
9350 (void)vnode_put(vp
);
9355 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
9356 struct vnode
*tvp
= vp
;
9357 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
9359 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9360 fp
->f_fglob
->fg_offset
= 0;
9374 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: read directory entries without VNODE_READDIR_EXTENDED.
 *
 * Calls getdirentries_common() with uap->fd, uap->buf, uap->count and
 * flags 0, copies the resulting offset out to uap->basep as a
 * user64_long_t or a user32_long_t depending on proc_is64bit(p), and
 * reports the byte count through *retval.
 *
 * NOTE(review): p is declared __unused yet is passed to proc_is64bit().
 */
9382 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
9388 AUDIT_ARG(fd
, uap
->fd
);
9389 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
,
9390 &bytesread
, &offset
, &eofflag
, 0);
/* The width of the value stored at uap->basep follows the caller's ABI. */
9393 if (proc_is64bit(p
)) {
9394 user64_long_t base
= (user64_long_t
)offset
;
9395 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
9397 user32_long_t base
= (user32_long_t
)offset
;
9398 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
9400 *retval
= bytesread
;
9406 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
9411 user_size_t bufsize
;
9413 AUDIT_ARG(fd
, uap
->fd
);
9416 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9417 * then the kernel carves out the last 4 bytes to return extended
9418 * information to userspace (namely whether we reached EOF with this call).
9420 if (uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9421 bufsize
= uap
->bufsize
- sizeof(getdirentries64_flags_t
);
9423 bufsize
= uap
->bufsize
;
9426 error
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
,
9427 &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
);
9430 *retval
= bytesread
;
9431 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
9433 if (error
== 0 && uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9434 getdirentries64_flags_t flags
= 0;
9436 flags
|= GETDIRENTRIES64_EOF
;
9438 error
= copyout(&flags
, (user_addr_t
)uap
->buf
+ bufsize
,
9447 * Set the mode mask for creation of filesystem nodes.
9448 * XXX implement xsecurity
9450 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9452 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
9454 struct filedesc
*fdp
;
9456 AUDIT_ARG(mask
, newmask
);
9459 *retval
= fdp
->fd_cmask
;
9460 fdp
->fd_cmask
= newmask
& ALLPERMS
;
9466 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9468 * Parameters: p Process requesting to set the umask
9469 * uap User argument descriptor (see below)
9470 * retval umask of the process (parameter p)
9472 * Indirect: uap->newmask umask to set
9473 * uap->xsecurity ACL to set
9475 * Returns: 0 Success
9480 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
9483 kauth_filesec_t xsecdst
;
9485 xsecdst
= KAUTH_FILESEC_NONE
;
9486 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
9487 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
9491 xsecdst
= KAUTH_FILESEC_NONE
;
9494 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
9496 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
9497 kauth_filesec_free(xsecdst
);
9503 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
9505 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
9509 * Void all references to file by ripping underlying filesystem
9514 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
9517 struct vnode_attr va
;
9518 vfs_context_t ctx
= vfs_context_current();
9520 struct nameidata nd
;
9522 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9532 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9537 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9543 error
= mac_vnode_check_revoke(ctx
, vp
);
9550 VATTR_WANTED(&va
, va_uid
);
9551 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9554 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9555 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9558 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9559 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9568 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9569 * The following system calls are designed to support features
9570 * which are specific to the HFS & HFS Plus volume formats
9575 * Obtain attribute information on objects in a directory while enumerating
9580 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9583 struct fileproc
*fp
;
9585 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9586 uint32_t count
= 0, savecount
= 0;
9587 uint32_t newstate
= 0;
9590 struct attrlist attributelist
;
9591 vfs_context_t ctx
= vfs_context_current();
9593 char uio_buf
[UIO_SIZEOF(1)];
9594 kauth_action_t action
;
9598 /* Get the attributes into kernel space */
9599 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9602 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9606 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9609 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9610 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9617 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9625 if ((error
= vnode_getwithref(vp
))) {
9629 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9632 if (vp
->v_type
!= VDIR
) {
9633 (void)vnode_put(vp
);
9639 error
= mac_vnode_check_readdir(ctx
, vp
);
9641 (void)vnode_put(vp
);
9646 /* set up the uio structure which will contain the users return buffer */
9647 loff
= fp
->f_fglob
->fg_offset
;
9648 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9649 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9652 * If the only item requested is file names, we can let that past with
9653 * just LIST_DIRECTORY. If they want any other attributes, that means
9654 * they need SEARCH as well.
9656 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9657 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9658 attributelist
.fileattr
|| attributelist
.dirattr
) {
9659 action
|= KAUTH_VNODE_SEARCH
;
9662 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9663 /* Believe it or not, uap->options only has 32-bits of valid
9664 * info, so truncate before extending again */
9666 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9667 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9671 (void) vnode_put(vp
);
9676 * If we've got the last entry of a directory in a union mount
9677 * then reset the eofflag and pretend there's still more to come.
9678 * The next call will again set eofflag and the buffer will be empty,
9679 * so traverse to the underlying directory and do the directory
9682 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
9683 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
9685 } else { // Empty buffer
9686 struct vnode
*tvp
= vp
;
9687 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
9688 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
9689 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9690 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
9692 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
9700 (void)vnode_put(vp
);
9705 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
9707 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
9710 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
9713 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
9717 *retval
= eofflag
; /* similar to getdirentries */
9721 return error
; /* return error earlier, an retval of 0 or 1 now */
9722 } /* end of getdirentriesattr system call */
9725 * Exchange data between two files
9730 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
9732 struct nameidata fnd
, snd
;
9733 vfs_context_t ctx
= vfs_context_current();
9737 u_int32_t nameiflags
;
9740 int flen
= 0, slen
= 0;
9741 int from_truncated
= 0, to_truncated
= 0;
9743 fse_info f_finfo
, s_finfo
;
9747 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9748 nameiflags
|= FOLLOW
;
9751 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
9752 UIO_USERSPACE
, uap
->path1
, ctx
);
9754 error
= namei(&fnd
);
9762 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
9763 UIO_USERSPACE
, uap
->path2
, ctx
);
9765 error
= namei(&snd
);
9774 * if the files are the same, return an inval error
9782 * if the files are on different volumes, return an error
9784 if (svp
->v_mount
!= fvp
->v_mount
) {
9789 /* If they're not files, return an error */
9790 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
9796 error
= mac_vnode_check_exchangedata(ctx
,
9802 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
9803 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
9809 need_fsevent(FSE_EXCHANGE
, fvp
) ||
9811 kauth_authorize_fileop_has_listeners()) {
9814 if (fpath
== NULL
|| spath
== NULL
) {
9819 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
9820 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
9823 get_fse_info(fvp
, &f_finfo
, ctx
);
9824 get_fse_info(svp
, &s_finfo
, ctx
);
9825 if (from_truncated
|| to_truncated
) {
9826 // set it here since only the f_finfo gets reported up to user space
9827 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
9831 /* Ok, make the call */
9832 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
9835 const char *tmpname
;
9837 if (fpath
!= NULL
&& spath
!= NULL
) {
9838 /* call out to allow 3rd party notification of exchangedata.
9839 * Ignore result of kauth_authorize_fileop call.
9841 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
9842 (uintptr_t)fpath
, (uintptr_t)spath
);
9846 tmpname
= fvp
->v_name
;
9847 fvp
->v_name
= svp
->v_name
;
9848 svp
->v_name
= tmpname
;
9850 if (fvp
->v_parent
!= svp
->v_parent
) {
9853 tmp
= fvp
->v_parent
;
9854 fvp
->v_parent
= svp
->v_parent
;
9855 svp
->v_parent
= tmp
;
9857 name_cache_unlock();
9860 if (fpath
!= NULL
&& spath
!= NULL
) {
9861 add_fsevent(FSE_EXCHANGE
, ctx
,
9862 FSE_ARG_STRING
, flen
, fpath
,
9863 FSE_ARG_FINFO
, &f_finfo
,
9864 FSE_ARG_STRING
, slen
, spath
,
9865 FSE_ARG_FINFO
, &s_finfo
,
9872 if (fpath
!= NULL
) {
9873 RELEASE_PATH(fpath
);
9875 if (spath
!= NULL
) {
9876 RELEASE_PATH(spath
);
9885 * Return (in MB) the amount of freespace on the given vnode's volume.
9887 uint32_t freespace_mb(vnode_t vp
);
9890 freespace_mb(vnode_t vp
)
9892 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9893 return ((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9894 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20;
9902 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9907 struct nameidata nd
;
9908 struct user64_fssearchblock searchblock
;
9909 struct searchstate
*state
;
9910 struct attrlist
*returnattrs
;
9911 struct timeval timelimit
;
9912 void *searchparams1
, *searchparams2
;
9914 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9915 uint32_t nummatches
;
9917 uint32_t nameiflags
;
9918 vfs_context_t ctx
= vfs_context_current();
9919 char uio_buf
[UIO_SIZEOF(1)];
9921 /* Start by copying in fsearchblock parameter list */
9922 if (IS_64BIT_PROCESS(p
)) {
9923 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9924 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9925 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9927 struct user32_fssearchblock tmp_searchblock
;
9929 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9930 // munge into 64-bit version
9931 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9932 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9933 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9934 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9936 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9937 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9939 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9940 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9941 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9942 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9943 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9944 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9945 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9951 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9953 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9954 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
9958 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9959 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9960 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9963 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9964 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9965 /* assumes the size is still 556 bytes it will continue to work */
9967 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9968 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
9970 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9972 /* Now set up the various pointers to the correct place in our newly allocated memory */
9974 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9975 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9976 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
9978 /* Now copy in the stuff given our local variables. */
9980 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
9984 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
9988 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
9992 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
9997 * When searching a union mount, need to set the
9998 * start flag at the first call on each layer to
9999 * reset state for the new volume.
10001 if (uap
->options
& SRCHFS_START
) {
10002 state
->ss_union_layer
= 0;
10004 uap
->options
|= state
->ss_union_flags
;
10006 state
->ss_union_flags
= 0;
10009 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10010 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10011 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10012 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10013 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10016 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
10017 attrreference_t
* string_ref
;
10018 u_int32_t
* start_length
;
10019 user64_size_t param_length
;
10021 /* validate searchparams1 */
10022 param_length
= searchblock
.sizeofsearchparams1
;
10023 /* skip the word that specifies length of the buffer */
10024 start_length
= (u_int32_t
*) searchparams1
;
10025 start_length
= start_length
+ 1;
10026 string_ref
= (attrreference_t
*) start_length
;
10028 /* ensure no negative offsets or too big offsets */
10029 if (string_ref
->attr_dataoffset
< 0) {
10033 if (string_ref
->attr_length
> MAXPATHLEN
) {
10038 /* Check for pointer overflow in the string ref */
10039 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
10044 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
10048 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
10054 /* set up the uio structure which will contain the users return buffer */
10055 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10056 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
10059 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10060 nameiflags
|= FOLLOW
;
10062 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
10063 UIO_USERSPACE
, uap
->path
, ctx
);
10065 error
= namei(&nd
);
10073 * Switch to the root vnode for the volume
10075 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
10083 * If it's a union mount, the path lookup takes
10084 * us to the top layer. But we may need to descend
10085 * to a lower layer. For non-union mounts the layer
10088 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
10089 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
10093 vp
= vp
->v_mount
->mnt_vnodecovered
;
10099 error
= vnode_getwithref(vp
);
10107 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
10116 * If searchblock.maxmatches == 0, then skip the search. This has happened
10117 * before and sometimes the underlying code doesn't deal with it well.
10119 if (searchblock
.maxmatches
== 0) {
10125 * All right, we have everything we need, so let's make that call.
10127 * We keep special track of the return value from the file system:
10128 * EAGAIN is an acceptable error condition that shouldn't keep us
10129 * from copying out any results...
10132 fserror
= VNOP_SEARCHFS(vp
,
10135 &searchblock
.searchattrs
,
10136 (u_long
)searchblock
.maxmatches
,
10140 (u_long
)uap
->scriptcode
,
10141 (u_long
)uap
->options
,
10143 (struct searchstate
*) &state
->ss_fsstate
,
10147 * If it's a union mount we need to be called again
10148 * to search the mounted-on filesystem.
10150 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
10151 state
->ss_union_flags
= SRCHFS_START
;
10152 state
->ss_union_layer
++; // search next layer down
10160 /* Now copy out the stuff that needs copying out. That means the number of matches, the
10161 * search state. Everything was already put into the return buffer by the vop call. */
10163 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
10167 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
10175 FREE(searchparams1
, M_TEMP
);
10178 } /* end of searchfs system call */
10180 #else /* CONFIG_SEARCHFS */
10183 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
10188 #endif /* CONFIG_SEARCHFS */
10191 #if CONFIG_DATALESS_FILES
10194 * === Namespace Resolver Up-call Mechanism ===
10196 * When I/O is performed to a dataless file or directory (read, write,
10197 * lookup-in, etc.), the file system performs an upcall to the namespace
10198 * resolver (filecoordinationd) to materialize the object.
10200 * We need multiple up-calls to be in flight at once, and we need these
10201 * up-calls to be interruptible, thus the following implementation:
10203 * => The nspace_resolver_request represents the in-kernel request state.
10204 * It contains a request ID, storage space for the errno code returned
10205 * by filecoordinationd, and flags.
10207 * => The request ID is simply a global monotonically incrementing 32-bit
10208 * number. Outstanding requests are stored in a hash table, and the
10209 * hash function is extremely simple.
10211 * => When an upcall is to be made to filecoordinationd, a request structure
10212 * is allocated on the stack (it is small, and needs to live only during
10213 * the duration of the call to resolve_nspace_item_ext()). It is
10214 * initialized and inserted into the table. Some backpressure from
10215 * filecoordinationd is applied by limiting the number of entries that
10216 * can be inserted into the table (and thus limiting the number of
10217 * outstanding requests issued to filecoordinationd); waiting for an
10218 * available slot is interruptible.
10220 * => Once the request has been inserted into the table, the up-call is made
10221 * to filecoordinationd via a MiG-generated stub. The up-call returns
10222 * immediately and filecoordinationd processes the request asynchronously.
10224 * => The caller now waits for the request to complete. This is achieved by
10225 * sleeping on the address of the request structure and waiting for
10226 * filecoordinationd to mark the request structure as complete. This
10227 * is an interruptible sleep call; if interrupted, the request structure
10228 * is removed from the table and EINTR is returned to the caller. If
10229 * this occurs, an advisory up-call is made to filecoordinationd with
10230 * the request ID to indicate that the request can be aborted or
10231 * de-prioritized at the discretion of filecoordinationd.
10233 * => When filecoordinationd has completed the request, it signals completion
10234 * by writing to the vfs.nspace.complete sysctl node. Only a process
10235 * decorated as a namespace resolver can write to this sysctl node. The
10236 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10237 * The request ID is looked up in the table, and if the request is found,
10238 * the error code is stored in the request structure and a wakeup()
10239 * issued on the address of the request structure. If the request is not
10240 * found, we simply drop the completion notification, assuming that the
10241 * caller was interrupted.
10243 * => When the waiting thread wakes up, it extracts the error code from the
10244 * request structure, removes the request from the table, and returns the
10245 * error code to the calling function. Fini!
10248 struct nspace_resolver_request
{
10249 LIST_ENTRY(nspace_resolver_request
) r_hashlink
;
10251 int r_resolver_error
;
10255 #define RRF_COMPLETE 0x0001
10258 next_nspace_req_id(void)
10260 static uint32_t next_req_id
;
10262 return OSAddAtomic(1, &next_req_id
);
10265 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10266 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10268 static LIST_HEAD(nspace_resolver_requesthead
,
10269 nspace_resolver_request
) * nspace_resolver_request_hashtbl
;
10270 static u_long nspace_resolver_request_hashmask
;
10271 static u_int nspace_resolver_request_count
;
10272 static bool nspace_resolver_request_wait_slot
;
10273 static lck_grp_t
*nspace_resolver_request_lck_grp
;
10274 static lck_mtx_t nspace_resolver_request_hash_mutex
;
10276 #define NSPACE_REQ_LOCK() \
10277 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10278 #define NSPACE_REQ_UNLOCK() \
10279 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10281 #define NSPACE_RESOLVER_HASH(req_id) \
10282 (&nspace_resolver_request_hashtbl[(req_id) & \
10283 nspace_resolver_request_hashmask])
10285 static struct nspace_resolver_request
*
10286 nspace_resolver_req_lookup(uint32_t req_id
)
10288 struct nspace_resolver_requesthead
*bucket
;
10289 struct nspace_resolver_request
*req
;
10291 bucket
= NSPACE_RESOLVER_HASH(req_id
);
10292 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10293 if (req
->r_req_id
== req_id
) {
10302 nspace_resolver_req_add(struct nspace_resolver_request
*req
)
10304 struct nspace_resolver_requesthead
*bucket
;
10307 while (nspace_resolver_request_count
>=
10308 NSPACE_RESOLVER_MAX_OUTSTANDING
) {
10309 nspace_resolver_request_wait_slot
= true;
10310 error
= msleep(&nspace_resolver_request_count
,
10311 &nspace_resolver_request_hash_mutex
,
10312 PVFS
| PCATCH
, "nspacerq", NULL
);
10318 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10320 assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
);
10321 #endif /* DIAGNOSTIC */
10322 LIST_INSERT_HEAD(bucket
, req
, r_hashlink
);
10323 nspace_resolver_request_count
++;
10329 nspace_resolver_req_remove(struct nspace_resolver_request
*req
)
10331 struct nspace_resolver_requesthead
*bucket
;
10333 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10335 assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
);
10336 #endif /* DIAGNOSTIC */
10337 LIST_REMOVE(req
, r_hashlink
);
10338 nspace_resolver_request_count
--;
10340 if (nspace_resolver_request_wait_slot
) {
10341 nspace_resolver_request_wait_slot
= false;
10342 wakeup(&nspace_resolver_request_count
);
10347 nspace_resolver_req_cancel(uint32_t req_id
)
10352 // Failures here aren't fatal -- the cancellation message
10353 // sent to the resolver is merely advisory.
10355 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10356 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10360 kr
= send_nspace_resolve_cancel(mp
, req_id
);
10361 if (kr
!= KERN_SUCCESS
) {
10362 os_log_error(OS_LOG_DEFAULT
,
10363 "NSPACE send_nspace_resolve_cancel failure: %d", kr
);
10366 ipc_port_release_send(mp
);
10370 nspace_resolver_req_wait(struct nspace_resolver_request
*req
)
10372 bool send_cancel_message
= false;
10377 while ((req
->r_flags
& RRF_COMPLETE
) == 0) {
10378 error
= msleep(req
, &nspace_resolver_request_hash_mutex
,
10379 PVFS
| PCATCH
, "nspace", NULL
);
10380 if (error
&& error
!= ERESTART
) {
10381 req
->r_resolver_error
= (error
== EINTR
) ? EINTR
:
10383 send_cancel_message
= true;
10388 nspace_resolver_req_remove(req
);
10390 NSPACE_REQ_UNLOCK();
10392 if (send_cancel_message
) {
10393 nspace_resolver_req_cancel(req
->r_req_id
);
10396 return req
->r_resolver_error
;
10400 nspace_resolver_req_mark_complete(
10401 struct nspace_resolver_request
*req
,
10402 int resolver_error
)
10404 req
->r_resolver_error
= resolver_error
;
10405 req
->r_flags
|= RRF_COMPLETE
;
10410 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
)
10412 struct nspace_resolver_request
*req
;
10416 // If we don't find the request corresponding to our req_id,
10417 // just drop the completion signal on the floor; it's likely
10418 // that the requester interrupted with a signal.
10420 req
= nspace_resolver_req_lookup(req_id
);
10422 nspace_resolver_req_mark_complete(req
, resolver_error
);
10425 NSPACE_REQ_UNLOCK();
10428 static struct proc
*nspace_resolver_proc
;
10431 nspace_resolver_get_proc_state(struct proc
*p
, int *is_resolver
)
10433 *is_resolver
= ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10434 p
== nspace_resolver_proc
) ? 1 : 0;
10439 nspace_resolver_set_proc_state(struct proc
*p
, int is_resolver
)
10441 vfs_context_t ctx
= vfs_context_current();
10445 // The system filecoordinationd runs as uid == 0. This also
10446 // has the nice side-effect of filtering out filecoordinationd
10447 // running in the simulator.
10449 if (!vfs_context_issuser(ctx
)) {
10453 error
= priv_check_cred(vfs_context_ucred(ctx
),
10454 PRIV_VFS_DATALESS_RESOLVER
, 0);
10462 if (nspace_resolver_proc
== NULL
) {
10464 p
->p_lflag
|= P_LNSPACE_RESOLVER
;
10466 nspace_resolver_proc
= p
;
10471 NSPACE_REQ_UNLOCK();
10473 // This is basically just like the exit case.
10474 // nspace_resolver_exited() will verify that the
10475 // process is the resolver, and will clear the
10477 nspace_resolver_exited(p
);
10484 nspace_materialization_get_proc_state(struct proc
*p
, int *is_prevented
)
10486 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) != 0 ||
10487 (p
->p_vfs_iopolicy
&
10488 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) {
10497 nspace_materialization_set_proc_state(struct proc
*p
, int is_prevented
)
10499 if (p
->p_lflag
& P_LNSPACE_RESOLVER
) {
10500 return is_prevented
? 0 : EBUSY
;
10503 if (is_prevented
) {
10504 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
);
10506 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
);
10512 nspace_materialization_get_thread_state(int *is_prevented
)
10514 uthread_t ut
= get_bsdthread_info(current_thread());
10516 *is_prevented
= (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0;
10521 nspace_materialization_set_thread_state(int is_prevented
)
10523 uthread_t ut
= get_bsdthread_info(current_thread());
10525 if (is_prevented
) {
10526 ut
->uu_flag
|= UT_NSPACE_NODATALESSFAULTS
;
10528 ut
->uu_flag
&= ~UT_NSPACE_NODATALESSFAULTS
;
10534 nspace_materialization_is_prevented(void)
10536 proc_t p
= current_proc();
10537 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
10538 vfs_context_t ctx
= vfs_context_current();
10541 * Kernel context ==> return EDEADLK, as we would with any random
10542 * process decorated as no-materialize.
10544 if (ctx
== vfs_context_kernel()) {
10549 * If the process has the dataless-manipulation entitlement,
10550 * materialization is prevented, and depending on the kind
10551 * of file system operation, things get to proceed as if the
10552 * object is not dataless.
10554 if (vfs_context_is_dataless_manipulator(ctx
)) {
10555 return EJUSTRETURN
;
10559 * Per-thread decorations override any process-wide decorations.
10560 * (Foundation uses this, and this overrides even the dataless-
10561 * manipulation entitlement so as to make API contracts consistent.)
10564 if (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) {
10567 if (ut
->uu_flag
& UT_NSPACE_FORCEDATALESSFAULTS
) {
10573 * If the process's iopolicy specifies that dataless files
10574 * can be materialized, then we let it go ahead.
10576 if (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) {
10581 * The default behavior is to not materialize dataless files;
10582 * return to the caller that deadlock was detected.
10587 /* the vfs.nspace branch */
10588 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge");
10591 sysctl_nspace_resolver(__unused
struct sysctl_oid
*oidp
,
10592 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10594 struct proc
*p
= req
->p
;
10595 int new_value
, old_value
, changed
= 0;
10598 error
= nspace_resolver_get_proc_state(p
, &old_value
);
10603 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10605 if (error
== 0 && changed
) {
10606 error
= nspace_resolver_set_proc_state(p
, new_value
);
10611 /* decorate this process as the dataless file resolver */
10612 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
,
10613 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10614 0, 0, sysctl_nspace_resolver
, "I", "");
10617 sysctl_nspace_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10618 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10620 struct proc
*p
= req
->p
;
10621 int new_value
, old_value
, changed
= 0;
10624 error
= nspace_materialization_get_proc_state(p
, &old_value
);
10629 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10631 if (error
== 0 && changed
) {
10632 error
= nspace_materialization_set_proc_state(p
, new_value
);
10637 /* decorate this process as not wanting to materialize dataless files */
10638 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
,
10639 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10640 0, 0, sysctl_nspace_prevent_materialization
, "I", "");
10643 sysctl_nspace_thread_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10644 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10646 int new_value
, old_value
, changed
= 0;
10649 error
= nspace_materialization_get_thread_state(&old_value
);
10654 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10656 if (error
== 0 && changed
) {
10657 error
= nspace_materialization_set_thread_state(new_value
);
10662 /* decorate this thread as not wanting to materialize dataless files */
10663 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
,
10664 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10665 0, 0, sysctl_nspace_thread_prevent_materialization
, "I", "");
10668 sysctl_nspace_complete(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
,
10669 __unused
int arg2
, struct sysctl_req
*req
)
10671 struct proc
*p
= req
->p
;
10672 uint32_t req_status
[2] = { 0, 0 };
10673 int error
, is_resolver
, changed
= 0;
10675 error
= nspace_resolver_get_proc_state(p
, &is_resolver
);
10680 if (!is_resolver
) {
10684 error
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
),
10691 * req_status[0] is the req_id
10693 * req_status[1] is the errno
10695 if (error
== 0 && changed
) {
10696 nspace_resolver_req_completed(req_status
[0],
10697 (int)req_status
[1]);
10702 /* Resolver reports completed reqs here. */
10703 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
,
10704 CTLTYPE_OPAQUE
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10705 0, 0, sysctl_nspace_complete
, "-", "");
10707 #endif /* CONFIG_DATALESS_FILES */
10709 #if CONFIG_DATALESS_FILES
10710 #define __no_dataless_unused /* nothing */
10712 #define __no_dataless_unused __unused
10716 nspace_resolver_init(void)
10718 #if CONFIG_DATALESS_FILES
10719 nspace_resolver_request_lck_grp
=
10720 lck_grp_alloc_init("file namespace resolver", NULL
);
10722 lck_mtx_init(&nspace_resolver_request_hash_mutex
,
10723 nspace_resolver_request_lck_grp
, NULL
);
10725 nspace_resolver_request_hashtbl
=
10726 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
,
10727 M_VNODE
/* XXX */, &nspace_resolver_request_hashmask
);
10728 #endif /* CONFIG_DATALESS_FILES */
10732 nspace_resolver_exited(struct proc
*p __no_dataless_unused
)
10734 #if CONFIG_DATALESS_FILES
10735 struct nspace_resolver_requesthead
*bucket
;
10736 struct nspace_resolver_request
*req
;
10741 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10742 p
== nspace_resolver_proc
) {
10743 for (idx
= 0; idx
<= nspace_resolver_request_hashmask
; idx
++) {
10744 bucket
= &nspace_resolver_request_hashtbl
[idx
];
10745 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10746 nspace_resolver_req_mark_complete(req
,
10750 nspace_resolver_proc
= NULL
;
10753 NSPACE_REQ_UNLOCK();
10754 #endif /* CONFIG_DATALESS_FILES */
/*
 * Thin wrapper around resolve_nspace_item_ext(): forwards vp and op
 * unchanged, passes NULL as the third (arg) parameter, and returns
 * whatever that call returns.
 */
10758 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
10760 return resolve_nspace_item_ext(vp
, op
, NULL
);
10763 #define DATALESS_RESOLVER_ENTITLEMENT \
10764 "com.apple.private.vfs.dataless-resolver"
10765 #define DATALESS_MANIPULATION_ENTITLEMENT \
10766 "com.apple.private.vfs.dataless-manipulation"
10769 * Return TRUE if the vfs context is associated with a process entitled
10770 * for dataless manipulation.
10772 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10773 * complication around CONFIG_DATALESS_FILES.
10776 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
)
10778 #if CONFIG_DATALESS_FILES
10779 assert(ctx
->vc_thread
== current_thread());
10780 task_t
const task
= current_task();
10781 return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) ||
10782 IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
);
10785 #endif /* CONFIG_DATALESS_FILES */
10789 resolve_nspace_item_ext(
10790 struct vnode
*vp __no_dataless_unused
,
10791 uint64_t op __no_dataless_unused
,
10792 void *arg __unused
)
10794 #if CONFIG_DATALESS_FILES
10800 struct nspace_resolver_request req
;
10802 // only allow namespace events on regular files, directories and symlinks.
10803 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
10808 // if this is a snapshot event and the vnode is on a
10809 // disk image just pretend nothing happened since any
10810 // change to the disk image will cause the disk image
10811 // itself to get backed up and this avoids multi-way
10812 // deadlocks between the snapshot handler and the ever
10813 // popular diskimages-helper process. the variable
10814 // nspace_allow_virtual_devs allows this behavior to
10815 // be overridden (for use by the Mobile TimeMachine
10816 // testing infrastructure which uses disk images)
10818 if (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) {
10819 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled");
10823 error
= nspace_materialization_is_prevented();
10825 os_log_debug(OS_LOG_DEFAULT
,
10826 "NSPACE process/thread is decorated as no-materialization");
10830 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10831 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10832 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port");
10833 // Treat this like being unable to access the backing
10838 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
10839 if (path
== NULL
) {
10841 goto out_release_port
;
10843 path_len
= MAXPATHLEN
;
10845 error
= vn_getpath(vp
, path
, &path_len
);
10847 int xxx_rdar44371223
; /* XXX Mig bug */
10848 req
.r_req_id
= next_nspace_req_id();
10849 req
.r_resolver_error
= 0;
10853 error
= nspace_resolver_req_add(&req
);
10854 NSPACE_REQ_UNLOCK();
10856 goto out_release_port
;
10859 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call");
10860 kr
= send_nspace_resolve_path(mp
, req
.r_req_id
,
10861 current_proc()->p_pid
, (uint32_t)(op
& 0xffffffff),
10862 path
, &xxx_rdar44371223
);
10863 if (kr
!= KERN_SUCCESS
) {
10864 // Also treat this like being unable to access
10865 // the backing store server.
10866 os_log_error(OS_LOG_DEFAULT
,
10867 "NSPACE resolve_path failure: %d", kr
);
10871 nspace_resolver_req_remove(&req
);
10872 NSPACE_REQ_UNLOCK();
10873 goto out_release_port
;
10876 // Give back the memory we allocated earlier while
10877 // we wait; we no longer need it.
10878 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10881 // Request has been submitted to the resolver.
10882 // Now (interruptibly) wait for completion.
10883 // Upon return, the request will have been removed
10884 // from the lookup table.
10885 error
= nspace_resolver_req_wait(&req
);
10889 if (path
!= NULL
) {
10890 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10892 ipc_port_release_send(mp
);
10897 #endif /* CONFIG_DATALESS_FILES */
10901 nspace_snapshot_event(__unused vnode_t vp
, __unused
time_t ctime
,
10902 __unused
uint64_t op_type
, __unused
void *arg
)
/*
 * Format a volfs-style path ("/.vol/<fsid>/<fileid>") for vp into path.
 *
 * vp    vnode whose va_fsid and va_fileid attributes are requested
 * path  output buffer
 * len   in: size passed to snprintf() as the buffer limit;
 *       out: overwritten with snprintf()'s return value plus one
 *
 * NOTE(review): the two snprintf() calls below appear to be the failure and
 * success arms of the vnode_getattr() check; the line joining them is not
 * visible in this extract -- confirm against the full source.
 */
10909 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
10911 struct vnode_attr va
;
/* Ask only for the two attributes that appear in the formatted path. */
10915 VATTR_WANTED(&va
, va_fsid
);
10916 VATTR_WANTED(&va
, va_fileid
);
/* Attributes are fetched with the kernel context, not the caller's. */
10918 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
/* Attribute lookup failed: emit a fixed placeholder path instead. */
10919 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
/* va_fsid is cast to dev_t for "%d"; va_fileid is printed with "%lld". */
10922 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
/*
 * Compatibility translation for fsctl command values.
 *
 * For each command listed below, a cmd equal to IOCBASECMD(<command>) is
 * translated to the full <command> value.  fsctl_internal() applies this to
 * its cmd argument before extracting the parameter length with
 * IOCPARM_LEN().
 *
 * NOTE(review): presumably IOCBASECMD() strips the encoded parameter length
 * from a command -- confirm against the ioccom header.  The result for
 * values that match none of the cases is not visible in this extract.
 */
10930 static unsigned long
10931 fsctl_bogus_command_compat(unsigned long cmd
)
10934 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10935 return FSIOC_SYNC_VOLUME
;
10936 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10937 return FSIOC_ROUTEFS_SETROUTEID
;
10938 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10939 return FSIOC_SET_PACKAGE_EXTS
;
10940 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10941 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
10942 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10943 return DISK_CONDITIONER_IOC_GET
;
10944 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10945 return DISK_CONDITIONER_IOC_SET
;
10946 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10947 return FSIOC_FIOSEEKHOLE
;
10948 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10949 return FSIOC_FIOSEEKDATA
;
10950 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10951 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
10952 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10953 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
/*
 * Issue the FSIOC_CAS_BSDFLAGS ioctl on vp through VNOP_IOCTL(), passing arg
 * as the ioctl data, FWRITE as the fourth argument and ctx as the context;
 * the VNOP_IOCTL() result is returned unchanged.
 *
 * The FSIOC_CAS_BSDFLAGS case of fsctl_internal() hands this function to
 * chflags0() together with the caller's struct fsioc_cas_bsdflags.
 */
10960 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
)
10962 return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
);
10966 * Make a filesystem-specific control call:
10970 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10975 #define STK_PARAMS 128
10976 char stkbuf
[STK_PARAMS
] = {0};
10977 caddr_t data
, memp
;
10978 vnode_t vp
= *arg_vp
;
10980 if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
10984 cmd
= fsctl_bogus_command_compat(cmd
);
10986 size
= IOCPARM_LEN(cmd
);
10987 if (size
> IOCPARM_MAX
) {
10991 is64bit
= proc_is64bit(p
);
10995 if (size
> sizeof(stkbuf
)) {
10996 if ((memp
= (caddr_t
)kalloc(size
)) == 0) {
11004 if (cmd
& IOC_IN
) {
11006 error
= copyin(udata
, data
, size
);
11015 *(user_addr_t
*)data
= udata
;
11017 *(uint32_t *)data
= (uint32_t)udata
;
11020 } else if ((cmd
& IOC_OUT
) && size
) {
11022 * Zero the buffer so the user always
11023 * gets back something deterministic.
11026 } else if (cmd
& IOC_VOID
) {
11028 *(user_addr_t
*)data
= udata
;
11030 *(uint32_t *)data
= (uint32_t)udata
;
11034 /* Check to see if it's a generic command */
11036 case FSIOC_SYNC_VOLUME
: {
11037 struct vfs_attr vfa
;
11038 mount_t mp
= vp
->v_mount
;
11042 /* record vid of vp so we can drop it below. */
11043 uint32_t vvid
= vp
->v_id
;
11046 * Then grab mount_iterref so that we can release the vnode.
11047 * Without this, a thread may call vnode_iterate_prepare then
11048 * get into a deadlock because we've never released the root vp
11050 error
= mount_iterref(mp
, 0);
11057 if (*(uint32_t*)data
& FSCTL_SYNC_WAIT
) {
11062 * If the filesystem supports multiple filesystems in a
11063 * partition (e.g. APFS volumes in a container), it knows
11064 * that the waitfor argument to VFS_SYNC is a set of flags.
11066 VFSATTR_INIT(&vfa
);
11067 VFSATTR_WANTED(&vfa
, f_capabilities
);
11068 if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) &&
11069 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
11070 ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) &&
11071 ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) {
11075 /* issue the sync for this volume */
11076 (void)sync_callback(mp
, &arg
);
11079 * Then release the mount_iterref once we're done syncing; it's not
11080 * needed for the VNOP_IOCTL below
11082 mount_iterdrop(mp
);
11084 if (arg
& FSCTL_SYNC_FULLSYNC
) {
11085 /* re-obtain vnode iocount on the root vp, if possible */
11086 error
= vnode_getwithvid(vp
, vvid
);
11088 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
11092 /* mark the argument VP as having been released */
11097 case FSIOC_ROUTEFS_SETROUTEID
: {
11099 char routepath
[MAXPATHLEN
];
11102 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11105 bzero(routepath
, MAXPATHLEN
);
11106 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
11110 error
= routefs_kernel_mount(routepath
);
11118 case FSIOC_SET_PACKAGE_EXTS
: {
11119 user_addr_t ext_strings
;
11120 uint32_t num_entries
;
11121 uint32_t max_width
;
11123 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
11127 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
11128 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
11129 // either you're 64-bit and passed a 64-bit struct or
11130 // you're 32-bit and passed a 32-bit struct. otherwise
11137 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
11138 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
11139 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
11141 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
11142 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
11143 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
11145 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
11149 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
11151 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11155 mount_lock(vp
->v_mount
);
11156 if (data
[0] != 0) {
11157 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
11158 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
11159 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11160 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
11161 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
11164 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11165 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
11167 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
11168 vp
->v_mount
->fstypename_override
[0] = '\0';
11170 mount_unlock(vp
->v_mount
);
11175 case DISK_CONDITIONER_IOC_GET
: {
11176 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11180 case DISK_CONDITIONER_IOC_SET
: {
11181 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11185 case FSIOC_CAS_BSDFLAGS
: {
11186 struct fsioc_cas_bsdflags
*cas
= (struct fsioc_cas_bsdflags
*)data
;
11187 struct vnode_attr va
;
11190 VATTR_SET(&va
, va_flags
, cas
->new_flags
);
11192 error
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
);
11196 case FSIOC_FD_ONLY_OPEN_ONCE
: {
11197 if (vnode_usecount(vp
) > 1) {
11206 /* other, known commands shouldn't be passed down here */
11209 case F_TRIM_ACTIVE_FILE
:
11211 case F_TRANSCODEKEY
:
11212 case F_GETPROTECTIONLEVEL
:
11213 case F_GETDEFAULTPROTLEVEL
:
11214 case F_MAKECOMPRESSED
:
11215 case F_SET_GREEDY_MODE
:
11216 case F_SETSTATICCONTENT
:
11218 case F_SETBACKINGSTORE
:
11219 case F_GETPATH_MTMINFO
:
11220 case APFSIOC_REVERT_TO_SNAPSHOT
:
11221 case FSIOC_FIOSEEKHOLE
:
11222 case FSIOC_FIOSEEKDATA
:
11223 case HFS_GET_BOOT_INFO
:
11224 case HFS_SET_BOOT_INFO
:
11228 case F_BARRIERFSYNC
:
11234 /* Invoke the filesystem-specific code */
11235 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
11237 } /* end switch stmt */
11240 * if no errors, copy any data to user. Size was
11241 * already set and checked above.
11243 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
11244 error
= copyout(data
, udata
, size
);
11257 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
11260 struct nameidata nd
;
11263 vfs_context_t ctx
= vfs_context_current();
11265 AUDIT_ARG(cmd
, uap
->cmd
);
11266 AUDIT_ARG(value32
, uap
->options
);
11267 /* Get the vnode for the file we are getting info on: */
11270 // if we come through fsctl() then the file is by definition not open.
11271 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11272 // lest the caller mistakenly thinks the only open is their own (but in
11273 // reality it's someone else's).
11275 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
11278 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
11279 nameiflags
|= FOLLOW
;
11281 if (uap
->cmd
== FSIOC_FIRMLINK_CTL
) {
11282 nameiflags
|= (CN_FIRMLINK_NOFOLLOW
| NOCACHE
);
11284 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
11285 UIO_USERSPACE
, uap
->path
, ctx
);
11286 if ((error
= namei(&nd
))) {
11293 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
11299 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11309 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11313 vfs_context_t ctx
= vfs_context_current();
11316 AUDIT_ARG(fd
, uap
->fd
);
11317 AUDIT_ARG(cmd
, uap
->cmd
);
11318 AUDIT_ARG(value32
, uap
->options
);
11320 /* Get the vnode for the file we are getting info on: */
11321 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11325 if ((error
= vnode_getwithref(vp
))) {
11331 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
11338 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11342 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11349 /* end of fsctl system call */
11352 * Retrieve the data of an extended attribute.
11355 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11358 struct nameidata nd
;
11359 char attrname
[XATTR_MAXNAMELEN
+ 1];
11360 vfs_context_t ctx
= vfs_context_current();
11362 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11363 size_t attrsize
= 0;
11365 u_int32_t nameiflags
;
11367 char uio_buf
[UIO_SIZEOF(1)];
11369 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11373 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11374 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11375 if ((error
= namei(&nd
))) {
11381 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11385 if (xattr_protected(attrname
)) {
11386 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
11392 * the specific check for 0xffffffff is a hack to preserve
11393 * binary compatibility in K64 with applications that discovered
11394 * that passing in a buf pointer and a size of -1 resulted in
11395 * just the size of the indicated extended attribute being returned.
11396 * this isn't part of the documented behavior, but because of the
11397 * original implementation's check for "uap->size > 0", this behavior
11398 * was allowed. In K32 that check turned into a signed comparison
11399 * even though uap->size is unsigned... in K64, we blow by that
11400 * check because uap->size is unsigned and doesn't get sign smeared
11401 * in the munger for a 32 bit user app. we also need to add a
11402 * check to limit the maximum size of the buffer being passed in...
11403 * unfortunately, the underlying filesystems seem to just malloc
11404 * the requested size even if the actual extended attribute is tiny.
11405 * because that malloc is for kernel wired memory, we have to put a
11406 * sane limit on it.
11408 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11409 * U64 running on K64 will yield -1 (64 bits wide)
11410 * U32/U64 running on K32 will yield -1 (32 bits wide)
11412 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
11417 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11418 uap
->size
= XATTR_MAXSIZE
;
11421 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11422 &uio_buf
[0], sizeof(uio_buf
));
11423 uio_addiov(auio
, uap
->value
, uap
->size
);
11426 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11431 *retval
= uap
->size
- uio_resid(auio
);
11433 *retval
= (user_ssize_t
)attrsize
;
11440 * Retrieve the data of an extended attribute.
11443 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11446 char attrname
[XATTR_MAXNAMELEN
+ 1];
11448 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11449 size_t attrsize
= 0;
11452 char uio_buf
[UIO_SIZEOF(1)];
11454 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11458 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11461 if ((error
= vnode_getwithref(vp
))) {
11462 file_drop(uap
->fd
);
11465 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11469 if (xattr_protected(attrname
)) {
11473 if (uap
->value
&& uap
->size
> 0) {
11474 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11475 &uio_buf
[0], sizeof(uio_buf
));
11476 uio_addiov(auio
, uap
->value
, uap
->size
);
11479 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11481 (void)vnode_put(vp
);
11482 file_drop(uap
->fd
);
11485 *retval
= uap
->size
- uio_resid(auio
);
11487 *retval
= (user_ssize_t
)attrsize
;
11493 * Set the data of an extended attribute.
11496 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11499 struct nameidata nd
;
11500 char attrname
[XATTR_MAXNAMELEN
+ 1];
11501 vfs_context_t ctx
= vfs_context_current();
11503 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11505 u_int32_t nameiflags
;
11507 char uio_buf
[UIO_SIZEOF(1)];
11509 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11513 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11515 if (error
== EPERM
) {
11516 /* if the string won't fit in attrname, copyinstr emits EPERM */
11517 return ENAMETOOLONG
;
11519 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11522 if (xattr_protected(attrname
)) {
11525 if (uap
->size
!= 0 && uap
->value
== 0) {
11529 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11530 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11531 if ((error
= namei(&nd
))) {
11537 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11538 &uio_buf
[0], sizeof(uio_buf
));
11539 uio_addiov(auio
, uap
->value
, uap
->size
);
11541 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
11544 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11555 * Set the data of an extended attribute.
11558 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
11561 char attrname
[XATTR_MAXNAMELEN
+ 1];
11563 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11566 char uio_buf
[UIO_SIZEOF(1)];
11568 vfs_context_t ctx
= vfs_context_current();
11571 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11575 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11577 if (error
== EPERM
) {
11578 /* if the string won't fit in attrname, copyinstr emits EPERM */
11579 return ENAMETOOLONG
;
11581 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11584 if (xattr_protected(attrname
)) {
11587 if (uap
->size
!= 0 && uap
->value
== 0) {
11590 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11593 if ((error
= vnode_getwithref(vp
))) {
11594 file_drop(uap
->fd
);
11597 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11598 &uio_buf
[0], sizeof(uio_buf
));
11599 uio_addiov(auio
, uap
->value
, uap
->size
);
11601 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
11604 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11610 file_drop(uap
->fd
);
11616 * Remove an extended attribute.
11617 * XXX Code duplication here.
11620 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
11623 struct nameidata nd
;
11624 char attrname
[XATTR_MAXNAMELEN
+ 1];
11625 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11626 vfs_context_t ctx
= vfs_context_current();
11628 u_int32_t nameiflags
;
11631 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11635 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11639 if (xattr_protected(attrname
)) {
11642 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11643 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11644 if ((error
= namei(&nd
))) {
11650 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
11653 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11664 * Remove an extended attribute.
11665 * XXX Code duplication here.
11668 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
11671 char attrname
[XATTR_MAXNAMELEN
+ 1];
11675 vfs_context_t ctx
= vfs_context_current();
11678 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11682 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11686 if (xattr_protected(attrname
)) {
11689 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11692 if ((error
= vnode_getwithref(vp
))) {
11693 file_drop(uap
->fd
);
11697 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
11700 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11706 file_drop(uap
->fd
);
11712 * Retrieve the list of extended attribute names.
11713 * XXX Code duplication here.
11716 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11719 struct nameidata nd
;
11720 vfs_context_t ctx
= vfs_context_current();
11722 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11723 size_t attrsize
= 0;
11724 u_int32_t nameiflags
;
11726 char uio_buf
[UIO_SIZEOF(1)];
11728 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11732 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11733 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11734 if ((error
= namei(&nd
))) {
11739 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11740 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11741 &uio_buf
[0], sizeof(uio_buf
));
11742 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11745 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11749 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11751 *retval
= (user_ssize_t
)attrsize
;
11757 * Retrieve the list of extended attribute names.
11758 * XXX Code duplication here.
11761 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11765 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11766 size_t attrsize
= 0;
11768 char uio_buf
[UIO_SIZEOF(1)];
11770 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11774 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11777 if ((error
= vnode_getwithref(vp
))) {
11778 file_drop(uap
->fd
);
11781 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11782 auio
= uio_createwithbuffer(1, 0, spacetype
,
11783 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11784 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11787 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11790 file_drop(uap
->fd
);
11792 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11794 *retval
= (user_ssize_t
)attrsize
;
11800 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11801 vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
)
11804 struct mount
*mp
= NULL
;
11808 /* maximum number of times to retry build_path */
11809 unsigned int retries
= 0x10;
11811 if (bufsize
> PAGE_SIZE
) {
11820 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11821 error
= ENOTSUP
; /* unexpected failure */
11827 struct vfs_attr vfsattr
;
11828 int use_vfs_root
= TRUE
;
11830 VFSATTR_INIT(&vfsattr
);
11831 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
11832 if (!(options
& FSOPT_ISREALFSID
) &&
11833 vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 &&
11834 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
11835 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) &&
11836 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) {
11837 use_vfs_root
= FALSE
;
11841 if (use_vfs_root
) {
11842 error
= VFS_ROOT(mp
, &vp
, ctx
);
11844 error
= VFS_VGET(mp
, objid
, &vp
, ctx
);
11847 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11850 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11852 * If the fileid isn't found and we're in a union
11853 * mount volume, then see if the fileid is in the
11854 * mounted-on volume.
11856 struct mount
*tmp
= mp
;
11857 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11859 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
11871 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11878 /* Obtain the absolute path to this vnode. */
11879 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11880 if (options
& FSOPT_NOFIRMLINKPATH
) {
11881 bpflags
|= BUILDPATH_NO_FIRMLINK
;
11883 bpflags
|= BUILDPATH_CHECK_MOVED
;
11884 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11888 /* there was a race building the path, try a few more times */
11889 if (error
== EAGAIN
) {
11900 AUDIT_ARG(text
, buf
);
11902 if (kdebug_enable
) {
11903 long dbg_parms
[NUMPARMS
];
11906 dbg_namelen
= (int)sizeof(dbg_parms
);
11908 if (length
< dbg_namelen
) {
11909 memcpy((char *)dbg_parms
, buf
, length
);
11910 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11912 dbg_namelen
= length
;
11914 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11917 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11918 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
11921 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11928 * Obtain the full pathname of a file system object by id.
11931 fsgetpath_extended(user_addr_t buf
, int bufsize
, user_addr_t user_fsid
, uint64_t objid
,
11932 uint32_t options
, user_ssize_t
*retval
)
11934 vfs_context_t ctx
= vfs_context_current();
11940 if (options
& ~(FSOPT_NOFIRMLINKPATH
| FSOPT_ISREALFSID
)) {
11944 if ((error
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11947 AUDIT_ARG(value32
, fsid
.val
[0]);
11948 AUDIT_ARG(value64
, objid
);
11949 /* Restrict output buffer size for now. */
11951 if (bufsize
> PAGE_SIZE
|| bufsize
<= 0) {
11954 MALLOC(realpath
, char *, bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
11955 if (realpath
== NULL
) {
11959 error
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
,
11966 error
= copyout((caddr_t
)realpath
, buf
, length
);
11968 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11971 FREE(realpath
, M_TEMP
);
11977 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
11979 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
/*
 * Variant of fsgetpath that carries caller-supplied options: forwards
 * uap->buf, uap->bufsize, uap->fsid, uap->objid and uap->options, together
 * with retval, to fsgetpath_extended() and returns its result.  The proc
 * argument is unused.
 */
11984 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args
*uap
, user_ssize_t
*retval
)
11986 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
11987 uap
->options
, retval
);
11991 * Common routine to handle various flavors of statfs data heading out
11994 * Returns: 0 Success
11998 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
11999 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
12000 boolean_t partial_copy
)
12003 int my_size
, copy_size
;
12006 struct user64_statfs sfs
;
12007 my_size
= copy_size
= sizeof(sfs
);
12008 bzero(&sfs
, my_size
);
12009 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12010 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12011 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12012 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
12013 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
12014 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
12015 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
12016 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
12017 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
12018 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
12019 sfs
.f_fsid
= sfsp
->f_fsid
;
12020 sfs
.f_owner
= sfsp
->f_owner
;
12021 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12022 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12024 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12026 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12027 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12029 if (partial_copy
) {
12030 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12032 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12034 struct user32_statfs sfs
;
12036 my_size
= copy_size
= sizeof(sfs
);
12037 bzero(&sfs
, my_size
);
12039 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12040 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12041 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12044 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12045 * have to fudge the numbers here in that case. We inflate the blocksize in order
12046 * to reflect the filesystem size as best we can.
12048 if ((sfsp
->f_blocks
> INT_MAX
)
12049 /* Hack for 4061702 . I think the real fix is for Carbon to
12050 * look for some volume capability and not depend on hidden
12051 * semantics agreed between a FS and carbon.
12052 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12053 * for Carbon to set bNoVolumeSizes volume attribute.
12054 * Without this the webdavfs files cannot be copied onto
12055 * disk as they look huge. This change should not affect
12056 * XSAN as they should not be setting these to -1.
12058 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
12059 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
12060 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
12064 * Work out how far we have to shift the block count down to make it fit.
12065 * Note that it's possible to have to shift so far that the resulting
12066 * blocksize would be unreportably large. At that point, we will clip
12067 * any values that don't fit.
12069 * For safety's sake, we also ensure that f_iosize is never reported as
12070 * being smaller than f_bsize.
12072 for (shift
= 0; shift
< 32; shift
++) {
12073 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
12076 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
12080 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12081 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
12082 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
12083 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
12084 #undef __SHIFT_OR_CLIP
12085 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
12086 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
12088 /* filesystem is small enough to be reported honestly */
12089 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
12090 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
12091 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
12092 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
12093 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
12095 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
12096 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
12097 sfs
.f_fsid
= sfsp
->f_fsid
;
12098 sfs
.f_owner
= sfsp
->f_owner
;
12099 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12100 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12102 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12104 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12105 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12107 if (partial_copy
) {
12108 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12110 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12113 if (sizep
!= NULL
) {
12120 * copy stat structure into user_stat structure.
12123 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
12125 bzero(usbp
, sizeof(*usbp
));
12127 usbp
->st_dev
= sbp
->st_dev
;
12128 usbp
->st_ino
= sbp
->st_ino
;
12129 usbp
->st_mode
= sbp
->st_mode
;
12130 usbp
->st_nlink
= sbp
->st_nlink
;
12131 usbp
->st_uid
= sbp
->st_uid
;
12132 usbp
->st_gid
= sbp
->st_gid
;
12133 usbp
->st_rdev
= sbp
->st_rdev
;
12134 #ifndef _POSIX_C_SOURCE
12135 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12136 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12137 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12138 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12139 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12140 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12142 usbp
->st_atime
= sbp
->st_atime
;
12143 usbp
->st_atimensec
= sbp
->st_atimensec
;
12144 usbp
->st_mtime
= sbp
->st_mtime
;
12145 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12146 usbp
->st_ctime
= sbp
->st_ctime
;
12147 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12149 usbp
->st_size
= sbp
->st_size
;
12150 usbp
->st_blocks
= sbp
->st_blocks
;
12151 usbp
->st_blksize
= sbp
->st_blksize
;
12152 usbp
->st_flags
= sbp
->st_flags
;
12153 usbp
->st_gen
= sbp
->st_gen
;
12154 usbp
->st_lspare
= sbp
->st_lspare
;
12155 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12156 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12160 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
12162 bzero(usbp
, sizeof(*usbp
));
12164 usbp
->st_dev
= sbp
->st_dev
;
12165 usbp
->st_ino
= sbp
->st_ino
;
12166 usbp
->st_mode
= sbp
->st_mode
;
12167 usbp
->st_nlink
= sbp
->st_nlink
;
12168 usbp
->st_uid
= sbp
->st_uid
;
12169 usbp
->st_gid
= sbp
->st_gid
;
12170 usbp
->st_rdev
= sbp
->st_rdev
;
12171 #ifndef _POSIX_C_SOURCE
12172 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12173 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12174 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12175 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12176 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12177 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12179 usbp
->st_atime
= sbp
->st_atime
;
12180 usbp
->st_atimensec
= sbp
->st_atimensec
;
12181 usbp
->st_mtime
= sbp
->st_mtime
;
12182 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12183 usbp
->st_ctime
= sbp
->st_ctime
;
12184 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12186 usbp
->st_size
= sbp
->st_size
;
12187 usbp
->st_blocks
= sbp
->st_blocks
;
12188 usbp
->st_blksize
= sbp
->st_blksize
;
12189 usbp
->st_flags
= sbp
->st_flags
;
12190 usbp
->st_gen
= sbp
->st_gen
;
12191 usbp
->st_lspare
= sbp
->st_lspare
;
12192 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12193 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12197 * copy stat64 structure into user_stat64 structure.
12200 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
12202 bzero(usbp
, sizeof(*usbp
));
12204 usbp
->st_dev
= sbp
->st_dev
;
12205 usbp
->st_ino
= sbp
->st_ino
;
12206 usbp
->st_mode
= sbp
->st_mode
;
12207 usbp
->st_nlink
= sbp
->st_nlink
;
12208 usbp
->st_uid
= sbp
->st_uid
;
12209 usbp
->st_gid
= sbp
->st_gid
;
12210 usbp
->st_rdev
= sbp
->st_rdev
;
12211 #ifndef _POSIX_C_SOURCE
12212 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12213 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12214 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12215 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12216 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12217 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12218 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12219 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12221 usbp
->st_atime
= sbp
->st_atime
;
12222 usbp
->st_atimensec
= sbp
->st_atimensec
;
12223 usbp
->st_mtime
= sbp
->st_mtime
;
12224 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12225 usbp
->st_ctime
= sbp
->st_ctime
;
12226 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12227 usbp
->st_birthtime
= sbp
->st_birthtime
;
12228 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12230 usbp
->st_size
= sbp
->st_size
;
12231 usbp
->st_blocks
= sbp
->st_blocks
;
12232 usbp
->st_blksize
= sbp
->st_blksize
;
12233 usbp
->st_flags
= sbp
->st_flags
;
12234 usbp
->st_gen
= sbp
->st_gen
;
12235 usbp
->st_lspare
= sbp
->st_lspare
;
12236 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12237 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12241 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
12243 bzero(usbp
, sizeof(*usbp
));
12245 usbp
->st_dev
= sbp
->st_dev
;
12246 usbp
->st_ino
= sbp
->st_ino
;
12247 usbp
->st_mode
= sbp
->st_mode
;
12248 usbp
->st_nlink
= sbp
->st_nlink
;
12249 usbp
->st_uid
= sbp
->st_uid
;
12250 usbp
->st_gid
= sbp
->st_gid
;
12251 usbp
->st_rdev
= sbp
->st_rdev
;
12252 #ifndef _POSIX_C_SOURCE
12253 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12254 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12255 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12256 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12257 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12258 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12259 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12260 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12262 usbp
->st_atime
= sbp
->st_atime
;
12263 usbp
->st_atimensec
= sbp
->st_atimensec
;
12264 usbp
->st_mtime
= sbp
->st_mtime
;
12265 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12266 usbp
->st_ctime
= sbp
->st_ctime
;
12267 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12268 usbp
->st_birthtime
= sbp
->st_birthtime
;
12269 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12271 usbp
->st_size
= sbp
->st_size
;
12272 usbp
->st_blocks
= sbp
->st_blocks
;
12273 usbp
->st_blksize
= sbp
->st_blksize
;
12274 usbp
->st_flags
= sbp
->st_flags
;
12275 usbp
->st_gen
= sbp
->st_gen
;
12276 usbp
->st_lspare
= sbp
->st_lspare
;
12277 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12278 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12282 * Purge buffer cache for simulating cold starts
12285 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
12287 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
12289 return VNODE_RETURNED
;
12293 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
12295 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
12297 return VFS_RETURNED
;
12301 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
12303 if (!kauth_cred_issuser(kauth_cred_get())) {
12307 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
12313 * gets the vnode associated with the (unnamed) snapshot directory
12314 * for a Filesystem. The snapshot directory vnode is returned with
12315 * an iocount on it.
12318 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
12320 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
12324 * Get the snapshot vnode.
12326 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12327 * needs nameidone() on ndp.
12329 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12331 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12335 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12336 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12337 #if !CONFIG_TRIGGERS
12340 enum path_operation pathop
,
12346 struct vfs_attr vfa
;
12351 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12356 if (!vnode_isvroot(*rvpp
)) {
12361 /* Make sure the filesystem supports snapshots */
12362 VFSATTR_INIT(&vfa
);
12363 VFSATTR_WANTED(&vfa
, f_capabilities
);
12364 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12365 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12366 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12367 VOL_CAP_INT_SNAPSHOT
)) ||
12368 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12369 VOL_CAP_INT_SNAPSHOT
))) {
12374 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
12379 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12380 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12386 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12387 * (the length returned by copyinstr includes the terminating NUL)
12389 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12390 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
12394 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12397 if (i
< (int)name_len
) {
12403 if (op
== CREATE
) {
12404 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12406 } else if (op
== DELETE
) {
12407 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12415 /* Check if the snapshot already exists ... */
12416 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12417 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12418 ndp
->ni_dvp
= *sdvpp
;
12420 error
= namei(ndp
);
12422 FREE(name_buf
, M_TEMP
);
12438 * create a filesystem snapshot (for supporting filesystems)
12440 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12441 * We get to the (unnamed) snapshot directory vnode and create the vnode
12442 * for the snapshot in it.
12446 * a) Passed in name for snapshot cannot have slashes.
12447 * b) name can't be "." or ".."
12449 * Since this requires superuser privileges, vnode_authorize calls are not
12453 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12456 vnode_t rvp
, snapdvp
;
12458 struct nameidata namend
;
12460 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
12466 if (namend
.ni_vp
) {
12467 vnode_put(namend
.ni_vp
);
12470 struct vnode_attr va
;
12471 vnode_t vp
= NULLVP
;
12474 VATTR_SET(&va
, va_type
, VREG
);
12475 VATTR_SET(&va
, va_mode
, 0);
12477 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
12478 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12479 if (!error
&& vp
) {
12484 nameidone(&namend
);
12485 vnode_put(snapdvp
);
12491 * Delete a Filesystem snapshot
12493 * get the vnode for the unnamed snapshot directory and the snapshot and
12494 * delete the snapshot.
12497 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12500 vnode_t rvp
, snapdvp
;
12502 struct nameidata namend
;
12504 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
12510 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
12511 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12513 vnode_put(namend
.ni_vp
);
12514 nameidone(&namend
);
12515 vnode_put(snapdvp
);
12522 * Revert a filesystem to a snapshot
12524 * Marks the filesystem to revert to the given snapshot on next mount.
12527 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12533 struct fs_snapshot_revert_args revert_data
;
12534 struct componentname cnp
;
12538 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12542 mp
= vnode_mount(rvp
);
12544 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12545 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12547 FREE(name_buf
, M_TEMP
);
12553 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
12555 FREE(name_buf
, M_TEMP
);
12562 * Grab mount_iterref so that we can release the vnode,
12563 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12565 error
= mount_iterref(mp
, 0);
12568 FREE(name_buf
, M_TEMP
);
12572 memset(&cnp
, 0, sizeof(cnp
));
12573 cnp
.cn_pnbuf
= (char *)name_buf
;
12574 cnp
.cn_nameiop
= LOOKUP
;
12575 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12576 cnp
.cn_pnlen
= MAXPATHLEN
;
12577 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12578 cnp
.cn_namelen
= (int)name_len
;
12579 revert_data
.sr_cnp
= &cnp
;
12581 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
12582 mount_iterdrop(mp
);
12583 FREE(name_buf
, M_TEMP
);
12586 /* If there was any error, try again using VNOP_IOCTL */
12589 struct nameidata namend
;
12591 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
12598 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
12601 vnode_put(namend
.ni_vp
);
12602 nameidone(&namend
);
12603 vnode_put(snapdvp
);
12611 * rename a Filesystem snapshot
12613 * get the vnode for the unnamed snapshot directory and the snapshot and
12614 * rename the snapshot. This is a very specialised (and simple) case of
12615 * rename(2) (which has to deal with a lot more complications). It differs
12616 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12619 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
12620 __unused
uint32_t flags
, vfs_context_t ctx
)
12622 vnode_t rvp
, snapdvp
;
12624 caddr_t newname_buf
;
12627 struct nameidata
*fromnd
, *tond
;
12628 /* carving out a chunk for structs that are too big to be on stack. */
12630 struct nameidata from_node
;
12631 struct nameidata to_node
;
12634 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
12635 fromnd
= &__rename_data
->from_node
;
12636 tond
= &__rename_data
->to_node
;
12638 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
12643 fvp
= fromnd
->ni_vp
;
12645 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12646 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
12652 * Some sanity checks- new name can't be empty, "." or ".." or have
12654 * (the length returned by copyinstr includes the terminating NUL)
12656 * The FS rename VNOP is supposed to handle this but we'll pick it
12659 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
12660 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
12664 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
12667 if (i
< (int)name_len
) {
12673 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
12680 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
12681 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
12682 tond
->ni_dvp
= snapdvp
;
12684 error
= namei(tond
);
12687 } else if (tond
->ni_vp
) {
12689 * snapshot rename behaves differently than rename(2) - if the
12690 * new name exists, EEXIST is returned.
12692 vnode_put(tond
->ni_vp
);
12697 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
12698 &tond
->ni_cnd
, ctx
);
12703 FREE(newname_buf
, M_TEMP
);
12705 vnode_put(snapdvp
);
12709 FREE(__rename_data
, M_TEMP
);
12714 * Mount a Filesystem snapshot
12716 * get the vnode for the unnamed snapshot directory and the snapshot and
12717 * mount the snapshot.
12720 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
12721 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
12723 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
12725 struct nameidata
*snapndp
, *dirndp
;
12726 /* carving out a chunk for structs that are too big to be on stack. */
12728 struct nameidata snapnd
;
12729 struct nameidata dirnd
;
12730 } * __snapshot_mount_data
;
12732 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
12734 snapndp
= &__snapshot_mount_data
->snapnd
;
12735 dirndp
= &__snapshot_mount_data
->dirnd
;
12737 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
12743 snapvp
= snapndp
->ni_vp
;
12744 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
12749 /* Get the vnode to be covered */
12750 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
12751 UIO_USERSPACE
, directory
, ctx
);
12752 error
= namei(dirndp
);
12757 vp
= dirndp
->ni_vp
;
12758 pvp
= dirndp
->ni_dvp
;
12760 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
12763 mount_t mp
= vnode_mount(rvp
);
12764 struct fs_snapshot_mount_args smnt_data
;
12766 smnt_data
.sm_mp
= mp
;
12767 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
12768 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
12769 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
12770 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12778 vnode_put(snapdvp
);
12780 nameidone(snapndp
);
12782 FREE(__snapshot_mount_data
, M_TEMP
);
12787 * Root from a snapshot of the filesystem
12789 * Marks the filesystem to root from the given snapshot on next boot.
12792 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12798 struct fs_snapshot_root_args root_data
;
12799 struct componentname cnp
;
12803 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12807 mp
= vnode_mount(rvp
);
12809 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12810 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12812 FREE(name_buf
, M_TEMP
);
12817 // XXX MAC checks ?
12820 * Grab mount_iterref so that we can release the vnode,
12821 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12823 error
= mount_iterref(mp
, 0);
12826 FREE(name_buf
, M_TEMP
);
12830 memset(&cnp
, 0, sizeof(cnp
));
12831 cnp
.cn_pnbuf
= (char *)name_buf
;
12832 cnp
.cn_nameiop
= LOOKUP
;
12833 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12834 cnp
.cn_pnlen
= MAXPATHLEN
;
12835 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12836 cnp
.cn_namelen
= (int)name_len
;
12837 root_data
.sr_cnp
= &cnp
;
12839 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12841 mount_iterdrop(mp
);
12842 FREE(name_buf
, M_TEMP
);
12848 * FS snapshot operations dispatcher
12851 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12852 __unused
int32_t *retval
)
12855 vfs_context_t ctx
= vfs_context_current();
12857 AUDIT_ARG(fd
, uap
->dirfd
);
12858 AUDIT_ARG(value32
, uap
->op
);
12860 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12866 * Enforce user authorization for snapshot modification operations
12868 if ((uap
->op
!= SNAPSHOT_OP_MOUNT
) &&
12869 (uap
->op
!= SNAPSHOT_OP_ROOT
)) {
12870 vnode_t dvp
= NULLVP
;
12871 vnode_t devvp
= NULLVP
;
12874 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
12878 mp
= vnode_mount(dvp
);
12879 devvp
= mp
->mnt_devvp
;
12881 /* get an iocount on devvp */
12882 if (devvp
== NULLVP
) {
12883 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
12884 /* for mounts which aren't block devices */
12885 if (error
== ENOENT
) {
12889 error
= vnode_getwithref(devvp
);
12897 if ((vfs_context_issuser(ctx
) == 0) &&
12898 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
12910 case SNAPSHOT_OP_CREATE
:
12911 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12913 case SNAPSHOT_OP_DELETE
:
12914 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12916 case SNAPSHOT_OP_RENAME
:
12917 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12920 case SNAPSHOT_OP_MOUNT
:
12921 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12922 uap
->data
, uap
->flags
, ctx
);
12924 case SNAPSHOT_OP_REVERT
:
12925 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12927 #if CONFIG_MNT_ROOTSNAP
12928 case SNAPSHOT_OP_ROOT
:
12929 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12931 #endif /* CONFIG_MNT_ROOTSNAP */