2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
112 #include <vfs/vfs_disk_conditioner.h>
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
137 #include <miscfs/routefs/routefs.h>
141 #include <security/mac.h>
142 #include <security/mac_framework.h>
/*
 * GET_PATH / RELEASE_PATH obtain and give back a scratch path buffer.  Two
 * alternative definitions are present; the closing #endif names CONFIG_FSE
 * as the selector.  One GET_PATH calls get_pathbuff(), the other allocates
 * MAXPATHLEN bytes with MALLOC_ZONE from M_NAMEI (given back by FREE_ZONE).
 * NOTE(review): the #if/#else lines and the body of the first RELEASE_PATH
 * are not visible in this view -- confirm against the full file.
 */
146 #define GET_PATH(x) \
147 (x) = get_pathbuff();
148 #define RELEASE_PATH(x) \
151 #define GET_PATH(x) \
152 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
153 #define RELEASE_PATH(x) \
154 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
155 #endif /* CONFIG_FSE */
/*
 * Fallback selector values, each supplied only when the symbol is not already
 * defined.  The two HFS boot-info selectors are offsets from
 * FCNTL_FS_SPECIFIC_BASE; the APFS one is an _IOW request carrying a
 * u_int64_t.
 * NOTE(review): the matching #endif lines are not visible in this view.
 */
157 #ifndef HFS_GET_BOOT_INFO
158 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
161 #ifndef HFS_SET_BOOT_INFO
162 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
165 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
166 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
/*
 * File-scope declarations: one external routine declared locally, forward
 * declarations of the helpers used further down, externally defined lock
 * attributes, and two global counters.
 * NOTE(review): several lines of this region are missing from this view
 * (for example the tails of the mount_common and rmdirat_internal
 * prototypes), so treat the prototypes below as partial.
 */
169 extern void disk_conditioner_unmount(mount_t mp
);
171 /* struct for checkdirs iteration */
176 /* callback for checkdirs iteration */
177 static int checkdirs_callback(proc_t p
, void * arg
);
179 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
180 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
181 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
182 static int getfsstat_callback(mount_t mp
, void * arg
);
183 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
184 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
185 static int sync_callback(mount_t
, void *);
186 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
187 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
188 boolean_t partial_copy
);
189 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
/*
 * Shared back end for mount(), fmount(), __mac_mount() and kernel_mount().
 * NOTE(review): the final parameter line of this prototype is not visible
 * here; the definition takes a trailing vfs_context_t ctx.
 */
190 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
191 struct componentname
*cnp
, user_addr_t fsmountargs
,
192 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
194 void vfs_notify_mount(vnode_t pdvp
);
196 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
198 struct fd_vn_data
* fg_vn_data_alloc(void);
201 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
202 * Concurrent lookups (or lookups by ids) on hard links can cause the
203 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
204 * does) to return ENOENT as the path cannot be returned from the name cache
205 * alone. We have no option but to retry and hope to get one namei->reverse path
206 * generation done without an intervening lookup, lookup by id on the hard link
207 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
208 * which currently are the MAC hooks for rename, unlink and rmdir.
210 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
/* NOTE(review): the tail of the rmdirat_internal prototype is not visible here. */
212 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
,
215 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *);
/* Helpers declared only when CONFIG_IMGSRC_ACCESS is defined. */
217 #ifdef CONFIG_IMGSRC_ACCESS
218 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
219 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
220 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
221 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
222 static void mount_end_update(mount_t mp
);
223 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
224 #endif /* CONFIG_IMGSRC_ACCESS */
226 #if CONFIG_LOCKERBOOT
227 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
228 const char *pbdevpath
);
/*
 * snapshot_root is declared twice: plainly under CONFIG_MNT_ROOTSNAP, and
 * again with __attribute__((unused)).
 * NOTE(review): presumably an #else separates the two declarations; that
 * line and the closing #endif are not visible in this view.
 */
232 #if CONFIG_MNT_ROOTSNAP
233 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
235 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
/* Function-pointer hook: int (*)(struct vnode **, struct fileproc *, vfs_context_t). */
238 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
241 int sync_internal(void);
244 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
/* Lock group, group attribute and lock attribute objects defined elsewhere. */
246 extern lck_grp_t
*fd_vn_lck_grp
;
247 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
248 extern lck_attr_t
*fd_vn_lck_attr
;
251 * incremented each time a mount or unmount operation occurs
252 * used to invalidate the cached value of the rootvp in the
253 * mount structure utilized by cache_lookup_path
255 uint32_t mount_generation
= 0;
257 /* counts number of mount and unmount operations */
258 unsigned int vfs_nummntops
= 0;
260 extern const struct fileops vnops
;
261 #if CONFIG_APPLEDOUBLE
262 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
263 #endif /* CONFIG_APPLEDOUBLE */
266 * Virtual File System System Calls
266 * Virtual File System System Calls
269 #if NFSCLIENT || DEVFS || ROUTEFS
271 * Private in-kernel mounting spi (NFS only, not exported)
275 vfs_iskernelmount(mount_t mp
)
277 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
/*
 * kernel_mount
 *
 * In-kernel mount entry point.  Sets up a lookup of the kernel-space string
 * `path` (UIO_SYSSPACE) and hands the request to mount_common() with a NULL
 * label string and kernelmount == TRUE.  `datalen` is marked __unused;
 * `data` is passed on as the filesystem-specific mount arguments, and
 * `syscall_flags` / `kern_flags` become mount_common()'s flags and
 * internal_flags.
 *
 * NOTE(review): the return type, local declarations, the lookup call itself
 * and the cleanup/return path are not visible in this view -- confirm
 * against the full file before relying on the comments below.
 */
282 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
283 void *data
, __unused
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
)
/* Describe a LOOKUP/OP_MOUNT of `path`, following symlinks and requesting WANTPARENT. */
289 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
290 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
293 * Get the vnode to be covered if it's not supplied
/*
 * The failure message is emitted only for snapshot, VM-volume and
 * data-volume kernel mounts.
 */
298 if (kern_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VMVOL
| KERNEL_MOUNT_DATAVOL
)) {
299 printf("failed to locate mount-on path: %s ", path
);
/*
 * Use the caller's path string directly as the component-name path buffer;
 * the recorded length includes the terminating NUL (strlen + 1).
 * NOTE(review): presumably this is the branch taken when the caller already
 * supplied vp -- the enclosing condition is not visible here.
 */
307 char *pnbuf
= CAST_DOWN(char *, path
);
309 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
310 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
/* Kernel mounts pass no MAC label (NULL) and set kernelmount to TRUE. */
314 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
315 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
325 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
328 * Mount a file system.
332 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
334 struct __mac_mount_args muap
;
336 muap
.type
= uap
->type
;
337 muap
.path
= uap
->path
;
338 muap
.flags
= uap
->flags
;
339 muap
.data
= uap
->data
;
340 muap
.mac_p
= USER_ADDR_NULL
;
341 return __mac_mount(p
, &muap
, retval
);
/*
 * fmount
 *
 * Mount a file system on the directory named by a file descriptor rather
 * than by a path.  The visible steps are: audit the arguments, check the
 * flags, copy the filesystem type name in from user space, resolve uap->fd
 * to a vnode, take a reference on it and fetch its parent, build a
 * componentname whose path buffer holds the vnode's path, and call
 * mount_common() with internal_flags 0, a NULL label and kernelmount FALSE.
 *
 * NOTE(review): the return type, several local declarations and the bodies
 * of every error branch are not visible in this view; the comments below
 * describe only what the visible lines establish.
 */
345 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
347 struct componentname cn
;
348 vfs_context_t ctx
= vfs_context_current();
351 int flags
= uap
->flags
;
352 char fstypename
[MFSNAMELEN
];
353 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
357 AUDIT_ARG(fd
, uap
->fd
);
358 AUDIT_ARG(fflags
, flags
);
359 /* fstypename will get audited by mount_common */
361 /* Sanity check the flags */
/* MNT_IMGSRC_BY_INDEX and MNT_ROOTFS are singled out here; the branch body is not visible. */
362 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
/* MNT_UNION is singled out as well; the branch body is not visible. */
366 if (flags
& MNT_UNION
) {
/* Copy at most MFSNAMELEN bytes of the type name from user space. */
370 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
/* Resolve the descriptor to the vnode that will be covered. */
375 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
379 if ((error
= vnode_getwithref(vp
)) != 0) {
384 pvp
= vnode_getparent(vp
);
/*
 * Build a zeroed componentname with a MAXPATHLEN path buffer from M_TEMP and
 * fill it with the path of vp.
 */
391 memset(&cn
, 0, sizeof(struct componentname
));
392 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
393 cn
.cn_pnlen
= MAXPATHLEN
;
/* If the path cannot be produced, the buffer is freed on this error branch. */
395 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
396 FREE(cn
.cn_pnbuf
, M_TEMP
);
403 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
/* The path buffer is released after mount_common() returns. */
405 FREE(cn
.cn_pnbuf
, M_TEMP
);
414 vfs_notify_mount(vnode_t pdvp
)
416 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
417 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
422 * Mount a file system taking into account MAC label behavior.
423 * See mount(2) man page for more information
425 * Parameters: p Process requesting the mount
426 * uap User argument descriptor (see below)
429 * Indirect: uap->type Filesystem type
430 * uap->path Path to mount
431 * uap->data Mount arguments
432 * uap->mac_p MAC info
433 * uap->flags Mount flags
/*
 * Global flag, initially FALSE.  Within this view it is only ever set to
 * TRUE, under CHECK_CS_VALIDATION_BITMAP, when the requested flags lack
 * MNT_RDONLY (see below).
 */
439 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount
 *
 * Mount entry point that also accepts a MAC label.  The visible steps are:
 * copy the filesystem type name in from user space, set up a user-space
 * lookup of uap->path, optionally divert to relocate_imageboot_source(),
 * optionally copy in the MAC label string, apply checks specific to the
 * root filesystem, and call mount_common() with internal_flags 0 and
 * kernelmount FALSE.  The label buffer is freed afterwards.
 *
 * NOTE(review): the return type, several local declarations, the lookup
 * call, most branch bodies and the cleanup labels are not visible in this
 * view, so the nesting of the conditions below cannot be confirmed here.
 */
442 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
446 int need_nameidone
= 0;
447 vfs_context_t ctx
= vfs_context_current();
448 char fstypename
[MFSNAMELEN
];
451 char *labelstr
= NULL
;
452 int flags
= uap
->flags
;
454 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
455 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
460 * Get the fs type name from user space
462 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
468 * Get the vnode to be covered
470 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
471 UIO_USERSPACE
, uap
->path
, ctx
);
480 #ifdef CONFIG_IMGSRC_ACCESS
481 /* Mounting image source cannot be batched with other operations */
/*
 * Exact equality, not a bit test: flags must be MNT_IMGSRC_BY_INDEX and
 * nothing else.  The by_index argument repeats the same comparison as the
 * enclosing if.
 */
482 if (flags
== MNT_IMGSRC_BY_INDEX
) {
483 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
484 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
487 #endif /* CONFIG_IMGSRC_ACCESS */
491 * Get the label string (if any) from user space
493 if (uap
->mac_p
!= USER_ADDR_NULL
) {
/*
 * The user's mac descriptor is copied in using either the 64-bit or the
 * 32-bit layout and normalised into `mac`.
 * NOTE(review): presumably is_64bit selects the layout; the selecting
 * condition is not visible here.
 */
498 struct user64_mac mac64
;
499 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
500 mac
.m_buflen
= mac64
.m_buflen
;
501 mac
.m_string
= mac64
.m_string
;
503 struct user32_mac mac32
;
504 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
505 mac
.m_buflen
= mac32
.m_buflen
;
506 mac
.m_string
= mac32
.m_string
;
/*
 * Range-check the user-supplied label length (2 .. MAC_MAX_LABEL_BUF_LEN)
 * ahead of the allocation that is sized by it; the body of this branch is
 * not visible here.
 */
511 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
512 (mac
.m_buflen
< 2)) {
/* Allocate m_buflen bytes from M_MACTEMP and copy the label string in. */
516 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
517 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
521 AUDIT_ARG(mac_string
, labelstr
);
523 #endif /* CONFIG_MACF */
525 AUDIT_ARG(fflags
, flags
);
528 if (flags
& MNT_UNION
) {
529 /* No union mounts on release kernels */
/* The checks below apply when vp is the root vnode (VROOT) of the root filesystem (MNT_ROOTFS). */
535 if ((vp
->v_flag
& VROOT
) &&
536 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
537 if (!(flags
& MNT_UNION
)) {
541 * For a union mount on '/', treat it as fresh
542 * mount instead of update.
543 * Otherwise, union mouting on '/' used to panic the
544 * system before, since mnt_vnodecovered was found to
545 * be NULL for '/' which is required for unionlookup
546 * after it gets ENOENT on union mount.
/* Strip MNT_UPDATE from the request. */
548 flags
= (flags
& ~(MNT_UPDATE
));
552 if ((flags
& MNT_RDONLY
) == 0) {
553 /* Release kernels are not allowed to mount "/" as rw */
559 * See 7392553 for more details on why this check exists.
560 * Suffice to say: If this check is ON and something tries
561 * to mount the rootFS RW, we'll turn off the codesign
562 * bitmap optimization.
564 #if CHECK_CS_VALIDATION_BITMAP
565 if ((flags
& MNT_RDONLY
) == 0) {
566 root_fs_upgrade_try
= TRUE
;
/* Hand off to the common implementation: internal_flags 0, kernelmount FALSE. */
571 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
572 labelstr
, FALSE
, ctx
);
/* Release the label buffer allocated from M_MACTEMP above. */
578 FREE(labelstr
, M_MACTEMP
);
580 #endif /* CONFIG_MACF */
/* need_nameidone starts at 0; the code that sets it and the body of this branch are not visible here. */
588 if (need_nameidone
) {
596 * common mount implementation (final stage of mounting)
599 * fstypename file system type (i.e. its vfs name)
600 * pvp parent of covered vnode
602 * cnp component name (ie path) of covered vnode
603 * flags generic mount flags
604 * fsmountargs file system specific data
605 * labelstr optional MAC label
606 * kernelmount TRUE for mounts initiated from inside the kernel
607 * ctx caller's context
610 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
611 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
612 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
615 #pragma unused(labelstr)
617 struct vnode
*devvp
= NULLVP
;
618 struct vnode
*device_vnode
= NULLVP
;
623 struct vfstable
*vfsp
= (struct vfstable
*)0;
624 struct proc
*p
= vfs_context_proc(ctx
);
626 user_addr_t devpath
= USER_ADDR_NULL
;
629 boolean_t vfsp_ref
= FALSE
;
630 boolean_t is_rwlock_locked
= FALSE
;
631 boolean_t did_rele
= FALSE
;
632 boolean_t have_usecount
= FALSE
;
634 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
635 /* Check for mutually-exclusive flag bits */
636 uint32_t checkflags
= (internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
));
638 while (checkflags
!= 0) {
639 checkflags
&= (checkflags
- 1);
644 //not allowed to request multiple mount-by-role flags
651 * Process an update for an existing mount
653 if (flags
& MNT_UPDATE
) {
654 if ((vp
->v_flag
& VROOT
) == 0) {
660 /* unmount in progress return error */
662 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
668 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
669 is_rwlock_locked
= TRUE
;
671 * We only allow the filesystem to be reloaded if it
672 * is currently mounted read-only.
674 if ((flags
& MNT_RELOAD
) &&
675 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
681 * If content protection is enabled, update mounts are not
682 * allowed to turn it off.
684 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
685 ((flags
& MNT_CPROTECT
) == 0)) {
691 * can't turn off MNT_REMOVABLE either but it may be an unexpected
692 * failure to return an error for this so we'll just silently
693 * add it if it is not passed in.
695 if ((mp
->mnt_flag
& MNT_REMOVABLE
) &&
696 ((flags
& MNT_REMOVABLE
) == 0)) {
697 flags
|= MNT_REMOVABLE
;
700 #ifdef CONFIG_IMGSRC_ACCESS
701 /* Can't downgrade the backer of the root FS */
702 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
703 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
707 #endif /* CONFIG_IMGSRC_ACCESS */
710 * Only root, or the user that did the original mount is
711 * permitted to update it.
713 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
714 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
718 error
= mac_mount_check_remount(ctx
, mp
);
724 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
725 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
727 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
728 flags
|= MNT_NOSUID
| MNT_NODEV
;
729 if (mp
->mnt_flag
& MNT_NOEXEC
) {
737 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
739 vfsp
= mp
->mnt_vtable
;
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
745 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
747 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
748 flags
|= MNT_NOSUID
| MNT_NODEV
;
749 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
754 /* XXXAUDIT: Should we capture the type on the error path as well? */
755 AUDIT_ARG(text
, fstypename
);
757 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
758 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
759 vfsp
->vfc_refcount
++;
771 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
772 * except in ROSV configs.
774 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) &&
775 ((internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
)) == 0)) {
776 error
= EINVAL
; /* unsupported request */
780 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
786 * Allocate and initialize the filesystem (mount_t)
788 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
790 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
793 /* Initialize the default IO constraints */
794 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
795 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
796 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
797 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
798 mp
->mnt_devblocksize
= DEV_BSIZE
;
799 mp
->mnt_alignmentmask
= PAGE_MASK
;
800 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
803 mp
->mnt_realrootvp
= NULLVP
;
804 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
806 TAILQ_INIT(&mp
->mnt_vnodelist
);
807 TAILQ_INIT(&mp
->mnt_workerqueue
);
808 TAILQ_INIT(&mp
->mnt_newvnodes
);
810 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
811 is_rwlock_locked
= TRUE
;
812 mp
->mnt_op
= vfsp
->vfc_vfsops
;
813 mp
->mnt_vtable
= vfsp
;
814 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
815 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
816 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
818 int pathlen
= MAXPATHLEN
;
820 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) {
821 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
824 mp
->mnt_vnodecovered
= vp
;
825 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
826 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
827 mp
->mnt_devbsdunit
= 0;
829 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
830 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
832 #if NFSCLIENT || DEVFS || ROUTEFS
834 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
836 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
837 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
839 #endif /* NFSCLIENT || DEVFS */
844 * Set the mount level flags.
846 if (flags
& MNT_RDONLY
) {
847 mp
->mnt_flag
|= MNT_RDONLY
;
848 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
849 // disallow read/write upgrades of file systems that
850 // had the TYPENAME_OVERRIDE feature set.
851 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
855 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
857 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
858 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
859 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
860 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
861 MNT_QUARANTINE
| MNT_CPROTECT
);
866 * On release builds of iOS based platforms, always enforce NOSUID on
867 * all mounts. We do this here because we can catch update mounts as well as
868 * non-update mounts in this case.
870 mp
->mnt_flag
|= (MNT_NOSUID
);
874 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
875 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
876 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
877 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
878 MNT_QUARANTINE
| MNT_CPROTECT
);
881 if (flags
& MNT_MULTILABEL
) {
882 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
886 mp
->mnt_flag
|= MNT_MULTILABEL
;
890 * Process device path for local file systems if requested
892 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
893 !(internal_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
))) {
894 //snapshot, vm, datavolume mounts are special
895 if (vfs_context_is64bit(ctx
)) {
896 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
899 fsmountargs
+= sizeof(devpath
);
902 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
905 /* munge into LP64 addr */
906 devpath
= CAST_USER_ADDR_T(tmp
);
907 fsmountargs
+= sizeof(tmp
);
910 /* Lookup device and authorize access to it */
914 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
915 if ((error
= namei(&nd
))) {
919 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
924 if (devvp
->v_type
!= VBLK
) {
928 if (major(devvp
->v_rdev
) >= nblkdev
) {
933 * If mount by non-root, then verify that user has necessary
934 * permissions on the device.
936 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
937 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
939 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
940 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
942 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
947 /* On first mount, preflight and open device */
948 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
949 if ((error
= vnode_ref(devvp
))) {
953 * Disallow multiple mounts of the same device.
954 * Disallow mounting of a device that is currently in use
955 * (except for root, which might share swap device for miniroot).
956 * Flush out any old buffers remaining from a previous use.
958 if ((error
= vfs_mountedon(devvp
))) {
962 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
966 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
970 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
974 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
976 error
= mac_vnode_check_open(ctx
,
978 ronly
? FREAD
: FREAD
| FWRITE
);
983 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
987 mp
->mnt_devvp
= devvp
;
988 device_vnode
= devvp
;
989 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
990 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
991 (device_vnode
= mp
->mnt_devvp
)) {
995 * If upgrade to read-write by non-root, then verify
996 * that user has necessary permissions on the device.
998 vnode_getalways(device_vnode
);
1000 if (suser(vfs_context_ucred(ctx
), NULL
) &&
1001 (error
= vnode_authorize(device_vnode
, NULL
,
1002 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
1004 vnode_put(device_vnode
);
1008 /* Tell the device that we're upgrading */
1009 dev
= (dev_t
)device_vnode
->v_rdev
;
1012 if ((u_int
)maj
>= (u_int
)nblkdev
) {
1013 panic("Volume mounted on a device with invalid major number.");
1016 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
1017 vnode_put(device_vnode
);
1018 device_vnode
= NULLVP
;
1023 } // localargs && !(snapshot | data | vm)
1026 if ((flags
& MNT_UPDATE
) == 0) {
1027 mac_mount_label_init(mp
);
1028 mac_mount_label_associate(ctx
, mp
);
1031 if ((flags
& MNT_UPDATE
) != 0) {
1032 error
= mac_mount_check_label_update(ctx
, mp
);
1040 * Mount the filesystem. We already asserted that internal_flags
1041 * cannot have more than one mount-by-role bit set.
1043 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
1044 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
1045 (caddr_t
)fsmountargs
, 0, ctx
);
1046 } else if (internal_flags
& KERNEL_MOUNT_DATAVOL
) {
1047 #if CONFIG_ROSV_STARTUP
1048 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1049 fs_role_mount_args_t frma
= {origin_mp
, VFS_DATA_ROLE
};
1050 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1052 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
);
1054 /* Mark volume associated with system volume */
1055 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
1057 /* Attempt to acquire the mnt_devvp and set it up */
1058 struct vnode
*mp_devvp
= NULL
;
1059 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1060 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1061 0, &mp_devvp
, vfs_context_kernel());
1063 mp
->mnt_devvp
= mp_devvp
;
1064 //vnode_lookup took an iocount, need to drop it.
1065 vnode_put(mp_devvp
);
1066 // now set `device_vnode` to the devvp that was acquired.
1067 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1068 // note that though the iocount above was dropped, the mount acquires
1069 // an implicit reference against the device.
1070 device_vnode
= mp_devvp
;
1077 } else if (internal_flags
& KERNEL_MOUNT_VMVOL
) {
1079 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1080 fs_role_mount_args_t frma
= {origin_mp
, VFS_VM_ROLE
};
1081 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1083 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
);
1085 /* Mark volume associated with system volume and a swap mount */
1086 mp
->mnt_kern_flag
|= (MNTK_SYSTEM
| MNTK_SWAP_MOUNT
);
1087 /* Attempt to acquire the mnt_devvp and set it up */
1088 struct vnode
*mp_devvp
= NULL
;
1089 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1090 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1091 0, &mp_devvp
, vfs_context_kernel());
1093 mp
->mnt_devvp
= mp_devvp
;
1094 //vnode_lookup took an iocount, need to drop it.
1095 vnode_put(mp_devvp
);
1097 // now set `device_vnode` to the devvp that was acquired.
1098 // note that though the iocount above was dropped, the mount acquires
1099 // an implicit reference against the device.
1100 device_vnode
= mp_devvp
;
1108 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
1111 if (flags
& MNT_UPDATE
) {
1112 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
1113 mp
->mnt_flag
&= ~MNT_RDONLY
;
1116 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1117 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1119 mp
->mnt_flag
= flag
; /* restore flag value */
1121 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1122 lck_rw_done(&mp
->mnt_rwlock
);
1123 is_rwlock_locked
= FALSE
;
1125 enablequotas(mp
, ctx
);
1131 * Put the new filesystem on the mount list after root.
1134 struct vfs_attr vfsattr
;
1136 error
= mac_mount_check_mount_late(ctx
, mp
);
1141 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1142 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1144 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1147 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1149 * drop reference provided by VFS_ROOT
1159 vnode_lock_spin(vp
);
1160 CLR(vp
->v_flag
, VMOUNT
);
1161 vp
->v_mountedhere
= mp
;
1165 * taking the name_cache_lock exclusively will
1166 * ensure that everyone is out of the fast path who
1167 * might be trying to use a now stale copy of
1168 * vp->v_mountedhere->mnt_realrootvp
1169 * bumping mount_generation causes the cached values
1174 name_cache_unlock();
1176 error
= vnode_ref(vp
);
1181 have_usecount
= TRUE
;
1183 error
= checkdirs(vp
, ctx
);
1185 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1189 * there is no cleanup code here so I have made it void
1190 * we need to revisit this
1192 (void)VFS_START(mp
, 0, ctx
);
1194 if (mount_list_add(mp
) != 0) {
1196 * The system is shutting down trying to umount
1197 * everything, so fail with a plausible errno.
1202 lck_rw_done(&mp
->mnt_rwlock
);
1203 is_rwlock_locked
= FALSE
;
1205 /* Check if this mounted file system supports EAs or named streams. */
1206 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1207 VFSATTR_INIT(&vfsattr
);
1208 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1209 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1210 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1211 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1212 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1213 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1214 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1217 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1218 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1219 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1222 /* Check if this file system supports path from id lookups. */
1223 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1224 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1225 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1226 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1227 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1228 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1231 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1232 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1233 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1236 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1237 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1239 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1240 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1242 /* increment the operations count */
1243 OSAddAtomic(1, &vfs_nummntops
);
1244 enablequotas(mp
, ctx
);
1247 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1250 * cache the IO attributes for the underlying physical media...
1251 * an error return indicates the underlying driver doesn't
1252 * support all the queries necessary... however, reasonable
1253 * defaults will have been set, so no reason to bail or care
1255 vfs_init_io_attributes(device_vnode
, mp
);
1258 /* Now that mount is setup, notify the listeners */
1259 vfs_notify_mount(pvp
);
1260 IOBSDMountChange(mp
, kIOMountChangeMount
);
1262 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1263 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1264 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1265 mp
->mnt_vtable
->vfc_name
, error
);
1268 vnode_lock_spin(vp
);
1269 CLR(vp
->v_flag
, VMOUNT
);
1272 mp
->mnt_vtable
->vfc_refcount
--;
1273 mount_list_unlock();
1276 vnode_rele(device_vnode
);
1277 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1279 lck_rw_done(&mp
->mnt_rwlock
);
1280 is_rwlock_locked
= FALSE
;
1283 * if we get here, we have a mount structure that needs to be freed,
1284 * but since the coveredvp hasn't yet been updated to point at it,
1285 * no need to worry about other threads holding a crossref on this mp
1286 * so it's ok to just free it
1288 mount_lock_destroy(mp
);
1290 mac_mount_label_destroy(mp
);
1292 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1296 * drop I/O count on the device vp if there was one
1298 if (devpath
&& devvp
) {
1304 /* Error condition exits */
1306 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1309 * If the mount has been placed on the covered vp,
1310 * it may have been discovered by now, so we have
1311 * to treat this just like an unmount
1313 mount_lock_spin(mp
);
1314 mp
->mnt_lflag
|= MNT_LDEAD
;
1317 if (device_vnode
!= NULLVP
) {
1318 vnode_rele(device_vnode
);
1319 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1324 vnode_lock_spin(vp
);
1327 vp
->v_mountedhere
= (mount_t
) 0;
1331 if (have_usecount
) {
1335 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1339 if (devpath
&& devvp
) {
1343 /* Release mnt_rwlock only when it was taken */
1344 if (is_rwlock_locked
== TRUE
) {
1345 lck_rw_done(&mp
->mnt_rwlock
);
1349 if (mp
->mnt_crossref
) {
1350 mount_dropcrossref(mp
, vp
, 0);
1352 mount_lock_destroy(mp
);
1354 mac_mount_label_destroy(mp
);
1356 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1361 vfsp
->vfc_refcount
--;
1362 mount_list_unlock();
/*
 * prepare_coveredvp: vet vnode `vp` as a mount point and flag it VMOUNT.
 *
 * Steps visible here, in order:
 *   - fetch va_uid with vnode_getattr() and compare it with the uid of the
 *     caller's credential (a superuser context is exempt);
 *   - VNOP_FSYNC(vp, MNT_WAIT) followed by
 *     buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
 *   - test that vp is a directory (v_type != VDIR is the failing case);
 *   - test for a competing mount (VMOUNT set and v_mountedhere non-NULL);
 *   - call mac_mount_check_mount();
 *   - SET VMOUNT on vp after taking the vnode spin lock.
 *
 * NOTE(review): the bodies of the failure branches, any use of skip_auth,
 * and the return statement are not present in this extract -- confirm
 * against the complete source before relying on specific errno values.
 */
1369 * Flush in-core data, check for competing mount attempts,
1373 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
/* cnp and fsname are marked unused here; any enclosing #if is not visible. */
1376 #pragma unused(cnp,fsname)
1378 struct vnode_attr va
;
1383 * If the user is not root, ensure that they own the directory
1384 * onto which we are attempting to mount.
1387 VATTR_WANTED(&va
, va_uid
);
1388 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1389 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1390 (!vfs_context_issuser(ctx
)))) {
/* Push dirty data for vp out synchronously, then invalidate its buffers. */
1396 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1400 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
1404 if (vp
->v_type
!= VDIR
) {
/* Something is already mounted (or being mounted) on this vnode. */
1409 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1415 error
= mac_mount_check_mount(ctx
, vp
,
/* Mark the vnode as having a mount in progress. */
1422 vnode_lock_spin(vp
);
1423 SET(vp
->v_flag
, VMOUNT
);
1430 #if CONFIG_IMGSRC_ACCESS
1432 #define DEBUG_IMGSRC 0
1435 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1437 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: check that the path `devpath`
 * names the block device backing `mp`, record that path as the mount's
 * f_mntfromname, and authorize non-superuser callers against the device.
 *
 * Steps visible here:
 *   - look the path up with namei() (LOOKUP / OP_LOOKUP / FOLLOW);
 *   - reject a vnode that is not a block device (vnode_isblk());
 *   - take mp->mnt_devvp as the real device, rejecting NULLVP, and get an
 *     iocount on it with vnode_getwithref();
 *   - require vnode_specrdev() of the looked-up vnode and of the real device
 *     to match;
 *   - strlcpy() the looked-up path buffer (nd.ni_cnd.cn_pnbuf) into
 *     mp->mnt_vfsstat.f_mntfromname, bounded by MAXPATHLEN;
 *   - for a non-superuser context, vnode_authorize() for READ_DATA, plus
 *     WRITE_DATA when the mount is not MNT_RDONLY;
 *   - vnode_put() the real device vnode.
 *
 * NOTE(review): how *devvpp is filled in, the cleanup of `nd`/`vp`, and the
 * errno chosen on each failure are not present in this extract.
 */
1441 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1443 struct nameidata nd
;
1444 vnode_t vp
, realdevvp
;
/*
 * Address space of devpath; defaults to user space. The kernel-context
 * branch below presumably overrides it (branch body not visible here).
 */
1447 enum uio_seg uio
= UIO_USERSPACE
;
1449 if (ctx
== vfs_context_kernel()) {
1453 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
);
1454 if ((error
= namei(&nd
))) {
1455 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1461 if (!vnode_isblk(vp
)) {
1462 IMGSRC_DEBUG("Not block device.\n");
1467 realdevvp
= mp
->mnt_devvp
;
1468 if (realdevvp
== NULLVP
) {
1469 IMGSRC_DEBUG("No device backs the mount.\n");
1474 error
= vnode_getwithref(realdevvp
);
1476 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
/* The caller-supplied path must resolve to the same dev_t as the mount's device. */
1480 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1481 IMGSRC_DEBUG("Wrong dev_t.\n");
1486 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1489 * If mount by non-root, then verify that user has necessary
1490 * permissions on the device.
1492 if (!vfs_context_issuser(ctx
)) {
1493 accessmode
= KAUTH_VNODE_READ_DATA
;
1494 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1495 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1497 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1498 IMGSRC_DEBUG("Access denied.\n");
1506 vnode_put(realdevvp
);
/*
 * place_mount_and_checkdirs: hook mount `mp` onto covered vnode `vp`.
 *
 * Steps visible here: record vp in mp->mnt_vnodecovered; after taking the
 * vnode spin lock, clear VMOUNT and set vp->v_mountedhere = mp; release
 * the name cache lock (the matching acquire and the mount_generation bump
 * described in the comment fragment below are not present in this
 * extract); take a usecount with vnode_ref(); run checkdirs() so processes
 * whose cwd/root is vp get moved to the new mount.
 *
 * The final assignment resets mnt_vnodecovered to NULLVP; presumably this
 * is the failure path only -- TODO confirm, the branch structure is not
 * visible here.
 */
1519 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1520 * and call checkdirs()
1523 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1527 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1529 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1530 mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
));
1532 vnode_lock_spin(vp
);
1533 CLR(vp
->v_flag
, VMOUNT
);
1534 vp
->v_mountedhere
= mp
;
1538 * taking the name_cache_lock exclusively will
1539 * insure that everyone is out of the fast path who
1540 * might be trying to use a now stale copy of
1541 * vp->v_mountedhere->mnt_realrootvp
1542 * bumping mount_generation causes the cached values
1547 name_cache_unlock();
1549 error
= vnode_ref(vp
);
1554 error
= checkdirs(vp
, ctx
);
1556 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1563 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach mount `mp` from covered vnode `vp`.
 *
 * Visible effect: after taking the vnode spin lock, vp->v_mountedhere is
 * reset to NULL, and mp->mnt_vnodecovered is reset to NULLVP.
 * NOTE(review): the unlock of vp and any reference drop are not present in
 * this extract (the caller's comment says this routine also "reles" vp).
 */
1569 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1572 vnode_lock_spin(vp
);
1573 vp
->v_mountedhere
= (mount_t
)NULL
;
1576 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update: start an update of the existing mount `mp`.
 *
 * Steps visible here:
 *   - after taking the mount spin lock, fail if an unmount is in progress
 *     (MNT_LUNMOUNT set in mnt_lflag);
 *   - take mp->mnt_rwlock exclusively;
 *   - refuse MNT_RELOAD unless the mount is currently MNT_RDONLY;
 *   - require the caller to be the recorded f_owner or the superuser;
 *   - call mac_mount_check_remount().
 *
 * A lck_rw_done() on mnt_rwlock is visible at the end; presumably it runs
 * only on failure, leaving the lock held on success for mount_end_update()
 * to release -- TODO confirm, the branch structure and errno values are not
 * present in this extract.
 */
1580 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1584 /* unmount in progress return error */
1585 mount_lock_spin(mp
);
1586 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1591 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1594 * We only allow the filesystem to be reloaded if it
1595 * is currently mounted read-only.
1597 if ((flags
& MNT_RELOAD
) &&
1598 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1604 * Only root, or the user that did the original mount is
1605 * permitted to update it.
1607 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1608 (!vfs_context_issuser(ctx
))) {
1613 error
= mac_mount_check_remount(ctx
, mp
);
1621 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update: finish a mount update by releasing mp->mnt_rwlock.
 * Counterpart of mount_begin_update(), which takes that lock exclusively.
 */
1628 mount_end_update(mount_t mp
)
1630 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode: look up the saved imageboot root vnode at nesting
 * level `height`.
 *
 * `height` is range-checked against MAX_IMAGEBOOT_NESTING before it is used
 * to index imgsrc_rootvnodes[]. The entry is accepted only if it is not
 * NULLVP and vnode_get() on it succeeds (i.e. an iocount was obtained).
 * NOTE(review): the store into *rvpp and the return values are not present
 * in this extract.
 */
1634 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1638 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1642 vp
= imgsrc_rootvnodes
[height
];
1643 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
/*
 * relocate_imageboot_source: move the filesystem that the system
 * image-booted from onto a new covered vnode `vp` (parent `pvp`).
 *
 * Sequence visible here:
 *   1. Bail out if there was no imageboot (imgsrc_rootvnodes[0] == NULLVP)
 *      or the caller is not the superuser.
 *   2. copyin() the mount arguments from `fsmountargs`: a
 *      user64_mnt_imgsrc_args or user32_mnt_imgsrc_args (height, flags,
 *      devpath), or, in the binary-compatibility form, just a device path
 *      pointer. Presumably `by_index` and `is64bit` select among these --
 *      the selecting conditions are not present in this extract.
 *   3. Fetch the old root vnode with get_imgsrc_rootvnode() and its mount
 *      with vnode_mount().
 *   4. Refuse if MNTK_HAS_MOVED is already set; the check is repeated after
 *      mount_begin_update() because the flag is set under the rwlock.
 *   5. prepare_coveredvp() on vp, compare `fsname` with the mount's
 *      vfc_name, and for VFC_VFSLOCALARGS filesystems with a non-null
 *      devpath run authorize_devpath_and_update_mntfromname().
 *   6. place_mount_and_checkdirs(), save the old f_mntonname and replace it
 *      with cnp->cn_pnbuf, set MNTK_HAS_MOVED, then mount_list_add().
 *   7. On success: mount_end_update(), free the saved name, and
 *      vfs_notify_mount(pvp).
 *
 * The trailing statements restore f_mntonname, clear MNTK_HAS_MOVED, undo
 * the placement or clear VMOUNT, end the update and free the buffer; these
 * are presumably the staged error-exit labels -- TODO confirm, the labels
 * and gotos are not present in this extract.
 */
1652 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
,
1653 struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
,
1654 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1658 boolean_t placed
= FALSE
;
1659 struct vfstable
*vfsp
;
1660 user_addr_t devpath
;
1661 char *old_mntonname
;
1667 /* If we didn't imageboot, nothing to move */
1668 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1672 /* Only root can do this */
1673 if (!vfs_context_issuser(ctx
)) {
1677 IMGSRC_DEBUG("looking for root vnode.\n");
1680 * Get root vnode of filesystem we're moving.
1684 struct user64_mnt_imgsrc_args mia64
;
1685 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1687 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1691 height
= mia64
.mi_height
;
1692 flags
= mia64
.mi_flags
;
1693 devpath
= mia64
.mi_devpath
;
1695 struct user32_mnt_imgsrc_args mia32
;
1696 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1698 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1702 height
= mia32
.mi_height
;
1703 flags
= mia32
.mi_flags
;
1704 devpath
= mia32
.mi_devpath
;
1708 * For binary compatibility--assumes one level of nesting.
1711 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1716 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1720 /* munge into LP64 addr */
1721 devpath
= CAST_USER_ADDR_T(tmp
);
1729 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1733 error
= get_imgsrc_rootvnode(height
, &rvp
);
1735 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
);
1739 IMGSRC_DEBUG("got old root vnode\n");
/* Scratch buffer used to remember the previous f_mntonname for rollback. */
1741 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1743 /* Can only move once */
1744 mp
= vnode_mount(rvp
);
1745 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1746 IMGSRC_DEBUG("Already moved.\n");
1751 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
);
1752 IMGSRC_DEBUG("Starting updated.\n");
1754 /* Get exclusive rwlock on mount, authorize update on mp */
1755 error
= mount_begin_update(mp
, ctx
, 0);
1757 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1762 * It can only be moved once. Flag is set under the rwlock,
1763 * so we're now safe to proceed.
1765 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1766 IMGSRC_DEBUG("Already moved [2]\n");
1770 IMGSRC_DEBUG("Preparing coveredvp.\n");
1772 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1773 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1775 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1779 IMGSRC_DEBUG("Covered vp OK.\n");
1781 /* Sanity check the name caller has provided */
1782 vfsp
= mp
->mnt_vtable
;
1783 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1784 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1785 vfsp
->vfc_name
, fsname
);
1790 /* Check the device vnode and update mount-from name, for local filesystems */
1791 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1792 IMGSRC_DEBUG("Local, doing device validation.\n");
1794 if (devpath
!= USER_ADDR_NULL
) {
1795 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1797 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1806 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1807 * and increment the name cache's mount generation
1810 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1811 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
/* Save the old mounted-on name, then record the new one from the lookup. */
1818 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1819 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1821 /* Forbid future moves */
1823 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1826 /* Finally, add to mount list, completely ready to go */
1827 if (mount_list_add(mp
) != 0) {
1829 * The system is shutting down trying to umount
1830 * everything, so fail with a plausible errno.
1836 mount_end_update(mp
);
1838 FREE(old_mntonname
, M_TEMP
);
1840 vfs_notify_mount(pvp
);
/* From here on: rollback of the steps above (presumably error-exit labels). */
1844 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1847 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1852 * Placing the mp on the vnode clears VMOUNT,
1853 * so cleanup is different after that point
1856 /* Rele the vp, clear VMOUNT and v_mountedhere */
1857 undo_place_on_covered_vp(mp
, vp
);
1859 vnode_lock_spin(vp
);
1860 CLR(vp
->v_flag
, VMOUNT
);
1864 mount_end_update(mp
);
1868 FREE(old_mntonname
, M_TEMP
);
1872 #if CONFIG_LOCKERBOOT
/*
 * mount_locker_protoboot: in-kernel wrapper that relocates the imageboot
 * source onto `mntpoint`, using device path `pbdevpath`.
 *
 * It runs under the kernel VFS context: it builds a
 * struct user64_mnt_imgsrc_args on the stack (mi_devpath = pbdevpath; the
 * other initializers are not present in this extract), looks up `mntpoint`
 * as a kernel-space path with WANTPARENT so both nd.ni_vp and nd.ni_dvp are
 * available, and calls relocate_imageboot_source() with is64 = TRUE and
 * by_index = TRUE, passing the address of the stack structure as the
 * "user" argument pointer.
 *
 * Afterwards both vnodes from the lookup are released with vnode_put(); a
 * panic call follows each vnode_put() (its guarding condition is not
 * visible, but the message says it fires on a non-zero return). `stashed`
 * keeps the relocate result while `error` is reused for the vnode_put()
 * calls.
 */
1875 mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
1876 const char *pbdevpath
)
1879 struct nameidata nd
;
1880 boolean_t cleanup_nd
= FALSE
;
1881 vfs_context_t ctx
= vfs_context_kernel();
1882 boolean_t is64
= TRUE
;
1883 boolean_t by_index
= TRUE
;
1884 struct user64_mnt_imgsrc_args mia64
= {
1887 .mi_devpath
= CAST_USER_ADDR_T(pbdevpath
),
1889 user_addr_t mia64addr
= CAST_USER_ADDR_T(&mia64
);
1891 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
1892 UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
);
1895 IMGSRC_DEBUG("namei: %d\n", error
);
1900 error
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
,
1901 &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
);
/* Preserve the relocate result; `error` is reused for the puts below. */
1905 int stashed
= error
;
1907 error
= vnode_put(nd
.ni_vp
);
1909 panic("vnode_put() returned non-zero: %d", error
);
1913 error
= vnode_put(nd
.ni_dvp
);
1915 panic("vnode_put() returned non-zero: %d", error
);
1924 #endif /* CONFIG_LOCKERBOOT */
1925 #endif /* CONFIG_IMGSRC_ACCESS */
/*
 * enablequotas: best-effort activation of disk quotas on a new mount.
 *
 * Only filesystems whose f_fstypename is "hfs" are considered (the strncmp
 * below; the body of that branch is not present in this extract but is
 * presumably an early return). For each quota type in [0, MAXQUOTAS):
 *   - build "<f_mntonname>/<QUOTAOPSNAME>.<extension>" and look it up; if
 *     the lookup fails, quotas are not requested for that type;
 *   - otherwise drop the vnode, build the quota file path
 *     "<f_mntonname>/<QUOTAFILENAME>.<extension>", and issue
 *     VFS_QUOTACTL(Q_QUOTAON) on it.
 * The VFS_QUOTACTL result is deliberately discarded so quota problems do
 * not fail the mount.
 */
1928 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1930 struct nameidata qnd
;
1932 char qfpath
[MAXPATHLEN
];
1933 const char *qfname
= QUOTAFILENAME
;
1934 const char *qfopsname
= QUOTAOPSNAME
;
1935 const char *qfextension
[] = INITQFNAMES
;
1937 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1938 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
1942 * Enable filesystem disk quotas if necessary.
1943 * We ignore errors as this should not interfere with final mount
1945 for (type
= 0; type
< MAXQUOTAS
; type
++) {
1946 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1947 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1948 CAST_USER_ADDR_T(qfpath
), ctx
);
1949 if (namei(&qnd
) != 0) {
1950 continue; /* option file to trigger quotas is not present */
/* The trigger file exists; only its presence matters, so drop the vnode. */
1952 vnode_put(qnd
.ni_vp
);
1954 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1956 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
/*
 * checkdirs_callback: per-process callback used by checkdirs().
 *
 * `arg` is a struct cdirargs carrying `olddp` (the vnode that was just
 * covered by a mount) and `newdp` (the vnode that should replace it). For
 * process `p`, any current directory (fd_cdir) or root directory (fd_rdir)
 * equal to olddp is switched to newdp.
 *
 * Reference handling visible here: two usecounts are taken on newdp up
 * front (one per directory slot that might be replaced); new_cvp/new_rvp
 * start out as newdp and old_cvp/old_rvp as NULL, and at the end every one
 * of the four that is non-NULL is released with vnode_rele(). Presumably a
 * slot that is actually replaced sets new_* to NULL and old_* to the
 * previous vnode -- those assignments, and the proc_fdlock calls mentioned
 * in the comments, are not present in this extract.
 *
 * Always returns PROC_RETURNED on the paths visible here.
 */
1963 checkdirs_callback(proc_t p
, void * arg
)
1965 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
1966 vnode_t olddp
= cdrp
->olddp
;
1967 vnode_t newdp
= cdrp
->newdp
;
1968 struct filedesc
*fdp
;
1969 vnode_t new_cvp
= newdp
;
1970 vnode_t new_rvp
= newdp
;
1971 vnode_t old_cvp
= NULL
;
1972 vnode_t old_rvp
= NULL
;
1975 * XXX Also needs to iterate each thread in the process to see if it
1976 * XXX is using a per-thread current working directory, and, if so,
1977 * XXX update that as well.
1981 * First, with the proc_fdlock held, check to see if we will need
1982 * to do any work. If not, we will get out fast.
1987 (fdp
->fd_cdir
!= olddp
&& fdp
->fd_rdir
!= olddp
)) {
1989 return PROC_RETURNED
;
1994 * Ok, we will have to do some work. Always take two refs
1995 * because we might need that many. We'll dispose of whatever
1996 * we ended up not using.
1998 if (vnode_ref(newdp
) != 0) {
1999 return PROC_RETURNED
;
2001 if (vnode_ref(newdp
) != 0) {
2003 return PROC_RETURNED
;
2007 * Now do the work. Note: we dropped the proc_fdlock, so we
2008 * have to do all of the checks again.
2013 if (fdp
->fd_cdir
== olddp
) {
2015 fdp
->fd_cdir
= newdp
;
2018 if (fdp
->fd_rdir
== olddp
) {
2020 fdp
->fd_rdir
= newdp
;
2027 * Dispose of any references that are no longer needed.
2029 if (old_cvp
!= NULL
) {
2030 vnode_rele(old_cvp
);
2032 if (old_rvp
!= NULL
) {
2033 vnode_rele(old_rvp
);
2035 if (new_cvp
!= NULL
) {
2036 vnode_rele(new_cvp
);
2038 if (new_rvp
!= NULL
) {
2039 vnode_rele(new_rvp
);
2042 return PROC_RETURNED
;
/*
 * checkdirs: after a mount on `olddp`, retarget processes that were using
 * olddp as their current or root directory.
 *
 * Visible steps: test olddp->v_usecount == 1 (presumably the "nobody else
 * is using it" fast exit -- branch body not present in this extract);
 * obtain the root of the newly mounted filesystem with
 * VFS_ROOT(olddp->v_mountedhere, &newdp, ctx); walk all processes with
 * proc_iterate() and checkdirs_callback(); finally handle the case where
 * the global rootvnode itself is olddp (body not visible).
 *
 * NOTE(review): a panic("mount: lost mount...") follows VFS_ROOT(); its
 * guarding condition, and how `cdr` is filled in, are not visible here.
 */
2048 * Scan all active processes to see if any of them have a current
2049 * or root directory onto which the new filesystem has just been
2050 * mounted. If so, replace them with the new mount point.
2053 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
2058 struct cdirargs cdr
;
2060 if (olddp
->v_usecount
== 1) {
2063 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
2067 panic("mount: lost mount: error %d", err
);
2074 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2075 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
2077 if (rootvnode
== olddp
) {
/*
 * unmount: system call entry point for unmounting by path.
 *
 * The user-space path uap->path is looked up (OP_UNMOUNT, following
 * symlinks). Visible checks: mac_mount_check_umount() on the mount, and a
 * requirement that the vnode is the root of its filesystem (VROOT set in
 * v_flag). The actual work is delegated to safedounmount(), which consumes
 * the mount reference. `p` and `retval` are unused.
 * NOTE(review): the namei() call, how `mp` and the mount ref are obtained,
 * and the errno for each failure are not present in this extract.
 */
2089 * Unmount a file system.
2091 * Note: unmount takes a path to the vnode mounted on as argument,
2092 * not special file (as before).
2096 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
2101 struct nameidata nd
;
2102 vfs_context_t ctx
= vfs_context_current();
2104 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
2105 UIO_USERSPACE
, uap
->path
, ctx
);
2115 error
= mac_mount_check_umount(ctx
, mp
);
2122 * Must be the root of the filesystem
2124 if ((vp
->v_flag
& VROOT
) == 0) {
2130 /* safedounmount consumes the mount ref */
2131 return safedounmount(mp
, uap
->flags
, ctx
);
/*
 * vfs_unmountbyfsid: unmount the filesystem identified by `fsid`.
 *
 * The mount is found with mount_list_lookupby_fsid(fsid, 0, 1); a null
 * result is the failure case (errno not visible in this extract).
 * Otherwise the mount is handed to safedounmount(), which consumes the
 * mount reference.
 */
2135 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
2139 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
2140 if (mp
== (mount_t
)0) {
2145 /* safedounmount consumes the mount ref */
2146 return safedounmount(mp
, flags
, ctx
);
/*
 * safedounmount: policy checks in front of dounmount().
 *
 * Checks visible here, in order:
 *   - an unresponsive filesystem combined with MNT_NOBLOCK and no MNT_FORCE
 *     is refused (EBUSY per the comment below);
 *   - unless the mount is tagged MNTK_PERMIT_UNMOUNT and this is not a
 *     forced unmount, the caller must be the recorded f_owner or pass
 *     suser();
 *   - the root filesystem (MNT_ROOTFS) and MNTK_SYSTEM mounts are always
 *     busy (EBUSY);
 *   - with CONFIG_IMGSRC_ACCESS, mounts flagged MNTK_BACKS_ROOT get a
 *     further check (body not present in this extract).
 * If all pass, dounmount(mp, flags, 1, ctx) is called.
 *
 * NOTE(review): MNT_LNOTRESP is tested against mnt_kern_flag, whereas the
 * other MNT_L* flags in this file (MNT_LUNMOUNT, MNT_LDEAD, MNT_LWAIT,
 * MNT_LFORCE) are all kept in mnt_lflag -- confirm this is intended.
 */
2151 * The mount struct comes with a mount ref which will be consumed.
2152 * Do the actual file system unmount, prevent some common foot shooting.
2155 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2158 proc_t p
= vfs_context_proc(ctx
);
2161 * If the file system is not responding and MNT_NOBLOCK
2162 * is set and not a forced unmount then return EBUSY.
2164 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
2165 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
2171 * Skip authorization if the mount is tagged as permissive and
2172 * this is not a forced-unmount attempt.
2174 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
2176 * Only root, or the user that did the original mount is
2177 * permitted to unmount this filesystem.
2179 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
2180 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
2185 * Don't allow unmounting the root file system (or the associated VM or DATA mounts) .
2187 if ((mp
->mnt_flag
& MNT_ROOTFS
) || (mp
->mnt_kern_flag
& MNTK_SYSTEM
)) {
2188 error
= EBUSY
; /* the root (or associated volumes) is always busy */
2192 #ifdef CONFIG_IMGSRC_ACCESS
2193 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
2197 #endif /* CONFIG_IMGSRC_ACCESS */
2199 return dounmount(mp
, flags
, 1, ctx
);
/*
 * dounmount: perform the unmount of `mp`.
 *
 * `flags` carries MNT_FORCE / MNT_NOBLOCK / MNT_LNOSUB; `withref` is passed
 * as 1 by the callers visible in this file (its use inside this function is
 * not present in this extract).
 *
 * Phases visible here, in order:
 *   1. Non-forced unmounts notify fsevents first (fsevent_unmount()).
 *   2. If MNT_LUNMOUNT is already set the call is refused. Otherwise
 *      MNT_LFORCE is set for forced unmounts, P_NOREMOTEHANG is OR-ed into
 *      the process flags for MNT_NOBLOCK callers other than kernproc (old
 *      flags saved in pflags_save), MNTK_UNMOUNT and MNT_LUNMOUNT are set,
 *      MNT_ASYNC is cleared and mnt_realrootvp is reset to NULLVP.
 *   3. A forced unmount without MNT_LNOSUB first unmounts submounts via
 *      dounmount_submounts().
 *   4. mnt_rwlock is taken exclusively. For non-forced unmounts the UBC is
 *      flushed (ubc_umount()) and writable mounts are synced with
 *      VFS_SYNC(MNT_WAIT).
 *   5. vflush() is run with SKIPSWAP | SKIPSYSTEM | SKIPROOT (plus
 *      FORCECLOSE when forced), iterators are drained, and VFS_UNMOUNT() is
 *      called.
 *   6. Teardown: close the device vnode for VFC_VFSLOCALARGS filesystems,
 *      remove the mount from the mount list, clear v_mountedhere / VMOUNT
 *      on the covered vnode, drop vfc_refcount, purge the name cache,
 *      signal VQ_UNMOUNT, mark the mount MNT_LDEAD and wake MNT_LWAIT
 *      sleepers.
 *   7. Epilogue: restore P_NOREMOTEHANG if it was clear before, invoke the
 *      trigger callback, release mnt_rwlock, and for a covered vnode drop
 *      the cross reference, re-arm a trigger vnode, release the vnode, and
 *      post NOTE_WRITE on its parent. With no covered vnode, a MNT_ROOTFS
 *      mount is destroyed and freed here; the panic at the end is
 *      presumably the remaining (neither) case.
 *
 * Three places clear MNTK_UNMOUNT / MNT_LUNMOUNT / MNT_LFORCE again;
 * presumably those are the back-out paths after a failed VFS_SYNC, vflush
 * or VFS_UNMOUNT -- TODO confirm, the conditions, gotos and return values
 * are not present in this extract.
 */
2207 * Do the actual file system unmount.
2210 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
2212 vnode_t coveredvp
= (vnode_t
)0;
2215 int forcedunmount
= 0;
2217 struct vnode
*devvp
= NULLVP
;
2219 proc_t p
= vfs_context_proc(ctx
);
2221 int pflags_save
= 0;
2222 #endif /* CONFIG_TRIGGERS */
2225 if (!(flags
& MNT_FORCE
)) {
2226 fsevent_unmount(mp
, ctx
); /* has to come first! */
2233 * If already an unmount in progress just return EBUSY.
2234 * Even a forced unmount cannot override.
2236 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
2244 if (flags
& MNT_FORCE
) {
2246 mp
->mnt_lflag
|= MNT_LFORCE
;
2250 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2251 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2255 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2256 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2257 mp
->mnt_flag
&= ~MNT_ASYNC
;
2259 * anyone currently in the fast path that
2260 * trips over the cached rootvp will be
2261 * dumped out and forced into the slow path
2262 * to regenerate a new cached value
2264 mp
->mnt_realrootvp
= NULLVP
;
2267 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2269 * Force unmount any mounts in this filesystem.
2270 * If any unmounts fail - just leave them dangling.
2273 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2277 * taking the name_cache_lock exclusively will
2278 * insure that everyone is out of the fast path who
2279 * might be trying to use a now stale copy of
2280 * vp->v_mountedhere->mnt_realrootvp
2281 * bumping mount_generation causes the cached values
2286 name_cache_unlock();
2289 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2294 if (forcedunmount
== 0) {
2295 ubc_umount(mp
); /* release cached vnodes */
2296 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2297 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2300 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2301 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2302 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2308 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2311 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2314 if (forcedunmount
) {
2315 lflags
|= FORCECLOSE
;
2317 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2318 if ((forcedunmount
== 0) && error
) {
2320 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2321 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2322 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2326 /* make sure there is no one in the mount iterations or lookup */
2327 mount_iterdrain(mp
);
2329 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2331 mount_iterreset(mp
);
2333 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2334 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2335 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2339 /* increment the operations count */
2341 OSAddAtomic(1, &vfs_nummntops
);
2344 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2345 /* hold an io reference and drop the usecount before close */
2346 devvp
= mp
->mnt_devvp
;
2347 vnode_getalways(devvp
);
2349 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2351 vnode_clearmountedon(devvp
);
/* mnt_rwlock is dropped around mount_list_remove() and then re-taken. */
2354 lck_rw_done(&mp
->mnt_rwlock
);
2355 mount_list_remove(mp
);
2356 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2358 /* mark the mount point hook in the vp but not drop the ref yet */
2359 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2361 * The covered vnode needs special handling. Trying to get an
2362 * iocount must not block here as this may lead to deadlocks
2363 * if the Filesystem to which the covered vnode belongs is
2364 * undergoing forced unmounts. Since we hold a usecount, the
2365 * vnode cannot be reused (it can, however, still be terminated)
2367 vnode_getalways(coveredvp
);
2368 vnode_lock_spin(coveredvp
);
2371 coveredvp
->v_mountedhere
= (struct mount
*)0;
2372 CLR(coveredvp
->v_flag
, VMOUNT
);
2374 vnode_unlock(coveredvp
);
2375 vnode_put(coveredvp
);
2379 mp
->mnt_vtable
->vfc_refcount
--;
2380 mount_list_unlock();
2382 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2383 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2385 mp
->mnt_lflag
|= MNT_LDEAD
;
2387 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2389 * do the wakeup here
2390 * in case we block in mount_refdrain
2391 * which will drop the mount lock
2392 * and allow anyone blocked in vfs_busy
2393 * to wakeup and see the LDEAD state
2395 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2396 wakeup((caddr_t
)mp
);
2400 /* free disk_conditioner_info structure for this mount */
2401 disk_conditioner_unmount(mp
);
2404 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2405 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2410 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2411 // Restore P_NOREMOTEHANG bit to its previous value
2412 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2413 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2418 * Callback and context are set together under the mount lock, and
2419 * never cleared, so we're safe to examine them here, drop the lock,
2422 if (mp
->mnt_triggercallback
!= NULL
) {
2425 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2426 } else if (did_vflush
) {
2427 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2434 #endif /* CONFIG_TRIGGERS */
2436 lck_rw_done(&mp
->mnt_rwlock
);
2439 wakeup((caddr_t
)mp
);
2443 if ((coveredvp
!= NULLVP
)) {
2444 vnode_t pvp
= NULLVP
;
2447 * The covered vnode needs special handling. Trying to
2448 * get an iocount must not block here as this may lead
2449 * to deadlocks if the Filesystem to which the covered
2450 * vnode belongs is undergoing forced unmounts. Since we
2451 * hold a usecount, the vnode cannot be reused
2452 * (it can, however, still be terminated).
2454 vnode_getalways(coveredvp
);
2456 mount_dropcrossref(mp
, coveredvp
, 0);
2458 * We'll _try_ to detect if this really needs to be
2459 * done. The coveredvp can only be in termination (or
2460 * terminated) if the coveredvp's mount point is in a
2461 * forced unmount (or has been) since we still hold the
2464 if (!vnode_isrecycled(coveredvp
)) {
2465 pvp
= vnode_getparent(coveredvp
);
2467 if (coveredvp
->v_resolve
) {
2468 vnode_trigger_rearm(coveredvp
, ctx
);
2473 vnode_rele(coveredvp
);
2474 vnode_put(coveredvp
);
2478 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2481 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2482 mount_lock_destroy(mp
);
2484 mac_mount_label_destroy(mp
);
2486 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2488 panic("dounmount: no coveredvp");
/*
 * dounmount_submounts: unmount every filesystem mounted (directly or
 * transitively) inside `mp`.
 *
 * Approach visible here:
 *   - size an fsid array from the mount list (`count` is derived from the
 *     TAILQ_FOREACH walk; the loop body is not present in this extract) and
 *     allocate it with M_NOWAIT, giving up if the allocation fails;
 *   - seed fsids[0] with mp's own fsid;
 *   - walk the mount list starting after mp; for each later mount, take the
 *     fsid of the filesystem that owns its covered vnode and, if that fsid
 *     is already in fsids[0..m], append the mount's own fsid. This relies
 *     on the list being in mount order, as the comment below explains;
 *   - after dropping the mount list lock, unmount entries m..1 in reverse
 *     order (fsids[0], mp itself, is skipped) by looking each up again by
 *     fsid and calling dounmount(); errors are ignored;
 *   - free the array.
 */
2495 * Unmount any mounts in this filesystem.
2498 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2501 fsid_t
*fsids
, fsid
;
2503 int count
= 0, i
, m
= 0;
2508 // Get an array to hold the submounts fsids.
2509 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2511 fsids_sz
= count
* sizeof(fsid_t
);
2512 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2513 if (fsids
== NULL
) {
2514 mount_list_unlock();
2517 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2520 * Fill the array with submount fsids.
2521 * Since mounts are always added to the tail of the mount list, the
2522 * list is always in mount order.
2523 * For each mount check if the mounted-on vnode belongs to a
2524 * mount that's already added to our array of mounts to be unmounted.
2526 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2527 vp
= smp
->mnt_vnodecovered
;
2531 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2532 for (i
= 0; i
<= m
; i
++) {
2533 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2534 fsids
[i
].val
[1] == fsid
.val
[1]) {
2535 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2540 mount_list_unlock();
2542 // Unmount the submounts in reverse order. Ignore errors.
2543 for (i
= m
; i
> 0; i
--) {
2544 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2547 mount_iterdrop(smp
);
2548 (void) dounmount(smp
, flags
, 1, ctx
);
2553 FREE(fsids
, M_TEMP
);
/*
 * mount_dropcrossref: drop a cross reference on `mp` held via vnode `dp`,
 * freeing the mount when it was the last one.
 *
 * Visible logic: a negative mnt_crossref panics. When dp no longer points
 * at mp (mp != dp->v_mountedhere) and mnt_crossref has reached zero, the
 * mount's locks and MAC label are destroyed and the structure is returned
 * to its zone with FREE_ZONE(). vnode_put_locked(dp) appears on both
 * paths; presumably it is conditional on `need_put` -- TODO confirm, the
 * decrement of mnt_crossref, the locking, and the need_put tests are not
 * present in this extract.
 */
2558 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2563 if (mp
->mnt_crossref
< 0) {
2564 panic("mount cross refs -ve");
2567 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2569 vnode_put_locked(dp
);
2573 mount_lock_destroy(mp
);
2575 mac_mount_label_destroy(mp
);
2577 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2581 vnode_put_locked(dp
);
2588 * Sync each mounted filesystem.
2594 int print_vmpage_stat
= 0;
/*
 * sync_callback (summary): per-mount iterator callback that calls
 * VFS_SYNC() on mounts that are not MNT_RDONLY, under the kernel VFS
 * context, and always returns VFS_RETURNED.
 *
 * `arg`, when supplied, is read as a uint32_t waitfor value; the default is
 * MNT_NOWAIT. Only MNT_WAIT, MNT_NOWAIT and MNT_DWAIT, each optionally
 * OR-ed with MNT_VOLUME, are accepted -- anything else panics.
 * MNT_ASYNC is cleared around the VFS_SYNC() call and set again afterwards;
 * presumably only when it was set on entry (saved in `asyncflag`) -- the
 * guarding condition is not present in this extract.
 */
2597 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2598 * mounted read-write with the passed waitfor value.
2600 * Parameters: mp mount-point descriptor per mounted file-system instance.
2601 * arg user argument (please see below)
2603 * User argument is a pointer to 32 bit unsigned integer which describes the
2604 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2605 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2608 * Returns: VFS_RETURNED
2611 sync_callback(mount_t mp
, void *arg
)
2613 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2614 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2615 unsigned waitfor
= MNT_NOWAIT
;
2618 waitfor
= *(uint32_t*)arg
;
2621 /* Sanity check for flags - these are the only valid combinations for the flag bits*/
2622 if (waitfor
!= MNT_WAIT
&&
2623 waitfor
!= (MNT_WAIT
| MNT_VOLUME
) &&
2624 waitfor
!= MNT_NOWAIT
&&
2625 waitfor
!= (MNT_NOWAIT
| MNT_VOLUME
) &&
2626 waitfor
!= MNT_DWAIT
&&
2627 waitfor
!= (MNT_DWAIT
| MNT_VOLUME
)) {
2628 panic("Passed inappropriate waitfor %u to "
2629 "sync_callback()", waitfor
);
2632 mp
->mnt_flag
&= ~MNT_ASYNC
;
2633 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel());
2635 mp
->mnt_flag
|= MNT_ASYNC
;
2639 return VFS_RETURNED
;
/*
 * sync: system call entry point; all three arguments are unused.
 *
 * Iterates every mount with vfs_iterate(LK_NOWAIT, ...) and sync_callback()
 * with a NULL argument, so each writable mount is synced with the
 * callback's default waitfor (MNT_NOWAIT). When print_vmpage_stat is
 * non-zero, vm_countdirtypages() is called afterwards. The trailing #endif
 * closes a DIAGNOSTIC section whose opening #if is not present in this
 * extract.
 */
2644 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2646 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2648 if (print_vmpage_stat
) {
2649 vm_countdirtypages();
2656 #endif /* DIAGNOSTIC */
2662 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2663 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
/*
 * sync_internal_callback: per-mount filter in front of sync_callback().
 *
 * `arg` points to a sync_type_t. A mount counts as "reliable" when it is
 * not MNTK_VIRTUALDEV and is MNT_LOCAL. Mounts that do not match the
 * requested class (SYNC_ONLY_RELIABLE_MEDIA / SYNC_ONLY_UNRELIABLE_MEDIA)
 * are skipped; the rest are passed to sync_callback() with a NULL
 * argument. Always returns VFS_RETURNED.
 */
2667 sync_internal_callback(mount_t mp
, void *arg
)
2670 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2671 (mp
->mnt_flag
& MNT_LOCAL
);
2672 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2674 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2675 return VFS_RETURNED
;
2676 } else if ((sync_type
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2677 return VFS_RETURNED
;
2681 (void)sync_callback(mp
, NULL
);
2683 return VFS_RETURNED
;
2686 int sync_thread_state
= 0;
2687 int sync_timeout_seconds
= 5;
2689 #define SYNC_THREAD_RUN 0x0001
2690 #define SYNC_THREAD_RUNNING 0x0002
/*
 * sync_thread: body of the kernel thread started by sync_internal().
 *
 * With sync_mtx_lck held it loops while SYNC_THREAD_RUN is set in
 * sync_thread_state: the flag is cleared, the mutex dropped, and all mounts
 * are iterated twice -- first reliable media, then unreliable media (see
 * sync_internal_callback()) -- before the mutex is re-taken. A request that
 * sets SYNC_THREAD_RUN again during a pass therefore causes another pass.
 *
 * On exit it wakes sleepers on &sync_thread_state and clears
 * SYNC_THREAD_RUNNING while still holding the mutex, then unlocks. Both
 * arguments are unused.
 */
2693 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2695 sync_type_t sync_type
;
2697 lck_mtx_lock(sync_mtx_lck
);
2698 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2699 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2700 lck_mtx_unlock(sync_mtx_lck
);
2702 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2703 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2704 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2705 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2707 lck_mtx_lock(sync_mtx_lck
);
2710 * This wakeup _has_ to be issued before the lock is released otherwise
2711 * we may end up waking up a thread in sync_internal which is
2712 * expecting a wakeup from a thread it just created and not from this
2713 * thread which is about to exit.
2715 wakeup(&sync_thread_state
);
2716 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2717 lck_mtx_unlock(sync_mtx_lck
);
2719 if (print_vmpage_stat
) {
2720 vm_countdirtypages();
2727 #endif /* DIAGNOSTIC */
2730 struct timeval sync_timeout_last_print
= {.tv_sec
= 0, .tv_usec
= 0};
/*
 * sync_internal (summary): request a sync pass from sync_thread() and wait
 * for it, bounded by sync_timeout_seconds.
 *
 * With sync_mtx_lck held, SYNC_THREAD_RUN is set. If no sync thread is
 * running (SYNC_THREAD_RUNNING clear), the flag is set and a new thread is
 * started with kernel_thread_start(); on failure the flag is cleared again,
 * the mutex released and "sync_thread failed" logged. The caller then
 * sleeps on &sync_thread_state with PDROP (so msleep releases the mutex)
 * and a timeout of sync_timeout_seconds.
 *
 * The "sync timed out" message is rate-limited: it is printed only when
 * more than 120 seconds have passed since sync_timeout_last_print. A thread
 * reference obtained here is released with thread_deallocate().
 * NOTE(review): the function's name/parameter line, the test of msleep()'s
 * result and the return value are not present in this extract.
 */
2733 * An in-kernel sync for power management to call.
2734 * This function always returns within sync_timeout seconds.
2736 __private_extern__
int
2741 int thread_created
= FALSE
;
2742 struct timespec ts
= {.tv_sec
= sync_timeout_seconds
, .tv_nsec
= 0};
2744 lck_mtx_lock(sync_mtx_lck
);
2745 sync_thread_state
|= SYNC_THREAD_RUN
;
2746 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2749 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2750 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2751 if (kr
!= KERN_SUCCESS
) {
2752 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2753 lck_mtx_unlock(sync_mtx_lck
);
2754 printf("sync_thread failed\n");
2757 thread_created
= TRUE
;
2760 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2761 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2766 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2767 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2768 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2772 if (thread_created
) {
2773 thread_deallocate(thd
);
2777 } /* end of sync_internal call */
/*
 * quotactl: system call entry point for quota control on the filesystem
 * containing uap->path.
 *
 * Flow visible here:
 *   - audit uid/cmd, look the path up, take the mount from the resulting
 *     vnode and drop the vnode;
 *   - derive the sub-command (uap->cmd >> SUBCMDSHIFT) and stage the
 *     argument in kernel memory according to it: a MAXPATHLEN buffer filled
 *     with copyinstr() for the quota-file case, the local my_dqblk for the
 *     dqblk cases (converted from a user_dqblk with munge_dqblk() for
 *     64-bit processes), or the local quota_status integer for the status
 *     case;
 *   - call VFS_QUOTACTL() with the staged pointer;
 *   - post-process by sub-command: free the path buffer, or copy the dqblk
 *     / status integer back out to uap->arg.
 *
 * NOTE(review): the `case` labels, error checks and return are not present
 * in this extract, so which sub-command maps to which staging branch must
 * be confirmed against the complete source.
 */
2780 * Change filesystem quotas.
2784 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2787 int error
, quota_cmd
, quota_status
= 0;
2790 struct nameidata nd
;
2791 vfs_context_t ctx
= vfs_context_current();
2792 struct dqblk my_dqblk
= {};
2794 AUDIT_ARG(uid
, uap
->uid
);
2795 AUDIT_ARG(cmd
, uap
->cmd
);
2796 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2802 mp
= nd
.ni_vp
->v_mount
;
2803 vnode_put(nd
.ni_vp
);
2806 /* copyin any data we will need for downstream code */
2807 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2809 switch (quota_cmd
) {
2811 /* uap->arg specifies a file from which to take the quotas */
2812 fnamelen
= MAXPATHLEN
;
2813 datap
= kalloc(MAXPATHLEN
);
2814 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2817 /* uap->arg is a pointer to a dqblk structure. */
2818 datap
= (caddr_t
) &my_dqblk
;
2822 /* uap->arg is a pointer to a dqblk structure. */
2823 datap
= (caddr_t
) &my_dqblk
;
2824 if (proc_is64bit(p
)) {
2825 struct user_dqblk my_dqblk64
;
2826 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2828 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2831 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2835 /* uap->arg is a pointer to an integer */
/* Stage the status integer; it is copied out after VFS_QUOTACTL() below. */
2836 datap
= (caddr_t
) &quota_status
;
2844 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2847 switch (quota_cmd
) {
2849 if (datap
!= NULL
) {
2850 kfree(datap
, MAXPATHLEN
);
2854 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2856 if (proc_is64bit(p
)) {
2857 struct user_dqblk my_dqblk64
;
/* Zero first so no uninitialized kernel stack bytes are copied out. */
2859 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2860 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2861 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
2863 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
2868 /* uap->arg is a pointer to an integer */
2870 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl (alternate definition): every parameter is marked __unused.
 * Presumably this is the stub compiled when quota support is configured
 * out -- the enclosing preprocessor condition and the body are not present
 * in this extract.
 */
2881 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2888 * Get filesystem statistics.
2890 * Returns: 0 Success
2892 * vfs_update_vfsstat:???
2893 * munge_statfs:EFAULT
2897 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2900 struct vfsstatfs
*sp
;
2902 struct nameidata nd
;
2903 vfs_context_t ctx
= vfs_context_current();
2906 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2907 UIO_USERSPACE
, uap
->path
, ctx
);
2914 sp
= &mp
->mnt_vfsstat
;
2918 error
= mac_mount_check_stat(ctx
, mp
);
2925 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2931 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2937 * Get filesystem statistics.
2941 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2945 struct vfsstatfs
*sp
;
2948 AUDIT_ARG(fd
, uap
->fd
);
2950 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2954 error
= vnode_getwithref(vp
);
2960 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2969 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2975 sp
= &mp
->mnt_vfsstat
;
2976 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2980 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2990 vfs_get_statfs64(struct mount
*mp
, struct statfs64
*sfs
)
2992 struct vfsstatfs
*vsfs
= &mp
->mnt_vfsstat
;
2994 bzero(sfs
, sizeof(*sfs
));
2996 sfs
->f_bsize
= vsfs
->f_bsize
;
2997 sfs
->f_iosize
= (int32_t)vsfs
->f_iosize
;
2998 sfs
->f_blocks
= vsfs
->f_blocks
;
2999 sfs
->f_bfree
= vsfs
->f_bfree
;
3000 sfs
->f_bavail
= vsfs
->f_bavail
;
3001 sfs
->f_files
= vsfs
->f_files
;
3002 sfs
->f_ffree
= vsfs
->f_ffree
;
3003 sfs
->f_fsid
= vsfs
->f_fsid
;
3004 sfs
->f_owner
= vsfs
->f_owner
;
3005 sfs
->f_type
= mp
->mnt_vtable
->vfc_typenum
;
3006 sfs
->f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
3007 sfs
->f_fssubtype
= vsfs
->f_fssubtype
;
3008 sfs
->f_flags_ext
= ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
)) ? MNT_EXT_ROOT_DATA_VOL
: 0;
3009 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
3010 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
3012 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
);
3014 strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
);
3015 strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
);
3019 * Get file system statistics in 64-bit mode
3022 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
3026 struct nameidata nd
;
3027 struct statfs64 sfs
;
3028 vfs_context_t ctxp
= vfs_context_current();
3031 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3032 UIO_USERSPACE
, uap
->path
, ctxp
);
3042 error
= mac_mount_check_stat(ctxp
, mp
);
3049 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
3055 vfs_get_statfs64(mp
, &sfs
);
3056 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3057 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3058 /* This process does not want to see a seperate data volume mountpoint */
3059 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3061 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3068 * Get file system statistics in 64-bit mode
3071 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
3075 struct statfs64 sfs
;
3078 AUDIT_ARG(fd
, uap
->fd
);
3080 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3084 error
= vnode_getwithref(vp
);
3090 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3099 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3105 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3109 vfs_get_statfs64(mp
, &sfs
);
3110 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3111 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3112 /* This process does not want to see a seperate data volume mountpoint */
3113 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3115 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3124 struct getfsstat_struct
{
3135 getfsstat_callback(mount_t mp
, void * arg
)
3137 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3138 struct vfsstatfs
*sp
;
3140 vfs_context_t ctx
= vfs_context_current();
3142 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3144 error
= mac_mount_check_stat(ctx
, mp
);
3146 fstp
->error
= error
;
3147 return VFS_RETURNED_DONE
;
3150 sp
= &mp
->mnt_vfsstat
;
3152 * If MNT_NOWAIT is specified, do not refresh the
3153 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3155 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3156 (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3157 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3158 (error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) {
3159 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3160 return VFS_RETURNED
;
3164 * Need to handle LP64 version of struct statfs
3166 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
3168 fstp
->error
= error
;
3169 return VFS_RETURNED_DONE
;
3171 fstp
->sfsp
+= my_size
;
3175 error
= mac_mount_label_get(mp
, *fstp
->mp
);
3177 fstp
->error
= error
;
3178 return VFS_RETURNED_DONE
;
3185 return VFS_RETURNED
;
3189 * Get statistics on all filesystems.
3192 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
3194 struct __mac_getfsstat_args muap
;
3196 muap
.buf
= uap
->buf
;
3197 muap
.bufsize
= uap
->bufsize
;
3198 muap
.mac
= USER_ADDR_NULL
;
3200 muap
.flags
= uap
->flags
;
3202 return __mac_getfsstat(p
, &muap
, retval
);
3206 * __mac_getfsstat: Get MAC-related file system statistics
3208 * Parameters: p (ignored)
3209 * uap User argument descriptor (see below)
3210 * retval Count of file system statistics (N stats)
3212 * Indirect: uap->bufsize Buffer size
3213 * uap->macsize MAC info size
3214 * uap->buf Buffer where information will be returned
3216 * uap->flags File system flags
3219 * Returns: 0 Success
3224 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
3228 size_t count
, maxcount
, bufsize
, macsize
;
3229 struct getfsstat_struct fst
;
3231 if ((unsigned)uap
->bufsize
> INT_MAX
|| (unsigned)uap
->macsize
> INT_MAX
) {
3235 bufsize
= (size_t) uap
->bufsize
;
3236 macsize
= (size_t) uap
->macsize
;
3238 if (IS_64BIT_PROCESS(p
)) {
3239 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
3241 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
3249 if (uap
->mac
!= USER_ADDR_NULL
) {
3254 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
3255 if (count
!= maxcount
) {
3259 /* Copy in the array */
3260 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
3265 error
= copyin(uap
->mac
, mp0
, macsize
);
3267 FREE(mp0
, M_MACTEMP
);
3271 /* Normalize to an array of user_addr_t */
3272 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
3274 FREE(mp0
, M_MACTEMP
);
3278 for (i
= 0; i
< count
; i
++) {
3279 if (IS_64BIT_PROCESS(p
)) {
3280 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3282 mp
[i
] = (user_addr_t
)mp0
[i
];
3285 FREE(mp0
, M_MACTEMP
);
3292 fst
.flags
= uap
->flags
;
3295 fst
.maxcount
= maxcount
;
3298 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
);
3301 FREE(mp
, M_MACTEMP
);
3305 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3309 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3310 *retval
= fst
.maxcount
;
3312 *retval
= fst
.count
;
3318 getfsstat64_callback(mount_t mp
, void * arg
)
3320 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3321 struct vfsstatfs
*sp
;
3322 struct statfs64 sfs
;
3325 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3327 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3329 fstp
->error
= error
;
3330 return VFS_RETURNED_DONE
;
3333 sp
= &mp
->mnt_vfsstat
;
3335 * If MNT_NOWAIT is specified, do not refresh the fsstat
3336 * cache. MNT_WAIT overrides MNT_NOWAIT.
3338 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3339 * getfsstat, since the constants are out of the same
3342 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3343 ((((fstp
->flags
& MNT_NOWAIT
) == 0) || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3344 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3345 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) {
3346 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3347 return VFS_RETURNED
;
3350 vfs_get_statfs64(mp
, &sfs
);
3351 error
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
));
3353 fstp
->error
= error
;
3354 return VFS_RETURNED_DONE
;
3356 fstp
->sfsp
+= sizeof(sfs
);
3359 return VFS_RETURNED
;
3363 * Get statistics on all file systems in 64 bit mode.
3366 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3369 int count
, maxcount
;
3370 struct getfsstat_struct fst
;
3372 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3378 fst
.flags
= uap
->flags
;
3381 fst
.maxcount
= maxcount
;
3383 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
);
3386 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3390 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3391 *retval
= fst
.maxcount
;
3393 *retval
= fst
.count
;
3400 * gets the associated vnode with the file descriptor passed.
3404 * ctx - vfs context of caller
3405 * fd - file descriptor for which vnode is required.
3406 * vpp - Pointer to pointer to vnode to be returned.
3408 * The vnode is returned with an iocount so any vnode obtained
3409 * by this call needs a vnode_put
3413 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3417 struct fileproc
*fp
;
3418 proc_t p
= vfs_context_proc(ctx
);
3422 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3427 error
= vnode_getwithref(vp
);
3429 (void)fp_drop(p
, fd
, fp
, 0);
3433 (void)fp_drop(p
, fd
, fp
, 0);
3439 * Wrapper function around namei to start lookup from a directory
3440 * specified by a file descriptor ni_dirfd.
3442 * In addition to all the errors returned by namei, this call can
3443 * return ENOTDIR if the file descriptor does not refer to a directory.
3444 * and EBADF if the file descriptor is not valid.
3447 nameiat(struct nameidata
*ndp
, int dirfd
)
3449 if ((dirfd
!= AT_FDCWD
) &&
3450 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3451 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3455 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3456 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3461 c
= *((char *)(ndp
->ni_dirp
));
3467 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3473 if (vnode_vtype(dvp_at
) != VDIR
) {
3478 ndp
->ni_dvp
= dvp_at
;
3479 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3481 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3491 * Change current working directory to a given file descriptor.
3495 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3497 struct filedesc
*fdp
= p
->p_fd
;
3503 vfs_context_t ctx
= vfs_context_current();
3505 AUDIT_ARG(fd
, uap
->fd
);
3506 if (per_thread
&& uap
->fd
== -1) {
3508 * Switching back from per-thread to per process CWD; verify we
3509 * in fact have one before proceeding. The only success case
3510 * for this code path is to return 0 preemptively after zapping
3511 * the thread structure contents.
3513 thread_t th
= vfs_context_thread(ctx
);
3515 uthread_t uth
= get_bsdthread_info(th
);
3517 uth
->uu_cdir
= NULLVP
;
3518 if (tvp
!= NULLVP
) {
3526 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3529 if ((error
= vnode_getwithref(vp
))) {
3534 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3536 if (vp
->v_type
!= VDIR
) {
3542 error
= mac_vnode_check_chdir(ctx
, vp
);
3547 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3552 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3553 if (vfs_busy(mp
, LK_NOWAIT
)) {
3557 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3568 if ((error
= vnode_ref(vp
))) {
3574 thread_t th
= vfs_context_thread(ctx
);
3576 uthread_t uth
= get_bsdthread_info(th
);
3579 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3605 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3607 return common_fchdir(p
, uap
, 0);
3611 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3613 return common_fchdir(p
, (void *)uap
, 1);
3618 * Change current working directory (".").
3620 * Returns: 0 Success
3621 * change_dir:ENOTDIR
3623 * vnode_ref:ENOENT No such file or directory
3627 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata
*ndp
, int per_thread
)
3629 struct filedesc
*fdp
= p
->p_fd
;
3633 error
= change_dir(ndp
, ctx
);
3637 if ((error
= vnode_ref(ndp
->ni_vp
))) {
3638 vnode_put(ndp
->ni_vp
);
3642 * drop the iocount we picked up in change_dir
3644 vnode_put(ndp
->ni_vp
);
3647 thread_t th
= vfs_context_thread(ctx
);
3649 uthread_t uth
= get_bsdthread_info(th
);
3651 uth
->uu_cdir
= ndp
->ni_vp
;
3652 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3654 vnode_rele(ndp
->ni_vp
);
3660 fdp
->fd_cdir
= ndp
->ni_vp
;
3673 * Change current working directory (".").
3675 * Returns: 0 Success
3676 * chdir_internal:ENOTDIR
3677 * chdir_internal:ENOENT No such file or directory
3678 * chdir_internal:???
3682 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3684 struct nameidata nd
;
3685 vfs_context_t ctx
= vfs_context_current();
3687 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3688 UIO_USERSPACE
, uap
->path
, ctx
);
3690 return chdir_internal(p
, ctx
, &nd
, per_thread
);
3697 * Change current working directory (".") for the entire process
3699 * Parameters: p Process requesting the call
3700 * uap User argument descriptor (see below)
3703 * Indirect parameters: uap->path Directory path
3705 * Returns: 0 Success
3706 * common_chdir: ENOTDIR
3707 * common_chdir: ENOENT No such file or directory
3712 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3714 return common_chdir(p
, (void *)uap
, 0);
3720 * Change current working directory (".") for a single thread
3722 * Parameters: p Process requesting the call
3723 * uap User argument descriptor (see below)
3726 * Indirect parameters: uap->path Directory path
3728 * Returns: 0 Success
3729 * common_chdir: ENOTDIR
3730 * common_chdir: ENOENT No such file or directory
3735 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3737 return common_chdir(p
, (void *)uap
, 1);
3742 * Change notion of root (``/'') directory.
3746 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3748 struct filedesc
*fdp
= p
->p_fd
;
3750 struct nameidata nd
;
3752 vfs_context_t ctx
= vfs_context_current();
3754 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3758 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3759 UIO_USERSPACE
, uap
->path
, ctx
);
3760 error
= change_dir(&nd
, ctx
);
3766 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3769 vnode_put(nd
.ni_vp
);
3774 if ((error
= vnode_ref(nd
.ni_vp
))) {
3775 vnode_put(nd
.ni_vp
);
3778 vnode_put(nd
.ni_vp
);
3782 fdp
->fd_rdir
= nd
.ni_vp
;
3783 fdp
->fd_flags
|= FD_CHROOT
;
3794 * Common routine for chroot and chdir.
3796 * Returns: 0 Success
3797 * ENOTDIR Not a directory
3798 * namei:??? [anything namei can return]
3799 * vnode_authorize:??? [anything vnode_authorize can return]
3802 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3807 if ((error
= namei(ndp
))) {
3813 if (vp
->v_type
!= VDIR
) {
3819 error
= mac_vnode_check_chdir(ctx
, vp
);
3826 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3836 * Free the vnode data (for directories) associated with the file glob.
3839 fg_vn_data_alloc(void)
3841 struct fd_vn_data
*fvdata
;
3843 /* Allocate per fd vnode data */
3844 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3845 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3846 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3851 * Free the vnode data (for directories) associated with the file glob.
3854 fg_vn_data_free(void *fgvndata
)
3856 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3858 if (fvdata
->fv_buf
) {
3859 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3861 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3862 FREE(fvdata
, M_FD_VN_DATA
);
3866 * Check permissions, allocate an open file structure,
3867 * and call the device open routine if any.
3869 * Returns: 0 Success
3880 * XXX Need to implement uid, gid
3883 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3884 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3887 proc_t p
= vfs_context_proc(ctx
);
3888 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3889 struct fileproc
*fp
;
3892 int type
, indx
, error
;
3894 struct vfs_context context
;
3898 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
3902 flags
= FFLAGS(uflags
);
3903 CLR(flags
, FENCRYPTED
);
3904 CLR(flags
, FUNENCRYPTED
);
3906 AUDIT_ARG(fflags
, oflags
);
3907 AUDIT_ARG(mode
, vap
->va_mode
);
3909 if ((error
= falloc_withalloc(p
,
3910 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3913 uu
->uu_dupfd
= -indx
- 1;
3915 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3916 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
3917 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3918 fp_drop(p
, indx
, NULL
, 0);
3923 if (error
== ERESTART
) {
3926 fp_free(p
, indx
, fp
);
3932 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3933 fp
->f_fglob
->fg_ops
= &vnops
;
3934 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3936 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3937 lf
.l_whence
= SEEK_SET
;
3940 if (flags
& O_EXLOCK
) {
3941 lf
.l_type
= F_WRLCK
;
3943 lf
.l_type
= F_RDLCK
;
3946 if ((flags
& FNONBLOCK
) == 0) {
3950 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3956 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
3959 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3962 /* try to truncate by setting the size attribute */
3963 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
3968 * For directories we hold some additional information in the fd.
3970 if (vnode_vtype(vp
) == VDIR
) {
3971 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3973 fp
->f_fglob
->fg_vn_data
= NULL
;
3979 * The first terminal open (without a O_NOCTTY) by a session leader
3980 * results in it being set as the controlling terminal.
3982 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3983 !(flags
& O_NOCTTY
)) {
3986 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3987 (caddr_t
)&tmp
, ctx
);
3991 if (flags
& O_CLOEXEC
) {
3992 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3994 if (flags
& O_CLOFORK
) {
3995 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3997 procfdtbl_releasefd(p
, indx
, NULL
);
3999 #if CONFIG_SECLUDED_MEMORY
4000 if (secluded_for_filecache
&&
4001 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
4002 vnode_vtype(vp
) == VREG
) {
4003 memory_object_control_t moc
;
4005 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
4007 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
4008 /* nothing to do... */
4009 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
4010 /* writable -> no longer eligible for secluded pages */
4011 memory_object_mark_eligible_for_secluded(moc
,
4013 } else if (secluded_for_filecache
== 1) {
4014 char pathname
[32] = { 0, };
4016 /* XXX FBDP: better way to detect /Applications/ ? */
4017 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4018 (void)copyinstr(ndp
->ni_dirp
,
4023 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
4028 pathname
[sizeof(pathname
) - 1] = '\0';
4029 if (strncmp(pathname
,
4031 strlen("/Applications/")) == 0 &&
4033 "/Applications/Camera.app/",
4034 strlen("/Applications/Camera.app/")) != 0) {
4037 * AND from "/Applications/"
4038 * AND not from "/Applications/Camera.app/"
4039 * ==> eligible for secluded
4041 memory_object_mark_eligible_for_secluded(moc
,
4044 } else if (secluded_for_filecache
== 2) {
4046 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4048 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4050 /* not implemented... */
4052 size_t len
= strlen(vp
->v_name
);
4053 if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) ||
4054 !strncmp(vp
->v_name
, "dyld", len
) ||
4055 !strncmp(vp
->v_name
, "launchd", len
) ||
4056 !strncmp(vp
->v_name
, "Camera", len
) ||
4057 !strncmp(vp
->v_name
, "mediaserverd", len
) ||
4058 !strncmp(vp
->v_name
, "SpringBoard", len
) ||
4059 !strncmp(vp
->v_name
, "backboardd", len
)) {
4061 * This file matters when launching Camera:
4062 * do not store its contents in the secluded
4063 * pool that will be drained on Camera launch.
4065 memory_object_mark_eligible_for_secluded(moc
,
4070 #endif /* CONFIG_SECLUDED_MEMORY */
4072 fp_drop(p
, indx
, fp
, 1);
4079 context
= *vfs_context_current();
4080 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
4082 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
4083 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
4084 lf
.l_whence
= SEEK_SET
;
4087 lf
.l_type
= F_UNLCK
;
4090 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
4093 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
4095 fp_free(p
, indx
, fp
);
4101 * While most of the *at syscall handlers can call nameiat() which
4102 * is a wrapper around namei, the use of namei and initialisation
4103 * of nameidata are far removed and in different functions - namei
4104 * gets called in vn_open_auth for open1. So we'll just do here what
4108 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4109 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
4112 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
4116 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4117 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
4122 c
= *((char *)(ndp
->ni_dirp
));
4128 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
4134 if (vnode_vtype(dvp_at
) != VDIR
) {
4139 ndp
->ni_dvp
= dvp_at
;
4140 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
4141 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
4148 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
4152 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4154 * Parameters: p Process requesting the open
4155 * uap User argument descriptor (see below)
4156 * retval Pointer to an area to receive the
4157 * return calue from the system call
4159 * Indirect: uap->path Path to open (same as 'open')
4160 * uap->flags Flags to open (same as 'open'
4161 * uap->uid UID to set, if creating
4162 * uap->gid GID to set, if creating
4163 * uap->mode File mode, if creating (same as 'open')
4164 * uap->xsecurity ACL to set, if creating
4166 * Returns: 0 Success
4169 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4171 * XXX: We should enummerate the possible errno values here, and where
4172 * in the code they originated.
4175 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
4177 struct filedesc
*fdp
= p
->p_fd
;
4179 kauth_filesec_t xsecdst
;
4180 struct vnode_attr va
;
4181 struct nameidata nd
;
4184 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4187 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
4188 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
4193 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4194 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4195 if (uap
->uid
!= KAUTH_UID_NONE
) {
4196 VATTR_SET(&va
, va_uid
, uap
->uid
);
4198 if (uap
->gid
!= KAUTH_GID_NONE
) {
4199 VATTR_SET(&va
, va_gid
, uap
->gid
);
4201 if (xsecdst
!= NULL
) {
4202 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4205 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4206 uap
->path
, vfs_context_current());
4208 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4209 fileproc_alloc_init
, NULL
, retval
);
4210 if (xsecdst
!= NULL
) {
4211 kauth_filesec_free(xsecdst
);
4218 * Go through the data-protected atomically controlled open (2)
4220 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4223 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
4225 int flags
= uap
->flags
;
4226 int class = uap
->class;
4227 int dpflags
= uap
->dpflags
;
4230 * Follow the same path as normal open(2)
4231 * Look up the item if it exists, and acquire the vnode.
4233 struct filedesc
*fdp
= p
->p_fd
;
4234 struct vnode_attr va
;
4235 struct nameidata nd
;
4240 /* Mask off all but regular access permissions */
4241 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4242 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4244 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4245 uap
->path
, vfs_context_current());
4248 * Initialize the extra fields in vnode_attr to pass down our
4250 * 1. target cprotect class.
4251 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4253 if (flags
& O_CREAT
) {
4254 /* lower level kernel code validates that the class is valid before applying it. */
4255 if (class != PROTECTION_CLASS_DEFAULT
) {
4257 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4258 * file behave the same as open (2)
4260 VATTR_SET(&va
, va_dataprotect_class
, class);
4264 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4265 if (flags
& (O_RDWR
| O_WRONLY
)) {
4266 /* Not allowed to write raw encrypted bytes */
4269 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4270 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4272 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4273 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4277 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4278 fileproc_alloc_init
, NULL
, retval
);
4284 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4285 int fd
, enum uio_seg segflg
, int *retval
)
4287 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4288 struct vnode_attr va
;
4289 struct nameidata nd
;
4293 /* Mask off all but regular access permissions */
4294 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4295 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4297 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4300 return open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
4305 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4307 __pthread_testcancel(1);
4308 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
4312 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4315 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4316 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
4320 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4323 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4324 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
4328 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4330 __pthread_testcancel(1);
4331 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4335 * openbyid_np: open a file given a file system id and a file system object id
4336 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4337 * file systems that don't support object ids it is a node id (uint64_t).
4339 * Parameters: p Process requesting the open
4340 * uap User argument descriptor (see below)
4341 * retval Pointer to an area to receive the
4342 * return calue from the system call
4344 * Indirect: uap->path Path to open (same as 'open')
4346 * uap->fsid id of target file system
4347 * uap->objid id of target file system object
4348 * uap->flags Flags to open (same as 'open')
4350 * Returns: 0 Success
4354 * XXX: We should enummerate the possible errno values here, and where
4355 * in the code they originated.
4358 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4364 int buflen
= MAXPATHLEN
;
4366 vfs_context_t ctx
= vfs_context_current();
4368 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4372 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4376 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4377 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4381 AUDIT_ARG(value32
, fsid
.val
[0]);
4382 AUDIT_ARG(value64
, objid
);
4384 /*resolve path from fsis, objid*/
4386 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4391 error
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
,
4392 buf
, FSOPT_ISREALFSID
, &pathlen
);
4398 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4406 error
= openat_internal(
4407 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4416 * Create a special file.
4418 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4421 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4423 struct vnode_attr va
;
4424 vfs_context_t ctx
= vfs_context_current();
4426 struct nameidata nd
;
4430 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4431 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4433 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4434 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4435 return mkfifo1(ctx
, uap
->path
, &va
);
4438 AUDIT_ARG(mode
, uap
->mode
);
4439 AUDIT_ARG(value32
, uap
->dev
);
4441 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4444 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4445 UIO_USERSPACE
, uap
->path
, ctx
);
4458 switch (uap
->mode
& S_IFMT
) {
4460 VATTR_SET(&va
, va_type
, VCHR
);
4463 VATTR_SET(&va
, va_type
, VBLK
);
4471 error
= mac_vnode_check_create(ctx
,
4472 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4478 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4482 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4487 int update_flags
= 0;
4489 // Make sure the name & parent pointers are hooked up
4490 if (vp
->v_name
== NULL
) {
4491 update_flags
|= VNODE_UPDATE_NAME
;
4493 if (vp
->v_parent
== NULLVP
) {
4494 update_flags
|= VNODE_UPDATE_PARENT
;
4498 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4502 add_fsevent(FSE_CREATE_FILE
, ctx
,
4510 * nameidone has to happen before we vnode_put(dvp)
4511 * since it may need to release the fs_nodelock on the dvp
4524 * Create a named pipe.
4526 * Returns: 0 Success
4529 * vnode_authorize:???
4533 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4537 struct nameidata nd
;
4539 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4540 UIO_USERSPACE
, upath
, ctx
);
4548 /* check that this is a new file and authorize addition */
4553 VATTR_SET(vap
, va_type
, VFIFO
);
4555 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4559 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4562 * nameidone has to happen before we vnode_put(dvp)
4563 * since it may need to release the fs_nodelock on the dvp
4577 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4579 * Parameters: p Process requesting the open
4580 * uap User argument descriptor (see below)
4583 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4584 * uap->uid UID to set
4585 * uap->gid GID to set
4586 * uap->mode File mode to set (same as 'mkfifo')
4587 * uap->xsecurity ACL to set, if creating
4589 * Returns: 0 Success
4592 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4594 * XXX: We should enumerate the possible errno values here, and where
4595 * in the code they originated.
4598 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4601 kauth_filesec_t xsecdst
;
4602 struct vnode_attr va
;
4604 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4606 xsecdst
= KAUTH_FILESEC_NONE
;
4607 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4608 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4614 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4615 if (uap
->uid
!= KAUTH_UID_NONE
) {
4616 VATTR_SET(&va
, va_uid
, uap
->uid
);
4618 if (uap
->gid
!= KAUTH_GID_NONE
) {
4619 VATTR_SET(&va
, va_gid
, uap
->gid
);
4621 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4622 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4625 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4627 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4628 kauth_filesec_free(xsecdst
);
4635 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4637 struct vnode_attr va
;
4640 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4642 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
4647 my_strrchr(char *p
, int ch
)
4651 for (save
= NULL
;; ++p
) {
4662 extern int safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
);
4663 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4664 extern int safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4667 safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
)
4669 int ret
, len
= _len
;
4671 *truncated_path
= 0;
4674 ret
= vn_getpath(dvp
, path
, &len
);
4676 ret
= vn_getpath_no_firmlink(dvp
, path
, &len
);
4678 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4680 path
[len
- 1] = '/';
4681 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
4682 if (len
> MAXPATHLEN
) {
4685 // the string got truncated!
4686 *truncated_path
= 1;
4687 ptr
= my_strrchr(path
, '/');
4689 *ptr
= '\0'; // chop off the string at the last directory component
4691 len
= strlen(path
) + 1;
4694 } else if (ret
== 0) {
4695 *truncated_path
= 1;
4696 } else if (ret
!= 0) {
4697 struct vnode
*mydvp
= dvp
;
4699 if (ret
!= ENOSPC
) {
4700 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4701 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4703 *truncated_path
= 1;
4706 if (mydvp
->v_parent
!= NULL
) {
4707 mydvp
= mydvp
->v_parent
;
4708 } else if (mydvp
->v_mount
) {
4709 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4712 // no parent and no mount point? only thing is to punt and say "/" changed
4713 strlcpy(path
, "/", _len
);
4718 if (mydvp
== NULL
) {
4724 ret
= vn_getpath(mydvp
, path
, &len
);
4726 ret
= vn_getpath_no_firmlink(mydvp
, path
, &len
);
4728 } while (ret
== ENOSPC
);
4735 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4737 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1);
4741 safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4743 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0);
4747 * Make a hard file link.
4749 * Returns: 0 Success
4754 * vnode_authorize:???
4759 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4760 user_addr_t link
, int flag
, enum uio_seg segflg
)
4762 vnode_t vp
, pvp
, dvp
, lvp
;
4763 struct nameidata nd
;
4769 int need_event
, has_listeners
, need_kpath2
;
4770 char *target_path
= NULL
;
4773 vp
= dvp
= lvp
= NULLVP
;
4775 /* look up the object we are linking to */
4776 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4777 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4780 error
= nameiat(&nd
, fd1
);
4789 * Normally, linking to directories is not supported.
4790 * However, some file systems may have limited support.
4792 if (vp
->v_type
== VDIR
) {
4793 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4794 error
= EPERM
; /* POSIX */
4798 /* Linking to a directory requires ownership. */
4799 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4800 struct vnode_attr dva
;
4803 VATTR_WANTED(&dva
, va_uid
);
4804 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4805 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4806 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4813 /* lookup the target node */
4817 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4818 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4820 error
= nameiat(&nd
, fd2
);
4828 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
4833 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4834 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
4838 /* target node must not exist */
4839 if (lvp
!= NULLVP
) {
4843 /* cannot link across mountpoints */
4844 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4849 /* authorize creation of the target node */
4850 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4854 /* and finally make the link */
4855 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4861 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4865 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4869 has_listeners
= kauth_authorize_fileop_has_listeners();
4873 if (AUDIT_RECORD_EXISTS()) {
4878 if (need_event
|| has_listeners
|| need_kpath2
) {
4879 char *link_to_path
= NULL
;
4880 int len
, link_name_len
;
4882 /* build the path to the new link file */
4883 GET_PATH(target_path
);
4884 if (target_path
== NULL
) {
4889 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4891 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
4893 if (has_listeners
) {
4894 /* build the path to file we are linking to */
4895 GET_PATH(link_to_path
);
4896 if (link_to_path
== NULL
) {
4901 link_name_len
= MAXPATHLEN
;
4902 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4904 * Call out to allow 3rd party notification of link.
4905 * Ignore result of kauth_authorize_fileop call.
4907 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4908 (uintptr_t)link_to_path
,
4909 (uintptr_t)target_path
);
4911 if (link_to_path
!= NULL
) {
4912 RELEASE_PATH(link_to_path
);
4917 /* construct fsevent */
4918 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4920 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4923 // build the path to the destination of the link
4924 add_fsevent(FSE_CREATE_FILE
, ctx
,
4925 FSE_ARG_STRING
, len
, target_path
,
4926 FSE_ARG_FINFO
, &finfo
,
4931 // need an iocount on pvp in this case
4932 if (pvp
&& pvp
!= dvp
) {
4933 error
= vnode_get(pvp
);
4940 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4941 FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
);
4943 if (pvp
&& pvp
!= dvp
) {
4951 * nameidone has to happen before we vnode_put(dvp)
4952 * since it may need to release the fs_nodelock on the dvp
4955 if (target_path
!= NULL
) {
4956 RELEASE_PATH(target_path
);
4970 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4972 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4973 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
4977 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4979 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
4983 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4984 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
4988 * Make a symbolic link.
4990 * We could add support for ACLs here too...
4994 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4995 user_addr_t link
, enum uio_seg segflg
)
4997 struct vnode_attr va
;
5000 struct nameidata nd
;
5006 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
5007 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
5008 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
5010 path
= (char *)path_data
;
5015 AUDIT_ARG(text
, path
); /* This is the link string */
5017 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
5020 error
= nameiat(&nd
, fd
);
5027 p
= vfs_context_proc(ctx
);
5029 VATTR_SET(&va
, va_type
, VLNK
);
5030 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
5033 error
= mac_vnode_check_create(ctx
,
5034 dvp
, &nd
.ni_cnd
, &va
);
5047 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
5049 /* get default ownership, etc. */
5051 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
5054 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
5058 if (error
== 0 && vp
) {
5059 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
5063 /* do fallback attribute handling */
5064 if (error
== 0 && vp
) {
5065 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
5069 int update_flags
= 0;
5071 /*check if a new vnode was created, else try to get one*/
5073 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
5075 nd
.ni_op
= OP_LOOKUP
;
5077 nd
.ni_cnd
.cn_flags
= 0;
5078 error
= nameiat(&nd
, fd
);
5086 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5087 /* call out to allow 3rd party notification of symlink.
5088 * Ignore result of kauth_authorize_fileop call.
5090 if (kauth_authorize_fileop_has_listeners() &&
5092 char *new_link_path
= NULL
;
5095 /* build the path to the new link file */
5096 new_link_path
= get_pathbuff();
5098 vn_getpath(dvp
, new_link_path
, &len
);
5099 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
5100 new_link_path
[len
- 1] = '/';
5101 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
5104 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
5105 (uintptr_t)path
, (uintptr_t)new_link_path
);
5106 if (new_link_path
!= NULL
) {
5107 release_pathbuff(new_link_path
);
5111 // Make sure the name & parent pointers are hooked up
5112 if (vp
->v_name
== NULL
) {
5113 update_flags
|= VNODE_UPDATE_NAME
;
5115 if (vp
->v_parent
== NULLVP
) {
5116 update_flags
|= VNODE_UPDATE_PARENT
;
5120 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
5124 add_fsevent(FSE_CREATE_FILE
, ctx
,
5132 * nameidone has to happen before we vnode_put(dvp)
5133 * since it may need to release the fs_nodelock on the dvp
5142 if (path
&& (path
!= (char *)path_data
)) {
5143 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
5150 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
5152 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
5153 uap
->link
, UIO_USERSPACE
);
5157 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
5158 __unused
int32_t *retval
)
5160 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
5161 uap
->path2
, UIO_USERSPACE
);
5165 * Delete a whiteout from the filesystem.
5166 * No longer supported.
5169 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
5175 * Delete a name from the filesystem.
5179 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
5180 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
5182 struct nameidata nd
;
5185 struct componentname
*cnp
;
5187 char *no_firmlink_path
= NULL
;
5189 int len_no_firmlink_path
= 0;
5192 struct vnode_attr va
;
5198 int truncated_no_firmlink_path
;
5200 struct vnode_attr
*vap
;
5202 int retry_count
= 0;
5205 cn_flags
= LOCKPARENT
;
5206 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
5207 cn_flags
|= AUDITVNPATH1
;
5209 /* If a starting dvp is passed, it trumps any fd passed. */
5215 /* unlink or delete is allowed on rsrc forks and named streams */
5216 cn_flags
|= CN_ALLOWRSRCFORK
;
5225 truncated_no_firmlink_path
= 0;
5228 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
5230 nd
.ni_dvp
= start_dvp
;
5231 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
5235 error
= nameiat(&nd
, fd
);
5244 /* With Carbon delete semantics, busy files cannot be deleted */
5245 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
5246 flags
|= VNODE_REMOVE_NODELETEBUSY
;
5249 /* Skip any potential upcalls if told to. */
5250 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
5251 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
5255 batched
= vnode_compound_remove_available(vp
);
5257 * The root of a mounted filesystem cannot be deleted.
5259 if ((vp
->v_flag
& VROOT
) || (dvp
->v_mount
!= vp
->v_mount
)) {
5264 #if DEVELOPMENT || DEBUG
5266 * XXX VSWAP: Check for entitlements or special flag here
5267 * so we can restrict access appropriately.
5269 #else /* DEVELOPMENT || DEBUG */
5271 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
5275 #endif /* DEVELOPMENT || DEBUG */
5278 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
5280 if (error
== ENOENT
) {
5281 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5292 if (!vnode_compound_remove_available(dvp
)) {
5293 panic("No vp, but no compound remove?");
5298 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5301 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5302 /* XXX need to get these data in batched VNOP */
5303 get_fse_info(vp
, &finfo
, ctx
);
5306 error
= vfs_get_notify_attributes(&va
);
5315 has_listeners
= kauth_authorize_fileop_has_listeners();
5316 if (need_event
|| has_listeners
) {
5324 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5325 if (no_firmlink_path
== NULL
) {
5326 GET_PATH(no_firmlink_path
);
5327 if (no_firmlink_path
== NULL
) {
5332 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
5336 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5337 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5341 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5343 if (error
== EKEEPLOOKING
) {
5345 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5348 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5349 panic("EKEEPLOOKING, but continue flag not set?");
5352 if (vnode_isdir(vp
)) {
5356 goto continue_lookup
;
5357 } else if (error
== ENOENT
&& batched
) {
5358 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5360 * For compound VNOPs, the authorization callback may
5361 * return ENOENT in case of racing hardlink lookups
5362 * hitting the name cache, redrive the lookup.
5372 * Call out to allow 3rd party notification of delete.
5373 * Ignore result of kauth_authorize_fileop call.
5376 if (has_listeners
) {
5377 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5378 KAUTH_FILEOP_DELETE
,
5383 if (vp
->v_flag
& VISHARDLINK
) {
5385 // if a hardlink gets deleted we want to blow away the
5386 // v_parent link because the path that got us to this
5387 // instance of the link is no longer valid. this will
5388 // force the next call to get the path to ask the file
5389 // system instead of just following the v_parent link.
5391 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5396 if (vp
->v_flag
& VISHARDLINK
) {
5397 get_fse_info(vp
, &finfo
, ctx
);
5399 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
5401 if (truncated_path
) {
5402 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5404 add_fsevent(FSE_DELETE
, ctx
,
5405 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
5406 FSE_ARG_FINFO
, &finfo
,
5418 if (no_firmlink_path
!= NULL
) {
5419 RELEASE_PATH(no_firmlink_path
);
5420 no_firmlink_path
= NULL
;
5423 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5424 * will cause its shadow file to go away if necessary.
5426 if (vp
&& (vnode_isnamedstream(vp
)) &&
5427 (vp
->v_parent
!= NULLVP
) &&
5428 vnode_isshadow(vp
)) {
5433 * nameidone has to happen before we vnode_put(dvp)
5434 * since it may need to release the fs_nodelock on the dvp
5450 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5451 enum uio_seg segflg
, int unlink_flags
)
5453 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5458 * Delete a name from the filesystem using Carbon semantics.
5461 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5463 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5464 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5468 * Delete a name from the filesystem using POSIX semantics.
5471 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5473 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5474 uap
->path
, UIO_USERSPACE
, 0);
5478 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5480 if (uap
->flag
& ~(AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5484 if (uap
->flag
& (AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5485 int unlink_flags
= 0;
5487 if (uap
->flag
& AT_REMOVEDIR_DATALESS
) {
5488 unlink_flags
|= VNODE_REMOVE_DATALESS_DIR
;
5490 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5491 uap
->path
, UIO_USERSPACE
, unlink_flags
);
5493 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5494 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5499 * Reposition read/write file offset.
5502 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5504 struct fileproc
*fp
;
5506 struct vfs_context
*ctx
;
5507 off_t offset
= uap
->offset
, file_size
;
5510 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5511 if (error
== ENOTSUP
) {
5516 if (vnode_isfifo(vp
)) {
5522 ctx
= vfs_context_current();
5524 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5525 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5528 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5536 if ((error
= vnode_getwithref(vp
))) {
5541 switch (uap
->whence
) {
5543 offset
+= fp
->f_fglob
->fg_offset
;
5546 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5549 offset
+= file_size
;
5554 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5557 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5563 if (uap
->offset
> 0 && offset
< 0) {
5564 /* Incremented/relative move past max size */
5568 * Allow negative offsets on character devices, per
5569 * POSIX 1003.1-2001. Most likely for writing disk
5572 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5573 /* Decremented/relative move before start */
5577 fp
->f_fglob
->fg_offset
= offset
;
5578 *retval
= fp
->f_fglob
->fg_offset
;
5584 * An lseek can affect whether data is "available to read." Use
5585 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5587 post_event_if_success(vp
, error
, NOTE_NONE
);
5588 (void)vnode_put(vp
);
5595 * Check access permissions.
5597 * Returns: 0 Success
5598 * vnode_authorize:???
5601 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5603 kauth_action_t action
;
5607 * If just the regular access bits, convert them to something
5608 * that vnode_authorize will understand.
5610 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5612 if (uflags
& R_OK
) {
5613 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5615 if (uflags
& W_OK
) {
5616 if (vnode_isdir(vp
)) {
5617 action
|= KAUTH_VNODE_ADD_FILE
|
5618 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5619 /* might want delete rights here too */
5621 action
|= KAUTH_VNODE_WRITE_DATA
;
5624 if (uflags
& X_OK
) {
5625 if (vnode_isdir(vp
)) {
5626 action
|= KAUTH_VNODE_SEARCH
;
5628 action
|= KAUTH_VNODE_EXECUTE
;
5632 /* take advantage of definition of uflags */
5633 action
= uflags
>> 8;
5637 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5643 /* action == 0 means only check for existence */
5645 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5656 * access_extended: Check access permissions in bulk.
5658 * Description: uap->entries Pointer to an array of accessx
5659 * descriptor structs, plus one or
5660 * more NULL terminated strings (see
5661 * "Notes" section below).
5662 * uap->size Size of the area pointed to by
5664 * uap->results Pointer to the results array.
5666 * Returns: 0 Success
5667 * ENOMEM Insufficient memory
5668 * EINVAL Invalid arguments
5669 * namei:EFAULT Bad address
5670 * namei:ENAMETOOLONG Filename too long
5671 * namei:ENOENT No such file or directory
5672 * namei:ELOOP Too many levels of symbolic links
5673 * namei:EBADF Bad file descriptor
5674 * namei:ENOTDIR Not a directory
5679 * uap->results Array contents modified
5681 * Notes: The uap->entries are structured as an arbitrary length array
5682 * of accessx descriptors, followed by one or more NULL terminated
5685 * struct accessx_descriptor[0]
5687 * struct accessx_descriptor[n]
5688 * char name_data[0];
5690 * We determine the entry count by walking the buffer containing
5691 * the uap->entries argument descriptor. For each descriptor we
5692 * see, the valid values for the offset ad_name_offset will be
5693 * in the byte range:
5695 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5697 * [ uap->entries + uap->size - 2 ]
5699 * since we must have at least one string, and the string must
5700 * be at least one character plus the NULL terminator in length.
5702 * XXX: Need to support the check-as uid argument
5705 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5707 struct accessx_descriptor
*input
= NULL
;
5708 errno_t
*result
= NULL
;
5711 unsigned int desc_max
, desc_actual
, i
, j
;
5712 struct vfs_context context
;
5713 struct nameidata nd
;
5717 #define ACCESSX_MAX_DESCR_ON_STACK 10
5718 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5720 context
.vc_ucred
= NULL
;
5723 * Validate parameters; if valid, copy the descriptor array and string
5724 * arguments into local memory. Before proceeding, the following
5725 * conditions must have been met:
5727 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5728 * o There must be sufficient room in the request for at least one
5729 * descriptor and a one byte NUL terminated string.
5730 * o The allocation of local storage must not fail.
5732 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
5735 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
5738 if (uap
->size
<= sizeof(stack_input
)) {
5739 input
= stack_input
;
5741 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5742 if (input
== NULL
) {
5747 error
= copyin(uap
->entries
, input
, uap
->size
);
5752 AUDIT_ARG(opaque
, input
, uap
->size
);
5755 * Force NUL termination of the copyin buffer to avoid namei() running
5756 * off the end. If the caller passes us bogus data, they may get a
5759 ((char *)input
)[uap
->size
- 1] = 0;
5762 * Access is defined as checking against the process' real identity,
5763 * even if operations are checking the effective identity. This
5764 * requires that we use a local vfs context.
5766 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5767 context
.vc_thread
= current_thread();
5770 * Find out how many entries we have, so we can allocate the result
5771 * array by walking the list and adjusting the count downward by the
5772 * earliest string offset we see.
5774 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5775 desc_actual
= desc_max
;
5776 for (i
= 0; i
< desc_actual
; i
++) {
5778 * Take the offset to the name string for this entry and
5779 * convert to an input array index, which would be one off
5780 * the end of the array if this entry was the lowest-addressed
5783 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5786 * An offset greater than the max allowable offset is an error.
5787 * It is also an error for any valid entry to point
5788 * to a location prior to the end of the current entry, if
5789 * it's not a reference to the string of the previous entry.
5791 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5796 /* Also do not let ad_name_offset point to something beyond the size of the input */
5797 if (input
[i
].ad_name_offset
>= uap
->size
) {
5803 * An offset of 0 means use the previous descriptor's offset;
5804 * this is used to chain multiple requests for the same file
5805 * to avoid multiple lookups.
5808 /* This is not valid for the first entry */
5817 * If the offset of the string for this descriptor is before
5818 * what we believe is the current actual last descriptor,
5819 * then we need to adjust our estimate downward; this permits
5820 * the string table following the last descriptor to be out
5821 * of order relative to the descriptor list.
5823 if (j
< desc_actual
) {
5829 * We limit the actual number of descriptors we are willing to process
5830 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5831 * requested does not exceed this limit,
5833 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5837 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5838 if (result
== NULL
) {
5844 * Do the work by iterating over the descriptor entries we know to
5845 * at least appear to contain valid data.
5848 for (i
= 0; i
< desc_actual
; i
++) {
5850 * If the ad_name_offset is 0, then we use the previous
5851 * results to make the check; otherwise, we are looking up
5854 if (input
[i
].ad_name_offset
!= 0) {
5855 /* discard old vnodes */
5866 * Scan forward in the descriptor list to see if we
5867 * need the parent vnode. We will need it if we are
5868 * deleting, since we must have rights to remove
5869 * entries in the parent directory, as well as the
5870 * rights to delete the object itself.
5872 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5873 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
5874 if (input
[j
].ad_flags
& _DELETE_OK
) {
5879 niopts
= FOLLOW
| AUDITVNPATH1
;
5881 /* need parent for vnode_authorize for deletion test */
5883 niopts
|= WANTPARENT
;
5887 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5888 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5901 * Handle lookup errors.
5911 /* run this access check */
5912 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5915 /* fatal lookup error */
5921 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5923 /* copy out results */
5924 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5927 if (input
&& input
!= stack_input
) {
5928 FREE(input
, M_TEMP
);
5931 FREE(result
, M_TEMP
);
5939 if (IS_VALID_CRED(context
.vc_ucred
)) {
5940 kauth_cred_unref(&context
.vc_ucred
);
5947 * Returns: 0 Success
5948 * namei:EFAULT Bad address
5949 * namei:ENAMETOOLONG Filename too long
5950 * namei:ENOENT No such file or directory
5951 * namei:ELOOP Too many levels of symbolic links
5952 * namei:EBADF Bad file descriptor
5953 * namei:ENOTDIR Not a directory
5958 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5959 int flag
, enum uio_seg segflg
)
5962 struct nameidata nd
;
5964 struct vfs_context context
;
5966 int is_namedstream
= 0;
5970 * Unless the AT_EACCESS option is used, Access is defined as checking
5971 * against the process' real identity, even if operations are checking
5972 * the effective identity. So we need to tweak the credential
5973 * in the context for that case.
5975 if (!(flag
& AT_EACCESS
)) {
5976 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5978 context
.vc_ucred
= ctx
->vc_ucred
;
5980 context
.vc_thread
= ctx
->vc_thread
;
5983 niopts
= (flag
& AT_SYMLINK_NOFOLLOW
? NOFOLLOW
: FOLLOW
) | AUDITVNPATH1
;
5984 /* need parent for vnode_authorize for deletion test */
5985 if (amode
& _DELETE_OK
) {
5986 niopts
|= WANTPARENT
;
5988 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5992 /* access(F_OK) calls are allowed for resource forks. */
5993 if (amode
== F_OK
) {
5994 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5997 error
= nameiat(&nd
, fd
);
6003 /* Grab reference on the shadow stream file vnode to
6004 * force an inactive on release which will mark it
6007 if (vnode_isnamedstream(nd
.ni_vp
) &&
6008 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6009 vnode_isshadow(nd
.ni_vp
)) {
6011 vnode_ref(nd
.ni_vp
);
6015 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
6018 if (is_namedstream
) {
6019 vnode_rele(nd
.ni_vp
);
6023 vnode_put(nd
.ni_vp
);
6024 if (amode
& _DELETE_OK
) {
6025 vnode_put(nd
.ni_dvp
);
6030 if (!(flag
& AT_EACCESS
)) {
6031 kauth_cred_unref(&context
.vc_ucred
);
6037 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
6039 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
6040 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
6044 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
6045 __unused
int32_t *retval
)
6047 if (uap
->flag
& ~(AT_EACCESS
| AT_SYMLINK_NOFOLLOW
)) {
6051 return faccessat_internal(vfs_context_current(), uap
->fd
,
6052 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
6056 * Returns: 0 Success
6063 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
6064 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
6065 enum uio_seg segflg
, int fd
, int flag
)
6067 struct nameidata nd
;
6074 struct user64_stat user64_sb
;
6075 struct user32_stat user32_sb
;
6076 struct user64_stat64 user64_sb64
;
6077 struct user32_stat64 user32_sb64
;
6081 kauth_filesec_t fsec
;
6082 size_t xsecurity_bufsize
;
6084 struct fileproc
*fp
= NULL
;
6085 int needsrealdev
= 0;
6087 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6088 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
6092 int is_namedstream
= 0;
6093 /* stat calls are allowed for resource forks. */
6094 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6097 if (flag
& AT_FDONLY
) {
6100 error
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
);
6104 if ((error
= vnode_getwithref(fvp
))) {
6110 error
= nameiat(&nd
, fd
);
6115 fsec
= KAUTH_FILESEC_NONE
;
6117 statptr
= (void *)&source
;
6120 /* Grab reference on the shadow stream file vnode to
6121 * force an inactive on release which will mark it
6124 if (vnode_isnamedstream(nd
.ni_vp
) &&
6125 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6126 vnode_isshadow(nd
.ni_vp
)) {
6128 vnode_ref(nd
.ni_vp
);
6132 needsrealdev
= flag
& AT_REALDEV
? 1 : 0;
6133 if (fp
&& (xsecurity
== USER_ADDR_NULL
)) {
6135 * If the caller has the file open, and is not
6136 * requesting extended security information, we are
6137 * going to let them get the basic stat information.
6139 error
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
,
6140 fp
->f_fglob
->fg_cred
);
6142 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
),
6143 isstat64
, needsrealdev
, ctx
);
6147 if (is_namedstream
) {
6148 vnode_rele(nd
.ni_vp
);
6151 vnode_put(nd
.ni_vp
);
6161 /* Zap spare fields */
6162 if (isstat64
!= 0) {
6163 source
.sb64
.st_lspare
= 0;
6164 source
.sb64
.st_qspare
[0] = 0LL;
6165 source
.sb64
.st_qspare
[1] = 0LL;
6166 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6167 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
6168 my_size
= sizeof(dest
.user64_sb64
);
6169 sbp
= (caddr_t
)&dest
.user64_sb64
;
6171 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
6172 my_size
= sizeof(dest
.user32_sb64
);
6173 sbp
= (caddr_t
)&dest
.user32_sb64
;
6176 * Check if we raced (post lookup) against the last unlink of a file.
6178 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
6179 source
.sb64
.st_nlink
= 1;
6182 source
.sb
.st_lspare
= 0;
6183 source
.sb
.st_qspare
[0] = 0LL;
6184 source
.sb
.st_qspare
[1] = 0LL;
6185 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6186 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
6187 my_size
= sizeof(dest
.user64_sb
);
6188 sbp
= (caddr_t
)&dest
.user64_sb
;
6190 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
6191 my_size
= sizeof(dest
.user32_sb
);
6192 sbp
= (caddr_t
)&dest
.user32_sb
;
6196 * Check if we raced (post lookup) against the last unlink of a file.
6198 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
6199 source
.sb
.st_nlink
= 1;
6202 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
6206 /* caller wants extended security information? */
6207 if (xsecurity
!= USER_ADDR_NULL
) {
6208 /* did we get any? */
6209 if (fsec
== KAUTH_FILESEC_NONE
) {
6210 if (susize(xsecurity_size
, 0) != 0) {
6215 /* find the user buffer size */
6216 xsecurity_bufsize
= fusize(xsecurity_size
);
6218 /* copy out the actual data size */
6219 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
6224 /* if the caller supplied enough room, copy out to it */
6225 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
6226 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
6231 if (fsec
!= KAUTH_FILESEC_NONE
) {
6232 kauth_filesec_free(fsec
);
6238 * stat_extended: Get file status; with extended security (ACL).
6240 * Parameters: p (ignored)
6241 * uap User argument descriptor (see below)
6244 * Indirect: uap->path Path of file to get status from
6245 * uap->ub User buffer (holds file status info)
6246 * uap->xsecurity ACL to get (extended security)
6247 * uap->xsecurity_size Size of ACL
6249 * Returns: 0 Success
6254 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
6255 __unused
int32_t *retval
)
6257 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6258 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6263 * Returns: 0 Success
6264 * fstatat_internal:??? [see fstatat_internal() in this file]
6267 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
6269 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6270 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
6274 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
6276 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6277 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
6281 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6283 * Parameters: p (ignored)
6284 * uap User argument descriptor (see below)
6287 * Indirect: uap->path Path of file to get status from
6288 * uap->ub User buffer (holds file status info)
6289 * uap->xsecurity ACL to get (extended security)
6290 * uap->xsecurity_size Size of ACL
6292 * Returns: 0 Success
6297 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
6299 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6300 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6305 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6307 * Parameters: p (ignored)
6308 * uap User argument descriptor (see below)
6311 * Indirect: uap->path Path of file to get status from
6312 * uap->ub User buffer (holds file status info)
6313 * uap->xsecurity ACL to get (extended security)
6314 * uap->xsecurity_size Size of ACL
6316 * Returns: 0 Success
6321 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
6323 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6324 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6325 AT_SYMLINK_NOFOLLOW
);
6329 * Get file status; this version does not follow links.
6332 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
6334 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6335 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6339 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6341 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6342 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6346 * lstat64_extended: Get file status; can handle large inode numbers; does not
6347 * follow links; with extended security (ACL).
6349 * Parameters: p (ignored)
6350 * uap User argument descriptor (see below)
6353 * Indirect: uap->path Path of file to get status from
6354 * uap->ub User buffer (holds file status info)
6355 * uap->xsecurity ACL to get (extended security)
6356 * uap->xsecurity_size Size of ACL
6358 * Returns: 0 Success
6363 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6365 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6366 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6367 AT_SYMLINK_NOFOLLOW
);
6371 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
6373 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6377 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6378 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6382 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6383 __unused
int32_t *retval
)
6385 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6389 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6390 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6394 * Get configurable pathname variables.
6396 * Returns: 0 Success
6400 * Notes: Global implementation constants are intended to be
6401 * implemented in this function directly; all other constants
6402 * are per-FS implementation, and therefore must be handled in
6403 * each respective FS, instead.
6405 * XXX We implement some things globally right now that should actually be
6406 * XXX per-FS; we will need to deal with this at some point.
6410 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6413 struct nameidata nd
;
6414 vfs_context_t ctx
= vfs_context_current();
6416 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6417 UIO_USERSPACE
, uap
->path
, ctx
);
6423 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
6425 vnode_put(nd
.ni_vp
);
6431 * Return target name of a symbolic link.
6435 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6436 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6442 struct nameidata nd
;
6443 char uio_buf
[UIO_SIZEOF(1)];
6445 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6448 error
= nameiat(&nd
, fd
);
6456 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6457 &uio_buf
[0], sizeof(uio_buf
));
6458 uio_addiov(auio
, buf
, bufsize
);
6459 if (vp
->v_type
!= VLNK
) {
6463 error
= mac_vnode_check_readlink(ctx
, vp
);
6466 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6470 error
= VNOP_READLINK(vp
, auio
, ctx
);
6475 *retval
= bufsize
- (int)uio_resid(auio
);
6480 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6482 enum uio_seg procseg
;
6484 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6485 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6486 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6487 uap
->count
, procseg
, retval
);
6491 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6493 enum uio_seg procseg
;
6495 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6496 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6497 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6501 * Change file flags, the deep inner layer.
6504 chflags0(vnode_t vp
, struct vnode_attr
*va
,
6505 int (*setattr
)(vnode_t
, void *, vfs_context_t
),
6506 void *arg
, vfs_context_t ctx
)
6508 kauth_action_t action
= 0;
6512 error
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
);
6518 /* request authorisation, disregard immutability */
6519 if ((error
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) {
6523 * Request that the auth layer disregard those file flags it's allowed to when
6524 * authorizing this operation; we need to do this in order to be able to
6525 * clear immutable flags.
6527 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6530 error
= (*setattr
)(vp
, arg
, ctx
);
6534 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
);
6543 * Change file flags.
6545 * NOTE: this will vnode_put() `vp'
6548 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6550 struct vnode_attr va
;
6554 VATTR_SET(&va
, va_flags
, flags
);
6556 error
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
);
6559 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6567 * Change flags of a file given a path name.
6571 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6574 vfs_context_t ctx
= vfs_context_current();
6576 struct nameidata nd
;
6578 AUDIT_ARG(fflags
, uap
->flags
);
6579 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6580 UIO_USERSPACE
, uap
->path
, ctx
);
6588 /* we don't vnode_put() here because chflags1 does internally */
6589 error
= chflags1(vp
, uap
->flags
, ctx
);
6595 * Change flags of a file given a file descriptor.
6599 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6604 AUDIT_ARG(fd
, uap
->fd
);
6605 AUDIT_ARG(fflags
, uap
->flags
);
6606 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6610 if ((error
= vnode_getwithref(vp
))) {
6615 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6617 /* we don't vnode_put() here because chflags1 does internally */
6618 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6625 * Change security information on a filesystem object.
6627 * Returns: 0 Success
6628 * EPERM Operation not permitted
6629 * vnode_authattr:??? [anything vnode_authattr can return]
6630 * vnode_authorize:??? [anything vnode_authorize can return]
6631 * vnode_setattr:??? [anything vnode_setattr can return]
6633 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6634 * translated to EPERM before being returned.
6637 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6639 kauth_action_t action
;
6642 AUDIT_ARG(mode
, vap
->va_mode
);
6643 /* XXX audit new args */
6646 /* chmod calls are not allowed for resource forks. */
6647 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6653 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6654 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
6658 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6659 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6660 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6661 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6666 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6667 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6672 /* make sure that the caller is allowed to set this security information */
6673 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6674 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6675 if (error
== EACCES
) {
6681 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
6686 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6687 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6690 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6691 mac_vnode_notify_setowner(ctx
, vp
,
6692 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6693 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6696 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6697 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6706 * Change mode of a file given a path name.
6708 * Returns: 0 Success
6709 * namei:??? [anything namei can return]
6710 * chmod_vnode:??? [anything chmod_vnode can return]
6713 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6714 int fd
, int flag
, enum uio_seg segflg
)
6716 struct nameidata nd
;
6719 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6720 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6722 if ((error
= nameiat(&nd
, fd
))) {
6725 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6726 vnode_put(nd
.ni_vp
);
6732 * chmod_extended: Change the mode of a file given a path name; with extended
6733 * argument list (including extended security (ACL)).
6735 * Parameters: p Process requesting the open
6736 * uap User argument descriptor (see below)
6739 * Indirect: uap->path Path to object (same as 'chmod')
6740 * uap->uid UID to set
6741 * uap->gid GID to set
6742 * uap->mode File mode to set (same as 'chmod')
6743 * uap->xsecurity ACL to set (or delete)
6745 * Returns: 0 Success
6748 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6750 * XXX: We should enumerate the possible errno values here, and where
6751 * in the code they originated.
6754 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6757 struct vnode_attr va
;
6758 kauth_filesec_t xsecdst
;
6760 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6763 if (uap
->mode
!= -1) {
6764 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6766 if (uap
->uid
!= KAUTH_UID_NONE
) {
6767 VATTR_SET(&va
, va_uid
, uap
->uid
);
6769 if (uap
->gid
!= KAUTH_GID_NONE
) {
6770 VATTR_SET(&va
, va_gid
, uap
->gid
);
6774 switch (uap
->xsecurity
) {
6775 /* explicit remove request */
6776 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6777 VATTR_SET(&va
, va_acl
, NULL
);
6780 case USER_ADDR_NULL
:
6783 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6786 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6787 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6790 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6793 if (xsecdst
!= NULL
) {
6794 kauth_filesec_free(xsecdst
);
6800 * Returns: 0 Success
6801 * chmodat:??? [anything chmodat can return]
6804 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6805 int flag
, enum uio_seg segflg
)
6807 struct vnode_attr va
;
6810 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6812 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
6816 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6818 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6819 AT_FDCWD
, 0, UIO_USERSPACE
);
6823 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6825 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6829 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6830 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
6834 * Change mode of a file given a file descriptor.
6837 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6844 if ((error
= file_vnode(fd
, &vp
)) != 0) {
6847 if ((error
= vnode_getwithref(vp
)) != 0) {
6851 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6853 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6854 (void)vnode_put(vp
);
6861 * fchmod_extended: Change mode of a file given a file descriptor; with
6862 * extended argument list (including extended security (ACL)).
6864 * Parameters: p Process requesting to change file mode
6865 * uap User argument descriptor (see below)
6868 * Indirect: uap->mode File mode to set (same as 'chmod')
6869 * uap->uid UID to set
6870 * uap->gid GID to set
6871 * uap->xsecurity ACL to set (or delete)
6872 * uap->fd File descriptor of file to change mode
6874 * Returns: 0 Success
6879 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6882 struct vnode_attr va
;
6883 kauth_filesec_t xsecdst
;
6885 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6888 if (uap
->mode
!= -1) {
6889 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6891 if (uap
->uid
!= KAUTH_UID_NONE
) {
6892 VATTR_SET(&va
, va_uid
, uap
->uid
);
6894 if (uap
->gid
!= KAUTH_GID_NONE
) {
6895 VATTR_SET(&va
, va_gid
, uap
->gid
);
6899 switch (uap
->xsecurity
) {
6900 case USER_ADDR_NULL
:
6901 VATTR_SET(&va
, va_acl
, NULL
);
6903 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6904 VATTR_SET(&va
, va_acl
, NULL
);
6907 case CAST_USER_ADDR_T(-1):
6910 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6913 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6916 error
= fchmod1(p
, uap
->fd
, &va
);
6919 switch (uap
->xsecurity
) {
6920 case USER_ADDR_NULL
:
6921 case CAST_USER_ADDR_T(-1):
6924 if (xsecdst
!= NULL
) {
6925 kauth_filesec_free(xsecdst
);
6932 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6934 struct vnode_attr va
;
6937 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6939 return fchmod1(p
, uap
->fd
, &va
);
6944 * Set ownership given a path name.
6948 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6949 gid_t gid
, int flag
, enum uio_seg segflg
)
6952 struct vnode_attr va
;
6954 struct nameidata nd
;
6956 kauth_action_t action
;
6958 AUDIT_ARG(owner
, uid
, gid
);
6960 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6961 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6963 error
= nameiat(&nd
, fd
);
6972 if (uid
!= (uid_t
)VNOVAL
) {
6973 VATTR_SET(&va
, va_uid
, uid
);
6975 if (gid
!= (gid_t
)VNOVAL
) {
6976 VATTR_SET(&va
, va_gid
, gid
);
6980 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6986 /* preflight and authorize attribute changes */
6987 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6990 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6993 error
= vnode_setattr(vp
, &va
, ctx
);
6997 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
7003 * EACCES is only allowed from namei(); permissions failure should
7004 * return EPERM, so we need to translate the error code.
7006 if (error
== EACCES
) {
7015 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
7017 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7018 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
7022 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
7024 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7025 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
7029 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
7031 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7035 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
7036 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
7040 * Set ownership given a file descriptor.
7044 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
7046 struct vnode_attr va
;
7047 vfs_context_t ctx
= vfs_context_current();
7050 kauth_action_t action
;
7052 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7053 AUDIT_ARG(fd
, uap
->fd
);
7055 if ((error
= file_vnode(uap
->fd
, &vp
))) {
7059 if ((error
= vnode_getwithref(vp
))) {
7063 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7066 if (uap
->uid
!= VNOVAL
) {
7067 VATTR_SET(&va
, va_uid
, uap
->uid
);
7069 if (uap
->gid
!= VNOVAL
) {
7070 VATTR_SET(&va
, va_gid
, uap
->gid
);
7074 /* chown calls are not allowed for resource forks. */
7075 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7082 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7088 /* preflight and authorize attribute changes */
7089 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7092 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7093 if (error
== EACCES
) {
7098 error
= vnode_setattr(vp
, &va
, ctx
);
7102 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7107 (void)vnode_put(vp
);
7113 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
7117 if (usrtvp
== USER_ADDR_NULL
) {
7118 struct timeval old_tv
;
7119 /* XXX Y2038 bug because of microtime argument */
7121 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
7124 if (IS_64BIT_PROCESS(current_proc())) {
7125 struct user64_timeval tv
[2];
7126 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7130 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7131 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7133 struct user32_timeval tv
[2];
7134 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7138 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7139 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7146 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
7150 struct vnode_attr va
;
7151 kauth_action_t action
;
7153 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7156 VATTR_SET(&va
, va_access_time
, ts
[0]);
7157 VATTR_SET(&va
, va_modify_time
, ts
[1]);
7159 va
.va_vaflags
|= VA_UTIMES_NULL
;
7163 /* utimes calls are not allowed for resource forks. */
7164 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7171 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7176 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7177 if (!nullflag
&& error
== EACCES
) {
7183 /* since we may not need to auth anything, check here */
7184 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7185 if (!nullflag
&& error
== EACCES
) {
7190 error
= vnode_setattr(vp
, &va
, ctx
);
7194 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7203 * Set the access and modification times of a file.
7207 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
7209 struct timespec ts
[2];
7212 struct nameidata nd
;
7213 vfs_context_t ctx
= vfs_context_current();
7216 * AUDIT: Needed to change the order of operations to do the
7217 * name lookup first because auditing wants the path.
7219 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
7220 UIO_USERSPACE
, uap
->path
, ctx
);
7228 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7229 * the current time instead.
7232 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7236 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
7239 vnode_put(nd
.ni_vp
);
7244 * Set the access and modification times of a file.
7248 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
7250 struct timespec ts
[2];
7255 AUDIT_ARG(fd
, uap
->fd
);
7257 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7260 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
7263 if ((error
= vnode_getwithref(vp
))) {
7268 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
7275 * Truncate a file given its path name.
7279 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
7282 struct vnode_attr va
;
7283 vfs_context_t ctx
= vfs_context_current();
7285 struct nameidata nd
;
7286 kauth_action_t action
;
7288 if (uap
->length
< 0) {
7291 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
7292 UIO_USERSPACE
, uap
->path
, ctx
);
7293 if ((error
= namei(&nd
))) {
7301 VATTR_SET(&va
, va_data_size
, uap
->length
);
7304 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
7310 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7313 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7316 error
= vnode_setattr(vp
, &va
, ctx
);
7320 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
7330 * Truncate a file given a file descriptor.
7334 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
7336 vfs_context_t ctx
= vfs_context_current();
7337 struct vnode_attr va
;
7339 struct fileproc
*fp
;
7343 AUDIT_ARG(fd
, uap
->fd
);
7344 if (uap
->length
< 0) {
7348 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
7352 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
7354 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7363 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
7365 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
7366 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7371 if ((error
= vnode_getwithref(vp
)) != 0) {
7375 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7378 error
= mac_vnode_check_truncate(ctx
,
7379 fp
->f_fglob
->fg_cred
, vp
);
7381 (void)vnode_put(vp
);
7386 VATTR_SET(&va
, va_data_size
, uap
->length
);
7387 error
= vnode_setattr(vp
, &va
, ctx
);
7391 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
7395 (void)vnode_put(vp
);
7403 * Sync an open file with synchronized I/O _file_ integrity completion
7407 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7409 __pthread_testcancel(1);
7410 return fsync_common(p
, uap
, MNT_WAIT
);
7415 * Sync an open file with synchronized I/O _file_ integrity completion
7417 * Notes: This is a legacy support function that does not test for
7418 * thread cancellation points.
7422 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7424 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7429 * Sync an open file with synchronized I/O _data_ integrity completion
7433 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7435 __pthread_testcancel(1);
7436 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7443 * Common fsync code to support both synchronized I/O file integrity completion
7444 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7446 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7447 * will only guarantee that the file data contents are retrievable. If
7448 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
7449 * includes additional metadata unnecessary for retrieving the file data
7450 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7453 * Parameters: p The process
7454 * uap->fd The descriptor to synchronize
7455 * flags The data integrity flags
7457 * Returns: int Success
7458 * fp_getfvp:EBADF Bad file descriptor
7459 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7460 * VNOP_FSYNC:??? unspecified
7462 * Notes: We use struct fsync_args because it is a short name, and all
7463 * caller argument structures are otherwise identical.
7466 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7469 struct fileproc
*fp
;
7470 vfs_context_t ctx
= vfs_context_current();
7473 AUDIT_ARG(fd
, uap
->fd
);
7475 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7478 if ((error
= vnode_getwithref(vp
))) {
7483 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7485 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7488 /* Sync resource fork shadow file if necessary. */
7490 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7491 (vp
->v_parent
!= NULLVP
) &&
7492 vnode_isshadow(vp
) &&
7493 (fp
->f_flags
& FP_WRITTEN
)) {
7494 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7498 (void)vnode_put(vp
);
7504 * Duplicate files. Source must be a file, target must be a file or
7507 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7508 * perform inheritance correctly.
7512 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7514 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7515 struct nameidata fromnd
, tond
;
7517 vfs_context_t ctx
= vfs_context_current();
7519 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7520 struct vnode_attr va
;
7523 /* Check that the flags are valid. */
7525 if (uap
->flags
& ~CPF_MASK
) {
7529 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7530 UIO_USERSPACE
, uap
->from
, ctx
);
7531 if ((error
= namei(&fromnd
))) {
7536 NDINIT(&tond
, CREATE
, OP_LINK
,
7537 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7538 UIO_USERSPACE
, uap
->to
, ctx
);
7539 if ((error
= namei(&tond
))) {
7546 if (!(uap
->flags
& CPF_OVERWRITE
)) {
7552 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7557 /* This calls existing MAC hooks for open */
7558 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7565 * See unlinkat_internal for an explanation of the potential
7566 * ENOENT from the MAC hook but the gist is that the MAC hook
7567 * can fail because vn_getpath isn't able to return the full
7568 * path. We choose to ignore this failure.
7570 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7571 if (error
&& error
!= ENOENT
) {
7579 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7580 /* Mask off all but regular access permissions */
7581 VATTR_SET(&va
, va_mode
,
7582 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7583 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7587 #endif /* CONFIG_MACF */
7589 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7597 * If source is the same as the destination (that is the
7598 * same inode number) then there is nothing to do.
7599 * (fixed to have POSIX semantics - CSM 3/2/98)
7605 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
7608 sdvp
= tond
.ni_startdir
;
7610 * nameidone has to happen before we vnode_put(tdvp)
7611 * since it may need to release the fs_nodelock on the tdvp
7631 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7634 * Helper function for doing clones. The caller is expected to provide an
7635 * iocounted source vnode and release it.
7638 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7639 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7642 struct nameidata tond
;
7645 boolean_t free_src_acl
;
7646 boolean_t attr_cleanup
;
7648 kauth_action_t action
;
7649 struct componentname
*cnp
;
7651 struct vnode_attr va
;
7652 struct vnode_attr nva
;
7653 uint32_t vnop_flags
;
7655 v_type
= vnode_vtype(fvp
);
7660 action
= KAUTH_VNODE_ADD_FILE
;
7663 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7664 fvp
->v_mountedhere
) {
7667 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7673 AUDIT_ARG(fd2
, dst_dirfd
);
7674 AUDIT_ARG(value32
, flags
);
7676 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7677 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7678 UIO_USERSPACE
, dst
, ctx
);
7679 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7686 free_src_acl
= FALSE
;
7687 attr_cleanup
= FALSE
;
7694 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7700 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7704 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
7708 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7709 if (data_read_authorised
) {
7710 action
&= ~KAUTH_VNODE_READ_DATA
;
7712 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
7717 * certain attributes may need to be changed from the source, we ask for
7721 VATTR_WANTED(&va
, va_uid
);
7722 VATTR_WANTED(&va
, va_gid
);
7723 VATTR_WANTED(&va
, va_mode
);
7724 VATTR_WANTED(&va
, va_flags
);
7725 VATTR_WANTED(&va
, va_acl
);
7727 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
7732 VATTR_SET(&nva
, va_type
, v_type
);
7733 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7734 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7735 free_src_acl
= TRUE
;
7738 /* Handle ACL inheritance, initialize vap. */
7739 if (v_type
== VLNK
) {
7740 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7742 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7746 attr_cleanup
= TRUE
;
7749 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7751 * We've got initial values for all security parameters,
7752 * If we are superuser, then we can change owners to be the
7753 * same as the source. Both superuser and the owner have default
7754 * WRITE_SECURITY privileges so all other fields can be taken
7755 * from source as well.
7757 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7758 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
7759 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7761 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
7762 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7765 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7768 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
7769 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
7771 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7772 VATTR_SET(&nva
, va_flags
,
7773 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7774 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7777 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7779 if (!error
&& tvp
) {
7780 int update_flags
= 0;
7783 #endif /* CONFIG_FSE */
7786 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7787 VNODE_LABEL_CREATE
, ctx
);
7790 * If some of the requested attributes weren't handled by the
7791 * VNOP, use our fallback code.
7793 if (!VATTR_ALL_SUPPORTED(&va
)) {
7794 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7797 // Make sure the name & parent pointers are hooked up
7798 if (tvp
->v_name
== NULL
) {
7799 update_flags
|= VNODE_UPDATE_NAME
;
7801 if (tvp
->v_parent
== NULLVP
) {
7802 update_flags
|= VNODE_UPDATE_PARENT
;
7806 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7807 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7811 switch (vnode_vtype(tvp
)) {
7815 fsevent
= FSE_CREATE_FILE
;
7818 fsevent
= FSE_CREATE_DIR
;
7824 if (need_fsevent(fsevent
, tvp
)) {
7826 * The following is a sequence of three explicit events.
7827 * A pair of FSE_CLONE events representing the source and destination
7828 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7829 * fseventsd may coalesce the destination clone and create events
7830 * into a single event resulting in the following sequence for a client
7832 * FSE_CLONE | FSE_CREATE (dst)
7834 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7836 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7839 #endif /* CONFIG_FSE */
7844 vn_attribute_cleanup(&nva
, defaulted
);
7846 if (free_src_acl
&& va
.va_acl
) {
7847 kauth_acl_free(va
.va_acl
);
7858 * clone files or directories, target must not exist.
7862 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7863 __unused
int32_t *retval
)
7866 struct nameidata fromnd
;
7869 vfs_context_t ctx
= vfs_context_current();
7871 /* Check that the flags are valid. */
7872 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7876 AUDIT_ARG(fd
, uap
->src_dirfd
);
7878 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7879 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7880 UIO_USERSPACE
, uap
->src
, ctx
);
7881 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
7888 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
7896 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7897 __unused
int32_t *retval
)
7900 struct fileproc
*fp
;
7902 vfs_context_t ctx
= vfs_context_current();
7904 /* Check that the flags are valid. */
7905 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7909 AUDIT_ARG(fd
, uap
->src_fd
);
7910 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7915 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7916 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7921 if ((error
= vnode_getwithref(fvp
))) {
7925 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7927 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7932 file_drop(uap
->src_fd
);
7937 rename_submounts_callback(mount_t mp
, void *arg
)
7940 mount_t pmp
= (mount_t
)arg
;
7941 int prefix_len
= strlen(pmp
->mnt_vfsstat
.f_mntonname
);
7943 if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) {
7947 if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') {
7951 if ((error
= vfs_busy(mp
, LK_NOWAIT
))) {
7952 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7956 int pathlen
= MAXPATHLEN
;
7957 if ((error
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) {
7958 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7967 * Rename files. Source and destination must either both be directories,
7968 * or both not be directories. If target is a directory, it must be empty.
7972 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7973 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7975 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
7979 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
7985 struct nameidata
*fromnd
, *tond
;
7993 const char *oname
= NULL
;
7994 char *from_name
= NULL
, *to_name
= NULL
;
7995 char *from_name_no_firmlink
= NULL
, *to_name_no_firmlink
= NULL
;
7996 int from_len
= 0, to_len
= 0;
7997 int from_len_no_firmlink
= 0, to_len_no_firmlink
= 0;
7998 int holding_mntlock
;
7999 mount_t locked_mp
= NULL
;
8000 vnode_t oparent
= NULLVP
;
8002 fse_info from_finfo
, to_finfo
;
8004 int from_truncated
= 0, to_truncated
= 0;
8005 int from_truncated_no_firmlink
= 0, to_truncated_no_firmlink
= 0;
8007 struct vnode_attr
*fvap
, *tvap
;
8009 /* carving out a chunk for structs that are too big to be on stack. */
8011 struct nameidata from_node
, to_node
;
8012 struct vnode_attr fv_attr
, tv_attr
;
8014 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
8015 fromnd
= &__rename_data
->from_node
;
8016 tond
= &__rename_data
->to_node
;
8018 holding_mntlock
= 0;
8027 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
8029 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8031 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
8033 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8036 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8037 if ((error
= nameiat(fromnd
, fromfd
))) {
8040 fdvp
= fromnd
->ni_dvp
;
8041 fvp
= fromnd
->ni_vp
;
8043 if (fvp
&& fvp
->v_type
== VDIR
) {
8044 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
8048 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8049 if ((error
= nameiat(tond
, tofd
))) {
8051 * Translate error code for rename("dir1", "dir2/.").
8053 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
8058 tdvp
= tond
->ni_dvp
;
8062 #if DEVELOPMENT || DEBUG
8064 * XXX VSWAP: Check for entitlements or special flag here
8065 * so we can restrict access appropriately.
8067 #else /* DEVELOPMENT || DEBUG */
8069 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8074 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8078 #endif /* DEVELOPMENT || DEBUG */
8080 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
8085 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
8090 batched
= vnode_compound_rename_available(fdvp
);
8093 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
8096 get_fse_info(fvp
, &from_finfo
, ctx
);
8098 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
8103 fvap
= &__rename_data
->fv_attr
;
8107 get_fse_info(tvp
, &to_finfo
, ctx
);
8108 } else if (batched
) {
8109 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
8114 tvap
= &__rename_data
->tv_attr
;
8119 #endif /* CONFIG_FSE */
8121 has_listeners
= kauth_authorize_fileop_has_listeners();
8125 if (AUDIT_RECORD_EXISTS()) {
8130 if (need_event
|| has_listeners
) {
8131 if (from_name
== NULL
) {
8132 GET_PATH(from_name
);
8133 if (from_name
== NULL
) {
8139 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
8141 if (from_name_no_firmlink
== NULL
) {
8142 GET_PATH(from_name_no_firmlink
);
8143 if (from_name_no_firmlink
== NULL
) {
8149 from_len_no_firmlink
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
);
8152 if (need_event
|| need_kpath2
|| has_listeners
) {
8153 if (to_name
== NULL
) {
8155 if (to_name
== NULL
) {
8161 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
8163 if (to_name_no_firmlink
== NULL
) {
8164 GET_PATH(to_name_no_firmlink
);
8165 if (to_name_no_firmlink
== NULL
) {
8171 to_len_no_firmlink
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
);
8172 if (to_name
&& need_kpath2
) {
8173 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
8178 * Claim: this check will never reject a valid rename.
8179 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8180 * Suppose fdvp and tdvp are not on the same mount.
8181 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8182 * then you can't move it to within another dir on the same mountpoint.
8183 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8185 * If this check passes, then we are safe to pass these vnodes to the same FS.
8187 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
8191 goto skipped_lookup
;
8195 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
8197 if (error
== ENOENT
) {
8198 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8200 * We encountered a race where after doing the namei, tvp stops
8201 * being valid. If so, simply re-drive the rename call from the
8213 * If the source and destination are the same (i.e. they're
8214 * links to the same vnode) and the target file system is
8215 * case sensitive, then there is nothing to do.
8217 * XXX Come back to this.
8223 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8224 * then assume that this file system is case sensitive.
8226 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
8227 pathconf_val
!= 0) {
8233 * Allow the renaming of mount points.
8234 * - target must not exist
8235 * - target must reside in the same directory as source
8236 * - union mounts cannot be renamed
8237 * - "/" cannot be renamed
8239 * XXX Handle this in VFS after a continued lookup (if we missed
8240 * in the cache to start off)
8242 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8243 * we'll skip past here. The file system is responsible for
8244 * checking that @tvp is not a descendent of @fvp and vice versa
8245 * so it should always return EINVAL if either @tvp or @fvp is the
8248 if ((fvp
->v_flag
& VROOT
) &&
8249 (fvp
->v_type
== VDIR
) &&
8251 (fvp
->v_mountedhere
== NULL
) &&
8253 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
8254 ((fvp
->v_mount
->mnt_kern_flag
& MNTK_SYSTEM
) == 0) &&
8255 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
8258 /* switch fvp to the covered vnode */
8259 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
8260 if ((vnode_getwithref(coveredvp
))) {
8270 * Check for cross-device rename.
8272 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
8273 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
8279 * If source is the same as the destination (that is the
8280 * same inode number) then there is nothing to do...
8281 * EXCEPT if the underlying file system supports case
8282 * insensitivity and is case preserving. In this case
8283 * the file system needs to handle the special case of
8284 * getting the same vnode as target (fvp) and source (tvp).
8286 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8287 * and _PC_CASE_PRESERVING can have this exception, and they need to
8288 * handle the special case of getting the same vnode as target and
8289 * source. NOTE: Then the target is unlocked going into vnop_rename,
8290 * so not to cause locking problems. There is a single reference on tvp.
8292 * NOTE - that fvp == tvp also occurs if they are hard linked and
8293 * that correct behaviour then is just to return success without doing
8296 * XXX filesystem should take care of this itself, perhaps...
8298 if (fvp
== tvp
&& fdvp
== tdvp
) {
8299 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
8300 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
8301 fromnd
->ni_cnd
.cn_namelen
)) {
8306 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
8308 * we're holding a reference and lock
8309 * on locked_mp, but it no longer matches
8310 * what we want to do... so drop our hold
8312 mount_unlock_renames(locked_mp
);
8313 mount_drop(locked_mp
, 0);
8314 holding_mntlock
= 0;
8316 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
8318 * serialize renames that re-shape
8319 * the tree... if holding_mntlock is
8320 * set, then we're ready to go...
8322 * first need to drop the iocounts
8323 * we picked up, second take the
8324 * lock to serialize the access,
8325 * then finally start the lookup
8326 * process over with the lock held
8328 if (!holding_mntlock
) {
8330 * need to grab a reference on
8331 * the mount point before we
8332 * drop all the iocounts... once
8333 * the iocounts are gone, the mount
8336 locked_mp
= fvp
->v_mount
;
8337 mount_ref(locked_mp
, 0);
8340 * nameidone has to happen before we vnode_put(tvp)
8341 * since it may need to release the fs_nodelock on the tvp
8351 * nameidone has to happen before we vnode_put(fdvp)
8352 * since it may need to release the fs_nodelock on the fvp
8359 mount_lock_renames(locked_mp
);
8360 holding_mntlock
= 1;
8366 * when we dropped the iocounts to take
8367 * the lock, we allowed the identity of
8368 * the various vnodes to change... if they did,
8369 * we may no longer be dealing with a rename
8370 * that reshapes the tree... once we're holding
8371 * the iocounts, the vnodes can't change type
8372 * so we're free to drop the lock at this point
8375 if (holding_mntlock
) {
8376 mount_unlock_renames(locked_mp
);
8377 mount_drop(locked_mp
, 0);
8378 holding_mntlock
= 0;
8382 // save these off so we can later verify that fvp is the same
8383 oname
= fvp
->v_name
;
8384 oparent
= fvp
->v_parent
;
8387 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
8388 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
8391 if (holding_mntlock
) {
8393 * we can drop our serialization
8396 mount_unlock_renames(locked_mp
);
8397 mount_drop(locked_mp
, 0);
8398 holding_mntlock
= 0;
8401 if (error
== EDATALESS
) {
8403 * If we've been here before, something has gone
8404 * horribly wrong and we should just get out lest
8405 * we spiral around the drain forever.
8407 if (flags
& VFS_RENAME_DATALESS
) {
8413 * The object we're renaming is dataless (or has a
8414 * dataless descendent) and requires materialization
8415 * before the rename occurs. But we're holding the
8416 * mount point's rename lock, so it's not safe to
8419 * In this case, we release the lock, perform the
8420 * materialization, and start the whole thing over.
8422 error
= vnode_materialize_dataless_file(fvp
,
8423 NAMESPACE_HANDLER_RENAME_OP
);
8427 * The next time around we need to tell the
8428 * file system that the materialization has
8431 flags
|= VFS_RENAME_DATALESS
;
8436 if (error
== EKEEPLOOKING
) {
8437 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8438 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8439 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8443 fromnd
->ni_vp
= fvp
;
8446 goto continue_lookup
;
8450 * We may encounter a race in the VNOP where the destination didn't
8451 * exist when we did the namei, but it does by the time we go and
8452 * try to create the entry. In this case, we should re-drive this rename
8453 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8454 * but other filesystems susceptible to this race could return it, too.
8456 if (error
== ERECYCLE
) {
8461 * For compound VNOPs, the authorization callback may return
8462 * ENOENT in case of racing hardlink lookups hitting the name
8463 * cache, redrive the lookup.
8465 if (batched
&& error
== ENOENT
) {
8466 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8475 /* call out to allow 3rd party notification of rename.
8476 * Ignore result of kauth_authorize_fileop call.
8478 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8479 KAUTH_FILEOP_RENAME
,
8480 (uintptr_t)from_name
, (uintptr_t)to_name
);
8481 if (flags
& VFS_RENAME_SWAP
) {
8482 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8483 KAUTH_FILEOP_RENAME
,
8484 (uintptr_t)to_name
, (uintptr_t)from_name
);
8488 if (from_name
!= NULL
&& to_name
!= NULL
) {
8489 if (from_truncated
|| to_truncated
) {
8490 // set it here since only the from_finfo gets reported up to user space
8491 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8495 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8498 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8502 add_fsevent(FSE_RENAME
, ctx
,
8503 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8504 FSE_ARG_FINFO
, &from_finfo
,
8505 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8506 FSE_ARG_FINFO
, &to_finfo
,
8508 if (flags
& VFS_RENAME_SWAP
) {
8510 * Strictly speaking, swap is the equivalent of
8511 * *three* renames. FSEvents clients should only take
8512 * the events as a hint, so we only bother reporting
8515 add_fsevent(FSE_RENAME
, ctx
,
8516 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8517 FSE_ARG_FINFO
, &to_finfo
,
8518 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8519 FSE_ARG_FINFO
, &from_finfo
,
8523 add_fsevent(FSE_RENAME
, ctx
,
8524 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8525 FSE_ARG_FINFO
, &from_finfo
,
8526 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8530 #endif /* CONFIG_FSE */
8533 * update filesystem's mount point data
8536 char *cp
, *pathend
, *mpname
;
8542 mp
= fvp
->v_mountedhere
;
8544 if (vfs_busy(mp
, LK_NOWAIT
)) {
8548 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
8550 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8551 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8553 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8556 /* find current mount point prefix */
8557 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8558 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8563 /* find last component of target name */
8564 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8570 /* Update f_mntonname of sub mounts */
8571 vfs_iterate(0, rename_submounts_callback
, (void *)mp
);
8573 /* append name to prefix */
8574 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8575 bzero(pathend
, maxlen
);
8577 strlcpy(pathend
, mpname
, maxlen
);
8579 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
8583 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
8586 * fix up name & parent pointers. note that we first
8587 * check that fvp has the same name/parent pointers it
8588 * had before the rename call... this is a 'weak' check
8591 * XXX oparent and oname may not be set in the compound vnop case
8593 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8596 update_flags
= VNODE_UPDATE_NAME
;
8599 update_flags
|= VNODE_UPDATE_PARENT
;
8602 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8605 if (to_name
!= NULL
) {
8606 RELEASE_PATH(to_name
);
8609 if (to_name_no_firmlink
!= NULL
) {
8610 RELEASE_PATH(to_name_no_firmlink
);
8611 to_name_no_firmlink
= NULL
;
8613 if (from_name
!= NULL
) {
8614 RELEASE_PATH(from_name
);
8617 if (from_name_no_firmlink
!= NULL
) {
8618 RELEASE_PATH(from_name_no_firmlink
);
8619 from_name_no_firmlink
= NULL
;
8621 if (holding_mntlock
) {
8622 mount_unlock_renames(locked_mp
);
8623 mount_drop(locked_mp
, 0);
8624 holding_mntlock
= 0;
8628 * nameidone has to happen before we vnode_put(tdvp)
8629 * since it may need to release the fs_nodelock on the tdvp
8640 * nameidone has to happen before we vnode_put(fdvp)
8641 * since it may need to release the fs_nodelock on the fdvp
8652 * If things changed after we did the namei, then we will re-drive
8653 * this rename call from the top.
8660 FREE(__rename_data
, M_TEMP
);
8665 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
8667 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
8668 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
8672 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
8674 return renameat_internal(
8675 vfs_context_current(),
8676 uap
->fromfd
, uap
->from
,
8678 UIO_USERSPACE
, uap
->flags
);
8682 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
8684 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
8685 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
8689 * Make a directory file.
8691 * Returns: 0 Success
8694 * vnode_authorize:???
8699 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
8700 enum uio_seg segflg
)
8704 int update_flags
= 0;
8706 struct nameidata nd
;
8708 AUDIT_ARG(mode
, vap
->va_mode
);
8709 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
8711 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
8712 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
8715 error
= nameiat(&nd
, fd
);
8727 batched
= vnode_compound_mkdir_available(dvp
);
8729 VATTR_SET(vap
, va_type
, VDIR
);
8733 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8734 * only get EXISTS or EISDIR for existing path components, and not that it could see
8735 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8736 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8738 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
8739 if (error
== EACCES
|| error
== EPERM
) {
8747 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8748 * rather than EACCES if the target exists.
8750 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
8752 error2
= nameiat(&nd
, fd
);
8766 * make the directory
8768 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
8769 if (error
== EKEEPLOOKING
) {
8771 goto continue_lookup
;
8777 // Make sure the name & parent pointers are hooked up
8778 if (vp
->v_name
== NULL
) {
8779 update_flags
|= VNODE_UPDATE_NAME
;
8781 if (vp
->v_parent
== NULLVP
) {
8782 update_flags
|= VNODE_UPDATE_PARENT
;
8786 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8790 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8795 * nameidone has to happen before we vnode_put(dvp)
8796 * since it may need to release the fs_nodelock on the dvp
8811 * mkdir_extended: Create a directory; with extended security (ACL).
8813 * Parameters: p Process requesting to create the directory
8814 * uap User argument descriptor (see below)
8817 * Indirect: uap->path Path of directory to create
8818 * uap->mode Access permissions to set
8819 * uap->xsecurity ACL to set
8821 * Returns: 0 Success
8826 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8829 kauth_filesec_t xsecdst
;
8830 struct vnode_attr va
;
8832 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8835 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8836 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
8841 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8842 if (xsecdst
!= NULL
) {
8843 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8846 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8848 if (xsecdst
!= NULL
) {
8849 kauth_filesec_free(xsecdst
);
8855 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8857 struct vnode_attr va
;
8860 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8862 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8867 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8869 struct vnode_attr va
;
8872 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8874 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
8879 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8880 enum uio_seg segflg
, int unlink_flags
)
8884 struct nameidata nd
;
8886 char *no_firmlink_path
= NULL
;
8888 int len_no_firmlink_path
= 0;
8889 int has_listeners
= 0;
8891 int truncated_path
= 0;
8892 int truncated_no_firmlink_path
= 0;
8894 struct vnode_attr va
;
8895 #endif /* CONFIG_FSE */
8896 struct vnode_attr
*vap
= NULL
;
8897 int restart_count
= 0;
8903 * This loop exists to restart rmdir in the unlikely case that two
8904 * processes are simultaneously trying to remove the same directory
8905 * containing orphaned appleDouble files.
8908 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8909 segflg
, dirpath
, ctx
);
8910 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8915 error
= nameiat(&nd
, fd
);
8924 batched
= vnode_compound_rmdir_available(vp
);
8926 if (vp
->v_flag
& VROOT
) {
8928 * The root of a mounted filesystem cannot be deleted.
8934 #if DEVELOPMENT || DEBUG
8936 * XXX VSWAP: Check for entitlements or special flag here
8937 * so we can restrict access appropriately.
8939 #else /* DEVELOPMENT || DEBUG */
8941 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8945 #endif /* DEVELOPMENT || DEBUG */
8948 * Removed a check here; we used to abort if vp's vid
8949 * was not the same as what we'd seen the last time around.
8950 * I do not think that check was valid, because if we retry
8951 * and all dirents are gone, the directory could legitimately
8952 * be recycled but still be present in a situation where we would
8953 * have had permission to delete. Therefore, we won't make
8954 * an effort to preserve that check now that we may not have a
8959 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
8961 if (error
== ENOENT
) {
8962 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8973 if (!vnode_compound_rmdir_available(dvp
)) {
8974 panic("No error, but no compound rmdir?");
8981 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8984 get_fse_info(vp
, &finfo
, ctx
);
8986 error
= vfs_get_notify_attributes(&va
);
8995 has_listeners
= kauth_authorize_fileop_has_listeners();
8996 if (need_event
|| has_listeners
) {
9005 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
9007 if (no_firmlink_path
== NULL
) {
9008 GET_PATH(no_firmlink_path
);
9009 if (no_firmlink_path
== NULL
) {
9015 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
9017 if (truncated_no_firmlink_path
) {
9018 finfo
.mode
|= FSE_TRUNCATED_PATH
;
9023 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9026 /* Couldn't find a vnode */
9030 if (error
== EKEEPLOOKING
) {
9031 goto continue_lookup
;
9032 } else if (batched
&& error
== ENOENT
) {
9033 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9035 * For compound VNOPs, the authorization callback
9036 * may return ENOENT in case of racing hard link lookups
9037 * redrive the lookup.
9046 * XXX There's no provision for passing flags
9047 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9048 * because it's not empty, then we try again
9049 * with VNOP_REMOVE(), passing in a special
9050 * flag that clever file systems will know
9053 if (error
== ENOTEMPTY
&&
9054 (unlink_flags
& VNODE_REMOVE_DATALESS_DIR
) != 0) {
9056 * If this fails, we want to keep the original
9059 if (vn_remove(dvp
, &vp
, &nd
,
9060 VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) {
9065 #if CONFIG_APPLEDOUBLE
9067 * Special case to remove orphaned AppleDouble
9068 * files. I don't like putting this in the kernel,
9069 * but carbon does not like putting this in carbon either,
9072 if (error
== ENOTEMPTY
) {
9073 int ad_error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
9074 if (ad_error
== EBUSY
) {
9081 * Assuming everything went well, we will try the RMDIR again
9084 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9087 #endif /* CONFIG_APPLEDOUBLE */
9089 * Call out to allow 3rd party notification of delete.
9090 * Ignore result of kauth_authorize_fileop call.
9093 if (has_listeners
) {
9094 kauth_authorize_fileop(vfs_context_ucred(ctx
),
9095 KAUTH_FILEOP_DELETE
,
9100 if (vp
->v_flag
& VISHARDLINK
) {
9101 // see the comment in unlink1() about why we update
9102 // the parent of a hard link when it is removed
9103 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
9109 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
9111 add_fsevent(FSE_DELETE
, ctx
,
9112 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
9113 FSE_ARG_FINFO
, &finfo
,
9125 if (no_firmlink_path
!= NULL
) {
9126 RELEASE_PATH(no_firmlink_path
);
9127 no_firmlink_path
= NULL
;
9131 * nameidone has to happen before we vnode_put(dvp)
9132 * since it may need to release the fs_nodelock on the dvp
9141 if (restart_flag
== 0) {
9142 wakeup_one((caddr_t
)vp
);
9145 tsleep(vp
, PVFS
, "rm AD", 1);
9146 } while (restart_flag
!= 0);
9152 * Remove a directory file.
9156 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
9158 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
9159 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0);
/*
 * Record-length helpers used when repacking struct dirent records into
 * struct direntry records.
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "x & 0xff") is evaluated as a unit.
 */

/*
 * Get direntry length padded to 8 byte alignment:
 * sizeof(struct direntry) + namlen - (MAXPATHLEN-1), rounded up to a
 * multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment:
 * sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1),
 * rounded up to a multiple of 4.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/*
 * Get the end of this dirent: address of the last byte of the record,
 * i.e. start address + d_reclen - 1.  'dep' is evaluated twice.
 */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
9175 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
9176 int *numdirent
, vfs_context_t ctxp
)
9178 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9179 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
9180 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
9181 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
9186 struct direntry
*entry64
;
9192 * We're here because the underlying file system does not
9193 * support direntries or we mounted denying support so we must
9194 * fall back to dirents and convert them to direntries.
9196 * Our kernel buffer needs to be smaller since re-packing will
9197 * expand each dirent. The worst case (when the name length
9198 * is 3 or less) corresponds to a struct direntry size of 32
9199 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9200 * (4-byte aligned). So having a buffer that is 3/8 the size
9201 * will prevent us from reading more than we can pack.
9203 * Since this buffer is wired memory, we will limit the
9204 * buffer size to a maximum of 32K. We would really like to
9205 * use 32K in the MIN(), but we use magic number 87371 to
9206 * prevent uio_resid() * 3 / 8 from overflowing.
9208 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
9209 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
9210 if (bufptr
== NULL
) {
9214 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
9215 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
9216 auio
->uio_offset
= uio
->uio_offset
;
9218 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
9220 dep
= (struct dirent
*)bufptr
;
9221 bytesread
= bufsize
- uio_resid(auio
);
9223 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
9226 * Convert all the entries and copy them out to user's buffer.
9228 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
9229 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
9231 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
9232 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
9233 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
9234 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
9235 vp
->v_name
? vp
->v_name
: "<unknown>");
9240 bzero(entry64
, enbufsize
);
9241 /* Convert a dirent to a dirent64. */
9242 entry64
->d_ino
= dep
->d_ino
;
9243 entry64
->d_seekoff
= 0;
9244 entry64
->d_reclen
= enbufsize
;
9245 entry64
->d_namlen
= dep
->d_namlen
;
9246 entry64
->d_type
= dep
->d_type
;
9247 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
9249 /* Move to next entry. */
9250 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
9252 /* Copy entry64 to user's buffer. */
9253 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
9256 /* Update the real offset using the offset we got from VNOP_READDIR. */
9258 uio
->uio_offset
= auio
->uio_offset
;
9261 FREE(bufptr
, M_TEMP
);
9262 FREE(entry64
, M_TEMP
);
9267 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9270 * Read a block of directory entries in a file system independent format.
9273 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
9274 off_t
*offset
, int *eofflag
, int flags
)
9277 struct vfs_context context
= *vfs_context_current(); /* local copy */
9278 struct fileproc
*fp
;
9280 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9282 int error
, numdirent
;
9283 char uio_buf
[UIO_SIZEOF(1)];
9285 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
9289 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9290 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9295 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
9296 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
9300 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
9305 if ((error
= vnode_getwithref(vp
))) {
9308 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9311 if (vp
->v_type
!= VDIR
) {
9312 (void)vnode_put(vp
);
9318 error
= mac_vnode_check_readdir(&context
, vp
);
9320 (void)vnode_put(vp
);
9325 loff
= fp
->f_fglob
->fg_offset
;
9326 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9327 uio_addiov(auio
, bufp
, bufsize
);
9329 if (flags
& VNODE_READDIR_EXTENDED
) {
9330 error
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
);
9331 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9333 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
);
9334 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9337 (void)vnode_put(vp
);
9341 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
9342 if (union_dircheckp
) {
9343 error
= union_dircheckp(&vp
, fp
, &context
);
9348 (void)vnode_put(vp
);
9353 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
9354 struct vnode
*tvp
= vp
;
9355 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
9357 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9358 fp
->f_fglob
->fg_offset
= 0;
9372 *bytesread
= bufsize
- uio_resid(auio
);
9380 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
9386 AUDIT_ARG(fd
, uap
->fd
);
9387 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
,
9388 &bytesread
, &offset
, &eofflag
, 0);
9391 if (proc_is64bit(p
)) {
9392 user64_long_t base
= (user64_long_t
)offset
;
9393 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
9395 user32_long_t base
= (user32_long_t
)offset
;
9396 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
9398 *retval
= bytesread
;
9404 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
9409 user_size_t bufsize
;
9411 AUDIT_ARG(fd
, uap
->fd
);
9414 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9415 * then the kernel carves out the last 4 bytes to return extended
9416 * information to userspace (namely whether we reached EOF with this call).
9418 if (uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9419 bufsize
= uap
->bufsize
- sizeof(getdirentries64_flags_t
);
9421 bufsize
= uap
->bufsize
;
9424 error
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
,
9425 &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
);
9428 *retval
= bytesread
;
9429 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
9431 if (error
== 0 && uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9432 getdirentries64_flags_t flags
= 0;
9434 flags
|= GETDIRENTRIES64_EOF
;
9436 error
= copyout(&flags
, (user_addr_t
)uap
->buf
+ bufsize
,
9445 * Set the mode mask for creation of filesystem nodes.
9446 * XXX implement xsecurity
9448 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9450 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
9452 struct filedesc
*fdp
;
9454 AUDIT_ARG(mask
, newmask
);
9457 *retval
= fdp
->fd_cmask
;
9458 fdp
->fd_cmask
= newmask
& ALLPERMS
;
9464 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9466 * Parameters: p Process requesting to set the umask
9467 * uap User argument descriptor (see below)
9468 * retval umask of the process (parameter p)
9470 * Indirect: uap->newmask umask to set
9471 * uap->xsecurity ACL to set
9473 * Returns: 0 Success
9478 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
9481 kauth_filesec_t xsecdst
;
9483 xsecdst
= KAUTH_FILESEC_NONE
;
9484 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
9485 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
9489 xsecdst
= KAUTH_FILESEC_NONE
;
9492 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
9494 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
9495 kauth_filesec_free(xsecdst
);
9501 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
9503 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
9507 * Void all references to file by ripping underlying filesystem
9512 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
9515 struct vnode_attr va
;
9516 vfs_context_t ctx
= vfs_context_current();
9518 struct nameidata nd
;
9520 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9530 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9535 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9541 error
= mac_vnode_check_revoke(ctx
, vp
);
9548 VATTR_WANTED(&va
, va_uid
);
9549 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9552 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9553 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9556 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9557 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9566 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9567 * The following system calls are designed to support features
9568 * which are specific to the HFS & HFS Plus volume formats
9573 * Obtain attribute information on objects in a directory while enumerating
9578 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9581 struct fileproc
*fp
;
9583 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9584 uint32_t count
= 0, savecount
= 0;
9585 uint32_t newstate
= 0;
9588 struct attrlist attributelist
;
9589 vfs_context_t ctx
= vfs_context_current();
9591 char uio_buf
[UIO_SIZEOF(1)];
9592 kauth_action_t action
;
9596 /* Get the attributes into kernel space */
9597 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9600 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9604 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9607 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9608 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9615 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9623 if ((error
= vnode_getwithref(vp
))) {
9627 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9630 if (vp
->v_type
!= VDIR
) {
9631 (void)vnode_put(vp
);
9637 error
= mac_vnode_check_readdir(ctx
, vp
);
9639 (void)vnode_put(vp
);
9644 /* set up the uio structure which will contain the users return buffer */
9645 loff
= fp
->f_fglob
->fg_offset
;
9646 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9647 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9650 * If the only item requested is file names, we can let that past with
9651 * just LIST_DIRECTORY. If they want any other attributes, that means
9652 * they need SEARCH as well.
9654 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9655 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9656 attributelist
.fileattr
|| attributelist
.dirattr
) {
9657 action
|= KAUTH_VNODE_SEARCH
;
9660 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9661 /* Believe it or not, uap->options only has 32-bits of valid
9662 * info, so truncate before extending again */
9664 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9665 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9669 (void) vnode_put(vp
);
9674 * If we've got the last entry of a directory in a union mount
9675 * then reset the eofflag and pretend there's still more to come.
9676 * The next call will again set eofflag and the buffer will be empty,
9677 * so traverse to the underlying directory and do the directory
9680 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
9681 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
9683 } else { // Empty buffer
9684 struct vnode
*tvp
= vp
;
9685 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
9686 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
9687 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9688 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
9690 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
9698 (void)vnode_put(vp
);
9703 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
9705 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
9708 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
9711 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
9715 *retval
= eofflag
; /* similar to getdirentries */
9719 return error
; /* return error earlier, an retval of 0 or 1 now */
9720 } /* end of getdirentriesattr system call */
9723 * Exchange data between two files
9728 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
9730 struct nameidata fnd
, snd
;
9731 vfs_context_t ctx
= vfs_context_current();
9735 u_int32_t nameiflags
;
9738 int flen
= 0, slen
= 0;
9739 int from_truncated
= 0, to_truncated
= 0;
9741 fse_info f_finfo
, s_finfo
;
9745 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9746 nameiflags
|= FOLLOW
;
9749 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
9750 UIO_USERSPACE
, uap
->path1
, ctx
);
9752 error
= namei(&fnd
);
9760 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
9761 UIO_USERSPACE
, uap
->path2
, ctx
);
9763 error
= namei(&snd
);
9772 * if the files are the same, return an inval error
9780 * if the files are on different volumes, return an error
9782 if (svp
->v_mount
!= fvp
->v_mount
) {
9787 /* If they're not files, return an error */
9788 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
9794 error
= mac_vnode_check_exchangedata(ctx
,
9800 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
9801 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
9807 need_fsevent(FSE_EXCHANGE
, fvp
) ||
9809 kauth_authorize_fileop_has_listeners()) {
9812 if (fpath
== NULL
|| spath
== NULL
) {
9817 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
9818 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
9821 get_fse_info(fvp
, &f_finfo
, ctx
);
9822 get_fse_info(svp
, &s_finfo
, ctx
);
9823 if (from_truncated
|| to_truncated
) {
9824 // set it here since only the f_finfo gets reported up to user space
9825 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
9829 /* Ok, make the call */
9830 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
9833 const char *tmpname
;
9835 if (fpath
!= NULL
&& spath
!= NULL
) {
9836 /* call out to allow 3rd party notification of exchangedata.
9837 * Ignore result of kauth_authorize_fileop call.
9839 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
9840 (uintptr_t)fpath
, (uintptr_t)spath
);
9844 tmpname
= fvp
->v_name
;
9845 fvp
->v_name
= svp
->v_name
;
9846 svp
->v_name
= tmpname
;
9848 if (fvp
->v_parent
!= svp
->v_parent
) {
9851 tmp
= fvp
->v_parent
;
9852 fvp
->v_parent
= svp
->v_parent
;
9853 svp
->v_parent
= tmp
;
9855 name_cache_unlock();
9858 if (fpath
!= NULL
&& spath
!= NULL
) {
9859 add_fsevent(FSE_EXCHANGE
, ctx
,
9860 FSE_ARG_STRING
, flen
, fpath
,
9861 FSE_ARG_FINFO
, &f_finfo
,
9862 FSE_ARG_STRING
, slen
, spath
,
9863 FSE_ARG_FINFO
, &s_finfo
,
9870 if (fpath
!= NULL
) {
9871 RELEASE_PATH(fpath
);
9873 if (spath
!= NULL
) {
9874 RELEASE_PATH(spath
);
9883 * Return (in MB) the amount of freespace on the given vnode's volume.
9885 uint32_t freespace_mb(vnode_t vp
);
9888 freespace_mb(vnode_t vp
)
9890 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9891 return ((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9892 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20;
9900 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9905 struct nameidata nd
;
9906 struct user64_fssearchblock searchblock
;
9907 struct searchstate
*state
;
9908 struct attrlist
*returnattrs
;
9909 struct timeval timelimit
;
9910 void *searchparams1
, *searchparams2
;
9912 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9913 uint32_t nummatches
;
9915 uint32_t nameiflags
;
9916 vfs_context_t ctx
= vfs_context_current();
9917 char uio_buf
[UIO_SIZEOF(1)];
9919 /* Start by copying in fsearchblock parameter list */
9920 if (IS_64BIT_PROCESS(p
)) {
9921 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9922 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9923 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9925 struct user32_fssearchblock tmp_searchblock
;
9927 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9928 // munge into 64-bit version
9929 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9930 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9931 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9932 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9934 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9935 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9937 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9938 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9939 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9940 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9941 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9942 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9943 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9949 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9951 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9952 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
9956 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9957 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9958 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9961 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9962 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9963 /* assumes the size is still 556 bytes it will continue to work */
9965 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9966 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
9968 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9970 /* Now set up the various pointers to the correct place in our newly allocated memory */
9972 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9973 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9974 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
9976 /* Now copy in the stuff given our local variables. */
9978 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
9982 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
9986 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
9990 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
9995 * When searching a union mount, need to set the
9996 * start flag at the first call on each layer to
9997 * reset state for the new volume.
9999 if (uap
->options
& SRCHFS_START
) {
10000 state
->ss_union_layer
= 0;
10002 uap
->options
|= state
->ss_union_flags
;
10004 state
->ss_union_flags
= 0;
10007 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10008 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10009 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10010 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10011 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10014 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
10015 attrreference_t
* string_ref
;
10016 u_int32_t
* start_length
;
10017 user64_size_t param_length
;
10019 /* validate searchparams1 */
10020 param_length
= searchblock
.sizeofsearchparams1
;
10021 /* skip the word that specifies length of the buffer */
10022 start_length
= (u_int32_t
*) searchparams1
;
10023 start_length
= start_length
+ 1;
10024 string_ref
= (attrreference_t
*) start_length
;
10026 /* ensure no negative offsets or too big offsets */
10027 if (string_ref
->attr_dataoffset
< 0) {
10031 if (string_ref
->attr_length
> MAXPATHLEN
) {
10036 /* Check for pointer overflow in the string ref */
10037 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
10042 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
10046 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
10052 /* set up the uio structure which will contain the users return buffer */
10053 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10054 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
10057 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10058 nameiflags
|= FOLLOW
;
10060 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
10061 UIO_USERSPACE
, uap
->path
, ctx
);
10063 error
= namei(&nd
);
10071 * Switch to the root vnode for the volume
10073 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
10081 * If it's a union mount, the path lookup takes
10082 * us to the top layer. But we may need to descend
10083 * to a lower layer. For non-union mounts the layer
10086 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
10087 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
10091 vp
= vp
->v_mount
->mnt_vnodecovered
;
10097 error
= vnode_getwithref(vp
);
10105 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
10114 * If searchblock.maxmatches == 0, then skip the search. This has happened
10115 * before and sometimes the underlying code doesn't deal with it well.
10117 if (searchblock
.maxmatches
== 0) {
10123 * All right, we have everything we need, so let's make that call.
10125 * We keep special track of the return value from the file system:
10126 * EAGAIN is an acceptable error condition that shouldn't keep us
10127 * from copying out any results...
10130 fserror
= VNOP_SEARCHFS(vp
,
10133 &searchblock
.searchattrs
,
10134 (u_long
)searchblock
.maxmatches
,
10138 (u_long
)uap
->scriptcode
,
10139 (u_long
)uap
->options
,
10141 (struct searchstate
*) &state
->ss_fsstate
,
10145 * If it's a union mount we need to be called again
10146 * to search the mounted-on filesystem.
10148 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
10149 state
->ss_union_flags
= SRCHFS_START
;
10150 state
->ss_union_layer
++; // search next layer down
10158 /* Now copy out the stuff that needs copying out. That means the number of matches, the
10159 * search state. Everything was already put into the return buffer by the vop call. */
10161 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
10165 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
10173 FREE(searchparams1
, M_TEMP
);
10176 } /* end of searchfs system call */
10178 #else /* CONFIG_SEARCHFS */
10181 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
10186 #endif /* CONFIG_SEARCHFS */
10189 #if CONFIG_DATALESS_FILES
10192 * === Namespace Resolver Up-call Mechanism ===
10194 * When I/O is performed to a dataless file or directory (read, write,
10195 * lookup-in, etc.), the file system performs an upcall to the namespace
10196 * resolver (filecoordinationd) to materialize the object.
10198 * We need multiple up-calls to be in flight at once, and we need these
10199 * up-calls to be interruptible, thus the following implementation:
10201 * => The nspace_resolver_request represents the in-kernel request state.
10202 * It contains a request ID, storage space for the errno code returned
10203 * by filecoordinationd, and flags.
10205 * => The request ID is simply a global monotonically incrementing 32-bit
10206 * number. Outstanding requests are stored in a hash table, and the
10207 * hash function is extremely simple.
10209 * => When an upcall is to be made to filecoordinationd, a request structure
10210 * is allocated on the stack (it is small, and needs to live only during
10211 * the duration of the call to resolve_nspace_item_ext()). It is
10212 * initialized and inserted into the table. Some backpressure from
10213 * filecoordinationd is applied by limiting the number of entries that
10214 * can be inserted into the table (and thus limiting the number of
10215 * outstanding requests issued to filecoordinationd); waiting for an
10216 * available slot is interruptible.
10218 * => Once the request has been inserted into the table, the up-call is made
10219 * to filecoordinationd via a MiG-generated stub. The up-call returns
10220 * immediately and filecoordinationd processes the request asynchronously.
10222 * => The caller now waits for the request to complete. This is achieved by
10223 * sleeping on the address of the request structure and waiting for
10224 * filecoordinationd to mark the request structure as complete. This
10225 * is an interruptible sleep call; if interrupted, the request structure
10226 * is removed from the table and EINTR is returned to the caller. If
10227 * this occurs, an advisory up-call is made to filecoordinationd with
10228 * the request ID to indicate that the request can be aborted or
10229 * de-prioritized at the discretion of filecoordinationd.
10231 * => When filecoordinationd has completed the request, it signals completion
10232 * by writing to the vfs.nspace.complete sysctl node. Only a process
10233 * decorated as a namespace resolver can write to this sysctl node. The
10234 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10235 * The request ID is looked up in the table, and if the request is found,
10236 * the error code is stored in the request structure and a wakeup()
10237 * issued on the address of the request structure. If the request is not
10238 * found, we simply drop the completion notification, assuming that the
10239 * caller was interrupted.
10241 * => When the waiting thread wakes up, it extracts the error code from the
10242 * request structure, removes the request from the table, and returns the
10243 * error code to the calling function. Fini!
10246 struct nspace_resolver_request
{
10247 LIST_ENTRY(nspace_resolver_request
) r_hashlink
;
10249 int r_resolver_error
;
10253 #define RRF_COMPLETE 0x0001
/*
 * Hand out the next namespace-resolver request ID.
 *
 * IDs are drawn from a function-local static 32-bit counter that is bumped
 * by one, atomically, on every call.  The value handed back is whatever
 * OSAddAtomic() returns for that increment.
 */
static uint32_t
next_nspace_req_id(void)
{
	static uint32_t next_req_id;
	uint32_t id;

	id = OSAddAtomic(1, &next_req_id);

	return id;
}
10263 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10264 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10266 static LIST_HEAD(nspace_resolver_requesthead
,
10267 nspace_resolver_request
) * nspace_resolver_request_hashtbl
;
10268 static u_long nspace_resolver_request_hashmask
;
10269 static u_int nspace_resolver_request_count
;
10270 static bool nspace_resolver_request_wait_slot
;
10271 static lck_grp_t
*nspace_resolver_request_lck_grp
;
10272 static lck_mtx_t nspace_resolver_request_hash_mutex
;
10274 #define NSPACE_REQ_LOCK() \
10275 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10276 #define NSPACE_REQ_UNLOCK() \
10277 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10279 #define NSPACE_RESOLVER_HASH(req_id) \
10280 (&nspace_resolver_request_hashtbl[(req_id) & \
10281 nspace_resolver_request_hashmask])
10283 static struct nspace_resolver_request
*
10284 nspace_resolver_req_lookup(uint32_t req_id
)
10286 struct nspace_resolver_requesthead
*bucket
;
10287 struct nspace_resolver_request
*req
;
10289 bucket
= NSPACE_RESOLVER_HASH(req_id
);
10290 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10291 if (req
->r_req_id
== req_id
) {
10300 nspace_resolver_req_add(struct nspace_resolver_request
*req
)
10302 struct nspace_resolver_requesthead
*bucket
;
10305 while (nspace_resolver_request_count
>=
10306 NSPACE_RESOLVER_MAX_OUTSTANDING
) {
10307 nspace_resolver_request_wait_slot
= true;
10308 error
= msleep(&nspace_resolver_request_count
,
10309 &nspace_resolver_request_hash_mutex
,
10310 PVFS
| PCATCH
, "nspacerq", NULL
);
10316 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10318 assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
);
10319 #endif /* DIAGNOSTIC */
10320 LIST_INSERT_HEAD(bucket
, req
, r_hashlink
);
10321 nspace_resolver_request_count
++;
10327 nspace_resolver_req_remove(struct nspace_resolver_request
*req
)
10329 struct nspace_resolver_requesthead
*bucket
;
10331 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10333 assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
);
10334 #endif /* DIAGNOSTIC */
10335 LIST_REMOVE(req
, r_hashlink
);
10336 nspace_resolver_request_count
--;
10338 if (nspace_resolver_request_wait_slot
) {
10339 nspace_resolver_request_wait_slot
= false;
10340 wakeup(&nspace_resolver_request_count
);
10345 nspace_resolver_req_cancel(uint32_t req_id
)
10350 // Failures here aren't fatal -- the cancellation message
10351 // sent to the resolver is merely advisory.
10353 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10354 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10358 kr
= send_nspace_resolve_cancel(mp
, req_id
);
10359 if (kr
!= KERN_SUCCESS
) {
10360 os_log_error(OS_LOG_DEFAULT
,
10361 "NSPACE send_nspace_resolve_cancel failure: %d", kr
);
10364 ipc_port_release_send(mp
);
10368 nspace_resolver_req_wait(struct nspace_resolver_request
*req
)
10370 bool send_cancel_message
= false;
10375 while ((req
->r_flags
& RRF_COMPLETE
) == 0) {
10376 error
= msleep(req
, &nspace_resolver_request_hash_mutex
,
10377 PVFS
| PCATCH
, "nspace", NULL
);
10378 if (error
&& error
!= ERESTART
) {
10379 req
->r_resolver_error
= (error
== EINTR
) ? EINTR
:
10381 send_cancel_message
= true;
10386 nspace_resolver_req_remove(req
);
10388 NSPACE_REQ_UNLOCK();
10390 if (send_cancel_message
) {
10391 nspace_resolver_req_cancel(req
->r_req_id
);
10394 return req
->r_resolver_error
;
10398 nspace_resolver_req_mark_complete(
10399 struct nspace_resolver_request
*req
,
10400 int resolver_error
)
10402 req
->r_resolver_error
= resolver_error
;
10403 req
->r_flags
|= RRF_COMPLETE
;
10408 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
)
10410 struct nspace_resolver_request
*req
;
10414 // If we don't find the request corresponding to our req_id,
10415 // just drop the completion signal on the floor; it's likely
10416 // that the requester interrupted with a signal.
10418 req
= nspace_resolver_req_lookup(req_id
);
10420 nspace_resolver_req_mark_complete(req
, resolver_error
);
10423 NSPACE_REQ_UNLOCK();
10426 static struct proc
*nspace_resolver_proc
;
10429 nspace_resolver_get_proc_state(struct proc
*p
, int *is_resolver
)
10431 *is_resolver
= ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10432 p
== nspace_resolver_proc
) ? 1 : 0;
10437 nspace_resolver_set_proc_state(struct proc
*p
, int is_resolver
)
10439 vfs_context_t ctx
= vfs_context_current();
10443 // The system filecoordinationd runs as uid == 0. This also
10444 // has the nice side-effect of filtering out filecoordinationd
10445 // running in the simulator.
10447 if (!vfs_context_issuser(ctx
)) {
10451 error
= priv_check_cred(vfs_context_ucred(ctx
),
10452 PRIV_VFS_DATALESS_RESOLVER
, 0);
10460 if (nspace_resolver_proc
== NULL
) {
10462 p
->p_lflag
|= P_LNSPACE_RESOLVER
;
10464 nspace_resolver_proc
= p
;
10469 NSPACE_REQ_UNLOCK();
10471 // This is basically just like the exit case.
10472 // nspace_resolver_exited() will verify that the
10473 // process is the resolver, and will clear the
10475 nspace_resolver_exited(p
);
10482 nspace_materialization_get_proc_state(struct proc
*p
, int *is_prevented
)
10484 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) != 0 ||
10485 (p
->p_vfs_iopolicy
&
10486 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) {
10495 nspace_materialization_set_proc_state(struct proc
*p
, int is_prevented
)
10497 if (p
->p_lflag
& P_LNSPACE_RESOLVER
) {
10498 return is_prevented
? 0 : EBUSY
;
10501 if (is_prevented
) {
10502 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
);
10504 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
);
10510 nspace_materialization_get_thread_state(int *is_prevented
)
10512 uthread_t ut
= get_bsdthread_info(current_thread());
10514 *is_prevented
= (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0;
10519 nspace_materialization_set_thread_state(int is_prevented
)
10521 uthread_t ut
= get_bsdthread_info(current_thread());
10523 if (is_prevented
) {
10524 ut
->uu_flag
|= UT_NSPACE_NODATALESSFAULTS
;
10526 ut
->uu_flag
&= ~UT_NSPACE_NODATALESSFAULTS
;
10532 nspace_materialization_is_prevented(void)
10534 proc_t p
= current_proc();
10535 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
10536 vfs_context_t ctx
= vfs_context_current();
10539 * Kernel context ==> return EDEADLK, as we would with any random
10540 * process decorated as no-materialize.
10542 if (ctx
== vfs_context_kernel()) {
10547 * If the process has the dataless-manipulation entitlement,
10548 * materialization is prevented, and depending on the kind
10549 * of file system operation, things get to proceed as if the
10550 * object is not dataless.
10552 if (vfs_context_is_dataless_manipulator(ctx
)) {
10553 return EJUSTRETURN
;
10557 * Per-thread decorations override any process-wide decorations.
10558 * (Foundation uses this, and this overrides even the dataless-
10559 * manipulation entitlement so as to make API contracts consistent.)
10562 if (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) {
10565 if (ut
->uu_flag
& UT_NSPACE_FORCEDATALESSFAULTS
) {
10571 * If the process's iopolicy specifies that dataless files
10572 * can be materialized, then we let it go ahead.
10574 if (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) {
10579 * The default behavior is to not materialize dataless files;
10580 * return to the caller that deadlock was detected.
10585 /* the vfs.nspace branch */
10586 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge");
10589 sysctl_nspace_resolver(__unused
struct sysctl_oid
*oidp
,
10590 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10592 struct proc
*p
= req
->p
;
10593 int new_value
, old_value
, changed
= 0;
10596 error
= nspace_resolver_get_proc_state(p
, &old_value
);
10601 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10603 if (error
== 0 && changed
) {
10604 error
= nspace_resolver_set_proc_state(p
, new_value
);
10609 /* decorate this process as the dataless file resolver */
10610 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
,
10611 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10612 0, 0, sysctl_nspace_resolver
, "I", "");
10615 sysctl_nspace_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10616 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10618 struct proc
*p
= req
->p
;
10619 int new_value
, old_value
, changed
= 0;
10622 error
= nspace_materialization_get_proc_state(p
, &old_value
);
10627 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10629 if (error
== 0 && changed
) {
10630 error
= nspace_materialization_set_proc_state(p
, new_value
);
10635 /* decorate this process as not wanting to materialize dataless files */
10636 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
,
10637 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10638 0, 0, sysctl_nspace_prevent_materialization
, "I", "");
10641 sysctl_nspace_thread_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10642 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10644 int new_value
, old_value
, changed
= 0;
10647 error
= nspace_materialization_get_thread_state(&old_value
);
10652 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10654 if (error
== 0 && changed
) {
10655 error
= nspace_materialization_set_thread_state(new_value
);
10660 /* decorate this thread as not wanting to materialize dataless files */
10661 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
,
10662 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10663 0, 0, sysctl_nspace_thread_prevent_materialization
, "I", "");
10666 sysctl_nspace_complete(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
,
10667 __unused
int arg2
, struct sysctl_req
*req
)
10669 struct proc
*p
= req
->p
;
10670 uint32_t req_status
[2] = { 0, 0 };
10671 int error
, is_resolver
, changed
= 0;
10673 error
= nspace_resolver_get_proc_state(p
, &is_resolver
);
10678 if (!is_resolver
) {
10682 error
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
),
10689 * req_status[0] is the req_id
10691 * req_status[1] is the errno
10693 if (error
== 0 && changed
) {
10694 nspace_resolver_req_completed(req_status
[0],
10695 (int)req_status
[1]);
10700 /* Resolver reports completed reqs here. */
10701 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
,
10702 CTLTYPE_OPAQUE
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10703 0, 0, sysctl_nspace_complete
, "-", "");
10705 #endif /* CONFIG_DATALESS_FILES */
10707 #if CONFIG_DATALESS_FILES
10708 #define __no_dataless_unused /* nothing */
10710 #define __no_dataless_unused __unused
10714 nspace_resolver_init(void)
10716 #if CONFIG_DATALESS_FILES
10717 nspace_resolver_request_lck_grp
=
10718 lck_grp_alloc_init("file namespace resolver", NULL
);
10720 lck_mtx_init(&nspace_resolver_request_hash_mutex
,
10721 nspace_resolver_request_lck_grp
, NULL
);
10723 nspace_resolver_request_hashtbl
=
10724 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
,
10725 M_VNODE
/* XXX */, &nspace_resolver_request_hashmask
);
10726 #endif /* CONFIG_DATALESS_FILES */
10730 nspace_resolver_exited(struct proc
*p __no_dataless_unused
)
10732 #if CONFIG_DATALESS_FILES
10733 struct nspace_resolver_requesthead
*bucket
;
10734 struct nspace_resolver_request
*req
;
10739 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10740 p
== nspace_resolver_proc
) {
10741 for (idx
= 0; idx
<= nspace_resolver_request_hashmask
; idx
++) {
10742 bucket
= &nspace_resolver_request_hashtbl
[idx
];
10743 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10744 nspace_resolver_req_mark_complete(req
,
10748 nspace_resolver_proc
= NULL
;
10751 NSPACE_REQ_UNLOCK();
10752 #endif /* CONFIG_DATALESS_FILES */
/*
 * Convenience wrapper around resolve_nspace_item_ext() for callers that
 * have no extra argument to supply.
 *
 * vp - vnode the namespace operation applies to
 * op - namespace operation bits, forwarded untouched
 *
 * Returns whatever resolve_nspace_item_ext() returns when called with a
 * NULL third argument.
 *
 * NOTE(review): the return-type line is not visible in this excerpt;
 * `int` is assumed -- confirm against the prototype.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
10761 #define DATALESS_RESOLVER_ENTITLEMENT \
10762 "com.apple.private.vfs.dataless-resolver"
10763 #define DATALESS_MANIPULATION_ENTITLEMENT \
10764 "com.apple.private.vfs.dataless-manipulation"
10767 * Return TRUE if the vfs context is associated with a process entitled
10768 * for dataless manipulation.
10770 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10771 * complication around CONFIG_DATALESS_FILES.
10774 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
)
10776 #if CONFIG_DATALESS_FILES
10777 assert(ctx
->vc_thread
== current_thread());
10778 task_t
const task
= current_task();
10779 return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) ||
10780 IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
);
10783 #endif /* CONFIG_DATALESS_FILES */
10787 resolve_nspace_item_ext(
10788 struct vnode
*vp __no_dataless_unused
,
10789 uint64_t op __no_dataless_unused
,
10790 void *arg __unused
)
10792 #if CONFIG_DATALESS_FILES
10798 struct nspace_resolver_request req
;
10800 // only allow namespace events on regular files, directories and symlinks.
10801 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
10806 // if this is a snapshot event and the vnode is on a
10807 // disk image just pretend nothing happened since any
10808 // change to the disk image will cause the disk image
10809 // itself to get backed up and this avoids multi-way
10810 // deadlocks between the snapshot handler and the ever
10811 // popular diskimages-helper process. the variable
10812 // nspace_allow_virtual_devs allows this behavior to
10813 // be overridden (for use by the Mobile TimeMachine
10814 // testing infrastructure which uses disk images)
10816 if (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) {
10817 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled");
10821 error
= nspace_materialization_is_prevented();
10823 os_log_debug(OS_LOG_DEFAULT
,
10824 "NSPACE process/thread is decorated as no-materialization");
10828 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10829 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10830 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port");
10831 // Treat this like being unable to access the backing
10836 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
10837 if (path
== NULL
) {
10839 goto out_release_port
;
10841 path_len
= MAXPATHLEN
;
10843 error
= vn_getpath(vp
, path
, &path_len
);
10845 int xxx_rdar44371223
; /* XXX Mig bug */
10846 req
.r_req_id
= next_nspace_req_id();
10847 req
.r_resolver_error
= 0;
10851 error
= nspace_resolver_req_add(&req
);
10852 NSPACE_REQ_UNLOCK();
10854 goto out_release_port
;
10857 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call");
10858 kr
= send_nspace_resolve_path(mp
, req
.r_req_id
,
10859 current_proc()->p_pid
, (uint32_t)(op
& 0xffffffff),
10860 path
, &xxx_rdar44371223
);
10861 if (kr
!= KERN_SUCCESS
) {
10862 // Also treat this like being unable to access
10863 // the backing store server.
10864 os_log_error(OS_LOG_DEFAULT
,
10865 "NSPACE resolve_path failure: %d", kr
);
10869 nspace_resolver_req_remove(&req
);
10870 NSPACE_REQ_UNLOCK();
10871 goto out_release_port
;
10874 // Give back the memory we allocated earlier while
10875 // we wait; we no longer need it.
10876 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10879 // Request has been submitted to the resolver.
10880 // Now (interruptibly) wait for completion.
10881 // Upon return, the request will have been removed
10882 // from the lookup table.
10883 error
= nspace_resolver_req_wait(&req
);
10887 if (path
!= NULL
) {
10888 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10890 ipc_port_release_send(mp
);
10895 #endif /* CONFIG_DATALESS_FILES */
10899 nspace_snapshot_event(__unused vnode_t vp
, __unused
time_t ctime
,
10900 __unused
uint64_t op_type
, __unused
void *arg
)
10907 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
10909 struct vnode_attr va
;
10913 VATTR_WANTED(&va
, va_fsid
);
10914 VATTR_WANTED(&va
, va_fileid
);
10916 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
10917 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
10920 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
10928 static unsigned long
10929 fsctl_bogus_command_compat(unsigned long cmd
)
10932 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10933 return FSIOC_SYNC_VOLUME
;
10934 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10935 return FSIOC_ROUTEFS_SETROUTEID
;
10936 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10937 return FSIOC_SET_PACKAGE_EXTS
;
10938 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10939 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
10940 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10941 return DISK_CONDITIONER_IOC_GET
;
10942 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10943 return DISK_CONDITIONER_IOC_SET
;
10944 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10945 return FSIOC_FIOSEEKHOLE
;
10946 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10947 return FSIOC_FIOSEEKDATA
;
10948 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10949 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
10950 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10951 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
10958 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
)
10960 return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
);
10964 * Make a filesystem-specific control call:
10968 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10973 #define STK_PARAMS 128
10974 char stkbuf
[STK_PARAMS
] = {0};
10975 caddr_t data
, memp
;
10976 vnode_t vp
= *arg_vp
;
10978 if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
10982 cmd
= fsctl_bogus_command_compat(cmd
);
10984 size
= IOCPARM_LEN(cmd
);
10985 if (size
> IOCPARM_MAX
) {
10989 is64bit
= proc_is64bit(p
);
10993 if (size
> sizeof(stkbuf
)) {
10994 if ((memp
= (caddr_t
)kalloc(size
)) == 0) {
11002 if (cmd
& IOC_IN
) {
11004 error
= copyin(udata
, data
, size
);
11013 *(user_addr_t
*)data
= udata
;
11015 *(uint32_t *)data
= (uint32_t)udata
;
11018 } else if ((cmd
& IOC_OUT
) && size
) {
11020 * Zero the buffer so the user always
11021 * gets back something deterministic.
11024 } else if (cmd
& IOC_VOID
) {
11026 *(user_addr_t
*)data
= udata
;
11028 *(uint32_t *)data
= (uint32_t)udata
;
11032 /* Check to see if it's a generic command */
11034 case FSIOC_SYNC_VOLUME
: {
11035 struct vfs_attr vfa
;
11036 mount_t mp
= vp
->v_mount
;
11040 /* record vid of vp so we can drop it below. */
11041 uint32_t vvid
= vp
->v_id
;
11044 * Then grab mount_iterref so that we can release the vnode.
11045 * Without this, a thread may call vnode_iterate_prepare then
11046 * get into a deadlock because we've never released the root vp
11048 error
= mount_iterref(mp
, 0);
11055 if (*(uint32_t*)data
& FSCTL_SYNC_WAIT
) {
11060 * If the filesystem supports multiple filesystems in a
11061 * partition (e.g. APFS volumes in a container), it knows
11062 * that the waitfor argument to VFS_SYNC is a set of flags.
11064 VFSATTR_INIT(&vfa
);
11065 VFSATTR_WANTED(&vfa
, f_capabilities
);
11066 if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) &&
11067 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
11068 ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) &&
11069 ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) {
11073 /* issue the sync for this volume */
11074 (void)sync_callback(mp
, &arg
);
11077 * Then release the mount_iterref once we're done syncing; it's not
11078 * needed for the VNOP_IOCTL below
11080 mount_iterdrop(mp
);
11082 if (arg
& FSCTL_SYNC_FULLSYNC
) {
11083 /* re-obtain vnode iocount on the root vp, if possible */
11084 error
= vnode_getwithvid(vp
, vvid
);
11086 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
11090 /* mark the argument VP as having been released */
11095 case FSIOC_ROUTEFS_SETROUTEID
: {
11097 char routepath
[MAXPATHLEN
];
11100 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11103 bzero(routepath
, MAXPATHLEN
);
11104 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
11108 error
= routefs_kernel_mount(routepath
);
11116 case FSIOC_SET_PACKAGE_EXTS
: {
11117 user_addr_t ext_strings
;
11118 uint32_t num_entries
;
11119 uint32_t max_width
;
11121 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
11125 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
11126 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
11127 // either you're 64-bit and passed a 64-bit struct or
11128 // you're 32-bit and passed a 32-bit struct. otherwise
11135 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
11136 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
11137 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
11139 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
11140 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
11141 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
11143 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
11147 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
11149 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11153 mount_lock(vp
->v_mount
);
11154 if (data
[0] != 0) {
11155 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
11156 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
11157 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11158 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
11159 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
11162 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11163 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
11165 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
11166 vp
->v_mount
->fstypename_override
[0] = '\0';
11168 mount_unlock(vp
->v_mount
);
11173 case DISK_CONDITIONER_IOC_GET
: {
11174 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11178 case DISK_CONDITIONER_IOC_SET
: {
11179 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11183 case FSIOC_CAS_BSDFLAGS
: {
11184 struct fsioc_cas_bsdflags
*cas
= (struct fsioc_cas_bsdflags
*)data
;
11185 struct vnode_attr va
;
11188 VATTR_SET(&va
, va_flags
, cas
->new_flags
);
11190 error
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
);
11194 case FSIOC_FD_ONLY_OPEN_ONCE
: {
11195 if (vnode_usecount(vp
) > 1) {
11204 /* other, known commands shouldn't be passed down here */
11207 case F_TRIM_ACTIVE_FILE
:
11209 case F_TRANSCODEKEY
:
11210 case F_GETPROTECTIONLEVEL
:
11211 case F_GETDEFAULTPROTLEVEL
:
11212 case F_MAKECOMPRESSED
:
11213 case F_SET_GREEDY_MODE
:
11214 case F_SETSTATICCONTENT
:
11216 case F_SETBACKINGSTORE
:
11217 case F_GETPATH_MTMINFO
:
11218 case APFSIOC_REVERT_TO_SNAPSHOT
:
11219 case FSIOC_FIOSEEKHOLE
:
11220 case FSIOC_FIOSEEKDATA
:
11221 case HFS_GET_BOOT_INFO
:
11222 case HFS_SET_BOOT_INFO
:
11226 case F_BARRIERFSYNC
:
11232 /* Invoke the filesystem-specific code */
11233 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
11235 } /* end switch stmt */
11238 * if no errors, copy any data to user. Size was
11239 * already set and checked above.
11241 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
11242 error
= copyout(data
, udata
, size
);
11255 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
11258 struct nameidata nd
;
11261 vfs_context_t ctx
= vfs_context_current();
11263 AUDIT_ARG(cmd
, uap
->cmd
);
11264 AUDIT_ARG(value32
, uap
->options
);
11265 /* Get the vnode for the file we are getting info on: */
11268 // if we come through fsctl() then the file is by definition not open.
11269 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11270 // lest the caller mistakenly thinks the only open is their own (but in
11271 // reality it's someone else's).
11273 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
11276 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
11277 nameiflags
|= FOLLOW
;
11279 if (uap
->cmd
== FSIOC_FIRMLINK_CTL
) {
11280 nameiflags
|= (CN_FIRMLINK_NOFOLLOW
| NOCACHE
);
11282 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
11283 UIO_USERSPACE
, uap
->path
, ctx
);
11284 if ((error
= namei(&nd
))) {
11291 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
11297 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11307 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11311 vfs_context_t ctx
= vfs_context_current();
11314 AUDIT_ARG(fd
, uap
->fd
);
11315 AUDIT_ARG(cmd
, uap
->cmd
);
11316 AUDIT_ARG(value32
, uap
->options
);
11318 /* Get the vnode for the file we are getting info on: */
11319 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11323 if ((error
= vnode_getwithref(vp
))) {
11329 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
11336 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11340 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11347 /* end of fsctl system call */
11350 * Retrieve the data of an extended attribute.
11353 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11356 struct nameidata nd
;
11357 char attrname
[XATTR_MAXNAMELEN
+ 1];
11358 vfs_context_t ctx
= vfs_context_current();
11360 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11361 size_t attrsize
= 0;
11363 u_int32_t nameiflags
;
11365 char uio_buf
[UIO_SIZEOF(1)];
11367 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11371 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11372 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11373 if ((error
= namei(&nd
))) {
11379 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11383 if (xattr_protected(attrname
)) {
11384 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
11390 * the specific check for 0xffffffff is a hack to preserve
11391 * binary compatibility in K64 with applications that discovered
11392 * that passing in a buf pointer and a size of -1 resulted in
11393 * just the size of the indicated extended attribute being returned.
11394 * this isn't part of the documented behavior, but because of the
11395 * original implementation's check for "uap->size > 0", this behavior
11396 * was allowed. In K32 that check turned into a signed comparison
11397 * even though uap->size is unsigned... in K64, we blow by that
11398 * check because uap->size is unsigned and doesn't get sign smeared
11399 * in the munger for a 32 bit user app. we also need to add a
11400 * check to limit the maximum size of the buffer being passed in...
11401 * unfortunately, the underlying filesystems seem to just malloc
11402 * the requested size even if the actual extended attribute is tiny.
11403 * because that malloc is for kernel wired memory, we have to put a
11404 * sane limit on it.
11406 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11407 * U64 running on K64 will yield -1 (64 bits wide)
11408 * U32/U64 running on K32 will yield -1 (32 bits wide)
11410 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
11415 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11416 uap
->size
= XATTR_MAXSIZE
;
11419 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11420 &uio_buf
[0], sizeof(uio_buf
));
11421 uio_addiov(auio
, uap
->value
, uap
->size
);
11424 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11429 *retval
= uap
->size
- uio_resid(auio
);
11431 *retval
= (user_ssize_t
)attrsize
;
11438 * Retrieve the data of an extended attribute.
11441 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11444 char attrname
[XATTR_MAXNAMELEN
+ 1];
11446 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11447 size_t attrsize
= 0;
11450 char uio_buf
[UIO_SIZEOF(1)];
11452 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11456 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11459 if ((error
= vnode_getwithref(vp
))) {
11460 file_drop(uap
->fd
);
11463 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11467 if (xattr_protected(attrname
)) {
11471 if (uap
->value
&& uap
->size
> 0) {
11472 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11473 &uio_buf
[0], sizeof(uio_buf
));
11474 uio_addiov(auio
, uap
->value
, uap
->size
);
11477 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11479 (void)vnode_put(vp
);
11480 file_drop(uap
->fd
);
11483 *retval
= uap
->size
- uio_resid(auio
);
11485 *retval
= (user_ssize_t
)attrsize
;
11491 * Set the data of an extended attribute.
11494 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11497 struct nameidata nd
;
11498 char attrname
[XATTR_MAXNAMELEN
+ 1];
11499 vfs_context_t ctx
= vfs_context_current();
11501 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11503 u_int32_t nameiflags
;
11505 char uio_buf
[UIO_SIZEOF(1)];
11507 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11511 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11513 if (error
== EPERM
) {
11514 /* if the string won't fit in attrname, copyinstr emits EPERM */
11515 return ENAMETOOLONG
;
11517 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11520 if (xattr_protected(attrname
)) {
11523 if (uap
->size
!= 0 && uap
->value
== 0) {
11527 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11528 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11529 if ((error
= namei(&nd
))) {
11535 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11536 &uio_buf
[0], sizeof(uio_buf
));
11537 uio_addiov(auio
, uap
->value
, uap
->size
);
11539 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
11542 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11553 * Set the data of an extended attribute.
11556 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
11559 char attrname
[XATTR_MAXNAMELEN
+ 1];
11561 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11564 char uio_buf
[UIO_SIZEOF(1)];
11566 vfs_context_t ctx
= vfs_context_current();
11569 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11573 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11575 if (error
== EPERM
) {
11576 /* if the string won't fit in attrname, copyinstr emits EPERM */
11577 return ENAMETOOLONG
;
11579 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11582 if (xattr_protected(attrname
)) {
11585 if (uap
->size
!= 0 && uap
->value
== 0) {
11588 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11591 if ((error
= vnode_getwithref(vp
))) {
11592 file_drop(uap
->fd
);
11595 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11596 &uio_buf
[0], sizeof(uio_buf
));
11597 uio_addiov(auio
, uap
->value
, uap
->size
);
11599 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
11602 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11608 file_drop(uap
->fd
);
11614 * Remove an extended attribute.
11615 * XXX Code duplication here.
11618 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
11621 struct nameidata nd
;
11622 char attrname
[XATTR_MAXNAMELEN
+ 1];
11623 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11624 vfs_context_t ctx
= vfs_context_current();
11626 u_int32_t nameiflags
;
11629 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11633 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11637 if (xattr_protected(attrname
)) {
11640 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11641 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11642 if ((error
= namei(&nd
))) {
11648 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
11651 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11662 * Remove an extended attribute.
11663 * XXX Code duplication here.
11666 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
11669 char attrname
[XATTR_MAXNAMELEN
+ 1];
11673 vfs_context_t ctx
= vfs_context_current();
11676 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11680 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11684 if (xattr_protected(attrname
)) {
11687 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11690 if ((error
= vnode_getwithref(vp
))) {
11691 file_drop(uap
->fd
);
11695 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
11698 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11704 file_drop(uap
->fd
);
11710 * Retrieve the list of extended attribute names.
11711 * XXX Code duplication here.
11714 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11717 struct nameidata nd
;
11718 vfs_context_t ctx
= vfs_context_current();
11720 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11721 size_t attrsize
= 0;
11722 u_int32_t nameiflags
;
11724 char uio_buf
[UIO_SIZEOF(1)];
11726 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11730 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11731 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11732 if ((error
= namei(&nd
))) {
11737 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11738 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11739 &uio_buf
[0], sizeof(uio_buf
));
11740 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11743 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11747 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11749 *retval
= (user_ssize_t
)attrsize
;
11755 * Retrieve the list of extended attribute names.
11756 * XXX Code duplication here.
11759 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11763 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11764 size_t attrsize
= 0;
11766 char uio_buf
[UIO_SIZEOF(1)];
11768 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11772 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11775 if ((error
= vnode_getwithref(vp
))) {
11776 file_drop(uap
->fd
);
11779 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11780 auio
= uio_createwithbuffer(1, 0, spacetype
,
11781 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11782 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11785 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11788 file_drop(uap
->fd
);
11790 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11792 *retval
= (user_ssize_t
)attrsize
;
11798 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11799 vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
)
11802 struct mount
*mp
= NULL
;
11806 /* maximum number of times to retry build_path */
11807 unsigned int retries
= 0x10;
11809 if (bufsize
> PAGE_SIZE
) {
11818 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11819 error
= ENOTSUP
; /* unexpected failure */
11825 struct vfs_attr vfsattr
;
11826 int use_vfs_root
= TRUE
;
11828 VFSATTR_INIT(&vfsattr
);
11829 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
11830 if (!(options
& FSOPT_ISREALFSID
) &&
11831 vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 &&
11832 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
11833 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) &&
11834 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) {
11835 use_vfs_root
= FALSE
;
11839 if (use_vfs_root
) {
11840 error
= VFS_ROOT(mp
, &vp
, ctx
);
11842 error
= VFS_VGET(mp
, objid
, &vp
, ctx
);
11845 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11848 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11850 * If the fileid isn't found and we're in a union
11851 * mount volume, then see if the fileid is in the
11852 * mounted-on volume.
11854 struct mount
*tmp
= mp
;
11855 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11857 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
11869 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11876 /* Obtain the absolute path to this vnode. */
11877 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11878 if (options
& FSOPT_NOFIRMLINKPATH
) {
11879 bpflags
|= BUILDPATH_NO_FIRMLINK
;
11881 bpflags
|= BUILDPATH_CHECK_MOVED
;
11882 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11886 /* there was a race building the path, try a few more times */
11887 if (error
== EAGAIN
) {
11898 AUDIT_ARG(text
, buf
);
11900 if (kdebug_enable
) {
11901 long dbg_parms
[NUMPARMS
];
11904 dbg_namelen
= (int)sizeof(dbg_parms
);
11906 if (length
< dbg_namelen
) {
11907 memcpy((char *)dbg_parms
, buf
, length
);
11908 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11910 dbg_namelen
= length
;
11912 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11915 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11916 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
11919 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11926 * Obtain the full pathname of a file system object by id.
11929 fsgetpath_extended(user_addr_t buf
, int bufsize
, user_addr_t user_fsid
, uint64_t objid
,
11930 uint32_t options
, user_ssize_t
*retval
)
11932 vfs_context_t ctx
= vfs_context_current();
11938 if (options
& ~(FSOPT_NOFIRMLINKPATH
| FSOPT_ISREALFSID
)) {
11942 if ((error
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11945 AUDIT_ARG(value32
, fsid
.val
[0]);
11946 AUDIT_ARG(value64
, objid
);
11947 /* Restrict output buffer size for now. */
11949 if (bufsize
> PAGE_SIZE
|| bufsize
<= 0) {
11952 MALLOC(realpath
, char *, bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
11953 if (realpath
== NULL
) {
11957 error
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
,
11964 error
= copyout((caddr_t
)realpath
, buf
, length
);
11966 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11969 FREE(realpath
, M_TEMP
);
11975 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
11977 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
11982 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args
*uap
, user_ssize_t
*retval
)
11984 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
11985 uap
->options
, retval
);
11989 * Common routine to handle various flavors of statfs data heading out
11992 * Returns: 0 Success
11996 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
11997 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
11998 boolean_t partial_copy
)
12001 int my_size
, copy_size
;
12004 struct user64_statfs sfs
;
12005 my_size
= copy_size
= sizeof(sfs
);
12006 bzero(&sfs
, my_size
);
12007 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12008 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12009 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12010 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
12011 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
12012 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
12013 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
12014 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
12015 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
12016 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
12017 sfs
.f_fsid
= sfsp
->f_fsid
;
12018 sfs
.f_owner
= sfsp
->f_owner
;
12019 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12020 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12022 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12024 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12025 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12027 if (partial_copy
) {
12028 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12030 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12032 struct user32_statfs sfs
;
12034 my_size
= copy_size
= sizeof(sfs
);
12035 bzero(&sfs
, my_size
);
12037 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12038 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12039 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12042 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12043 * have to fudge the numbers here in that case. We inflate the blocksize in order
12044 * to reflect the filesystem size as best we can.
12046 if ((sfsp
->f_blocks
> INT_MAX
)
12047 /* Hack for 4061702 . I think the real fix is for Carbon to
12048 * look for some volume capability and not depend on hidden
12049 * semantics agreed between a FS and carbon.
12050 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12051 * for Carbon to set bNoVolumeSizes volume attribute.
12052 * Without this the webdavfs files cannot be copied onto
12053 * disk as they look huge. This change should not affect
12054 * XSAN as they should not be setting these to -1.
12056 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
12057 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
12058 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
12062 * Work out how far we have to shift the block count down to make it fit.
12063 * Note that it's possible to have to shift so far that the resulting
12064 * blocksize would be unreportably large. At that point, we will clip
12065 * any values that don't fit.
12067 * For safety's sake, we also ensure that f_iosize is never reported as
12068 * being smaller than f_bsize.
12070 for (shift
= 0; shift
< 32; shift
++) {
12071 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
12074 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
12078 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12079 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
12080 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
12081 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
12082 #undef __SHIFT_OR_CLIP
12083 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
12084 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
12086 /* filesystem is small enough to be reported honestly */
12087 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
12088 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
12089 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
12090 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
12091 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
12093 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
12094 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
12095 sfs
.f_fsid
= sfsp
->f_fsid
;
12096 sfs
.f_owner
= sfsp
->f_owner
;
12097 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12098 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12100 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12102 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12103 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12105 if (partial_copy
) {
12106 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12108 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12111 if (sizep
!= NULL
) {
12118 * copy stat structure into user_stat structure.
12121 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
12123 bzero(usbp
, sizeof(*usbp
));
12125 usbp
->st_dev
= sbp
->st_dev
;
12126 usbp
->st_ino
= sbp
->st_ino
;
12127 usbp
->st_mode
= sbp
->st_mode
;
12128 usbp
->st_nlink
= sbp
->st_nlink
;
12129 usbp
->st_uid
= sbp
->st_uid
;
12130 usbp
->st_gid
= sbp
->st_gid
;
12131 usbp
->st_rdev
= sbp
->st_rdev
;
12132 #ifndef _POSIX_C_SOURCE
12133 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12134 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12135 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12136 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12137 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12138 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12140 usbp
->st_atime
= sbp
->st_atime
;
12141 usbp
->st_atimensec
= sbp
->st_atimensec
;
12142 usbp
->st_mtime
= sbp
->st_mtime
;
12143 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12144 usbp
->st_ctime
= sbp
->st_ctime
;
12145 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12147 usbp
->st_size
= sbp
->st_size
;
12148 usbp
->st_blocks
= sbp
->st_blocks
;
12149 usbp
->st_blksize
= sbp
->st_blksize
;
12150 usbp
->st_flags
= sbp
->st_flags
;
12151 usbp
->st_gen
= sbp
->st_gen
;
12152 usbp
->st_lspare
= sbp
->st_lspare
;
12153 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12154 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12158 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
12160 bzero(usbp
, sizeof(*usbp
));
12162 usbp
->st_dev
= sbp
->st_dev
;
12163 usbp
->st_ino
= sbp
->st_ino
;
12164 usbp
->st_mode
= sbp
->st_mode
;
12165 usbp
->st_nlink
= sbp
->st_nlink
;
12166 usbp
->st_uid
= sbp
->st_uid
;
12167 usbp
->st_gid
= sbp
->st_gid
;
12168 usbp
->st_rdev
= sbp
->st_rdev
;
12169 #ifndef _POSIX_C_SOURCE
12170 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12171 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12172 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12173 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12174 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12175 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12177 usbp
->st_atime
= sbp
->st_atime
;
12178 usbp
->st_atimensec
= sbp
->st_atimensec
;
12179 usbp
->st_mtime
= sbp
->st_mtime
;
12180 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12181 usbp
->st_ctime
= sbp
->st_ctime
;
12182 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12184 usbp
->st_size
= sbp
->st_size
;
12185 usbp
->st_blocks
= sbp
->st_blocks
;
12186 usbp
->st_blksize
= sbp
->st_blksize
;
12187 usbp
->st_flags
= sbp
->st_flags
;
12188 usbp
->st_gen
= sbp
->st_gen
;
12189 usbp
->st_lspare
= sbp
->st_lspare
;
12190 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12191 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12195 * copy stat64 structure into user_stat64 structure.
12198 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
12200 bzero(usbp
, sizeof(*usbp
));
12202 usbp
->st_dev
= sbp
->st_dev
;
12203 usbp
->st_ino
= sbp
->st_ino
;
12204 usbp
->st_mode
= sbp
->st_mode
;
12205 usbp
->st_nlink
= sbp
->st_nlink
;
12206 usbp
->st_uid
= sbp
->st_uid
;
12207 usbp
->st_gid
= sbp
->st_gid
;
12208 usbp
->st_rdev
= sbp
->st_rdev
;
12209 #ifndef _POSIX_C_SOURCE
12210 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12211 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12212 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12213 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12214 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12215 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12216 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12217 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12219 usbp
->st_atime
= sbp
->st_atime
;
12220 usbp
->st_atimensec
= sbp
->st_atimensec
;
12221 usbp
->st_mtime
= sbp
->st_mtime
;
12222 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12223 usbp
->st_ctime
= sbp
->st_ctime
;
12224 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12225 usbp
->st_birthtime
= sbp
->st_birthtime
;
12226 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12228 usbp
->st_size
= sbp
->st_size
;
12229 usbp
->st_blocks
= sbp
->st_blocks
;
12230 usbp
->st_blksize
= sbp
->st_blksize
;
12231 usbp
->st_flags
= sbp
->st_flags
;
12232 usbp
->st_gen
= sbp
->st_gen
;
12233 usbp
->st_lspare
= sbp
->st_lspare
;
12234 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12235 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12239 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
12241 bzero(usbp
, sizeof(*usbp
));
12243 usbp
->st_dev
= sbp
->st_dev
;
12244 usbp
->st_ino
= sbp
->st_ino
;
12245 usbp
->st_mode
= sbp
->st_mode
;
12246 usbp
->st_nlink
= sbp
->st_nlink
;
12247 usbp
->st_uid
= sbp
->st_uid
;
12248 usbp
->st_gid
= sbp
->st_gid
;
12249 usbp
->st_rdev
= sbp
->st_rdev
;
12250 #ifndef _POSIX_C_SOURCE
12251 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12252 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12253 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12254 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12255 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12256 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12257 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12258 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12260 usbp
->st_atime
= sbp
->st_atime
;
12261 usbp
->st_atimensec
= sbp
->st_atimensec
;
12262 usbp
->st_mtime
= sbp
->st_mtime
;
12263 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12264 usbp
->st_ctime
= sbp
->st_ctime
;
12265 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12266 usbp
->st_birthtime
= sbp
->st_birthtime
;
12267 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12269 usbp
->st_size
= sbp
->st_size
;
12270 usbp
->st_blocks
= sbp
->st_blocks
;
12271 usbp
->st_blksize
= sbp
->st_blksize
;
12272 usbp
->st_flags
= sbp
->st_flags
;
12273 usbp
->st_gen
= sbp
->st_gen
;
12274 usbp
->st_lspare
= sbp
->st_lspare
;
12275 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12276 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12280 * Purge buffer cache for simulating cold starts
12283 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
12285 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
12287 return VNODE_RETURNED
;
12291 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
12293 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
12295 return VFS_RETURNED
;
12299 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
12301 if (!kauth_cred_issuser(kauth_cred_get())) {
12305 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
12311 * gets the vnode associated with the (unnamed) snapshot directory
12312 * for a Filesystem. The snapshot directory vnode is returned with
12313 * an iocount on it.
12316 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
12318 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
12322 * Get the snapshot vnode.
12324 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12325 * needs nameidone() on ndp.
12327 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12329 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12333 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12334 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12335 #if !CONFIG_TRIGGERS
12338 enum path_operation pathop
,
12344 struct vfs_attr vfa
;
12349 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12354 if (!vnode_isvroot(*rvpp
)) {
12359 /* Make sure the filesystem supports snapshots */
12360 VFSATTR_INIT(&vfa
);
12361 VFSATTR_WANTED(&vfa
, f_capabilities
);
12362 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12363 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12364 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12365 VOL_CAP_INT_SNAPSHOT
)) ||
12366 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12367 VOL_CAP_INT_SNAPSHOT
))) {
12372 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
12377 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12378 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12384 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12385 * (the length returned by copyinstr includes the terminating NUL)
12387 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12388 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
12392 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12395 if (i
< (int)name_len
) {
12401 if (op
== CREATE
) {
12402 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12404 } else if (op
== DELETE
) {
12405 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12413 /* Check if the snapshot already exists ... */
12414 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12415 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12416 ndp
->ni_dvp
= *sdvpp
;
12418 error
= namei(ndp
);
12420 FREE(name_buf
, M_TEMP
);
12436 * create a filesystem snapshot (for supporting filesystems)
12438 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12439 * We get to the (unnamed) snapshot directory vnode and create the vnode
12440 * for the snapshot in it.
12444 * a) Passed in name for snapshot cannot have slashes.
12445 * b) name can't be "." or ".."
12447 * Since this requires superuser privileges, vnode_authorize calls are not
12451 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12454 vnode_t rvp
, snapdvp
;
12456 struct nameidata namend
;
12458 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
12464 if (namend
.ni_vp
) {
12465 vnode_put(namend
.ni_vp
);
12468 struct vnode_attr va
;
12469 vnode_t vp
= NULLVP
;
12472 VATTR_SET(&va
, va_type
, VREG
);
12473 VATTR_SET(&va
, va_mode
, 0);
12475 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
12476 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12477 if (!error
&& vp
) {
12482 nameidone(&namend
);
12483 vnode_put(snapdvp
);
12489 * Delete a Filesystem snapshot
12491 * get the vnode for the unnamed snapshot directory and the snapshot and
12492 * delete the snapshot.
12495 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12498 vnode_t rvp
, snapdvp
;
12500 struct nameidata namend
;
12502 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
12508 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
12509 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12511 vnode_put(namend
.ni_vp
);
12512 nameidone(&namend
);
12513 vnode_put(snapdvp
);
12520 * Revert a filesystem to a snapshot
12522 * Marks the filesystem to revert to the given snapshot on next mount.
12525 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12531 struct fs_snapshot_revert_args revert_data
;
12532 struct componentname cnp
;
12536 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12540 mp
= vnode_mount(rvp
);
12542 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12543 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12545 FREE(name_buf
, M_TEMP
);
12551 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
12553 FREE(name_buf
, M_TEMP
);
12560 * Grab mount_iterref so that we can release the vnode,
12561 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12563 error
= mount_iterref(mp
, 0);
12566 FREE(name_buf
, M_TEMP
);
12570 memset(&cnp
, 0, sizeof(cnp
));
12571 cnp
.cn_pnbuf
= (char *)name_buf
;
12572 cnp
.cn_nameiop
= LOOKUP
;
12573 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12574 cnp
.cn_pnlen
= MAXPATHLEN
;
12575 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12576 cnp
.cn_namelen
= (int)name_len
;
12577 revert_data
.sr_cnp
= &cnp
;
12579 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
12580 mount_iterdrop(mp
);
12581 FREE(name_buf
, M_TEMP
);
12584 /* If there was any error, try again using VNOP_IOCTL */
12587 struct nameidata namend
;
12589 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
12596 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
12599 vnode_put(namend
.ni_vp
);
12600 nameidone(&namend
);
12601 vnode_put(snapdvp
);
12609 * rename a Filesystem snapshot
12611 * get the vnode for the unnamed snapshot directory and the snapshot and
12612 * rename the snapshot. This is a very specialised (and simple) case of
12613 * rename(2) (which has to deal with a lot more complications). It differs
12614 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12617 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
12618 __unused
uint32_t flags
, vfs_context_t ctx
)
12620 vnode_t rvp
, snapdvp
;
12622 caddr_t newname_buf
;
12625 struct nameidata
*fromnd
, *tond
;
12626 /* carving out a chunk for structs that are too big to be on stack. */
12628 struct nameidata from_node
;
12629 struct nameidata to_node
;
12632 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
12633 fromnd
= &__rename_data
->from_node
;
12634 tond
= &__rename_data
->to_node
;
12636 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
12641 fvp
= fromnd
->ni_vp
;
12643 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12644 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
12650 * Some sanity checks- new name can't be empty, "." or ".." or have
12652 * (the length returned by copyinstr includes the terminating NUL)
12654 * The FS rename VNOP is suppossed to handle this but we'll pick it
12657 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
12658 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
12662 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
12665 if (i
< (int)name_len
) {
12671 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
12678 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
12679 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
12680 tond
->ni_dvp
= snapdvp
;
12682 error
= namei(tond
);
12685 } else if (tond
->ni_vp
) {
12687 * snapshot rename behaves differently than rename(2) - if the
12688 * new name exists, EEXIST is returned.
12690 vnode_put(tond
->ni_vp
);
12695 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
12696 &tond
->ni_cnd
, ctx
);
12701 FREE(newname_buf
, M_TEMP
);
12703 vnode_put(snapdvp
);
12707 FREE(__rename_data
, M_TEMP
);
12712 * Mount a Filesystem snapshot
12714 * get the vnode for the unnamed snapshot directory and the snapshot and
12715 * mount the snapshot.
12718 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
12719 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
12721 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
12723 struct nameidata
*snapndp
, *dirndp
;
12724 /* carving out a chunk for structs that are too big to be on stack. */
12726 struct nameidata snapnd
;
12727 struct nameidata dirnd
;
12728 } * __snapshot_mount_data
;
12730 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
12732 snapndp
= &__snapshot_mount_data
->snapnd
;
12733 dirndp
= &__snapshot_mount_data
->dirnd
;
12735 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
12741 snapvp
= snapndp
->ni_vp
;
12742 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
12747 /* Get the vnode to be covered */
12748 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
12749 UIO_USERSPACE
, directory
, ctx
);
12750 error
= namei(dirndp
);
12755 vp
= dirndp
->ni_vp
;
12756 pvp
= dirndp
->ni_dvp
;
12758 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
12761 mount_t mp
= vnode_mount(rvp
);
12762 struct fs_snapshot_mount_args smnt_data
;
12764 smnt_data
.sm_mp
= mp
;
12765 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
12766 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
12767 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
12768 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12776 vnode_put(snapdvp
);
12778 nameidone(snapndp
);
12780 FREE(__snapshot_mount_data
, M_TEMP
);
12785 * Root from a snapshot of the filesystem
12787 * Marks the filesystem to root from the given snapshot on next boot.
12790 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12796 struct fs_snapshot_root_args root_data
;
12797 struct componentname cnp
;
12801 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12805 mp
= vnode_mount(rvp
);
12807 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12808 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12810 FREE(name_buf
, M_TEMP
);
12815 // XXX MAC checks ?
12818 * Grab mount_iterref so that we can release the vnode,
12819 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12821 error
= mount_iterref(mp
, 0);
12824 FREE(name_buf
, M_TEMP
);
12828 memset(&cnp
, 0, sizeof(cnp
));
12829 cnp
.cn_pnbuf
= (char *)name_buf
;
12830 cnp
.cn_nameiop
= LOOKUP
;
12831 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12832 cnp
.cn_pnlen
= MAXPATHLEN
;
12833 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12834 cnp
.cn_namelen
= (int)name_len
;
12835 root_data
.sr_cnp
= &cnp
;
12837 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12839 mount_iterdrop(mp
);
12840 FREE(name_buf
, M_TEMP
);
12846 * FS snapshot operations dispatcher
12849 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12850 __unused
int32_t *retval
)
12853 vfs_context_t ctx
= vfs_context_current();
12855 AUDIT_ARG(fd
, uap
->dirfd
);
12856 AUDIT_ARG(value32
, uap
->op
);
12858 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12864 * Enforce user authorization for snapshot modification operations
12866 if ((uap
->op
!= SNAPSHOT_OP_MOUNT
) &&
12867 (uap
->op
!= SNAPSHOT_OP_ROOT
)) {
12868 vnode_t dvp
= NULLVP
;
12869 vnode_t devvp
= NULLVP
;
12872 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
12876 mp
= vnode_mount(dvp
);
12877 devvp
= mp
->mnt_devvp
;
12879 /* get an iocount on devvp */
12880 if (devvp
== NULLVP
) {
12881 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
12882 /* for mounts which arent block devices */
12883 if (error
== ENOENT
) {
12887 error
= vnode_getwithref(devvp
);
12895 if ((vfs_context_issuser(ctx
) == 0) &&
12896 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
12908 case SNAPSHOT_OP_CREATE
:
12909 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12911 case SNAPSHOT_OP_DELETE
:
12912 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12914 case SNAPSHOT_OP_RENAME
:
12915 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12918 case SNAPSHOT_OP_MOUNT
:
12919 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12920 uap
->data
, uap
->flags
, ctx
);
12922 case SNAPSHOT_OP_REVERT
:
12923 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12925 #if CONFIG_MNT_ROOTSNAP
12926 case SNAPSHOT_OP_ROOT
:
12927 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12929 #endif /* CONFIG_MNT_ROOTSNAP */