2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
112 #include <vfs/vfs_disk_conditioner.h>
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
136 #include <nfs/nfs_conf.h>
139 #include <miscfs/routefs/routefs.h>
143 #include <security/mac.h>
144 #include <security/mac_framework.h>
148 #define GET_PATH(x) \
149 (x) = get_pathbuff();
150 #define RELEASE_PATH(x) \
153 #define GET_PATH(x) \
154 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
155 #define RELEASE_PATH(x) \
156 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
157 #endif /* CONFIG_FSE */
159 #ifndef HFS_GET_BOOT_INFO
160 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
163 #ifndef HFS_SET_BOOT_INFO
164 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
167 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
168 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
171 extern void disk_conditioner_unmount(mount_t mp
);
173 /* struct for checkdirs iteration */
178 /* callback for checkdirs iteration */
179 static int checkdirs_callback(proc_t p
, void * arg
);
181 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
182 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
183 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
184 static int getfsstat_callback(mount_t mp
, void * arg
);
185 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
186 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
187 static int sync_callback(mount_t
, void *);
188 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
189 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
190 boolean_t partial_copy
);
191 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
192 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
193 struct componentname
*cnp
, user_addr_t fsmountargs
,
194 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
196 void vfs_notify_mount(vnode_t pdvp
);
198 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
200 struct fd_vn_data
* fg_vn_data_alloc(void);
203 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
204 * Concurrent lookups (or lookups by ids) on hard links can cause the
205 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
206 * does) to return ENOENT as the path cannot be returned from the name cache
207 * alone. We have no option but to retry and hope to get one namei->reverse path
208 * generation done without an intervening lookup or lookup-by-id on the hard link
209 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
210 * which currently are the MAC hooks for rename, unlink and rmdir.
212 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
214 /* Max retry limit for rename due to vnode recycling. */
215 #define MAX_RENAME_ERECYCLE_RETRIES 1024
217 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
,
220 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *);
222 #ifdef CONFIG_IMGSRC_ACCESS
223 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
224 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
225 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
226 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
227 static void mount_end_update(mount_t mp
);
228 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
229 #endif /* CONFIG_IMGSRC_ACCESS */
231 #if CONFIG_LOCKERBOOT
232 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
233 const char *pbdevpath
);
237 #if CONFIG_MNT_ROOTSNAP
238 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
240 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
243 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
246 int sync_internal(void);
249 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
251 extern lck_grp_t
*fd_vn_lck_grp
;
252 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
253 extern lck_attr_t
*fd_vn_lck_attr
;
256 * incremented each time a mount or unmount operation occurs
257 * used to invalidate the cached value of the rootvp in the
258 * mount structure utilized by cache_lookup_path
260 uint32_t mount_generation
= 0;
262 /* counts number of mount and unmount operations */
263 unsigned int vfs_nummntops
= 0;
265 extern const struct fileops vnops
;
266 #if CONFIG_APPLEDOUBLE
267 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
268 #endif /* CONFIG_APPLEDOUBLE */
271 * Virtual File System System Calls
274 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
276 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount
 *
 * Test the MNTK_KERNEL_MOUNT bit of a mount.
 *
 * Parameters:	mp	mount to test
 *
 * Returns:	TRUE	MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag
 *		FALSE	otherwise
 */
280 vfs_iskernelmount(mount_t mp
)
282 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
/*
 * kernel_mount
 *
 * In-kernel mount entry point.  Initializes a LOOKUP nameidata for a
 * kernel-space path and hands the request to mount_common() with
 * kernelmount set to TRUE and no MAC label string (NULL).
 *
 * Parameters:	fstype		file system type name, passed through to mount_common()
 *		pvp		parent of the vnode to be covered
 *		vp		vnode to be covered
 *		path		mount-on path; a kernel address (used with UIO_SYSSPACE)
 *		data		file system specific mount data, cast to a user_addr_t
 *		datalen		unused
 *		syscall_flags	generic mount flags
 *		kern_flags	KERNEL_MOUNT_* flags, passed as mount_common()'s internal_flags
 *		ctx		caller's context
 */
287 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
288 void *data
, __unused
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
)
/* path lives in kernel memory, hence UIO_SYSSPACE rather than UIO_USERSPACE */
294 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
295 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
298 * Get the vnode to be covered if it's not supplied
303 if (kern_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VMVOL
| KERNEL_MOUNT_DATAVOL
)) {
304 printf("failed to locate mount-on path: %s ", path
);
312 char *pnbuf
= CAST_DOWN(char *, path
);
314 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
315 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
319 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
320 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
330 #endif /* CONFIG_NFS_CLIENT || DEVFS || ROUTEFS */
333 * Mount a file system.
/*
 * mount
 *
 * Repackages the caller's arguments as a struct __mac_mount_args with
 * no MAC label (mac_p = USER_ADDR_NULL) and forwards to __mac_mount().
 *
 * Parameters:	p	calling process
 *		uap	user arguments; type, path, flags and data are copied over
 *		retval	passed through to __mac_mount()
 *
 * Returns:	the value returned by __mac_mount()
 */
337 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
339 struct __mac_mount_args muap
;
341 muap
.type
= uap
->type
;
342 muap
.path
= uap
->path
;
343 muap
.flags
= uap
->flags
;
344 muap
.data
= uap
->data
;
/* plain mount never carries a MAC label */
345 muap
.mac_p
= USER_ADDR_NULL
;
346 return __mac_mount(p
, &muap
, retval
);
/*
 * fmount
 *
 * Mount a file system on the vnode referenced by a file descriptor
 * instead of a path.
 *
 * Visible steps: audit the fd and flags; test the flags for
 * MNT_IMGSRC_BY_INDEX | MNT_ROOTFS and for MNT_UNION (the outcome of
 * those tests is not shown here); copy in the fs type name; resolve
 * uap->fd to a vnode with file_vnode() and vnode_getwithref(); fetch
 * its parent with vnode_getparent(); build a componentname whose path
 * buffer is filled by vn_getpath(); call mount_common() with
 * internal_flags 0, a NULL label string and kernelmount FALSE; free
 * the path buffer.
 *
 * Parameters:	p	unused
 *		uap	user arguments (fd, type, flags, data)
 *		retval	unused
 */
350 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
352 struct componentname cn
;
353 vfs_context_t ctx
= vfs_context_current();
356 int flags
= uap
->flags
;
357 char fstypename
[MFSNAMELEN
];
358 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
362 AUDIT_ARG(fd
, uap
->fd
);
363 AUDIT_ARG(fflags
, flags
);
364 /* fstypename will get audited by mount_common */
366 /* Sanity check the flags */
367 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
371 if (flags
& MNT_UNION
) {
375 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
380 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
384 if ((error
= vnode_getwithref(vp
)) != 0) {
389 pvp
= vnode_getparent(vp
);
/* the componentname is built by hand; its path comes from vn_getpath() on vp */
396 memset(&cn
, 0, sizeof(struct componentname
));
397 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
398 cn
.cn_pnlen
= MAXPATHLEN
;
400 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
401 FREE(cn
.cn_pnbuf
, M_TEMP
);
408 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
410 FREE(cn
.cn_pnbuf
, M_TEMP
);
/*
 * vfs_notify_mount
 *
 * Announce a mount: signals VQ_MOUNT through vfs_event_signal() and
 * posts NOTE_WRITE on pdvp through lock_vnode_and_post().
 *
 * Parameters:	pdvp	vnode on which NOTE_WRITE is posted; mount_common()
 *			passes the parent of the covered vnode
 */
419 vfs_notify_mount(vnode_t pdvp
)
421 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
422 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
427 * Mount a file system taking into account MAC label behavior.
428 * See mount(2) man page for more information
430 * Parameters: p Process requesting the mount
431 * uap User argument descriptor (see below)
434 * Indirect: uap->type Filesystem type
435 * uap->path Path to mount
436 * uap->data Mount arguments
437 * uap->mac_p MAC info
438 * uap->flags Mount flags
444 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount
 *
 * Visible steps, in order:
 *  - copy in the fs type name from uap->type (at most MFSNAMELEN bytes);
 *  - initialize a LOOKUP of uap->path in user space with
 *    FOLLOW | AUDITVNPATH1 | WANTPARENT;
 *  - under CONFIG_IMGSRC_ACCESS, route flags == MNT_IMGSRC_BY_INDEX to
 *    relocate_imageboot_source();
 *  - if uap->mac_p is set, copy in the 64- or 32-bit MAC descriptor,
 *    check m_buflen against the range 2..MAC_MAX_LABEL_BUF_LEN, and
 *    copy the label string into a M_MACTEMP buffer;
 *  - when the covered vnode is the root (VROOT) of the MNT_ROOTFS
 *    mount, adjust the flags (a union request has MNT_UPDATE cleared)
 *    and, under CHECK_CS_VALIDATION_BITMAP, set root_fs_upgrade_try
 *    when MNT_RDONLY is not requested;
 *  - call mount_common() with internal_flags 0 and kernelmount FALSE;
 *  - release the label buffer with FREE(labelstr, M_MACTEMP).
 *
 * retval is marked __unused.
 */
447 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
451 int need_nameidone
= 0;
452 vfs_context_t ctx
= vfs_context_current();
453 char fstypename
[MFSNAMELEN
];
456 char *labelstr
= NULL
;
457 int flags
= uap
->flags
;
459 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
460 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
465 * Get the fs type name from user space
467 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
473 * Get the vnode to be covered
475 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
476 UIO_USERSPACE
, uap
->path
, ctx
);
485 #ifdef CONFIG_IMGSRC_ACCESS
486 /* Mounting image source cannot be batched with other operations */
487 if (flags
== MNT_IMGSRC_BY_INDEX
) {
488 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
489 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
492 #endif /* CONFIG_IMGSRC_ACCESS */
496 * Get the label string (if any) from user space
498 if (uap
->mac_p
!= USER_ADDR_NULL
) {
503 struct user64_mac mac64
;
504 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
505 mac
.m_buflen
= mac64
.m_buflen
;
506 mac
.m_string
= mac64
.m_string
;
508 struct user32_mac mac32
;
509 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
510 mac
.m_buflen
= mac32
.m_buflen
;
511 mac
.m_string
= mac32
.m_string
;
516 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
517 (mac
.m_buflen
< 2)) {
521 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
522 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
526 AUDIT_ARG(mac_string
, labelstr
);
528 #endif /* CONFIG_MACF */
530 AUDIT_ARG(fflags
, flags
);
533 if (flags
& MNT_UNION
) {
534 /* No union mounts on release kernels */
540 if ((vp
->v_flag
& VROOT
) &&
541 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
542 if (!(flags
& MNT_UNION
)) {
546 * For a union mount on '/', treat it as fresh
547 * mount instead of update.
548 * Otherwise, union mouting on '/' used to panic the
549 * system before, since mnt_vnodecovered was found to
550 * be NULL for '/' which is required for unionlookup
551 * after it gets ENOENT on union mount.
553 flags
= (flags
& ~(MNT_UPDATE
));
557 if ((flags
& MNT_RDONLY
) == 0) {
558 /* Release kernels are not allowed to mount "/" as rw */
564 * See 7392553 for more details on why this check exists.
565 * Suffice to say: If this check is ON and something tries
566 * to mount the rootFS RW, we'll turn off the codesign
567 * bitmap optimization.
569 #if CHECK_CS_VALIDATION_BITMAP
570 if ((flags
& MNT_RDONLY
) == 0) {
571 root_fs_upgrade_try
= TRUE
;
576 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
577 labelstr
, FALSE
, ctx
);
583 FREE(labelstr
, M_MACTEMP
);
585 #endif /* CONFIG_MACF */
593 if (need_nameidone
) {
601 * common mount implementation (final stage of mounting)
604 * fstypename file system type (i.e. its vfs name)
605 * pvp parent of covered vnode
607 * cnp component name (ie path) of covered vnode
608 * flags generic mount flags
609 * fsmountargs file system specific data
610 * labelstr optional MAC label
611 * kernelmount TRUE for mounts initiated from inside the kernel
612 * ctx caller's context
615 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
616 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
617 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
620 #pragma unused(labelstr)
622 struct vnode
*devvp
= NULLVP
;
623 struct vnode
*device_vnode
= NULLVP
;
628 struct vfstable
*vfsp
= (struct vfstable
*)0;
629 struct proc
*p
= vfs_context_proc(ctx
);
631 user_addr_t devpath
= USER_ADDR_NULL
;
634 boolean_t vfsp_ref
= FALSE
;
635 boolean_t is_rwlock_locked
= FALSE
;
636 boolean_t did_rele
= FALSE
;
637 boolean_t have_usecount
= FALSE
;
639 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
640 /* Check for mutually-exclusive flag bits */
641 uint32_t checkflags
= (internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
));
643 while (checkflags
!= 0) {
644 checkflags
&= (checkflags
- 1);
649 //not allowed to request multiple mount-by-role flags
656 * Process an update for an existing mount
658 if (flags
& MNT_UPDATE
) {
659 if ((vp
->v_flag
& VROOT
) == 0) {
665 /* unmount in progress return error */
667 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
673 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
674 is_rwlock_locked
= TRUE
;
676 * We only allow the filesystem to be reloaded if it
677 * is currently mounted read-only.
679 if ((flags
& MNT_RELOAD
) &&
680 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
686 * If content protection is enabled, update mounts are not
687 * allowed to turn it off.
689 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
690 ((flags
& MNT_CPROTECT
) == 0)) {
696 * can't turn off MNT_REMOVABLE either but it may be an unexpected
697 * failure to return an error for this so we'll just silently
698 * add it if it is not passed in.
700 if ((mp
->mnt_flag
& MNT_REMOVABLE
) &&
701 ((flags
& MNT_REMOVABLE
) == 0)) {
702 flags
|= MNT_REMOVABLE
;
705 #ifdef CONFIG_IMGSRC_ACCESS
706 /* Can't downgrade the backer of the root FS */
707 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
708 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
712 #endif /* CONFIG_IMGSRC_ACCESS */
715 * Only root, or the user that did the original mount is
716 * permitted to update it.
718 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
719 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
723 error
= mac_mount_check_remount(ctx
, mp
);
729 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
730 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
732 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
733 flags
|= MNT_NOSUID
| MNT_NODEV
;
734 if (mp
->mnt_flag
& MNT_NOEXEC
) {
742 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
744 vfsp
= mp
->mnt_vtable
;
749 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
750 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
752 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
753 flags
|= MNT_NOSUID
| MNT_NODEV
;
754 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
759 /* XXXAUDIT: Should we capture the type on the error path as well? */
760 AUDIT_ARG(text
, fstypename
);
762 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
763 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
764 vfsp
->vfc_refcount
++;
776 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
777 * except in ROSV configs.
779 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) &&
780 ((internal_flags
& (KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
)) == 0)) {
781 error
= EINVAL
; /* unsupported request */
785 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
791 * Allocate and initialize the filesystem (mount_t)
793 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
795 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
798 /* Initialize the default IO constraints */
799 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
800 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
801 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
802 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
803 mp
->mnt_devblocksize
= DEV_BSIZE
;
804 mp
->mnt_alignmentmask
= PAGE_MASK
;
805 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
808 mp
->mnt_realrootvp
= NULLVP
;
809 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
811 TAILQ_INIT(&mp
->mnt_vnodelist
);
812 TAILQ_INIT(&mp
->mnt_workerqueue
);
813 TAILQ_INIT(&mp
->mnt_newvnodes
);
815 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
816 is_rwlock_locked
= TRUE
;
817 mp
->mnt_op
= vfsp
->vfc_vfsops
;
818 mp
->mnt_vtable
= vfsp
;
819 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
820 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
821 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
823 int pathlen
= MAXPATHLEN
;
825 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) {
826 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
829 mp
->mnt_vnodecovered
= vp
;
830 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
831 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
832 mp
->mnt_devbsdunit
= 0;
834 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
835 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
837 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
839 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
841 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
842 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
844 #endif /* CONFIG_NFS_CLIENT || DEVFS || ROUTEFS */
849 * Set the mount level flags.
851 if (flags
& MNT_RDONLY
) {
852 mp
->mnt_flag
|= MNT_RDONLY
;
853 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
854 // disallow read/write upgrades of file systems that
855 // had the TYPENAME_OVERRIDE feature set.
856 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
860 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
862 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
863 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
864 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
865 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
866 MNT_QUARANTINE
| MNT_CPROTECT
);
871 * On release builds of iOS based platforms, always enforce NOSUID on
872 * all mounts. We do this here because we can catch update mounts as well as
873 * non-update mounts in this case.
875 mp
->mnt_flag
|= (MNT_NOSUID
);
879 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
880 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
881 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
882 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
883 MNT_QUARANTINE
| MNT_CPROTECT
);
886 if (flags
& MNT_MULTILABEL
) {
887 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
891 mp
->mnt_flag
|= MNT_MULTILABEL
;
895 * Process device path for local file systems if requested
897 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
898 !(internal_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_DATAVOL
| KERNEL_MOUNT_VMVOL
))) {
899 //snapshot, vm, datavolume mounts are special
900 if (vfs_context_is64bit(ctx
)) {
901 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
904 fsmountargs
+= sizeof(devpath
);
907 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
910 /* munge into LP64 addr */
911 devpath
= CAST_USER_ADDR_T(tmp
);
912 fsmountargs
+= sizeof(tmp
);
915 /* Lookup device and authorize access to it */
919 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
920 if ((error
= namei(&nd
))) {
924 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
929 if (devvp
->v_type
!= VBLK
) {
933 if (major(devvp
->v_rdev
) >= nblkdev
) {
938 * If mount by non-root, then verify that user has necessary
939 * permissions on the device.
941 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
942 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
944 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
945 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
947 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
952 /* On first mount, preflight and open device */
953 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
954 if ((error
= vnode_ref(devvp
))) {
958 * Disallow multiple mounts of the same device.
959 * Disallow mounting of a device that is currently in use
960 * (except for root, which might share swap device for miniroot).
961 * Flush out any old buffers remaining from a previous use.
963 if ((error
= vfs_mountedon(devvp
))) {
967 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
971 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
975 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
979 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
981 error
= mac_vnode_check_open(ctx
,
983 ronly
? FREAD
: FREAD
| FWRITE
);
988 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
992 mp
->mnt_devvp
= devvp
;
993 device_vnode
= devvp
;
994 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
995 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
996 (device_vnode
= mp
->mnt_devvp
)) {
1000 * If upgrade to read-write by non-root, then verify
1001 * that user has necessary permissions on the device.
1003 vnode_getalways(device_vnode
);
1005 if (suser(vfs_context_ucred(ctx
), NULL
) &&
1006 (error
= vnode_authorize(device_vnode
, NULL
,
1007 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
1009 vnode_put(device_vnode
);
1013 /* Tell the device that we're upgrading */
1014 dev
= (dev_t
)device_vnode
->v_rdev
;
1017 if ((u_int
)maj
>= (u_int
)nblkdev
) {
1018 panic("Volume mounted on a device with invalid major number.");
1021 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
1022 vnode_put(device_vnode
);
1023 device_vnode
= NULLVP
;
1028 } // localargs && !(snapshot | data | vm)
1031 if ((flags
& MNT_UPDATE
) == 0) {
1032 mac_mount_label_init(mp
);
1033 mac_mount_label_associate(ctx
, mp
);
1036 if ((flags
& MNT_UPDATE
) != 0) {
1037 error
= mac_mount_check_label_update(ctx
, mp
);
1045 * Mount the filesystem. We already asserted that internal_flags
1046 * cannot have more than one mount-by-role bit set.
1048 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
1049 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
1050 (caddr_t
)fsmountargs
, 0, ctx
);
1051 } else if (internal_flags
& KERNEL_MOUNT_DATAVOL
) {
1052 #if CONFIG_ROSV_STARTUP
1053 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1054 fs_role_mount_args_t frma
= {origin_mp
, VFS_DATA_ROLE
};
1055 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1057 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
);
1059 /* Mark volume associated with system volume */
1060 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
1062 /* Attempt to acquire the mnt_devvp and set it up */
1063 struct vnode
*mp_devvp
= NULL
;
1064 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1065 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1066 0, &mp_devvp
, vfs_context_kernel());
1068 mp
->mnt_devvp
= mp_devvp
;
1069 //vnode_lookup took an iocount, need to drop it.
1070 vnode_put(mp_devvp
);
1071 // now set `device_vnode` to the devvp that was acquired.
1072 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1073 // note that though the iocount above was dropped, the mount acquires
1074 // an implicit reference against the device.
1075 device_vnode
= mp_devvp
;
1082 } else if (internal_flags
& KERNEL_MOUNT_VMVOL
) {
1084 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1085 fs_role_mount_args_t frma
= {origin_mp
, VFS_VM_ROLE
};
1086 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1088 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
);
1090 /* Mark volume associated with system volume and a swap mount */
1091 mp
->mnt_kern_flag
|= (MNTK_SYSTEM
| MNTK_SWAP_MOUNT
);
1092 /* Attempt to acquire the mnt_devvp and set it up */
1093 struct vnode
*mp_devvp
= NULL
;
1094 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1095 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1096 0, &mp_devvp
, vfs_context_kernel());
1098 mp
->mnt_devvp
= mp_devvp
;
1099 //vnode_lookup took an iocount, need to drop it.
1100 vnode_put(mp_devvp
);
1102 // now set `device_vnode` to the devvp that was acquired.
1103 // note that though the iocount above was dropped, the mount acquires
1104 // an implicit reference against the device.
1105 device_vnode
= mp_devvp
;
1113 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
1116 if (flags
& MNT_UPDATE
) {
1117 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
1118 mp
->mnt_flag
&= ~MNT_RDONLY
;
1121 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1122 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1124 mp
->mnt_flag
= flag
; /* restore flag value */
1126 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1127 lck_rw_done(&mp
->mnt_rwlock
);
1128 is_rwlock_locked
= FALSE
;
1130 enablequotas(mp
, ctx
);
1136 * Put the new filesystem on the mount list after root.
1139 struct vfs_attr vfsattr
;
1141 error
= mac_mount_check_mount_late(ctx
, mp
);
1146 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1147 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1149 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1152 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1154 * drop reference provided by VFS_ROOT
1164 vnode_lock_spin(vp
);
1165 CLR(vp
->v_flag
, VMOUNT
);
1166 vp
->v_mountedhere
= mp
;
1170 * taking the name_cache_lock exclusively will
1171 * ensure that everyone is out of the fast path who
1172 * might be trying to use a now stale copy of
1173 * vp->v_mountedhere->mnt_realrootvp
1174 * bumping mount_generation causes the cached values
1179 name_cache_unlock();
1181 error
= vnode_ref(vp
);
1186 have_usecount
= TRUE
;
1188 error
= checkdirs(vp
, ctx
);
1190 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1194 * there is no cleanup code here so I have made it void
1195 * we need to revisit this
1197 (void)VFS_START(mp
, 0, ctx
);
1199 if (mount_list_add(mp
) != 0) {
1201 * The system is shutting down trying to umount
1202 * everything, so fail with a plausible errno.
1207 lck_rw_done(&mp
->mnt_rwlock
);
1208 is_rwlock_locked
= FALSE
;
1210 /* Check if this mounted file system supports EAs or named streams. */
1211 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1212 VFSATTR_INIT(&vfsattr
);
1213 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1214 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1215 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1216 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1217 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1218 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1219 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1222 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1223 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1224 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1227 /* Check if this file system supports path from id lookups. */
1228 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1229 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1230 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1231 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1232 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1233 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1236 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1237 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1238 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1241 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1242 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1244 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1245 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1247 /* increment the operations count */
1248 OSAddAtomic(1, &vfs_nummntops
);
1249 enablequotas(mp
, ctx
);
1252 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1255 * cache the IO attributes for the underlying physical media...
1256 * an error return indicates the underlying driver doesn't
1257 * support all the queries necessary... however, reasonable
1258 * defaults will have been set, so no reason to bail or care
1260 vfs_init_io_attributes(device_vnode
, mp
);
1263 /* Now that mount is setup, notify the listeners */
1264 vfs_notify_mount(pvp
);
1265 IOBSDMountChange(mp
, kIOMountChangeMount
);
1267 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1268 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1269 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1270 mp
->mnt_vtable
->vfc_name
, error
);
1273 vnode_lock_spin(vp
);
1274 CLR(vp
->v_flag
, VMOUNT
);
1277 mp
->mnt_vtable
->vfc_refcount
--;
1278 mount_list_unlock();
1281 vnode_rele(device_vnode
);
1282 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1284 lck_rw_done(&mp
->mnt_rwlock
);
1285 is_rwlock_locked
= FALSE
;
1288 * if we get here, we have a mount structure that needs to be freed,
1289 * but since the coveredvp hasn't yet been updated to point at it,
1290 * no need to worry about other threads holding a crossref on this mp
1291 * so it's ok to just free it
1293 mount_lock_destroy(mp
);
1295 mac_mount_label_destroy(mp
);
1297 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1301 * drop I/O count on the device vp if there was one
1303 if (devpath
&& devvp
) {
1309 /* Error condition exits */
1311 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1314 * If the mount has been placed on the covered vp,
1315 * it may have been discovered by now, so we have
1316 * to treat this just like an unmount
1318 mount_lock_spin(mp
);
1319 mp
->mnt_lflag
|= MNT_LDEAD
;
1322 if (device_vnode
!= NULLVP
) {
1323 vnode_rele(device_vnode
);
1324 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1329 vnode_lock_spin(vp
);
1332 vp
->v_mountedhere
= (mount_t
) 0;
1336 if (have_usecount
) {
1340 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1344 if (devpath
&& devvp
) {
1348 /* Release mnt_rwlock only when it was taken */
1349 if (is_rwlock_locked
== TRUE
) {
1350 lck_rw_done(&mp
->mnt_rwlock
);
1354 if (mp
->mnt_crossref
) {
1355 mount_dropcrossref(mp
, vp
, 0);
1357 mount_lock_destroy(mp
);
1359 mac_mount_label_destroy(mp
);
1361 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1366 vfsp
->vfc_refcount
--;
1367 mount_list_unlock();
1374 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: vet the vnode `vp` that is about to be covered by a
 * mount and flag it as having a mount in progress.
 *
 * Checks visible in this listing, in order:
 *   1. ownership: va_uid is fetched with vnode_getattr() and compared with
 *      the caller's uid; a mismatch is tolerated only for the superuser.
 *   2. VNOP_FSYNC(vp, MNT_WAIT) followed by buf_invalidateblks().
 *   3. vp->v_type is compared with VDIR.
 *   4. VMOUNT already set together with a non-NULL v_mountedhere.
 *   5. mac_mount_check_mount().
 * After the checks, VMOUNT is set on vp once vnode_lock_spin() is taken.
 *
 * NOTE(review): the lines that assign error codes, jump out, and apply
 * skip_auth are not part of this listing, so the exact errno values and
 * the way skip_auth gates step 1 cannot be confirmed from here.
 */
1378 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1381 #pragma unused(cnp,fsname)
1383 struct vnode_attr va
;
1388 * If the user is not root, ensure that they own the directory
1389 * onto which we are attempting to mount.
1392 VATTR_WANTED(&va
, va_uid
);
1393 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1394 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1395 (!vfs_context_issuser(ctx
)))) {
/* Synchronously flush vp, then invalidate its cached buffers. */
1401 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1405 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
/* Anything that is not a directory takes the branch below. */
1409 if (vp
->v_type
!= VDIR
) {
/* A mount already in progress (or present) on vp takes the branch below. */
1414 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1420 error
= mac_mount_check_mount(ctx
, vp
,
/* All checks done: mark vp as having a mount in progress. */
1427 vnode_lock_spin(vp
);
1428 SET(vp
->v_flag
, VMOUNT
);
/*
 * Debug tracing for the imageboot-source relocation code below.
 * DEBUG_IMGSRC is hard-wired to 0. Two alternative definitions of
 * IMGSRC_DEBUG() follow: one forwards its arguments to printf() behind an
 * "imgsrc: " prefix, the other expands to an empty statement.
 * NOTE(review): the conditional selecting between the two definitions is
 * not part of this listing; presumably it keys off DEBUG_IMGSRC - confirm.
 */
1435 #if CONFIG_IMGSRC_ACCESS
1437 #define DEBUG_IMGSRC 0
1440 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1442 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: resolve the caller-supplied
 * device path and validate it against the device that already backs `mp`.
 *
 * Steps visible in this listing:
 *   - namei() lookup of devpath (LOOKUP / OP_LOOKUP, FOLLOW). The address
 *     space starts out as UIO_USERSPACE; there is a special case when ctx
 *     is the kernel context (the body of that branch is not shown here).
 *   - the looked-up vnode must satisfy vnode_isblk().
 *   - mp->mnt_devvp must not be NULLVP, and an iocount is taken on it with
 *     vnode_getwithref().
 *   - both vnodes must report the same vnode_specrdev().
 *   - f_mntfromname is overwritten with the pathname buffer of the lookup
 *     (bounded by MAXPATHLEN).
 *   - callers that are not superuser are checked with vnode_authorize()
 *     for READ_DATA, plus WRITE_DATA unless the mount is MNT_RDONLY.
 *   - the iocount on realdevvp is dropped with vnode_put().
 *
 * How *devvpp is filled in is not visible in this listing.
 *
 * NOTE(review): f_mntfromname is rewritten (line 1491) before the
 * authorization check (lines 1497-1503). Unless a line missing from this
 * listing restores it, a denied caller has already changed the name -
 * confirm whether that ordering is intended.
 */
1446 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1448 struct nameidata nd
;
1449 vnode_t vp
, realdevvp
;
1452 enum uio_seg uio
= UIO_USERSPACE
;
1454 if (ctx
== vfs_context_kernel()) {
1458 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
);
1459 if ((error
= namei(&nd
))) {
1460 IMGSRC_DEBUG("namei() failed with %d\n", error
);
/* The path must name a block device. */
1466 if (!vnode_isblk(vp
)) {
1467 IMGSRC_DEBUG("Not block device.\n");
/* The mount must already be backed by a device vnode. */
1472 realdevvp
= mp
->mnt_devvp
;
1473 if (realdevvp
== NULLVP
) {
1474 IMGSRC_DEBUG("No device backs the mount.\n");
1479 error
= vnode_getwithref(realdevvp
);
1481 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
/* Both vnodes must refer to the same device number. */
1485 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1486 IMGSRC_DEBUG("Wrong dev_t.\n");
1491 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1494 * If mount by non-root, then verify that user has necessary
1495 * permissions on the device.
1497 if (!vfs_context_issuser(ctx
)) {
1498 accessmode
= KAUTH_VNODE_READ_DATA
;
1499 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1500 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1502 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1503 IMGSRC_DEBUG("Access denied.\n");
/* Drop the iocount taken with vnode_getwithref() above. */
1511 vnode_put(realdevvp
);
1524 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1525 * and call checkdirs()
/*
 * place_mount_and_checkdirs: hook mount `mp` on top of covered vnode `vp`.
 *
 * Visible sequence: record vp in mp->mnt_vnodecovered; after taking
 * vnode_lock_spin(), clear VMOUNT and point vp->v_mountedhere at mp; run
 * the name-cache critical section (only its unlock survives in this
 * listing); take a usecount on vp with vnode_ref(); call checkdirs().
 * The trailing assignment resets mp->mnt_vnodecovered to NULLVP and is
 * presumably the failure path - the branch structure is not shown here.
 *
 * NOTE(review): the debug statement passes vnode_getname(vp) straight to
 * IMGSRC_DEBUG(); if that call returns a reference that has to be
 * released, it is not released here - confirm (only matters when the
 * printf flavour of the macro is active).
 */
1528 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1532 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1534 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1535 mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
));
/* Mount is no longer "in progress": publish mp on the covered vnode. */
1537 vnode_lock_spin(vp
);
1538 CLR(vp
->v_flag
, VMOUNT
);
1539 vp
->v_mountedhere
= mp
;
1543 * taking the name_cache_lock exclusively will
1544 * insure that everyone is out of the fast path who
1545 * might be trying to use a now stale copy of
1546 * vp->v_mountedhere->mnt_realrootvp
1547 * bumping mount_generation causes the cached values
1552 name_cache_unlock();
1554 error
= vnode_ref(vp
);
1559 error
= checkdirs(vp
, ctx
);
1561 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1568 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach `mp` from covered vnode `vp` again.
 * After taking vnode_lock_spin(), vp->v_mountedhere is cleared; then
 * mp->mnt_vnodecovered is reset to NULLVP.
 * NOTE(review): a caller in this file describes this helper as also
 * releasing vp and clearing VMOUNT; those statements are not part of
 * this listing, so they cannot be confirmed from here.
 */
1574 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1577 vnode_lock_spin(vp
);
1578 vp
->v_mountedhere
= (mount_t
)NULL
;
1581 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update: start an update of an existing mount.
 *
 * Visible sequence:
 *   - after mount_lock_spin(), test MNT_LUNMOUNT (an unmount in progress
 *     is an error, per the original comment).
 *   - take mp->mnt_rwlock exclusively.
 *   - MNT_RELOAD is only accepted while the mount is MNT_RDONLY.
 *   - the caller must be the recorded f_owner of the mount or superuser.
 *   - mac_mount_check_remount().
 * The final lck_rw_done() releases the rwlock again; presumably that is
 * the failure path, with the lock kept on success for mount_end_update()
 * to release - the branch structure is not shown in this listing.
 */
1585 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1589 /* unmount in progress return error */
1590 mount_lock_spin(mp
);
1591 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1596 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1599 * We only allow the filesystem to be reloaded if it
1600 * is currently mounted read-only.
1602 if ((flags
& MNT_RELOAD
) &&
1603 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1609 * Only root, or the user that did the original mount is
1610 * permitted to update it.
1612 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1613 (!vfs_context_issuser(ctx
))) {
1618 error
= mac_mount_check_remount(ctx
, mp
);
1626 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update: finish a mount update by releasing mp->mnt_rwlock
 * (the lock that mount_begin_update() takes exclusively).
 */
1633 mount_end_update(mount_t mp
)
1635 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode: fetch the saved imageboot root vnode at nesting
 * level `height`.
 * `height` is range-checked against MAX_IMAGEBOOT_NESTING before it is
 * used as an index into imgsrc_rootvnodes[]. The entry must be non-NULL
 * and vnode_get() on it must succeed for the success branch to be taken.
 * The store through rvpp and the return values are not part of this
 * listing.
 */
1639 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1643 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1647 vp
= imgsrc_rootvnodes
[height
];
1648 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
/*
 * relocate_imageboot_source: move the filesystem that backs an imageboot
 * root so that it is mounted on top of `vp`.
 *
 * Parameters (as used in the visible code):
 *   pvp          passed to vfs_notify_mount() once the move succeeded.
 *   vp           vnode that will be covered by the moved mount.
 *   cnp          lookup component; cn_pnbuf becomes the new f_mntonname.
 *   fsname       must match the vfc_name of the mount being moved.
 *   ctx          caller context; must be superuser.
 *   is64bit,
 *   fsmountargs,
 *   by_index     describe the user argument block; see below.
 *
 * Stages visible in this listing:
 *   1. bail out when no imageboot happened (imgsrc_rootvnodes[0] is
 *      NULLVP) or the caller is not superuser.
 *   2. copy in the arguments: a user64_mnt_imgsrc_args or
 *      user32_mnt_imgsrc_args structure giving height / flags / devpath,
 *      or - "for binary compatibility" - just a device path pointer. The
 *      conditions choosing between these (presumably by_index and
 *      is64bit) are not part of this listing.
 *   3. get_imgsrc_rootvnode(height) and take its mount; refuse when
 *      MNTK_HAS_MOVED is already set.
 *   4. mount_begin_update() (exclusive rwlock), then re-check
 *      MNTK_HAS_MOVED under the lock.
 *   5. prepare_coveredvp() on vp, compare fsname with the mount's
 *      vfc_name, and for VFC_VFSLOCALARGS filesystems with a non-NULL
 *      devpath run authorize_devpath_and_update_mntfromname().
 *   6. place_mount_and_checkdirs(), swap f_mntonname (old value saved in
 *      old_mntonname), set MNTK_HAS_MOVED, mount_list_add().
 *   7. success: mount_end_update(), free the saved name, notify pvp.
 *   8. failure unwinding: restore f_mntonname, clear MNTK_HAS_MOVED,
 *      undo_place_on_covered_vp() or clear VMOUNT (depending on how far
 *      placement got), mount_end_update(), free the saved name.
 * The labels and gotos tying stages 7-8 together are not shown here.
 */
1657 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
,
1658 struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
,
1659 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1663 boolean_t placed
= FALSE
;
1664 struct vfstable
*vfsp
;
1665 user_addr_t devpath
;
1666 char *old_mntonname
;
1672 /* If we didn't imageboot, nothing to move */
1673 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1677 /* Only root can do this */
1678 if (!vfs_context_issuser(ctx
)) {
1682 IMGSRC_DEBUG("looking for root vnode.\n");
1685 * Get root vnode of filesystem we're moving.
1689 struct user64_mnt_imgsrc_args mia64
;
1690 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1692 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1696 height
= mia64
.mi_height
;
1697 flags
= mia64
.mi_flags
;
1698 devpath
= mia64
.mi_devpath
;
1700 struct user32_mnt_imgsrc_args mia32
;
1701 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1703 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1707 height
= mia32
.mi_height
;
1708 flags
= mia32
.mi_flags
;
1709 devpath
= mia32
.mi_devpath
;
1713 * For binary compatibility--assumes one level of nesting.
1716 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1721 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1725 /* munge into LP64 addr */
1726 devpath
= CAST_USER_ADDR_T(tmp
);
1734 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1738 error
= get_imgsrc_rootvnode(height
, &rvp
);
1740 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
);
1744 IMGSRC_DEBUG("got old root vnode\n");
/* Scratch buffer that keeps the previous f_mntonname for rollback. */
1746 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1748 /* Can only move once */
1749 mp
= vnode_mount(rvp
);
1750 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1751 IMGSRC_DEBUG("Already moved.\n");
1756 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
);
1757 IMGSRC_DEBUG("Starting updated.\n");
1759 /* Get exclusive rwlock on mount, authorize update on mp */
1760 error
= mount_begin_update(mp
, ctx
, 0);
1762 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1767 * It can only be moved once. Flag is set under the rwlock,
1768 * so we're now safe to proceed.
1770 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1771 IMGSRC_DEBUG("Already moved [2]\n");
1775 IMGSRC_DEBUG("Preparing coveredvp.\n");
1777 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1778 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1780 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1784 IMGSRC_DEBUG("Covered vp OK.\n");
1786 /* Sanity check the name caller has provided */
1787 vfsp
= mp
->mnt_vtable
;
1788 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1789 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1790 vfsp
->vfc_name
, fsname
);
1795 /* Check the device vnode and update mount-from name, for local filesystems */
1796 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1797 IMGSRC_DEBUG("Local, doing device validation.\n");
1799 if (devpath
!= USER_ADDR_NULL
) {
1800 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1802 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1811 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1812 * and increment the name cache's mount generation
1815 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1816 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
/* Save the current mount-on name, then switch to the new path. */
1823 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1824 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1826 /* Forbid future moves */
1828 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1831 /* Finally, add to mount list, completely ready to go */
1832 if (mount_list_add(mp
) != 0) {
1834 * The system is shutting down trying to umount
1835 * everything, so fail with a plausible errno.
1841 mount_end_update(mp
);
1843 FREE(old_mntonname
, M_TEMP
);
1845 vfs_notify_mount(pvp
);
/* Rollback: put the previous mount-on name back and re-allow moves. */
1849 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1852 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1857 * Placing the mp on the vnode clears VMOUNT,
1858 * so cleanup is different after that point
1861 /* Rele the vp, clear VMOUNT and v_mountedhere */
1862 undo_place_on_covered_vp(mp
, vp
);
1864 vnode_lock_spin(vp
);
1865 CLR(vp
->v_flag
, VMOUNT
);
1869 mount_end_update(mp
);
1873 FREE(old_mntonname
, M_TEMP
);
1877 #if CONFIG_LOCKERBOOT
/*
 * mount_locker_protoboot: in-kernel wrapper that relocates the imageboot
 * source onto `mntpoint`.
 *
 * Runs with the kernel context. It builds a user64_mnt_imgsrc_args on the
 * stack (mi_devpath = pbdevpath; the other initialisers are not part of
 * this listing), looks up `mntpoint` from kernel space with WANTPARENT so
 * both ni_vp and ni_dvp are returned, and calls
 * relocate_imageboot_source() with is64 = TRUE and by_index = TRUE.
 * Afterwards the iocounts on ni_vp and ni_dvp are dropped with
 * vnode_put(); a non-zero return from either vnode_put() panics. The
 * relocation result is parked in `stashed` while `error` is reused for
 * the vnode_put() calls.
 */
1880 mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
1881 const char *pbdevpath
)
1884 struct nameidata nd
;
1885 boolean_t cleanup_nd
= FALSE
;
1886 vfs_context_t ctx
= vfs_context_kernel();
1887 boolean_t is64
= TRUE
;
1888 boolean_t by_index
= TRUE
;
1889 struct user64_mnt_imgsrc_args mia64
= {
1892 .mi_devpath
= CAST_USER_ADDR_T(pbdevpath
),
1894 user_addr_t mia64addr
= CAST_USER_ADDR_T(&mia64
);
1896 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
1897 UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
);
1900 IMGSRC_DEBUG("namei: %d\n", error
);
1905 error
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
,
1906 &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
);
/* Keep the relocation result while error is reused for the puts below. */
1910 int stashed
= error
;
1912 error
= vnode_put(nd
.ni_vp
);
1914 panic("vnode_put() returned non-zero: %d", error
);
1918 error
= vnode_put(nd
.ni_dvp
);
1920 panic("vnode_put() returned non-zero: %d", error
);
1929 #endif /* CONFIG_LOCKERBOOT */
1930 #endif /* CONFIG_IMGSRC_ACCESS */
/*
 * enablequotas: switch on disk quotas for a freshly mounted filesystem
 * when the matching trigger files exist.
 *
 * Only mounts whose f_fstypename is "hfs" pass the first test (what
 * happens to the others is in a line not shown; presumably an early
 * return). For every quota type below MAXQUOTAS the function:
 *   - builds "<f_mntonname>/<QUOTAOPSNAME>.<extension>" and looks it up;
 *     if namei() fails the type is skipped.
 *   - otherwise drops the iocount on the found vnode, builds
 *     "<f_mntonname>/<QUOTAFILENAME>.<extension>" and issues
 *     VFS_QUOTACTL(Q_QUOTAON) with it. The result is deliberately ignored.
 */
1933 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1935 struct nameidata qnd
;
1937 char qfpath
[MAXPATHLEN
];
1938 const char *qfname
= QUOTAFILENAME
;
1939 const char *qfopsname
= QUOTAOPSNAME
;
1940 const char *qfextension
[] = INITQFNAMES
;
1942 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1943 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
1947 * Enable filesystem disk quotas if necessary.
1948 * We ignore errors as this should not interfere with final mount
1950 for (type
= 0; type
< MAXQUOTAS
; type
++) {
1951 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1952 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1953 CAST_USER_ADDR_T(qfpath
), ctx
);
1954 if (namei(&qnd
) != 0) {
1955 continue; /* option file to trigger quotas is not present */
/* Only existence mattered; release the vnode returned by namei(). */
1957 vnode_put(qnd
.ni_vp
);
1959 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1961 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
/*
 * checkdirs_callback: per-process callback used by checkdirs().
 *
 * `arg` is a struct cdirargs carrying olddp (the vnode that just got
 * covered) and newdp (its replacement). If the process has neither its
 * fd_cdir nor its fd_rdir set to olddp, nothing is done. Otherwise two
 * usecounts are taken on newdp up front (one per directory slot that
 * might be switched), the slots equal to olddp are re-pointed at newdp
 * under proc_dirs_lock_exclusive(), and afterwards every reference that
 * ended up unused or replaced is released with vnode_rele().
 *
 * new_cvp / new_rvp start out as newdp and old_cvp / old_rvp as NULL; the
 * statements that update them when a slot is switched are not part of
 * this listing. Every visible return yields PROC_RETURNED.
 */
1968 checkdirs_callback(proc_t p
, void * arg
)
1970 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
1971 vnode_t olddp
= cdrp
->olddp
;
1972 vnode_t newdp
= cdrp
->newdp
;
1973 struct filedesc
*fdp
;
1974 vnode_t new_cvp
= newdp
;
1975 vnode_t new_rvp
= newdp
;
1976 vnode_t old_cvp
= NULL
;
1977 vnode_t old_rvp
= NULL
;
1980 * XXX Also needs to iterate each thread in the process to see if it
1981 * XXX is using a per-thread current working directory, and, if so,
1982 * XXX update that as well.
1986 * First, with the proc_fdlock held, check to see if we will need
1987 * to do any work. If not, we will get out fast.
1992 (fdp
->fd_cdir
!= olddp
&& fdp
->fd_rdir
!= olddp
)) {
1994 return PROC_RETURNED
;
1999 * Ok, we will have to do some work. Always take two refs
2000 * because we might need that many. We'll dispose of whatever
2001 * we ended up not using.
2003 if (vnode_ref(newdp
) != 0) {
2004 return PROC_RETURNED
;
2006 if (vnode_ref(newdp
) != 0) {
2008 return PROC_RETURNED
;
2011 proc_dirs_lock_exclusive(p
);
2013 * Now do the work. Note: we dropped the proc_fdlock, so we
2014 * have to do all of the checks again.
2019 if (fdp
->fd_cdir
== olddp
) {
2021 fdp
->fd_cdir
= newdp
;
2024 if (fdp
->fd_rdir
== olddp
) {
2026 fdp
->fd_rdir
= newdp
;
2031 proc_dirs_unlock_exclusive(p
);
2034 * Dispose of any references that are no longer needed.
2036 if (old_cvp
!= NULL
) {
2037 vnode_rele(old_cvp
);
2039 if (old_rvp
!= NULL
) {
2040 vnode_rele(old_rvp
);
2042 if (new_cvp
!= NULL
) {
2043 vnode_rele(new_cvp
);
2045 if (new_rvp
!= NULL
) {
2046 vnode_rele(new_rvp
);
2049 return PROC_RETURNED
;
2055 * Scan all active processes to see if any of them have a current
2056 * or root directory onto which the new filesystem has just been
2057 * mounted. If so, replace them with the new mount point.
/*
 * checkdirs: after a mount has been placed on `olddp`, re-point every
 * process whose current or root directory is olddp at the root of the
 * new filesystem.
 *
 * A usecount of exactly 1 on olddp is special-cased first (presumably an
 * early exit because nobody else references the vnode - the body is not
 * shown). The new root is obtained with VFS_ROOT() on
 * olddp->v_mountedhere; the panic line reports a failure of that call.
 * All processes are then visited with proc_iterate() and
 * checkdirs_callback(). The final test handles the case where the global
 * rootvnode itself is olddp (body not shown in this listing).
 */
2060 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
2065 struct cdirargs cdr
;
2067 if (olddp
->v_usecount
== 1) {
2070 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
2074 panic("mount: lost mount: error %d", err
);
2081 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2082 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
2084 if (rootvnode
== olddp
) {
2096 * Unmount a file system.
2098 * Note: unmount takes a path to the vnode mounted on as argument,
2099 * not special file (as before).
/*
 * unmount: system call entry point for unmounting by path.
 *
 * uap->path is looked up from user space (LOOKUP / OP_UNMOUNT, FOLLOW,
 * audited). The mount of the resulting vnode is checked with
 * mac_mount_check_umount(), the vnode must carry VROOT (i.e. be the root
 * of its filesystem), and the work is then handed to safedounmount()
 * together with uap->flags. As the original comment states,
 * safedounmount() consumes the mount reference; where that reference is
 * taken is not part of this listing.
 */
2103 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
2108 struct nameidata nd
;
2109 vfs_context_t ctx
= vfs_context_current();
2111 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
2112 UIO_USERSPACE
, uap
->path
, ctx
);
2122 error
= mac_mount_check_umount(ctx
, mp
);
2129 * Must be the root of the filesystem
2131 if ((vp
->v_flag
& VROOT
) == 0) {
2137 /* safedounmount consumes the mount ref */
2138 return safedounmount(mp
, uap
->flags
, ctx
);
/*
 * vfs_unmountbyfsid: unmount the filesystem identified by `fsid`.
 * The mount is looked up with mount_list_lookupby_fsid(fsid, 0, 1); a
 * NULL result takes the failure branch (its errno is not shown in this
 * listing). Otherwise the mount, `flags` and `ctx` are passed to
 * safedounmount(), which consumes the mount reference.
 */
2142 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
2146 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
2147 if (mp
== (mount_t
)0) {
2152 /* safedounmount consumes the mount ref */
2153 return safedounmount(mp
, flags
, ctx
);
2158 * The mount struct comes with a mount ref which will be consumed.
2159 * Do the actual file system unmount, prevent some common foot shooting.
/*
 * safedounmount: policy checks in front of dounmount().
 *
 * Checks visible in this listing:
 *   - a non-responding filesystem combined with MNT_NOBLOCK and without
 *     MNT_FORCE is refused (EBUSY per the original comment).
 *   - unless the mount is tagged MNTK_PERMIT_UNMOUNT and the request is
 *     not forced, the caller must be the recorded f_owner or pass suser().
 *   - MNT_ROOTFS and MNTK_SYSTEM mounts are refused with EBUSY.
 *   - MNTK_BACKS_ROOT mounts take a dedicated branch (body not shown).
 * When everything passes, dounmount(mp, flags, 1, ctx) is returned. What
 * the failure paths do with the mount reference is not shown here.
 *
 * NOTE(review): MNT_LNOTRESP carries the MNT_L prefix that the rest of
 * this file uses for mnt_lflag bits (MNT_LUNMOUNT, MNT_LFORCE, MNT_LDEAD,
 * MNT_LWAIT), yet it is tested against mnt_kern_flag below. Confirm
 * against the flag definitions which field is intended.
 *
 * NOTE(review): the last test uses #ifdef CONFIG_IMGSRC_ACCESS while the
 * rest of this file uses #if CONFIG_IMGSRC_ACCESS - confirm the two are
 * meant to differ.
 */
2162 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2165 proc_t p
= vfs_context_proc(ctx
);
2168 * If the file system is not responding and MNT_NOBLOCK
2169 * is set and not a forced unmount then return EBUSY.
2171 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
2172 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
2178 * Skip authorization if the mount is tagged as permissive and
2179 * this is not a forced-unmount attempt.
2181 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
2183 * Only root, or the user that did the original mount is
2184 * permitted to unmount this filesystem.
2186 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
2187 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
2192 * Don't allow unmounting the root file system (or the associated VM or DATA mounts) .
2194 if ((mp
->mnt_flag
& MNT_ROOTFS
) || (mp
->mnt_kern_flag
& MNTK_SYSTEM
)) {
2195 error
= EBUSY
; /* the root (or associated volumes) is always busy */
2199 #ifdef CONFIG_IMGSRC_ACCESS
2200 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
2204 #endif /* CONFIG_IMGSRC_ACCESS */
2206 return dounmount(mp
, flags
, 1, ctx
);
2214 * Do the actual file system unmount.
/*
 * dounmount: perform the actual unmount of `mp`.
 *
 * Parameters: mp is the mount to tear down; flags are the MNT_* request
 * flags (MNT_FORCE, MNT_NOBLOCK and MNT_LNOSUB are tested below); ctx is
 * the caller context. The use of `withref` is not visible in this
 * listing.
 *
 * Stages visible in this listing, in order:
 *   1. non-forced requests call fsevent_unmount() first.
 *   2. an unmount already in progress (MNT_LUNMOUNT) is refused.
 *   3. MNT_FORCE sets MNT_LFORCE; with MNT_NOBLOCK and a non-kernel
 *      process, P_NOREMOTEHANG is set on the process and the previous
 *      flag word is saved in pflags_save.
 *   4. MNTK_UNMOUNT and MNT_LUNMOUNT are set, MNT_ASYNC is cleared and
 *      the cached mnt_realrootvp is dropped.
 *   5. forced unmounts without MNT_LNOSUB first unmount all submounts.
 *   6. name-cache critical section, then mnt_rwlock taken exclusively.
 *   7. non-forced: ubc_umount() and, for writable mounts, VFS_SYNC(); the
 *      three flag clears that follow roll back stage 4.
 *   8. IOBSDMountChange(), nested trigger unmounts, vflush() (with
 *      FORCECLOSE when forced); a vflush error on a non-forced unmount
 *      rolls the flags back again.
 *   9. mount_iterdrain(), VFS_UNMOUNT(); the mount_iterreset() plus flag
 *      clears after it are a further rollback sequence.
 *  10. bookkeeping: vfs_nummntops, close of the backing device vnode for
 *      VFC_VFSLOCALARGS filesystems, removal from the mount list,
 *      unhooking of the covered vnode, vfc_refcount, name-cache purge,
 *      VQ_UNMOUNT event, MNT_LDEAD, wakeup of MNT_LWAIT sleepers,
 *      disk_conditioner_unmount().
 *  11. exit path: clear MNT_LWAIT, restore P_NOREMOTEHANG, run the
 *      trigger callback (VTC_RELEASE or VTC_REPLACE), release the rwlock,
 *      wake sleepers, drop the references on the covered vnode and post
 *      NOTE_WRITE on its parent, or free mp directly for a MNT_ROOTFS
 *      mount that has no covered vnode. A mount with neither panics.
 * The conditions, labels and gotos that connect these stages are not part
 * of this listing; the stage descriptions above only state what the
 * visible statements do.
 */
2217 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
2219 vnode_t coveredvp
= (vnode_t
)0;
2222 int forcedunmount
= 0;
2224 struct vnode
*devvp
= NULLVP
;
2226 proc_t p
= vfs_context_proc(ctx
);
2228 int pflags_save
= 0;
2229 #endif /* CONFIG_TRIGGERS */
2232 if (!(flags
& MNT_FORCE
)) {
2233 fsevent_unmount(mp
, ctx
); /* has to come first! */
2240 * If already an unmount in progress just return EBUSY.
2241 * Even a forced unmount cannot override.
2243 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
2251 if (flags
& MNT_FORCE
) {
2253 mp
->mnt_lflag
|= MNT_LFORCE
;
/* Remember the previous p_flag word so P_NOREMOTEHANG can be restored. */
2257 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2258 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
/* Mark the unmount as in progress. */
2262 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2263 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2264 mp
->mnt_flag
&= ~MNT_ASYNC
;
2266 * anyone currently in the fast path that
2267 * trips over the cached rootvp will be
2268 * dumped out and forced into the slow path
2269 * to regenerate a new cached value
2271 mp
->mnt_realrootvp
= NULLVP
;
2274 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2276 * Force unmount any mounts in this filesystem.
2277 * If any unmounts fail - just leave them dangling.
2280 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2284 * taking the name_cache_lock exclusively will
2285 * insure that everyone is out of the fast path who
2286 * might be trying to use a now stale copy of
2287 * vp->v_mountedhere->mnt_realrootvp
2288 * bumping mount_generation causes the cached values
2293 name_cache_unlock();
2296 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2301 if (forcedunmount
== 0) {
2302 ubc_umount(mp
); /* release cached vnodes */
2303 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2304 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2307 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2308 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2309 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2315 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2318 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2321 if (forcedunmount
) {
2322 lflags
|= FORCECLOSE
;
2324 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2325 if ((forcedunmount
== 0) && error
) {
2327 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2328 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2329 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2333 /* make sure no one is left in the mount iterations or lookup */
2334 mount_iterdrain(mp
);
2336 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2338 mount_iterreset(mp
);
2340 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2341 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2342 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2346 /* increment the operations count */
2348 OSAddAtomic(1, &vfs_nummntops
);
2351 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2352 /* hold an io reference and drop the usecount before close */
2353 devvp
= mp
->mnt_devvp
;
2354 vnode_getalways(devvp
);
2356 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2358 vnode_clearmountedon(devvp
);
/* The rwlock is dropped around the mount-list removal and retaken after. */
2361 lck_rw_done(&mp
->mnt_rwlock
);
2362 mount_list_remove(mp
);
2363 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2365 /* mark the mount point hook in the vp but not drop the ref yet */
2366 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2368 * The covered vnode needs special handling. Trying to get an
2369 * iocount must not block here as this may lead to deadlocks
2370 * if the Filesystem to which the covered vnode belongs is
2371 * undergoing forced unmounts. Since we hold a usecount, the
2372 * vnode cannot be reused (it can, however, still be terminated)
2374 vnode_getalways(coveredvp
);
2375 vnode_lock_spin(coveredvp
);
2378 coveredvp
->v_mountedhere
= (struct mount
*)0;
2379 CLR(coveredvp
->v_flag
, VMOUNT
);
2381 vnode_unlock(coveredvp
);
2382 vnode_put(coveredvp
);
2386 mp
->mnt_vtable
->vfc_refcount
--;
2387 mount_list_unlock();
2389 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2390 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2392 mp
->mnt_lflag
|= MNT_LDEAD
;
2394 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2396 * do the wakeup here
2397 * in case we block in mount_refdrain
2398 * which will drop the mount lock
2399 * and allow anyone blocked in vfs_busy
2400 * to wakeup and see the LDEAD state
2402 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2403 wakeup((caddr_t
)mp
);
2407 /* free disk_conditioner_info structure for this mount */
2408 disk_conditioner_unmount(mp
);
2411 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2412 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2417 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2418 // Restore P_NOREMOTEHANG bit to its previous value
2419 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2420 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2425 * Callback and context are set together under the mount lock, and
2426 * never cleared, so we're safe to examine them here, drop the lock,
2429 if (mp
->mnt_triggercallback
!= NULL
) {
2432 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2433 } else if (did_vflush
) {
2434 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2441 #endif /* CONFIG_TRIGGERS */
2443 lck_rw_done(&mp
->mnt_rwlock
);
2446 wakeup((caddr_t
)mp
);
2450 if ((coveredvp
!= NULLVP
)) {
2451 vnode_t pvp
= NULLVP
;
2454 * The covered vnode needs special handling. Trying to
2455 * get an iocount must not block here as this may lead
2456 * to deadlocks if the Filesystem to which the covered
2457 * vnode belongs is undergoing forced unmounts. Since we
2458 * hold a usecount, the vnode cannot be reused
2459 * (it can, however, still be terminated).
2461 vnode_getalways(coveredvp
);
2463 mount_dropcrossref(mp
, coveredvp
, 0);
2465 * We'll _try_ to detect if this really needs to be
2466 * done. The coveredvp can only be in termination (or
2467 * terminated) if the coveredvp's mount point is in a
2468 * forced unmount (or has been) since we still hold the
2471 if (!vnode_isrecycled(coveredvp
)) {
2472 pvp
= vnode_getparent(coveredvp
);
2474 if (coveredvp
->v_resolve
) {
2475 vnode_trigger_rearm(coveredvp
, ctx
);
2480 vnode_rele(coveredvp
);
2481 vnode_put(coveredvp
);
2485 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2488 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2489 mount_lock_destroy(mp
);
2491 mac_mount_label_destroy(mp
);
2493 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2495 panic("dounmount: no coveredvp");
2502 * Unmount any mounts in this filesystem.
/*
 * dounmount_submounts: unmount every filesystem mounted (directly or
 * indirectly) inside `mp`.
 *
 * An array of fsid_t is allocated with M_NOWAIT, sized from `count`
 * (the TAILQ_FOREACH over mountlist is what precedes the size
 * computation; its body is not part of this listing). Slot 0 is seeded
 * with the fsid of mp itself. The mount list is then walked from the
 * entry after mp: whenever the vnode covered by a later mount belongs to
 * a filesystem whose fsid is already in the array, that mount's fsid is
 * appended. After the mount list lock is released, the collected mounts
 * (slots m down to 1, i.e. excluding mp itself) are looked up again by
 * fsid and unmounted in reverse order with dounmount(); errors are
 * ignored. The array is freed at the end.
 *
 * NOTE(review): the allocation failure branch only shows
 * mount_list_unlock(); what it returns is not visible in this listing.
 */
2505 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2508 fsid_t
*fsids
, fsid
;
2510 int count
= 0, i
, m
= 0;
2515 // Get an array to hold the submounts fsids.
2516 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2518 fsids_sz
= count
* sizeof(fsid_t
);
2519 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2520 if (fsids
== NULL
) {
2521 mount_list_unlock();
2524 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2527 * Fill the array with submount fsids.
2528 * Since mounts are always added to the tail of the mount list, the
2529 * list is always in mount order.
2530 * For each mount check if the mounted-on vnode belongs to a
2531 * mount that's already added to our array of mounts to be unmounted.
2533 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2534 vp
= smp
->mnt_vnodecovered
;
2538 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2539 for (i
= 0; i
<= m
; i
++) {
2540 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2541 fsids
[i
].val
[1] == fsid
.val
[1]) {
2542 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2547 mount_list_unlock();
2549 // Unmount the submounts in reverse order. Ignore errors.
2550 for (i
= m
; i
> 0; i
--) {
2551 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2554 mount_iterdrop(smp
);
2555 (void) dounmount(smp
, flags
, 1, ctx
);
2560 FREE(fsids
, M_TEMP
);
/*
 * mount_dropcrossref: drop one cross reference on `mp` held through the
 * covered vnode `dp`.
 *
 * A negative mnt_crossref panics. When mp is no longer the mount hooked
 * on dp (mp != dp->v_mountedhere) and the count has reached zero, the
 * mount structure is destroyed: its lock, its MAC label, and finally the
 * zone allocation. vnode_put_locked(dp) appears on both paths; presumably
 * it is conditional on need_put, but the tests and the decrement itself
 * are not part of this listing.
 */
2565 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2570 if (mp
->mnt_crossref
< 0) {
2571 panic("mount cross refs -ve");
2574 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2576 vnode_put_locked(dp
);
2580 mount_lock_destroy(mp
);
2582 mac_mount_label_destroy(mp
);
2584 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2588 vnode_put_locked(dp
);
2595 * Sync each mounted filesystem.
/*
 * Debug switch, off by default: when non-zero, sync() and sync_thread()
 * call vm_countdirtypages() after syncing.
 */
2601 int print_vmpage_stat
= 0;
2604 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2605 * mounted read-write with the passed waitfor value.
2607 * Parameters: mp mount-point descriptor per mounted file-system instance.
2608 * arg user argument (please see below)
2610 * User argument is a pointer to 32 bit unsigned integer which describes the
2611 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2612 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2615 * Returns: VFS_RETURNED
/*
 * sync_callback: vfs_iterate() callback that syncs one mount.
 *
 * Read-only mounts are skipped. For writable mounts the wait mode starts
 * as MNT_NOWAIT and is replaced by the uint32_t that `arg` points to (the
 * test guarding that read is not part of this listing; per the original
 * header it applies when arg is non-NULL). Any value other than
 * MNT_WAIT, MNT_NOWAIT or MNT_DWAIT, each optionally combined with
 * MNT_VOLUME, panics. MNT_ASYNC is cleared for the duration of the
 * VFS_SYNC() call, which runs with the kernel context and whose result is
 * ignored; the flag is put back afterwards (presumably only when
 * asyncflag recorded it as set - the test is not shown).
 * Always returns VFS_RETURNED.
 */
2618 sync_callback(mount_t mp
, void *arg
)
2620 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2621 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2622 unsigned waitfor
= MNT_NOWAIT
;
2625 waitfor
= *(uint32_t*)arg
;
2628 /* Sanity check for flags - these are the only valid combinations for the flag bits*/
2629 if (waitfor
!= MNT_WAIT
&&
2630 waitfor
!= (MNT_WAIT
| MNT_VOLUME
) &&
2631 waitfor
!= MNT_NOWAIT
&&
2632 waitfor
!= (MNT_NOWAIT
| MNT_VOLUME
) &&
2633 waitfor
!= MNT_DWAIT
&&
2634 waitfor
!= (MNT_DWAIT
| MNT_VOLUME
)) {
2635 panic("Passed inappropriate waitfor %u to "
2636 "sync_callback()", waitfor
);
2639 mp
->mnt_flag
&= ~MNT_ASYNC
;
2640 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel());
2642 mp
->mnt_flag
|= MNT_ASYNC
;
2646 return VFS_RETURNED
;
/*
 * sync: system call entry point; all three parameters are unused.
 * Every mounted filesystem is visited with vfs_iterate(LK_NOWAIT) and
 * sync_callback(); the NULL argument selects that callback's default
 * wait mode. When print_vmpage_stat is set, vm_countdirtypages() is
 * called afterwards. The DIAGNOSTIC-only section and the return statement
 * are not part of this listing.
 */
2651 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2653 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2655 if (print_vmpage_stat
) {
2656 vm_countdirtypages();
2663 #endif /* DIAGNOSTIC */
/*
 * Selector values read by sync_internal_callback() through its `arg`
 * (as a sync_type_t): restrict a sync pass to mounts that callback
 * classifies as reliable, or to the remaining ones.
 */
2669 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2670 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
/*
 * sync_internal_callback: vfs_iterate() callback used by sync_thread().
 *
 * `arg` points to a sync_type_t. A mount counts as reliable when it is
 * MNT_LOCAL and not MNTK_VIRTUALDEV. Mounts that do not match the
 * requested class are skipped; the others are handed to sync_callback()
 * with a NULL argument. Always returns VFS_RETURNED.
 */
2674 sync_internal_callback(mount_t mp
, void *arg
)
2677 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2678 (mp
->mnt_flag
& MNT_LOCAL
);
2679 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2681 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2682 return VFS_RETURNED
;
2683 } else if ((sync_type
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2684 return VFS_RETURNED
;
2688 (void)sync_callback(mp
, NULL
);
2690 return VFS_RETURNED
;
/*
 * State shared between sync_internal() and sync_thread(); both access it
 * with sync_mtx_lck held.
 *   sync_thread_state     bit mask of the SYNC_THREAD_* flags below.
 *   sync_timeout_seconds  tv_sec of the msleep() timeout in
 *                         sync_internal() (5 by default).
 *   SYNC_THREAD_RUN       a sync pass has been requested; cleared by
 *                         sync_thread() at the start of each pass.
 *   SYNC_THREAD_RUNNING   a sync_thread() instance exists; cleared by the
 *                         thread just before it exits.
 */
2693 int sync_thread_state
= 0;
2694 int sync_timeout_seconds
= 5;
2696 #define SYNC_THREAD_RUN 0x0001
2697 #define SYNC_THREAD_RUNNING 0x0002
/*
 * sync_thread: body of the kernel thread started by sync_internal().
 *
 * While SYNC_THREAD_RUN is set, the flag is cleared, sync_mtx_lck is
 * dropped, and two vfs_iterate() passes run: first over mounts classed as
 * reliable media, then over the rest. The lock is retaken before the loop
 * condition is evaluated again, so a request that arrives during a pass
 * triggers another pass. On exit the waiter is woken and
 * SYNC_THREAD_RUNNING is cleared, both before the lock is released (see
 * the original comment for why the wakeup has to happen under the lock).
 * print_vmpage_stat optionally triggers vm_countdirtypages() afterwards.
 */
2700 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2702 sync_type_t sync_type
;
2704 lck_mtx_lock(sync_mtx_lck
);
2705 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2706 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2707 lck_mtx_unlock(sync_mtx_lck
);
2709 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2710 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2711 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2712 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2714 lck_mtx_lock(sync_mtx_lck
);
2717 * This wakeup _has_ to be issued before the lock is released otherwise
2718 * we may end up waking up a thread in sync_internal which is
2719 * expecting a wakeup from a thread it just created and not from this
2720 * thread which is about to exit.
2722 wakeup(&sync_thread_state
);
2723 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2724 lck_mtx_unlock(sync_mtx_lck
);
2726 if (print_vmpage_stat
) {
2727 vm_countdirtypages();
2734 #endif /* DIAGNOSTIC */
/*
 * Time of the last "sync timed out" message; sync_internal() uses it to
 * print that message only when more than 120 seconds have passed since
 * the previous one.
 */
2737 struct timeval sync_timeout_last_print
= {.tv_sec
= 0, .tv_usec
= 0};
2740 * An in-kernel sync for power management to call.
2741 * This function always returns within sync_timeout seconds.
/*
 * sync_internal (name taken from the closing comment; the line carrying
 * the function name is not part of this listing): bounded in-kernel sync.
 *
 * With sync_mtx_lck held, SYNC_THREAD_RUN is set to request a pass. If no
 * sync thread is running, SYNC_THREAD_RUNNING is set and a new
 * sync_thread() is started; when the start fails the flag is cleared, the
 * lock is released and "sync_thread failed" is printed. The caller then
 * sleeps on &sync_thread_state with a timeout of sync_timeout_seconds;
 * PDROP means the mutex is not held when msleep() returns. The
 * timed-out message is rate limited through sync_timeout_last_print.
 * If this call created the thread, its reference is dropped with
 * thread_deallocate(). The return value is not part of this listing.
 */
2743 __private_extern__
int
2748 int thread_created
= FALSE
;
2749 struct timespec ts
= {.tv_sec
= sync_timeout_seconds
, .tv_nsec
= 0};
2751 lck_mtx_lock(sync_mtx_lck
);
2752 sync_thread_state
|= SYNC_THREAD_RUN
;
2753 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2756 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2757 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2758 if (kr
!= KERN_SUCCESS
) {
2759 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2760 lck_mtx_unlock(sync_mtx_lck
);
2761 printf("sync_thread failed\n");
2764 thread_created
= TRUE
;
2767 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2768 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2773 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2774 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2775 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2779 if (thread_created
) {
2780 thread_deallocate(thd
);
2784 } /* end of sync_internal call */
2787 * Change filesystem quotas.
/*
 * quotactl: system call entry point for quota control.
 *
 * uap->path is looked up to find the mount, the iocount on the vnode is
 * dropped, and the sub-command (uap->cmd >> SUBCMDSHIFT) decides how
 * uap->arg is marshalled before and after VFS_QUOTACTL():
 *   - a file name: copied into a MAXPATHLEN kalloc() buffer with
 *     copyinstr() and freed with kfree() afterwards.
 *   - a dqblk filled in by the filesystem: datap points at my_dqblk and
 *     the result is copied out afterwards (converted through
 *     munge_dqblk() to a user_dqblk for 64-bit processes).
 *   - a dqblk supplied by the caller: copied in before the call (again
 *     via user_dqblk and munge_dqblk() for 64-bit processes).
 *   - an integer status: datap points at quota_status and it is copied
 *     out afterwards.
 * The case labels of both switch statements are not part of this
 * listing, so the mapping from Q_* commands to these groups cannot be
 * confirmed from here.
 *
 * NOTE(review): lines 2821 and 2822 are adjacent in the original
 * numbering, so the kalloc() result reaches copyinstr() without a NULL
 * check, although the cleanup code further down does test datap against
 * NULL - confirm whether the allocation can fail here.
 *
 * NOTE(review): the operand on the line after "2844 datap" looks
 * entity-mangled in this listing; given the later
 * sizeof(quota_status) copyout it is presumably the address of
 * quota_status. It is left untouched here.
 */
2791 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2794 int error
, quota_cmd
, quota_status
= 0;
2797 struct nameidata nd
;
2798 vfs_context_t ctx
= vfs_context_current();
2799 struct dqblk my_dqblk
= {};
2801 AUDIT_ARG(uid
, uap
->uid
);
2802 AUDIT_ARG(cmd
, uap
->cmd
);
2803 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2809 mp
= nd
.ni_vp
->v_mount
;
2811 vnode_put(nd
.ni_vp
);
2814 /* copyin any data we will need for downstream code */
2815 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2817 switch (quota_cmd
) {
2819 /* uap->arg specifies a file from which to take the quotas */
2820 fnamelen
= MAXPATHLEN
;
2821 datap
= kalloc(MAXPATHLEN
);
2822 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2825 /* uap->arg is a pointer to a dqblk structure. */
2826 datap
= (caddr_t
) &my_dqblk
;
2830 /* uap->arg is a pointer to a dqblk structure. */
2831 datap
= (caddr_t
) &my_dqblk
;
2832 if (proc_is64bit(p
)) {
2833 struct user_dqblk my_dqblk64
;
2834 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2836 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2839 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2843 /* uap->arg is a pointer to an integer */
2844 datap
= (caddr_t
) "a_status
;
2852 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2855 switch (quota_cmd
) {
2857 if (datap
!= NULL
) {
2858 kfree(datap
, MAXPATHLEN
);
2862 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2864 if (proc_is64bit(p
)) {
2865 struct user_dqblk my_dqblk64
;
2867 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2868 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2869 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
2871 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
2876 /* uap->arg is a pointer to an integer */
2878 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl: second definition of the system call in which every
 * parameter is marked unused.
 * NOTE(review): presumably this is the stub compiled when quota support
 * is configured out; neither the preprocessor conditional nor the body
 * is part of this listing - confirm.
 */
2890 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2897 * Get filesystem statistics.
2899 * Returns: 0 Success
2901 * vfs_update_vfsstat:???
2902 * munge_statfs:EFAULT
2906 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2909 struct vfsstatfs
*sp
;
2911 struct nameidata nd
;
2912 vfs_context_t ctx
= vfs_context_current();
2915 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2916 UIO_USERSPACE
, uap
->path
, ctx
);
2923 sp
= &mp
->mnt_vfsstat
;
2927 error
= mac_mount_check_stat(ctx
, mp
);
2934 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2940 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2946 * Get filesystem statistics.
2950 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2954 struct vfsstatfs
*sp
;
2957 AUDIT_ARG(fd
, uap
->fd
);
2959 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2963 error
= vnode_getwithref(vp
);
2969 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2978 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2984 sp
= &mp
->mnt_vfsstat
;
2985 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2989 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2999 vfs_get_statfs64(struct mount
*mp
, struct statfs64
*sfs
)
3001 struct vfsstatfs
*vsfs
= &mp
->mnt_vfsstat
;
3003 bzero(sfs
, sizeof(*sfs
));
3005 sfs
->f_bsize
= vsfs
->f_bsize
;
3006 sfs
->f_iosize
= (int32_t)vsfs
->f_iosize
;
3007 sfs
->f_blocks
= vsfs
->f_blocks
;
3008 sfs
->f_bfree
= vsfs
->f_bfree
;
3009 sfs
->f_bavail
= vsfs
->f_bavail
;
3010 sfs
->f_files
= vsfs
->f_files
;
3011 sfs
->f_ffree
= vsfs
->f_ffree
;
3012 sfs
->f_fsid
= vsfs
->f_fsid
;
3013 sfs
->f_owner
= vsfs
->f_owner
;
3014 sfs
->f_type
= mp
->mnt_vtable
->vfc_typenum
;
3015 sfs
->f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
3016 sfs
->f_fssubtype
= vsfs
->f_fssubtype
;
3017 sfs
->f_flags_ext
= ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
)) ? MNT_EXT_ROOT_DATA_VOL
: 0;
3018 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
3019 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
3021 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
);
3023 strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
);
3024 strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
);
3028 * Get file system statistics in 64-bit mode
3031 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
3035 struct nameidata nd
;
3036 struct statfs64 sfs
;
3037 vfs_context_t ctxp
= vfs_context_current();
3040 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3041 UIO_USERSPACE
, uap
->path
, ctxp
);
3051 error
= mac_mount_check_stat(ctxp
, mp
);
3058 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
3064 vfs_get_statfs64(mp
, &sfs
);
3065 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3066 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3067 /* This process does not want to see a separate data volume mountpoint */
3068 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3070 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3077 * Get file system statistics in 64-bit mode
3080 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
3084 struct statfs64 sfs
;
3087 AUDIT_ARG(fd
, uap
->fd
);
3089 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3093 error
= vnode_getwithref(vp
);
3099 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3108 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3114 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3118 vfs_get_statfs64(mp
, &sfs
);
3119 if ((mp
->mnt_kern_flag
& MNTK_SYSTEM
) && !(mp
->mnt_kern_flag
& MNTK_SWAP_MOUNT
) && !(mp
->mnt_flag
& MNT_ROOTFS
) &&
3120 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3121 /* This process does not want to see a separate data volume mountpoint */
3122 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3124 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3133 struct getfsstat_struct
{
3144 getfsstat_callback(mount_t mp
, void * arg
)
3146 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3147 struct vfsstatfs
*sp
;
3149 vfs_context_t ctx
= vfs_context_current();
3151 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3153 error
= mac_mount_check_stat(ctx
, mp
);
3155 fstp
->error
= error
;
3156 return VFS_RETURNED_DONE
;
3159 sp
= &mp
->mnt_vfsstat
;
3161 * If MNT_NOWAIT is specified, do not refresh the
3162 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3164 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3165 (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3166 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3167 (error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) {
3168 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3169 return VFS_RETURNED
;
3173 * Need to handle LP64 version of struct statfs
3175 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
3177 fstp
->error
= error
;
3178 return VFS_RETURNED_DONE
;
3180 fstp
->sfsp
+= my_size
;
3184 error
= mac_mount_label_get(mp
, *fstp
->mp
);
3186 fstp
->error
= error
;
3187 return VFS_RETURNED_DONE
;
3194 return VFS_RETURNED
;
3198 * Get statistics on all filesystems.
3201 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
3203 struct __mac_getfsstat_args muap
;
3205 muap
.buf
= uap
->buf
;
3206 muap
.bufsize
= uap
->bufsize
;
3207 muap
.mac
= USER_ADDR_NULL
;
3209 muap
.flags
= uap
->flags
;
3211 return __mac_getfsstat(p
, &muap
, retval
);
3215 * __mac_getfsstat: Get MAC-related file system statistics
3217 * Parameters: p (ignored)
3218 * uap User argument descriptor (see below)
3219 * retval Count of file system statistics (N stats)
3221 * Indirect: uap->bufsize Buffer size
3222 * uap->macsize MAC info size
3223 * uap->buf Buffer where information will be returned
3225 * uap->flags File system flags
3228 * Returns: 0 Success
3233 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
3237 size_t count
, maxcount
, bufsize
, macsize
;
3238 struct getfsstat_struct fst
;
3240 if ((unsigned)uap
->bufsize
> INT_MAX
|| (unsigned)uap
->macsize
> INT_MAX
) {
3244 bufsize
= (size_t) uap
->bufsize
;
3245 macsize
= (size_t) uap
->macsize
;
3247 if (IS_64BIT_PROCESS(p
)) {
3248 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
3250 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
3258 if (uap
->mac
!= USER_ADDR_NULL
) {
3263 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
3264 if (count
!= maxcount
) {
3268 /* Copy in the array */
3269 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
3274 error
= copyin(uap
->mac
, mp0
, macsize
);
3276 FREE(mp0
, M_MACTEMP
);
3280 /* Normalize to an array of user_addr_t */
3281 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
3283 FREE(mp0
, M_MACTEMP
);
3287 for (i
= 0; i
< count
; i
++) {
3288 if (IS_64BIT_PROCESS(p
)) {
3289 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3291 mp
[i
] = (user_addr_t
)mp0
[i
];
3294 FREE(mp0
, M_MACTEMP
);
3301 fst
.flags
= uap
->flags
;
3304 fst
.maxcount
= maxcount
;
3307 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
);
3310 FREE(mp
, M_MACTEMP
);
3314 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3318 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3319 *retval
= fst
.maxcount
;
3321 *retval
= fst
.count
;
3327 getfsstat64_callback(mount_t mp
, void * arg
)
3329 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3330 struct vfsstatfs
*sp
;
3331 struct statfs64 sfs
;
3334 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3336 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3338 fstp
->error
= error
;
3339 return VFS_RETURNED_DONE
;
3342 sp
= &mp
->mnt_vfsstat
;
3344 * If MNT_NOWAIT is specified, do not refresh the fsstat
3345 * cache. MNT_WAIT overrides MNT_NOWAIT.
3347 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3348 * getfsstat, since the constants are out of the same
3351 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3352 ((((fstp
->flags
& MNT_NOWAIT
) == 0) || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3353 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3354 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) {
3355 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3356 return VFS_RETURNED
;
3359 vfs_get_statfs64(mp
, &sfs
);
3360 error
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
));
3362 fstp
->error
= error
;
3363 return VFS_RETURNED_DONE
;
3365 fstp
->sfsp
+= sizeof(sfs
);
3368 return VFS_RETURNED
;
3372 * Get statistics on all file systems in 64 bit mode.
3375 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3378 int count
, maxcount
;
3379 struct getfsstat_struct fst
;
3381 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3387 fst
.flags
= uap
->flags
;
3390 fst
.maxcount
= maxcount
;
3392 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
);
3395 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3399 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3400 *retval
= fst
.maxcount
;
3402 *retval
= fst
.count
;
3409 * gets the associated vnode with the file descriptor passed.
3413 * ctx - vfs context of caller
3414 * fd - file descriptor for which vnode is required.
3415 * vpp - Pointer to pointer to vnode to be returned.
3417 * The vnode is returned with an iocount so any vnode obtained
3418 * by this call needs a vnode_put
3422 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3426 struct fileproc
*fp
;
3427 proc_t p
= vfs_context_proc(ctx
);
3431 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3436 error
= vnode_getwithref(vp
);
3438 (void)fp_drop(p
, fd
, fp
, 0);
3442 (void)fp_drop(p
, fd
, fp
, 0);
3448 * Wrapper function around namei to start lookup from a directory
3449 * specified by a file descriptor ni_dirfd.
3451 * In addition to all the errors returned by namei, this call can
3452 * return ENOTDIR if the file descriptor does not refer to a directory.
3453 * and EBADF if the file descriptor is not valid.
3456 nameiat(struct nameidata
*ndp
, int dirfd
)
3458 if ((dirfd
!= AT_FDCWD
) &&
3459 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3460 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3464 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3465 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3470 c
= *((char *)(ndp
->ni_dirp
));
3476 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3482 if (vnode_vtype(dvp_at
) != VDIR
) {
3487 ndp
->ni_dvp
= dvp_at
;
3488 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3490 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3500 * Change current working directory to a given file descriptor.
3504 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3506 struct filedesc
*fdp
= p
->p_fd
;
3512 vfs_context_t ctx
= vfs_context_current();
3514 AUDIT_ARG(fd
, uap
->fd
);
3515 if (per_thread
&& uap
->fd
== -1) {
3517 * Switching back from per-thread to per process CWD; verify we
3518 * in fact have one before proceeding. The only success case
3519 * for this code path is to return 0 preemptively after zapping
3520 * the thread structure contents.
3522 thread_t th
= vfs_context_thread(ctx
);
3524 uthread_t uth
= get_bsdthread_info(th
);
3526 uth
->uu_cdir
= NULLVP
;
3527 if (tvp
!= NULLVP
) {
3535 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3538 if ((error
= vnode_getwithref(vp
))) {
3543 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3545 if (vp
->v_type
!= VDIR
) {
3551 error
= mac_vnode_check_chdir(ctx
, vp
);
3556 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3561 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3562 if (vfs_busy(mp
, LK_NOWAIT
)) {
3566 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3577 if ((error
= vnode_ref(vp
))) {
3583 thread_t th
= vfs_context_thread(ctx
);
3585 uthread_t uth
= get_bsdthread_info(th
);
3588 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3594 proc_dirs_lock_exclusive(p
);
3599 proc_dirs_unlock_exclusive(p
);
3616 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3618 return common_fchdir(p
, uap
, 0);
3622 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3624 return common_fchdir(p
, (void *)uap
, 1);
3629 * Change current working directory (".").
3631 * Returns: 0 Success
3632 * change_dir:ENOTDIR
3634 * vnode_ref:ENOENT No such file or directory
3638 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata
*ndp
, int per_thread
)
3640 struct filedesc
*fdp
= p
->p_fd
;
3644 error
= change_dir(ndp
, ctx
);
3648 if ((error
= vnode_ref(ndp
->ni_vp
))) {
3649 vnode_put(ndp
->ni_vp
);
3653 * drop the iocount we picked up in change_dir
3655 vnode_put(ndp
->ni_vp
);
3658 thread_t th
= vfs_context_thread(ctx
);
3660 uthread_t uth
= get_bsdthread_info(th
);
3662 uth
->uu_cdir
= ndp
->ni_vp
;
3663 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3665 vnode_rele(ndp
->ni_vp
);
3669 proc_dirs_lock_exclusive(p
);
3672 fdp
->fd_cdir
= ndp
->ni_vp
;
3674 proc_dirs_unlock_exclusive(p
);
3686 * Change current working directory (".").
3688 * Returns: 0 Success
3689 * chdir_internal:ENOTDIR
3690 * chdir_internal:ENOENT No such file or directory
3691 * chdir_internal:???
3695 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3697 struct nameidata nd
;
3698 vfs_context_t ctx
= vfs_context_current();
3700 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3701 UIO_USERSPACE
, uap
->path
, ctx
);
3703 return chdir_internal(p
, ctx
, &nd
, per_thread
);
3710 * Change current working directory (".") for the entire process
3712 * Parameters: p Process requesting the call
3713 * uap User argument descriptor (see below)
3716 * Indirect parameters: uap->path Directory path
3718 * Returns: 0 Success
3719 * common_chdir: ENOTDIR
3720 * common_chdir: ENOENT No such file or directory
3725 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3727 return common_chdir(p
, (void *)uap
, 0);
3733 * Change current working directory (".") for a single thread
3735 * Parameters: p Process requesting the call
3736 * uap User argument descriptor (see below)
3739 * Indirect parameters: uap->path Directory path
3741 * Returns: 0 Success
3742 * common_chdir: ENOTDIR
3743 * common_chdir: ENOENT No such file or directory
3748 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3750 return common_chdir(p
, (void *)uap
, 1);
3755 * Change notion of root (``/'') directory.
3759 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3761 struct filedesc
*fdp
= p
->p_fd
;
3763 struct nameidata nd
;
3765 vfs_context_t ctx
= vfs_context_current();
3767 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3771 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3772 UIO_USERSPACE
, uap
->path
, ctx
);
3773 error
= change_dir(&nd
, ctx
);
3779 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3782 vnode_put(nd
.ni_vp
);
3787 if ((error
= vnode_ref(nd
.ni_vp
))) {
3788 vnode_put(nd
.ni_vp
);
3791 vnode_put(nd
.ni_vp
);
3794 * This lock provides the guarantee that as long as you hold the lock
3795 * fdp->fd_rdir has a usecount on it. This is used to take an iocount
3796 * on a referenced vnode in namei when determining the rootvnode for
3799 /* needed for synchronization with lookup */
3800 proc_dirs_lock_exclusive(p
);
3801 /* needed for setting the flag and other activities on the fd itself */
3804 fdp
->fd_rdir
= nd
.ni_vp
;
3805 fdp
->fd_flags
|= FD_CHROOT
;
3807 proc_dirs_unlock_exclusive(p
);
3817 * Common routine for chroot and chdir.
3819 * Returns: 0 Success
3820 * ENOTDIR Not a directory
3821 * namei:??? [anything namei can return]
3822 * vnode_authorize:??? [anything vnode_authorize can return]
3825 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3830 if ((error
= namei(ndp
))) {
3836 if (vp
->v_type
!= VDIR
) {
3842 error
= mac_vnode_check_chdir(ctx
, vp
);
3849 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3859 * Allocate the vnode data (for directories) associated with the file glob.
3862 fg_vn_data_alloc(void)
3864 struct fd_vn_data
*fvdata
;
3866 /* Allocate per fd vnode data */
3867 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3868 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3869 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3874 * Free the vnode data (for directories) associated with the file glob.
3877 fg_vn_data_free(void *fgvndata
)
3879 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3881 if (fvdata
->fv_buf
) {
3882 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3884 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3885 FREE(fvdata
, M_FD_VN_DATA
);
3889 * Check permissions, allocate an open file structure,
3890 * and call the device open routine if any.
3892 * Returns: 0 Success
3903 * XXX Need to implement uid, gid
3906 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3907 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3910 proc_t p
= vfs_context_proc(ctx
);
3911 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3912 struct fileproc
*fp
;
3915 int type
, indx
, error
;
3917 struct vfs_context context
;
3921 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
3925 flags
= FFLAGS(uflags
);
3926 CLR(flags
, FENCRYPTED
);
3927 CLR(flags
, FUNENCRYPTED
);
3929 AUDIT_ARG(fflags
, oflags
);
3930 AUDIT_ARG(mode
, vap
->va_mode
);
3932 if ((error
= falloc_withalloc(p
,
3933 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3936 uu
->uu_dupfd
= -indx
- 1;
3938 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3939 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
3940 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3941 fp_drop(p
, indx
, NULL
, 0);
3946 if (error
== ERESTART
) {
3949 fp_free(p
, indx
, fp
);
3955 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3956 fp
->f_fglob
->fg_ops
= &vnops
;
3957 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3959 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3960 lf
.l_whence
= SEEK_SET
;
3963 if (flags
& O_EXLOCK
) {
3964 lf
.l_type
= F_WRLCK
;
3966 lf
.l_type
= F_RDLCK
;
3969 if ((flags
& FNONBLOCK
) == 0) {
3973 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3979 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
3982 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3985 /* try to truncate by setting the size attribute */
3986 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
3991 * For directories we hold some additional information in the fd.
3993 if (vnode_vtype(vp
) == VDIR
) {
3994 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3996 fp
->f_fglob
->fg_vn_data
= NULL
;
4002 * The first terminal open (without a O_NOCTTY) by a session leader
4003 * results in it being set as the controlling terminal.
4005 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
4006 !(flags
& O_NOCTTY
)) {
4009 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
4010 (caddr_t
)&tmp
, ctx
);
4014 if (flags
& O_CLOEXEC
) {
4015 *fdflags(p
, indx
) |= UF_EXCLOSE
;
4017 if (flags
& O_CLOFORK
) {
4018 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
4020 procfdtbl_releasefd(p
, indx
, NULL
);
4022 #if CONFIG_SECLUDED_MEMORY
4023 if (secluded_for_filecache
&&
4024 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
4025 vnode_vtype(vp
) == VREG
) {
4026 memory_object_control_t moc
;
4028 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
4030 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
4031 /* nothing to do... */
4032 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
4033 /* writable -> no longer eligible for secluded pages */
4034 memory_object_mark_eligible_for_secluded(moc
,
4036 } else if (secluded_for_filecache
== 1) {
4037 char pathname
[32] = { 0, };
4039 /* XXX FBDP: better way to detect /Applications/ ? */
4040 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4041 (void)copyinstr(ndp
->ni_dirp
,
4046 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
4051 pathname
[sizeof(pathname
) - 1] = '\0';
4052 if (strncmp(pathname
,
4054 strlen("/Applications/")) == 0 &&
4056 "/Applications/Camera.app/",
4057 strlen("/Applications/Camera.app/")) != 0) {
4060 * AND from "/Applications/"
4061 * AND not from "/Applications/Camera.app/"
4062 * ==> eligible for secluded
4064 memory_object_mark_eligible_for_secluded(moc
,
4067 } else if (secluded_for_filecache
== 2) {
4069 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4071 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4073 /* not implemented... */
4075 size_t len
= strlen(vp
->v_name
);
4076 if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) ||
4077 !strncmp(vp
->v_name
, "dyld", len
) ||
4078 !strncmp(vp
->v_name
, "launchd", len
) ||
4079 !strncmp(vp
->v_name
, "Camera", len
) ||
4080 !strncmp(vp
->v_name
, "mediaserverd", len
) ||
4081 !strncmp(vp
->v_name
, "SpringBoard", len
) ||
4082 !strncmp(vp
->v_name
, "backboardd", len
)) {
4084 * This file matters when launching Camera:
4085 * do not store its contents in the secluded
4086 * pool that will be drained on Camera launch.
4088 memory_object_mark_eligible_for_secluded(moc
,
4093 #endif /* CONFIG_SECLUDED_MEMORY */
4095 fp_drop(p
, indx
, fp
, 1);
4102 context
= *vfs_context_current();
4103 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
4105 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
4106 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
4107 lf
.l_whence
= SEEK_SET
;
4110 lf
.l_type
= F_UNLCK
;
4113 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
4116 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
4118 fp_free(p
, indx
, fp
);
4124 * While most of the *at syscall handlers can call nameiat() which
4125 * is a wrapper around namei, the use of namei and initialisation
4126 * of nameidata are far removed and in different functions - namei
4127 * gets called in vn_open_auth for open1. So we'll just do here what
4131 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4132 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
4135 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
4139 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4140 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
4145 c
= *((char *)(ndp
->ni_dirp
));
4151 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
4157 if (vnode_vtype(dvp_at
) != VDIR
) {
4162 ndp
->ni_dvp
= dvp_at
;
4163 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
4164 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
4171 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
4175 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4177 * Parameters: p Process requesting the open
4178 * uap User argument descriptor (see below)
4179 * retval Pointer to an area to receive the
4180 * return value from the system call
4182 * Indirect: uap->path Path to open (same as 'open')
4183 * uap->flags Flags to open (same as 'open')
4184 * uap->uid UID to set, if creating
4185 * uap->gid GID to set, if creating
4186 * uap->mode File mode, if creating (same as 'open')
4187 * uap->xsecurity ACL to set, if creating
4189 * Returns: 0 Success
4192 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4194 * XXX: We should enumerate the possible errno values here, and where
4195 * in the code they originated.
4198 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
4200 struct filedesc
*fdp
= p
->p_fd
;
4202 kauth_filesec_t xsecdst
;
4203 struct vnode_attr va
;
4204 struct nameidata nd
;
4207 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4210 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
4211 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
4216 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4217 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4218 if (uap
->uid
!= KAUTH_UID_NONE
) {
4219 VATTR_SET(&va
, va_uid
, uap
->uid
);
4221 if (uap
->gid
!= KAUTH_GID_NONE
) {
4222 VATTR_SET(&va
, va_gid
, uap
->gid
);
4224 if (xsecdst
!= NULL
) {
4225 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4228 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4229 uap
->path
, vfs_context_current());
4231 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4232 fileproc_alloc_init
, NULL
, retval
);
4233 if (xsecdst
!= NULL
) {
4234 kauth_filesec_free(xsecdst
);
4241 * Go through the data-protected atomically controlled open (2)
4243 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4246 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
4248 int flags
= uap
->flags
;
4249 int class = uap
->class;
4250 int dpflags
= uap
->dpflags
;
4253 * Follow the same path as normal open(2)
4254 * Look up the item if it exists, and acquire the vnode.
4256 struct filedesc
*fdp
= p
->p_fd
;
4257 struct vnode_attr va
;
4258 struct nameidata nd
;
4263 /* Mask off all but regular access permissions */
4264 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4265 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4267 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4268 uap
->path
, vfs_context_current());
4271 * Initialize the extra fields in vnode_attr to pass down our
4273 * 1. target cprotect class.
4274 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4276 if (flags
& O_CREAT
) {
4277 /* lower level kernel code validates that the class is valid before applying it. */
4278 if (class != PROTECTION_CLASS_DEFAULT
) {
4280 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4281 * file behave the same as open (2)
4283 VATTR_SET(&va
, va_dataprotect_class
, class);
4287 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4288 if (flags
& (O_RDWR
| O_WRONLY
)) {
4289 /* Not allowed to write raw encrypted bytes */
4292 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4293 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4295 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4296 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4300 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4301 fileproc_alloc_init
, NULL
, retval
);
4307 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4308 int fd
, enum uio_seg segflg
, int *retval
)
4310 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4311 struct vnode_attr va
;
4312 struct nameidata nd
;
4316 /* Mask off all but regular access permissions */
4317 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4318 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4320 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4323 return open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
4328 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4330 __pthread_testcancel(1);
4331 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
4335 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4338 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4339 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
4343 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4346 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4347 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
4351 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4353 __pthread_testcancel(1);
4354 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4358 * openbyid_np: open a file given a file system id and a file system object id
4359 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4360 * file systems that don't support object ids it is a node id (uint64_t).
4362 * Parameters: p Process requesting the open
4363 * uap User argument descriptor (see below)
4364 * retval Pointer to an area to receive the
4365 * return value from the system call
4367 * Indirect: uap->path Path to open (same as 'open')
4369 * uap->fsid id of target file system
4370 * uap->objid id of target file system object
4371 * uap->flags Flags to open (same as 'open')
4373 * Returns: 0 Success
4377 * XXX: We should enumerate the possible errno values here, and where
4378 * in the code they originated.
4381 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4387 int buflen
= MAXPATHLEN
;
4389 vfs_context_t ctx
= vfs_context_current();
4391 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4395 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4399 /*uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4400 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4404 AUDIT_ARG(value32
, fsid
.val
[0]);
4405 AUDIT_ARG(value64
, objid
);
4407 /*resolve path from fsid, objid*/
4409 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4414 error
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
,
4415 buf
, FSOPT_ISREALFSID
, &pathlen
);
4421 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4429 error
= openat_internal(
4430 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4439 * Create a special file.
4441 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4444 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4446 struct vnode_attr va
;
4447 vfs_context_t ctx
= vfs_context_current();
4449 struct nameidata nd
;
4453 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4454 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4456 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4457 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4458 return mkfifo1(ctx
, uap
->path
, &va
);
4461 AUDIT_ARG(mode
, uap
->mode
);
4462 AUDIT_ARG(value32
, uap
->dev
);
4464 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4467 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4468 UIO_USERSPACE
, uap
->path
, ctx
);
4481 switch (uap
->mode
& S_IFMT
) {
4483 VATTR_SET(&va
, va_type
, VCHR
);
4486 VATTR_SET(&va
, va_type
, VBLK
);
4494 error
= mac_vnode_check_create(ctx
,
4495 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4501 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4505 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4510 int update_flags
= 0;
4512 // Make sure the name & parent pointers are hooked up
4513 if (vp
->v_name
== NULL
) {
4514 update_flags
|= VNODE_UPDATE_NAME
;
4516 if (vp
->v_parent
== NULLVP
) {
4517 update_flags
|= VNODE_UPDATE_PARENT
;
4521 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4525 add_fsevent(FSE_CREATE_FILE
, ctx
,
4533 * nameidone has to happen before we vnode_put(dvp)
4534 * since it may need to release the fs_nodelock on the dvp
4547 * Create a named pipe.
4549 * Returns: 0 Success
4552 * vnode_authorize:???
/*
 * mkfifo1: shared FIFO-creation helper; in this file it is called by
 * mknod() (for S_IFIFO requests), mkfifo() and mkfifo_extended().
 *
 * Parameters:	ctx	VFS context used for the lookup and authorization
 *		upath	Path of the FIFO to create (passed to NDINIT as a
 *			UIO_USERSPACE address)
 *		vap	Attributes prepared by the caller; va_type is
 *			overwritten with VFIFO here
 *
 * Visible sequence: set up a CREATE / OP_MKFIFO lookup that keeps the
 * parent (LOCKPARENT), authorize the creation with vn_authorize_create(),
 * then create the node with vn_create().
 *
 * NOTE(review): this listing omits some source lines (the lookup call,
 * local declarations, error returns and cleanup), so only the statements
 * shown below are described.
 */
4556 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4560 struct nameidata nd
;
4562 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4563 UIO_USERSPACE
, upath
, ctx
);
4571 /* check that this is a new file and authorize addition */
/* the node type is fixed here; the visible callers do not set va_type for FIFOs */
4576 VATTR_SET(vap
, va_type
, VFIFO
);
4578 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4582 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4585 * nameidone has to happen before we vnode_put(dvp)
4586 * since it may need to release the fs_nodelock on the dvp
4600 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4602 * Parameters: p Process requesting the open
4603 * uap User argument descriptor (see below)
4606 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4607 * uap->uid UID to set
4608 * uap->gid GID to set
4609 * uap->mode File mode to set (same as 'mkfifo')
4610 * uap->xsecurity ACL to set, if creating
4612 * Returns: 0 Success
4615 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4617 * XXX: We should enumerate the possible errno values here, and where
4618 * in the code they originated.
/*
 * mkfifo_extended: create a FIFO with optional owner, group and ACL.
 *
 * Builds a vnode_attr from the user arguments and hands it to mkfifo1():
 *   - va_mode is uap->mode limited to ALLPERMS and filtered through
 *     p->p_fd->fd_cmask (presumably the process umask);
 *   - va_uid / va_gid are set only when the caller supplied something
 *     other than KAUTH_UID_NONE / KAUTH_GID_NONE;
 *   - va_acl is set only when a filesec was copied in from uap->xsecurity.
 *
 * NOTE(review): this listing omits some source lines (declarations, the
 * attribute initialisation and the return statements), so only the
 * statements shown below are described.
 */
4621 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4624 kauth_filesec_t xsecdst
;
4625 struct vnode_attr va
;
4627 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4629 xsecdst
= KAUTH_FILESEC_NONE
;
/* copy the caller's security descriptor into the kernel only if one was passed */
4630 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4631 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4637 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4638 if (uap
->uid
!= KAUTH_UID_NONE
) {
4639 VATTR_SET(&va
, va_uid
, uap
->uid
);
4641 if (uap
->gid
!= KAUTH_GID_NONE
) {
4642 VATTR_SET(&va
, va_gid
, uap
->gid
);
4644 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4645 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4648 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
/* the copied-in filesec is released here, after mkfifo1() has returned */
4650 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4651 kauth_filesec_free(xsecdst
);
/*
 * mkfifo: create a FIFO at uap->path.
 *
 * Sets va_mode to uap->mode limited to ALLPERMS and filtered through
 * p->p_fd->fd_cmask (presumably the process umask), then returns the
 * result of mkfifo1() run in the current VFS context.  retval is unused.
 *
 * NOTE(review): this listing omits some source lines, so any
 * initialisation of 'va' before the VATTR_SET is not visible here.
 */
4658 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4660 struct vnode_attr va
;
4663 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4665 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
/*
 * my_strrchr: local string-scanning helper taking a string 'p' and a
 * character 'ch'.  safe_getpath_new() calls it with '/' to find the
 * position at which a truncated path is chopped.
 *
 * NOTE(review): only the signature and the loop header are visible in
 * this listing; judging by the name and the 'save' variable it presumably
 * returns the last occurrence of 'ch' -- confirm against the full source.
 */
4670 my_strrchr(char *p
, int ch
)
4674 for (save
= NULL
;; ++p
) {
/*
 * Prototypes for the safe_getpath family defined below.  All three take
 * the parent vnode, the leaf name, an output buffer with its length, and
 * an out-flag that is set when the path had to be truncated;
 * safe_getpath_new() additionally takes the 'firmlink' selector.
 */
4685 extern int safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
);
4686 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4687 extern int safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
/*
 * safe_getpath_new: build "<path of dvp>/<leafname>" into 'path', falling
 * back to a shorter path when the full one cannot be produced.
 *
 * Parameters:	dvp		Directory vnode whose path is the prefix
 *		leafname	Final component appended after a '/'
 *		path		Output buffer
 *		_len		Size of 'path' passed to vn_getpath*() and
 *				to the fallback strlcpy() calls
 *		truncated_path	Out: cleared on entry, set to 1 whenever
 *				the result is not the complete path
 *		firmlink	Selects between vn_getpath() and
 *				vn_getpath_no_firmlink(); the selecting
 *				condition is not visible in this listing
 *
 * Visible behaviour:
 *   - success with room left: overwrite path[len - 1] with '/', append
 *     the leaf name, and if the result exceeds MAXPATHLEN chop the string
 *     at the last '/' and flag truncation;
 *   - success without room: flag truncation only;
 *   - failure: flag truncation and retry on v_parent (or copy the mount
 *     point name, or "/") while the lookup keeps returning ENOSPC.
 *
 * NOTE(review): the append step bounds itself with MAXPATHLEN rather than
 * _len; the callers visible in this file all pass MAXPATHLEN.
 * NOTE(review): this listing omits some source lines (the return
 * statement, the 'do' opener and several braces among them).
 */
4690 safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
)
4692 int ret
, len
= _len
;
4694 *truncated_path
= 0;
4697 ret
= vn_getpath(dvp
, path
, &len
);
4699 ret
= vn_getpath_no_firmlink(dvp
, path
, &len
);
4701 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
/* path[len - 1] is replaced by the separator before the leaf name is appended */
4703 path
[len
- 1] = '/';
4704 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
4705 if (len
> MAXPATHLEN
) {
4708 // the string got truncated!
4709 *truncated_path
= 1;
4710 ptr
= my_strrchr(path
, '/');
4712 *ptr
= '\0'; // chop off the string at the last directory component
4714 len
= strlen(path
) + 1;
4717 } else if (ret
== 0) {
4718 *truncated_path
= 1;
4719 } else if (ret
!= 0) {
4720 struct vnode
*mydvp
= dvp
;
4722 if (ret
!= ENOSPC
) {
4723 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4724 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4726 *truncated_path
= 1;
4729 if (mydvp
->v_parent
!= NULL
) {
4730 mydvp
= mydvp
->v_parent
;
4731 } else if (mydvp
->v_mount
) {
4732 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4735 // no parent and no mount point? only thing is to punt and say "/" changed
4736 strlcpy(path
, "/", _len
);
4741 if (mydvp
== NULL
) {
4747 ret
= vn_getpath(mydvp
, path
, &len
);
4749 ret
= vn_getpath_no_firmlink(mydvp
, path
, &len
);
4751 } while (ret
== ENOSPC
);
/*
 * safe_getpath: convenience wrapper that forwards every argument to
 * safe_getpath_new() with firmlink = 1 and returns its result.
 * (Presumably the firmlink-aware variant, i.e. the one using vn_getpath();
 * the selecting condition is not visible in this listing.)
 */
4758 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4760 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1);
/*
 * safe_getpath_no_firmlink: convenience wrapper that forwards every
 * argument to safe_getpath_new() with firmlink = 0 and returns its result.
 * (Presumably the variant using vn_getpath_no_firmlink(); the selecting
 * condition is not visible in this listing.)
 */
4764 safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4766 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0);
4770 * Make a hard file link.
4772 * Returns: 0 Success
4777 * vnode_authorize:???
/*
 * linkat_internal: common implementation behind link() and linkat().
 *
 * Parameters:	ctx	VFS context of the caller
 *		fd1	Descriptor the source lookup is relative to
 *		path	Existing object to link to
 *		fd2	Descriptor the target lookup is relative to
 *		link	Name of the new link
 *		flag	AT_SYMLINK_FOLLOW selects FOLLOW for the source
 *			lookup, otherwise NOFOLLOW
 *		segflg	Address space of the path arguments (its use is on
 *			lines not visible in this listing)
 *
 * Visible sequence:
 *   1. look up the source with nameiat(&nd, fd1);
 *   2. refuse directories unless the mount sets MNTK_DIR_HARDLINKS, and
 *      for non-superusers compare the directory's va_uid with the
 *      caller's uid;
 *   3. reuse 'nd' for a CREATE lookup of the target with nameiat(&nd, fd2);
 *   4. MAC check, KAUTH_VNODE_LINKTARGET on the source, target must not
 *      exist, same mount required, KAUTH_VNODE_ADD_FILE on the parent;
 *   5. VNOP_LINK(), then MAC notification;
 *   6. optional audit path, kauth fileop (KAUTH_FILEOP_LINK) and fsevent
 *      (FSE_CREATE_FILE, FSE_STAT_CHANGED) notifications.
 *
 * The printf("XXX 54841485: ...") calls log each point at which EPERM
 * was observed.
 *
 * NOTE(review): this listing omits some source lines (error assignments,
 * gotos, labels, braces and cleanup calls among them), so only the
 * statements shown below are described.
 */
4782 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4783 user_addr_t link
, int flag
, enum uio_seg segflg
)
4785 vnode_t vp
, pvp
, dvp
, lvp
;
4786 struct nameidata nd
;
4792 int need_event
, has_listeners
, need_kpath2
;
4793 char *target_path
= NULL
;
4796 vp
= dvp
= lvp
= NULLVP
;
4798 /* look up the object we are linking to */
4799 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4800 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4803 error
= nameiat(&nd
, fd1
);
4805 if (error
== EPERM
) {
4806 printf("XXX 54841485: nameiat() src EPERM\n");
4815 * Normally, linking to directories is not supported.
4816 * However, some file systems may have limited support.
4818 if (vp
->v_type
== VDIR
) {
4819 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4820 error
= EPERM
; /* POSIX */
4821 printf("XXX 54841485: VDIR EPERM\n");
4825 /* Linking to a directory requires ownership. */
4826 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4827 struct vnode_attr dva
;
4830 VATTR_WANTED(&dva
, va_uid
);
4831 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4832 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4833 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4840 /* lookup the target node */
/* the same nameidata is reused, now as a CREATE lookup that keeps the parent (LOCKPARENT) */
4844 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4845 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4847 error
= nameiat(&nd
, fd2
);
4849 if (error
== EPERM
) {
4850 printf("XXX 54841485: nameiat() dst EPERM\n");
4858 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
4859 if (error
== EPERM
) {
4860 printf("XXX 54841485: mac_vnode_check_link() EPERM\n");
4866 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4867 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
4868 if (error
== EPERM
) {
4869 printf("XXX 54841485: vnode_authorize() LINKTARGET EPERM\n");
4874 /* target node must not exist */
4875 if (lvp
!= NULLVP
) {
4879 /* cannot link across mountpoints */
4880 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4885 /* authorize creation of the target node */
4886 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4887 if (error
== EPERM
) {
4888 printf("XXX 54841485: vnode_authorize() ADD_FILE EPERM\n");
4893 /* and finally make the link */
4894 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4896 if (error
== EPERM
) {
4897 printf("XXX 54841485: VNOP_LINK() EPERM\n");
4903 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4907 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4911 has_listeners
= kauth_authorize_fileop_has_listeners();
4915 if (AUDIT_RECORD_EXISTS()) {
/* the target path is only built when at least one consumer (fsevents, kauth listeners, audit) needs it */
4920 if (need_event
|| has_listeners
|| need_kpath2
) {
4921 char *link_to_path
= NULL
;
4922 int len
, link_name_len
;
4924 /* build the path to the new link file */
4925 GET_PATH(target_path
);
4926 if (target_path
== NULL
) {
4931 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4933 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
4935 if (has_listeners
) {
4936 /* build the path to file we are linking to */
4937 GET_PATH(link_to_path
);
4938 if (link_to_path
== NULL
) {
4943 link_name_len
= MAXPATHLEN
;
4944 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4946 * Call out to allow 3rd party notification of rename.
4947 * Ignore result of kauth_authorize_fileop call.
4949 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4950 (uintptr_t)link_to_path
,
4951 (uintptr_t)target_path
);
4953 if (link_to_path
!= NULL
) {
4954 RELEASE_PATH(link_to_path
);
4959 /* construct fsevent */
4960 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4962 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4965 // build the path to the destination of the link
4966 add_fsevent(FSE_CREATE_FILE
, ctx
,
4967 FSE_ARG_STRING
, len
, target_path
,
4968 FSE_ARG_FINFO
, &finfo
,
4973 // need an iocount on pvp in this case
4974 if (pvp
&& pvp
!= dvp
) {
4975 error
= vnode_get(pvp
);
4982 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4983 FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
);
4985 if (pvp
&& pvp
!= dvp
) {
4993 * nameidone has to happen before we vnode_put(dvp)
4994 * since it may need to release the fs_nodelock on the dvp
4997 if (target_path
!= NULL
) {
4998 RELEASE_PATH(target_path
);
/*
 * link: create a hard link named uap->link to uap->path.
 *
 * Thin wrapper over linkat_internal(): both paths are resolved relative
 * to AT_FDCWD, AT_SYMLINK_FOLLOW is passed as the flag, and the paths are
 * user-space addresses.  p and retval are unused.
 */
5012 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
5014 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
5015 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
/*
 * linkat: descriptor-relative variant of link().
 *
 * Any flag bit other than AT_SYMLINK_FOLLOW takes the branch below (its
 * body is not visible in this listing; presumably it rejects the call).
 * Otherwise the arguments are passed through to linkat_internal() with
 * user-space paths.  p and retval are unused.
 */
5019 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
5021 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
5025 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
5026 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
5030 * Make a symbolic link.
5032 * We could add support for ACLs here too...
/*
 * symlinkat_internal: common implementation behind symlink() and
 * symlinkat().
 *
 * Parameters:	ctx		VFS context of the caller
 *		path_data	The link contents (the string the new
 *				symlink will point at)
 *		fd		Descriptor the lookup of 'link' is
 *				relative to
 *		link		Name of the symlink to create
 *		segflg		Address space of path_data
 *
 * Visible sequence: obtain the link string, look up the new name with a
 * CREATE / OP_SYMLINK nameiat(), set va_type = VLNK and va_mode =
 * ACCESSPERMS filtered through fd_cmask, run the MAC and kauth creation
 * checks, fill in default attributes with vnode_authattr_new(), call
 * VNOP_SYMLINK(), then apply fallback attributes and the MAC label.  If
 * needed a follow-up LOOKUP fetches the new vnode so its name/parent
 * identity can be updated and an FSE_CREATE_FILE event posted.
 *
 * NOTE(review): this listing omits some source lines (conditions, gotos,
 * braces and cleanup calls among them), so only the statements shown
 * below are described.
 */
5036 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
5037 user_addr_t link
, enum uio_seg segflg
)
5039 struct vnode_attr va
;
5042 struct nameidata nd
;
/* user-space link text is copied into a MAXPATHLEN buffer; otherwise path_data is used directly as a pointer */
5048 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
5049 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
5050 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
5052 path
= (char *)path_data
;
5057 AUDIT_ARG(text
, path
); /* This is the link string */
5059 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
5062 error
= nameiat(&nd
, fd
);
5069 p
= vfs_context_proc(ctx
);
5071 VATTR_SET(&va
, va_type
, VLNK
);
5072 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
5075 error
= mac_vnode_check_create(ctx
,
5076 dvp
, &nd
.ni_cnd
, &va
);
5089 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
5091 /* get default ownership, etc. */
5093 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
5096 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
5099 /* do fallback attribute handling */
5100 if (error
== 0 && vp
) {
5101 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
5105 if (error
== 0 && vp
) {
5106 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
5111 int update_flags
= 0;
5113 /*check if a new vnode was created, else try to get one*/
5115 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
5117 nd
.ni_op
= OP_LOOKUP
;
5119 nd
.ni_cnd
.cn_flags
= 0;
5120 error
= nameiat(&nd
, fd
);
5128 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5129 /* call out to allow 3rd party notification of rename.
5130 * Ignore result of kauth_authorize_fileop call.
5132 if (kauth_authorize_fileop_has_listeners() &&
5134 char *new_link_path
= NULL
;
5137 /* build the path to the new link file */
5138 new_link_path
= get_pathbuff();
5140 vn_getpath(dvp
, new_link_path
, &len
);
5141 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
5142 new_link_path
[len
- 1] = '/';
5143 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
5146 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
5147 (uintptr_t)path
, (uintptr_t)new_link_path
);
5148 if (new_link_path
!= NULL
) {
5149 release_pathbuff(new_link_path
);
5153 // Make sure the name & parent pointers are hooked up
5154 if (vp
->v_name
== NULL
) {
5155 update_flags
|= VNODE_UPDATE_NAME
;
5157 if (vp
->v_parent
== NULLVP
) {
5158 update_flags
|= VNODE_UPDATE_PARENT
;
5162 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
5166 add_fsevent(FSE_CREATE_FILE
, ctx
,
5174 * nameidone has to happen before we vnode_put(dvp)
5175 * since it may need to release the fs_nodelock on the dvp
/* free only the buffer allocated above for the user-space case */
5184 if (path
&& (path
!= (char *)path_data
)) {
5185 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
/*
 * symlink: create a symbolic link named uap->link whose contents are
 * uap->path.
 *
 * Thin wrapper over symlinkat_internal() with the lookup relative to
 * AT_FDCWD and user-space arguments.  p and retval are unused.
 */
5192 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
5194 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
5195 uap
->link
, UIO_USERSPACE
);
/*
 * symlinkat: descriptor-relative variant of symlink().
 *
 * uap->path1 is the link contents, uap->path2 the name of the new
 * symlink, resolved relative to uap->fd.  Forwards to
 * symlinkat_internal() with user-space arguments.  p and retval are
 * unused.
 */
5199 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
5200 __unused
int32_t *retval
)
5202 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
5203 uap
->path2
, UIO_USERSPACE
);
5207 * Delete a whiteout from the filesystem.
5208 * No longer supported.
/*
 * undelete: system-call entry point for removing a whiteout; the file's
 * own note above states the operation is no longer supported.  All three
 * parameters are marked __unused.
 *
 * NOTE(review): the function body is not visible in this listing.
 */
5211 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
5217 * Delete a name from the filesystem.
/*
 * unlinkat_internal: common implementation behind unlink(), unlinkat(),
 * delete() and unlink1().
 *
 * Parameters:	ctx		VFS context of the caller
 *		fd		Descriptor the lookup is relative to
 *		start_dvp	Optional starting directory; stored in
 *				nd.ni_dvp before the lookup
 *		path_arg	Path of the name to remove
 *		segflg		Address space of path_arg
 *		unlink_flags	VNODE_REMOVE_* flags; NO_AUDIT_PATH,
 *				NODELETEBUSY and SKIP_NAMESPACE_EVENT are
 *				examined below
 *
 * Visible sequence: build lookup flags (LOCKPARENT, optional
 * AUDITVNPATH1, CN_ALLOWRSRCFORK), run a DELETE / OP_UNLINK compound
 * lookup with nameiat(), refuse filesystem roots and cross-mount entries,
 * authorize with vn_authorize_unlink(), gather fsevent/kauth path
 * information, remove via vnode_removenamedstream() or vn_remove(), then
 * notify kauth listeners and post FSE_DELETE.
 *
 * NOTE(review): this listing omits some source lines (labels, gotos,
 * braces, error assignments and cleanup calls among them), so only the
 * statements shown below are described.
 */
5221 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
5222 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
5224 struct nameidata nd
;
5227 struct componentname
*cnp
;
5229 char *no_firmlink_path
= NULL
;
5231 int len_no_firmlink_path
= 0;
5234 struct vnode_attr va
;
5240 int truncated_no_firmlink_path
;
5242 struct vnode_attr
*vap
;
5244 int retry_count
= 0;
5247 cn_flags
= LOCKPARENT
;
5248 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
5249 cn_flags
|= AUDITVNPATH1
;
5251 /* If a starting dvp is passed, it trumps any fd passed. */
5257 /* unlink or delete is allowed on rsrc forks and named streams */
5258 cn_flags
|= CN_ALLOWRSRCFORK
;
5267 truncated_no_firmlink_path
= 0;
5270 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
5272 nd
.ni_dvp
= start_dvp
;
5273 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
5277 error
= nameiat(&nd
, fd
);
5286 /* With Carbon delete semantics, busy files cannot be deleted */
5287 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
5288 flags
|= VNODE_REMOVE_NODELETEBUSY
;
5291 /* Skip any potential upcalls if told to. */
5292 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
5293 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
5297 batched
= vnode_compound_remove_available(vp
);
5299 * The root of a mounted filesystem cannot be deleted.
5301 if ((vp
->v_flag
& VROOT
) || (dvp
->v_mount
!= vp
->v_mount
)) {
5306 #if DEVELOPMENT || DEBUG
5308 * XXX VSWAP: Check for entitlements or special flag here
5309 * so we can restrict access appropriately.
5311 #else /* DEVELOPMENT || DEBUG */
5313 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
5317 #endif /* DEVELOPMENT || DEBUG */
5320 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
/* ENOENT from authorization is retried, bounded by MAX_AUTHORIZE_ENOENT_RETRIES (retry body not shown) */
5322 if (error
== ENOENT
) {
5323 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5334 if (!vnode_compound_remove_available(dvp
)) {
5335 panic("No vp, but no compound remove?");
5340 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5343 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5344 /* XXX need to get these data in batched VNOP */
5345 get_fse_info(vp
, &finfo
, ctx
);
5348 error
= vfs_get_notify_attributes(&va
);
5357 has_listeners
= kauth_authorize_fileop_has_listeners();
5358 if (need_event
|| has_listeners
) {
5366 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5367 if (no_firmlink_path
== NULL
) {
5368 GET_PATH(no_firmlink_path
);
5369 if (no_firmlink_path
== NULL
) {
5374 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
5378 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5379 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5383 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5385 if (error
== EKEEPLOOKING
) {
5387 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5390 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5391 panic("EKEEPLOOKING, but continue flag not set?");
5394 if (vnode_isdir(vp
)) {
5398 goto continue_lookup
;
5399 } else if (error
== ENOENT
&& batched
) {
5400 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5402 * For compound VNOPs, the authorization callback may
5403 * return ENOENT in case of racing hardlink lookups
5404 * hitting the name cache, redrive the lookup.
5414 * Call out to allow 3rd party notification of delete.
5415 * Ignore result of kauth_authorize_fileop call.
5418 if (has_listeners
) {
5419 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5420 KAUTH_FILEOP_DELETE
,
5425 if (vp
->v_flag
& VISHARDLINK
) {
5427 // if a hardlink gets deleted we want to blow away the
5428 // v_parent link because the path that got us to this
5429 // instance of the link is no longer valid. this will
5430 // force the next call to get the path to ask the file
5431 // system instead of just following the v_parent link.
5433 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5438 if (vp
->v_flag
& VISHARDLINK
) {
5439 get_fse_info(vp
, &finfo
, ctx
);
5441 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
/* NOTE(review): the event below carries no_firmlink_path, but the truncation flag is taken from truncated_path (the firmlink-aware lookup); truncated_no_firmlink_path is not consulted in the visible lines -- confirm this is intended */
5443 if (truncated_path
) {
5444 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5446 add_fsevent(FSE_DELETE
, ctx
,
5447 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
5448 FSE_ARG_FINFO
, &finfo
,
5460 if (no_firmlink_path
!= NULL
) {
5461 RELEASE_PATH(no_firmlink_path
);
5462 no_firmlink_path
= NULL
;
5465 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5466 * will cause its shadow file to go away if necessary.
5468 if (vp
&& (vnode_isnamedstream(vp
)) &&
5469 (vp
->v_parent
!= NULLVP
) &&
5470 vnode_isshadow(vp
)) {
5475 * nameidone has to happen before we vnode_put(dvp)
5476 * since it may need to release the fs_nodelock on the dvp
/*
 * unlink1: remove a name starting from an optional directory vnode.
 *
 * Forwards to unlinkat_internal() with AT_FDCWD as the descriptor and
 * the caller's start_dvp, path_arg and segflg.
 *
 * NOTE(review): the final argument of the call is cut off in this
 * listing; presumably it is unlink_flags -- confirm against the full
 * source.
 */
5492 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5493 enum uio_seg segflg
, int unlink_flags
)
5495 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5500 * Delete a name from the filesystem using Carbon semantics.
/*
 * delete: remove uap->path using Carbon semantics.
 *
 * Same as unlink() except that VNODE_REMOVE_NODELETEBUSY is passed to
 * unlinkat_internal(), which that function documents as "busy files
 * cannot be deleted".  The lookup is relative to AT_FDCWD with no
 * starting vnode.  p and retval are unused.
 */
5503 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5505 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5506 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5510 * Delete a name from the filesystem using POSIX semantics.
/*
 * unlink: remove uap->path using POSIX semantics.
 *
 * Thin wrapper over unlinkat_internal(): lookup relative to AT_FDCWD, no
 * starting vnode, user-space path, no removal flags.  p and retval are
 * unused.
 */
5513 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5515 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5516 uap
->path
, UIO_USERSPACE
, 0);
/*
 * unlinkat: descriptor-relative removal of a file or directory.
 *
 * Flag handling visible below:
 *   - any bit other than AT_REMOVEDIR / AT_REMOVEDIR_DATALESS takes the
 *     first branch (its body is not visible in this listing; presumably
 *     it rejects the call);
 *   - if either of those two bits is set the request is routed to
 *     rmdirat_internal(), with VNODE_REMOVE_DATALESS_DIR added when
 *     AT_REMOVEDIR_DATALESS is present;
 *   - otherwise it is a plain unlinkat_internal() with no removal flags.
 * p and retval are unused.
 */
5520 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5522 if (uap
->flag
& ~(AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5526 if (uap
->flag
& (AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5527 int unlink_flags
= 0;
5529 if (uap
->flag
& AT_REMOVEDIR_DATALESS
) {
5530 unlink_flags
|= VNODE_REMOVE_DATALESS_DIR
;
5532 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5533 uap
->path
, UIO_USERSPACE
, unlink_flags
);
5535 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5536 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5541 * Reposition read/write file offset.
/*
 * lseek: reposition the offset of the open file uap->fd and report the
 * new offset through *retval.
 *
 * Visible sequence: resolve the descriptor with fp_getfvp(), special-case
 * ENOTSUP and FIFOs (branch bodies not shown), run the MAC offset check,
 * take an iocount with vnode_getwithref(), compute the new offset
 * according to uap->whence (current offset, file size from vnode_size(),
 * or the FSIOC_FIOSEEKHOLE / FSIOC_FIOSEEKDATA ioctls), validate it, then
 * store it in fp->f_fglob->fg_offset.
 *
 * NOTE(review): this listing omits some source lines (the case labels,
 * error assignments and cleanup among them), so only the statements shown
 * below are described.
 */
5544 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5546 struct fileproc
*fp
;
5548 struct vfs_context
*ctx
;
5549 off_t offset
= uap
->offset
, file_size
;
5552 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5553 if (error
== ENOTSUP
) {
5558 if (vnode_isfifo(vp
)) {
5564 ctx
= vfs_context_current();
/* a zero-delta L_INCR seek is checked with the MAC get-offset hook; the other visible hook is change-offset */
5566 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5567 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5570 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5578 if ((error
= vnode_getwithref(vp
))) {
5583 switch (uap
->whence
) {
5585 offset
+= fp
->f_fglob
->fg_offset
;
5588 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5591 offset
+= file_size
;
5596 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5599 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
/* a positive request that produced a negative result is treated as a move past the maximum offset */
5605 if (uap
->offset
> 0 && offset
< 0) {
5606 /* Incremented/relative move past max size */
5610 * Allow negative offsets on character devices, per
5611 * POSIX 1003.1-2001. Most likely for writing disk
5614 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5615 /* Decremented/relative move before start */
5619 fp
->f_fglob
->fg_offset
= offset
;
5620 *retval
= fp
->f_fglob
->fg_offset
;
5626 * An lseek can affect whether data is "available to read." Use
5627 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5629 post_event_if_success(vp
, error
, NOTE_NONE
);
5630 (void)vnode_put(vp
);
5637 * Check access permissions.
5639 * Returns: 0 Success
5640 * vnode_authorize:???
/*
 * access1: check whether 'ctx' may access 'vp' in the ways described by
 * 'uflags'; shared by access_extended() and faccessat_internal().
 *
 * Parameters:	vp	Vnode being checked
 *		dvp	Its parent, passed through to vnode_authorize()
 *		uflags	Either the classic R_OK / W_OK / X_OK bits or,
 *			when _ACCESS_EXTENDED_MASK bits are present, kauth
 *			action bits shifted left by 8
 *		ctx	Context (credential) to check against
 *
 * Classic bits are translated as follows: R_OK -> READ_DATA; W_OK ->
 * ADD_FILE | ADD_SUBDIRECTORY for directories, WRITE_DATA otherwise
 * (presumably; the 'else' lines are not shown); X_OK -> SEARCH for
 * directories, EXECUTE otherwise.  The MAC access hook is consulted and
 * the final decision comes from vnode_authorize() with
 * KAUTH_VNODE_ACCESS added to the action.
 *
 * NOTE(review): this listing omits some source lines (declarations,
 * braces and the return among them).
 */
5643 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5645 kauth_action_t action
;
5649 * If just the regular access bits, convert them to something
5650 * that vnode_authorize will understand.
5652 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5654 if (uflags
& R_OK
) {
5655 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5657 if (uflags
& W_OK
) {
5658 if (vnode_isdir(vp
)) {
5659 action
|= KAUTH_VNODE_ADD_FILE
|
5660 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5661 /* might want delete rights here too */
5663 action
|= KAUTH_VNODE_WRITE_DATA
;
5666 if (uflags
& X_OK
) {
5667 if (vnode_isdir(vp
)) {
5668 action
|= KAUTH_VNODE_SEARCH
;
5670 action
|= KAUTH_VNODE_EXECUTE
;
5674 /* take advantage of definition of uflags */
5675 action
= uflags
>> 8;
5679 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5685 /* action == 0 means only check for existence */
5687 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5698 * access_extended: Check access permissions in bulk.
5700 * Description: uap->entries Pointer to an array of accessx
5701 * descriptor structs, plus one or
5702 * more NULL terminated strings (see
5703 * "Notes" section below).
5704 * uap->size Size of the area pointed to by
5706 * uap->results Pointer to the results array.
5708 * Returns: 0 Success
5709 * ENOMEM Insufficient memory
5710 * EINVAL Invalid arguments
5711 * namei:EFAULT Bad address
5712 * namei:ENAMETOOLONG Filename too long
5713 * namei:ENOENT No such file or directory
5714 * namei:ELOOP Too many levels of symbolic links
5715 * namei:EBADF Bad file descriptor
5716 * namei:ENOTDIR Not a directory
5721 * uap->results Array contents modified
5723 * Notes: The uap->entries are structured as an arbitrary length array
5724 * of accessx descriptors, followed by one or more NULL terminated
5727 * struct accessx_descriptor[0]
5729 * struct accessx_descriptor[n]
5730 * char name_data[0];
5732 * We determine the entry count by walking the buffer containing
5733 * the uap->entries argument descriptor. For each descriptor we
5734 * see, the valid values for the offset ad_name_offset will be
5735 * in the byte range:
5737 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5739 * [ uap->entries + uap->size - 2 ]
5741 * since we must have at least one string, and the string must
5742 * be at least one character plus the NULL terminator in length.
5744 * XXX: Need to support the check-as uid argument
/*
 * access_extended: run a batch of access checks described by a
 * user-supplied table of accessx descriptors followed by path strings.
 *
 * Visible sequence:
 *   1. bound uap->size (at most ACCESSX_MAX_TABLESIZE, at least one
 *      descriptor plus two bytes);
 *   2. copy the table in, into stack_input when it fits or a MALLOC'd
 *      buffer otherwise, and force a NUL in its last byte;
 *   3. build a private vfs_context whose credential comes from
 *      kauth_cred_copy_real();
 *   4. walk the descriptors to validate each ad_name_offset and work out
 *      how many descriptors precede the string area (desc_actual);
 *   5. allocate a zeroed errno_t result array, look each distinct path
 *      up (asking for the parent when any chained entry has _DELETE_OK),
 *      and store access1()'s answer per descriptor;
 *   6. copy the results out and release the buffers and credential.
 *
 * NOTE(review): this listing omits some source lines (error assignments,
 * gotos, labels and braces among them), so only the statements shown
 * below are described.
 */
5747 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5749 struct accessx_descriptor
*input
= NULL
;
5750 errno_t
*result
= NULL
;
5753 unsigned int desc_max
, desc_actual
, i
, j
;
5754 struct vfs_context context
;
5755 struct nameidata nd
;
5759 #define ACCESSX_MAX_DESCR_ON_STACK 10
5760 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5762 context
.vc_ucred
= NULL
;
5765 * Validate parameters; if valid, copy the descriptor array and string
5766 * arguments into local memory. Before proceeding, the following
5767 * conditions must have been met:
5769 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5770 * o There must be sufficient room in the request for at least one
5771 * descriptor and a one yte NUL terminated string.
5772 * o The allocation of local storage must not fail.
5774 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
5777 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
/* small requests are staged in the on-stack array; larger ones are heap-allocated and freed at the end when input != stack_input */
5780 if (uap
->size
<= sizeof(stack_input
)) {
5781 input
= stack_input
;
5783 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5784 if (input
== NULL
) {
5789 error
= copyin(uap
->entries
, input
, uap
->size
);
5794 AUDIT_ARG(opaque
, input
, uap
->size
);
5797 * Force NUL termination of the copyin buffer to avoid nami() running
5798 * off the end. If the caller passes us bogus data, they may get a
5801 ((char *)input
)[uap
->size
- 1] = 0;
5804 * Access is defined as checking against the process' real identity,
5805 * even if operations are checking the effective identity. This
5806 * requires that we use a local vfs context.
5808 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5809 context
.vc_thread
= current_thread();
5812 * Find out how many entries we have, so we can allocate the result
5813 * array by walking the list and adjusting the count downward by the
5814 * earliest string offset we see.
5816 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5817 desc_actual
= desc_max
;
5818 for (i
= 0; i
< desc_actual
; i
++) {
5820 * Take the offset to the name string for this entry and
5821 * convert to an input array index, which would be one off
5822 * the end of the array if this entry was the lowest-addressed
5825 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5828 * An offset greater than the max allowable offset is an error.
5829 * It is also an error for any valid entry to point
5830 * to a location prior to the end of the current entry, if
5831 * it's not a reference to the string of the previous entry.
5833 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5838 /* Also do not let ad_name_offset point to something beyond the size of the input */
5839 if (input
[i
].ad_name_offset
>= uap
->size
) {
5845 * An offset of 0 means use the previous descriptor's offset;
5846 * this is used to chain multiple requests for the same file
5847 * to avoid multiple lookups.
5850 /* This is not valid for the first entry */
5859 * If the offset of the string for this descriptor is before
5860 * what we believe is the current actual last descriptor,
5861 * then we need to adjust our estimate downward; this permits
5862 * the string table following the last descriptor to be out
5863 * of order relative to the descriptor list.
5865 if (j
< desc_actual
) {
5871 * We limit the actual number of descriptors we are willing to process
5872 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5873 * requested does not exceed this limit,
5875 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5879 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5880 if (result
== NULL
) {
5886 * Do the work by iterating over the descriptor entries we know to
5887 * at least appear to contain valid data.
5890 for (i
= 0; i
< desc_actual
; i
++) {
5892 * If the ad_name_offset is 0, then we use the previous
5893 * results to make the check; otherwise, we are looking up
5896 if (input
[i
].ad_name_offset
!= 0) {
5897 /* discard old vnodes */
5908 * Scan forward in the descriptor list to see if we
5909 * need the parent vnode. We will need it if we are
5910 * deleting, since we must have rights to remove
5911 * entries in the parent directory, as well as the
5912 * rights to delete the object itself.
5914 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5915 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
5916 if (input
[j
].ad_flags
& _DELETE_OK
) {
5921 niopts
= FOLLOW
| AUDITVNPATH1
;
5923 /* need parent for vnode_authorize for deletion test */
5925 niopts
|= WANTPARENT
;
5929 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5930 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5943 * Handle lookup errors.
5953 /* run this access check */
5954 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5957 /* fatal lookup error */
5963 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5965 /* copy out results */
5966 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5969 if (input
&& input
!= stack_input
) {
5970 FREE(input
, M_TEMP
);
5973 FREE(result
, M_TEMP
);
5981 if (IS_VALID_CRED(context
.vc_ucred
)) {
5982 kauth_cred_unref(&context
.vc_ucred
);
5989 * Returns: 0 Success
5990 * namei:EFAULT Bad address
5991 * namei:ENAMETOOLONG Filename too long
5992 * namei:ENOENT No such file or directory
5993 * namei:ELOOP Too many levels of symbolic links
5994 * namei:EBADF Bad file descriptor
5995 * namei:ENOTDIR Not a directory
/*
 * faccessat_internal: common implementation behind access() and
 * faccessat().
 *
 * Parameters:	ctx	VFS context of the caller
 *		fd	Descriptor the lookup is relative to
 *		path	Path to check
 *		amode	Access mode bits handed to access1()
 *		flag	AT_EACCESS and/or AT_SYMLINK_NOFOLLOW
 *		segflg	Address space of 'path'
 *
 * Visible sequence: build a private vfs_context whose credential is a
 * copy of the real credential (kauth_cred_copy_real) unless AT_EACCESS
 * is set, in which case the caller's credential pointer is reused; look
 * the path up (NOFOLLOW when AT_SYMLINK_NOFOLLOW, WANTPARENT when
 * _DELETE_OK is requested, resource forks allowed for F_OK); hold an
 * extra reference on shadow named-stream vnodes around the check; call
 * access1(); release the vnode(s), and drop the credential only in the
 * non-AT_EACCESS case that copied it.
 *
 * NOTE(review): this listing omits some source lines (declarations,
 * braces, the return and others), so only the statements shown below are
 * described.
 */
6000 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
6001 int flag
, enum uio_seg segflg
)
6004 struct nameidata nd
;
6006 struct vfs_context context
;
6008 int is_namedstream
= 0;
6012 * Unless the AT_EACCESS option is used, Access is defined as checking
6013 * against the process' real identity, even if operations are checking
6014 * the effective identity. So we need to tweak the credential
6015 * in the context for that case.
6017 if (!(flag
& AT_EACCESS
)) {
6018 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
6020 context
.vc_ucred
= ctx
->vc_ucred
;
6022 context
.vc_thread
= ctx
->vc_thread
;
6025 niopts
= (flag
& AT_SYMLINK_NOFOLLOW
? NOFOLLOW
: FOLLOW
) | AUDITVNPATH1
;
6026 /* need parent for vnode_authorize for deletion test */
6027 if (amode
& _DELETE_OK
) {
6028 niopts
|= WANTPARENT
;
6030 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
6034 /* access(F_OK) calls are allowed for resource forks. */
6035 if (amode
== F_OK
) {
6036 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6039 error
= nameiat(&nd
, fd
);
6045 /* Grab reference on the shadow stream file vnode to
6046 * force an inactive on release which will mark it
6049 if (vnode_isnamedstream(nd
.ni_vp
) &&
6050 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6051 vnode_isshadow(nd
.ni_vp
)) {
6053 vnode_ref(nd
.ni_vp
);
6057 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
6060 if (is_namedstream
) {
6061 vnode_rele(nd
.ni_vp
);
6065 vnode_put(nd
.ni_vp
);
6066 if (amode
& _DELETE_OK
) {
6067 vnode_put(nd
.ni_dvp
);
6072 if (!(flag
& AT_EACCESS
)) {
6073 kauth_cred_unref(&context
.vc_ucred
);
/*
 * access: check accessibility of uap->path for the mode in uap->flags.
 *
 * Thin wrapper over faccessat_internal(): lookup relative to AT_FDCWD,
 * flag = 0 (so neither AT_EACCESS nor AT_SYMLINK_NOFOLLOW applies), and
 * a user-space path.  p and retval are unused.
 */
6079 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
6081 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
6082 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
/*
 * faccessat: descriptor-relative variant of access().
 *
 * Any flag bit other than AT_EACCESS / AT_SYMLINK_NOFOLLOW takes the
 * branch below (its body is not visible in this listing; presumably it
 * rejects the call).  Otherwise the arguments are passed through to
 * faccessat_internal() with a user-space path.  p and retval are unused.
 */
6086 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
6087 __unused
int32_t *retval
)
6089 if (uap
->flag
& ~(AT_EACCESS
| AT_SYMLINK_NOFOLLOW
)) {
6093 return faccessat_internal(vfs_context_current(), uap
->fd
,
6094 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
6098 * Returns: 0 Success
6105 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
6106 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
6107 enum uio_seg segflg
, int fd
, int flag
)
6109 struct nameidata nd
;
6116 struct user64_stat user64_sb
;
6117 struct user32_stat user32_sb
;
6118 struct user64_stat64 user64_sb64
;
6119 struct user32_stat64 user32_sb64
;
6123 kauth_filesec_t fsec
;
6124 size_t xsecurity_bufsize
;
6126 struct fileproc
*fp
= NULL
;
6127 int needsrealdev
= 0;
6129 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6130 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
6134 int is_namedstream
= 0;
6135 /* stat calls are allowed for resource forks. */
6136 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6139 if (flag
& AT_FDONLY
) {
6142 error
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
);
6146 if ((error
= vnode_getwithref(fvp
))) {
6152 error
= nameiat(&nd
, fd
);
6157 fsec
= KAUTH_FILESEC_NONE
;
6159 statptr
= (void *)&source
;
6162 /* Grab reference on the shadow stream file vnode to
6163 * force an inactive on release which will mark it
6166 if (vnode_isnamedstream(nd
.ni_vp
) &&
6167 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6168 vnode_isshadow(nd
.ni_vp
)) {
6170 vnode_ref(nd
.ni_vp
);
6174 needsrealdev
= flag
& AT_REALDEV
? 1 : 0;
6175 if (fp
&& (xsecurity
== USER_ADDR_NULL
)) {
6177 * If the caller has the file open, and is not
6178 * requesting extended security information, we are
6179 * going to let them get the basic stat information.
6181 error
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
,
6182 fp
->f_fglob
->fg_cred
);
6184 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
),
6185 isstat64
, needsrealdev
, ctx
);
6189 if (is_namedstream
) {
6190 vnode_rele(nd
.ni_vp
);
6193 vnode_put(nd
.ni_vp
);
6203 /* Zap spare fields */
6204 if (isstat64
!= 0) {
6205 source
.sb64
.st_lspare
= 0;
6206 source
.sb64
.st_qspare
[0] = 0LL;
6207 source
.sb64
.st_qspare
[1] = 0LL;
6208 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6209 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
6210 my_size
= sizeof(dest
.user64_sb64
);
6211 sbp
= (caddr_t
)&dest
.user64_sb64
;
6213 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
6214 my_size
= sizeof(dest
.user32_sb64
);
6215 sbp
= (caddr_t
)&dest
.user32_sb64
;
6218 * Check if we raced (post lookup) against the last unlink of a file.
6220 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
6221 source
.sb64
.st_nlink
= 1;
6224 source
.sb
.st_lspare
= 0;
6225 source
.sb
.st_qspare
[0] = 0LL;
6226 source
.sb
.st_qspare
[1] = 0LL;
6227 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6228 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
6229 my_size
= sizeof(dest
.user64_sb
);
6230 sbp
= (caddr_t
)&dest
.user64_sb
;
6232 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
6233 my_size
= sizeof(dest
.user32_sb
);
6234 sbp
= (caddr_t
)&dest
.user32_sb
;
6238 * Check if we raced (post lookup) against the last unlink of a file.
6240 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
6241 source
.sb
.st_nlink
= 1;
6244 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
6248 /* caller wants extended security information? */
6249 if (xsecurity
!= USER_ADDR_NULL
) {
6250 /* did we get any? */
6251 if (fsec
== KAUTH_FILESEC_NONE
) {
6252 if (susize(xsecurity_size
, 0) != 0) {
6257 /* find the user buffer size */
6258 xsecurity_bufsize
= fusize(xsecurity_size
);
6260 /* copy out the actual data size */
6261 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
6266 /* if the caller supplied enough room, copy out to it */
6267 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
6268 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
6273 if (fsec
!= KAUTH_FILESEC_NONE
) {
6274 kauth_filesec_free(fsec
);
6280 * stat_extended: Get file status; with extended security (ACL).
6282 * Parameters: p (ignored)
6283 * uap User argument descriptor (see below)
6286 * Indirect: uap->path Path of file to get status from
6287 * uap->ub User buffer (holds file status info)
6288 * uap->xsecurity ACL to get (extended security)
6289 * uap->xsecurity_size Size of ACL
6291 * Returns: 0 Success
6296 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
6297 __unused
int32_t *retval
)
6299 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6300 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6305 * Returns: 0 Success
6306 * fstatat_internal:??? [see fstatat_internal() in this file]
/*
 * stat: forwards uap->path and uap->ub to fstatat_internal() under the
 * current VFS context.
 *
 * Remaining arguments, in fstatat_internal() parameter order:
 * xsecurity = 0, xsecurity_size = 0, isstat64 = 0,
 * segflg = UIO_USERSPACE, fd = AT_FDCWD, flag = 0.  With
 * AT_SYMLINK_NOFOLLOW clear, fstatat_internal() selects FOLLOW for the
 * lookup.  p and retval are unused.
 */
6309 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
6311 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6312 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
/*
 * stat64: same call as stat(), but passes isstat64 = 1 to
 * fstatat_internal().
 *
 * Other arguments: xsecurity = 0, xsecurity_size = 0,
 * segflg = UIO_USERSPACE, fd = AT_FDCWD, flag = 0 (FOLLOW is selected
 * for the lookup).  p and retval are unused.
 */
6316 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
6318 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6319 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
6323 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6325 * Parameters: p (ignored)
6326 * uap User argument descriptor (see below)
6329 * Indirect: uap->path Path of file to get status from
6330 * uap->ub User buffer (holds file status info)
6331 * uap->xsecurity ACL to get (extended security)
6332 * uap->xsecurity_size Size of ACL
6334 * Returns: 0 Success
6339 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
6341 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6342 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6347 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6349 * Parameters: p (ignored)
6350 * uap User argument descriptor (see below)
6353 * Indirect: uap->path Path of file to get status from
6354 * uap->ub User buffer (holds file status info)
6355 * uap->xsecurity ACL to get (extended security)
6356 * uap->xsecurity_size Size of ACL
6358 * Returns: 0 Success
6363 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
6365 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6366 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6367 AT_SYMLINK_NOFOLLOW
);
6371 * Get file status; this version does not follow links.
/*
 * lstat: forwards uap->path and uap->ub to fstatat_internal() with
 * flag = AT_SYMLINK_NOFOLLOW, which makes fstatat_internal() select
 * NOFOLLOW for the lookup.
 *
 * Other arguments: xsecurity = 0, xsecurity_size = 0, isstat64 = 0,
 * segflg = UIO_USERSPACE, fd = AT_FDCWD.  p and retval are unused.
 */
6374 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
6376 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6377 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
/*
 * lstat64: same call as lstat(), but passes isstat64 = 1 to
 * fstatat_internal().
 *
 * Other arguments: xsecurity = 0, xsecurity_size = 0,
 * segflg = UIO_USERSPACE, fd = AT_FDCWD, flag = AT_SYMLINK_NOFOLLOW
 * (NOFOLLOW is selected for the lookup).  p and retval are unused.
 */
6381 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6383 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6384 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6388 * lstat64_extended: Get file status; can handle large inode numbers; does not
6389 * follow links; with extended security (ACL).
6391 * Parameters: p (ignored)
6392 * uap User argument descriptor (see below)
6395 * Indirect: uap->path Path of file to get status from
6396 * uap->ub User buffer (holds file status info)
6397 * uap->xsecurity ACL to get (extended security)
6398 * uap->xsecurity_size Size of ACL
6400 * Returns: 0 Success
6405 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6407 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6408 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6409 AT_SYMLINK_NOFOLLOW
);
/*
 * fstatat: descriptor-relative wrapper around fstatat_internal().
 *
 * Tests uap->flag for any bit outside
 * AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY (the statement executed
 * when such a bit is set is not visible in this excerpt), then forwards
 * uap->path, uap->ub, uap->fd and uap->flag with xsecurity = 0,
 * xsecurity_size = 0, isstat64 = 0 and segflg = UIO_USERSPACE.
 * p and retval are unused.
 */
6413 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
6415 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6419 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6420 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
/*
 * fstatat64: same as fstatat(), but passes isstat64 = 1 to
 * fstatat_internal().
 *
 * Tests uap->flag for any bit outside
 * AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY (the statement executed
 * when such a bit is set is not visible in this excerpt), then forwards
 * uap->path, uap->ub, uap->fd and uap->flag with xsecurity = 0,
 * xsecurity_size = 0 and segflg = UIO_USERSPACE.
 * p and retval are unused.
 */
6424 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6425 __unused
int32_t *retval
)
6427 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6431 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6432 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6436 * Get configurable pathname variables.
6438 * Returns: 0 Success
6442 * Notes: Global implementation constants are intended to be
6443 * implemented in this function directly; all other constants
6444 * are per-FS implementation, and therefore must be handled in
6445 * each respective FS, instead.
6447 * XXX We implement some things globally right now that should actually be
6448 * XXX per-FS; we will need to deal with this at some point.
6452 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6455 struct nameidata nd
;
6456 vfs_context_t ctx
= vfs_context_current();
6458 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6459 UIO_USERSPACE
, uap
->path
, ctx
);
6465 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
6467 vnode_put(nd
.ni_vp
);
6473 * Return target name of a symbolic link.
6477 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6478 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6484 struct nameidata nd
;
6485 char uio_buf
[UIO_SIZEOF(1)];
6487 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6490 error
= nameiat(&nd
, fd
);
6498 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6499 &uio_buf
[0], sizeof(uio_buf
));
6500 uio_addiov(auio
, buf
, bufsize
);
6501 if (vp
->v_type
!= VLNK
) {
6505 error
= mac_vnode_check_readlink(ctx
, vp
);
6508 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6512 error
= VNOP_READLINK(vp
, auio
, ctx
);
6517 *retval
= bufsize
- (int)uio_resid(auio
);
/*
 * readlink: wrapper around readlinkat_internal() with fd = AT_FDCWD.
 *
 * procseg is UIO_USERSPACE64 when IS_64BIT_PROCESS(p) is true and
 * UIO_USERSPACE32 otherwise; the same value is passed for both the path
 * segment and the buffer segment.  uap->path and uap->buf are converted
 * with CAST_USER_ADDR_T, uap->count is the buffer size, and retval is
 * handed through to readlinkat_internal().
 */
6522 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6524 enum uio_seg procseg
;
6526 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6527 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6528 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6529 uap
->count
, procseg
, retval
);
/*
 * readlinkat: wrapper around readlinkat_internal() using the caller's
 * uap->fd.
 *
 * procseg is UIO_USERSPACE64 when IS_64BIT_PROCESS(p) is true and
 * UIO_USERSPACE32 otherwise; the same value is passed for both the path
 * segment and the buffer segment.  uap->path, uap->buf and uap->bufsize
 * are passed as-is, and retval is handed through to
 * readlinkat_internal().
 */
6533 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6535 enum uio_seg procseg
;
6537 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6538 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6539 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6543 * Change file flags, the deep inner layer.
6546 chflags0(vnode_t vp
, struct vnode_attr
*va
,
6547 int (*setattr
)(vnode_t
, void *, vfs_context_t
),
6548 void *arg
, vfs_context_t ctx
)
6550 kauth_action_t action
= 0;
6554 error
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
);
6560 /* request authorisation, disregard immutability */
6561 if ((error
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) {
6565 * Request that the auth layer disregard those file flags it's allowed to when
6566 * authorizing this operation; we need to do this in order to be able to
6567 * clear immutable flags.
6569 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6572 error
= (*setattr
)(vp
, arg
, ctx
);
6576 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
);
6585 * Change file flags.
6587 * NOTE: this will vnode_put() `vp'
6590 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6592 struct vnode_attr va
;
6596 VATTR_SET(&va
, va_flags
, flags
);
6598 error
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
);
6601 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6609 * Change flags of a file given a path name.
6613 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6616 vfs_context_t ctx
= vfs_context_current();
6618 struct nameidata nd
;
6620 AUDIT_ARG(fflags
, uap
->flags
);
6621 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6622 UIO_USERSPACE
, uap
->path
, ctx
);
6630 /* we don't vnode_put() here because chflags1 does internally */
6631 error
= chflags1(vp
, uap
->flags
, ctx
);
6637 * Change flags of a file given a file descriptor.
6641 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6646 AUDIT_ARG(fd
, uap
->fd
);
6647 AUDIT_ARG(fflags
, uap
->flags
);
6648 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6652 if ((error
= vnode_getwithref(vp
))) {
6657 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6659 /* we don't vnode_put() here because chflags1 does internally */
6660 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6667 * Change security information on a filesystem object.
6669 * Returns: 0 Success
6670 * EPERM Operation not permitted
6671 * vnode_authattr:??? [anything vnode_authattr can return]
6672 * vnode_authorize:??? [anything vnode_authorize can return]
6673 * vnode_setattr:??? [anything vnode_setattr can return]
6675 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6676 * translated to EPERM before being returned.
6679 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6681 kauth_action_t action
;
6684 AUDIT_ARG(mode
, vap
->va_mode
);
6685 /* XXX audit new args */
6688 /* chmod calls are not allowed for resource forks. */
6689 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6695 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6696 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
6700 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6701 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6702 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6703 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6708 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6709 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6714 /* make sure that the caller is allowed to set this security information */
6715 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6716 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6717 if (error
== EACCES
) {
6723 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
6728 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6729 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6732 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6733 mac_vnode_notify_setowner(ctx
, vp
,
6734 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6735 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6738 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6739 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6748 * Change mode of a file given a path name.
6750 * Returns: 0 Success
6751 * namei:??? [anything namei can return]
6752 * chmod_vnode:??? [anything chmod_vnode can return]
6755 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6756 int fd
, int flag
, enum uio_seg segflg
)
6758 struct nameidata nd
;
6761 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6762 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6764 if ((error
= nameiat(&nd
, fd
))) {
6767 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6768 vnode_put(nd
.ni_vp
);
6774 * chmod_extended: Change the mode of a file given a path name; with extended
6775 * argument list (including extended security (ACL)).
6777 * Parameters: p Process requesting the open
6778 * uap User argument descriptor (see below)
6781 * Indirect: uap->path Path to object (same as 'chmod')
6782 * uap->uid UID to set
6783 * uap->gid GID to set
6784 * uap->mode File mode to set (same as 'chmod')
6785 * uap->xsecurity ACL to set (or delete)
6787 * Returns: 0 Success
6790 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6792 * XXX: We should enumerate the possible errno values here, and where
6793 * in the code they originated.
6796 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6799 struct vnode_attr va
;
6800 kauth_filesec_t xsecdst
;
6802 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6805 if (uap
->mode
!= -1) {
6806 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6808 if (uap
->uid
!= KAUTH_UID_NONE
) {
6809 VATTR_SET(&va
, va_uid
, uap
->uid
);
6811 if (uap
->gid
!= KAUTH_GID_NONE
) {
6812 VATTR_SET(&va
, va_gid
, uap
->gid
);
6816 switch (uap
->xsecurity
) {
6817 /* explicit remove request */
6818 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6819 VATTR_SET(&va
, va_acl
, NULL
);
6822 case USER_ADDR_NULL
:
6825 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6828 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6829 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6832 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6835 if (xsecdst
!= NULL
) {
6836 kauth_filesec_free(xsecdst
);
6842 * Returns: 0 Success
6843 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: build a vnode_attr carrying only a mode and apply
 * it through chmodat().
 *
 * Sets va_mode in the local vnode_attr to (mode & ALLPERMS), then
 * returns the result of chmodat(ctx, path, &va, fd, flag, segflg).
 * NOTE(review): any initialisation of va before VATTR_SET is not visible
 * in this excerpt -- confirm against the full file.
 */
6846 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6847 int flag
, enum uio_seg segflg
)
6849 struct vnode_attr va
;
6852 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6854 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
/*
 * chmod: forwards uap->path and uap->mode to fchmodat_internal() under
 * the current VFS context, with fd = AT_FDCWD, flag = 0 and
 * segflg = UIO_USERSPACE.  p and retval are unused.
 */
6858 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6860 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6861 AT_FDCWD
, 0, UIO_USERSPACE
);
/*
 * fchmodat: descriptor-relative wrapper around fchmodat_internal().
 *
 * Tests uap->flag for any bit other than AT_SYMLINK_NOFOLLOW (the
 * statement executed when such a bit is set is not visible in this
 * excerpt), then forwards uap->path, uap->mode, uap->fd and uap->flag
 * with segflg = UIO_USERSPACE.  p and retval are unused.
 */
6865 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6867 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6871 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6872 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
6876 * Change mode of a file given a file descriptor.
6879 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6886 if ((error
= file_vnode(fd
, &vp
)) != 0) {
6889 if ((error
= vnode_getwithref(vp
)) != 0) {
6893 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6895 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6896 (void)vnode_put(vp
);
6903 * fchmod_extended: Change mode of a file given a file descriptor; with
6904 * extended argument list (including extended security (ACL)).
6906 * Parameters: p Process requesting to change file mode
6907 * uap User argument descriptor (see below)
6910 * Indirect: uap->mode File mode to set (same as 'chmod')
6911 * uap->uid UID to set
6912 * uap->gid GID to set
6913 * uap->xsecurity ACL to set (or delete)
6914 * uap->fd File descriptor of file to change mode
6916 * Returns: 0 Success
6921 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6924 struct vnode_attr va
;
6925 kauth_filesec_t xsecdst
;
6927 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6930 if (uap
->mode
!= -1) {
6931 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6933 if (uap
->uid
!= KAUTH_UID_NONE
) {
6934 VATTR_SET(&va
, va_uid
, uap
->uid
);
6936 if (uap
->gid
!= KAUTH_GID_NONE
) {
6937 VATTR_SET(&va
, va_gid
, uap
->gid
);
6941 switch (uap
->xsecurity
) {
6942 case USER_ADDR_NULL
:
6943 VATTR_SET(&va
, va_acl
, NULL
);
6945 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6946 VATTR_SET(&va
, va_acl
, NULL
);
6949 case CAST_USER_ADDR_T(-1):
6952 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6955 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6958 error
= fchmod1(p
, uap
->fd
, &va
);
6961 switch (uap
->xsecurity
) {
6962 case USER_ADDR_NULL
:
6963 case CAST_USER_ADDR_T(-1):
6966 if (xsecdst
!= NULL
) {
6967 kauth_filesec_free(xsecdst
);
/*
 * fchmod: change the mode of the file referred to by uap->fd.
 *
 * Sets va_mode in the local vnode_attr to (uap->mode & ALLPERMS) and
 * returns the result of fchmod1(p, uap->fd, &va).  retval is unused.
 * NOTE(review): any initialisation of va before VATTR_SET is not visible
 * in this excerpt -- confirm against the full file.
 */
6974 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6976 struct vnode_attr va
;
6979 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6981 return fchmod1(p
, uap
->fd
, &va
);
6986 * Set ownership given a path name.
6990 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6991 gid_t gid
, int flag
, enum uio_seg segflg
)
6994 struct vnode_attr va
;
6996 struct nameidata nd
;
6998 kauth_action_t action
;
7000 AUDIT_ARG(owner
, uid
, gid
);
7002 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7003 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
7005 error
= nameiat(&nd
, fd
);
7014 if (uid
!= (uid_t
)VNOVAL
) {
7015 VATTR_SET(&va
, va_uid
, uid
);
7017 if (gid
!= (gid_t
)VNOVAL
) {
7018 VATTR_SET(&va
, va_gid
, gid
);
7022 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
7028 /* preflight and authorize attribute changes */
7029 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7032 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7035 error
= vnode_setattr(vp
, &va
, ctx
);
7039 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
7045 * EACCES is only allowed from namei(); permissions failure should
7046 * return EPERM, so we need to translate the error code.
7048 if (error
== EACCES
) {
/*
 * chown: forwards uap->path, uap->uid and uap->gid to
 * fchownat_internal() under the current VFS context, with
 * fd = AT_FDCWD, flag = 0 and segflg = UIO_USERSPACE.  With
 * AT_SYMLINK_NOFOLLOW clear, fchownat_internal() selects FOLLOW for the
 * lookup.  p and retval are unused.
 */
7057 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
7059 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7060 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
/*
 * lchown: forwards uap->path, uap->owner and uap->group to
 * fchownat_internal() under the current VFS context, with
 * fd = AT_FDCWD, flag = AT_SYMLINK_NOFOLLOW and
 * segflg = UIO_USERSPACE.  The flag makes fchownat_internal() select
 * NOFOLLOW for the lookup.  Note that this argument structure names its
 * fields owner/group rather than uid/gid.  p and retval are unused.
 */
7064 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
7066 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7067 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
/*
 * fchownat: descriptor-relative wrapper around fchownat_internal().
 *
 * Tests uap->flag for any bit other than AT_SYMLINK_NOFOLLOW (the
 * statement executed when such a bit is set is not visible in this
 * excerpt), then forwards uap->fd, uap->path, uap->uid, uap->gid and
 * uap->flag with segflg = UIO_USERSPACE.  p and retval are unused.
 */
7071 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
7073 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7077 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
7078 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
7082 * Set ownership given a file descriptor.
7086 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
7088 struct vnode_attr va
;
7089 vfs_context_t ctx
= vfs_context_current();
7092 kauth_action_t action
;
7094 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7095 AUDIT_ARG(fd
, uap
->fd
);
7097 if ((error
= file_vnode(uap
->fd
, &vp
))) {
7101 if ((error
= vnode_getwithref(vp
))) {
7105 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7108 if (uap
->uid
!= VNOVAL
) {
7109 VATTR_SET(&va
, va_uid
, uap
->uid
);
7111 if (uap
->gid
!= VNOVAL
) {
7112 VATTR_SET(&va
, va_gid
, uap
->gid
);
7116 /* chown calls are not allowed for resource forks. */
7117 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7124 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7130 /* preflight and authorize attribute changes */
7131 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7134 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7135 if (error
== EACCES
) {
7140 error
= vnode_setattr(vp
, &va
, ctx
);
7144 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7149 (void)vnode_put(vp
);
7155 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
7159 if (usrtvp
== USER_ADDR_NULL
) {
7160 struct timeval old_tv
;
7161 /* XXX Y2038 bug because of microtime argument */
7163 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
7166 if (IS_64BIT_PROCESS(current_proc())) {
7167 struct user64_timeval tv
[2];
7168 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7172 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7173 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7175 struct user32_timeval tv
[2];
7176 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7180 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7181 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7188 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
7192 struct vnode_attr va
;
7193 kauth_action_t action
;
7195 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7198 VATTR_SET(&va
, va_access_time
, ts
[0]);
7199 VATTR_SET(&va
, va_modify_time
, ts
[1]);
7201 va
.va_vaflags
|= VA_UTIMES_NULL
;
7205 /* utimes calls are not allowed for resource forks. */
7206 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7213 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7218 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7219 if (!nullflag
&& error
== EACCES
) {
7225 /* since we may not need to auth anything, check here */
7226 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7227 if (!nullflag
&& error
== EACCES
) {
7232 error
= vnode_setattr(vp
, &va
, ctx
);
7236 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7245 * Set the access and modification times of a file.
7249 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
7251 struct timespec ts
[2];
7254 struct nameidata nd
;
7255 vfs_context_t ctx
= vfs_context_current();
7258 * AUDIT: Needed to change the order of operations to do the
7259 * name lookup first because auditing wants the path.
7261 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
7262 UIO_USERSPACE
, uap
->path
, ctx
);
7270 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7271 * the current time instead.
7274 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7278 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
7281 vnode_put(nd
.ni_vp
);
7286 * Set the access and modification times of a file.
7290 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
7292 struct timespec ts
[2];
7297 AUDIT_ARG(fd
, uap
->fd
);
7299 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7302 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
7305 if ((error
= vnode_getwithref(vp
))) {
7310 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
7317 * Truncate a file given its path name.
7321 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
7324 struct vnode_attr va
;
7325 vfs_context_t ctx
= vfs_context_current();
7327 struct nameidata nd
;
7328 kauth_action_t action
;
7330 if (uap
->length
< 0) {
7333 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
7334 UIO_USERSPACE
, uap
->path
, ctx
);
7335 if ((error
= namei(&nd
))) {
7343 VATTR_SET(&va
, va_data_size
, uap
->length
);
7346 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
7352 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7355 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7358 error
= vnode_setattr(vp
, &va
, ctx
);
7362 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
7372 * Truncate a file given a file descriptor.
7376 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
7378 vfs_context_t ctx
= vfs_context_current();
7379 struct vnode_attr va
;
7381 struct fileproc
*fp
;
7385 AUDIT_ARG(fd
, uap
->fd
);
7386 if (uap
->length
< 0) {
7390 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
7394 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
7396 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7405 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
7407 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
7408 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7413 if ((error
= vnode_getwithref(vp
)) != 0) {
7417 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7420 error
= mac_vnode_check_truncate(ctx
,
7421 fp
->f_fglob
->fg_cred
, vp
);
7423 (void)vnode_put(vp
);
7428 VATTR_SET(&va
, va_data_size
, uap
->length
);
7429 error
= vnode_setattr(vp
, &va
, ctx
);
7433 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
7437 (void)vnode_put(vp
);
7445 * Sync an open file with synchronized I/O _file_ integrity completion
/*
 * fsync: calls __pthread_testcancel(1) and then returns the result of
 * fsync_common(p, uap, MNT_WAIT).  retval is unused.
 */
7449 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7451 __pthread_testcancel(1);
7452 return fsync_common(p
, uap
, MNT_WAIT
);
7457 * Sync an open file with synchronized I/O _file_ integrity completion
7459 * Notes: This is a legacy support function that does not test for
7460 * thread cancellation points.
/*
 * fsync_nocancel: returns the result of fsync_common() with MNT_WAIT,
 * without the __pthread_testcancel() call that fsync() makes first.
 * uap is cast to a pointer to struct fsync_args before being passed on.
 * retval is unused.
 */
7464 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7466 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7471 * Sync an open file with synchronized I/O _data_ integrity completion
/*
 * fdatasync: calls __pthread_testcancel(1) and then returns the result
 * of fsync_common() with MNT_DWAIT (where fsync() passes MNT_WAIT).
 * uap is cast to a pointer to struct fsync_args before being passed on.
 * retval is unused.
 */
7475 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7477 __pthread_testcancel(1);
7478 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7485 * Common fsync code to support both synchronized I/O file integrity completion
7486 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7488 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7489 * will only guarantee that the file data contents are retrievable. If
7490 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
7491 * includes additional metadata unnecessary for retrieving the file data
7492 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7495 * Parameters: p The process
7496 * uap->fd The descriptor to synchronize
7497 * flags The data integrity flags
7499 * Returns: int Success
7500 * fp_getfvp:EBADF Bad file descriptor
7501 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7502 * VNOP_FSYNC:??? unspecified
7504 * Notes: We use struct fsync_args because it is a short name, and all
7505 * caller argument structures are otherwise identical.
7508 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7511 struct fileproc
*fp
;
7512 vfs_context_t ctx
= vfs_context_current();
7515 AUDIT_ARG(fd
, uap
->fd
);
7517 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7520 if ((error
= vnode_getwithref(vp
))) {
7525 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7527 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7530 /* Sync resource fork shadow file if necessary. */
7532 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7533 (vp
->v_parent
!= NULLVP
) &&
7534 vnode_isshadow(vp
) &&
7535 (fp
->f_flags
& FP_WRITTEN
)) {
7536 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7540 (void)vnode_put(vp
);
7546 * Duplicate files. Source must be a file, target must be a file or
7549 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7550 * perform inheritance correctly.
7554 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7556 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7557 struct nameidata fromnd
, tond
;
7559 vfs_context_t ctx
= vfs_context_current();
7561 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7562 struct vnode_attr va
;
7565 /* Check that the flags are valid. */
7567 if (uap
->flags
& ~CPF_MASK
) {
7571 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7572 UIO_USERSPACE
, uap
->from
, ctx
);
7573 if ((error
= namei(&fromnd
))) {
7578 NDINIT(&tond
, CREATE
, OP_LINK
,
7579 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7580 UIO_USERSPACE
, uap
->to
, ctx
);
7581 if ((error
= namei(&tond
))) {
7588 if (!(uap
->flags
& CPF_OVERWRITE
)) {
7594 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7599 /* This calls existing MAC hooks for open */
7600 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7607 * See unlinkat_internal for an explanation of the potential
7608 * ENOENT from the MAC hook but the gist is that the MAC hook
7609 * can fail because vn_getpath isn't able to return the full
7610 * path. We choose to ignore this failure.
7612 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7613 if (error
&& error
!= ENOENT
) {
7621 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7622 /* Mask off all but regular access permissions */
7623 VATTR_SET(&va
, va_mode
,
7624 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7625 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7629 #endif /* CONFIG_MACF */
7631 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7639 * If source is the same as the destination (that is the
7640 * same inode number) then there is nothing to do.
7641 * (fixed to have POSIX semantics - CSM 3/2/98)
7647 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
7650 sdvp
= tond
.ni_startdir
;
7652 * nameidone has to happen before we vnode_put(tdvp)
7653 * since it may need to release the fs_nodelock on the tdvp
7673 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7676 * Helper function for doing clones. The caller is expected to provide an
7677 * iocounted source vnode and release it.
/*
 * clonefile_internal: shared worker behind clonefileat() and fclonefileat().
 *
 * Parameters:
 *   fvp                   source vnode
 *   data_read_authorised  when set, KAUTH_VNODE_READ_DATA is removed from the
 *                         rights requested on fvp (KAUTH_VNODE_GENERIC_READ_BITS)
 *   dst_dirfd, dst        directory descriptor and user-space path of the
 *                         destination, resolved with nameiat()
 *   flags                 CLONE_NOFOLLOW selects NOFOLLOW for the destination
 *                         lookup; CLONE_NOOWNERCOPY suppresses the owner copy
 *   ctx                   context used for lookup, authorization and the VNOP
 *
 * Steps visible in this listing:
 *   - the add-entry action is KAUTH_VNODE_ADD_FILE or
 *     KAUTH_VNODE_ADD_SUBDIRECTORY (the selecting statement is not visible);
 *     fvp is tested for being a filesystem root, a mount point, or covered
 *   - destination lookup: CREATE / OP_LINK with WANTPARENT | AUDITVNPATH2
 *   - vnode_mount(tdvp) is compared with vnode_mount(fvp)
 *   - mac_vnode_check_clone(), vnode_authorize() on tdvp, then on fvp
 *   - uid, gid, mode, flags and acl are fetched from fvp into va
 *   - nva is seeded with the type and, when present, the source ACL
 *     (free_src_acl records that va.va_acl must be freed afterwards), then
 *     completed by vnode_authattr_new() or vn_attribute_prepare()
 *   - owner ids are copied from va only when CLONE_NOOWNERCOPY is clear and
 *     the caller is superuser; VNODE_CLONEFILE_NOOWNERCOPY is also set in
 *     vnop_flags (the branch structure is not visible here)
 *   - mode is copied; flags are copied with UF_DATAVAULT | SF_RESTRICTED
 *     taken from nva instead of from the source
 *   - VNOP_CLONEFILE(); on success the new vnode gets fallback attributes,
 *     a label, its name/parent identity, and FSE_CLONE plus
 *     FSE_CREATE_FILE / FSE_CREATE_DIR events
 *   - cleanup: vn_attribute_cleanup(&nva, defaulted) and kauth_acl_free()
 *
 * Fix: the fallback test after VNOP_CLONEFILE() now inspects nva, the
 * attribute set that was handed to the VNOP and is passed to
 * vnode_setattr_fallback(). It previously inspected va, which only reflects
 * what vnode_getattr() returned for the source.
 */
7680 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7681 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7684 struct nameidata tond
;
7687 boolean_t free_src_acl
;
7688 boolean_t attr_cleanup
;
7690 kauth_action_t action
;
7691 struct componentname
*cnp
;
7693 struct vnode_attr va
;
7694 struct vnode_attr nva
;
7695 uint32_t vnop_flags
;
7697 v_type
= vnode_vtype(fvp
);
7702 action
= KAUTH_VNODE_ADD_FILE
;
7705 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7706 fvp
->v_mountedhere
) {
7709 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7715 AUDIT_ARG(fd2
, dst_dirfd
);
7716 AUDIT_ARG(value32
, flags
);
7718 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7719 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7720 UIO_USERSPACE
, dst
, ctx
);
7721 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7728 free_src_acl
= FALSE
;
7729 attr_cleanup
= FALSE
;
7736 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7742 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7746 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
7750 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7751 if (data_read_authorised
) {
7752 action
&= ~KAUTH_VNODE_READ_DATA
;
7754 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
7759 * certain attributes may need to be changed from the source, we ask for
7763 VATTR_WANTED(&va
, va_uid
);
7764 VATTR_WANTED(&va
, va_gid
);
7765 VATTR_WANTED(&va
, va_mode
);
7766 VATTR_WANTED(&va
, va_flags
);
7767 VATTR_WANTED(&va
, va_acl
);
7769 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
7774 VATTR_SET(&nva
, va_type
, v_type
);
7775 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7776 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7777 free_src_acl
= TRUE
;
7780 /* Handle ACL inheritance, initialize vap. */
7781 if (v_type
== VLNK
) {
7782 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7784 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7788 attr_cleanup
= TRUE
;
7791 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7793 * We've got initial values for all security parameters,
7794 * If we are superuser, then we can change owners to be the
7795 * same as the source. Both superuser and the owner have default
7796 * WRITE_SECURITY privileges so all other fields can be taken
7797 * from source as well.
7799 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7800 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
7801 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7803 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
7804 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7807 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7810 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
7811 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
7813 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7814 VATTR_SET(&nva
, va_flags
,
7815 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7816 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7819 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7821 if (!error
&& tvp
) {
7822 int update_flags
= 0;
7825 #endif /* CONFIG_FSE */
7828 * If some of the requested attributes weren't handled by the
7829 * VNOP, use our fallback code.
7831 if (!VATTR_ALL_SUPPORTED(&nva
)) {
7832 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7836 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7837 VNODE_LABEL_CREATE
, ctx
);
7840 // Make sure the name & parent pointers are hooked up
7841 if (tvp
->v_name
== NULL
) {
7842 update_flags
|= VNODE_UPDATE_NAME
;
7844 if (tvp
->v_parent
== NULLVP
) {
7845 update_flags
|= VNODE_UPDATE_PARENT
;
7849 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7850 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7854 switch (vnode_vtype(tvp
)) {
7858 fsevent
= FSE_CREATE_FILE
;
7861 fsevent
= FSE_CREATE_DIR
;
7867 if (need_fsevent(fsevent
, tvp
)) {
7869 * The following is a sequence of three explicit events.
7870 * A pair of FSE_CLONE events representing the source and destination
7871 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7872 * fseventsd may coalesce the destination clone and create events
7873 * into a single event resulting in the following sequence for a client
7875 * FSE_CLONE | FSE_CREATE (dst)
7877 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7879 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7882 #endif /* CONFIG_FSE */
7887 vn_attribute_cleanup(&nva
, defaulted
);
7889 if (free_src_acl
&& va
.va_acl
) {
7890 kauth_acl_free(va
.va_acl
);
7901 * clone files or directories, target must not exist.
/*
 * clonefileat: clone the object named by (uap->src_dirfd, uap->src) to
 * (uap->dst_dirfd, uap->dst).
 *
 * uap->flags is first tested for bits other than CLONE_NOFOLLOW and
 * CLONE_NOOWNERCOPY (the rejection path is not visible in this listing).
 * The source is resolved with nameiat() as a LOOKUP / OP_COPYFILE operation
 * with AUDITVNPATH1, following a trailing symlink unless CLONE_NOFOLLOW is
 * set. The resulting vnode is handed to clonefile_internal() with
 * data_read_authorised = FALSE, so the read-data right on the source is
 * still requested there.
 */
7905 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7906 __unused
int32_t *retval
)
7909 struct nameidata fromnd
;
7912 vfs_context_t ctx
= vfs_context_current();
7914 /* Check that the flags are valid. */
7915 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7919 AUDIT_ARG(fd
, uap
->src_dirfd
);
7921 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7922 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7923 UIO_USERSPACE
, uap
->src
, ctx
);
7924 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
7931 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: variant of clonefileat whose source is the open descriptor
 * uap->src_fd rather than a path.
 *
 * uap->flags is tested against the same CLONE_NOFOLLOW | CLONE_NOOWNERCOPY
 * mask. fp_getfvp() resolves the descriptor to a fileproc and vnode, and the
 * descriptor is tested for FREAD in fg_flag (the failure path records the
 * vnode path for audit; its error value is not visible in this listing).
 * vnode_getwithref() is then called on the vnode before clonefile_internal()
 * runs with data_read_authorised = TRUE, which removes
 * KAUTH_VNODE_READ_DATA from the rights requested on the source there.
 * file_drop(uap->src_fd) is called afterwards.
 */
7939 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7940 __unused
int32_t *retval
)
7943 struct fileproc
*fp
;
7945 vfs_context_t ctx
= vfs_context_current();
7947 /* Check that the flags are valid. */
7948 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7952 AUDIT_ARG(fd
, uap
->src_fd
);
7953 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7958 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7959 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7964 if ((error
= vnode_getwithref(fvp
))) {
7968 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7970 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7975 file_drop(uap
->src_fd
);
/*
 * rename_submounts_callback: per-mount callback passed to vfs_iterate() by
 * renameat_internal() when a mount point has been renamed.
 *
 *   mp   mount being visited
 *   arg  the renamed parent mount (cast to mount_t as pmp)
 *
 * A mount is treated as a sub-mount of pmp when its f_mntonname begins with
 * pmp's f_mntonname (strncmp over the parent name length) and the character
 * right after that prefix is a slash. For such a mount, vfs_busy() is taken
 * with LK_NOWAIT and f_mntonname is regenerated in place from
 * mp->mnt_vnodecovered via vn_getpath_ext() with VN_GETPATH_FSENTER, using a
 * MAXPATHLEN buffer length. Both failure cases are reported with printf.
 * The values returned to vfs_iterate() are not visible in this listing.
 */
7980 rename_submounts_callback(mount_t mp
, void *arg
)
7983 mount_t pmp
= (mount_t
)arg
;
7984 int prefix_len
= strlen(pmp
->mnt_vfsstat
.f_mntonname
);
7986 if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) {
7990 if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') {
7994 if ((error
= vfs_busy(mp
, LK_NOWAIT
))) {
7995 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
7999 int pathlen
= MAXPATHLEN
;
8000 if ((error
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) {
8001 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
8010 * Rename files. Source and destination must either both be directories,
8011 * or both not be directories. If target is a directory, it must be empty.
/*
 * renameat_internal: common implementation behind rename(), renameat() and
 * renameatx_np().
 *
 * Parameters:
 *   ctx             calling context
 *   fromfd, from    directory descriptor and path of the source
 *   tofd, to        directory descriptor and path of the destination
 *   segflg          address space of the path arguments (consulted with
 *                   UIO_SEG_IS_USER_SPACE when the destination path is copied
 *                   for the mount-point update)
 *   flags           vfs_rename_flags_t; bits outside VFS_RENAME_FLAGS_MASK
 *                   and the VFS_RENAME_SWAP + VFS_RENAME_EXCL combination are
 *                   tested up front
 *
 * Outline of the visible steps:
 *   1. The two nameidata and two vnode_attr structures are carved out of a
 *      single MALLOC (__rename_data) instead of living on the stack.
 *   2. Source lookup (DELETE / OP_UNLINK) and destination lookup
 *      (RENAME / OP_RENAME), both flagged NAMEI_COMPOUNDRENAME. A lookup is
 *      repeated only on the first pass or when NAMEI_CONTLOOKUP is set.
 *   3. Outside DEVELOPMENT || DEBUG builds, swap-file vnodes are tested for
 *      on both sides unless ctx is the kernel context. VFS_RENAME_SWAP with
 *      no target and VFS_RENAME_EXCL with an existing target are tested for.
 *   4. Paths (with and without firmlinks) are collected when fsevents,
 *      kauth listeners or audit need them.
 *   5. fdvp and tdvp are tested for being on different mounts, then
 *      vn_authorize_renamex_with_paths() runs; ENOENT re-drives the call up
 *      to MAX_AUTHORIZE_ENOENT_RETRIES times.
 *   6. Mount-point rename: a VROOT directory that meets the listed
 *      conditions is swapped for the vnode it covers.
 *   7. Cross-device test, then the same-vnode / same-name test.
 *   8. A directory moving between parents is serialized through
 *      mount_lock_renames(); taking that lock drops the iocounts and restarts
 *      the lookups with holding_mntlock set.
 *   9. vn_rename(). EDATALESS materializes the source and retries once with
 *      VFS_RENAME_DATALESS; EKEEPLOOKING resumes at continue_lookup;
 *      ERECYCLE and (for compound VNOPs) ENOENT re-drive the call within
 *      their retry limits.
 *  10. On success: kauth fileop notification, FSE_RENAME events, update of
 *      f_mntonname for a renamed mount point and its sub-mounts
 *      (rename_submounts_callback), and vnode_update_identity() on fvp.
 *  11. Cleanup releases the path buffers, any held rename lock and
 *      __rename_data.
 *
 * NOTE(review): FSE_TRUNCATED_PATH is derived from from_truncated /
 * to_truncated, while the FSE_RENAME events carry the *_no_firmlink strings,
 * which have their own truncation flags. rmdirat_internal uses the
 * no-firmlink flag for the same purpose. Confirm whether this difference is
 * intended.
 */
8015 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
8016 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
8018 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
8022 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
8028 struct nameidata
*fromnd
, *tond
;
8036 const char *oname
= NULL
;
8037 char *from_name
= NULL
, *to_name
= NULL
;
8038 char *from_name_no_firmlink
= NULL
, *to_name_no_firmlink
= NULL
;
8039 int from_len
= 0, to_len
= 0;
8040 int from_len_no_firmlink
= 0, to_len_no_firmlink
= 0;
8041 int holding_mntlock
;
8042 mount_t locked_mp
= NULL
;
8043 vnode_t oparent
= NULLVP
;
8045 fse_info from_finfo
, to_finfo
;
8047 int from_truncated
= 0, to_truncated
= 0;
8048 int from_truncated_no_firmlink
= 0, to_truncated_no_firmlink
= 0;
8050 struct vnode_attr
*fvap
, *tvap
;
8052 /* carving out a chunk for structs that are too big to be on stack. */
8054 struct nameidata from_node
, to_node
;
8055 struct vnode_attr fv_attr
, tv_attr
;
8057 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
8058 fromnd
= &__rename_data
->from_node
;
8059 tond
= &__rename_data
->to_node
;
8061 holding_mntlock
= 0;
8070 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
8072 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8074 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
8076 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8079 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8080 if ((error
= nameiat(fromnd
, fromfd
))) {
8083 fdvp
= fromnd
->ni_dvp
;
8084 fvp
= fromnd
->ni_vp
;
8086 if (fvp
&& fvp
->v_type
== VDIR
) {
8087 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
8091 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8092 if ((error
= nameiat(tond
, tofd
))) {
8094 * Translate error code for rename("dir1", "dir2/.").
8096 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
8101 tdvp
= tond
->ni_dvp
;
8105 #if DEVELOPMENT || DEBUG
8107 * XXX VSWAP: Check for entitlements or special flag here
8108 * so we can restrict access appropriately.
8110 #else /* DEVELOPMENT || DEBUG */
8112 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8117 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8121 #endif /* DEVELOPMENT || DEBUG */
8123 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
8128 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
8133 batched
= vnode_compound_rename_available(fdvp
);
8136 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
8139 get_fse_info(fvp
, &from_finfo
, ctx
);
8141 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
8146 fvap
= &__rename_data
->fv_attr
;
8150 get_fse_info(tvp
, &to_finfo
, ctx
);
8151 } else if (batched
) {
8152 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
8157 tvap
= &__rename_data
->tv_attr
;
8162 #endif /* CONFIG_FSE */
8164 has_listeners
= kauth_authorize_fileop_has_listeners();
8168 if (AUDIT_RECORD_EXISTS()) {
8173 if (need_event
|| has_listeners
) {
8174 if (from_name
== NULL
) {
8175 GET_PATH(from_name
);
8176 if (from_name
== NULL
) {
8182 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
8184 if (from_name_no_firmlink
== NULL
) {
8185 GET_PATH(from_name_no_firmlink
);
8186 if (from_name_no_firmlink
== NULL
) {
8192 from_len_no_firmlink
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
);
8195 if (need_event
|| need_kpath2
|| has_listeners
) {
8196 if (to_name
== NULL
) {
8198 if (to_name
== NULL
) {
8204 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
8206 if (to_name_no_firmlink
== NULL
) {
8207 GET_PATH(to_name_no_firmlink
);
8208 if (to_name_no_firmlink
== NULL
) {
8214 to_len_no_firmlink
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
);
8215 if (to_name
&& need_kpath2
) {
8216 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
8221 * Claim: this check will never reject a valid rename.
8222 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8223 * Suppose fdvp and tdvp are not on the same mount.
8224 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8225 * then you can't move it to within another dir on the same mountpoint.
8226 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8228 * If this check passes, then we are safe to pass these vnodes to the same FS.
8230 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
8234 goto skipped_lookup
;
8238 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
8240 if (error
== ENOENT
) {
8241 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8243 * We encountered a race where after doing the namei, tvp stops
8244 * being valid. If so, simply re-drive the rename call from the
8256 * If the source and destination are the same (i.e. they're
8257 * links to the same vnode) and the target file system is
8258 * case sensitive, then there is nothing to do.
8260 * XXX Come back to this.
8266 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8267 * then assume that this file system is case sensitive.
8269 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
8270 pathconf_val
!= 0) {
8276 * Allow the renaming of mount points.
8277 * - target must not exist
8278 * - target must reside in the same directory as source
8279 * - union mounts cannot be renamed
8280 * - "/" cannot be renamed
8282 * XXX Handle this in VFS after a continued lookup (if we missed
8283 * in the cache to start off)
8285 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8286 * we'll skip past here. The file system is responsible for
8287 * checking that @tvp is not a descendent of @fvp and vice versa
8288 * so it should always return EINVAL if either @tvp or @fvp is the
8291 if ((fvp
->v_flag
& VROOT
) &&
8292 (fvp
->v_type
== VDIR
) &&
8294 (fvp
->v_mountedhere
== NULL
) &&
8296 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
8297 ((fvp
->v_mount
->mnt_kern_flag
& MNTK_SYSTEM
) == 0) &&
8298 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
8301 /* switch fvp to the covered vnode */
8302 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
8303 if ((vnode_getwithref(coveredvp
))) {
8313 * Check for cross-device rename.
8315 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
8316 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
8322 * If source is the same as the destination (that is the
8323 * same inode number) then there is nothing to do...
8324 * EXCEPT if the underlying file system supports case
8325 * insensitivity and is case preserving. In this case
8326 * the file system needs to handle the special case of
8327 * getting the same vnode as target (fvp) and source (tvp).
8329 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8330 * and _PC_CASE_PRESERVING can have this exception, and they need to
8331 * handle the special case of getting the same vnode as target and
8332 * source. NOTE: Then the target is unlocked going into vnop_rename,
8333 * so not to cause locking problems. There is a single reference on tvp.
8335 * NOTE - that fvp == tvp also occurs if they are hard linked and
8336 * that correct behaviour then is just to return success without doing
8339 * XXX filesystem should take care of this itself, perhaps...
8341 if (fvp
== tvp
&& fdvp
== tdvp
) {
8342 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
8343 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
8344 fromnd
->ni_cnd
.cn_namelen
)) {
8349 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
8351 * we're holding a reference and lock
8352 * on locked_mp, but it no longer matches
8353 * what we want to do... so drop our hold
8355 mount_unlock_renames(locked_mp
);
8356 mount_drop(locked_mp
, 0);
8357 holding_mntlock
= 0;
8359 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
8361 * serialize renames that re-shape
8362 * the tree... if holding_mntlock is
8363 * set, then we're ready to go...
8365 * first need to drop the iocounts
8366 * we picked up, second take the
8367 * lock to serialize the access,
8368 * then finally start the lookup
8369 * process over with the lock held
8371 if (!holding_mntlock
) {
8373 * need to grab a reference on
8374 * the mount point before we
8375 * drop all the iocounts... once
8376 * the iocounts are gone, the mount
8379 locked_mp
= fvp
->v_mount
;
8380 mount_ref(locked_mp
, 0);
8383 * nameidone has to happen before we vnode_put(tvp)
8384 * since it may need to release the fs_nodelock on the tvp
8394 * nameidone has to happen before we vnode_put(fdvp)
8395 * since it may need to release the fs_nodelock on the fvp
8402 mount_lock_renames(locked_mp
);
8403 holding_mntlock
= 1;
8409 * when we dropped the iocounts to take
8410 * the lock, we allowed the identity of
8411 * the various vnodes to change... if they did,
8412 * we may no longer be dealing with a rename
8413 * that reshapes the tree... once we're holding
8414 * the iocounts, the vnodes can't change type
8415 * so we're free to drop the lock at this point
8418 if (holding_mntlock
) {
8419 mount_unlock_renames(locked_mp
);
8420 mount_drop(locked_mp
, 0);
8421 holding_mntlock
= 0;
8425 // save these off so we can later verify that fvp is the same
8426 oname
= fvp
->v_name
;
8427 oparent
= fvp
->v_parent
;
8430 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
8431 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
8434 if (holding_mntlock
) {
8436 * we can drop our serialization
8439 mount_unlock_renames(locked_mp
);
8440 mount_drop(locked_mp
, 0);
8441 holding_mntlock
= 0;
8444 if (error
== EDATALESS
) {
8446 * If we've been here before, something has gone
8447 * horribly wrong and we should just get out lest
8448 * we spiral around the drain forever.
8450 if (flags
& VFS_RENAME_DATALESS
) {
8456 * The object we're renaming is dataless (or has a
8457 * dataless descendent) and requires materialization
8458 * before the rename occurs. But we're holding the
8459 * mount point's rename lock, so it's not safe to
8462 * In this case, we release the lock, perform the
8463 * materialization, and start the whole thing over.
8465 error
= vnode_materialize_dataless_file(fvp
,
8466 NAMESPACE_HANDLER_RENAME_OP
);
8470 * The next time around we need to tell the
8471 * file system that the materializtaion has
8474 flags
|= VFS_RENAME_DATALESS
;
8479 if (error
== EKEEPLOOKING
) {
8480 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8481 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8482 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8486 fromnd
->ni_vp
= fvp
;
8489 goto continue_lookup
;
8493 * We may encounter a race in the VNOP where the destination didn't
8494 * exist when we did the namei, but it does by the time we go and
8495 * try to create the entry. In this case, we should re-drive this rename
8496 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8497 * but other filesystems susceptible to this race could return it, too.
8499 if (error
== ERECYCLE
) {
8500 if (retry_count
< MAX_RENAME_ERECYCLE_RETRIES
) {
8504 printf("rename retry limit due to ERECYCLE reached\n");
8510 * For compound VNOPs, the authorization callback may return
8511 * ENOENT in case of racing hardlink lookups hitting the name
8512 * cache, redrive the lookup.
8514 if (batched
&& error
== ENOENT
) {
8515 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8524 /* call out to allow 3rd party notification of rename.
8525 * Ignore result of kauth_authorize_fileop call.
8527 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8528 KAUTH_FILEOP_RENAME
,
8529 (uintptr_t)from_name
, (uintptr_t)to_name
);
8530 if (flags
& VFS_RENAME_SWAP
) {
8531 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8532 KAUTH_FILEOP_RENAME
,
8533 (uintptr_t)to_name
, (uintptr_t)from_name
);
8537 if (from_name
!= NULL
&& to_name
!= NULL
) {
8538 if (from_truncated
|| to_truncated
) {
8539 // set it here since only the from_finfo gets reported up to user space
8540 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8544 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8547 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8551 add_fsevent(FSE_RENAME
, ctx
,
8552 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8553 FSE_ARG_FINFO
, &from_finfo
,
8554 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8555 FSE_ARG_FINFO
, &to_finfo
,
8557 if (flags
& VFS_RENAME_SWAP
) {
8559 * Strictly speaking, swap is the equivalent of
8560 * *three* renames. FSEvents clients should only take
8561 * the events as a hint, so we only bother reporting
8564 add_fsevent(FSE_RENAME
, ctx
,
8565 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8566 FSE_ARG_FINFO
, &to_finfo
,
8567 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8568 FSE_ARG_FINFO
, &from_finfo
,
8572 add_fsevent(FSE_RENAME
, ctx
,
8573 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8574 FSE_ARG_FINFO
, &from_finfo
,
8575 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8579 #endif /* CONFIG_FSE */
8582 * update filesystem's mount point data
8585 char *cp
, *pathend
, *mpname
;
8591 mp
= fvp
->v_mountedhere
;
8593 if (vfs_busy(mp
, LK_NOWAIT
)) {
8597 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
8599 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8600 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8602 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8605 /* find current mount point prefix */
8606 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8607 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8612 /* find last component of target name */
8613 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8619 /* Update f_mntonname of sub mounts */
8620 vfs_iterate(0, rename_submounts_callback
, (void *)mp
);
8622 /* append name to prefix */
8623 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8624 bzero(pathend
, maxlen
);
8626 strlcpy(pathend
, mpname
, maxlen
);
8628 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
8632 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
8635 * fix up name & parent pointers. note that we first
8636 * check that fvp has the same name/parent pointers it
8637 * had before the rename call... this is a 'weak' check
8640 * XXX oparent and oname may not be set in the compound vnop case
8642 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8645 update_flags
= VNODE_UPDATE_NAME
;
8648 update_flags
|= VNODE_UPDATE_PARENT
;
8651 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8654 if (to_name
!= NULL
) {
8655 RELEASE_PATH(to_name
);
8658 if (to_name_no_firmlink
!= NULL
) {
8659 RELEASE_PATH(to_name_no_firmlink
);
8660 to_name_no_firmlink
= NULL
;
8662 if (from_name
!= NULL
) {
8663 RELEASE_PATH(from_name
);
8666 if (from_name_no_firmlink
!= NULL
) {
8667 RELEASE_PATH(from_name_no_firmlink
);
8668 from_name_no_firmlink
= NULL
;
8670 if (holding_mntlock
) {
8671 mount_unlock_renames(locked_mp
);
8672 mount_drop(locked_mp
, 0);
8673 holding_mntlock
= 0;
8677 * nameidone has to happen before we vnode_put(tdvp)
8678 * since it may need to release the fs_nodelock on the tdvp
8689 * nameidone has to happen before we vnode_put(fdvp)
8690 * since it may need to release the fs_nodelock on the fdvp
8701 * If things changed after we did the namei, then we will re-drive
8702 * this rename call from the top.
8709 FREE(__rename_data
, M_TEMP
);
/*
 * rename: system call handler that forwards to renameat_internal() using
 * the current VFS context, AT_FDCWD as the base directory for both
 * uap->from and uap->to, user-space paths (UIO_USERSPACE) and no rename
 * flags (0). p and retval are unused.
 */
8714 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
8716 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
8717 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
/*
 * renameatx_np: system call handler that forwards to renameat_internal()
 * with the current VFS context, the caller-supplied source descriptor and
 * path, user-space paths (UIO_USERSPACE) and uap->flags passed through
 * unchanged; flag validation happens inside renameat_internal(). The
 * destination argument line is not visible in this listing. p and retval
 * are unused.
 */
8721 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
8723 return renameat_internal(
8724 vfs_context_current(),
8725 uap
->fromfd
, uap
->from
,
8727 UIO_USERSPACE
, uap
->flags
);
/*
 * renameat: system call handler that forwards to renameat_internal() with
 * the current VFS context, the caller-supplied descriptor/path pairs
 * (uap->fromfd, uap->from) and (uap->tofd, uap->to), user-space paths
 * (UIO_USERSPACE) and no rename flags (0). p and retval are unused.
 */
8731 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
8733 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
8734 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
8738 * Make a directory file.
8740 * Returns: 0 Success
8743 * vnode_authorize:???
/*
 * mkdir1at: common directory-creation worker used by mkdir(), mkdirat()
 * and mkdir_extended().
 *
 * Parameters:
 *   ctx     calling context
 *   path    path of the directory to create
 *   vap     attributes prepared by the caller; va_mode is recorded for
 *           audit and va_type is set to VDIR here
 *   fd      base directory descriptor handed to nameiat()
 *   segflg  address space of path
 *
 * Visible steps:
 *   - lookup as CREATE / OP_MKDIR with LOCKPARENT | AUDITVNPATH1, plus
 *     WILLBEDIR and NAMEI_COMPOUNDMKDIR
 *   - batched records whether the parent supports a compound mkdir VNOP
 *   - vn_authorize_mkdir(); on EACCES or EPERM a second plain LOOKUP of the
 *     same path is made so that an existing target can be reported as
 *     EEXIST (see the comment kept below)
 *   - vn_create(); EKEEPLOOKING resumes at continue_lookup
 *   - the new vnode gets its name/parent identity filled in if missing and
 *     an FSE_CREATE_DIR event is posted
 */
8748 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
8749 enum uio_seg segflg
)
8753 int update_flags
= 0;
8755 struct nameidata nd
;
8757 AUDIT_ARG(mode
, vap
->va_mode
);
8758 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
8760 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
8761 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
8764 error
= nameiat(&nd
, fd
);
8776 batched
= vnode_compound_mkdir_available(dvp
);
8778 VATTR_SET(vap
, va_type
, VDIR
);
8782 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8783 * only get EXISTS or EISDIR for existing path components, and not that it could see
8784 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8785 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8787 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
8788 if (error
== EACCES
|| error
== EPERM
) {
8796 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8797 * rather than EACCESS if the target exists.
8799 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
8801 error2
= nameiat(&nd
, fd
);
8815 * make the directory
8817 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
8818 if (error
== EKEEPLOOKING
) {
8820 goto continue_lookup
;
8826 // Make sure the name & parent pointers are hooked up
8827 if (vp
->v_name
== NULL
) {
8828 update_flags
|= VNODE_UPDATE_NAME
;
8830 if (vp
->v_parent
== NULLVP
) {
8831 update_flags
|= VNODE_UPDATE_PARENT
;
8835 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8839 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8844 * nameidone has to happen before we vnode_put(dvp)
8845 * since it may need to release the fs_nodelock on the dvp
8860 * mkdir_extended: Create a directory; with extended security (ACL).
8862 * Parameters: p Process requesting to create the directory
8863 * uap User argument descriptor (see below)
8866 * Indirect: uap->path Path of directory to create
8867 * uap->mode Access permissions to set
8868 * uap->xsecurity ACL to set
8870 * Returns: 0 Success
/*
 * mkdir_extended: create a directory with an optional caller-supplied ACL.
 *
 * uap->uid and uap->gid are recorded for audit. When uap->xsecurity is not
 * USER_ADDR_NULL the security descriptor is copied in with
 * kauth_copyinfilesec(); a non-zero result is captured in ciferror (the
 * handling of that failure is not visible in this listing). The mode is
 * (uap->mode & ACCESSPERMS) with the process fd_cmask bits cleared. When a
 * descriptor was copied in, its fsec_acl becomes va_acl. The directory is
 * created through mkdir1at() relative to AT_FDCWD, and the copied
 * descriptor is released with kauth_filesec_free() afterwards.
 */
8875 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8878 kauth_filesec_t xsecdst
;
8879 struct vnode_attr va
;
8881 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8884 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8885 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
8890 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8891 if (xsecdst
!= NULL
) {
8892 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8895 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8897 if (xsecdst
!= NULL
) {
8898 kauth_filesec_free(xsecdst
);
/*
 * mkdir: system call handler. Sets va_mode to (uap->mode & ACCESSPERMS)
 * with the process fd_cmask bits cleared, then creates uap->path through
 * mkdir1at() relative to AT_FDCWD using the current VFS context.
 */
8904 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8906 struct vnode_attr va
;
8909 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8911 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: system call handler. Same mode computation as mkdir
 * ((uap->mode & ACCESSPERMS) with the process fd_cmask bits cleared), but
 * uap->path is resolved relative to the caller-supplied descriptor uap->fd
 * when mkdir1at() is called.
 */
8916 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8918 struct vnode_attr va
;
8921 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8923 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
/*
 * rmdirat_internal: common directory-removal worker.
 *
 * Parameters:
 *   ctx           calling context
 *   fd            base directory descriptor handed to nameiat()
 *   dirpath       path of the directory to remove
 *   segflg        address space of dirpath
 *   unlink_flags  VNODE_REMOVE_DATALESS_DIR enables the vn_remove() retry
 *                 described below
 *
 * The body is a do/while loop that repeats while restart_flag is non-zero.
 * Visible steps per pass:
 *   - lookup as DELETE / OP_RMDIR with LOCKPARENT | AUDITVNPATH1 and
 *     NAMEI_COMPOUNDRMDIR
 *   - a VROOT vnode is tested for (a filesystem root cannot be deleted);
 *     outside DEVELOPMENT || DEBUG builds a swap vnode is tested for unless
 *     ctx is the kernel context
 *   - vn_authorize_rmdir(); ENOENT restarts up to
 *     MAX_AUTHORIZE_ENOENT_RETRIES times
 *   - fsevent info and paths (with and without firmlinks) are gathered when
 *     an FSE_DELETE event or a kauth listener needs them
 *   - vn_rmdir(); EKEEPLOOKING resumes at continue_lookup and, for compound
 *     VNOPs, ENOENT restarts within the same retry limit
 *   - ENOTEMPTY with VNODE_REMOVE_DATALESS_DIR set retries through
 *     vn_remove() with that flag
 *   - under CONFIG_APPLEDOUBLE, ENOTEMPTY triggers
 *     rmdir_remove_orphaned_appleDouble() followed by a second vn_rmdir()
 *   - kauth KAUTH_FILEOP_DELETE notification, parent reset for VISHARDLINK
 *     vnodes, FSE_DELETE event
 *   - the no-firmlink path buffer is released; wakeup_one() or a one-tick
 *     tsleep() on vp ("rm AD") follows depending on restart_flag
 */
8928 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8929 enum uio_seg segflg
, int unlink_flags
)
8933 struct nameidata nd
;
8935 char *no_firmlink_path
= NULL
;
8937 int len_no_firmlink_path
= 0;
8938 int has_listeners
= 0;
8940 int truncated_path
= 0;
8941 int truncated_no_firmlink_path
= 0;
8943 struct vnode_attr va
;
8944 #endif /* CONFIG_FSE */
8945 struct vnode_attr
*vap
= NULL
;
8946 int restart_count
= 0;
8952 * This loop exists to restart rmdir in the unlikely case that two
8953 * processes are simultaneously trying to remove the same directory
8954 * containing orphaned appleDouble files.
8957 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8958 segflg
, dirpath
, ctx
);
8959 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8964 error
= nameiat(&nd
, fd
);
8973 batched
= vnode_compound_rmdir_available(vp
);
8975 if (vp
->v_flag
& VROOT
) {
8977 * The root of a mounted filesystem cannot be deleted.
8983 #if DEVELOPMENT || DEBUG
8985 * XXX VSWAP: Check for entitlements or special flag here
8986 * so we can restrict access appropriately.
8988 #else /* DEVELOPMENT || DEBUG */
8990 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8994 #endif /* DEVELOPMENT || DEBUG */
8997 * Removed a check here; we used to abort if vp's vid
8998 * was not the same as what we'd seen the last time around.
8999 * I do not think that check was valid, because if we retry
9000 * and all dirents are gone, the directory could legitimately
9001 * be recycled but still be present in a situation where we would
9002 * have had permission to delete. Therefore, we won't make
9003 * an effort to preserve that check now that we may not have a
9008 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
9010 if (error
== ENOENT
) {
9011 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9022 if (!vnode_compound_rmdir_available(dvp
)) {
9023 panic("No error, but no compound rmdir?");
9030 need_event
= need_fsevent(FSE_DELETE
, dvp
);
9033 get_fse_info(vp
, &finfo
, ctx
);
9035 error
= vfs_get_notify_attributes(&va
);
9044 has_listeners
= kauth_authorize_fileop_has_listeners();
9045 if (need_event
|| has_listeners
) {
9054 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
9056 if (no_firmlink_path
== NULL
) {
9057 GET_PATH(no_firmlink_path
);
9058 if (no_firmlink_path
== NULL
) {
9064 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
9066 if (truncated_no_firmlink_path
) {
9067 finfo
.mode
|= FSE_TRUNCATED_PATH
;
9072 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9075 /* Couldn't find a vnode */
9079 if (error
== EKEEPLOOKING
) {
9080 goto continue_lookup
;
9081 } else if (batched
&& error
== ENOENT
) {
9082 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9084 * For compound VNOPs, the authorization callback
9085 * may return ENOENT in case of racing hard link lookups
9086 * redrive the lookup.
9095 * XXX There's no provision for passing flags
9096 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9097 * because it's not empty, then we try again
9098 * with VNOP_REMOVE(), passing in a special
9099 * flag that clever file systems will know
9102 if (error
== ENOTEMPTY
&&
9103 (unlink_flags
& VNODE_REMOVE_DATALESS_DIR
) != 0) {
9105 * If this fails, we want to keep the original
9108 if (vn_remove(dvp
, &vp
, &nd
,
9109 VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) {
9114 #if CONFIG_APPLEDOUBLE
9116 * Special case to remove orphaned AppleDouble
9117 * files. I don't like putting this in the kernel,
9118 * but carbon does not like putting this in carbon either,
9121 if (error
== ENOTEMPTY
) {
9122 int ad_error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
9123 if (ad_error
== EBUSY
) {
9130 * Assuming everything went well, we will try the RMDIR again
9133 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9136 #endif /* CONFIG_APPLEDOUBLE */
9138 * Call out to allow 3rd party notification of delete.
9139 * Ignore result of kauth_authorize_fileop call.
9142 if (has_listeners
) {
9143 kauth_authorize_fileop(vfs_context_ucred(ctx
),
9144 KAUTH_FILEOP_DELETE
,
9149 if (vp
->v_flag
& VISHARDLINK
) {
9150 // see the comment in unlink1() about why we update
9151 // the parent of a hard link when it is removed
9152 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
9158 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
9160 add_fsevent(FSE_DELETE
, ctx
,
9161 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
9162 FSE_ARG_FINFO
, &finfo
,
9174 if (no_firmlink_path
!= NULL
) {
9175 RELEASE_PATH(no_firmlink_path
);
9176 no_firmlink_path
= NULL
;
9180 * nameidone has to happen before we vnode_put(dvp)
9181 * since it may need to release the fs_nodelock on the dvp
9190 if (restart_flag
== 0) {
9191 wakeup_one((caddr_t
)vp
);
9194 tsleep(vp
, PVFS
, "rm AD", 1);
9195 } while (restart_flag
!= 0);
9201 * Remove a directory file.
/*
 * rmdir: system call handler that forwards to rmdirat_internal() with the
 * current VFS context, AT_FDCWD as the base directory, uap->path converted
 * with CAST_USER_ADDR_T, user-space addressing (UIO_USERSPACE) and no
 * unlink flags (0). p and retval are unused.
 */
9205 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
9207 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
9208 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0);
9211 /* Record length of a struct direntry for a name of namlen bytes: sizeof(struct direntry) + namlen - (MAXPATHLEN-1), rounded up to a multiple of 8 */
9212 #define DIRENT64_LEN(namlen) \
9213 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
9215 /* Record length of a struct dirent for a name of namelen bytes: sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1), rounded up to a multiple of 4. NOTE(review): namelen is not parenthesized in the expansion, so pass a simple expression */
9216 #define DIRENT_LEN(namelen) \
9217 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
9219 /* Address of the last byte of the record at dep: start of dep plus d_reclen - 1 */
9220 #define DIRENT_END(dep) \
9221 (((char *)(dep)) + (dep)->d_reclen - 1)
/*
 * vnode_readdir64: read directory entries from vp into uio as struct
 * direntry records.
 *
 * Parameters:
 *   vp         directory vnode
 *   uio        destination for the direntry records; its uio_offset is the
 *              starting position and is updated from the value returned by
 *              VNOP_READDIR
 *   flags      passed to VNOP_READDIR on the native path only
 *   eofflag    passed through to VNOP_READDIR
 *   numdirent  passed through to VNOP_READDIR
 *   ctxp       calling context
 *
 * Native path: when the filesystem advertises VFC_VFSREADDIR_EXTENDED and
 * the mount does not carry MNTK_DENY_READDIREXT, the request goes straight
 * to VNOP_READDIR.
 *
 * Fallback path: a kernel buffer of 3/8 of the (capped) residual size is
 * read with VNOP_READDIR using flags 0, then each struct dirent is checked
 * (its end must lie inside the bytes read and DIRENT_LEN of its name must
 * not exceed d_reclen; a bad record is reported with printf), converted
 * into a zeroed struct direntry with d_seekoff = 0 and d_reclen =
 * DIRENT64_LEN(d_namlen), and copied out with uiomove(). Both temporary
 * allocations are released with FREE at the end.
 *
 * NOTE(review): bufptr is tested for NULL after MALLOC, but no equivalent
 * test for entry64 is visible in this listing. Confirm against the full
 * source.
 */
9224 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
9225 int *numdirent
, vfs_context_t ctxp
)
9227 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9228 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
9229 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
9230 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
9235 struct direntry
*entry64
;
9241 * We're here because the underlying file system does not
9242 * support direnties or we mounted denying support so we must
9243 * fall back to dirents and convert them to direntries.
9245 * Our kernel buffer needs to be smaller since re-packing will
9246 * expand each dirent. The worse case (when the name length
9247 * is 3 or less) corresponds to a struct direntry size of 32
9248 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9249 * (4-byte aligned). So having a buffer that is 3/8 the size
9250 * will prevent us from reading more than we can pack.
9252 * Since this buffer is wired memory, we will limit the
9253 * buffer size to a maximum of 32K. We would really like to
9254 * use 32K in the MIN(), but we use magic number 87371 to
9255 * prevent uio_resid() * 3 / 8 from overflowing.
9257 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
9258 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
9259 if (bufptr
== NULL
) {
9263 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
9264 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
9265 auio
->uio_offset
= uio
->uio_offset
;
9267 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
9269 dep
= (struct dirent
*)bufptr
;
9270 bytesread
= bufsize
- uio_resid(auio
);
9272 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
9275 * Convert all the entries and copy them out to user's buffer.
9277 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
9278 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
9280 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
9281 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
9282 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
9283 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
9284 vp
->v_name
? vp
->v_name
: "<unknown>");
9289 bzero(entry64
, enbufsize
);
9290 /* Convert a dirent to a dirent64. */
9291 entry64
->d_ino
= dep
->d_ino
;
9292 entry64
->d_seekoff
= 0;
9293 entry64
->d_reclen
= enbufsize
;
9294 entry64
->d_namlen
= dep
->d_namlen
;
9295 entry64
->d_type
= dep
->d_type
;
9296 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
9298 /* Move to next entry. */
9299 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
9301 /* Copy entry64 to user's buffer. */
9302 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
9305 /* Update the real offset using the offset we got from VNOP_READDIR. */
9307 uio
->uio_offset
= auio
->uio_offset
;
9310 FREE(bufptr
, M_TEMP
);
9311 FREE(entry64
, M_TEMP
);
9316 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9319 * Read a block of directory entries in a file system independent format.
9322 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
9323 off_t
*offset
, int *eofflag
, int flags
)
9326 struct vfs_context context
= *vfs_context_current(); /* local copy */
9327 struct fileproc
*fp
;
9329 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9331 int error
, numdirent
;
9332 char uio_buf
[UIO_SIZEOF(1)];
9334 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
9338 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9339 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9344 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
9345 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
9349 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
9354 if ((error
= vnode_getwithref(vp
))) {
9357 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9360 if (vp
->v_type
!= VDIR
) {
9361 (void)vnode_put(vp
);
9367 error
= mac_vnode_check_readdir(&context
, vp
);
9369 (void)vnode_put(vp
);
9374 loff
= fp
->f_fglob
->fg_offset
;
9375 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9376 uio_addiov(auio
, bufp
, bufsize
);
9378 if (flags
& VNODE_READDIR_EXTENDED
) {
9379 error
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
);
9380 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9382 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
);
9383 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
9386 (void)vnode_put(vp
);
9390 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
9391 if (union_dircheckp
) {
9392 error
= union_dircheckp(&vp
, fp
, &context
);
9397 (void)vnode_put(vp
);
9402 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
9403 struct vnode
*tvp
= vp
;
9404 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
9406 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9407 fp
->f_fglob
->fg_offset
= 0;
9421 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: legacy directory-read entry point.
 *
 * Audits uap->fd, then calls getdirentries_common() on uap->fd with the
 * user buffer uap->buf of uap->count bytes and a flags value of 0.  The
 * common routine reports the byte count, the resulting offset and an EOF
 * indication through bytesread, offset and eofflag.
 *
 * The offset is then narrowed to the caller ABI and copied out to
 * uap->basep: as a user64_long_t when proc_is64bit(p) is true, otherwise
 * as a user32_long_t.  *retval receives bytesread.
 *
 * NOTE(review): p carries the __unused attribute in the signature but is
 * passed to proc_is64bit() below.
 * NOTE(review): the checks on error between these steps and the final
 * return are not visible in this view.
 */
9429 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
9435 AUDIT_ARG(fd
, uap
->fd
);
9436 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
,
9437 &bytesread
, &offset
, &eofflag
, 0);
9440 if (proc_is64bit(p
)) {
9441 user64_long_t base
= (user64_long_t
)offset
;
9442 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
9444 user32_long_t base
= (user32_long_t
)offset
;
9445 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
9447 *retval
= bytesread
;
/*
 * getdirentries64: extended directory-read entry point.
 *
 * Audits uap->fd and calls getdirentries_common() with the
 * VNODE_READDIR_EXTENDED flag.  The size handed to the common routine is
 * uap->bufsize, except that when uap->bufsize is at least
 * GETDIRENTRIES64_EXTENDED_BUFSIZE the last
 * sizeof(getdirentries64_flags_t) bytes are held back.
 *
 * On the visible success path *retval receives bytesread and the offset
 * is copied out to uap->position as an off_t.  If that copyout succeeded
 * and the buffer was large enough for the extended form, a
 * getdirentries64_flags_t word (GETDIRENTRIES64_EOF may be OR-ed in) is
 * copied out to uap->buf + bufsize, i.e. directly after the region that
 * was offered for directory entries.
 *
 * NOTE(review): the condition guarding the GETDIRENTRIES64_EOF update and
 * the tail of the final copyout call are not visible in this view.
 */
9453 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
9458 user_size_t bufsize
;
9460 AUDIT_ARG(fd
, uap
->fd
);
9463 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9464 * then the kernel carves out the last 4 bytes to return extended
9465 * information to userspace (namely whether we reached EOF with this call).
9467 if (uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9468 bufsize
= uap
->bufsize
- sizeof(getdirentries64_flags_t
);
9470 bufsize
= uap
->bufsize
;
9473 error
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
,
9474 &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
);
9477 *retval
= bytesread
;
9478 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
9480 if (error
== 0 && uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9481 getdirentries64_flags_t flags
= 0;
9483 flags
|= GETDIRENTRIES64_EOF
;
9485 error
= copyout(&flags
, (user_addr_t
)uap
->buf
+ bufsize
,
9494 * Set the mode mask for creation of filesystem nodes.
9495 * XXX implement xsecurity
9497 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
/*
 * umask1: common worker for umask() and umask_extended().
 *
 * Audits newmask, stores the previous fd_cmask of the file descriptor
 * table fdp in *retval and then replaces fd_cmask with
 * newmask & ALLPERMS.
 *
 * fsec is declared __unused and is not referenced here.
 *
 * NOTE(review): the statement that initializes fdp (presumably from p)
 * and any locking around the update are not visible in this view.
 */
9499 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
9501 struct filedesc
*fdp
;
9503 AUDIT_ARG(mask
, newmask
);
9506 *retval
= fdp
->fd_cmask
;
9507 fdp
->fd_cmask
= newmask
& ALLPERMS
;
9513 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9515 * Parameters: p Process requesting to set the umask
9516 * uap User argument descriptor (see below)
9517 * retval umask of the process (parameter p)
9519 * Indirect: uap->newmask umask to set
9520 * uap->xsecurity ACL to set
9522 * Returns: 0 Success
/*
 * umask_extended: set the creation mask, optionally with a file security
 * descriptor supplied by the caller.
 *
 * xsecdst starts as KAUTH_FILESEC_NONE.  When uap->xsecurity is not
 * USER_ADDR_NULL it is copied in with kauth_copyinfilesec(); a non-zero
 * result from that call is captured in ciferror.  The mask itself is
 * applied by umask1(p, uap->newmask, xsecdst, retval), and any filesec
 * that was copied in is released with kauth_filesec_free() afterwards.
 *
 * NOTE(review): the early-exit taken when kauth_copyinfilesec() fails and
 * the final return statement are not visible in this view.
 */
9527 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
9530 kauth_filesec_t xsecdst
;
9532 xsecdst
= KAUTH_FILESEC_NONE
;
9533 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
9534 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
9538 xsecdst
= KAUTH_FILESEC_NONE
;
9541 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
9543 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
9544 kauth_filesec_free(xsecdst
);
/*
 * umask: set the creation mask without touching extended security.
 *
 * Calls umask1() with uap->newmask and the UMASK_NOXSECURITY sentinel in
 * place of a filesec, and returns its result.  The previous mask is
 * delivered through retval by umask1().
 */
9550 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
9552 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
9556 * Void all references to file by ripping underlying filesystem
/*
 * revoke: revoke access to the device node named by a user path.
 *
 * Visible sequence:
 *   - a LOOKUP nameidata is initialized for OP_REVOKE with
 *     FOLLOW | AUDITVNPATH1 on a UIO_USERSPACE path;
 *   - the resulting vnode vp must be a character or block device
 *     (vnode_ischr / vnode_isblk), and a block device that is mounted on
 *     (vnode_ismountedon) is singled out;
 *   - mac_vnode_check_revoke() is consulted;
 *   - the owner uid is fetched with vnode_getattr(); if it differs from
 *     the uid of the calling credential, suser() must succeed;
 *   - VNOP_REVOKE(vp, REVOKEALL, ctx) is issued only when the vnode has a
 *     non-zero v_usecount or is aliased.
 *
 * retval is declared __unused.
 *
 * NOTE(review): the namei() call, the error codes chosen on each failing
 * check, and the cleanup/return path are not visible in this view.
 */
9561 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
9564 struct vnode_attr va
;
9565 vfs_context_t ctx
= vfs_context_current();
9567 struct nameidata nd
;
9569 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9579 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9584 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9590 error
= mac_vnode_check_revoke(ctx
, vp
);
9597 VATTR_WANTED(&va
, va_uid
);
9598 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9601 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9602 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9605 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9606 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9615 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9616 * The following system calls are designed to support features
9617 * which are specific to the HFS & HFS Plus volume formats
9622 * Obtain attribute information on objects in a directory while enumerating
9627 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9630 struct fileproc
*fp
;
9632 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9633 uint32_t count
= 0, savecount
= 0;
9634 uint32_t newstate
= 0;
9637 struct attrlist attributelist
;
9638 vfs_context_t ctx
= vfs_context_current();
9640 char uio_buf
[UIO_SIZEOF(1)];
9641 kauth_action_t action
;
9645 /* Get the attributes into kernel space */
9646 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9649 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9653 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9656 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9657 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9664 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9672 if ((error
= vnode_getwithref(vp
))) {
9676 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9679 if (vp
->v_type
!= VDIR
) {
9680 (void)vnode_put(vp
);
9686 error
= mac_vnode_check_readdir(ctx
, vp
);
9688 (void)vnode_put(vp
);
9693 /* set up the uio structure which will contain the users return buffer */
9694 loff
= fp
->f_fglob
->fg_offset
;
9695 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9696 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9699 * If the only item requested is file names, we can let that past with
9700 * just LIST_DIRECTORY. If they want any other attributes, that means
9701 * they need SEARCH as well.
9703 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9704 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9705 attributelist
.fileattr
|| attributelist
.dirattr
) {
9706 action
|= KAUTH_VNODE_SEARCH
;
9709 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9710 /* Believe it or not, uap->options only has 32-bits of valid
9711 * info, so truncate before extending again */
9713 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9714 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9718 (void) vnode_put(vp
);
9723 * If we've got the last entry of a directory in a union mount
9724 * then reset the eofflag and pretend there's still more to come.
9725 * The next call will again set eofflag and the buffer will be empty,
9726 * so traverse to the underlying directory and do the directory
9729 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
9730 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
9732 } else { // Empty buffer
9733 struct vnode
*tvp
= vp
;
9734 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
9735 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
9736 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9737 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
9739 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
9747 (void)vnode_put(vp
);
9752 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
9754 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
9757 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
9760 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
9764 *retval
= eofflag
; /* similar to getdirentries */
9768 return error
; /* return error earlier, an retval of 0 or 1 now */
9769 } /* end of getdirentriesattr system call */
9772 * Exchange data between two files
9777 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
9779 struct nameidata fnd
, snd
;
9780 vfs_context_t ctx
= vfs_context_current();
9784 u_int32_t nameiflags
;
9787 int flen
= 0, slen
= 0;
9788 int from_truncated
= 0, to_truncated
= 0;
9790 fse_info f_finfo
, s_finfo
;
9794 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9795 nameiflags
|= FOLLOW
;
9798 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
9799 UIO_USERSPACE
, uap
->path1
, ctx
);
9801 error
= namei(&fnd
);
9809 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
9810 UIO_USERSPACE
, uap
->path2
, ctx
);
9812 error
= namei(&snd
);
9821 * if the files are the same, return an inval error
9829 * if the files are on different volumes, return an error
9831 if (svp
->v_mount
!= fvp
->v_mount
) {
9836 /* If they're not files, return an error */
9837 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
9843 error
= mac_vnode_check_exchangedata(ctx
,
9849 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
9850 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
9856 need_fsevent(FSE_EXCHANGE
, fvp
) ||
9858 kauth_authorize_fileop_has_listeners()) {
9861 if (fpath
== NULL
|| spath
== NULL
) {
9866 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
9867 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
9870 get_fse_info(fvp
, &f_finfo
, ctx
);
9871 get_fse_info(svp
, &s_finfo
, ctx
);
9872 if (from_truncated
|| to_truncated
) {
9873 // set it here since only the f_finfo gets reported up to user space
9874 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
9878 /* Ok, make the call */
9879 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
9882 const char *tmpname
;
9884 if (fpath
!= NULL
&& spath
!= NULL
) {
9885 /* call out to allow 3rd party notification of exchangedata.
9886 * Ignore result of kauth_authorize_fileop call.
9888 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
9889 (uintptr_t)fpath
, (uintptr_t)spath
);
9893 tmpname
= fvp
->v_name
;
9894 fvp
->v_name
= svp
->v_name
;
9895 svp
->v_name
= tmpname
;
9897 if (fvp
->v_parent
!= svp
->v_parent
) {
9900 tmp
= fvp
->v_parent
;
9901 fvp
->v_parent
= svp
->v_parent
;
9902 svp
->v_parent
= tmp
;
9904 name_cache_unlock();
9907 if (fpath
!= NULL
&& spath
!= NULL
) {
9908 add_fsevent(FSE_EXCHANGE
, ctx
,
9909 FSE_ARG_STRING
, flen
, fpath
,
9910 FSE_ARG_FINFO
, &f_finfo
,
9911 FSE_ARG_STRING
, slen
, spath
,
9912 FSE_ARG_FINFO
, &s_finfo
,
9919 if (fpath
!= NULL
) {
9920 RELEASE_PATH(fpath
);
9922 if (spath
!= NULL
) {
9923 RELEASE_PATH(spath
);
9932 * Return (in MB) the amount of freespace on the given vnode's volume.
/*
 * freespace_mb: free space on the volume that holds vp.
 *
 * Refreshes the cached statistics of vp->v_mount through
 * vfs_update_vfsstat() (current VFS context, VFS_USER_EVENT), then
 * multiplies f_bavail by f_bsize in 64-bit arithmetic and shifts the
 * product right by 20 bits, i.e. divides by 2^20.  The prototype declares
 * a uint32_t result.
 *
 * The result of vfs_update_vfsstat() is not examined here.
 */
9934 uint32_t freespace_mb(vnode_t vp
);
9937 freespace_mb(vnode_t vp
)
9939 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9940 return ((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9941 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20;
9949 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9954 struct nameidata nd
;
9955 struct user64_fssearchblock searchblock
;
9956 struct searchstate
*state
;
9957 struct attrlist
*returnattrs
;
9958 struct timeval timelimit
;
9959 void *searchparams1
, *searchparams2
;
9961 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9962 uint32_t nummatches
;
9964 uint32_t nameiflags
;
9965 vfs_context_t ctx
= vfs_context_current();
9966 char uio_buf
[UIO_SIZEOF(1)];
9968 /* Start by copying in fsearchblock parameter list */
9969 if (IS_64BIT_PROCESS(p
)) {
9970 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9971 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9972 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9974 struct user32_fssearchblock tmp_searchblock
;
9976 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9977 // munge into 64-bit version
9978 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9979 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9980 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9981 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9983 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9984 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9986 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9987 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9988 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9989 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9990 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9991 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9992 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9998 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
10000 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
10001 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
10005 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
10006 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
10007 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
10010 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
10011 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
10012 /* assumes the size is still 556 bytes it will continue to work */
10014 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
10015 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
10017 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
10019 /* Now set up the various pointers to the correct place in our newly allocated memory */
10021 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
10022 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
10023 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
10025 /* Now copy in the stuff given our local variables. */
10027 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
10031 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
10035 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
10039 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
10044 * When searching a union mount, need to set the
10045 * start flag at the first call on each layer to
10046 * reset state for the new volume.
10048 if (uap
->options
& SRCHFS_START
) {
10049 state
->ss_union_layer
= 0;
10051 uap
->options
|= state
->ss_union_flags
;
10053 state
->ss_union_flags
= 0;
10056 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10057 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10058 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10059 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10060 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10063 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
10064 attrreference_t
* string_ref
;
10065 u_int32_t
* start_length
;
10066 user64_size_t param_length
;
10068 /* validate searchparams1 */
10069 param_length
= searchblock
.sizeofsearchparams1
;
10070 /* skip the word that specifies length of the buffer */
10071 start_length
= (u_int32_t
*) searchparams1
;
10072 start_length
= start_length
+ 1;
10073 string_ref
= (attrreference_t
*) start_length
;
10075 /* ensure no negative offsets or too big offsets */
10076 if (string_ref
->attr_dataoffset
< 0) {
10080 if (string_ref
->attr_length
> MAXPATHLEN
) {
10085 /* Check for pointer overflow in the string ref */
10086 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
10091 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
10095 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
10101 /* set up the uio structure which will contain the users return buffer */
10102 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10103 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
10106 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10107 nameiflags
|= FOLLOW
;
10109 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
10110 UIO_USERSPACE
, uap
->path
, ctx
);
10112 error
= namei(&nd
);
10120 * Switch to the root vnode for the volume
10122 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
10130 * If it's a union mount, the path lookup takes
10131 * us to the top layer. But we may need to descend
10132 * to a lower layer. For non-union mounts the layer
10135 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
10136 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
10140 vp
= vp
->v_mount
->mnt_vnodecovered
;
10146 error
= vnode_getwithref(vp
);
10154 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
10163 * If searchblock.maxmatches == 0, then skip the search. This has happened
10164 * before and sometimes the underlying code doesn't deal with it well.
10166 if (searchblock
.maxmatches
== 0) {
10172 * All right, we have everything we need, so let's make that call.
10174 * We keep special track of the return value from the file system:
10175 * EAGAIN is an acceptable error condition that shouldn't keep us
10176 * from copying out any results...
10179 fserror
= VNOP_SEARCHFS(vp
,
10182 &searchblock
.searchattrs
,
10183 (u_long
)searchblock
.maxmatches
,
10187 (u_long
)uap
->scriptcode
,
10188 (u_long
)uap
->options
,
10190 (struct searchstate
*) &state
->ss_fsstate
,
10194 * If it's a union mount we need to be called again
10195 * to search the mounted-on filesystem.
10197 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
10198 state
->ss_union_flags
= SRCHFS_START
;
10199 state
->ss_union_layer
++; // search next layer down
10207 /* Now copy out the stuff that needs copying out. That means the number of matches, the
10208 * search state. Everything was already put into the return buffer by the vop call. */
10210 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
10214 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
10222 FREE(searchparams1
, M_TEMP
);
10225 } /* end of searchfs system call */
10227 #else /* CONFIG_SEARCHFS */
10230 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
10235 #endif /* CONFIG_SEARCHFS */
10238 #if CONFIG_DATALESS_FILES
10241 * === Namespace Resolver Up-call Mechanism ===
10243 * When I/O is performed to a dataless file or directory (read, write,
10244 * lookup-in, etc.), the file system performs an upcall to the namespace
10245 * resolver (filecoordinationd) to materialize the object.
10247 * We need multiple up-calls to be in flight at once, and we need these
10248 * up-calls to be interruptible, thus the following implementation:
10250 * => The nspace_resolver_request represents the in-kernel request state.
10251 * It contains a request ID, storage space for the errno code returned
10252 * by filecoordinationd, and flags.
10254 * => The request ID is simply a global monotonically incrementing 32-bit
10255 * number. Outstanding requests are stored in a hash table, and the
10256 * hash function is extremely simple.
10258 * => When an upcall is to be made to filecoordinationd, a request structure
10259 * is allocated on the stack (it is small, and needs to live only during
10260 * the duration of the call to resolve_nspace_item_ext()). It is
10261 * initialized and inserted into the table. Some backpressure from
10262 * filecoordinationd is applied by limiting the numnber of entries that
10263 * can be inserted into the table (and thus limiting the number of
10264 * outstanding requests issued to filecoordinationd); waiting for an
10265 * available slot is interruptible.
10267 * => Once the request has been inserted into the table, the up-call is made
10268 * to filecoordinationd via a MiG-generated stub. The up-call returns
10269 * immediately and filecoordinationd processes the request asynchronously.
10271 * => The caller now waits for the request to complete. This is achieved by
10272 * sleeping on the address of the request structure and waiting for
10273 * filecoordinationd to mark the request structure as complete. This
10274 * is an interruptible sleep call; if interrupted, the request structure
10275 * is removed from the table and EINTR is returned to the caller. If
10276 * this occurs, an advisory up-call is made to filecoordinationd with
10277 * the request ID to indicate that the request can be aborted or
10278 * de-prioritized at the discretion of filecoordinationd.
10280 * => When filecoordinationd has completed the request, it signals completion
10281 * by writing to the vfs.nspace.complete sysctl node. Only a process
10282 * decorated as a namespace resolver can write to this sysctl node. The
10283 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10284 * The request ID is looked up in the table, and if the request is found,
10285 * the error code is stored in the request structure and a wakeup()
10286 * issued on the address of the request structure. If the request is not
10287 * found, we simply drop the completion notification, assuming that the
10288 * caller was interrupted.
10290 * => When the waiting thread wakes up, it extracts the error code from the
10291 * request structure, removes the request from the table, and returns the
10292 * error code to the calling function. Fini!
/*
 * In-kernel state for one outstanding namespace-resolver up-call.
 *
 *   r_hashlink        linkage used by LIST_INSERT_HEAD / LIST_REMOVE /
 *                     LIST_FOREACH on the request hash buckets.
 *   r_resolver_error  error value stored by
 *                     nspace_resolver_req_mark_complete() or by an
 *                     interrupted nspace_resolver_req_wait(), and returned
 *                     by nspace_resolver_req_wait().
 *   RRF_COMPLETE      bit in r_flags set when the request is marked
 *                     complete and tested by the wait loop.
 *
 * NOTE(review): the r_req_id and r_flags members are used by the
 * functions below, but their declarations are not visible in this view.
 */
10295 struct nspace_resolver_request
{
10296 LIST_ENTRY(nspace_resolver_request
) r_hashlink
;
10298 int r_resolver_error
;
10302 #define RRF_COMPLETE 0x0001
/*
 * next_nspace_req_id: hand out a request ID.
 *
 * Atomically adds 1 to a function-local static 32-bit counter with
 * OSAddAtomic() and returns the value that call produces.  The counter is
 * a plain uint32_t with no visible range handling.
 */
10305 next_nspace_req_id(void)
10307 static uint32_t next_req_id
;
10309 return OSAddAtomic(1, &next_req_id
);
/*
 * Bookkeeping for outstanding resolver requests.
 *
 *   NSPACE_RESOLVER_REQ_HASHSIZE     hash size constant (its use is not
 *                                    visible in this view).
 *   NSPACE_RESOLVER_MAX_OUTSTANDING  limit checked by
 *                                    nspace_resolver_req_add() before a
 *                                    request may be inserted.
 *   nspace_resolver_request_hashtbl  array of list heads, indexed through
 *                                    NSPACE_RESOLVER_HASH().
 *   nspace_resolver_request_hashmask mask applied to a request ID to pick
 *                                    a bucket.
 *   nspace_resolver_request_count    number of requests currently linked
 *                                    in; also the sleep channel used when
 *                                    the table is full.
 *   nspace_resolver_request_wait_slot set when a thread is sleeping for a
 *                                    free slot, cleared by the remover
 *                                    that issues the wakeup.
 *   nspace_resolver_request_hash_mutex mutex taken by NSPACE_REQ_LOCK()
 *                                    and handed to msleep() by the add and
 *                                    wait paths.
 */
10312 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10313 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10315 static LIST_HEAD(nspace_resolver_requesthead
,
10316 nspace_resolver_request
) * nspace_resolver_request_hashtbl
;
10317 static u_long nspace_resolver_request_hashmask
;
10318 static u_int nspace_resolver_request_count
;
10319 static bool nspace_resolver_request_wait_slot
;
10320 static lck_grp_t
*nspace_resolver_request_lck_grp
;
10321 static lck_mtx_t nspace_resolver_request_hash_mutex
;
10323 #define NSPACE_REQ_LOCK() \
10324 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10325 #define NSPACE_REQ_UNLOCK() \
10326 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10328 #define NSPACE_RESOLVER_HASH(req_id) \
10329 (&nspace_resolver_request_hashtbl[(req_id) & \
10330 nspace_resolver_request_hashmask])
/*
 * nspace_resolver_req_lookup: find an outstanding request by ID.
 *
 * Selects the bucket for req_id with NSPACE_RESOLVER_HASH() and walks it
 * with LIST_FOREACH, comparing each entry r_req_id against req_id.
 * Returns a pointer to struct nspace_resolver_request.
 *
 * NOTE(review): the return statements (match and no-match) are not
 * visible in this view; callers in this file compare the result against
 * NULL.  Presumably called with the request hash mutex held -- confirm.
 */
10332 static struct nspace_resolver_request
*
10333 nspace_resolver_req_lookup(uint32_t req_id
)
10335 struct nspace_resolver_requesthead
*bucket
;
10336 struct nspace_resolver_request
*req
;
10338 bucket
= NSPACE_RESOLVER_HASH(req_id
);
10339 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10340 if (req
->r_req_id
== req_id
) {
/*
 * nspace_resolver_req_add: insert a request into the outstanding table.
 *
 * While nspace_resolver_request_count is at or above
 * NSPACE_RESOLVER_MAX_OUTSTANDING the caller flags
 * nspace_resolver_request_wait_slot and sleeps on the address of the
 * counter with msleep(), passing the request hash mutex and
 * PVFS | PCATCH.  Once a slot is available the request is linked at the
 * head of the bucket chosen by its r_req_id and the counter is
 * incremented.  Under DIAGNOSTIC an assert checks that the ID is not
 * already present.
 *
 * NOTE(review): the lock/unlock calls, the handling of a non-zero msleep
 * result and the return statement are not visible in this view.
 */
10349 nspace_resolver_req_add(struct nspace_resolver_request
*req
)
10351 struct nspace_resolver_requesthead
*bucket
;
10354 while (nspace_resolver_request_count
>=
10355 NSPACE_RESOLVER_MAX_OUTSTANDING
) {
10356 nspace_resolver_request_wait_slot
= true;
10357 error
= msleep(&nspace_resolver_request_count
,
10358 &nspace_resolver_request_hash_mutex
,
10359 PVFS
| PCATCH
, "nspacerq", NULL
);
10365 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10367 assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
);
10368 #endif /* DIAGNOSTIC */
10369 LIST_INSERT_HEAD(bucket
, req
, r_hashlink
);
10370 nspace_resolver_request_count
++;
/*
 * nspace_resolver_req_remove: unlink a request from the outstanding table.
 *
 * Removes req from its hash chain with LIST_REMOVE and decrements
 * nspace_resolver_request_count.  If a thread flagged
 * nspace_resolver_request_wait_slot while waiting for room, the flag is
 * cleared and a wakeup() is issued on the address of the counter, which
 * is the channel nspace_resolver_req_add() sleeps on.  Under DIAGNOSTIC an
 * assert checks that the request is actually present.
 *
 * The computed bucket is only needed for LIST_REMOVE bookkeeping in the
 * visible code.  nspace_resolver_req_wait() calls this before it drops
 * the request hash mutex.
 */
10376 nspace_resolver_req_remove(struct nspace_resolver_request
*req
)
10378 struct nspace_resolver_requesthead
*bucket
;
10380 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
10382 assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
);
10383 #endif /* DIAGNOSTIC */
10384 LIST_REMOVE(req
, r_hashlink
);
10385 nspace_resolver_request_count
--;
10387 if (nspace_resolver_request_wait_slot
) {
10388 nspace_resolver_request_wait_slot
= false;
10389 wakeup(&nspace_resolver_request_count
);
/*
 * nspace_resolver_req_cancel: tell the resolver a request was abandoned.
 *
 * Obtains the filecoordinationd port with
 * host_get_filecoordinationd_port(); if that fails or the port is not
 * valid, nothing is sent.  Otherwise send_nspace_resolve_cancel() is
 * invoked with req_id, a failure is logged through os_log_error(), and
 * the send right is released with ipc_port_release_send().
 *
 * As the inline comment states, the message is advisory, so failures are
 * not propagated to the caller.
 */
10394 nspace_resolver_req_cancel(uint32_t req_id
)
10399 // Failures here aren't fatal -- the cancellation message
10400 // sent to the resolver is merely advisory.
10402 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10403 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10407 kr
= send_nspace_resolve_cancel(mp
, req_id
);
10408 if (kr
!= KERN_SUCCESS
) {
10409 os_log_error(OS_LOG_DEFAULT
,
10410 "NSPACE send_nspace_resolve_cancel failure: %d", kr
);
10413 ipc_port_release_send(mp
);
/*
 * nspace_resolver_req_wait: block until a request completes or the sleep
 * is interrupted.
 *
 * Loops while RRF_COMPLETE is clear in req->r_flags, sleeping on the
 * address of the request with msleep() (request hash mutex,
 * PVFS | PCATCH).  A non-zero msleep result other than ERESTART stores an
 * error in req->r_resolver_error (EINTR is preserved as EINTR) and
 * requests that a cancel message be sent.  Afterwards the request is
 * removed from the table, the mutex is released, the optional advisory
 * cancel is sent via nspace_resolver_req_cancel(), and
 * req->r_resolver_error is returned.
 *
 * NOTE(review): the lock acquisition, the alternative error value for
 * non-EINTR failures and the loop exit statement are not visible in this
 * view.
 */
10417 nspace_resolver_req_wait(struct nspace_resolver_request
*req
)
10419 bool send_cancel_message
= false;
10424 while ((req
->r_flags
& RRF_COMPLETE
) == 0) {
10425 error
= msleep(req
, &nspace_resolver_request_hash_mutex
,
10426 PVFS
| PCATCH
, "nspace", NULL
);
10427 if (error
&& error
!= ERESTART
) {
10428 req
->r_resolver_error
= (error
== EINTR
) ? EINTR
:
10430 send_cancel_message
= true;
10435 nspace_resolver_req_remove(req
);
10437 NSPACE_REQ_UNLOCK();
10439 if (send_cancel_message
) {
10440 nspace_resolver_req_cancel(req
->r_req_id
);
10443 return req
->r_resolver_error
;
/*
 * nspace_resolver_req_mark_complete: record the outcome of a request.
 *
 * Stores resolver_error in req->r_resolver_error and sets RRF_COMPLETE in
 * req->r_flags, which is the condition nspace_resolver_req_wait() loops
 * on.
 *
 * NOTE(review): the wakeup of the waiting thread is presumably issued
 * right after these stores; that statement is not visible in this view.
 */
10447 nspace_resolver_req_mark_complete(
10448 struct nspace_resolver_request
*req
,
10449 int resolver_error
)
10451 req
->r_resolver_error
= resolver_error
;
10452 req
->r_flags
|= RRF_COMPLETE
;
/*
 * nspace_resolver_req_completed: deliver a completion for a request ID.
 *
 * Looks the request up by req_id and, when it is found, marks it complete
 * with resolver_error through nspace_resolver_req_mark_complete().  The
 * request hash mutex is released before returning.  Per the inline
 * comment, a completion whose request is no longer in the table is
 * silently dropped.
 *
 * NOTE(review): the matching NSPACE_REQ_LOCK() call and the test of the
 * lookup result are not visible in this view.
 */
10457 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
)
10459 struct nspace_resolver_request
*req
;
10463 // If we don't find the request corresponding to our req_id,
10464 // just drop the completion signal on the floor; it's likely
10465 // that the requester interrupted with a signal.
10467 req
= nspace_resolver_req_lookup(req_id
);
10469 nspace_resolver_req_mark_complete(req
, resolver_error
);
10472 NSPACE_REQ_UNLOCK();
/*
 * nspace_resolver_proc records the process currently registered as the
 * namespace resolver (set in nspace_resolver_set_proc_state()).
 *
 * nspace_resolver_get_proc_state: report whether p is that resolver.
 * *is_resolver is set to 1 only when p has P_LNSPACE_RESOLVER in p_lflag
 * AND p is the process stored in nspace_resolver_proc; otherwise 0.
 *
 * NOTE(review): any locking around these reads and the return statement
 * are not visible in this view.
 */
10475 static struct proc
*nspace_resolver_proc
;
10478 nspace_resolver_get_proc_state(struct proc
*p
, int *is_resolver
)
10480 *is_resolver
= ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10481 p
== nspace_resolver_proc
) ? 1 : 0;
/*
 * nspace_resolver_set_proc_state: register or unregister p as the
 * namespace resolver.
 *
 * Visible checks: the current VFS context must be superuser
 * (vfs_context_issuser) and its credential must pass
 * priv_check_cred(PRIV_VFS_DATALESS_RESOLVER).  When no resolver is
 * registered yet (nspace_resolver_proc == NULL), p gets
 * P_LNSPACE_RESOLVER in p_lflag and is recorded in nspace_resolver_proc;
 * the request hash mutex is released afterwards.  The other path hands p
 * to nspace_resolver_exited(), which per the inline comment verifies that
 * p is the resolver before clearing state.
 *
 * NOTE(review): the test on is_resolver that selects between the two
 * paths, the error returned when a resolver already exists, and the lock
 * acquisition are not visible in this view.
 */
10486 nspace_resolver_set_proc_state(struct proc
*p
, int is_resolver
)
10488 vfs_context_t ctx
= vfs_context_current();
10492 // The system filecoordinationd runs as uid == 0. This also
10493 // has the nice side-effect of filtering out filecoordinationd
10494 // running in the simulator.
10496 if (!vfs_context_issuser(ctx
)) {
10500 error
= priv_check_cred(vfs_context_ucred(ctx
),
10501 PRIV_VFS_DATALESS_RESOLVER
, 0);
10509 if (nspace_resolver_proc
== NULL
) {
10511 p
->p_lflag
|= P_LNSPACE_RESOLVER
;
10513 nspace_resolver_proc
= p
;
10518 NSPACE_REQ_UNLOCK();
10520 // This is basically just like the exit case.
10521 // nspace_resolver_exited() will verify that the
10522 // process is the resolver, and will clear the
10524 nspace_resolver_exited(p
);
/*
 * nspace_materialization_get_proc_state: report the process-wide
 * materialization setting of p through *is_prevented.
 *
 * The visible test is true when p is flagged P_LNSPACE_RESOLVER or when
 * P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES is clear in p_vfs_iopolicy.
 *
 * NOTE(review): the assignments to *is_prevented in each branch are not
 * visible in this view; presumably the condition above maps to
 * prevented = 1 -- confirm.
 */
10531 nspace_materialization_get_proc_state(struct proc
*p
, int *is_prevented
)
10533 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) != 0 ||
10534 (p
->p_vfs_iopolicy
&
10535 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) {
/*
 * nspace_materialization_set_proc_state: change the process-wide
 * materialization setting of p.
 *
 * A process flagged P_LNSPACE_RESOLVER cannot be switched: the call
 * returns 0 when is_prevented is non-zero and EBUSY when the caller asks
 * to allow materialization.
 *
 * For any other process the P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES bit
 * of p_vfs_iopolicy is updated atomically: cleared with OSBitAndAtomic16
 * when is_prevented is non-zero, set with OSBitOrAtomic16 otherwise.
 */
10544 nspace_materialization_set_proc_state(struct proc
*p
, int is_prevented
)
10546 if (p
->p_lflag
& P_LNSPACE_RESOLVER
) {
10547 return is_prevented
? 0 : EBUSY
;
10550 if (is_prevented
) {
10551 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
);
10553 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
);
/*
 * nspace_materialization_get_thread_state: report the per-thread setting.
 *
 * Fetches the uthread of the current thread with get_bsdthread_info() and
 * stores 1 in *is_prevented when UT_NSPACE_NODATALESSFAULTS is set in its
 * uu_flag, 0 otherwise.
 */
10559 nspace_materialization_get_thread_state(int *is_prevented
)
10561 uthread_t ut
= get_bsdthread_info(current_thread());
10563 *is_prevented
= (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0;
/*
 * nspace_materialization_set_thread_state: change the per-thread setting.
 *
 * Operates on the uthread of the current thread: sets
 * UT_NSPACE_NODATALESSFAULTS in uu_flag when is_prevented is non-zero and
 * clears it otherwise.  The flag update is a plain read-modify-write on
 * the calling thread own uthread.
 */
10568 nspace_materialization_set_thread_state(int is_prevented
)
10570 uthread_t ut
= get_bsdthread_info(current_thread());
10572 if (is_prevented
) {
10573 ut
->uu_flag
|= UT_NSPACE_NODATALESSFAULTS
;
10575 ut
->uu_flag
&= ~UT_NSPACE_NODATALESSFAULTS
;
/*
 * nspace_materialization_is_prevented: decide whether the current caller
 * may trigger materialization of a dataless object.
 *
 * The checks are made in this order, first match wins:
 *   1. the current VFS context is the kernel context;
 *   2. the context is a dataless manipulator
 *      (vfs_context_is_dataless_manipulator) -- returns EJUSTRETURN;
 *   3. the thread has UT_NSPACE_NODATALESSFAULTS set;
 *   4. the thread has UT_NSPACE_FORCEDATALESSFAULTS set;
 *   5. the process iopolicy has
 *      P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES set;
 *   6. default case.
 *
 * NOTE(review): only the EJUSTRETURN result of step 2 is visible in this
 * view.  The surrounding comments indicate EDEADLK for the kernel context
 * and for the default case, and that step 5 lets the caller proceed; the
 * actual return statements should be confirmed against the full source.
 */
10581 nspace_materialization_is_prevented(void)
10583 proc_t p
= current_proc();
10584 uthread_t ut
= (uthread_t
)get_bsdthread_info(current_thread());
10585 vfs_context_t ctx
= vfs_context_current();
10588 * Kernel context ==> return EDEADLK, as we would with any random
10589 * process decorated as no-materialize.
10591 if (ctx
== vfs_context_kernel()) {
10596 * If the process has the dataless-manipulation entitlement,
10597 * materialization is prevented, and depending on the kind
10598 * of file system operation, things get to proceed as if the
10599 * object is not dataless.
10601 if (vfs_context_is_dataless_manipulator(ctx
)) {
10602 return EJUSTRETURN
;
10606 * Per-thread decorations override any process-wide decorations.
10607 * (Foundation uses this, and this overrides even the dataless-
10608 * manipulation entitlement so as to make API contracts consistent.)
10611 if (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) {
10614 if (ut
->uu_flag
& UT_NSPACE_FORCEDATALESSFAULTS
) {
10620 * If the process's iopolicy specifies that dataless files
10621 * can be materialized, then we let it go ahead.
10623 if (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) {
10628 * The default behavior is to not materialize dataless files;
10629 * return to the caller that deadlock was detected.
10634 /* the vfs.nspace branch */
10635 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge");
/*
 * sysctl_nspace_resolver: handler for the vfs.nspace.resolver node.
 *
 * Reads the current resolver state of the requesting process (req->p)
 * with nspace_resolver_get_proc_state(), exchanges it with user space via
 * sysctl_io_number() as an int, and when that succeeded and the value was
 * changed, applies the new value with nspace_resolver_set_proc_state().
 *
 * The node is registered below as an int with
 * CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED; the privilege checks are
 * made inside nspace_resolver_set_proc_state().
 *
 * oidp, arg1 and arg2 are unused.
 *
 * NOTE(review): the error checks after the get call and the return
 * statement are not visible in this view.
 */
10638 sysctl_nspace_resolver(__unused
struct sysctl_oid
*oidp
,
10639 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10641 struct proc
*p
= req
->p
;
10642 int new_value
, old_value
, changed
= 0;
10645 error
= nspace_resolver_get_proc_state(p
, &old_value
);
10650 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10652 if (error
== 0 && changed
) {
10653 error
= nspace_resolver_set_proc_state(p
, new_value
);
10658 /* decorate this process as the dataless file resolver */
10659 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
,
10660 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10661 0, 0, sysctl_nspace_resolver
, "I", "");
10664 sysctl_nspace_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10665 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10667 struct proc
*p
= req
->p
;
10668 int new_value
, old_value
, changed
= 0;
10671 error
= nspace_materialization_get_proc_state(p
, &old_value
);
10676 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10678 if (error
== 0 && changed
) {
10679 error
= nspace_materialization_set_proc_state(p
, new_value
);
10684 /* decorate this process as not wanting to materialize dataless files */
10685 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
,
10686 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10687 0, 0, sysctl_nspace_prevent_materialization
, "I", "");
10690 sysctl_nspace_thread_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10691 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10693 int new_value
, old_value
, changed
= 0;
10696 error
= nspace_materialization_get_thread_state(&old_value
);
10701 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10703 if (error
== 0 && changed
) {
10704 error
= nspace_materialization_set_thread_state(new_value
);
10709 /* decorate this thread as not wanting to materialize dataless files */
10710 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
,
10711 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10712 0, 0, sysctl_nspace_thread_prevent_materialization
, "I", "");
10715 sysctl_nspace_complete(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
,
10716 __unused
int arg2
, struct sysctl_req
*req
)
10718 struct proc
*p
= req
->p
;
10719 uint32_t req_status
[2] = { 0, 0 };
10720 int error
, is_resolver
, changed
= 0;
10722 error
= nspace_resolver_get_proc_state(p
, &is_resolver
);
10727 if (!is_resolver
) {
10731 error
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
),
10738 * req_status[0] is the req_id
10740 * req_status[1] is the errno
10742 if (error
== 0 && changed
) {
10743 nspace_resolver_req_completed(req_status
[0],
10744 (int)req_status
[1]);
10749 /* Resolver reports completed reqs here. */
10750 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
,
10751 CTLTYPE_OPAQUE
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10752 0, 0, sysctl_nspace_complete
, "-", "");
10754 #endif /* CONFIG_DATALESS_FILES */
10756 #if CONFIG_DATALESS_FILES
10757 #define __no_dataless_unused /* nothing */
10759 #define __no_dataless_unused __unused
10763 nspace_resolver_init(void)
10765 #if CONFIG_DATALESS_FILES
10766 nspace_resolver_request_lck_grp
=
10767 lck_grp_alloc_init("file namespace resolver", NULL
);
10769 lck_mtx_init(&nspace_resolver_request_hash_mutex
,
10770 nspace_resolver_request_lck_grp
, NULL
);
10772 nspace_resolver_request_hashtbl
=
10773 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
,
10774 M_VNODE
/* XXX */, &nspace_resolver_request_hashmask
);
10775 #endif /* CONFIG_DATALESS_FILES */
10779 nspace_resolver_exited(struct proc
*p __no_dataless_unused
)
10781 #if CONFIG_DATALESS_FILES
10782 struct nspace_resolver_requesthead
*bucket
;
10783 struct nspace_resolver_request
*req
;
10788 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10789 p
== nspace_resolver_proc
) {
10790 for (idx
= 0; idx
<= nspace_resolver_request_hashmask
; idx
++) {
10791 bucket
= &nspace_resolver_request_hashtbl
[idx
];
10792 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10793 nspace_resolver_req_mark_complete(req
,
10797 nspace_resolver_proc
= NULL
;
10800 NSPACE_REQ_UNLOCK();
10801 #endif /* CONFIG_DATALESS_FILES */
10805 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
10807 return resolve_nspace_item_ext(vp
, op
, NULL
);
10810 #define DATALESS_RESOLVER_ENTITLEMENT \
10811 "com.apple.private.vfs.dataless-resolver"
10812 #define DATALESS_MANIPULATION_ENTITLEMENT \
10813 "com.apple.private.vfs.dataless-manipulation"
10816 * Return TRUE if the vfs context is associated with a process entitled
10817 * for dataless manipulation.
10819 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10820 * complication around CONFIG_DATALESS_FILES.
10823 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
)
10825 #if CONFIG_DATALESS_FILES
10826 assert(ctx
->vc_thread
== current_thread());
10827 task_t
const task
= current_task();
10828 return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) ||
10829 IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
);
10832 #endif /* CONFIG_DATALESS_FILES */
10836 resolve_nspace_item_ext(
10837 struct vnode
*vp __no_dataless_unused
,
10838 uint64_t op __no_dataless_unused
,
10839 void *arg __unused
)
10841 #if CONFIG_DATALESS_FILES
10847 struct nspace_resolver_request req
;
10849 // only allow namespace events on regular files, directories and symlinks.
10850 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
10855 // if this is a snapshot event and the vnode is on a
10856 // disk image just pretend nothing happened since any
10857 // change to the disk image will cause the disk image
10858 // itself to get backed up and this avoids multi-way
10859 // deadlocks between the snapshot handler and the ever
10860 // popular diskimages-helper process. the variable
10861 // nspace_allow_virtual_devs allows this behavior to
10862 // be overridden (for use by the Mobile TimeMachine
10863 // testing infrastructure which uses disk images)
10865 if (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) {
10866 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled");
10870 error
= nspace_materialization_is_prevented();
10872 os_log_debug(OS_LOG_DEFAULT
,
10873 "NSPACE process/thread is decorated as no-materialization");
10877 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10878 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10879 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port");
10880 // Treat this like being unable to access the backing
10885 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
10886 if (path
== NULL
) {
10888 goto out_release_port
;
10890 path_len
= MAXPATHLEN
;
10892 error
= vn_getpath(vp
, path
, &path_len
);
10894 int xxx_rdar44371223
; /* XXX Mig bug */
10895 req
.r_req_id
= next_nspace_req_id();
10896 req
.r_resolver_error
= 0;
10900 error
= nspace_resolver_req_add(&req
);
10901 NSPACE_REQ_UNLOCK();
10903 goto out_release_port
;
10906 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call");
10907 kr
= send_nspace_resolve_path(mp
, req
.r_req_id
,
10908 current_proc()->p_pid
, (uint32_t)(op
& 0xffffffff),
10909 path
, &xxx_rdar44371223
);
10910 if (kr
!= KERN_SUCCESS
) {
10911 // Also treat this like being unable to access
10912 // the backing store server.
10913 os_log_error(OS_LOG_DEFAULT
,
10914 "NSPACE resolve_path failure: %d", kr
);
10918 nspace_resolver_req_remove(&req
);
10919 NSPACE_REQ_UNLOCK();
10920 goto out_release_port
;
10923 // Give back the memory we allocated earlier while
10924 // we wait; we no longer need it.
10925 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10928 // Request has been submitted to the resolver.
10929 // Now (interruptibly) wait for completion.
10930 // Upon return, the request will have been removed
10931 // from the lookup table.
10932 error
= nspace_resolver_req_wait(&req
);
10936 if (path
!= NULL
) {
10937 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
10939 ipc_port_release_send(mp
);
10944 #endif /* CONFIG_DATALESS_FILES */
10948 nspace_snapshot_event(__unused vnode_t vp
, __unused
time_t ctime
,
10949 __unused
uint64_t op_type
, __unused
void *arg
)
10956 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
10958 struct vnode_attr va
;
10962 VATTR_WANTED(&va
, va_fsid
);
10963 VATTR_WANTED(&va
, va_fileid
);
10965 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
10966 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
10969 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
10977 static unsigned long
10978 fsctl_bogus_command_compat(unsigned long cmd
)
10981 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10982 return FSIOC_SYNC_VOLUME
;
10983 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10984 return FSIOC_ROUTEFS_SETROUTEID
;
10985 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10986 return FSIOC_SET_PACKAGE_EXTS
;
10987 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10988 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
10989 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10990 return DISK_CONDITIONER_IOC_GET
;
10991 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10992 return DISK_CONDITIONER_IOC_SET
;
10993 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10994 return FSIOC_FIOSEEKHOLE
;
10995 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10996 return FSIOC_FIOSEEKDATA
;
10997 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10998 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
10999 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
11000 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
11007 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
)
11009 return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
);
11013 * Make a filesystem-specific control call:
11017 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
11022 #define STK_PARAMS 128
11023 char stkbuf
[STK_PARAMS
] = {0};
11024 caddr_t data
, memp
;
11025 vnode_t vp
= *arg_vp
;
11027 if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
11031 cmd
= fsctl_bogus_command_compat(cmd
);
11033 size
= IOCPARM_LEN(cmd
);
11034 if (size
> IOCPARM_MAX
) {
11038 is64bit
= proc_is64bit(p
);
11042 if (size
> sizeof(stkbuf
)) {
11043 if ((memp
= (caddr_t
)kalloc(size
)) == 0) {
11051 if (cmd
& IOC_IN
) {
11053 error
= copyin(udata
, data
, size
);
11062 *(user_addr_t
*)data
= udata
;
11064 *(uint32_t *)data
= (uint32_t)udata
;
11067 } else if ((cmd
& IOC_OUT
) && size
) {
11069 * Zero the buffer so the user always
11070 * gets back something deterministic.
11073 } else if (cmd
& IOC_VOID
) {
11075 *(user_addr_t
*)data
= udata
;
11077 *(uint32_t *)data
= (uint32_t)udata
;
11081 /* Check to see if it's a generic command */
11083 case FSIOC_SYNC_VOLUME
: {
11084 struct vfs_attr vfa
;
11085 mount_t mp
= vp
->v_mount
;
11089 /* record vid of vp so we can drop it below. */
11090 uint32_t vvid
= vp
->v_id
;
11093 * Then grab mount_iterref so that we can release the vnode.
11094 * Without this, a thread may call vnode_iterate_prepare then
11095 * get into a deadlock because we've never released the root vp
11097 error
= mount_iterref(mp
, 0);
11104 if (*(uint32_t*)data
& FSCTL_SYNC_WAIT
) {
11109 * If the filesystem supports multiple filesystems in a
11110 * partition (e.g., APFS volumes in a container), it knows
11111 * that the waitfor argument to VFS_SYNC is a set of flags.
11113 VFSATTR_INIT(&vfa
);
11114 VFSATTR_WANTED(&vfa
, f_capabilities
);
11115 if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) &&
11116 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
11117 ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) &&
11118 ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) {
11122 /* issue the sync for this volume */
11123 (void)sync_callback(mp
, &arg
);
11126 * Then release the mount_iterref once we're done syncing; it's not
11127 * needed for the VNOP_IOCTL below
11129 mount_iterdrop(mp
);
11131 if (arg
& FSCTL_SYNC_FULLSYNC
) {
11132 /* re-obtain vnode iocount on the root vp, if possible */
11133 error
= vnode_getwithvid(vp
, vvid
);
11135 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
11139 /* mark the argument VP as having been released */
11144 case FSIOC_ROUTEFS_SETROUTEID
: {
11146 char routepath
[MAXPATHLEN
];
11149 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11152 bzero(routepath
, MAXPATHLEN
);
11153 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
11157 error
= routefs_kernel_mount(routepath
);
11165 case FSIOC_SET_PACKAGE_EXTS
: {
11166 user_addr_t ext_strings
;
11167 uint32_t num_entries
;
11168 uint32_t max_width
;
11170 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
11174 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
11175 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
11176 // either you're 64-bit and passed a 64-bit struct or
11177 // you're 32-bit and passed a 32-bit struct. otherwise
11184 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
11185 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
11186 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
11188 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
11189 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
11190 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
11192 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
11196 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
11198 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11202 mount_lock(vp
->v_mount
);
11203 if (data
[0] != 0) {
11204 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
11205 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
11206 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11207 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
11208 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
11211 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11212 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
11214 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
11215 vp
->v_mount
->fstypename_override
[0] = '\0';
11217 mount_unlock(vp
->v_mount
);
11222 case DISK_CONDITIONER_IOC_GET
: {
11223 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11227 case DISK_CONDITIONER_IOC_SET
: {
11228 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11232 case FSIOC_CAS_BSDFLAGS
: {
11233 struct fsioc_cas_bsdflags
*cas
= (struct fsioc_cas_bsdflags
*)data
;
11234 struct vnode_attr va
;
11237 VATTR_SET(&va
, va_flags
, cas
->new_flags
);
11239 error
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
);
11243 case FSIOC_FD_ONLY_OPEN_ONCE
: {
11244 if (vnode_usecount(vp
) > 1) {
11253 /* other, known commands shouldn't be passed down here */
11256 case F_TRIM_ACTIVE_FILE
:
11258 case F_TRANSCODEKEY
:
11259 case F_GETPROTECTIONLEVEL
:
11260 case F_GETDEFAULTPROTLEVEL
:
11261 case F_MAKECOMPRESSED
:
11262 case F_SET_GREEDY_MODE
:
11263 case F_SETSTATICCONTENT
:
11265 case F_SETBACKINGSTORE
:
11266 case F_GETPATH_MTMINFO
:
11267 case APFSIOC_REVERT_TO_SNAPSHOT
:
11268 case FSIOC_FIOSEEKHOLE
:
11269 case FSIOC_FIOSEEKDATA
:
11270 case HFS_GET_BOOT_INFO
:
11271 case HFS_SET_BOOT_INFO
:
11275 case F_BARRIERFSYNC
:
11281 /* Invoke the filesystem-specific code */
11282 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
11284 } /* end switch stmt */
11287 * if no errors, copy any data to user. Size was
11288 * already set and checked above.
11290 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
11291 error
= copyout(data
, udata
, size
);
11304 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
11307 struct nameidata nd
;
11310 vfs_context_t ctx
= vfs_context_current();
11312 AUDIT_ARG(cmd
, uap
->cmd
);
11313 AUDIT_ARG(value32
, uap
->options
);
11314 /* Get the vnode for the file we are getting info on: */
11317 // if we come through fsctl() then the file is by definition not open.
11318 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11319 // lest the caller mistakenly thinks the only open is their own (but in
11320 // reality it's someone else's).
11322 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
11325 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
11326 nameiflags
|= FOLLOW
;
11328 if (uap
->cmd
== FSIOC_FIRMLINK_CTL
) {
11329 nameiflags
|= (CN_FIRMLINK_NOFOLLOW
| NOCACHE
);
11331 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
11332 UIO_USERSPACE
, uap
->path
, ctx
);
11333 if ((error
= namei(&nd
))) {
11340 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
11346 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11356 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11360 vfs_context_t ctx
= vfs_context_current();
11363 AUDIT_ARG(fd
, uap
->fd
);
11364 AUDIT_ARG(cmd
, uap
->cmd
);
11365 AUDIT_ARG(value32
, uap
->options
);
11367 /* Get the vnode for the file we are getting info on: */
11368 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11372 if ((error
= vnode_getwithref(vp
))) {
11378 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
11385 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11389 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11396 /* end of fsctl system call */
11399 * Retrieve the data of an extended attribute.
11402 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11405 struct nameidata nd
;
11406 char attrname
[XATTR_MAXNAMELEN
+ 1];
11407 vfs_context_t ctx
= vfs_context_current();
11409 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11410 size_t attrsize
= 0;
11412 u_int32_t nameiflags
;
11414 char uio_buf
[UIO_SIZEOF(1)];
11416 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11420 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11421 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11422 if ((error
= namei(&nd
))) {
11428 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11432 if (xattr_protected(attrname
)) {
11433 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
11439 * the specific check for 0xffffffff is a hack to preserve
11440 * binary compatibility in K64 with applications that discovered
11441 * that passing in a buf pointer and a size of -1 resulted in
11442 * just the size of the indicated extended attribute being returned.
11443 * this isn't part of the documented behavior, but because of the
11444 * original implementation's check for "uap->size > 0", this behavior
11445 * was allowed. In K32 that check turned into a signed comparison
11446 * even though uap->size is unsigned... in K64, we blow by that
11447 * check because uap->size is unsigned and doesn't get sign smeared
11448 * in the munger for a 32 bit user app. we also need to add a
11449 * check to limit the maximum size of the buffer being passed in...
11450 * unfortunately, the underlying filesystems seem to just malloc
11451 * the requested size even if the actual extended attribute is tiny.
11452 * because that malloc is for kernel wired memory, we have to put a
11453 * sane limit on it.
11455 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11456 * U64 running on K64 will yield -1 (64 bits wide)
11457 * U32/U64 running on K32 will yield -1 (32 bits wide)
11459 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
11464 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11465 uap
->size
= XATTR_MAXSIZE
;
11468 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11469 &uio_buf
[0], sizeof(uio_buf
));
11470 uio_addiov(auio
, uap
->value
, uap
->size
);
11473 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11478 *retval
= uap
->size
- uio_resid(auio
);
11480 *retval
= (user_ssize_t
)attrsize
;
11487 * Retrieve the data of an extended attribute.
11490 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11493 char attrname
[XATTR_MAXNAMELEN
+ 1];
11495 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11496 size_t attrsize
= 0;
11499 char uio_buf
[UIO_SIZEOF(1)];
11501 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11505 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11508 if ((error
= vnode_getwithref(vp
))) {
11509 file_drop(uap
->fd
);
11512 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11516 if (xattr_protected(attrname
)) {
11520 if (uap
->value
&& uap
->size
> 0) {
11521 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11522 &uio_buf
[0], sizeof(uio_buf
));
11523 uio_addiov(auio
, uap
->value
, uap
->size
);
11526 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11528 (void)vnode_put(vp
);
11529 file_drop(uap
->fd
);
11532 *retval
= uap
->size
- uio_resid(auio
);
11534 *retval
= (user_ssize_t
)attrsize
;
11540 * Set the data of an extended attribute.
11543 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11546 struct nameidata nd
;
11547 char attrname
[XATTR_MAXNAMELEN
+ 1];
11548 vfs_context_t ctx
= vfs_context_current();
11550 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11552 u_int32_t nameiflags
;
11554 char uio_buf
[UIO_SIZEOF(1)];
11556 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11560 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11562 if (error
== EPERM
) {
11563 /* if the string won't fit in attrname, copyinstr emits EPERM */
11564 return ENAMETOOLONG
;
11566 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11569 if (xattr_protected(attrname
)) {
11572 if (uap
->size
!= 0 && uap
->value
== 0) {
11576 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11577 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11578 if ((error
= namei(&nd
))) {
11584 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11585 &uio_buf
[0], sizeof(uio_buf
));
11586 uio_addiov(auio
, uap
->value
, uap
->size
);
11588 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
11591 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11602 * Set the data of an extended attribute.
11605 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
11608 char attrname
[XATTR_MAXNAMELEN
+ 1];
11610 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11613 char uio_buf
[UIO_SIZEOF(1)];
11615 vfs_context_t ctx
= vfs_context_current();
11618 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11622 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11624 if (error
== EPERM
) {
11625 /* if the string won't fit in attrname, copyinstr emits EPERM */
11626 return ENAMETOOLONG
;
11628 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11631 if (xattr_protected(attrname
)) {
11634 if (uap
->size
!= 0 && uap
->value
== 0) {
11637 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11640 if ((error
= vnode_getwithref(vp
))) {
11641 file_drop(uap
->fd
);
11644 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11645 &uio_buf
[0], sizeof(uio_buf
));
11646 uio_addiov(auio
, uap
->value
, uap
->size
);
11648 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
11651 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11657 file_drop(uap
->fd
);
11663 * Remove an extended attribute.
11664 * XXX Code duplication here.
11667 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
11670 struct nameidata nd
;
11671 char attrname
[XATTR_MAXNAMELEN
+ 1];
11672 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11673 vfs_context_t ctx
= vfs_context_current();
11675 u_int32_t nameiflags
;
11678 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11682 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11686 if (xattr_protected(attrname
)) {
11689 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11690 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11691 if ((error
= namei(&nd
))) {
11697 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
11700 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11711 * Remove an extended attribute.
11712 * XXX Code duplication here.
11715 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
11718 char attrname
[XATTR_MAXNAMELEN
+ 1];
11722 vfs_context_t ctx
= vfs_context_current();
11725 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11729 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11733 if (xattr_protected(attrname
)) {
11736 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11739 if ((error
= vnode_getwithref(vp
))) {
11740 file_drop(uap
->fd
);
11744 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
11747 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11753 file_drop(uap
->fd
);
11759 * Retrieve the list of extended attribute names.
11760 * XXX Code duplication here.
11763 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11766 struct nameidata nd
;
11767 vfs_context_t ctx
= vfs_context_current();
11769 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11770 size_t attrsize
= 0;
11771 u_int32_t nameiflags
;
11773 char uio_buf
[UIO_SIZEOF(1)];
11775 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11779 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11780 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11781 if ((error
= namei(&nd
))) {
11786 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11787 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11788 &uio_buf
[0], sizeof(uio_buf
));
11789 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11792 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11796 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11798 *retval
= (user_ssize_t
)attrsize
;
11804 * Retrieve the list of extended attribute names.
11805 * XXX Code duplication here.
11808 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11812 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11813 size_t attrsize
= 0;
11815 char uio_buf
[UIO_SIZEOF(1)];
11817 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11821 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11824 if ((error
= vnode_getwithref(vp
))) {
11825 file_drop(uap
->fd
);
11828 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11829 auio
= uio_createwithbuffer(1, 0, spacetype
,
11830 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11831 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11834 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11837 file_drop(uap
->fd
);
11839 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11841 *retval
= (user_ssize_t
)attrsize
;
11847 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11848 vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
)
11851 struct mount
*mp
= NULL
;
11855 /* maximum number of times to retry build_path */
11856 unsigned int retries
= 0x10;
11858 if (bufsize
> PAGE_SIZE
) {
11867 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11868 error
= ENOTSUP
; /* unexpected failure */
11874 struct vfs_attr vfsattr
;
11875 int use_vfs_root
= TRUE
;
11877 VFSATTR_INIT(&vfsattr
);
11878 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
11879 if (!(options
& FSOPT_ISREALFSID
) &&
11880 vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 &&
11881 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
11882 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) &&
11883 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) {
11884 use_vfs_root
= FALSE
;
11888 if (use_vfs_root
) {
11889 error
= VFS_ROOT(mp
, &vp
, ctx
);
11891 error
= VFS_VGET(mp
, objid
, &vp
, ctx
);
11894 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11897 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11899 * If the fileid isn't found and we're in a union
11900 * mount volume, then see if the fileid is in the
11901 * mounted-on volume.
11903 struct mount
*tmp
= mp
;
11904 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11906 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
11918 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11925 /* Obtain the absolute path to this vnode. */
11926 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11927 if (options
& FSOPT_NOFIRMLINKPATH
) {
11928 bpflags
|= BUILDPATH_NO_FIRMLINK
;
11930 bpflags
|= BUILDPATH_CHECK_MOVED
;
11931 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11935 /* there was a race building the path, try a few more times */
11936 if (error
== EAGAIN
) {
11947 AUDIT_ARG(text
, buf
);
11949 if (kdebug_enable
) {
11950 long dbg_parms
[NUMPARMS
];
11953 dbg_namelen
= (int)sizeof(dbg_parms
);
11955 if (length
< dbg_namelen
) {
11956 memcpy((char *)dbg_parms
, buf
, length
);
11957 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11959 dbg_namelen
= length
;
11961 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11964 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11965 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
11968 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11975 * Obtain the full pathname of a file system object by id.
11978 fsgetpath_extended(user_addr_t buf
, int bufsize
, user_addr_t user_fsid
, uint64_t objid
,
11979 uint32_t options
, user_ssize_t
*retval
)
11981 vfs_context_t ctx
= vfs_context_current();
11987 if (options
& ~(FSOPT_NOFIRMLINKPATH
| FSOPT_ISREALFSID
)) {
11991 if ((error
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11994 AUDIT_ARG(value32
, fsid
.val
[0]);
11995 AUDIT_ARG(value64
, objid
);
11996 /* Restrict output buffer size for now. */
11998 if (bufsize
> PAGE_SIZE
|| bufsize
<= 0) {
12001 MALLOC(realpath
, char *, bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
12002 if (realpath
== NULL
) {
12006 error
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
,
12013 error
= copyout((caddr_t
)realpath
, buf
, length
);
12015 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
12018 FREE(realpath
, M_TEMP
);
12024 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
12026 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12031 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args
*uap
, user_ssize_t
*retval
)
12033 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12034 uap
->options
, retval
);
12038 * Common routine to handle various flavors of statfs data heading out
12041 * Returns: 0 Success
12045 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
12046 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
12047 boolean_t partial_copy
)
12050 int my_size
, copy_size
;
12053 struct user64_statfs sfs
;
12054 my_size
= copy_size
= sizeof(sfs
);
12055 bzero(&sfs
, my_size
);
12056 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12057 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12058 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12059 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
12060 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
12061 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
12062 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
12063 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
12064 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
12065 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
12066 sfs
.f_fsid
= sfsp
->f_fsid
;
12067 sfs
.f_owner
= sfsp
->f_owner
;
12068 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12069 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12071 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12073 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12074 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12076 if (partial_copy
) {
12077 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12079 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12081 struct user32_statfs sfs
;
12083 my_size
= copy_size
= sizeof(sfs
);
12084 bzero(&sfs
, my_size
);
12086 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12087 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
12088 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12091 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12092 * have to fudge the numbers here in that case. We inflate the blocksize in order
12093 * to reflect the filesystem size as best we can.
12095 if ((sfsp
->f_blocks
> INT_MAX
)
12096 /* Hack for 4061702. I think the real fix is for Carbon to
12097 * look for some volume capability and not depend on hidden
12098 * semantics agreed between a FS and Carbon.
12099 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12100 * for Carbon to set the bNoVolumeSizes volume attribute.
12101 * Without this the webdavfs files cannot be copied onto
12102 * disk as they look huge. This change should not affect
12103 * XSAN as they should not be setting these to -1.
12105 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
12106 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
12107 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
12111 * Work out how far we have to shift the block count down to make it fit.
12112 * Note that it's possible to have to shift so far that the resulting
12113 * blocksize would be unreportably large. At that point, we will clip
12114 * any values that don't fit.
12116 * For safety's sake, we also ensure that f_iosize is never reported as
12117 * being smaller than f_bsize.
12119 for (shift
= 0; shift
< 32; shift
++) {
12120 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
12123 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
12127 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12128 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
12129 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
12130 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
12131 #undef __SHIFT_OR_CLIP
12132 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
12133 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
12135 /* filesystem is small enough to be reported honestly */
12136 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
12137 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
12138 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
12139 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
12140 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
12142 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
12143 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
12144 sfs
.f_fsid
= sfsp
->f_fsid
;
12145 sfs
.f_owner
= sfsp
->f_owner
;
12146 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12147 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12149 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12151 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12152 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12154 if (partial_copy
) {
12155 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12157 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12160 if (sizep
!= NULL
) {
12167 * copy stat structure into user_stat structure.
12170 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
12172 bzero(usbp
, sizeof(*usbp
));
12174 usbp
->st_dev
= sbp
->st_dev
;
12175 usbp
->st_ino
= sbp
->st_ino
;
12176 usbp
->st_mode
= sbp
->st_mode
;
12177 usbp
->st_nlink
= sbp
->st_nlink
;
12178 usbp
->st_uid
= sbp
->st_uid
;
12179 usbp
->st_gid
= sbp
->st_gid
;
12180 usbp
->st_rdev
= sbp
->st_rdev
;
12181 #ifndef _POSIX_C_SOURCE
12182 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12183 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12184 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12185 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12186 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12187 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12189 usbp
->st_atime
= sbp
->st_atime
;
12190 usbp
->st_atimensec
= sbp
->st_atimensec
;
12191 usbp
->st_mtime
= sbp
->st_mtime
;
12192 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12193 usbp
->st_ctime
= sbp
->st_ctime
;
12194 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12196 usbp
->st_size
= sbp
->st_size
;
12197 usbp
->st_blocks
= sbp
->st_blocks
;
12198 usbp
->st_blksize
= sbp
->st_blksize
;
12199 usbp
->st_flags
= sbp
->st_flags
;
12200 usbp
->st_gen
= sbp
->st_gen
;
12201 usbp
->st_lspare
= sbp
->st_lspare
;
12202 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12203 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12207 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
12209 bzero(usbp
, sizeof(*usbp
));
12211 usbp
->st_dev
= sbp
->st_dev
;
12212 usbp
->st_ino
= sbp
->st_ino
;
12213 usbp
->st_mode
= sbp
->st_mode
;
12214 usbp
->st_nlink
= sbp
->st_nlink
;
12215 usbp
->st_uid
= sbp
->st_uid
;
12216 usbp
->st_gid
= sbp
->st_gid
;
12217 usbp
->st_rdev
= sbp
->st_rdev
;
12218 #ifndef _POSIX_C_SOURCE
12219 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12220 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12221 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12222 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12223 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12224 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12226 usbp
->st_atime
= sbp
->st_atime
;
12227 usbp
->st_atimensec
= sbp
->st_atimensec
;
12228 usbp
->st_mtime
= sbp
->st_mtime
;
12229 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12230 usbp
->st_ctime
= sbp
->st_ctime
;
12231 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12233 usbp
->st_size
= sbp
->st_size
;
12234 usbp
->st_blocks
= sbp
->st_blocks
;
12235 usbp
->st_blksize
= sbp
->st_blksize
;
12236 usbp
->st_flags
= sbp
->st_flags
;
12237 usbp
->st_gen
= sbp
->st_gen
;
12238 usbp
->st_lspare
= sbp
->st_lspare
;
12239 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12240 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12244 * copy stat64 structure into user_stat64 structure.
12247 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
12249 bzero(usbp
, sizeof(*usbp
));
12251 usbp
->st_dev
= sbp
->st_dev
;
12252 usbp
->st_ino
= sbp
->st_ino
;
12253 usbp
->st_mode
= sbp
->st_mode
;
12254 usbp
->st_nlink
= sbp
->st_nlink
;
12255 usbp
->st_uid
= sbp
->st_uid
;
12256 usbp
->st_gid
= sbp
->st_gid
;
12257 usbp
->st_rdev
= sbp
->st_rdev
;
12258 #ifndef _POSIX_C_SOURCE
12259 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12260 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12261 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12262 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12263 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12264 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12265 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12266 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12268 usbp
->st_atime
= sbp
->st_atime
;
12269 usbp
->st_atimensec
= sbp
->st_atimensec
;
12270 usbp
->st_mtime
= sbp
->st_mtime
;
12271 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12272 usbp
->st_ctime
= sbp
->st_ctime
;
12273 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12274 usbp
->st_birthtime
= sbp
->st_birthtime
;
12275 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12277 usbp
->st_size
= sbp
->st_size
;
12278 usbp
->st_blocks
= sbp
->st_blocks
;
12279 usbp
->st_blksize
= sbp
->st_blksize
;
12280 usbp
->st_flags
= sbp
->st_flags
;
12281 usbp
->st_gen
= sbp
->st_gen
;
12282 usbp
->st_lspare
= sbp
->st_lspare
;
12283 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12284 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12288 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
12290 bzero(usbp
, sizeof(*usbp
));
12292 usbp
->st_dev
= sbp
->st_dev
;
12293 usbp
->st_ino
= sbp
->st_ino
;
12294 usbp
->st_mode
= sbp
->st_mode
;
12295 usbp
->st_nlink
= sbp
->st_nlink
;
12296 usbp
->st_uid
= sbp
->st_uid
;
12297 usbp
->st_gid
= sbp
->st_gid
;
12298 usbp
->st_rdev
= sbp
->st_rdev
;
12299 #ifndef _POSIX_C_SOURCE
12300 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12301 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12302 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12303 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12304 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12305 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12306 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12307 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12309 usbp
->st_atime
= sbp
->st_atime
;
12310 usbp
->st_atimensec
= sbp
->st_atimensec
;
12311 usbp
->st_mtime
= sbp
->st_mtime
;
12312 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12313 usbp
->st_ctime
= sbp
->st_ctime
;
12314 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12315 usbp
->st_birthtime
= sbp
->st_birthtime
;
12316 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12318 usbp
->st_size
= sbp
->st_size
;
12319 usbp
->st_blocks
= sbp
->st_blocks
;
12320 usbp
->st_blksize
= sbp
->st_blksize
;
12321 usbp
->st_flags
= sbp
->st_flags
;
12322 usbp
->st_gen
= sbp
->st_gen
;
12323 usbp
->st_lspare
= sbp
->st_lspare
;
12324 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12325 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12329 * Purge buffer cache for simulating cold starts
12332 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
12334 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
12336 return VNODE_RETURNED
;
12340 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
12342 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
12344 return VFS_RETURNED
;
12348 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
12350 if (!kauth_cred_issuser(kauth_cred_get())) {
12354 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
12360 * Gets the vnode associated with the (unnamed) snapshot directory
12361 * for a filesystem. The snapshot directory vnode is returned with
12362 * an iocount on it.
12365 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
12367 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
12371 * Get the snapshot vnode.
12373 * If successful, the call returns with an iocount on *rvpp, *sdvpp and
12374 * needs nameidone() on ndp.
12376 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12378 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12382 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12383 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12384 #if !CONFIG_TRIGGERS
12387 enum path_operation pathop
,
12393 struct vfs_attr vfa
;
12398 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12403 if (!vnode_isvroot(*rvpp
)) {
12408 /* Make sure the filesystem supports snapshots */
12409 VFSATTR_INIT(&vfa
);
12410 VFSATTR_WANTED(&vfa
, f_capabilities
);
12411 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12412 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12413 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12414 VOL_CAP_INT_SNAPSHOT
)) ||
12415 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12416 VOL_CAP_INT_SNAPSHOT
))) {
12421 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
12426 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12427 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12433 * Some sanity checks: the name can't be empty, "." or "..", or contain slashes.
12434 * (the length returned by copyinstr includes the terminating NUL)
12436 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12437 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
12441 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12444 if (i
< (int)name_len
) {
12450 if (op
== CREATE
) {
12451 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12453 } else if (op
== DELETE
) {
12454 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12462 /* Check if the snapshot already exists ... */
12463 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12464 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12465 ndp
->ni_dvp
= *sdvpp
;
12467 error
= namei(ndp
);
12469 FREE(name_buf
, M_TEMP
);
12485 * create a filesystem snapshot (for supporting filesystems)
12487 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12488 * We get to the (unnamed) snapshot directory vnode and create the vnode
12489 * for the snapshot in it.
12493 * a) Passed in name for snapshot cannot have slashes.
12494 * b) name can't be "." or ".."
12496 * Since this requires superuser privileges, vnode_authorize calls are not
12500 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12503 vnode_t rvp
, snapdvp
;
12505 struct nameidata namend
;
12507 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
12513 if (namend
.ni_vp
) {
12514 vnode_put(namend
.ni_vp
);
12517 struct vnode_attr va
;
12518 vnode_t vp
= NULLVP
;
12521 VATTR_SET(&va
, va_type
, VREG
);
12522 VATTR_SET(&va
, va_mode
, 0);
12524 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
12525 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12526 if (!error
&& vp
) {
12531 nameidone(&namend
);
12532 vnode_put(snapdvp
);
12538 * Delete a Filesystem snapshot
12540 * get the vnode for the unnamed snapshot directory and the snapshot and
12541 * delete the snapshot.
12544 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12547 vnode_t rvp
, snapdvp
;
12549 struct nameidata namend
;
12551 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
12557 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
12558 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12560 vnode_put(namend
.ni_vp
);
12561 nameidone(&namend
);
12562 vnode_put(snapdvp
);
12569 * Revert a filesystem to a snapshot
12571 * Marks the filesystem to revert to the given snapshot on next mount.
12574 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12580 struct fs_snapshot_revert_args revert_data
;
12581 struct componentname cnp
;
12585 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12589 mp
= vnode_mount(rvp
);
12591 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12592 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12594 FREE(name_buf
, M_TEMP
);
12600 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
12602 FREE(name_buf
, M_TEMP
);
12609 * Grab mount_iterref so that we can release the vnode,
12610 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12612 error
= mount_iterref(mp
, 0);
12615 FREE(name_buf
, M_TEMP
);
12619 memset(&cnp
, 0, sizeof(cnp
));
12620 cnp
.cn_pnbuf
= (char *)name_buf
;
12621 cnp
.cn_nameiop
= LOOKUP
;
12622 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12623 cnp
.cn_pnlen
= MAXPATHLEN
;
12624 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12625 cnp
.cn_namelen
= (int)name_len
;
12626 revert_data
.sr_cnp
= &cnp
;
12628 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
12629 mount_iterdrop(mp
);
12630 FREE(name_buf
, M_TEMP
);
12633 /* If there was any error, try again using VNOP_IOCTL */
12636 struct nameidata namend
;
12638 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
12645 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
12648 vnode_put(namend
.ni_vp
);
12649 nameidone(&namend
);
12650 vnode_put(snapdvp
);
12658 * rename a Filesystem snapshot
12660 * get the vnode for the unnamed snapshot directory and the snapshot and
12661 * rename the snapshot. This is a very specialised (and simple) case of
12662 * rename(2) (which has to deal with a lot more complications). It differs
12663 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12666 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
12667 __unused
uint32_t flags
, vfs_context_t ctx
)
12669 vnode_t rvp
, snapdvp
;
12671 caddr_t newname_buf
;
12674 struct nameidata
*fromnd
, *tond
;
12675 /* carving out a chunk for structs that are too big to be on stack. */
12677 struct nameidata from_node
;
12678 struct nameidata to_node
;
12681 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
12682 fromnd
= &__rename_data
->from_node
;
12683 tond
= &__rename_data
->to_node
;
12685 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
12690 fvp
= fromnd
->ni_vp
;
12692 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12693 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
12699 * Some sanity checks- new name can't be empty, "." or ".." or have
12701 * (the length returned by copyinstr includes the terminating NUL)
12703 * The FS rename VNOP is supposed to handle this but we'll pick it
12706 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
12707 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
12711 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
12714 if (i
< (int)name_len
) {
12720 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
12727 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
12728 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
12729 tond
->ni_dvp
= snapdvp
;
12731 error
= namei(tond
);
12734 } else if (tond
->ni_vp
) {
12736 * snapshot rename behaves differently than rename(2) - if the
12737 * new name exists, EEXIST is returned.
12739 vnode_put(tond
->ni_vp
);
12744 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
12745 &tond
->ni_cnd
, ctx
);
12750 FREE(newname_buf
, M_TEMP
);
12752 vnode_put(snapdvp
);
12756 FREE(__rename_data
, M_TEMP
);
12761 * Mount a Filesystem snapshot
12763 * get the vnode for the unnamed snapshot directory and the snapshot and
12764 * mount the snapshot.
12767 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
12768 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
12771 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
12772 struct fs_snapshot_mount_args smnt_data
;
12774 struct nameidata
*snapndp
, *dirndp
;
12775 /* carving out a chunk for structs that are too big to be on stack. */
12777 struct nameidata snapnd
;
12778 struct nameidata dirnd
;
12779 } * __snapshot_mount_data
;
12781 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
12783 snapndp
= &__snapshot_mount_data
->snapnd
;
12784 dirndp
= &__snapshot_mount_data
->dirnd
;
12786 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
12792 snapvp
= snapndp
->ni_vp
;
12793 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
12798 /* Get the vnode to be covered */
12799 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
12800 UIO_USERSPACE
, directory
, ctx
);
12801 error
= namei(dirndp
);
12806 vp
= dirndp
->ni_vp
;
12807 pvp
= dirndp
->ni_dvp
;
12808 mp
= vnode_mount(rvp
);
12810 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
12816 error
= mac_mount_check_snapshot_mount(ctx
, rvp
, vp
, &dirndp
->ni_cnd
, snapndp
->ni_cnd
.cn_nameptr
,
12817 mp
->mnt_vfsstat
.f_fstypename
);
12823 smnt_data
.sm_mp
= mp
;
12824 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
12825 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
12826 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
12827 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12835 vnode_put(snapdvp
);
12837 nameidone(snapndp
);
12839 FREE(__snapshot_mount_data
, M_TEMP
);
12844 * Root from a snapshot of the filesystem
12846 * Marks the filesystem to root from the given snapshot on next boot.
12849 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12855 struct fs_snapshot_root_args root_data
;
12856 struct componentname cnp
;
12860 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12864 mp
= vnode_mount(rvp
);
12866 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12867 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12869 FREE(name_buf
, M_TEMP
);
12874 // XXX MAC checks ?
12877 * Grab mount_iterref so that we can release the vnode,
12878 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12880 error
= mount_iterref(mp
, 0);
12883 FREE(name_buf
, M_TEMP
);
12887 memset(&cnp
, 0, sizeof(cnp
));
12888 cnp
.cn_pnbuf
= (char *)name_buf
;
12889 cnp
.cn_nameiop
= LOOKUP
;
12890 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12891 cnp
.cn_pnlen
= MAXPATHLEN
;
12892 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12893 cnp
.cn_namelen
= (int)name_len
;
12894 root_data
.sr_cnp
= &cnp
;
12896 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12898 mount_iterdrop(mp
);
12899 FREE(name_buf
, M_TEMP
);
12905 * FS snapshot operations dispatcher
12908 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12909 __unused
int32_t *retval
)
12912 vfs_context_t ctx
= vfs_context_current();
12914 AUDIT_ARG(fd
, uap
->dirfd
);
12915 AUDIT_ARG(value32
, uap
->op
);
12917 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12923 * Enforce user authorization for snapshot modification operations
12925 if ((uap
->op
!= SNAPSHOT_OP_MOUNT
) &&
12926 (uap
->op
!= SNAPSHOT_OP_ROOT
)) {
12927 vnode_t dvp
= NULLVP
;
12928 vnode_t devvp
= NULLVP
;
12931 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
12935 mp
= vnode_mount(dvp
);
12936 devvp
= mp
->mnt_devvp
;
12938 /* get an iocount on devvp */
12939 if (devvp
== NULLVP
) {
12940 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
12941 /* for mounts which aren't block devices */
12942 if (error
== ENOENT
) {
12946 error
= vnode_getwithref(devvp
);
12954 if ((vfs_context_issuser(ctx
) == 0) &&
12955 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
12967 case SNAPSHOT_OP_CREATE
:
12968 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12970 case SNAPSHOT_OP_DELETE
:
12971 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12973 case SNAPSHOT_OP_RENAME
:
12974 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12977 case SNAPSHOT_OP_MOUNT
:
12978 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12979 uap
->data
, uap
->flags
, ctx
);
12981 case SNAPSHOT_OP_REVERT
:
12982 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12984 #if CONFIG_MNT_ROOTSNAP
12985 case SNAPSHOT_OP_ROOT
:
12986 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12988 #endif /* CONFIG_MNT_ROOTSNAP */