2 * Copyright (c) 1995-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <kern/kalloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
112 #include <vfs/vfs_disk_conditioner.h>
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
125 #include <libkern/OSAtomic.h>
126 #include <os/atomic_private.h>
127 #include <pexpert/pexpert.h>
128 #include <IOKit/IOBSD.h>
131 #include <kern/host.h>
132 #include <kern/ipc_misc.h>
133 #include <mach/host_priv.h>
134 #include <mach/vfs_nspace.h>
137 #include <nfs/nfs_conf.h>
140 #include <miscfs/routefs/routefs.h>
144 #include <security/mac.h>
145 #include <security/mac_framework.h>
149 #define GET_PATH(x) \
150 ((x) = get_pathbuff())
151 #define RELEASE_PATH(x) \
154 #define GET_PATH(x) \
155 ((x) = zalloc(ZV_NAMEI))
156 #define RELEASE_PATH(x) \
158 #endif /* CONFIG_FSE */
160 #ifndef HFS_GET_BOOT_INFO
161 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
164 #ifndef HFS_SET_BOOT_INFO
165 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
168 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
169 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
173 * If you need accounting for KM_FD_VN_DATA consider using
174 * ZONE_VIEW_DEFINE to define a zone view.
176 #define KM_FD_VN_DATA KHEAP_DEFAULT
178 extern void disk_conditioner_unmount(mount_t mp
);
180 /* struct for checkdirs iteration */
185 /* callback for checkdirs iteration */
186 static int checkdirs_callback(proc_t p
, void * arg
);
188 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
189 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
190 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
191 static int getfsstat_callback(mount_t mp
, void * arg
);
192 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
193 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
194 static int sync_callback(mount_t
, void *);
195 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
196 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
197 boolean_t partial_copy
);
198 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
199 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
200 struct componentname
*cnp
, user_addr_t fsmountargs
,
201 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
203 void vfs_notify_mount(vnode_t pdvp
);
205 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
207 struct fd_vn_data
* fg_vn_data_alloc(void);
210 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
211 * Concurrent lookups (or lookups by ids) on hard links can cause the
212 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
213 * does) to return ENOENT as the path cannot be returned from the name cache
214 * alone. We have no option but to retry and hope to get one namei->reverse path
215 * generation done without an intervening lookup, lookup by id on the hard link
216 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
217 * which currently are the MAC hooks for rename, unlink and rmdir.
219 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
221 /* Max retry limit for rename due to vnode recycling. */
222 #define MAX_RENAME_ERECYCLE_RETRIES 1024
224 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
,
227 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *);
229 #ifdef CONFIG_IMGSRC_ACCESS
230 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
231 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
232 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
233 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
234 static void mount_end_update(mount_t mp
);
235 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
236 #endif /* CONFIG_IMGSRC_ACCESS */
238 #if CONFIG_LOCKERBOOT
239 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
240 const char *pbdevpath
);
244 #if CONFIG_MNT_ROOTSNAP
245 static int __attribute__ ((noinline
)) snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
247 static int __attribute__ ((noinline
)) snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
251 int sync_internal(void);
254 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
256 static LCK_GRP_DECLARE(fd_vn_lck_grp
, "fd_vnode_data");
257 static LCK_ATTR_DECLARE(fd_vn_lck_attr
, 0, 0);
259 /* vars for sync mutex */
260 static LCK_GRP_DECLARE(sync_mtx_lck_grp
, "sync thread");
261 static LCK_MTX_DECLARE(sync_mtx_lck
, &sync_mtx_lck_grp
);
263 extern lck_rw_t rootvnode_rw_lock
;
266 * incremented each time a mount or unmount operation occurs
267 * used to invalidate the cached value of the rootvp in the
268 * mount structure utilized by cache_lookup_path
270 uint32_t mount_generation
= 0;
272 /* counts number of mount and unmount operations */
273 unsigned int vfs_nummntops
= 0;
275 /* system-wide, per-boot unique mount ID */
276 static _Atomic
uint64_t mount_unique_id
= 1;
278 extern const struct fileops vnops
;
279 #if CONFIG_APPLEDOUBLE
280 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
281 #endif /* CONFIG_APPLEDOUBLE */
284 * Virtual File System System Calls
287 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
289 * Private in-kernel mounting spi (NFS only, not exported)
293 vfs_iskernelmount(mount_t mp
)
295 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
300 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
301 void *data
, __unused
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
)
307 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
308 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
311 * Get the vnode to be covered if it's not supplied
316 if (kern_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VOLBYROLE_MASK
)) {
317 printf("failed to locate mount-on path: %s ", path
);
325 char *pnbuf
= CAST_DOWN(char *, path
);
327 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
328 nd
.ni_cnd
.cn_pnlen
= (int)(strlen(pnbuf
) + 1);
332 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
333 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
343 #endif /* CONFIG_NFS_CLIENT || DEVFS */
346 * Mount a file system.
350 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
352 struct __mac_mount_args muap
;
354 muap
.type
= uap
->type
;
355 muap
.path
= uap
->path
;
356 muap
.flags
= uap
->flags
;
357 muap
.data
= uap
->data
;
358 muap
.mac_p
= USER_ADDR_NULL
;
359 return __mac_mount(p
, &muap
, retval
);
363 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
365 struct componentname cn
;
366 vfs_context_t ctx
= vfs_context_current();
369 int flags
= uap
->flags
;
370 char fstypename
[MFSNAMELEN
];
371 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
375 AUDIT_ARG(fd
, uap
->fd
);
376 AUDIT_ARG(fflags
, flags
);
377 /* fstypename will get audited by mount_common */
379 /* Sanity check the flags */
380 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
384 if (flags
& MNT_UNION
) {
388 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
393 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
397 if ((error
= vnode_getwithref(vp
)) != 0) {
402 pvp
= vnode_getparent(vp
);
409 memset(&cn
, 0, sizeof(struct componentname
));
410 cn
.cn_pnbuf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
411 cn
.cn_pnlen
= MAXPATHLEN
;
413 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
414 zfree(ZV_NAMEI
, cn
.cn_pnbuf
);
421 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
423 zfree(ZV_NAMEI
, cn
.cn_pnbuf
);
432 vfs_notify_mount(vnode_t pdvp
)
434 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
435 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
440 * Mount a file system taking into account MAC label behavior.
441 * See mount(2) man page for more information
443 * Parameters: p Process requesting the mount
444 * uap User argument descriptor (see below)
447 * Indirect: uap->type Filesystem type
448 * uap->path Path to mount
449 * uap->data Mount arguments
450 * uap->mac_p MAC info
451 * uap->flags Mount flags
457 boolean_t root_fs_upgrade_try
= FALSE
;
460 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
464 int need_nameidone
= 0;
465 vfs_context_t ctx
= vfs_context_current();
466 char fstypename
[MFSNAMELEN
];
469 char *labelstr
= NULL
;
471 int flags
= uap
->flags
;
473 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
474 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
479 * Get the fs type name from user space
481 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
487 * Get the vnode to be covered
489 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
490 UIO_USERSPACE
, uap
->path
, ctx
);
499 #ifdef CONFIG_IMGSRC_ACCESS
500 /* Mounting image source cannot be batched with other operations */
501 if (flags
== MNT_IMGSRC_BY_INDEX
) {
502 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
503 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
506 #endif /* CONFIG_IMGSRC_ACCESS */
510 * Get the label string (if any) from user space
512 if (uap
->mac_p
!= USER_ADDR_NULL
) {
517 struct user64_mac mac64
;
518 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
519 mac
.m_buflen
= (user_size_t
)mac64
.m_buflen
;
520 mac
.m_string
= (user_addr_t
)mac64
.m_string
;
522 struct user32_mac mac32
;
523 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
524 mac
.m_buflen
= mac32
.m_buflen
;
525 mac
.m_string
= mac32
.m_string
;
530 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
531 (mac
.m_buflen
< 2)) {
535 labelsz
= mac
.m_buflen
;
536 labelstr
= kheap_alloc(KHEAP_TEMP
, labelsz
, Z_WAITOK
);
537 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
541 AUDIT_ARG(mac_string
, labelstr
);
543 #endif /* CONFIG_MACF */
545 AUDIT_ARG(fflags
, flags
);
548 if (flags
& MNT_UNION
) {
549 /* No union mounts on release kernels */
555 if ((vp
->v_flag
& VROOT
) &&
556 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
557 if (!(flags
& MNT_UNION
)) {
561 * For a union mount on '/', treat it as fresh
562 * mount instead of update.
563 * Otherwise, union mounting on '/' used to panic the
564 * system before, since mnt_vnodecovered was found to
565 * be NULL for '/' which is required for unionlookup
566 * after it gets ENOENT on union mount.
568 flags
= (flags
& ~(MNT_UPDATE
));
572 if ((flags
& MNT_RDONLY
) == 0) {
573 /* Release kernels are not allowed to mount "/" as rw */
579 * See 7392553 for more details on why this check exists.
580 * Suffice to say: If this check is ON and something tries
581 * to mount the rootFS RW, we'll turn off the codesign
582 * bitmap optimization.
584 #if CHECK_CS_VALIDATION_BITMAP
585 if ((flags
& MNT_RDONLY
) == 0) {
586 root_fs_upgrade_try
= TRUE
;
591 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
592 labelstr
, FALSE
, ctx
);
597 kheap_free(KHEAP_TEMP
, labelstr
, labelsz
); /* labelstr was obtained from KHEAP_TEMP; free it back to that same heap */
598 #endif /* CONFIG_MACF */
606 if (need_nameidone
) {
614 * common mount implementation (final stage of mounting)
617 * fstypename file system type (i.e. its vfs name)
618 * pvp parent of covered vnode
620 * cnp component name (i.e. path) of covered vnode
621 * flags generic mount flags
622 * fsmountargs file system specific data
623 * labelstr optional MAC label
624 * kernelmount TRUE for mounts initiated from inside the kernel
625 * ctx caller's context
628 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
629 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
630 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
633 #pragma unused(labelstr)
635 struct vnode
*devvp
= NULLVP
;
636 struct vnode
*device_vnode
= NULLVP
;
641 struct vfstable
*vfsp
= (struct vfstable
*)0;
642 struct proc
*p
= vfs_context_proc(ctx
);
644 bool flag_set
= false;
645 user_addr_t devpath
= USER_ADDR_NULL
;
648 boolean_t vfsp_ref
= FALSE
;
649 boolean_t is_rwlock_locked
= FALSE
;
650 boolean_t did_rele
= FALSE
;
651 boolean_t have_usecount
= FALSE
;
652 boolean_t did_set_lmount
= FALSE
;
654 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM || CONFIG_BASESYSTEMROOT
655 /* Check for mutually-exclusive flag bits */
656 uint32_t checkflags
= (internal_flags
& (KERNEL_MOUNT_VOLBYROLE_MASK
| KERNEL_MOUNT_BASESYSTEMROOT
));
658 while (checkflags
!= 0) {
659 checkflags
&= (checkflags
- 1);
664 //not allowed to request multiple mount-by-role flags
671 * Process an update for an existing mount
673 if (flags
& MNT_UPDATE
) {
674 if ((vp
->v_flag
& VROOT
) == 0) {
680 /* if unmount or mount in progress, return error */
682 if (mp
->mnt_lflag
& (MNT_LUNMOUNT
| MNT_LMOUNT
)) {
687 mp
->mnt_lflag
|= MNT_LMOUNT
;
688 did_set_lmount
= TRUE
;
690 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
691 is_rwlock_locked
= TRUE
;
693 * We only allow the filesystem to be reloaded if it
694 * is currently mounted read-only.
696 if ((flags
& MNT_RELOAD
) &&
697 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
703 * If content protection is enabled, update mounts are not
704 * allowed to turn it off.
706 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
707 ((flags
& MNT_CPROTECT
) == 0)) {
713 * can't turn off MNT_REMOVABLE either but it may be an unexpected
714 * failure to return an error for this so we'll just silently
715 * add it if it is not passed in.
717 if ((mp
->mnt_flag
& MNT_REMOVABLE
) &&
718 ((flags
& MNT_REMOVABLE
) == 0)) {
719 flags
|= MNT_REMOVABLE
;
722 /* Can't downgrade the backer of the root FS */
723 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
724 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
730 * Only root, or the user that did the original mount is
731 * permitted to update it.
733 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
734 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
738 error
= mac_mount_check_remount(ctx
, mp
);
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
745 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
747 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
748 flags
|= MNT_NOSUID
| MNT_NODEV
;
749 if (mp
->mnt_flag
& MNT_NOEXEC
) {
758 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
760 vfsp
= mp
->mnt_vtable
;
765 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
766 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
768 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
769 flags
|= MNT_NOSUID
| MNT_NODEV
;
770 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
775 /* XXXAUDIT: Should we capture the type on the error path as well? */
776 AUDIT_ARG(text
, fstypename
);
778 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
779 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
780 vfsp
->vfc_refcount
++;
792 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
793 * except in ROSV configs and for the initial BaseSystem root.
795 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) &&
796 ((internal_flags
& KERNEL_MOUNT_VOLBYROLE_MASK
) == 0) &&
797 ((internal_flags
& KERNEL_MOUNT_BASESYSTEMROOT
) == 0)) {
798 error
= EINVAL
; /* unsupported request */
802 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
808 * Allocate and initialize the filesystem (mount_t)
810 mp
= zalloc_flags(mount_zone
, Z_WAITOK
| Z_ZERO
);
813 /* Initialize the default IO constraints */
814 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
815 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
816 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
817 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
818 mp
->mnt_devblocksize
= DEV_BSIZE
;
819 mp
->mnt_alignmentmask
= PAGE_MASK
;
820 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
823 mp
->mnt_realrootvp
= NULLVP
;
824 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
826 mp
->mnt_lflag
|= MNT_LMOUNT
;
827 did_set_lmount
= TRUE
;
829 TAILQ_INIT(&mp
->mnt_vnodelist
);
830 TAILQ_INIT(&mp
->mnt_workerqueue
);
831 TAILQ_INIT(&mp
->mnt_newvnodes
);
833 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
834 is_rwlock_locked
= TRUE
;
835 mp
->mnt_op
= vfsp
->vfc_vfsops
;
836 mp
->mnt_vtable
= vfsp
;
837 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
838 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
839 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
841 int pathlen
= MAXPATHLEN
;
843 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) {
844 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
847 mp
->mnt_vnodecovered
= vp
;
848 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
849 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
850 mp
->mnt_devbsdunit
= 0;
851 mp
->mnt_mount_id
= os_atomic_inc_orig(&mount_unique_id
, relaxed
);
853 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
854 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
856 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
858 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
860 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
861 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
863 #endif /* CONFIG_NFS_CLIENT || DEVFS */
865 if (KERNEL_MOUNT_DEVFS
& internal_flags
) {
866 // kernel mounted devfs
867 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
873 * Set the mount level flags.
875 if (flags
& MNT_RDONLY
) {
876 mp
->mnt_flag
|= MNT_RDONLY
;
877 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
878 // disallow read/write upgrades of file systems that
879 // had the TYPENAME_OVERRIDE feature set.
880 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
884 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
886 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
887 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
888 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
889 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
890 MNT_QUARANTINE
| MNT_CPROTECT
);
895 * On release builds of iOS based platforms, always enforce NOSUID on
896 * all mounts. We do this here because we can catch update mounts as well as
897 * non-update mounts in this case.
899 mp
->mnt_flag
|= (MNT_NOSUID
);
903 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
904 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
905 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
906 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
| MNT_STRICTATIME
|
907 MNT_QUARANTINE
| MNT_CPROTECT
);
910 if (flags
& MNT_MULTILABEL
) {
911 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
915 mp
->mnt_flag
|= MNT_MULTILABEL
;
919 * Process device path for local file systems if requested.
921 * Snapshot and mount-by-role mounts do not use this path; they are
922 * passing other opaque data in the device path field.
924 * Basesystemroot mounts pass a device path to be resolved here,
925 * but it's just a char * already inside the kernel, which
926 * kernel_mount() shoved into a user_addr_t to call us. So for such
927 * mounts we must skip copyin (both of the address and of the string
930 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
931 !(internal_flags
& (KERNEL_MOUNT_SNAPSHOT
| KERNEL_MOUNT_VOLBYROLE_MASK
))) {
932 boolean_t do_copyin_devpath
= true;
933 #if CONFIG_BASESYSTEMROOT
934 if (internal_flags
& KERNEL_MOUNT_BASESYSTEMROOT
) {
935 // KERNEL_MOUNT_BASESYSTEMROOT implies subtle behavior worth noting:
936 // We have been passed fsmountargs, which is typed as a user_addr_t,
937 // but is actually a char ** pointing to a (kernelspace) string.
938 // We manually unpack it with a series of casts and dereferences
939 // that reverses what was done just above us on the stack in
940 // imageboot_pivot_image().
941 // After retrieving the path to the dev node (which we will NDINIT
942 // in a moment), we pass NULL fsmountargs on to the filesystem.
943 _Static_assert(sizeof(char **) == sizeof(fsmountargs
), "fsmountargs should fit a (kernel) address");
944 char **devnamepp
= (char **)fsmountargs
;
945 char *devnamep
= *devnamepp
;
946 devpath
= CAST_USER_ADDR_T(devnamep
);
947 do_copyin_devpath
= false;
948 fsmountargs
= USER_ADDR_NULL
;
950 //Now that we have a mp, denote that this mount is for the basesystem.
951 mp
->mnt_supl_kern_flag
|= MNTK_SUPL_BASESYSTEM
;
953 #endif // CONFIG_BASESYSTEMROOT
955 if (do_copyin_devpath
) {
956 if (vfs_context_is64bit(ctx
)) {
957 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
960 fsmountargs
+= sizeof(devpath
);
963 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
966 /* munge into LP64 addr */
967 devpath
= CAST_USER_ADDR_T(tmp
);
968 fsmountargs
+= sizeof(tmp
);
972 /* Lookup device and authorize access to it */
976 enum uio_seg seg
= UIO_USERSPACE
;
977 #if CONFIG_BASESYSTEMROOT
978 if (internal_flags
& KERNEL_MOUNT_BASESYSTEMROOT
) {
981 #endif // CONFIG_BASESYSTEMROOT
983 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, seg
, devpath
, ctx
);
984 if ((error
= namei(&nd
))) {
988 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
993 if (devvp
->v_type
!= VBLK
) {
997 if (major(devvp
->v_rdev
) >= nblkdev
) {
1002 * If mount by non-root, then verify that user has necessary
1003 * permissions on the device.
1005 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
1006 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
1008 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1009 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1011 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
1016 /* On first mount, preflight and open device */
1017 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
1018 if ((error
= vnode_ref(devvp
))) {
1022 * Disallow multiple mounts of the same device.
1023 * Disallow mounting of a device that is currently in use
1024 * (except for root, which might share swap device for miniroot).
1025 * Flush out any old buffers remaining from a previous use.
1027 if ((error
= vfs_mountedon(devvp
))) {
1031 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
1035 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
1039 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
1043 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
1045 error
= mac_vnode_check_open(ctx
,
1047 ronly
? FREAD
: FREAD
| FWRITE
);
1052 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
1056 mp
->mnt_devvp
= devvp
;
1057 device_vnode
= devvp
;
1058 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
1059 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
1060 (device_vnode
= mp
->mnt_devvp
)) {
1064 * If upgrade to read-write by non-root, then verify
1065 * that user has necessary permissions on the device.
1067 vnode_getalways(device_vnode
);
1069 if (suser(vfs_context_ucred(ctx
), NULL
) &&
1070 (error
= vnode_authorize(device_vnode
, NULL
,
1071 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
1073 vnode_put(device_vnode
);
1077 /* Tell the device that we're upgrading */
1078 dev
= (dev_t
)device_vnode
->v_rdev
;
1081 if ((u_int
)maj
>= (u_int
)nblkdev
) {
1082 panic("Volume mounted on a device with invalid major number.");
1085 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
1086 vnode_put(device_vnode
);
1087 device_vnode
= NULLVP
;
1092 } // localargs && !(snapshot | data | vm)
1095 if ((flags
& MNT_UPDATE
) == 0) {
1096 mac_mount_label_init(mp
);
1097 mac_mount_label_associate(ctx
, mp
);
1100 if ((flags
& MNT_UPDATE
) != 0) {
1101 error
= mac_mount_check_label_update(ctx
, mp
);
1109 * Mount the filesystem. We already asserted that internal_flags
1110 * cannot have more than one mount-by-role bit set.
1112 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
1113 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
1114 (caddr_t
)fsmountargs
, 0, ctx
);
1115 } else if (internal_flags
& KERNEL_MOUNT_DATAVOL
) {
1116 #if CONFIG_ROSV_STARTUP
1117 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1118 fs_role_mount_args_t frma
= {origin_mp
, VFS_DATA_ROLE
};
1119 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1121 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
);
1123 /* Mark volume associated with system volume */
1124 mp
->mnt_kern_flag
|= MNTK_SYSTEM
;
1126 /* Attempt to acquire the mnt_devvp and set it up */
1127 struct vnode
*mp_devvp
= NULL
;
1128 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1129 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1130 0, &mp_devvp
, vfs_context_kernel());
1132 mp
->mnt_devvp
= mp_devvp
;
1133 //vnode_lookup took an iocount, need to drop it.
1134 vnode_put(mp_devvp
);
1135 // now set `device_vnode` to the devvp that was acquired.
1136 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1137 // note that though the iocount above was dropped, the mount acquires
1138 // an implicit reference against the device.
1139 device_vnode
= mp_devvp
;
1146 } else if (internal_flags
& KERNEL_MOUNT_VMVOL
) {
1148 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1149 fs_role_mount_args_t frma
= {origin_mp
, VFS_VM_ROLE
};
1150 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1152 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
);
1154 /* Mark volume associated with system volume and a swap mount */
1155 mp
->mnt_kern_flag
|= (MNTK_SYSTEM
| MNTK_SWAP_MOUNT
);
1156 /* Attempt to acquire the mnt_devvp and set it up */
1157 struct vnode
*mp_devvp
= NULL
;
1158 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1159 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1160 0, &mp_devvp
, vfs_context_kernel());
1162 mp
->mnt_devvp
= mp_devvp
;
1163 //vnode_lookup took an iocount, need to drop it.
1164 vnode_put(mp_devvp
);
1166 // now set `device_vnode` to the devvp that was acquired.
1167 // note that though the iocount above was dropped, the mount acquires
1168 // an implicit reference against the device.
1169 device_vnode
= mp_devvp
;
1176 } else if ((internal_flags
& KERNEL_MOUNT_PREBOOTVOL
) || (internal_flags
& KERNEL_MOUNT_RECOVERYVOL
)) {
1177 #if CONFIG_MOUNT_PREBOOTRECOVERY
1178 struct mount
*origin_mp
= (struct mount
*)fsmountargs
;
1179 uint32_t mount_role
= 0;
1180 if (internal_flags
& KERNEL_MOUNT_PREBOOTVOL
) {
1181 mount_role
= VFS_PREBOOT_ROLE
;
1182 } else if (internal_flags
& KERNEL_MOUNT_RECOVERYVOL
) {
1183 mount_role
= VFS_RECOVERY_ROLE
;
1186 if (mount_role
!= 0) {
1187 fs_role_mount_args_t frma
= {origin_mp
, mount_role
};
1188 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
);
1190 printf("MOUNT-BY-ROLE (%d) failed! (%d)", mount_role
, error
);
1192 // NOT YET - need to qualify how this interacts with shutdown, ERP/ERB, etc
1193 /* Mark volume associated with system volume */
1194 //mp->mnt_kern_flag |= MNTK_SYSTEM;
1195 /* Attempt to acquire the mnt_devvp and set it up */
1196 struct vnode
*mp_devvp
= NULL
;
1197 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) {
1198 errno_t lerr
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
,
1199 0, &mp_devvp
, vfs_context_kernel());
1201 mp
->mnt_devvp
= mp_devvp
;
1202 //vnode_lookup took an iocount, need to drop it.
1203 vnode_put(mp_devvp
);
1205 // now set `device_vnode` to the devvp that was acquired.
1206 // note that though the iocount above was dropped, the mount acquires
1207 // an implicit reference against the device.
1208 device_vnode
= mp_devvp
;
1213 printf("MOUNT-BY-ROLE (%d) failed - ROLE UNRECOGNIZED! (%d)", mount_role
, error
);
1220 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
1223 if (flags
& MNT_UPDATE
) {
1224 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
1225 mp
->mnt_flag
&= ~MNT_RDONLY
;
1228 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1229 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1231 mp
->mnt_flag
= flag
; /* restore flag value */
1233 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1234 lck_rw_done(&mp
->mnt_rwlock
);
1235 is_rwlock_locked
= FALSE
;
1237 enablequotas(mp
, ctx
);
1243 * Put the new filesystem on the mount list after root.
1246 struct vfs_attr vfsattr
;
1248 error
= mac_mount_check_mount_late(ctx
, mp
);
1253 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1254 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1256 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1259 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1261 * drop reference provided by VFS_ROOT
1271 vnode_lock_spin(vp
);
1272 CLR(vp
->v_flag
, VMOUNT
);
1273 vp
->v_mountedhere
= mp
;
1277 * taking the name_cache_lock exclusively will
1278 * ensure that everyone is out of the fast path who
1279 * might be trying to use a now stale copy of
1280 * vp->v_mountedhere->mnt_realrootvp
1281 * bumping mount_generation causes the cached values
1286 name_cache_unlock();
1288 error
= vnode_ref(vp
);
1293 have_usecount
= TRUE
;
1295 error
= checkdirs(vp
, ctx
);
1297 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1301 * there is no cleanup code here so I have made it void
1302 * we need to revisit this
1304 (void)VFS_START(mp
, 0, ctx
);
1306 if (mount_list_add(mp
) != 0) {
1308 * The system is shutting down trying to umount
1309 * everything, so fail with a plausible errno.
1314 lck_rw_done(&mp
->mnt_rwlock
);
1315 is_rwlock_locked
= FALSE
;
1317 /* Check if this mounted file system supports EAs or named streams. */
1318 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1319 VFSATTR_INIT(&vfsattr
);
1320 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1321 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1322 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1323 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1324 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1325 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1326 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1329 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1330 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1331 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1334 /* Check if this file system supports path from id lookups. */
1335 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1336 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1337 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1338 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1339 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1340 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1343 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1344 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1345 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1348 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1349 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1351 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1352 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1354 /* increment the operations count */
1355 OSAddAtomic(1, &vfs_nummntops
);
1356 enablequotas(mp
, ctx
);
1359 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1362 * cache the IO attributes for the underlying physical media...
1363 * an error return indicates the underlying driver doesn't
1364 * support all the queries necessary... however, reasonable
1365 * defaults will have been set, so no reason to bail or care
1367 vfs_init_io_attributes(device_vnode
, mp
);
1370 /* Now that mount is setup, notify the listeners */
1371 vfs_notify_mount(pvp
);
1372 IOBSDMountChange(mp
, kIOMountChangeMount
);
1374 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1375 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1376 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1377 mp
->mnt_vtable
->vfc_name
, error
);
1380 vnode_lock_spin(vp
);
1381 CLR(vp
->v_flag
, VMOUNT
);
1384 mp
->mnt_vtable
->vfc_refcount
--;
1385 mount_list_unlock();
1388 vnode_rele(device_vnode
);
1389 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1391 lck_rw_done(&mp
->mnt_rwlock
);
1392 is_rwlock_locked
= FALSE
;
1395 * if we get here, we have a mount structure that needs to be freed,
1396 * but since the coveredvp hasn't yet been updated to point at it,
1397 * no need to worry about other threads holding a crossref on this mp
1398 * so it's ok to just free it
1400 mount_lock_destroy(mp
);
1402 mac_mount_label_destroy(mp
);
1404 zfree(mount_zone
, mp
);
1405 did_set_lmount
= false;
1409 * drop I/O count on the device vp if there was one
1411 if (devpath
&& devvp
) {
1415 if (did_set_lmount
) {
1416 mount_lock_spin(mp
);
1417 mp
->mnt_lflag
&= ~MNT_LMOUNT
;
1423 /* Error condition exits */
1425 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1428 * If the mount has been placed on the covered vp,
1429 * it may have been discovered by now, so we have
1430 * to treat this just like an unmount
1432 mount_lock_spin(mp
);
1433 mp
->mnt_lflag
|= MNT_LDEAD
;
1436 if (device_vnode
!= NULLVP
) {
1437 vnode_rele(device_vnode
);
1438 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1443 vnode_lock_spin(vp
);
1446 vp
->v_mountedhere
= (mount_t
) 0;
1450 if (have_usecount
) {
1454 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1458 if (devpath
&& devvp
) {
1462 /* Release mnt_rwlock only when it was taken */
1463 if (is_rwlock_locked
== TRUE
) {
1465 mp
->mnt_flag
= flag
; /* restore mnt_flag value */
1467 lck_rw_done(&mp
->mnt_rwlock
);
1470 if (did_set_lmount
) {
1471 mount_lock_spin(mp
);
1472 mp
->mnt_lflag
&= ~MNT_LMOUNT
;
1477 if (mp
->mnt_crossref
) {
1478 mount_dropcrossref(mp
, vp
, 0);
1480 mount_lock_destroy(mp
);
1482 mac_mount_label_destroy(mp
);
1484 zfree(mount_zone
, mp
);
1489 vfsp
->vfc_refcount
--;
1490 mount_list_unlock();
1497 * Flush in-core data, check for competing mount attempts,
1501 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1504 #pragma unused(cnp,fsname)
1506 struct vnode_attr va
;
1511 * If the user is not root, ensure that they own the directory
1512 * onto which we are attempting to mount.
1515 VATTR_WANTED(&va
, va_uid
);
1516 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1517 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1518 (!vfs_context_issuser(ctx
)))) {
1524 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1528 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
1532 if (vp
->v_type
!= VDIR
) {
1537 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1543 error
= mac_mount_check_mount(ctx
, vp
,
1550 vnode_lock_spin(vp
);
1551 SET(vp
->v_flag
, VMOUNT
);
1558 #if CONFIG_IMGSRC_ACCESS
1560 #define DEBUG_IMGSRC 0
1563 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1565 #define IMGSRC_DEBUG(args...) do { } while(0)
1569 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1571 struct nameidata nd
;
1572 vnode_t vp
, realdevvp
;
1575 enum uio_seg uio
= UIO_USERSPACE
;
1577 if (ctx
== vfs_context_kernel()) {
1581 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
);
1582 if ((error
= namei(&nd
))) {
1583 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1589 if (!vnode_isblk(vp
)) {
1590 IMGSRC_DEBUG("Not block device.\n");
1595 realdevvp
= mp
->mnt_devvp
;
1596 if (realdevvp
== NULLVP
) {
1597 IMGSRC_DEBUG("No device backs the mount.\n");
1602 error
= vnode_getwithref(realdevvp
);
1604 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1608 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1609 IMGSRC_DEBUG("Wrong dev_t.\n");
1614 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1617 * If mount by non-root, then verify that user has necessary
1618 * permissions on the device.
1620 if (!vfs_context_issuser(ctx
)) {
1621 accessmode
= KAUTH_VNODE_READ_DATA
;
1622 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1623 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1625 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1626 IMGSRC_DEBUG("Access denied.\n");
1634 vnode_put(realdevvp
);
1647 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1648 * and call checkdirs()
1651 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1655 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1657 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1658 mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
));
1660 vnode_lock_spin(vp
);
1661 CLR(vp
->v_flag
, VMOUNT
);
1662 vp
->v_mountedhere
= mp
;
1666 * taking the name_cache_lock exclusively will
1667 * ensure that everyone is out of the fast path who
1668 * might be trying to use a now stale copy of
1669 * vp->v_mountedhere->mnt_realrootvp
1670 * bumping mount_generation causes the cached values
1675 name_cache_unlock();
1677 error
= vnode_ref(vp
);
1682 error
= checkdirs(vp
, ctx
);
1684 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1691 mp
->mnt_vnodecovered
= NULLVP
;
1697 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1700 vnode_lock_spin(vp
);
1701 vp
->v_mountedhere
= (mount_t
)NULL
;
1704 mp
->mnt_vnodecovered
= NULLVP
;
1708 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1712 /* unmount in progress return error */
1713 mount_lock_spin(mp
);
1714 if (mp
->mnt_lflag
& (MNT_LUNMOUNT
| MNT_LMOUNT
)) {
1719 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1722 * We only allow the filesystem to be reloaded if it
1723 * is currently mounted read-only.
1725 if ((flags
& MNT_RELOAD
) &&
1726 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1732 * Only root, or the user that did the original mount is
1733 * permitted to update it.
1735 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1736 (!vfs_context_issuser(ctx
))) {
1741 error
= mac_mount_check_remount(ctx
, mp
);
1749 lck_rw_done(&mp
->mnt_rwlock
);
1756 mount_end_update(mount_t mp
)
1758 lck_rw_done(&mp
->mnt_rwlock
);
1762 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1766 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1770 vp
= imgsrc_rootvnodes
[height
];
1771 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1780 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
,
1781 struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
,
1782 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1786 boolean_t placed
= FALSE
;
1787 struct vfstable
*vfsp
;
1788 user_addr_t devpath
;
1789 char *old_mntonname
;
1795 /* If we didn't imageboot, nothing to move */
1796 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1800 /* Only root can do this */
1801 if (!vfs_context_issuser(ctx
)) {
1805 IMGSRC_DEBUG("looking for root vnode.\n");
1808 * Get root vnode of filesystem we're moving.
1812 struct user64_mnt_imgsrc_args mia64
;
1813 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1815 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1819 height
= mia64
.mi_height
;
1820 flags
= mia64
.mi_flags
;
1821 devpath
= (user_addr_t
)mia64
.mi_devpath
;
1823 struct user32_mnt_imgsrc_args mia32
;
1824 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1826 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1830 height
= mia32
.mi_height
;
1831 flags
= mia32
.mi_flags
;
1832 devpath
= mia32
.mi_devpath
;
1836 * For binary compatibility--assumes one level of nesting.
1839 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1844 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1848 /* munge into LP64 addr */
1849 devpath
= CAST_USER_ADDR_T(tmp
);
1857 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1861 error
= get_imgsrc_rootvnode(height
, &rvp
);
1863 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
);
1867 IMGSRC_DEBUG("got old root vnode\n");
1869 old_mntonname
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
1871 /* Can only move once */
1872 mp
= vnode_mount(rvp
);
1873 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1874 IMGSRC_DEBUG("Already moved.\n");
1879 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
);
1880 IMGSRC_DEBUG("Starting updated.\n");
1882 /* Get exclusive rwlock on mount, authorize update on mp */
1883 error
= mount_begin_update(mp
, ctx
, 0);
1885 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1890 * It can only be moved once. Flag is set under the rwlock,
1891 * so we're now safe to proceed.
1893 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1894 IMGSRC_DEBUG("Already moved [2]\n");
1898 IMGSRC_DEBUG("Preparing coveredvp.\n");
1900 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1901 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1903 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1907 IMGSRC_DEBUG("Covered vp OK.\n");
1909 /* Sanity check the name caller has provided */
1910 vfsp
= mp
->mnt_vtable
;
1911 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1912 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1913 vfsp
->vfc_name
, fsname
);
1918 /* Check the device vnode and update mount-from name, for local filesystems */
1919 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1920 IMGSRC_DEBUG("Local, doing device validation.\n");
1922 if (devpath
!= USER_ADDR_NULL
) {
1923 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1925 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1934 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1935 * and increment the name cache's mount generation
1938 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1939 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1946 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1947 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1949 /* Forbid future moves */
1951 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1954 /* Finally, add to mount list, completely ready to go */
1955 if (mount_list_add(mp
) != 0) {
1957 * The system is shutting down trying to umount
1958 * everything, so fail with a plausible errno.
1964 mount_end_update(mp
);
1966 zfree(ZV_NAMEI
, old_mntonname
);
1968 vfs_notify_mount(pvp
);
1972 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1975 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1980 * Placing the mp on the vnode clears VMOUNT,
1981 * so cleanup is different after that point
1984 /* Rele the vp, clear VMOUNT and v_mountedhere */
1985 undo_place_on_covered_vp(mp
, vp
);
1987 vnode_lock_spin(vp
);
1988 CLR(vp
->v_flag
, VMOUNT
);
1992 mount_end_update(mp
);
1996 zfree(ZV_NAMEI
, old_mntonname
);
2000 #if CONFIG_LOCKERBOOT
2003 mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
2004 const char *pbdevpath
)
2007 struct nameidata nd
;
2008 boolean_t cleanup_nd
= FALSE
;
2009 vfs_context_t ctx
= vfs_context_kernel();
2010 boolean_t is64
= TRUE
;
2011 boolean_t by_index
= TRUE
;
2012 struct user64_mnt_imgsrc_args mia64
= {
2015 .mi_devpath
= CAST_USER_ADDR_T(pbdevpath
),
2017 user_addr_t mia64addr
= CAST_USER_ADDR_T(&mia64
);
2019 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
2020 UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
);
2023 IMGSRC_DEBUG("namei: %d\n", error
);
2028 error
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
,
2029 &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
);
2033 int stashed
= error
;
2035 error
= vnode_put(nd
.ni_vp
);
2037 panic("vnode_put() returned non-zero: %d", error
);
2041 error
= vnode_put(nd
.ni_dvp
);
2043 panic("vnode_put() returned non-zero: %d", error
);
2052 #endif /* CONFIG_LOCKERBOOT */
2053 #endif /* CONFIG_IMGSRC_ACCESS */
2056 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
2058 struct nameidata qnd
;
2060 char qfpath
[MAXPATHLEN
];
2061 const char *qfname
= QUOTAFILENAME
;
2062 const char *qfopsname
= QUOTAOPSNAME
;
2063 const char *qfextension
[] = INITQFNAMES
;
2065 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
2066 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
2070 * Enable filesystem disk quotas if necessary.
2071 * We ignore errors as this should not interfere with final mount
2073 for (type
= 0; type
< MAXQUOTAS
; type
++) {
2074 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
2075 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
2076 CAST_USER_ADDR_T(qfpath
), ctx
);
2077 if (namei(&qnd
) != 0) {
2078 continue; /* option file to trigger quotas is not present */
2080 vnode_put(qnd
.ni_vp
);
2082 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
2084 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
2091 checkdirs_callback(proc_t p
, void * arg
)
2093 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
2094 vnode_t olddp
= cdrp
->olddp
;
2095 vnode_t newdp
= cdrp
->newdp
;
2096 struct filedesc
*fdp
;
2097 vnode_t new_cvp
= newdp
;
2098 vnode_t new_rvp
= newdp
;
2099 vnode_t old_cvp
= NULL
;
2100 vnode_t old_rvp
= NULL
;
2103 * XXX Also needs to iterate each thread in the process to see if it
2104 * XXX is using a per-thread current working directory, and, if so,
2105 * XXX update that as well.
2109 * First, with the proc_fdlock held, check to see if we will need
2110 * to do any work. If not, we will get out fast.
2115 (fdp
->fd_cdir
!= olddp
&& fdp
->fd_rdir
!= olddp
)) {
2117 return PROC_RETURNED
;
2122 * Ok, we will have to do some work. Always take two refs
2123 * because we might need that many. We'll dispose of whatever
2124 * we ended up not using.
2126 if (vnode_ref(newdp
) != 0) {
2127 return PROC_RETURNED
;
2129 if (vnode_ref(newdp
) != 0) {
2131 return PROC_RETURNED
;
2134 proc_dirs_lock_exclusive(p
);
2136 * Now do the work. Note: we dropped the proc_fdlock, so we
2137 * have to do all of the checks again.
2142 if (fdp
->fd_cdir
== olddp
) {
2144 fdp
->fd_cdir
= newdp
;
2147 if (fdp
->fd_rdir
== olddp
) {
2149 fdp
->fd_rdir
= newdp
;
2154 proc_dirs_unlock_exclusive(p
);
2157 * Dispose of any references that are no longer needed.
2159 if (old_cvp
!= NULL
) {
2160 vnode_rele(old_cvp
);
2162 if (old_rvp
!= NULL
) {
2163 vnode_rele(old_rvp
);
2165 if (new_cvp
!= NULL
) {
2166 vnode_rele(new_cvp
);
2168 if (new_rvp
!= NULL
) {
2169 vnode_rele(new_rvp
);
2172 return PROC_RETURNED
;
2178 * Scan all active processes to see if any of them have a current
2179 * or root directory onto which the new filesystem has just been
2180 * mounted. If so, replace them with the new mount point.
2183 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
2188 struct cdirargs cdr
;
2190 if (olddp
->v_usecount
== 1) {
2193 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
2197 panic("mount: lost mount: error %d", err
);
2204 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2205 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
2207 if (rootvnode
== olddp
) {
2209 lck_rw_lock_exclusive(&rootvnode_rw_lock
);
2212 lck_rw_unlock_exclusive(&rootvnode_rw_lock
);
2221 * Unmount a file system.
2223 * Note: unmount takes a path to the vnode mounted on as argument,
2224 * not special file (as before).
2228 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
2233 struct nameidata nd
;
2234 vfs_context_t ctx
= vfs_context_current();
2236 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
2237 UIO_USERSPACE
, uap
->path
, ctx
);
2247 error
= mac_mount_check_umount(ctx
, mp
);
2254 * Must be the root of the filesystem
2256 if ((vp
->v_flag
& VROOT
) == 0) {
2262 /* safedounmount consumes the mount ref */
2263 return safedounmount(mp
, uap
->flags
, ctx
);
2267 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
2271 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
2272 if (mp
== (mount_t
)0) {
2277 /* safedounmount consumes the mount ref */
2278 return safedounmount(mp
, flags
, ctx
);
2281 #define ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT \
2282 "com.apple.private.vfs.role-account-unmount"
2285 * The mount struct comes with a mount ref which will be consumed.
2286 * Do the actual file system unmount, prevent some common foot shooting.
2289 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2292 proc_t p
= vfs_context_proc(ctx
);
2295 * If the file system is not responding and MNT_NOBLOCK
2296 * is set and not a forced unmount then return EBUSY.
2298 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
2299 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
2305 * Skip authorization in two cases:
2306 * - If the process running the unmount has ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT.
2307 * This entitlement allows non-root processes to unmount volumes mounted by
2309 * - If the mount is tagged as permissive and this is not a forced-unmount
2312 if (!IOTaskHasEntitlement(current_task(), ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT
) &&
2313 (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0)))) {
2315 * Only root, or the user that did the original mount is
2316 * permitted to unmount this filesystem.
2318 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
2319 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
2324 * Don't allow unmounting the root file system, or other volumes
2325 * associated with it (for example, the associated VM or DATA mounts).
2327 if ((mp
->mnt_flag
& MNT_ROOTFS
) || (mp
->mnt_kern_flag
& MNTK_SYSTEM
)) {
2328 if (!(mp
->mnt_flag
& MNT_ROOTFS
)) {
2329 printf("attempt to unmount a system mount (%s), will return EBUSY\n",
2330 mp
->mnt_vfsstat
.f_mntonname
);
2332 error
= EBUSY
; /* the root (or associated volumes) is always busy */
2337 * If the mount is providing the root filesystem's disk image
2338 * (i.e. imageboot), don't allow unmounting
2340 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
2345 return dounmount(mp
, flags
, 1, ctx
);
2353 * Do the actual file system unmount.
2356 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
2358 vnode_t coveredvp
= (vnode_t
)0;
2361 int forcedunmount
= 0;
2363 struct vnode
*devvp
= NULLVP
;
2365 proc_t p
= vfs_context_proc(ctx
);
2367 int pflags_save
= 0;
2368 #endif /* CONFIG_TRIGGERS */
2371 if (!(flags
& MNT_FORCE
)) {
2372 fsevent_unmount(mp
, ctx
); /* has to come first! */
2379 * If already an unmount in progress just return EBUSY.
2380 * Even a forced unmount cannot override.
2382 if (mp
->mnt_lflag
& (MNT_LUNMOUNT
| MNT_LMOUNT
)) {
2390 if (flags
& MNT_FORCE
) {
2392 mp
->mnt_lflag
|= MNT_LFORCE
;
2396 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2397 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2401 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2402 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2403 mp
->mnt_flag
&= ~MNT_ASYNC
;
2405 * anyone currently in the fast path that
2406 * trips over the cached rootvp will be
2407 * dumped out and forced into the slow path
2408 * to regenerate a new cached value
2410 mp
->mnt_realrootvp
= NULLVP
;
2413 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2415 * Force unmount any mounts in this filesystem.
2416 * If any unmounts fail - just leave them dangling.
2419 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2423 * taking the name_cache_lock exclusively will
2424 * ensure that everyone is out of the fast path who
2425 * might be trying to use a now stale copy of
2426 * vp->v_mountedhere->mnt_realrootvp
2427 * bumping mount_generation causes the cached values
2432 name_cache_unlock();
2435 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2440 if (forcedunmount
== 0) {
2441 ubc_umount(mp
); /* release cached vnodes */
2442 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2443 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2446 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2447 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2448 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2454 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2457 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2460 if (forcedunmount
) {
2461 lflags
|= FORCECLOSE
;
2463 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2464 if ((forcedunmount
== 0) && error
) {
2466 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2467 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2468 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2472 /* make sure there is no one in the mount iterations or lookup */
2473 mount_iterdrain(mp
);
2475 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2477 mount_iterreset(mp
);
2479 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2480 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2481 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2485 /* increment the operations count */
2487 OSAddAtomic(1, &vfs_nummntops
);
2490 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2491 /* hold an io reference and drop the usecount before close */
2492 devvp
= mp
->mnt_devvp
;
2493 vnode_getalways(devvp
);
2495 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2497 vnode_clearmountedon(devvp
);
2500 lck_rw_done(&mp
->mnt_rwlock
);
2501 mount_list_remove(mp
);
2502 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2504 /* mark the mount point hook in the vp but not drop the ref yet */
2505 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2507 * The covered vnode needs special handling. Trying to get an
2508 * iocount must not block here as this may lead to deadlocks
2509 * if the Filesystem to which the covered vnode belongs is
2510 * undergoing forced unmounts. Since we hold a usecount, the
2511 * vnode cannot be reused (it can, however, still be terminated)
2513 vnode_getalways(coveredvp
);
2514 vnode_lock_spin(coveredvp
);
2517 coveredvp
->v_mountedhere
= (struct mount
*)0;
2518 CLR(coveredvp
->v_flag
, VMOUNT
);
2520 vnode_unlock(coveredvp
);
2521 vnode_put(coveredvp
);
2525 mp
->mnt_vtable
->vfc_refcount
--;
2526 mount_list_unlock();
2528 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2529 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2531 mp
->mnt_lflag
|= MNT_LDEAD
;
2533 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2535 * do the wakeup here
2536 * in case we block in mount_refdrain
2537 * which will drop the mount lock
2538 * and allow anyone blocked in vfs_busy
2539 * to wakeup and see the LDEAD state
2541 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2542 wakeup((caddr_t
)mp
);
2546 /* free disk_conditioner_info structure for this mount */
2547 disk_conditioner_unmount(mp
);
2550 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2551 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2556 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2557 // Restore P_NOREMOTEHANG bit to its previous value
2558 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2559 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2564 * Callback and context are set together under the mount lock, and
2565 * never cleared, so we're safe to examine them here, drop the lock,
2568 if (mp
->mnt_triggercallback
!= NULL
) {
2571 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2572 } else if (did_vflush
) {
2573 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2580 #endif /* CONFIG_TRIGGERS */
2582 lck_rw_done(&mp
->mnt_rwlock
);
2585 wakeup((caddr_t
)mp
);
2589 if ((coveredvp
!= NULLVP
)) {
2590 vnode_t pvp
= NULLVP
;
2593 * The covered vnode needs special handling. Trying to
2594 * get an iocount must not block here as this may lead
2595 * to deadlocks if the Filesystem to which the covered
2596 * vnode belongs is undergoing forced unmounts. Since we
2597 * hold a usecount, the vnode cannot be reused
2598 * (it can, however, still be terminated).
2600 vnode_getalways(coveredvp
);
2602 mount_dropcrossref(mp
, coveredvp
, 0);
2604 * We'll _try_ to detect if this really needs to be
2605 * done. The coveredvp can only be in termination (or
2606 * terminated) if the coveredvp's mount point is in a
2607 * forced unmount (or has been) since we still hold the
2610 if (!vnode_isrecycled(coveredvp
)) {
2611 pvp
= vnode_getparent(coveredvp
);
2613 if (coveredvp
->v_resolve
) {
2614 vnode_trigger_rearm(coveredvp
, ctx
);
2619 vnode_rele(coveredvp
);
2620 vnode_put(coveredvp
);
2624 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2627 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2628 mount_lock_destroy(mp
);
2630 mac_mount_label_destroy(mp
);
2632 zfree(mount_zone
, mp
);
2634 panic("dounmount: no coveredvp");
2641 * Unmount any mounts in this filesystem.
2644 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2647 fsid_t
*fsids
, fsid
;
2649 int count
= 0, i
, m
= 0;
2654 // Get an array to hold the submounts fsids.
2655 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2657 fsids_sz
= count
* sizeof(fsid_t
);
2658 fsids
= kheap_alloc(KHEAP_TEMP
, fsids_sz
, Z_NOWAIT
);
2659 if (fsids
== NULL
) {
2660 mount_list_unlock();
2663 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2666 * Fill the array with submount fsids.
2667 * Since mounts are always added to the tail of the mount list, the
2668 * list is always in mount order.
2669 * For each mount check if the mounted-on vnode belongs to a
2670 * mount that's already added to our array of mounts to be unmounted.
2672 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2673 vp
= smp
->mnt_vnodecovered
;
2677 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2678 for (i
= 0; i
<= m
; i
++) {
2679 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2680 fsids
[i
].val
[1] == fsid
.val
[1]) {
2681 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2686 mount_list_unlock();
2688 // Unmount the submounts in reverse order. Ignore errors.
2689 for (i
= m
; i
> 0; i
--) {
2690 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2693 mount_iterdrop(smp
);
2694 (void) dounmount(smp
, flags
, 1, ctx
);
2698 kheap_free(KHEAP_TEMP
, fsids
, fsids_sz
);
2702 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2707 if (mp
->mnt_crossref
< 0) {
2708 panic("mount cross refs -ve");
2711 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2713 vnode_put_locked(dp
);
2717 mount_lock_destroy(mp
);
2719 mac_mount_label_destroy(mp
);
2721 zfree(mount_zone
, mp
);
2725 vnode_put_locked(dp
);
2732 * Sync each mounted filesystem.
2738 int print_vmpage_stat
= 0;
2741 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2742 * mounted read-write with the passed waitfor value.
2744 * Parameters: mp mount-point descriptor per mounted file-system instance.
2745 * arg user argument (please see below)
2747 * User argument is a pointer to 32 bit unsigned integer which describes the
2748 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2749 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2752 * Returns: VFS_RETURNED
2755 sync_callback(mount_t mp
, void *arg
)
2757 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2758 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2759 unsigned waitfor
= MNT_NOWAIT
;
2762 waitfor
= *(uint32_t*)arg
;
2765 /* Sanity check for flags - these are the only valid combinations for the flag bits */
2766 if (waitfor
!= MNT_WAIT
&&
2767 waitfor
!= (MNT_WAIT
| MNT_VOLUME
) &&
2768 waitfor
!= MNT_NOWAIT
&&
2769 waitfor
!= (MNT_NOWAIT
| MNT_VOLUME
) &&
2770 waitfor
!= MNT_DWAIT
&&
2771 waitfor
!= (MNT_DWAIT
| MNT_VOLUME
)) {
2772 panic("Passed inappropriate waitfor %u to "
2773 "sync_callback()", waitfor
);
2776 mp
->mnt_flag
&= ~MNT_ASYNC
;
2777 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel());
2779 mp
->mnt_flag
|= MNT_ASYNC
;
2783 return VFS_RETURNED
;
2788 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2790 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2792 if (print_vmpage_stat
) {
2793 vm_countdirtypages();
2800 #endif /* DIAGNOSTIC */
2806 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2807 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
2811 sync_internal_callback(mount_t mp
, void *arg
)
2814 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2815 (mp
->mnt_flag
& MNT_LOCAL
);
2816 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2818 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2819 return VFS_RETURNED
;
2820 } else if ((sync_type
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2821 return VFS_RETURNED
;
2825 (void)sync_callback(mp
, NULL
);
2827 return VFS_RETURNED
;
2830 int sync_thread_state
= 0;
2831 int sync_timeout_seconds
= 5;
2833 #define SYNC_THREAD_RUN 0x0001
2834 #define SYNC_THREAD_RUNNING 0x0002
2836 #if CONFIG_PHYS_WRITE_ACCT
2837 thread_t pm_sync_thread
;
2838 #endif /* CONFIG_PHYS_WRITE_ACCT */
2841 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2843 sync_type_t sync_type
;
2844 #if CONFIG_PHYS_WRITE_ACCT
2845 pm_sync_thread
= current_thread();
2846 #endif /* CONFIG_PHYS_WRITE_ACCT */
2848 lck_mtx_lock(&sync_mtx_lck
);
2849 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2850 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2851 lck_mtx_unlock(&sync_mtx_lck
);
2853 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2854 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2855 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2856 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2858 lck_mtx_lock(&sync_mtx_lck
);
2861 * This wakeup _has_ to be issued before the lock is released otherwise
2862 * we may end up waking up a thread in sync_internal which is
2863 * expecting a wakeup from a thread it just created and not from this
2864 * thread which is about to exit.
2866 wakeup(&sync_thread_state
);
2867 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2868 #if CONFIG_PHYS_WRITE_ACCT
2869 pm_sync_thread
= NULL
;
2870 #endif /* CONFIG_PHYS_WRITE_ACCT */
2871 lck_mtx_unlock(&sync_mtx_lck
);
2873 if (print_vmpage_stat
) {
2874 vm_countdirtypages();
2881 #endif /* DIAGNOSTIC */
2884 struct timeval sync_timeout_last_print
= {.tv_sec
= 0, .tv_usec
= 0};
2887 * An in-kernel sync for power management to call.
2888 * This function always returns within sync_timeout_seconds seconds.
2890 __private_extern__
int
2895 int thread_created
= FALSE
;
2896 struct timespec ts
= {.tv_sec
= sync_timeout_seconds
, .tv_nsec
= 0};
2898 lck_mtx_lock(&sync_mtx_lck
);
2899 sync_thread_state
|= SYNC_THREAD_RUN
;
2900 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2903 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2904 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2905 if (kr
!= KERN_SUCCESS
) {
2906 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2907 lck_mtx_unlock(&sync_mtx_lck
);
2908 printf("sync_thread failed\n");
2911 thread_created
= TRUE
;
2914 error
= msleep((caddr_t
)&sync_thread_state
, &sync_mtx_lck
,
2915 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2920 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2921 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2922 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2926 if (thread_created
) {
2927 thread_deallocate(thd
);
2931 } /* end of sync_internal call */
2934 * Change filesystem quotas.
2938 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2941 int error
, quota_cmd
, quota_status
= 0;
2944 struct nameidata nd
;
2945 vfs_context_t ctx
= vfs_context_current();
2946 struct dqblk my_dqblk
= {};
2948 AUDIT_ARG(uid
, uap
->uid
);
2949 AUDIT_ARG(cmd
, uap
->cmd
);
2950 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2956 mp
= nd
.ni_vp
->v_mount
;
2958 vnode_put(nd
.ni_vp
);
2961 /* copyin any data we will need for downstream code */
2962 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2964 switch (quota_cmd
) {
2966 /* uap->arg specifies a file from which to take the quotas */
2967 fnamelen
= MAXPATHLEN
;
2968 datap
= zalloc(ZV_NAMEI
);
2969 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2972 /* uap->arg is a pointer to a dqblk structure. */
2973 datap
= (caddr_t
) &my_dqblk
;
2977 /* uap->arg is a pointer to a dqblk structure. */
2978 datap
= (caddr_t
) &my_dqblk
;
2979 if (proc_is64bit(p
)) {
2980 struct user_dqblk my_dqblk64
;
2981 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2983 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2986 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2990 /* uap->arg is a pointer to an integer */
2991 datap
= (caddr_t
) &quota_status
;
2999 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
3002 switch (quota_cmd
) {
3004 if (datap
!= NULL
) {
3005 zfree(ZV_NAMEI
, datap
);
3009 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
3011 if (proc_is64bit(p
)) {
3012 struct user_dqblk my_dqblk64
;
3014 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
3015 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
3016 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
3018 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
3023 /* uap->arg is a pointer to an integer */
3025 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
3037 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
3044 * Get filesystem statistics.
3046 * Returns: 0 Success
3048 * vfs_update_vfsstat:???
3049 * munge_statfs:EFAULT
3053 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
3056 struct vfsstatfs
*sp
;
3058 struct nameidata nd
;
3059 vfs_context_t ctx
= vfs_context_current();
3062 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3063 UIO_USERSPACE
, uap
->path
, ctx
);
3070 sp
= &mp
->mnt_vfsstat
;
3074 error
= mac_mount_check_stat(ctx
, mp
);
3081 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
3087 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
3093 * Get filesystem statistics.
3097 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
3101 struct vfsstatfs
*sp
;
3104 AUDIT_ARG(fd
, uap
->fd
);
3106 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3110 error
= vnode_getwithref(vp
);
3116 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3125 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3131 sp
= &mp
->mnt_vfsstat
;
3132 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3136 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
3146 vfs_get_statfs64(struct mount
*mp
, struct statfs64
*sfs
)
3148 struct vfsstatfs
*vsfs
= &mp
->mnt_vfsstat
;
3150 bzero(sfs
, sizeof(*sfs
));
3152 sfs
->f_bsize
= vsfs
->f_bsize
;
3153 sfs
->f_iosize
= (int32_t)vsfs
->f_iosize
;
3154 sfs
->f_blocks
= vsfs
->f_blocks
;
3155 sfs
->f_bfree
= vsfs
->f_bfree
;
3156 sfs
->f_bavail
= vsfs
->f_bavail
;
3157 sfs
->f_files
= vsfs
->f_files
;
3158 sfs
->f_ffree
= vsfs
->f_ffree
;
3159 sfs
->f_fsid
= vsfs
->f_fsid
;
3160 sfs
->f_owner
= vsfs
->f_owner
;
3161 sfs
->f_type
= mp
->mnt_vtable
->vfc_typenum
;
3162 sfs
->f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
3163 sfs
->f_fssubtype
= vsfs
->f_fssubtype
;
3164 sfs
->f_flags_ext
= (mp
->mnt_kern_flag
& MNTK_SYSTEMDATA
) ? MNT_EXT_ROOT_DATA_VOL
: 0;
3165 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
3166 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
3168 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
);
3170 strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
);
3171 strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
);
3175 * Get file system statistics in 64-bit mode
3178 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
3182 struct nameidata
*ndp
;
3183 struct statfs64
*sfsp
;
3184 vfs_context_t ctxp
= vfs_context_current();
3187 struct nameidata nd
;
3188 struct statfs64 sfs
;
3189 } *__nameidata_statfs64
;
3191 __nameidata_statfs64
= kheap_alloc(KHEAP_TEMP
, sizeof(*__nameidata_statfs64
),
3193 ndp
= &__nameidata_statfs64
->nd
;
3195 NDINIT(ndp
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
3196 UIO_USERSPACE
, uap
->path
, ctxp
);
3206 error
= mac_mount_check_stat(ctxp
, mp
);
3213 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
3219 sfsp
= &__nameidata_statfs64
->sfs
;
3220 vfs_get_statfs64(mp
, sfsp
);
3221 if ((mp
->mnt_kern_flag
& MNTK_SYSTEMDATA
) &&
3222 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3223 /* This process does not want to see a separate data volume mountpoint */
3224 strlcpy(&sfsp
->f_mntonname
[0], "/", sizeof("/"));
3226 error
= copyout(sfsp
, uap
->buf
, sizeof(*sfsp
));
3230 kheap_free(KHEAP_TEMP
, __nameidata_statfs64
, sizeof(*__nameidata_statfs64
));
3236 * Get file system statistics in 64-bit mode
3239 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
3243 struct statfs64 sfs
;
3246 AUDIT_ARG(fd
, uap
->fd
);
3248 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3252 error
= vnode_getwithref(vp
);
3258 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
3267 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3273 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
3277 vfs_get_statfs64(mp
, &sfs
);
3278 if ((mp
->mnt_kern_flag
& MNTK_SYSTEMDATA
) &&
3279 (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) {
3280 /* This process does not want to see a separate data volume mountpoint */
3281 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/"));
3283 error
= copyout(&sfs
, uap
->buf
, sizeof(sfs
));
3292 struct getfsstat_struct
{
3303 getfsstat_callback(mount_t mp
, void * arg
)
3305 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3306 struct vfsstatfs
*sp
;
3308 vfs_context_t ctx
= vfs_context_current();
3310 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3312 error
= mac_mount_check_stat(ctx
, mp
);
3314 fstp
->error
= error
;
3315 return VFS_RETURNED_DONE
;
3318 sp
= &mp
->mnt_vfsstat
;
3320 * If MNT_NOWAIT is specified, do not refresh the
3321 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3323 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3324 (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3325 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3326 (error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) {
3327 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3328 return VFS_RETURNED
;
3332 * Need to handle LP64 version of struct statfs
3334 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
3336 fstp
->error
= error
;
3337 return VFS_RETURNED_DONE
;
3339 fstp
->sfsp
+= my_size
;
3343 error
= mac_mount_label_get(mp
, *fstp
->mp
);
3345 fstp
->error
= error
;
3346 return VFS_RETURNED_DONE
;
3353 return VFS_RETURNED
;
3357 * Get statistics on all filesystems.
3360 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
3362 struct __mac_getfsstat_args muap
;
3364 muap
.buf
= uap
->buf
;
3365 muap
.bufsize
= uap
->bufsize
;
3366 muap
.mac
= USER_ADDR_NULL
;
3368 muap
.flags
= uap
->flags
;
3370 return __mac_getfsstat(p
, &muap
, retval
);
3374 * __mac_getfsstat: Get MAC-related file system statistics
3376 * Parameters: p Calling process (selects 32- vs 64-bit user layouts)
3377 * uap User argument descriptor (see below)
3378 * retval Count of file system statistics (N stats)
3380 * Indirect: uap->bufsize Buffer size
3381 * uap->macsize MAC info size
3382 * uap->buf Buffer where information will be returned
3384 * uap->flags File system flags
3387 * Returns: 0 Success
3392 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
3396 size_t count
, maxcount
, bufsize
, macsize
;
3397 struct getfsstat_struct fst
;
3399 if ((unsigned)uap
->bufsize
> INT_MAX
|| (unsigned)uap
->macsize
> INT_MAX
) {
3403 bufsize
= (size_t) uap
->bufsize
;
3404 macsize
= (size_t) uap
->macsize
;
3406 if (IS_64BIT_PROCESS(p
)) {
3407 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
3409 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
3417 if (uap
->mac
!= USER_ADDR_NULL
) {
3422 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
3423 if (count
!= maxcount
) {
3427 /* Copy in the array */
3428 mp0
= kheap_alloc(KHEAP_TEMP
, macsize
, Z_WAITOK
);
3433 error
= copyin(uap
->mac
, mp0
, macsize
);
3435 kheap_free(KHEAP_TEMP
, mp0
, macsize
);
3439 /* Normalize to an array of user_addr_t */
3440 mp
= kheap_alloc(KHEAP_TEMP
, count
* sizeof(user_addr_t
), Z_WAITOK
);
3442 kheap_free(KHEAP_TEMP
, mp0
, macsize
);
3446 for (i
= 0; i
< count
; i
++) {
3447 if (IS_64BIT_PROCESS(p
)) {
3448 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3450 mp
[i
] = (user_addr_t
)mp0
[i
];
3453 kheap_free(KHEAP_TEMP
, mp0
, macsize
);
3460 fst
.flags
= uap
->flags
;
3463 fst
.maxcount
= (int)maxcount
;
3466 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
);
3469 kheap_free(KHEAP_TEMP
, mp
, count
* sizeof(user_addr_t
));
3473 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3477 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3478 *retval
= fst
.maxcount
;
3480 *retval
= fst
.count
;
3486 getfsstat64_callback(mount_t mp
, void * arg
)
3488 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3489 struct vfsstatfs
*sp
;
3490 struct statfs64 sfs
;
3493 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3495 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3497 fstp
->error
= error
;
3498 return VFS_RETURNED_DONE
;
3501 sp
= &mp
->mnt_vfsstat
;
3503 * If MNT_NOWAIT is specified, do not refresh the fsstat
3504 * cache. MNT_WAIT overrides MNT_NOWAIT.
3506 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3507 * getfsstat, since the constants are out of the same
3510 if ((mp
->mnt_lflag
& MNT_LDEAD
) ||
3511 ((((fstp
->flags
& MNT_NOWAIT
) == 0) || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3512 (!(mp
->mnt_lflag
& MNT_LUNMOUNT
)) &&
3513 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) {
3514 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3515 return VFS_RETURNED
;
3518 vfs_get_statfs64(mp
, &sfs
);
3519 error
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
));
3521 fstp
->error
= error
;
3522 return VFS_RETURNED_DONE
;
3524 fstp
->sfsp
+= sizeof(sfs
);
3527 return VFS_RETURNED
;
3531 * Get statistics on all file systems in 64 bit mode.
3534 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3537 int count
, maxcount
;
3538 struct getfsstat_struct fst
;
3540 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3546 fst
.flags
= uap
->flags
;
3549 fst
.maxcount
= maxcount
;
3551 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
);
3554 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3558 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3559 *retval
= fst
.maxcount
;
3561 *retval
= fst
.count
;
3568 * Gets the vnode associated with the file descriptor passed.
3572 * ctx - vfs context of caller
3573 * fd - file descriptor for which vnode is required.
3574 * vpp - Pointer to pointer to vnode to be returned.
3576 * The vnode is returned with an iocount so any vnode obtained
3577 * by this call needs a vnode_put
3581 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3585 struct fileproc
*fp
;
3586 proc_t p
= vfs_context_proc(ctx
);
3590 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3595 error
= vnode_getwithref(vp
);
3597 (void)fp_drop(p
, fd
, fp
, 0);
3601 (void)fp_drop(p
, fd
, fp
, 0);
3607 * Wrapper function around namei to start lookup from a directory
3608 * specified by a file descriptor ni_dirfd.
3610 * In addition to all the errors returned by namei, this call can
3611 * return ENOTDIR if the file descriptor does not refer to a directory,
3612 * and EBADF if the file descriptor is not valid.
3615 nameiat(struct nameidata
*ndp
, int dirfd
)
3617 if ((dirfd
!= AT_FDCWD
) &&
3618 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3619 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3623 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3624 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3629 c
= *((char *)(ndp
->ni_dirp
));
3635 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3641 if (vnode_vtype(dvp_at
) != VDIR
) {
3646 ndp
->ni_dvp
= dvp_at
;
3647 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3649 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3659 * Change current working directory to a given file descriptor.
3663 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3665 struct filedesc
*fdp
= p
->p_fd
;
3670 int error
, should_put
= 1;
3671 vfs_context_t ctx
= vfs_context_current();
3673 AUDIT_ARG(fd
, uap
->fd
);
3674 if (per_thread
&& uap
->fd
== -1) {
3676 * Switching back from per-thread to per process CWD; verify we
3677 * in fact have one before proceeding. The only success case
3678 * for this code path is to return 0 preemptively after zapping
3679 * the thread structure contents.
3681 thread_t th
= vfs_context_thread(ctx
);
3683 uthread_t uth
= get_bsdthread_info(th
);
3685 uth
->uu_cdir
= NULLVP
;
3686 if (tvp
!= NULLVP
) {
3694 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3697 if ((error
= vnode_getwithref(vp
))) {
3702 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3704 if (vp
->v_type
!= VDIR
) {
3710 error
= mac_vnode_check_chdir(ctx
, vp
);
3715 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3720 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3721 if (vfs_busy(mp
, LK_NOWAIT
)) {
3725 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3736 if ((error
= vnode_ref(vp
))) {
3743 thread_t th
= vfs_context_thread(ctx
);
3745 uthread_t uth
= get_bsdthread_info(th
);
3748 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3755 proc_dirs_lock_exclusive(p
);
3760 proc_dirs_unlock_exclusive(p
);
3777 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3779 return common_fchdir(p
, uap
, 0);
3783 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3785 return common_fchdir(p
, (void *)uap
, 1);
3790 * Change current working directory (".").
3792 * Returns: 0 Success
3793 * change_dir:ENOTDIR
3795 * vnode_ref:ENOENT No such file or directory
3799 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata
*ndp
, int per_thread
)
3801 struct filedesc
*fdp
= p
->p_fd
;
3805 error
= change_dir(ndp
, ctx
);
3809 if ((error
= vnode_ref(ndp
->ni_vp
))) {
3810 vnode_put(ndp
->ni_vp
);
3814 * drop the iocount we picked up in change_dir
3816 vnode_put(ndp
->ni_vp
);
3819 thread_t th
= vfs_context_thread(ctx
);
3821 uthread_t uth
= get_bsdthread_info(th
);
3823 uth
->uu_cdir
= ndp
->ni_vp
;
3824 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3826 vnode_rele(ndp
->ni_vp
);
3830 proc_dirs_lock_exclusive(p
);
3833 fdp
->fd_cdir
= ndp
->ni_vp
;
3835 proc_dirs_unlock_exclusive(p
);
3847 * Change current working directory (".").
3849 * Returns: 0 Success
3850 * chdir_internal:ENOTDIR
3851 * chdir_internal:ENOENT No such file or directory
3852 * chdir_internal:???
3856 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3858 struct nameidata nd
;
3859 vfs_context_t ctx
= vfs_context_current();
3861 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3862 UIO_USERSPACE
, uap
->path
, ctx
);
3864 return chdir_internal(p
, ctx
, &nd
, per_thread
);
3871 * Change current working directory (".") for the entire process
3873 * Parameters: p Process requesting the call
3874 * uap User argument descriptor (see below)
3877 * Indirect parameters: uap->path Directory path
3879 * Returns: 0 Success
3880 * common_chdir: ENOTDIR
3881 * common_chdir: ENOENT No such file or directory
3886 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3888 return common_chdir(p
, (void *)uap
, 0);
3894 * Change current working directory (".") for a single thread
3896 * Parameters: p Process requesting the call
3897 * uap User argument descriptor (see below)
3900 * Indirect parameters: uap->path Directory path
3902 * Returns: 0 Success
3903 * common_chdir: ENOTDIR
3904 * common_chdir: ENOENT No such file or directory
3909 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3911 return common_chdir(p
, (void *)uap
, 1);
3916 * Change notion of root (``/'') directory.
3920 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3922 struct filedesc
*fdp
= p
->p_fd
;
3924 struct nameidata nd
;
3926 vfs_context_t ctx
= vfs_context_current();
3928 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3932 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3933 UIO_USERSPACE
, uap
->path
, ctx
);
3934 error
= change_dir(&nd
, ctx
);
3940 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3943 vnode_put(nd
.ni_vp
);
3948 if ((error
= vnode_ref(nd
.ni_vp
))) {
3949 vnode_put(nd
.ni_vp
);
3952 vnode_put(nd
.ni_vp
);
3955 * This lock provides the guarantee that as long as you hold the lock
3956 * fdp->fd_rdir has a usecount on it. This is used to take an iocount
3957 * on a referenced vnode in namei when determining the rootvnode for
3960 /* needed for synchronization with lookup */
3961 proc_dirs_lock_exclusive(p
);
3962 /* needed for setting the flag and other activities on the fd itself */
3965 fdp
->fd_rdir
= nd
.ni_vp
;
3966 fdp
->fd_flags
|= FD_CHROOT
;
3968 proc_dirs_unlock_exclusive(p
);
3977 #define PATHSTATICBUFLEN 256
3978 #define PIVOT_ROOT_ENTITLEMENT \
3979 "com.apple.private.vfs.pivot-root"
3981 #if defined(XNU_TARGET_OS_OSX)
3983 pivot_root(proc_t p
, struct pivot_root_args
*uap
, __unused
int *retval
)
3986 char new_rootfs_path_before
[PATHSTATICBUFLEN
] = {0};
3987 char old_rootfs_path_after
[PATHSTATICBUFLEN
] = {0};
3988 char *new_rootfs_path_before_buf
= NULL
;
3989 char *old_rootfs_path_after_buf
= NULL
;
3990 char *incoming
= NULL
;
3991 char *outgoing
= NULL
;
3992 vnode_t incoming_rootvp
= NULLVP
;
3993 size_t bytes_copied
;
3996 * XXX : Additional restrictions needed
3997 * - perhaps callable only once.
3999 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
4004 * pivot_root can be executed by launchd only.
4005 * Enforce entitlement.
4007 if ((p
->p_pid
!= 1) || !IOTaskHasEntitlement(current_task(), PIVOT_ROOT_ENTITLEMENT
)) {
4011 error
= copyinstr(uap
->new_rootfs_path_before
, &new_rootfs_path_before
[0], PATHSTATICBUFLEN
, &bytes_copied
);
4012 if (error
== ENAMETOOLONG
) {
4013 new_rootfs_path_before_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
4014 error
= copyinstr(uap
->new_rootfs_path_before
, new_rootfs_path_before_buf
, MAXPATHLEN
, &bytes_copied
);
4021 error
= copyinstr(uap
->old_rootfs_path_after
, &old_rootfs_path_after
[0], PATHSTATICBUFLEN
, &bytes_copied
);
4022 if (error
== ENAMETOOLONG
) {
4023 old_rootfs_path_after_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
4024 error
= copyinstr(uap
->old_rootfs_path_after
, old_rootfs_path_after_buf
, MAXPATHLEN
, &bytes_copied
);
4030 if (new_rootfs_path_before_buf
) {
4031 incoming
= new_rootfs_path_before_buf
;
4033 incoming
= &new_rootfs_path_before
[0];
4036 if (old_rootfs_path_after_buf
) {
4037 outgoing
= old_rootfs_path_after_buf
;
4039 outgoing
= &old_rootfs_path_after
[0];
4043 * The proposed incoming FS MUST be authenticated (i.e. not a chunklist DMG).
4044 * Userland is not allowed to pivot to an image.
4046 error
= vnode_lookup(incoming
, 0, &incoming_rootvp
, vfs_context_kernel());
4050 error
= VNOP_IOCTL(incoming_rootvp
, FSIOC_KERNEL_ROOTAUTH
, NULL
, 0, vfs_context_kernel());
4055 error
= vfs_switch_root(incoming
, outgoing
, VFSSR_VIRTUALDEV_PROHIBITED
);
4058 if (incoming_rootvp
!= NULLVP
) {
4059 vnode_put(incoming_rootvp
);
4060 incoming_rootvp
= NULLVP
;
4063 if (old_rootfs_path_after_buf
) {
4064 zfree(ZV_NAMEI
, old_rootfs_path_after_buf
);
4067 if (new_rootfs_path_before_buf
) {
4068 zfree(ZV_NAMEI
, new_rootfs_path_before_buf
);
4075 pivot_root(proc_t p
, __unused
struct pivot_root_args
*uap
, int *retval
)
4077 return nosys(p
, NULL
, retval
);
4079 #endif /* XNU_TARGET_OS_OSX */
4082 * Common routine for chroot and chdir.
4084 * Returns: 0 Success
4085 * ENOTDIR Not a directory
4086 * namei:??? [anything namei can return]
4087 * vnode_authorize:??? [anything vnode_authorize can return]
4090 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
4095 if ((error
= namei(ndp
))) {
4101 if (vp
->v_type
!= VDIR
) {
4107 error
= mac_vnode_check_chdir(ctx
, vp
);
4114 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
4124 * Allocate the vnode data (for directories) associated with the file glob.
4127 fg_vn_data_alloc(void)
4129 struct fd_vn_data
*fvdata
;
4131 /* Allocate per fd vnode data */
4132 fvdata
= kheap_alloc(KM_FD_VN_DATA
, sizeof(struct fd_vn_data
),
4134 lck_mtx_init(&fvdata
->fv_lock
, &fd_vn_lck_grp
, &fd_vn_lck_attr
);
4139 * Free the vnode data (for directories) associated with the file glob.
4142 fg_vn_data_free(void *fgvndata
)
4144 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
4146 kheap_free(KHEAP_DATA_BUFFERS
, fvdata
->fv_buf
, fvdata
->fv_bufallocsiz
);
4147 lck_mtx_destroy(&fvdata
->fv_lock
, &fd_vn_lck_grp
);
4148 kheap_free(KM_FD_VN_DATA
, fvdata
, sizeof(struct fd_vn_data
));
4152 * Check permissions, allocate an open file structure,
4153 * and call the device open routine if any.
4155 * Returns: 0 Success
4166 * XXX Need to implement uid, gid
4169 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4170 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
4173 proc_t p
= vfs_context_proc(ctx
);
4174 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
4175 struct fileproc
*fp
;
4178 int type
, indx
, error
;
4179 struct vfs_context context
;
4183 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
4187 flags
= FFLAGS(uflags
);
4188 CLR(flags
, FENCRYPTED
);
4189 CLR(flags
, FUNENCRYPTED
);
4191 AUDIT_ARG(fflags
, oflags
);
4192 AUDIT_ARG(mode
, vap
->va_mode
);
4194 if ((error
= falloc_withalloc(p
,
4195 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
4198 uu
->uu_dupfd
= -indx
- 1;
4200 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
4201 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
4202 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
4203 fp_drop(p
, indx
, NULL
, 0);
4208 if (error
== ERESTART
) {
4211 fp_free(p
, indx
, fp
);
4217 fp
->fp_glob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
4218 fp
->fp_glob
->fg_ops
= &vnops
;
4219 fp
->fp_glob
->fg_data
= (caddr_t
)vp
;
4221 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
4223 .l_whence
= SEEK_SET
,
4226 if (flags
& O_EXLOCK
) {
4227 lf
.l_type
= F_WRLCK
;
4229 lf
.l_type
= F_RDLCK
;
4232 if ((flags
& FNONBLOCK
) == 0) {
4236 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->fp_glob
,
4242 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->fp_glob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
4245 fp
->fp_glob
->fg_flag
|= FWASLOCKED
;
4248 /* try to truncate by setting the size attribute */
4249 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
4254 * For directories we hold some additional information in the fd.
4256 if (vnode_vtype(vp
) == VDIR
) {
4257 fp
->fp_glob
->fg_vn_data
= fg_vn_data_alloc();
4259 fp
->fp_glob
->fg_vn_data
= NULL
;
4265 * The first terminal open (without a O_NOCTTY) by a session leader
4266 * results in it being set as the controlling terminal.
4268 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
4269 !(flags
& O_NOCTTY
)) {
4272 (void)(*fp
->fp_glob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
4273 (caddr_t
)&tmp
, ctx
);
4277 if (flags
& O_CLOEXEC
) {
4278 *fdflags(p
, indx
) |= UF_EXCLOSE
;
4280 if (flags
& O_CLOFORK
) {
4281 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
4283 procfdtbl_releasefd(p
, indx
, NULL
);
4285 #if CONFIG_SECLUDED_MEMORY
4286 if (secluded_for_filecache
&&
4287 FILEGLOB_DTYPE(fp
->fp_glob
) == DTYPE_VNODE
&&
4288 vnode_vtype(vp
) == VREG
) {
4289 memory_object_control_t moc
;
4291 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
4293 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
4294 /* nothing to do... */
4295 } else if (fp
->fp_glob
->fg_flag
& FWRITE
) {
4296 /* writable -> no longer eligible for secluded pages */
4297 memory_object_mark_eligible_for_secluded(moc
,
4299 } else if (secluded_for_filecache
== 1) {
4300 char pathname
[32] = { 0, };
4302 /* XXX FBDP: better way to detect /Applications/ ? */
4303 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4304 (void)copyinstr(ndp
->ni_dirp
,
4309 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
4314 pathname
[sizeof(pathname
) - 1] = '\0';
4315 if (strncmp(pathname
,
4317 strlen("/Applications/")) == 0 &&
4319 "/Applications/Camera.app/",
4320 strlen("/Applications/Camera.app/")) != 0) {
4323 * AND from "/Applications/"
4324 * AND not from "/Applications/Camera.app/"
4325 * ==> eligible for secluded
4327 memory_object_mark_eligible_for_secluded(moc
,
4330 } else if (secluded_for_filecache
== 2) {
4332 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4334 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4336 /* not implemented... */
4338 size_t len
= strlen(vp
->v_name
);
4339 if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) ||
4340 !strncmp(vp
->v_name
, "dyld", len
) ||
4341 !strncmp(vp
->v_name
, "launchd", len
) ||
4342 !strncmp(vp
->v_name
, "Camera", len
) ||
4343 !strncmp(vp
->v_name
, "mediaserverd", len
) ||
4344 !strncmp(vp
->v_name
, "SpringBoard", len
) ||
4345 !strncmp(vp
->v_name
, "backboardd", len
)) {
4347 * This file matters when launching Camera:
4348 * do not store its contents in the secluded
4349 * pool that will be drained on Camera launch.
4351 memory_object_mark_eligible_for_secluded(moc
,
4356 #endif /* CONFIG_SECLUDED_MEMORY */
4358 fp_drop(p
, indx
, fp
, 1);
4365 context
= *vfs_context_current();
4366 context
.vc_ucred
= fp
->fp_glob
->fg_cred
;
4368 if ((fp
->fp_glob
->fg_flag
& FWASLOCKED
) &&
4369 (FILEGLOB_DTYPE(fp
->fp_glob
) == DTYPE_VNODE
)) {
4371 .l_whence
= SEEK_SET
,
4376 vp
, (caddr_t
)fp
->fp_glob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
4379 vn_close(vp
, fp
->fp_glob
->fg_flag
, &context
);
4381 fp_free(p
, indx
, fp
);
4387 * While most of the *at syscall handlers can call nameiat() which
4388 * is a wrapper around namei, the use of namei and initialisation
4389 * of nameidata are far removed and in different functions - namei
4390 * gets called in vn_open_auth for open1. So we'll just do here what
4394 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
4395 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
4398 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
4402 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
4403 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
4408 c
= *((char *)(ndp
->ni_dirp
));
4414 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
4420 if (vnode_vtype(dvp_at
) != VDIR
) {
4425 ndp
->ni_dvp
= dvp_at
;
4426 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
4427 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
4434 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
4438 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4440 * Parameters: p Process requesting the open
4441 * uap User argument descriptor (see below)
4442 * retval Pointer to an area to receive the
4443 * return value from the system call
4445 * Indirect: uap->path Path to open (same as 'open')
4446 * uap->flags Flags to open (same as 'open')
4447 * uap->uid UID to set, if creating
4448 * uap->gid GID to set, if creating
4449 * uap->mode File mode, if creating (same as 'open')
4450 * uap->xsecurity ACL to set, if creating
4452 * Returns: 0 Success
4455 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4457 * XXX: We should enumerate the possible errno values here, and where
4458 * in the code they originated.
4461 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
4463 struct filedesc
*fdp
= p
->p_fd
;
4465 kauth_filesec_t xsecdst
;
4466 struct vnode_attr va
;
4467 struct nameidata nd
;
4470 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4473 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
4474 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
4479 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4480 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4481 if (uap
->uid
!= KAUTH_UID_NONE
) {
4482 VATTR_SET(&va
, va_uid
, uap
->uid
);
4484 if (uap
->gid
!= KAUTH_GID_NONE
) {
4485 VATTR_SET(&va
, va_gid
, uap
->gid
);
4487 if (xsecdst
!= NULL
) {
4488 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4491 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4492 uap
->path
, vfs_context_current());
4494 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4495 fileproc_alloc_init
, NULL
, retval
);
4496 if (xsecdst
!= NULL
) {
4497 kauth_filesec_free(xsecdst
);
4504 * Go through the data-protected atomically controlled open (2)
4506 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4509 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
4511 int flags
= uap
->flags
;
4512 int class = uap
->class;
4513 int dpflags
= uap
->dpflags
;
4516 * Follow the same path as normal open(2)
4517 * Look up the item if it exists, and acquire the vnode.
4519 struct filedesc
*fdp
= p
->p_fd
;
4520 struct vnode_attr va
;
4521 struct nameidata nd
;
4526 /* Mask off all but regular access permissions */
4527 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4528 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4530 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
4531 uap
->path
, vfs_context_current());
4534 * Initialize the extra fields in vnode_attr to pass down our
4536 * 1. target cprotect class.
4537 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4539 if (flags
& O_CREAT
) {
4540 /* lower level kernel code validates that the class is valid before applying it. */
4541 if (class != PROTECTION_CLASS_DEFAULT
) {
4543 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4544 * file behave the same as open (2)
4546 VATTR_SET(&va
, va_dataprotect_class
, class);
4550 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4551 if (flags
& (O_RDWR
| O_WRONLY
)) {
4552 /* Not allowed to write raw encrypted bytes */
4555 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4556 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4558 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4559 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4563 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4564 fileproc_alloc_init
, NULL
, retval
);
4570 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4571 int fd
, enum uio_seg segflg
, int *retval
)
4573 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4575 struct vnode_attr va
;
4576 struct nameidata nd
;
4578 struct vnode_attr
*vap
;
4579 struct nameidata
*ndp
;
4583 __open_data
= kheap_alloc(KHEAP_TEMP
, sizeof(*__open_data
), Z_WAITOK
);
4584 vap
= &__open_data
->va
;
4585 ndp
= &__open_data
->nd
;
4588 /* Mask off all but regular access permissions */
4589 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4590 VATTR_SET(vap
, va_mode
, cmode
& ACCESSPERMS
);
4592 NDINIT(ndp
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4595 error
= open1at(ctx
, ndp
, flags
, vap
, fileproc_alloc_init
, NULL
,
4598 kheap_free(KHEAP_TEMP
, __open_data
, sizeof(*__open_data
));
/*
 * open: entry point that first calls __pthread_testcancel(1) and then
 * forwards to open_nocancel().
 *
 * Parameters:	p	Process argument, passed through unchanged
 *		uap	User argument descriptor; reinterpreted as
 *			struct open_nocancel_args * for the callee
 *		retval	Passed through to open_nocancel()
 *
 * Returns:	Whatever open_nocancel() returns.
 */
4604 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4606 __pthread_testcancel(1);
4607 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
4611 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4614 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4615 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
4619 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4622 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4623 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
/*
 * openat: entry point that first calls __pthread_testcancel(1) and then
 * forwards to openat_nocancel().
 *
 * Parameters:	p	Process argument, passed through unchanged
 *		uap	User argument descriptor; reinterpreted as
 *			struct openat_nocancel_args * for the callee
 *		retval	Passed through to openat_nocancel()
 *
 * Returns:	Whatever openat_nocancel() returns.
 */
4627 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4629 __pthread_testcancel(1);
4630 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4634 * openbyid_np: open a file given a file system id and a file system object id
4635 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4636 * for file systems that don't support object ids it is a node id (uint64_t).
4638 * Parameters: p Process requesting the open
4639 * uap User argument descriptor (see below)
4640 * retval Pointer to an area to receive the
4641 * return value from the system call
4643 * Indirect: uap->path Path to open (same as 'open')
4645 * uap->fsid id of target file system
4646 * uap->objid id of target file system object
4647 * uap->oflags Flags to open (same as 'open')
4649 * Returns: 0 Success
4653 * XXX: We should enumerate the possible errno values here, and where
4654 * in the code they originated.
4657 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4663 int buflen
= MAXPATHLEN
;
4665 vfs_context_t ctx
= vfs_context_current();
4667 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4671 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4675 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4676 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4680 AUDIT_ARG(value32
, fsid
.val
[0]);
4681 AUDIT_ARG(value64
, objid
);
4683 /* resolve path from fsid, objid */
4685 buf
= kheap_alloc(KHEAP_TEMP
, buflen
+ 1, Z_WAITOK
);
4690 error
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
,
4691 buf
, FSOPT_ISREALFSID
, &pathlen
);
4694 kheap_free(KHEAP_TEMP
, buf
, buflen
+ 1);
4697 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4705 error
= openat_internal(
4706 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4708 kheap_free(KHEAP_TEMP
, buf
, buflen
+ 1);
4715 * Create a special file.
4717 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4720 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4722 struct vnode_attr va
;
4723 vfs_context_t ctx
= vfs_context_current();
4725 struct nameidata nd
;
4729 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4730 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4732 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4733 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4734 return mkfifo1(ctx
, uap
->path
, &va
);
4737 AUDIT_ARG(mode
, (mode_t
)uap
->mode
);
4738 AUDIT_ARG(value32
, uap
->dev
);
4740 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4743 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4744 UIO_USERSPACE
, uap
->path
, ctx
);
4757 switch (uap
->mode
& S_IFMT
) {
4759 VATTR_SET(&va
, va_type
, VCHR
);
4762 VATTR_SET(&va
, va_type
, VBLK
);
4770 error
= mac_vnode_check_create(ctx
,
4771 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4777 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4781 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4786 int update_flags
= 0;
4788 // Make sure the name & parent pointers are hooked up
4789 if (vp
->v_name
== NULL
) {
4790 update_flags
|= VNODE_UPDATE_NAME
;
4792 if (vp
->v_parent
== NULLVP
) {
4793 update_flags
|= VNODE_UPDATE_PARENT
;
4797 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4801 add_fsevent(FSE_CREATE_FILE
, ctx
,
4809 * nameidone has to happen before we vnode_put(dvp)
4810 * since it may need to release the fs_nodelock on the dvp
4823 * Create a named pipe.
4825 * Returns: 0 Success
4828 * vnode_authorize:???
4832 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4836 struct nameidata nd
;
4838 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4839 UIO_USERSPACE
, upath
, ctx
);
4847 /* check that this is a new file and authorize addition */
4852 VATTR_SET(vap
, va_type
, VFIFO
);
4854 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4858 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4861 * nameidone has to happen before we vnode_put(dvp)
4862 * since it may need to release the fs_nodelock on the dvp
4876 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4878 * Parameters: p Process requesting the open
4879 * uap User argument descriptor (see below)
4882 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4883 * uap->uid UID to set
4884 * uap->gid GID to set
4885 * uap->mode File mode to set (same as 'mkfifo')
4886 * uap->xsecurity ACL to set, if creating
4888 * Returns: 0 Success
4891 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4893 * XXX: We should enumerate the possible errno values here, and where
4894 * in the code they originated.
4897 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4900 kauth_filesec_t xsecdst
;
4901 struct vnode_attr va
;
4903 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4905 xsecdst
= KAUTH_FILESEC_NONE
;
4906 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4907 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4913 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4914 if (uap
->uid
!= KAUTH_UID_NONE
) {
4915 VATTR_SET(&va
, va_uid
, uap
->uid
);
4917 if (uap
->gid
!= KAUTH_GID_NONE
) {
4918 VATTR_SET(&va
, va_gid
, uap
->gid
);
4920 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4921 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4924 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4926 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4927 kauth_filesec_free(xsecdst
);
4934 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4936 struct vnode_attr va
;
4939 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4941 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
4944 extern int safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
);
4945 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4946 extern int safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4949 safe_getpath_new(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
)
4951 int ret
, len
= _len
;
4953 *truncated_path
= 0;
4956 ret
= vn_getpath(dvp
, path
, &len
);
4958 ret
= vn_getpath_no_firmlink(dvp
, path
, &len
);
4960 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4962 path
[len
- 1] = '/';
4963 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
4964 if (len
> MAXPATHLEN
) {
4967 // the string got truncated!
4968 *truncated_path
= 1;
4969 ptr
= strrchr(path
, '/');
4971 *ptr
= '\0'; // chop off the string at the last directory component
4973 len
= (int)strlen(path
) + 1;
4976 } else if (ret
== 0) {
4977 *truncated_path
= 1;
4978 } else if (ret
!= 0) {
4979 struct vnode
*mydvp
= dvp
;
4981 if (ret
!= ENOSPC
) {
4982 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4983 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4985 *truncated_path
= 1;
4988 if (mydvp
->v_parent
!= NULL
) {
4989 mydvp
= mydvp
->v_parent
;
4990 } else if (mydvp
->v_mount
) {
4991 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4994 // no parent and no mount point? only thing is to punt and say "/" changed
4995 strlcpy(path
, "/", _len
);
5000 if (mydvp
== NULL
) {
5006 ret
= vn_getpath(mydvp
, path
, &len
);
5008 ret
= vn_getpath_no_firmlink(mydvp
, path
, &len
);
5010 } while (ret
== ENOSPC
);
/*
 * safe_getpath: thin wrapper around safe_getpath_new() that passes 1 as
 * the trailing firmlink argument; all other arguments are forwarded
 * unchanged and the callee's return value is returned as-is.
 *
 * NOTE(review): a non-zero firmlink argument presumably selects
 * vn_getpath() rather than vn_getpath_no_firmlink() inside
 * safe_getpath_new() -- confirm against that function.
 */
5017 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
5019 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1);
/*
 * safe_getpath_no_firmlink: thin wrapper around safe_getpath_new() that
 * passes 0 as the trailing firmlink argument; all other arguments are
 * forwarded unchanged and the callee's return value is returned as-is.
 *
 * NOTE(review): a zero firmlink argument presumably selects
 * vn_getpath_no_firmlink() inside safe_getpath_new() -- confirm against
 * that function.
 */
5023 safe_getpath_no_firmlink(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
5025 return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0);
5029 * Make a hard file link.
5031 * Returns: 0 Success
5036 * vnode_authorize:???
5041 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
5042 user_addr_t link
, int flag
, enum uio_seg segflg
)
5044 vnode_t vp
, pvp
, dvp
, lvp
;
5045 struct nameidata nd
;
5051 int need_event
, has_listeners
, need_kpath2
;
5052 char *target_path
= NULL
;
5055 vp
= dvp
= lvp
= NULLVP
;
5057 /* look up the object we are linking to */
5058 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
5059 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
5062 error
= nameiat(&nd
, fd1
);
5071 * Normally, linking to directories is not supported.
5072 * However, some file systems may have limited support.
5074 if (vp
->v_type
== VDIR
) {
5075 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
5076 error
= EPERM
; /* POSIX */
5080 /* Linking to a directory requires ownership. */
5081 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
5082 struct vnode_attr dva
;
5085 VATTR_WANTED(&dva
, va_uid
);
5086 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
5087 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
5088 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
5095 /* lookup the target node */
5099 nd
.ni_cnd
.cn_nameiop
= CREATE
;
5100 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
5102 error
= nameiat(&nd
, fd2
);
5110 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
5115 /* or to anything that kauth doesn't want us to (eg. immutable items) */
5116 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
5120 /* target node must not exist */
5121 if (lvp
!= NULLVP
) {
5125 /* cannot link across mountpoints */
5126 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
5131 /* authorize creation of the target node */
5132 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
5136 /* and finally make the link */
5137 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
5143 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
5147 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
5151 has_listeners
= kauth_authorize_fileop_has_listeners();
5155 if (AUDIT_RECORD_EXISTS()) {
5160 if (need_event
|| has_listeners
|| need_kpath2
) {
5161 char *link_to_path
= NULL
;
5162 int len
, link_name_len
;
5164 /* build the path to the new link file */
5165 GET_PATH(target_path
);
5167 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
5169 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
5171 if (has_listeners
) {
5172 /* build the path to file we are linking to */
5173 GET_PATH(link_to_path
);
5175 link_name_len
= MAXPATHLEN
;
5176 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
5178 * Call out to allow 3rd party notification of link.
5179 * Ignore result of kauth_authorize_fileop call.
5181 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
5182 (uintptr_t)link_to_path
,
5183 (uintptr_t)target_path
);
5185 if (link_to_path
!= NULL
) {
5186 RELEASE_PATH(link_to_path
);
5191 /* construct fsevent */
5192 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
5194 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5197 // build the path to the destination of the link
5198 add_fsevent(FSE_CREATE_FILE
, ctx
,
5199 FSE_ARG_STRING
, len
, target_path
,
5200 FSE_ARG_FINFO
, &finfo
,
5205 // need an iocount on pvp in this case
5206 if (pvp
&& pvp
!= dvp
) {
5207 error
= vnode_get(pvp
);
5214 add_fsevent(FSE_STAT_CHANGED
, ctx
,
5215 FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
);
5217 if (pvp
&& pvp
!= dvp
) {
5225 * nameidone has to happen before we vnode_put(dvp)
5226 * since it may need to release the fs_nodelock on the dvp
5229 if (target_path
!= NULL
) {
5230 RELEASE_PATH(target_path
);
/*
 * link: forwards to linkat_internal() using the current VFS context.
 * Both directory descriptors are AT_FDCWD, uap->path and uap->link are
 * passed as user-space addresses (UIO_USERSPACE), and the flag argument
 * is AT_SYMLINK_FOLLOW.  p and retval are unused.
 *
 * Returns:	Whatever linkat_internal() returns.
 */
5244 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
5246 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
5247 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
5251 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
5253 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
5257 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
5258 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
5262 * Make a symbolic link.
5264 * We could add support for ACLs here too...
5268 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
5269 user_addr_t link
, enum uio_seg segflg
)
5271 struct vnode_attr va
;
5274 struct nameidata nd
;
5280 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
5281 path
= zalloc(ZV_NAMEI
);
5282 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
5284 path
= (char *)path_data
;
5289 AUDIT_ARG(text
, path
); /* This is the link string */
5291 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
5294 error
= nameiat(&nd
, fd
);
5301 p
= vfs_context_proc(ctx
);
5303 VATTR_SET(&va
, va_type
, VLNK
);
5304 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
5307 error
= mac_vnode_check_create(ctx
,
5308 dvp
, &nd
.ni_cnd
, &va
);
5321 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
5323 /* get default ownership, etc. */
5325 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
5328 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
5331 /* do fallback attribute handling */
5332 if (error
== 0 && vp
) {
5333 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
5337 if (error
== 0 && vp
) {
5338 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
5343 int update_flags
= 0;
5345 /*check if a new vnode was created, else try to get one*/
5347 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
5349 nd
.ni_op
= OP_LOOKUP
;
5351 nd
.ni_cnd
.cn_flags
= 0;
5352 error
= nameiat(&nd
, fd
);
5360 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5361 /* call out to allow 3rd party notification of rename.
5362 * Ignore result of kauth_authorize_fileop call.
5364 if (kauth_authorize_fileop_has_listeners() &&
5366 char *new_link_path
= NULL
;
5369 /* build the path to the new link file */
5370 new_link_path
= get_pathbuff();
5372 vn_getpath(dvp
, new_link_path
, &len
);
5373 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
5374 new_link_path
[len
- 1] = '/';
5375 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
5378 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
5379 (uintptr_t)path
, (uintptr_t)new_link_path
);
5380 if (new_link_path
!= NULL
) {
5381 release_pathbuff(new_link_path
);
5385 // Make sure the name & parent pointers are hooked up
5386 if (vp
->v_name
== NULL
) {
5387 update_flags
|= VNODE_UPDATE_NAME
;
5389 if (vp
->v_parent
== NULLVP
) {
5390 update_flags
|= VNODE_UPDATE_PARENT
;
5394 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
5398 add_fsevent(FSE_CREATE_FILE
, ctx
,
5406 * nameidone has to happen before we vnode_put(dvp)
5407 * since it may need to release the fs_nodelock on the dvp
5416 if (path
&& (path
!= (char *)path_data
)) {
5417 zfree(ZV_NAMEI
, path
);
/*
 * symlink: forwards to symlinkat_internal() using the current VFS
 * context.  uap->path is passed as the path_data argument, AT_FDCWD as
 * the directory descriptor, and uap->link as the link argument; both
 * addresses are user-space (UIO_USERSPACE).  p and retval are unused.
 *
 * Returns:	Whatever symlinkat_internal() returns.
 */
5424 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
5426 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
5427 uap
->link
, UIO_USERSPACE
);
/*
 * symlinkat: forwards to symlinkat_internal() using the current VFS
 * context.  uap->path1 is passed as the path_data argument, uap->fd as
 * the directory descriptor, and uap->path2 as the link argument; both
 * addresses are user-space (UIO_USERSPACE).  p and retval are unused.
 *
 * Returns:	Whatever symlinkat_internal() returns.
 */
5431 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
5432 __unused
int32_t *retval
)
5434 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
5435 uap
->path2
, UIO_USERSPACE
);
5439 * Delete a whiteout from the filesystem.
5440 * No longer supported.
5443 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
5449 * Delete a name from the filesystem.
5453 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
5454 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
5456 struct nameidata nd
;
5459 struct componentname
*cnp
;
5461 char *no_firmlink_path
= NULL
;
5463 int len_no_firmlink_path
= 0;
5466 struct vnode_attr va
;
5472 int truncated_no_firmlink_path
;
5474 struct vnode_attr
*vap
;
5476 int retry_count
= 0;
5479 cn_flags
= LOCKPARENT
;
5480 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
5481 cn_flags
|= AUDITVNPATH1
;
5483 /* If a starting dvp is passed, it trumps any fd passed. */
5489 /* unlink or delete is allowed on rsrc forks and named streams */
5490 cn_flags
|= CN_ALLOWRSRCFORK
;
5499 truncated_no_firmlink_path
= 0;
5502 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
5504 nd
.ni_dvp
= start_dvp
;
5505 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
5509 error
= nameiat(&nd
, fd
);
5518 /* With Carbon delete semantics, busy files cannot be deleted */
5519 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
5520 flags
|= VNODE_REMOVE_NODELETEBUSY
;
5523 /* Skip any potential upcalls if told to. */
5524 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
5525 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
5529 batched
= vnode_compound_remove_available(vp
);
5531 * The root of a mounted filesystem cannot be deleted.
5533 if ((vp
->v_flag
& VROOT
) || (dvp
->v_mount
!= vp
->v_mount
)) {
5538 #if DEVELOPMENT || DEBUG
5540 * XXX VSWAP: Check for entitlements or special flag here
5541 * so we can restrict access appropriately.
5543 #else /* DEVELOPMENT || DEBUG */
5545 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
5549 #endif /* DEVELOPMENT || DEBUG */
5552 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
5554 if (error
== ENOENT
) {
5555 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5566 if (!vnode_compound_remove_available(dvp
)) {
5567 panic("No vp, but no compound remove?");
5572 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5575 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5576 /* XXX need to get these data in batched VNOP */
5577 get_fse_info(vp
, &finfo
, ctx
);
5580 error
= vfs_get_notify_attributes(&va
);
5589 has_listeners
= kauth_authorize_fileop_has_listeners();
5590 if (need_event
|| has_listeners
) {
5594 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5595 if (no_firmlink_path
== NULL
) {
5596 GET_PATH(no_firmlink_path
);
5598 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
5602 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5603 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5607 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5609 if (error
== EKEEPLOOKING
) {
5611 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5614 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5615 panic("EKEEPLOOKING, but continue flag not set?");
5618 if (vnode_isdir(vp
)) {
5622 goto continue_lookup
;
5623 } else if (error
== ENOENT
&& batched
) {
5624 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5626 * For compound VNOPs, the authorization callback may
5627 * return ENOENT in case of racing hardlink lookups
5628 * hitting the name cache, redrive the lookup.
5638 * Call out to allow 3rd party notification of delete.
5639 * Ignore result of kauth_authorize_fileop call.
5642 if (has_listeners
) {
5643 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5644 KAUTH_FILEOP_DELETE
,
5649 if (vp
->v_flag
& VISHARDLINK
) {
5651 // if a hardlink gets deleted we want to blow away the
5652 // v_parent link because the path that got us to this
5653 // instance of the link is no longer valid. this will
5654 // force the next call to get the path to ask the file
5655 // system instead of just following the v_parent link.
5657 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5662 if (vp
->v_flag
& VISHARDLINK
) {
5663 get_fse_info(vp
, &finfo
, ctx
);
5665 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
5667 if (truncated_path
) {
5668 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5670 add_fsevent(FSE_DELETE
, ctx
,
5671 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
5672 FSE_ARG_FINFO
, &finfo
,
5684 if (no_firmlink_path
!= NULL
) {
5685 RELEASE_PATH(no_firmlink_path
);
5686 no_firmlink_path
= NULL
;
5689 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5690 * will cause its shadow file to go away if necessary.
5692 if (vp
&& (vnode_isnamedstream(vp
)) &&
5693 (vp
->v_parent
!= NULLVP
) &&
5694 vnode_isshadow(vp
)) {
5699 * nameidone has to happen before we vnode_put(dvp)
5700 * since it may need to release the fs_nodelock on the dvp
5716 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5717 enum uio_seg segflg
, int unlink_flags
)
5719 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5724 * Delete a name from the filesystem using Carbon semantics.
/*
 * delete: forwards to unlinkat_internal() using the current VFS context,
 * AT_FDCWD as the directory descriptor, NULLVP as the starting directory
 * vnode, uap->path as a user-space path (UIO_USERSPACE), and
 * VNODE_REMOVE_NODELETEBUSY as the unlink flags.  p and retval are
 * unused.
 *
 * Returns:	Whatever unlinkat_internal() returns.
 */
5727 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5729 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5730 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5734 * Delete a name from the filesystem using POSIX semantics.
/*
 * unlink: forwards to unlinkat_internal() using the current VFS context,
 * AT_FDCWD as the directory descriptor, NULLVP as the starting directory
 * vnode, uap->path as a user-space path (UIO_USERSPACE), and 0 as the
 * unlink flags.  p and retval are unused.
 *
 * Returns:	Whatever unlinkat_internal() returns.
 */
5737 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5739 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5740 uap
->path
, UIO_USERSPACE
, 0);
5744 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5746 if (uap
->flag
& ~(AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5750 if (uap
->flag
& (AT_REMOVEDIR
| AT_REMOVEDIR_DATALESS
)) {
5751 int unlink_flags
= 0;
5753 if (uap
->flag
& AT_REMOVEDIR_DATALESS
) {
5754 unlink_flags
|= VNODE_REMOVE_DATALESS_DIR
;
5756 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5757 uap
->path
, UIO_USERSPACE
, unlink_flags
);
5759 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5760 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5765 * Reposition read/write file offset.
5768 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5770 struct fileproc
*fp
;
5772 struct vfs_context
*ctx
;
5773 off_t offset
= uap
->offset
, file_size
;
5776 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5777 if (error
== ENOTSUP
) {
5782 if (vnode_isfifo(vp
)) {
5788 ctx
= vfs_context_current();
5790 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5791 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5794 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5802 if ((error
= vnode_getwithref(vp
))) {
5807 switch (uap
->whence
) {
5809 offset
+= fp
->fp_glob
->fg_offset
;
5812 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5815 offset
+= file_size
;
5820 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5823 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5829 if (uap
->offset
> 0 && offset
< 0) {
5830 /* Incremented/relative move past max size */
5834 * Allow negative offsets on character devices, per
5835 * POSIX 1003.1-2001. Most likely for writing disk
5838 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5839 /* Decremented/relative move before start */
5843 fp
->fp_glob
->fg_offset
= offset
;
5844 *retval
= fp
->fp_glob
->fg_offset
;
5850 * An lseek can affect whether data is "available to read." Use
5851 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5853 post_event_if_success(vp
, error
, NOTE_NONE
);
5854 (void)vnode_put(vp
);
5861 * Check access permissions.
5863 * Returns: 0 Success
5864 * vnode_authorize:???
5867 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5869 kauth_action_t action
;
5873 * If just the regular access bits, convert them to something
5874 * that vnode_authorize will understand.
5876 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5878 if (uflags
& R_OK
) {
5879 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5881 if (uflags
& W_OK
) {
5882 if (vnode_isdir(vp
)) {
5883 action
|= KAUTH_VNODE_ADD_FILE
|
5884 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5885 /* might want delete rights here too */
5887 action
|= KAUTH_VNODE_WRITE_DATA
;
5890 if (uflags
& X_OK
) {
5891 if (vnode_isdir(vp
)) {
5892 action
|= KAUTH_VNODE_SEARCH
;
5894 action
|= KAUTH_VNODE_EXECUTE
;
5898 /* take advantage of definition of uflags */
5899 action
= uflags
>> 8;
5903 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5909 /* action == 0 means only check for existence */
5911 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5922 * access_extended: Check access permissions in bulk.
5924 * Description: uap->entries Pointer to an array of accessx
5925 * descriptor structs, plus one or
5926 * more NULL terminated strings (see
5927 * "Notes" section below).
5928 * uap->size Size of the area pointed to by
5930 * uap->results Pointer to the results array.
5932 * Returns: 0 Success
5933 * ENOMEM Insufficient memory
5934 * EINVAL Invalid arguments
5935 * namei:EFAULT Bad address
5936 * namei:ENAMETOOLONG Filename too long
5937 * namei:ENOENT No such file or directory
5938 * namei:ELOOP Too many levels of symbolic links
5939 * namei:EBADF Bad file descriptor
5940 * namei:ENOTDIR Not a directory
5945 * uap->results Array contents modified
5947 * Notes: The uap->entries are structured as an arbitrary length array
5948 * of accessx descriptors, followed by one or more NULL terminated
5951 * struct accessx_descriptor[0]
5953 * struct accessx_descriptor[n]
5954 * char name_data[0];
5956 * We determine the entry count by walking the buffer containing
5957 * the uap->entries argument descriptor. For each descriptor we
5958 * see, the valid values for the offset ad_name_offset will be
5959 * in the byte range:
5961 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5963 * [ uap->entries + uap->size - 2 ]
5965 * since we must have at least one string, and the string must
5966 * be at least one character plus the NULL terminator in length.
5968 * XXX: Need to support the check-as uid argument
5971 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5973 struct accessx_descriptor
*input
= NULL
;
5974 errno_t
*result
= NULL
;
5977 size_t desc_max
, desc_actual
;
5979 struct vfs_context context
;
5980 struct nameidata nd
;
5984 #define ACCESSX_MAX_DESCR_ON_STACK 10
5985 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5987 context
.vc_ucred
= NULL
;
5990 * Validate parameters; if valid, copy the descriptor array and string
5991 * arguments into local memory. Before proceeding, the following
5992 * conditions must have been met:
5994 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5995 * o There must be sufficient room in the request for at least one
5996 * descriptor and a one byte NUL terminated string.
5997 * o The allocation of local storage must not fail.
5999 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
6002 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
6005 if (uap
->size
<= sizeof(stack_input
)) {
6006 input
= stack_input
;
6008 input
= kheap_alloc(KHEAP_DATA_BUFFERS
, uap
->size
, Z_WAITOK
);
6009 if (input
== NULL
) {
6014 error
= copyin(uap
->entries
, input
, uap
->size
);
6019 AUDIT_ARG(opaque
, input
, uap
->size
);
6022 * Force NUL termination of the copyin buffer to avoid namei() running
6023 * off the end. If the caller passes us bogus data, they may get a
6026 ((char *)input
)[uap
->size
- 1] = 0;
6029 * Access is defined as checking against the process' real identity,
6030 * even if operations are checking the effective identity. This
6031 * requires that we use a local vfs context.
6033 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
6034 context
.vc_thread
= current_thread();
6037 * Find out how many entries we have, so we can allocate the result
6038 * array by walking the list and adjusting the count downward by the
6039 * earliest string offset we see.
6041 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
6042 desc_actual
= desc_max
;
6043 for (i
= 0; i
< desc_actual
; i
++) {
6045 * Take the offset to the name string for this entry and
6046 * convert to an input array index, which would be one off
6047 * the end of the array if this entry was the lowest-addressed
6050 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
6053 * An offset greater than the max allowable offset is an error.
6054 * It is also an error for any valid entry to point
6055 * to a location prior to the end of the current entry, if
6056 * it's not a reference to the string of the previous entry.
6058 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
6063 /* Also do not let ad_name_offset point to something beyond the size of the input */
6064 if (input
[i
].ad_name_offset
>= uap
->size
) {
6070 * An offset of 0 means use the previous descriptor's offset;
6071 * this is used to chain multiple requests for the same file
6072 * to avoid multiple lookups.
6075 /* This is not valid for the first entry */
6084 * If the offset of the string for this descriptor is before
6085 * what we believe is the current actual last descriptor,
6086 * then we need to adjust our estimate downward; this permits
6087 * the string table following the last descriptor to be out
6088 * of order relative to the descriptor list.
6090 if (j
< desc_actual
) {
6096 * We limit the actual number of descriptors we are willing to process
6097 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
6098 * requested does not exceed this limit,
6100 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
6104 result
= kheap_alloc(KHEAP_DATA_BUFFERS
, desc_actual
* sizeof(errno_t
),
6106 if (result
== NULL
) {
6112 * Do the work by iterating over the descriptor entries we know to
6113 * at least appear to contain valid data.
6116 for (i
= 0; i
< desc_actual
; i
++) {
6118 * If the ad_name_offset is 0, then we use the previous
6119 * results to make the check; otherwise, we are looking up
6122 if (input
[i
].ad_name_offset
!= 0) {
6123 /* discard old vnodes */
6134 * Scan forward in the descriptor list to see if we
6135 * need the parent vnode. We will need it if we are
6136 * deleting, since we must have rights to remove
6137 * entries in the parent directory, as well as the
6138 * rights to delete the object itself.
6140 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
6141 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
6142 if (input
[j
].ad_flags
& _DELETE_OK
) {
6147 niopts
= FOLLOW
| AUDITVNPATH1
;
6149 /* need parent for vnode_authorize for deletion test */
6151 niopts
|= WANTPARENT
;
6155 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
6156 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
6169 * Handle lookup errors.
6179 /* run this access check */
6180 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
6183 /* fatal lookup error */
6189 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
6191 /* copy out results */
6192 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
6195 if (input
&& input
!= stack_input
) {
6196 kheap_free(KHEAP_DATA_BUFFERS
, input
, uap
->size
);
6199 kheap_free(KHEAP_DATA_BUFFERS
, result
, desc_actual
* sizeof(errno_t
));
6207 if (IS_VALID_CRED(context
.vc_ucred
)) {
6208 kauth_cred_unref(&context
.vc_ucred
);
6215 * Returns: 0 Success
6216 * namei:EFAULT Bad address
6217 * namei:ENAMETOOLONG Filename too long
6218 * namei:ENOENT No such file or directory
6219 * namei:ELOOP Too many levels of symbolic links
6220 * namei:EBADF Bad file descriptor
6221 * namei:ENOTDIR Not a directory
/*
 * faccessat_internal: shared worker behind access() and faccessat().
 *
 * Visible behaviour in this listing:
 *  - A local vfs_context is filled in. Without AT_EACCESS its credential is
 *    kauth_cred_copy_real(kauth_cred_get()) and is dropped again with
 *    kauth_cred_unref() at the end; the other visible assignment takes
 *    ctx->vc_ucred unchanged. vc_thread always comes from ctx.
 *  - Lookup options are NOFOLLOW or FOLLOW (chosen by AT_SYMLINK_NOFOLLOW)
 *    plus AUDITVNPATH1; WANTPARENT is added when amode has _DELETE_OK set.
 *  - amode == F_OK sets CN_ALLOWRSRCFORK on the lookup.
 *  - After nameiat(), a named-stream shadow vnode with a parent gets
 *    vnode_ref(); vnode_rele() is issued after access1() when is_namedstream
 *    is set.
 *  - nd.ni_vp is released with vnode_put(), and nd.ni_dvp as well when
 *    _DELETE_OK was requested.
 *
 * NOTE(review): this listing has lost lines (return type, braces, local
 * declarations, error checks, returns); restore them from the pristine file.
 */
6226 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
6227 int flag
, enum uio_seg segflg
)
6230 struct nameidata nd
;
6232 struct vfs_context context
;
6234 int is_namedstream
= 0;
6238 * Unless the AT_EACCESS option is used, Access is defined as checking
6239 * against the process' real identity, even if operations are checking
6240 * the effective identity. So we need to tweak the credential
6241 * in the context for that case.
6243 if (!(flag
& AT_EACCESS
)) {
6244 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
6246 context
.vc_ucred
= ctx
->vc_ucred
;
6248 context
.vc_thread
= ctx
->vc_thread
;
6251 niopts
= (flag
& AT_SYMLINK_NOFOLLOW
? NOFOLLOW
: FOLLOW
) | AUDITVNPATH1
;
6252 /* need parent for vnode_authorize for deletion test */
6253 if (amode
& _DELETE_OK
) {
6254 niopts
|= WANTPARENT
;
6256 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
6260 /* access(F_OK) calls are allowed for resource forks. */
6261 if (amode
== F_OK
) {
6262 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6265 error
= nameiat(&nd
, fd
);
6271 /* Grab reference on the shadow stream file vnode to
6272 * force an inactive on release which will mark it
6275 if (vnode_isnamedstream(nd
.ni_vp
) &&
6276 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6277 vnode_isshadow(nd
.ni_vp
)) {
6279 vnode_ref(nd
.ni_vp
);
6283 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
6286 if (is_namedstream
) {
6287 vnode_rele(nd
.ni_vp
);
6291 vnode_put(nd
.ni_vp
);
6292 if (amode
& _DELETE_OK
) {
6293 vnode_put(nd
.ni_dvp
);
6298 if (!(flag
& AT_EACCESS
)) {
6299 kauth_cred_unref(&context
.vc_ucred
);
/*
 * access: thin wrapper that forwards to faccessat_internal() with the
 * current vfs context, AT_FDCWD as the directory fd, uap->path, uap->flags
 * as the access mode, a flag value of 0 and UIO_USERSPACE for the path.
 * p and retval are marked __unused.
 */
6305 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
6307 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
6308 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
/*
 * faccessat: tests uap->flag for bits outside AT_EACCESS |
 * AT_SYMLINK_NOFOLLOW, then forwards uap->fd, uap->path, uap->amode and
 * uap->flag to faccessat_internal() with UIO_USERSPACE.
 * NOTE(review): the body of the flag check is missing from this listing;
 * presumably it rejects unknown flag bits - confirm against the pristine file.
 */
6312 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
6313 __unused
int32_t *retval
)
6315 if (uap
->flag
& ~(AT_EACCESS
| AT_SYMLINK_NOFOLLOW
)) {
6319 return faccessat_internal(vfs_context_current(), uap
->fd
,
6320 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
6324 * Returns: 0 Success
/*
 * fstatat_internal: common worker for the stat family in this file.
 *
 * Parameters (from the signature): ctx, path, ub (user stat buffer),
 * xsecurity and xsecurity_size (user addresses for extended security data),
 * isstat64, segflg, fd, flag.
 *
 * Visible flow:
 *  - The lookup follows symlinks unless AT_SYMLINK_NOFOLLOW is set and has
 *    CN_ALLOWRSRCFORK set on it.
 *  - With AT_FDONLY the vnode is obtained from fd through fp_getfvp() and
 *    vnode_getwithref(); nameiat() is the other visible lookup call.
 *  - When a fileproc is held and xsecurity is USER_ADDR_NULL,
 *    vn_stat_noauth() is called with fp->fp_glob->fg_cred; the other visible
 *    call is vn_stat(), which receives &fsec only when xsecurity is set.
 *  - AT_REALDEV is turned into the needsrealdev argument of both calls.
 *  - Spare fields are zeroed, the result is munged into the 32-bit or 64-bit
 *    user layout (IS_64BIT_PROCESS() and isstat64 select it) and copied out
 *    to ub.
 *  - When xsecurity is requested, the size is stored through xsecurity_size
 *    with susize() (0 when fsec is KAUTH_FILESEC_NONE), and the filesec is
 *    copied out only if the size read with fusize() is at least
 *    KAUTH_FILESEC_COPYSIZE(fsec). fsec is released with
 *    kauth_filesec_free().
 *
 * NOTE(review): as listed, the st_nlink == 0 fix-up for regular files writes
 * to source AFTER munge_*() has already filled dest, and copyout() uses sbp,
 * which points into dest. The fix-up therefore looks like it never reaches
 * user space - confirm against the pristine file and move it before the
 * munge calls if so.
 * NOTE(review): this listing has lost lines (return type, braces, local
 * declarations, error checks, returns).
 */
6331 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
6332 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
6333 enum uio_seg segflg
, int fd
, int flag
)
6335 struct nameidata nd
;
6342 struct user64_stat user64_sb
;
6343 struct user32_stat user32_sb
;
6344 struct user64_stat64 user64_sb64
;
6345 struct user32_stat64 user32_sb64
;
6349 kauth_filesec_t fsec
;
6350 size_t xsecurity_bufsize
;
6352 struct fileproc
*fp
= NULL
;
6353 int needsrealdev
= 0;
6355 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6356 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
6360 int is_namedstream
= 0;
6361 /* stat calls are allowed for resource forks. */
6362 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
6365 if (flag
& AT_FDONLY
) {
6368 error
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
);
6372 if ((error
= vnode_getwithref(fvp
))) {
6378 error
= nameiat(&nd
, fd
);
6383 fsec
= KAUTH_FILESEC_NONE
;
6385 statptr
= (void *)&source
;
6388 /* Grab reference on the shadow stream file vnode to
6389 * force an inactive on release which will mark it
6392 if (vnode_isnamedstream(nd
.ni_vp
) &&
6393 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
6394 vnode_isshadow(nd
.ni_vp
)) {
6396 vnode_ref(nd
.ni_vp
);
6400 needsrealdev
= flag
& AT_REALDEV
? 1 : 0;
6401 if (fp
&& (xsecurity
== USER_ADDR_NULL
)) {
6403 * If the caller has the file open, and is not
6404 * requesting extended security information, we are
6405 * going to let them get the basic stat information.
6407 error
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
,
6408 fp
->fp_glob
->fg_cred
);
6410 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
),
6411 isstat64
, needsrealdev
, ctx
);
6415 if (is_namedstream
) {
6416 vnode_rele(nd
.ni_vp
);
6419 vnode_put(nd
.ni_vp
);
6429 /* Zap spare fields */
6430 if (isstat64
!= 0) {
6431 source
.sb64
.st_lspare
= 0;
6432 source
.sb64
.st_qspare
[0] = 0LL;
6433 source
.sb64
.st_qspare
[1] = 0LL;
6434 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6435 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
6436 my_size
= sizeof(dest
.user64_sb64
);
6437 sbp
= (caddr_t
)&dest
.user64_sb64
;
6439 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
6440 my_size
= sizeof(dest
.user32_sb64
);
6441 sbp
= (caddr_t
)&dest
.user32_sb64
;
6444 * Check if we raced (post lookup) against the last unlink of a file.
6446 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
6447 source
.sb64
.st_nlink
= 1;
6450 source
.sb
.st_lspare
= 0;
6451 source
.sb
.st_qspare
[0] = 0LL;
6452 source
.sb
.st_qspare
[1] = 0LL;
6453 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
6454 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
6455 my_size
= sizeof(dest
.user64_sb
);
6456 sbp
= (caddr_t
)&dest
.user64_sb
;
6458 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
6459 my_size
= sizeof(dest
.user32_sb
);
6460 sbp
= (caddr_t
)&dest
.user32_sb
;
6464 * Check if we raced (post lookup) against the last unlink of a file.
6466 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
6467 source
.sb
.st_nlink
= 1;
6470 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
6474 /* caller wants extended security information? */
6475 if (xsecurity
!= USER_ADDR_NULL
) {
6476 /* did we get any? */
6477 if (fsec
== KAUTH_FILESEC_NONE
) {
6478 if (susize(xsecurity_size
, 0) != 0) {
6483 /* find the user buffer size */
6484 xsecurity_bufsize
= fusize(xsecurity_size
);
6486 /* copy out the actual data size */
6487 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
6492 /* if the caller supplied enough room, copy out to it */
6493 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
6494 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
6499 if (fsec
!= KAUTH_FILESEC_NONE
) {
6500 kauth_filesec_free(fsec
);
6506 * stat_extended: Get file status; with extended security (ACL).
6508 * Parameters: p (ignored)
6509 * uap User argument descriptor (see below)
6512 * Indirect: uap->path Path of file to get status from
6513 * uap->ub User buffer (holds file status info)
6514 * uap->xsecurity ACL to get (extended security)
6515 * uap->xsecurity_size Size of ACL
6517 * Returns: 0 Success
/*
 * stat_extended: forwards to fstatat_internal() with uap->path, uap->ub,
 * uap->xsecurity and uap->xsecurity_size, isstat64 = 0, UIO_USERSPACE and
 * AT_FDCWD.
 * NOTE(review): the line carrying the final (flag) argument is missing from
 * this listing.
 */
6522 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
6523 __unused
int32_t *retval
)
6525 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6526 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6531 * Returns: 0 Success
6532 * fstatat_internal:??? [see fstatat_internal() in this file]
/*
 * stat: forwards to fstatat_internal() with uap->path and uap->ub, no
 * extended security (xsecurity and xsecurity_size are 0), isstat64 = 0,
 * UIO_USERSPACE, AT_FDCWD and a flag value of 0.
 */
6535 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
6537 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6538 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
/*
 * stat64: same call as stat() but with isstat64 = 1; xsecurity and
 * xsecurity_size are 0, the path is in UIO_USERSPACE, the directory fd is
 * AT_FDCWD and the flag value is 0.
 */
6542 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
6544 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6545 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
6549 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6551 * Parameters: p (ignored)
6552 * uap User argument descriptor (see below)
6555 * Indirect: uap->path Path of file to get status from
6556 * uap->ub User buffer (holds file status info)
6557 * uap->xsecurity ACL to get (extended security)
6558 * uap->xsecurity_size Size of ACL
6560 * Returns: 0 Success
/*
 * stat64_extended: forwards to fstatat_internal() with uap->path, uap->ub,
 * uap->xsecurity and uap->xsecurity_size, isstat64 = 1, UIO_USERSPACE and
 * AT_FDCWD.
 * NOTE(review): the line carrying the final (flag) argument is missing from
 * this listing.
 */
6565 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
6567 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6568 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6573 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6575 * Parameters: p (ignored)
6576 * uap User argument descriptor (see below)
6579 * Indirect: uap->path Path of file to get status from
6580 * uap->ub User buffer (holds file status info)
6581 * uap->xsecurity ACL to get (extended security)
6582 * uap->xsecurity_size Size of ACL
6584 * Returns: 0 Success
/*
 * lstat_extended: forwards to fstatat_internal() with uap->path, uap->ub,
 * uap->xsecurity and uap->xsecurity_size, isstat64 = 0, UIO_USERSPACE,
 * AT_FDCWD and AT_SYMLINK_NOFOLLOW as the flag, so a trailing symlink is
 * not followed by the lookup.
 */
6589 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
6591 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6592 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
6593 AT_SYMLINK_NOFOLLOW
);
6597 * Get file status; this version does not follow links.
/*
 * lstat: forwards to fstatat_internal() with uap->path and uap->ub, no
 * extended security (xsecurity and xsecurity_size are 0), isstat64 = 0,
 * UIO_USERSPACE, AT_FDCWD and AT_SYMLINK_NOFOLLOW as the flag.
 */
6600 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
6602 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6603 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
/*
 * lstat64: same call as lstat() but with isstat64 = 1; xsecurity and
 * xsecurity_size are 0, the path is in UIO_USERSPACE, the directory fd is
 * AT_FDCWD and the flag is AT_SYMLINK_NOFOLLOW.
 */
6607 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6609 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6610 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6614 * lstat64_extended: Get file status; can handle large inode numbers; does not
6615 * follow links; with extended security (ACL).
6617 * Parameters: p (ignored)
6618 * uap User argument descriptor (see below)
6621 * Indirect: uap->path Path of file to get status from
6622 * uap->ub User buffer (holds file status info)
6623 * uap->xsecurity ACL to get (extended security)
6624 * uap->xsecurity_size Size of ACL
6626 * Returns: 0 Success
/*
 * lstat64_extended: forwards to fstatat_internal() with uap->path, uap->ub,
 * uap->xsecurity and uap->xsecurity_size, isstat64 = 1, UIO_USERSPACE,
 * AT_FDCWD and AT_SYMLINK_NOFOLLOW as the flag.
 */
6631 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6633 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6634 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6635 AT_SYMLINK_NOFOLLOW
);
/*
 * fstatat: tests uap->flag for bits outside AT_SYMLINK_NOFOLLOW |
 * AT_REALDEV | AT_FDONLY, then forwards to fstatat_internal() with
 * uap->path, uap->ub, no extended security, isstat64 = 0, UIO_USERSPACE,
 * uap->fd and uap->flag.
 * NOTE(review): the body of the flag check is missing from this listing;
 * presumably it rejects unknown flag bits - confirm against the pristine file.
 */
6639 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
6641 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6645 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6646 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
/*
 * fstatat64: same as fstatat() but passes isstat64 = 1. uap->flag is tested
 * for bits outside AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY before the
 * call to fstatat_internal().
 * NOTE(review): the body of the flag check is missing from this listing.
 */
6650 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6651 __unused
int32_t *retval
)
6653 if (uap
->flag
& ~(AT_SYMLINK_NOFOLLOW
| AT_REALDEV
| AT_FDONLY
)) {
6657 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6658 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6662 * Get configurable pathname variables.
6664 * Returns: 0 Success
6668 * Notes: Global implementation constants are intended to be
6669 * implemented in this function directly; all other constants
6670 * are per-FS implementation, and therefore must be handled in
6671 * each respective FS, instead.
6673 * XXX We implement some things globally right now that should actually be
6674 * XXX per-FS; we will need to deal with this at some point.
/*
 * pathconf: initialises a lookup of uap->path (FOLLOW | AUDITVNPATH1,
 * UIO_USERSPACE) in the current vfs context, calls vn_pathconf() on
 * nd.ni_vp with uap->name so the value is stored through retval, and
 * releases the vnode with vnode_put().
 * NOTE(review): the lookup call itself and its error handling are missing
 * from this listing.
 */
6678 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6681 struct nameidata nd
;
6682 vfs_context_t ctx
= vfs_context_current();
6684 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6685 UIO_USERSPACE
, uap
->path
, ctx
);
6691 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
6693 vnode_put(nd
.ni_vp
);
6699 * Return target name of a symbolic link.
/*
 * readlinkat_internal: shared worker for readlink() and readlinkat().
 *
 * Visible flow:
 *  - bufsize is compared against INT32_MAX before anything else.
 *  - The path is looked up with NOFOLLOW | AUDITVNPATH1 through nameiat().
 *  - A one-iovec read uio covering buf and bufsize is built inside the
 *    on-stack uio_buf array.
 *  - The vnode type is compared with VLNK; the calls that follow are
 *    mac_vnode_check_readlink(), vnode_authorize() for
 *    KAUTH_VNODE_READ_DATA and VNOP_READLINK().
 *  - *retval receives bufsize - uio_resid(auio), cast to int.
 *
 * NOTE(review): this listing has lost lines (return type, braces, the retval
 * parameter line, error values and returns).
 */
6703 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6704 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6710 struct nameidata nd
;
6711 char uio_buf
[UIO_SIZEOF(1)];
6713 if (bufsize
> INT32_MAX
) {
6717 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6720 error
= nameiat(&nd
, fd
);
6728 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6729 &uio_buf
[0], sizeof(uio_buf
));
6730 uio_addiov(auio
, buf
, bufsize
);
6731 if (vp
->v_type
!= VLNK
) {
6735 error
= mac_vnode_check_readlink(ctx
, vp
);
6738 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6742 error
= VNOP_READLINK(vp
, auio
, ctx
);
6747 *retval
= (int)(bufsize
- uio_resid(auio
));
/*
 * readlink: picks UIO_USERSPACE64 or UIO_USERSPACE32 from
 * IS_64BIT_PROCESS(p) and forwards to readlinkat_internal() with AT_FDCWD,
 * uap->path and uap->buf (both through CAST_USER_ADDR_T), uap->count as the
 * buffer size, and the same segment value for the path and the buffer.
 */
6752 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6754 enum uio_seg procseg
;
6756 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6757 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6758 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6759 uap
->count
, procseg
, retval
);
/*
 * readlinkat: picks UIO_USERSPACE64 or UIO_USERSPACE32 from
 * IS_64BIT_PROCESS(p) and forwards uap->fd, uap->path, uap->buf and
 * uap->bufsize to readlinkat_internal(), using the same segment value for
 * the path and the buffer.
 */
6763 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6765 enum uio_seg procseg
;
6767 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6768 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6769 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6773 * Change file flags, the deep inner layer.
/*
 * chflags0: innermost flag-change helper.
 *
 * Visible sequence: mac_vnode_check_setflags() on va->va_flags,
 * vnode_authattr() to compute the required action, vnode_authorize() with
 * KAUTH_VNODE_NOIMMUTABLE or-ed into that action (only when action is
 * non-zero), the supplied callback invoked as (*setattr)(vp, arg, ctx), and
 * finally mac_vnode_notify_setflags().
 *
 * NOTE(review): error-path lines and braces are missing from this listing,
 * so which steps are skipped on failure cannot be read from here.
 */
6776 chflags0(vnode_t vp
, struct vnode_attr
*va
,
6777 int (*setattr
)(vnode_t
, void *, vfs_context_t
),
6778 void *arg
, vfs_context_t ctx
)
6780 kauth_action_t action
= 0;
6784 error
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
);
6790 /* request authorisation, disregard immutability */
6791 if ((error
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) {
6795 * Request that the auth layer disregard those file flags it's allowed to when
6796 * authorizing this operation; we need to do this in order to be able to
6797 * clear immutable flags.
6799 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6802 error
= (*setattr
)(vp
, arg
, ctx
);
6806 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
);
6815 * Change file flags.
6817 * NOTE: this will vnode_put() `vp'
/*
 * chflags1: sets va_flags to flags in a local vnode_attr and calls
 * chflags0() with vnode_setattr (cast to void *) as the callback and &va as
 * its argument. Afterwards it tests for the case where the call succeeded
 * but va_flags is not marked supported in va.
 * NOTE(review): the body of that final test, the VATTR initialisation and
 * the vnode release are missing from this listing.
 */
6820 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6822 struct vnode_attr va
;
6826 VATTR_SET(&va
, va_flags
, flags
);
6828 error
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
);
6831 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6839 * Change flags of a file given a path name.
/*
 * chflags: audits uap->flags, initialises a lookup of uap->path (FOLLOW |
 * AUDITVNPATH1, UIO_USERSPACE) in the current vfs context and hands the
 * resulting vnode to chflags1() together with uap->flags.
 * NOTE(review): the lookup call and its error handling are missing from
 * this listing.
 */
6843 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6846 vfs_context_t ctx
= vfs_context_current();
6848 struct nameidata nd
;
6850 AUDIT_ARG(fflags
, uap
->flags
);
6851 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6852 UIO_USERSPACE
, uap
->path
, ctx
);
6860 /* we don't vnode_put() here because chflags1 does internally */
6861 error
= chflags1(vp
, uap
->flags
, ctx
);
6867 * Change flags of a file given a file descriptor.
/*
 * fchflags: audits uap->fd and uap->flags, resolves the descriptor to a
 * vnode with file_vnode(), takes an iocount with vnode_getwithref(), audits
 * the vnode path and calls chflags1() with uap->flags and the current vfs
 * context.
 * NOTE(review): error-path lines and the descriptor release are missing
 * from this listing.
 */
6871 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6876 AUDIT_ARG(fd
, uap
->fd
);
6877 AUDIT_ARG(fflags
, uap
->flags
);
6878 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6882 if ((error
= vnode_getwithref(vp
))) {
6887 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6889 /* we don't vnode_put() here because chflags1 does internally */
6890 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6897 * Change security information on a filesystem object.
6899 * Returns: 0 Success
6900 * EPERM Operation not permitted
6901 * vnode_authattr:??? [anything vnode_authattr can return]
6902 * vnode_authorize:??? [anything vnode_authorize can return]
6903 * vnode_setattr:??? [anything vnode_setattr can return]
6905 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6906 * translated to EPERM before being returned.
/*
 * chmod_vnode: applies the security attributes in vap to vp.
 *
 * Visible sequence:
 *  - Audits vap->va_mode.
 *  - Tests vp->v_flag for VISNAMEDSTREAM (resource forks).
 *  - MAC pre-checks, each only for attributes active in vap:
 *    mac_vnode_check_setmode(), mac_vnode_check_setowner() (passing -1 for
 *    an inactive uid or gid) and mac_vnode_check_setacl().
 *  - vnode_authattr() followed by vnode_authorize(), with a test for EACCES
 *    on failure.
 *  - vnode_setattr().
 *  - Matching mac_vnode_notify_setmode/setowner/setacl() calls for the
 *    attributes that are active.
 *
 * NOTE(review): bodies of the failure branches are missing from this
 * listing.
 */
6909 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6911 kauth_action_t action
;
6914 AUDIT_ARG(mode
, vap
->va_mode
);
6915 /* XXX audit new args */
6918 /* chmod calls are not allowed for resource forks. */
6919 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6925 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6926 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
6930 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6931 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6932 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6933 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6938 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6939 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6944 /* make sure that the caller is allowed to set this security information */
6945 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6946 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6947 if (error
== EACCES
) {
6953 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
6958 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6959 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6962 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6963 mac_vnode_notify_setowner(ctx
, vp
,
6964 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6965 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6968 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6969 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6978 * Change mode of a file given a path name.
6980 * Returns: 0 Success
6981 * namei:??? [anything namei can return]
6982 * chmod_vnode:??? [anything chmod_vnode can return]
/*
 * chmodat: looks up path relative to fd with nameiat() (NOFOLLOW when flag
 * has AT_SYMLINK_NOFOLLOW, FOLLOW otherwise, plus AUDITVNPATH1), passes the
 * resulting vnode and vap to chmod_vnode(), then releases the vnode with
 * vnode_put().
 * NOTE(review): the remaining NDINIT arguments, the lookup failure branch
 * and the return are missing from this listing.
 */
6985 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6986 int fd
, int flag
, enum uio_seg segflg
)
6988 struct nameidata nd
;
6991 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6992 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6994 if ((error
= nameiat(&nd
, fd
))) {
6997 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6998 vnode_put(nd
.ni_vp
);
7004 * chmod_extended: Change the mode of a file given a path name; with extended
7005 * argument list (including extended security (ACL)).
7007 * Parameters: p Process requesting the open
7008 * uap User argument descriptor (see below)
7011 * Indirect: uap->path Path to object (same as 'chmod')
7012 * uap->uid UID to set
7013 * uap->gid GID to set
7014 * uap->mode File mode to set (same as 'chmod')
7015 * uap->xsecurity ACL to set (or delete)
7017 * Returns: 0 Success
7020 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
7022 * XXX: We should enumerate the possible errno values here, and where
7023 * in the code they originated.
/*
 * chmod_extended: builds a vnode_attr from the user arguments and applies
 * it by path through chmodat() with AT_FDCWD and a flag value of 0.
 *
 * Visible details:
 *  - Audits uap->uid and uap->gid.
 *  - va_mode is set to uap->mode & ALLPERMS unless uap->mode is -1.
 *  - va_uid and va_gid are set unless they equal KAUTH_UID_NONE and
 *    KAUTH_GID_NONE respectively.
 *  - uap->xsecurity selects the ACL handling: the value 1
 *    (_FILESEC_REMOVE_ACL) sets va_acl to NULL, USER_ADDR_NULL has its own
 *    case, and the remaining visible path copies a filesec in with
 *    kauth_copyinfilesec() and points va_acl at its fsec_acl.
 *  - A non-NULL xsecdst is released with kauth_filesec_free().
 *
 * NOTE(review): several case bodies, the VATTR initialisation and the
 * returns are missing from this listing.
 */
7026 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
7029 struct vnode_attr va
;
7030 kauth_filesec_t xsecdst
;
7032 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7035 if (uap
->mode
!= -1) {
7036 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
7038 if (uap
->uid
!= KAUTH_UID_NONE
) {
7039 VATTR_SET(&va
, va_uid
, uap
->uid
);
7041 if (uap
->gid
!= KAUTH_GID_NONE
) {
7042 VATTR_SET(&va
, va_gid
, uap
->gid
);
7046 switch (uap
->xsecurity
) {
7047 /* explicit remove request */
7048 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
7049 VATTR_SET(&va
, va_acl
, NULL
);
7052 case USER_ADDR_NULL
:
7055 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
7058 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7059 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
7062 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
7065 if (xsecdst
!= NULL
) {
7066 kauth_filesec_free(xsecdst
);
7072 * Returns: 0 Success
7073 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: sets va_mode to mode & ALLPERMS in a local vnode_attr
 * and returns the result of chmodat() called with ctx, path, that attribute
 * set, fd, flag and segflg.
 * NOTE(review): the VATTR initialisation line is missing from this listing.
 */
7076 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
7077 int flag
, enum uio_seg segflg
)
7079 struct vnode_attr va
;
7082 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
7084 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
/*
 * chmod: forwards to fchmodat_internal() with the current vfs context,
 * uap->path, uap->mode, AT_FDCWD as the directory fd, a flag value of 0 and
 * UIO_USERSPACE.
 */
7088 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
7090 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
7091 AT_FDCWD
, 0, UIO_USERSPACE
);
/*
 * fchmodat: tests uap->flag for bits other than AT_SYMLINK_NOFOLLOW, then
 * forwards uap->path, uap->mode, uap->fd and uap->flag to
 * fchmodat_internal() with UIO_USERSPACE.
 * NOTE(review): the body of the flag check is missing from this listing;
 * presumably it rejects unknown flag bits - confirm against the pristine file.
 */
7095 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
7097 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7101 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
7102 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
7106 * Change mode of a file given a file descriptor.
/*
 * fchmod1: resolves fd to a vnode with file_vnode(), takes an iocount with
 * vnode_getwithref(), audits the vnode path, applies vap through
 * chmod_vnode() in the current vfs context and drops the iocount with
 * vnode_put().
 * NOTE(review): error-path lines and the descriptor release are missing
 * from this listing.
 */
7109 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
7116 if ((error
= file_vnode(fd
, &vp
)) != 0) {
7119 if ((error
= vnode_getwithref(vp
)) != 0) {
7123 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7125 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
7126 (void)vnode_put(vp
);
7133 * fchmod_extended: Change mode of a file given a file descriptor; with
7134 * extended argument list (including extended security (ACL)).
7136 * Parameters: p Process requesting to change file mode
7137 * uap User argument descriptor (see below)
7140 * Indirect: uap->mode File mode to set (same as 'chmod')
7141 * uap->uid UID to set
7142 * uap->gid GID to set
7143 * uap->xsecurity ACL to set (or delete)
7144 * uap->fd File descriptor of file to change mode
7146 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based counterpart of chmod_extended; builds a
 * vnode_attr from the user arguments and applies it with fchmod1().
 *
 * Visible details:
 *  - Audits uap->uid and uap->gid.
 *  - va_mode is set to uap->mode & ALLPERMS unless uap->mode is -1; va_uid
 *    and va_gid are set unless they equal KAUTH_UID_NONE / KAUTH_GID_NONE.
 *  - uap->xsecurity selects the ACL handling: USER_ADDR_NULL and the value
 *    1 (_FILESEC_REMOVE_ACL) both set va_acl to NULL, the value -1 has its
 *    own case, and the remaining visible path copies a filesec in with
 *    kauth_copyinfilesec() and points va_acl at its fsec_acl.
 *  - After fchmod1(), a second switch on uap->xsecurity groups
 *    USER_ADDR_NULL and -1 together; a non-NULL xsecdst is released with
 *    kauth_filesec_free().
 *
 * NOTE(review): break/default lines, the VATTR initialisation and the
 * returns are missing from this listing.
 */
7151 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
7154 struct vnode_attr va
;
7155 kauth_filesec_t xsecdst
;
7157 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7160 if (uap
->mode
!= -1) {
7161 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
7163 if (uap
->uid
!= KAUTH_UID_NONE
) {
7164 VATTR_SET(&va
, va_uid
, uap
->uid
);
7166 if (uap
->gid
!= KAUTH_GID_NONE
) {
7167 VATTR_SET(&va
, va_gid
, uap
->gid
);
7171 switch (uap
->xsecurity
) {
7172 case USER_ADDR_NULL
:
7173 VATTR_SET(&va
, va_acl
, NULL
);
7175 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
7176 VATTR_SET(&va
, va_acl
, NULL
);
7179 case CAST_USER_ADDR_T(-1):
7182 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
7185 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7188 error
= fchmod1(p
, uap
->fd
, &va
);
7191 switch (uap
->xsecurity
) {
7192 case USER_ADDR_NULL
:
7193 case CAST_USER_ADDR_T(-1):
7196 if (xsecdst
!= NULL
) {
7197 kauth_filesec_free(xsecdst
);
/*
 * fchmod: sets va_mode to uap->mode & ALLPERMS in a local vnode_attr and
 * returns the result of fchmod1() for uap->fd.
 * NOTE(review): the VATTR initialisation line is missing from this listing.
 */
7204 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
7206 struct vnode_attr va
;
7209 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
7211 return fchmod1(p
, uap
->fd
, &va
);
7216 * Set ownership given a path name.
/*
 * fchownat_internal: shared worker for chown(), lchown() and fchownat().
 *
 * Visible sequence:
 *  - Audits uid and gid.
 *  - Looks the path up relative to fd with nameiat(); NOFOLLOW when flag
 *    has AT_SYMLINK_NOFOLLOW, FOLLOW otherwise, plus AUDITVNPATH1.
 *  - va_uid and va_gid are set only when the value differs from VNOVAL
 *    (cast to uid_t or gid_t).
 *  - mac_vnode_check_setowner(), vnode_authattr(), vnode_authorize() when
 *    action is non-zero, vnode_setattr(), mac_vnode_notify_setowner().
 *  - A final test on error == EACCES; the preceding comment says it is
 *    translated to EPERM.
 *
 * NOTE(review): error-path lines, the vnode release and the returns are
 * missing from this listing.
 */
7220 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
7221 gid_t gid
, int flag
, enum uio_seg segflg
)
7224 struct vnode_attr va
;
7226 struct nameidata nd
;
7228 kauth_action_t action
;
7230 AUDIT_ARG(owner
, uid
, gid
);
7232 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7233 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
7235 error
= nameiat(&nd
, fd
);
7244 if (uid
!= (uid_t
)VNOVAL
) {
7245 VATTR_SET(&va
, va_uid
, uid
);
7247 if (gid
!= (gid_t
)VNOVAL
) {
7248 VATTR_SET(&va
, va_gid
, gid
);
7252 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
7258 /* preflight and authorize attribute changes */
7259 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7262 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7265 error
= vnode_setattr(vp
, &va
, ctx
);
7269 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
7275 * EACCES is only allowed from namei(); permissions failure should
7276 * return EPERM, so we need to translate the error code.
7278 if (error
== EACCES
) {
/*
 * chown: forwards to fchownat_internal() with the current vfs context,
 * AT_FDCWD, uap->path, uap->uid, uap->gid, a flag value of 0 (symlinks are
 * followed) and UIO_USERSPACE.
 */
7287 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
7289 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7290 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
/*
 * lchown: forwards to fchownat_internal() with the current vfs context,
 * AT_FDCWD, uap->path, uap->owner, uap->group, AT_SYMLINK_NOFOLLOW as the
 * flag and UIO_USERSPACE. Note the argument structure names its fields
 * owner and group rather than uid and gid.
 */
7294 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
7296 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
7297 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
/*
 * fchownat: tests uap->flag for bits other than AT_SYMLINK_NOFOLLOW, then
 * forwards uap->fd, uap->path, uap->uid, uap->gid and uap->flag to
 * fchownat_internal() with UIO_USERSPACE.
 * NOTE(review): the body of the flag check is missing from this listing;
 * presumably it rejects unknown flag bits - confirm against the pristine file.
 */
7301 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
7303 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
7307 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
7308 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
7312 * Set ownership given a file descriptor.
/*
 * fchown: changes ownership of the file behind uap->fd.
 *
 * Visible sequence:
 *  - Audits uap->uid, uap->gid and uap->fd.
 *  - file_vnode() then vnode_getwithref() to obtain the vnode; the vnode
 *    path is audited.
 *  - va_uid and va_gid are set only when the value differs from VNOVAL.
 *  - vp->v_flag is tested for VISNAMEDSTREAM (resource forks).
 *  - mac_vnode_check_setowner(), vnode_authattr(), vnode_authorize() when
 *    action is non-zero (with a test for EACCES on failure),
 *    vnode_setattr(), mac_vnode_notify_setowner().
 *  - The iocount is dropped with vnode_put().
 *
 * NOTE(review): error-path lines, the descriptor release and the return are
 * missing from this listing.
 */
7316 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
7318 struct vnode_attr va
;
7319 vfs_context_t ctx
= vfs_context_current();
7322 kauth_action_t action
;
7324 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7325 AUDIT_ARG(fd
, uap
->fd
);
7327 if ((error
= file_vnode(uap
->fd
, &vp
))) {
7331 if ((error
= vnode_getwithref(vp
))) {
7335 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7338 if (uap
->uid
!= VNOVAL
) {
7339 VATTR_SET(&va
, va_uid
, uap
->uid
);
7341 if (uap
->gid
!= VNOVAL
) {
7342 VATTR_SET(&va
, va_gid
, uap
->gid
);
7346 /* chown calls are not allowed for resource forks. */
7347 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7354 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7360 /* preflight and authorize attribute changes */
7361 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7364 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7365 if (error
== EACCES
) {
7370 error
= vnode_setattr(vp
, &va
, ctx
);
7374 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
7379 (void)vnode_put(vp
);
/*
 * getutimes: fills the timespec array tsp from a user timeval pair.
 *
 *  - usrtvp == USER_ADDR_NULL: a local struct timeval is converted into
 *    tsp[0] with TIMEVAL_TO_TIMESPEC. NOTE(review): the lines that obtain
 *    the time and fill tsp[1] are missing from this listing.
 *  - Otherwise two timevals are copied in from usrtvp, using the
 *    user64_timeval or user32_timeval layout chosen by
 *    IS_64BIT_PROCESS(current_proc()), and converted into tsp[0] and tsp[1].
 *
 * NOTE(review): copyin error handling and the return are missing from this
 * listing.
 */
7385 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
7389 if (usrtvp
== USER_ADDR_NULL
) {
7390 struct timeval old_tv
;
7391 /* XXX Y2038 bug because of microtime argument */
7393 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
7396 if (IS_64BIT_PROCESS(current_proc())) {
7397 struct user64_timeval tv
[2];
7398 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7402 TIMEVAL64_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7403 TIMEVAL64_TO_TIMESPEC(&tv
[1], &tsp
[1]);
7405 struct user32_timeval tv
[2];
7406 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
7410 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
7411 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: applies access time ts[0] and modification time ts[1] to vp.
 *
 * Visible sequence:
 *  - Audits the vnode path.
 *  - Sets va_access_time and va_modify_time; VA_UTIMES_NULL is or-ed into
 *    va_vaflags (NOTE(review): the guarding condition is missing here,
 *    presumably the null flag argument - confirm).
 *  - vp->v_flag is tested for VISNAMEDSTREAM (resource forks).
 *  - mac_vnode_check_setutimes(), vnode_authattr(), vnode_authorize() when
 *    action is non-zero, vnode_setattr(), mac_vnode_notify_setutimes().
 *  - Both authorisation failure branches test for EACCES when nullflag is
 *    not set.
 *
 * NOTE(review): the last parameter line, branch bodies and the return are
 * missing from this listing.
 */
7418 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
7422 struct vnode_attr va
;
7423 kauth_action_t action
;
7425 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7428 VATTR_SET(&va
, va_access_time
, ts
[0]);
7429 VATTR_SET(&va
, va_modify_time
, ts
[1]);
7431 va
.va_vaflags
|= VA_UTIMES_NULL
;
7435 /* utimes calls are not allowed for resource forks. */
7436 if (vp
->v_flag
& VISNAMEDSTREAM
) {
7443 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7448 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7449 if (!nullflag
&& error
== EACCES
) {
7455 /* since we may not need to auth anything, check here */
7456 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7457 if (!nullflag
&& error
== EACCES
) {
7462 error
= vnode_setattr(vp
, &va
, ctx
);
7466 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
7475 * Set the access and modification times of a file.
/*
 * utimes: initialises a lookup of uap->path (FOLLOW | AUDITVNPATH1,
 * UIO_USERSPACE), fetches the two times with getutimes(), applies them to
 * nd.ni_vp with setutimes() (the last argument is true when usrtvp is
 * USER_ADDR_NULL) and releases the vnode with vnode_put().
 * NOTE(review): the lookup call, the assignment of usrtvp and the error
 * paths are missing from this listing.
 */
7479 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
7481 struct timespec ts
[2];
7484 struct nameidata nd
;
7485 vfs_context_t ctx
= vfs_context_current();
7488 * AUDIT: Needed to change the order of operations to do the
7489 * name lookup first because auditing wants the path.
7491 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
7492 UIO_USERSPACE
, uap
->path
, ctx
);
7500 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7501 * the current time instead.
7504 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7508 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
7511 vnode_put(nd
.ni_vp
);
7516 * Set the access and modification times of a file.
/*
 * futimes: audits uap->fd, fetches the two times with getutimes(), resolves
 * the descriptor with file_vnode() and vnode_getwithref(), then applies the
 * times with setutimes() in the current vfs context. The last argument is
 * usrtvp == 0, the same null test utimes() spells as USER_ADDR_NULL.
 * NOTE(review): the assignment of usrtvp, the error paths and the vnode and
 * descriptor releases are missing from this listing.
 */
7520 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
7522 struct timespec ts
[2];
7527 AUDIT_ARG(fd
, uap
->fd
);
7529 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
7532 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
7535 if ((error
= vnode_getwithref(vp
))) {
7540 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
7547 * Truncate a file given its path name.
/*
 * truncate: sets the data size of the file named by uap->path.
 *
 * Visible sequence:
 *  - uap->length is tested for being negative.
 *  - uap->length is compared with the RLIMIT_FSIZE value from
 *    proc_limitgetcur(); exceeding it raises SIGXFSZ on p via psignal().
 *  - namei() lookup of the path (FOLLOW | AUDITVNPATH1, UIO_USERSPACE).
 *  - va_data_size is set to uap->length.
 *  - mac_vnode_check_truncate() with NOCRED, vnode_authattr(),
 *    vnode_authorize() when action is non-zero, vnode_setattr(),
 *    mac_vnode_notify_truncate().
 *
 * NOTE(review): error values, branch bodies, the vnode release and the
 * return are missing from this listing.
 */
7551 truncate(proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
7554 struct vnode_attr va
;
7555 vfs_context_t ctx
= vfs_context_current();
7557 struct nameidata nd
;
7558 kauth_action_t action
;
7561 if (uap
->length
< 0) {
7565 fsize_limit
= proc_limitgetcur(p
, RLIMIT_FSIZE
, TRUE
);
7566 if ((rlim_t
)uap
->length
> fsize_limit
) {
7567 psignal(p
, SIGXFSZ
);
7571 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
7572 UIO_USERSPACE
, uap
->path
, ctx
);
7573 if ((error
= namei(&nd
))) {
7581 VATTR_SET(&va
, va_data_size
, uap
->length
);
7584 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
7590 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
7593 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
7596 error
= vnode_setattr(vp
, &va
, ctx
);
7600 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
7610 * Truncate a file given a file descriptor.
/*
 * ftruncate: truncate the object open on descriptor uap->fd to
 * uap->length bytes.
 *
 * Visible flow:
 *  - uap->fd is recorded for audit and uap->length is tested for being
 *    negative.
 *  - The caller's current RLIMIT_FSIZE is fetched; when uap->length exceeds
 *    it, SIGXFSZ is posted to p.
 *  - fp_lookup() resolves fd to fp, then the code switches on
 *    FILEGLOB_DTYPE(fp->fp_glob); one arm hands the request to
 *    pshm_truncate() (passing retval through).
 *  - For the vnode path, vp is taken from fp->fp_glob->fg_data and the
 *    descriptor's fg_flag is tested for FWRITE.
 *  - vnode_getwithref() is called on vp, the MAC truncate check is made
 *    with the descriptor's fg_cred, va_data_size is set and applied with
 *    vnode_setattr(), and mac_vnode_notify_truncate() is called.
 *  - vnode_put() is called on vp both after the MAC check and at the end.
 *
 * NOTE(review): unlike truncate(), no vnode_authattr()/vnode_authorize()
 * call is visible here; the visible permission test is the FWRITE check on
 * the open file. Confirm against the complete source.
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); case
 * labels, error assignments and the descriptor release are not visible.
 */
7614 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
7616 vfs_context_t ctx
= vfs_context_current();
7617 struct vnode_attr va
;
7619 struct fileproc
*fp
;
7624 AUDIT_ARG(fd
, uap
->fd
);
7625 if (uap
->length
< 0) {
7629 fsize_limit
= proc_limitgetcur(p
, RLIMIT_FSIZE
, TRUE
);
7630 if ((rlim_t
)uap
->length
> fsize_limit
) {
7631 psignal(p
, SIGXFSZ
);
7635 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
7639 switch (FILEGLOB_DTYPE(fp
->fp_glob
)) {
7641 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7650 vp
= (vnode_t
)fp
->fp_glob
->fg_data
;
7652 if ((fp
->fp_glob
->fg_flag
& FWRITE
) == 0) {
7653 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7658 if ((error
= vnode_getwithref(vp
)) != 0) {
7662 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7665 error
= mac_vnode_check_truncate(ctx
,
7666 fp
->fp_glob
->fg_cred
, vp
);
7668 (void)vnode_put(vp
);
7673 VATTR_SET(&va
, va_data_size
, uap
->length
);
7674 error
= vnode_setattr(vp
, &va
, ctx
);
7678 mac_vnode_notify_truncate(ctx
, fp
->fp_glob
->fg_cred
, vp
);
7682 (void)vnode_put(vp
);
7690 * Sync an open file with synchronized I/O _file_ integrity completion
7694 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7696 __pthread_testcancel(1);
7697 return fsync_common(p
, uap
, MNT_WAIT
);
7702 * Sync an open file with synchronized I/O _file_ integrity completion
7704 * Notes: This is a legacy support function that does not test for
7705 * thread cancellation points.
7709 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7711 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7716 * Sync an open file with synchronized I/O _data_ integrity completion
7720 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7722 __pthread_testcancel(1);
7723 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7730 * Common fsync code to support both synchronized I/O file integrity completion
7731 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7733 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7734 * will only guarantee that the file data contents are retrievable. If
7735 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
7736 * includes additional metadata unnecessary for retrieving the file data
7737 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7740 * Parameters: p The process
7741 * uap->fd The descriptor to synchronize
7742 * flags The data integrity flags
7744 * Returns: int Success
7745 * fp_getfvp:EBADF Bad file descriptor
7746 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7747 * VNOP_FSYNC:??? unspecified
7749 * Notes: We use struct fsync_args because it is a short name, and all
7750 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared worker for fsync(), fsync_nocancel() and
 * fdatasync(), which pass MNT_WAIT or MNT_DWAIT in 'flags'.
 *
 * Visible flow: uap->fd is recorded for audit, fp_getfvp() resolves it to
 * fp and vp, vnode_getwithref() is called on vp, the vnode path is
 * recorded for audit, and VNOP_FSYNC() is issued with the caller's flags.
 * When vp is a named stream (VISNAMEDSTREAM) with a non-null v_parent, is
 * a shadow vnode, and the open file carries FWASWRITTEN,
 * vnode_flushnamedstream() is also called for the parent; its result is
 * discarded. vnode_put() is called on vp at the end.
 *
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); the first
 * term of the named-stream condition, the error returns and the descriptor
 * release are not visible here.
 */
7753 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7756 struct fileproc
*fp
;
7757 vfs_context_t ctx
= vfs_context_current();
7760 AUDIT_ARG(fd
, uap
->fd
);
7762 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7765 if ((error
= vnode_getwithref(vp
))) {
7770 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7772 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7775 /* Sync resource fork shadow file if necessary. */
7777 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7778 (vp
->v_parent
!= NULLVP
) &&
7779 vnode_isshadow(vp
) &&
7780 (fp
->fp_glob
->fg_flag
& FWASWRITTEN
)) {
7781 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7785 (void)vnode_put(vp
);
7791 * Duplicate files. Source must be a file, target must be a file or
7794 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7795 * perform inheritance correctly.
/*
 * copyfile: duplicate the file named by uap->from as uap->to by handing
 * the work to the file system through VNOP_COPYFILE().
 *
 * Visible flow:
 *  - uap->flags is tested against ~CPF_MASK.
 *  - The source is looked up (LOOKUP/OP_COPYFILE); the target is looked up
 *    as a CREATE/OP_LINK with LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART.
 *  - There is a branch on CPF_OVERWRITE being absent, and a test for either
 *    the source or an existing target being a directory (VDIR).
 *  - The source is authorised via vn_authorize_open_existing() with FREAD.
 *  - vn_authorize_unlink() is run for tdvp/tvp; an ENOENT result from it is
 *    deliberately not treated as failure (see the fragment about the MAC
 *    hook and vn_getpath below).
 *  - A vnode_attr with the source's type and a mode derived from uap->mode
 *    (masked by the process cmask, ALLPERMS, ~S_ISTXT and ACCESSPERMS) is
 *    passed to mac_vnode_check_create().
 *  - KAUTH_VNODE_ADD_FILE is authorised on the target directory.
 *  - VNOP_COPYFILE() receives uap->mode and uap->flags unchanged.
 *
 * NOTE(review): the fragment below marked XXX states that authorisation
 * checking in this path is inadequate and does not perform inheritance
 * correctly.
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); the error
 * branches, the same-file test and the cleanup sequence are not visible.
 */
7799 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7801 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7802 struct nameidata fromnd
, tond
;
7804 vfs_context_t ctx
= vfs_context_current();
7806 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7807 struct vnode_attr va
;
7810 /* Check that the flags are valid. */
7812 if (uap
->flags
& ~CPF_MASK
) {
7816 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7817 UIO_USERSPACE
, uap
->from
, ctx
);
7818 if ((error
= namei(&fromnd
))) {
7823 NDINIT(&tond
, CREATE
, OP_LINK
,
7824 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7825 UIO_USERSPACE
, uap
->to
, ctx
);
7826 if ((error
= namei(&tond
))) {
7833 if (!(uap
->flags
& CPF_OVERWRITE
)) {
7839 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7844 /* This calls existing MAC hooks for open */
7845 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7852 * See unlinkat_internal for an explanation of the potential
7853 * ENOENT from the MAC hook but the gist is that the MAC hook
7854 * can fail because vn_getpath isn't able to return the full
7855 * path. We choose to ignore this failure.
7857 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7858 if (error
&& error
!= ENOENT
) {
7866 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7867 /* Mask off all but regular access permissions */
7868 VATTR_SET(&va
, va_mode
,
7869 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7870 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7874 #endif /* CONFIG_MACF */
7876 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7884 * If source is the same as the destination (that is the
7885 * same inode number) then there is nothing to do.
7886 * (fixed to have POSIX semantics - CSM 3/2/98)
7892 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
7895 sdvp
= tond
.ni_startdir
;
7897 * nameidone has to happen before we vnode_put(tdvp)
7898 * since it may need to release the fs_nodelock on the tdvp
/*
 * NOTE(review): no use of CLONE_SNAPSHOT_FALLBACKS_ENABLED is visible in
 * the surrounding lines; presumably it gates a fallback path for cloning
 * from snapshots - confirm against the code that tests it.
 */
7918 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7921 * Helper function for doing clones. The caller is expected to provide an
7922 * iocounted source vnode and release it.
/*
 * clonefile_internal: helper that clones the source vnode fvp to the path
 * 'dst' (resolved relative to dst_dirfd). The caller provides an iocounted
 * fvp and releases it.
 *
 * Parameters:	fvp			source vnode
 *		data_read_authorised	when true, KAUTH_VNODE_READ_DATA is
 *					removed from the rights authorised on
 *					fvp
 *		dst_dirfd		directory fd for the target lookup
 *		dst			user-space target path
 *		flags			CLONE_NOFOLLOW / CLONE_NOOWNERCOPY
 *		ctx			VFS context
 *
 * Visible flow:
 *  - The required right on the target directory starts as
 *    KAUTH_VNODE_ADD_FILE and is switched to KAUTH_VNODE_ADD_SUBDIRECTORY
 *    on another path; a source that is a volume root, a mount point or has
 *    something mounted on it is tested for.
 *  - The target is looked up as CREATE/OP_LINK with WANTPARENT, following
 *    symlinks unless CLONE_NOFOLLOW is set.
 *  - Source and target directory are compared by vnode_mount().
 *  - mac_vnode_check_clone() and vnode_authorize() on tdvp are consulted,
 *    then KAUTH_VNODE_GENERIC_READ_BITS (optionally minus READ_DATA) is
 *    authorised on fvp.
 *  - uid, gid, mode and flags are fetched from the source. nva is prepared
 *    for the new object with vnode_authattr_new() for symlinks (VLNK) and
 *    vn_attribute_prepare() otherwise.
 *  - Owner and group are copied from the source only when
 *    CLONE_NOOWNERCOPY is absent and the caller is superuser; the other
 *    arm adds VNODE_CLONEFILE_NOOWNERCOPY to the VNOP flags.
 *  - Source va_flags are copied with UF_DATAVAULT and SF_RESTRICTED taken
 *    from nva rather than from the source.
 *  - VNOP_CLONEFILE() performs the clone. On success attributes the VNOP
 *    did not handle go through vnode_setattr_fallback(), the vnode is
 *    labelled, its name/parent identity is filled in when missing, and
 *    FSE_CLONE plus FSE_CREATE_FILE/FSE_CREATE_DIR events are posted.
 *  - vn_attribute_cleanup() and, when free_src_acl is set,
 *    kauth_acl_free() release attribute state.
 *
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); switch
 * labels, error returns and the final cleanup are not visible here.
 */
7925 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7926 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7929 struct nameidata tond
;
7932 boolean_t free_src_acl
;
7933 boolean_t attr_cleanup
;
7935 kauth_action_t action
;
7936 struct componentname
*cnp
;
7938 struct vnode_attr va
;
7939 struct vnode_attr nva
;
7940 uint32_t vnop_flags
;
7942 v_type
= vnode_vtype(fvp
);
7947 action
= KAUTH_VNODE_ADD_FILE
;
7950 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7951 fvp
->v_mountedhere
) {
7954 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7960 AUDIT_ARG(fd2
, dst_dirfd
);
7961 AUDIT_ARG(value32
, flags
);
7963 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7964 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7965 UIO_USERSPACE
, dst
, ctx
);
7966 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7973 free_src_acl
= FALSE
;
7974 attr_cleanup
= FALSE
;
7981 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7987 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7991 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
7995 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7996 if (data_read_authorised
) {
7997 action
&= ~KAUTH_VNODE_READ_DATA
;
7999 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
8004 * certain attributes may need to be changed from the source, we ask for
8005 * those here with the exception of source file's ACL. The clone file
8006 * will inherit the target directory's ACL.
8009 VATTR_WANTED(&va
, va_uid
);
8010 VATTR_WANTED(&va
, va_gid
);
8011 VATTR_WANTED(&va
, va_mode
);
8012 VATTR_WANTED(&va
, va_flags
);
8014 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
8019 VATTR_SET(&nva
, va_type
, v_type
);
8020 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
8021 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
8022 free_src_acl
= TRUE
;
8025 /* Handle ACL inheritance, initialize vap. */
8026 if (v_type
== VLNK
) {
8027 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
8029 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
8033 attr_cleanup
= TRUE
;
8036 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
8038 * We've got initial values for all security parameters,
8039 * If we are superuser, then we can change owners to be the
8040 * same as the source. Both superuser and the owner have default
8041 * WRITE_SECURITY privileges so all other fields can be taken
8042 * from source as well.
8044 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
8045 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
8046 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
8048 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
8049 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
8052 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
8055 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
8056 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
8058 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
8059 VATTR_SET(&nva
, va_flags
,
8060 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
8061 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
8064 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
8066 if (!error
&& tvp
) {
8067 int update_flags
= 0;
8070 #endif /* CONFIG_FSE */
8073 * If some of the requested attributes weren't handled by the
8074 * VNOP, use our fallback code.
8076 if (!VATTR_ALL_SUPPORTED(&nva
)) {
8077 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
8081 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
8082 VNODE_LABEL_CREATE
, ctx
);
8085 // Make sure the name & parent pointers are hooked up
8086 if (tvp
->v_name
== NULL
) {
8087 update_flags
|= VNODE_UPDATE_NAME
;
8089 if (tvp
->v_parent
== NULLVP
) {
8090 update_flags
|= VNODE_UPDATE_PARENT
;
8094 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
8095 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
8099 switch (vnode_vtype(tvp
)) {
8103 fsevent
= FSE_CREATE_FILE
;
8106 fsevent
= FSE_CREATE_DIR
;
8112 if (need_fsevent(fsevent
, tvp
)) {
8114 * The following is a sequence of three explicit events.
8115 * A pair of FSE_CLONE events representing the source and destination
8116 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
8117 * fseventsd may coalesce the destination clone and create events
8118 * into a single event resulting in the following sequence for a client
8120 * FSE_CLONE | FSE_CREATE (dst)
8122 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
8124 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
8127 #endif /* CONFIG_FSE */
8132 vn_attribute_cleanup(&nva
, defaulted
);
8134 if (free_src_acl
&& va
.va_acl
) {
8135 kauth_acl_free(va
.va_acl
);
8146 * clone files or directories, target must not exist.
/*
 * clonefileat: clone the file or directory named by uap->src (relative to
 * uap->src_dirfd) to uap->dst (relative to uap->dst_dirfd).
 *
 * Visible flow: uap->flags is tested for bits other than CLONE_NOFOLLOW
 * and CLONE_NOOWNERCOPY, the source is looked up with nameiat()
 * (LOOKUP/OP_COPYFILE, following symlinks unless CLONE_NOFOLLOW is set),
 * and clonefile_internal() is called with data_read_authorised == FALSE.
 *
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); the
 * trailing arguments of the clonefile_internal() call and the release of
 * the source vnode are not visible here.
 */
8150 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
8151 __unused
int32_t *retval
)
8154 struct nameidata fromnd
;
8157 vfs_context_t ctx
= vfs_context_current();
8159 /* Check that the flags are valid. */
8160 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
8164 AUDIT_ARG(fd
, uap
->src_dirfd
);
8166 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
8167 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
8168 UIO_USERSPACE
, uap
->src
, ctx
);
8169 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
8176 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: clone the file open on descriptor uap->src_fd to uap->dst
 * (relative to uap->dst_dirfd).
 *
 * Visible flow: uap->flags is tested for bits other than CLONE_NOFOLLOW
 * and CLONE_NOOWNERCOPY, fp_getfvp() resolves uap->src_fd to fp and fvp,
 * the open file's fg_flag is tested for FREAD, vnode_getwithref() is
 * called on fvp, and clonefile_internal() is called with
 * data_read_authorised == TRUE. file_drop() is called on uap->src_fd at
 * the end.
 *
 * NOTE(review): the parameter is declared `__unused proc_t p`, yet p is
 * passed to fp_getfvp() below; the attribute looks stale - confirm.
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); error
 * branches and the release of fvp are not visible here.
 */
8184 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
8185 __unused
int32_t *retval
)
8188 struct fileproc
*fp
;
8190 vfs_context_t ctx
= vfs_context_current();
8192 /* Check that the flags are valid. */
8193 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
8197 AUDIT_ARG(fd
, uap
->src_fd
);
8198 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
8203 if ((fp
->fp_glob
->fg_flag
& FREAD
) == 0) {
8204 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
8209 if ((error
= vnode_getwithref(fvp
))) {
8213 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
8215 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
8220 file_drop(uap
->src_fd
);
/*
 * rename_submounts_callback: per-mount callback (handed to vfs_iterate()
 * elsewhere in this file) that refreshes f_mntonname of mounts located
 * beneath a renamed mount point.
 *
 * Parameters:	mp	mount being visited
 *		arg	the renamed parent mount (cast to mount_t as pmp)
 *
 * Visible flow: mp is of interest only when its f_mntonname starts with
 * pmp's f_mntonname (strncmp over the parent's length) and the character
 * right after that prefix is '/'. For such a mount vfs_busy(LK_NOWAIT) is
 * attempted, and vn_getpath_ext() rewrites mp's f_mntonname in place from
 * mp->mnt_vnodecovered, using at most MAXPATHLEN bytes and
 * VN_GETPATH_FSENTER. Failures of either call are reported with printf().
 *
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); the
 * return statements and the matching unbusy call are not visible here.
 */
8225 rename_submounts_callback(mount_t mp
, void *arg
)
8228 mount_t pmp
= (mount_t
)arg
;
8229 int prefix_len
= (int)strlen(pmp
->mnt_vfsstat
.f_mntonname
);
8231 if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) {
8235 if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') {
8239 if ((error
= vfs_busy(mp
, LK_NOWAIT
))) {
8240 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
8244 int pathlen
= MAXPATHLEN
;
8245 if ((error
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) {
8246 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
);
8255 * Rename files. Source and destination must either both be directories,
8256 * or both not be directories. If target is a directory, it must be empty.
/*
 * renameat_internal: common implementation behind rename(), renameat() and
 * renameatx_np(), all of which call it further down in this file.
 *
 * Parameters:	ctx	VFS context for the operation
 *		fromfd	directory fd handed to nameiat() for the source
 *		from	source path
 *		tofd	directory fd handed to nameiat() for the target
 *		to	target path
 *		segflg	address space of the paths (tested with
 *			UIO_SEG_IS_USER_SPACE() when 'to' is re-read)
 *		flags	VFS_RENAME_* flags
 *
 * Summary of the visible statements:
 *  - flags outside VFS_RENAME_FLAGS_MASK, and VFS_RENAME_SWAP combined
 *    with VFS_RENAME_EXCL, are each tested up front.
 *  - The two nameidata and two vnode_attr structures are carved out of a
 *    kheap_alloc(KHEAP_TEMP) buffer that is released with kheap_free() at
 *    the end, because they are too big for the stack.
 *  - The source is looked up as DELETE/OP_UNLINK and the target as
 *    RENAME/OP_RENAME, both with WANTPARENT and NAMEI_COMPOUNDRENAME;
 *    lookups are repeated when NAMEI_CONTLOOKUP is set or on a first pass.
 *  - SWAP requires an existing target and EXCL tests for one; the EXCL
 *    case makes an exception for a same-vnode, same-directory rename on a
 *    file system that VNOP_PATHCONF() reports as case-insensitive.
 *  - Full paths (with and without firmlink translation) are captured via
 *    safe_getpath()/safe_getpath_no_firmlink() when fsevents, audit or
 *    kauth file-op listeners need them.
 *  - fdvp and tdvp are compared by v_mount, and fvp's mount is compared
 *    with tdvp's and tvp's as the cross-device check.
 *  - A source that is the root directory of a non-union, non-root,
 *    non-system mount is switched to the vnode it covers (mount-point
 *    rename).
 *  - When a directory moves between different parents, the mount's rename
 *    lock is taken (mount_ref + mount_lock_renames) and the lookup is
 *    restarted; the lock is dropped again when it no longer applies.
 *  - vn_authorize_renamex_with_paths() authorises the rename and
 *    vn_rename() performs it. ENOENT from authorisation, EDATALESS
 *    (followed by vnode_materialize_dataless_file()), EKEEPLOOKING and
 *    ERECYCLE each have their own retry handling, bounded by
 *    MAX_AUTHORIZE_ENOENT_RETRIES / MAX_RENAME_ERECYCLE_RETRIES where a
 *    counter is used.
 *  - Afterwards kauth file-op listeners are notified, FSE_RENAME events
 *    are posted, a renamed mount point gets its f_mntonname rebuilt (and
 *    sub-mounts are fixed through rename_submounts_callback), and
 *    vnode_update_identity() fixes up fvp's name/parent.
 *  - If authorisation was skipped earlier (vn_authorize_skipped) and the
 *    rename succeeded, vn_authorize_renamex_with_paths() is run then.
 *
 * NOTE(review): `if (error && error == ENOENT)` near the end tests error
 * twice; `error == ENOENT` alone is equivalent.
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); labels,
 * error assignments and most cleanup statements are not visible here.
 */
8260 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
8261 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
8263 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
8267 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
8274 struct nameidata
*fromnd
, *tond
;
8282 const char *oname
= NULL
;
8283 char *from_name
= NULL
, *to_name
= NULL
;
8284 char *from_name_no_firmlink
= NULL
, *to_name_no_firmlink
= NULL
;
8285 int from_len
= 0, to_len
= 0;
8286 int from_len_no_firmlink
= 0, to_len_no_firmlink
= 0;
8287 int holding_mntlock
;
8288 int vn_authorize_skipped
;
8289 mount_t locked_mp
= NULL
;
8290 vnode_t oparent
= NULLVP
;
8292 fse_info from_finfo
, to_finfo
;
8294 int from_truncated
= 0, to_truncated
= 0;
8295 int from_truncated_no_firmlink
= 0, to_truncated_no_firmlink
= 0;
8297 struct vnode_attr
*fvap
, *tvap
;
8299 /* carving out a chunk for structs that are too big to be on stack. */
8301 struct nameidata from_node
, to_node
;
8302 struct vnode_attr fv_attr
, tv_attr
;
8304 __rename_data
= kheap_alloc(KHEAP_TEMP
, sizeof(*__rename_data
), Z_WAITOK
);
8305 fromnd
= &__rename_data
->from_node
;
8306 tond
= &__rename_data
->to_node
;
8308 holding_mntlock
= 0;
8317 vn_authorize_skipped
= FALSE
;
8319 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
8321 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8323 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
8325 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
8328 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8329 if ((error
= nameiat(fromnd
, fromfd
))) {
8332 fdvp
= fromnd
->ni_dvp
;
8333 fvp
= fromnd
->ni_vp
;
8335 if (fvp
&& fvp
->v_type
== VDIR
) {
8336 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
8340 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
8341 if ((error
= nameiat(tond
, tofd
))) {
8343 * Translate error code for rename("dir1", "dir2/.").
8345 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
8350 tdvp
= tond
->ni_dvp
;
8354 #if DEVELOPMENT || DEBUG
8356 * XXX VSWAP: Check for entitlements or special flag here
8357 * so we can restrict access appropriately.
8359 #else /* DEVELOPMENT || DEBUG */
8361 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8366 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
8370 #endif /* DEVELOPMENT || DEBUG */
8372 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
8377 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
8382 * We allow rename with VFS_RENAME_EXCL flag for an existing file which
8383 * has the same name as target iff the following conditions are met:
8384 * 1. the target file system is case insensitive
8385 * 2. source and target directories are the same
8386 * 3. source and target files are the same
8387 * 4. name only differs in case (determined by underlying filesystem)
8389 if (fvp
!= tvp
|| fdvp
!= tdvp
) {
8395 * Assume that the target file system is case sensitive if
8396 * _PC_CASE_SENSITIVE selector isn't supported.
8398 err
= VNOP_PATHCONF(tvp
, _PC_CASE_SENSITIVE
, &pval
, ctx
);
8399 if (err
!= 0 || pval
!= 0) {
8405 batched
= vnode_compound_rename_available(fdvp
);
8408 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
8411 get_fse_info(fvp
, &from_finfo
, ctx
);
8413 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
8418 fvap
= &__rename_data
->fv_attr
;
8422 get_fse_info(tvp
, &to_finfo
, ctx
);
8423 } else if (batched
) {
8424 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
8429 tvap
= &__rename_data
->tv_attr
;
8434 #endif /* CONFIG_FSE */
8436 has_listeners
= kauth_authorize_fileop_has_listeners();
8440 if (AUDIT_RECORD_EXISTS()) {
8445 if (need_event
|| has_listeners
) {
8446 if (from_name
== NULL
) {
8447 GET_PATH(from_name
);
8450 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
8452 if (from_name_no_firmlink
== NULL
) {
8453 GET_PATH(from_name_no_firmlink
);
8456 from_len_no_firmlink
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
);
8459 if (need_event
|| need_kpath2
|| has_listeners
) {
8460 if (to_name
== NULL
) {
8464 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
8466 if (to_name_no_firmlink
== NULL
) {
8467 GET_PATH(to_name_no_firmlink
);
8470 to_len_no_firmlink
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
);
8471 if (to_name
&& need_kpath2
) {
8472 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
8477 * Claim: this check will never reject a valid rename.
8478 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8479 * Suppose fdvp and tdvp are not on the same mount.
8480 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8481 * then you can't move it to within another dir on the same mountpoint.
8482 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8484 * If this check passes, then we are safe to pass these vnodes to the same FS.
8486 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
8490 goto skipped_lookup
;
8494 * If the source and destination are the same (i.e. they're
8495 * links to the same vnode) and the target file system is
8496 * case sensitive, then there is nothing to do.
8498 * XXX Come back to this.
8504 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8505 * then assume that this file system is case sensitive.
8507 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
8508 pathconf_val
!= 0) {
8509 vn_authorize_skipped
= TRUE
;
8515 * Allow the renaming of mount points.
8516 * - target must not exist
8517 * - target must reside in the same directory as source
8518 * - union mounts cannot be renamed
8519 * - the root fs, and tightly-linked system volumes, cannot be renamed
8521 * XXX Handle this in VFS after a continued lookup (if we missed
8522 * in the cache to start off)
8524 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8525 * we'll skip past here. The file system is responsible for
8526 * checking that @tvp is not a descendent of @fvp and vice versa
8527 * so it should always return EINVAL if either @tvp or @fvp is the
8530 if ((fvp
->v_flag
& VROOT
) &&
8531 (fvp
->v_type
== VDIR
) &&
8533 (fvp
->v_mountedhere
== NULL
) &&
8535 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
8536 ((fvp
->v_mount
->mnt_kern_flag
& MNTK_SYSTEM
) == 0) &&
8537 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
8540 /* switch fvp to the covered vnode */
8541 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
8542 if ((vnode_getwithref(coveredvp
))) {
8547 * Save the 'fvp' as it is needed for vn_authorize_renamex_with_paths()
8556 * Check for cross-device rename.
8558 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
8559 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
8565 * If source is the same as the destination (that is the
8566 * same inode number) then there is nothing to do...
8567 * EXCEPT if the underlying file system supports case
8568 * insensitivity and is case preserving. In this case
8569 * the file system needs to handle the special case of
8570 * getting the same vnode as target (fvp) and source (tvp).
8572 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8573 * and _PC_CASE_PRESERVING can have this exception, and they need to
8574 * handle the special case of getting the same vnode as target and
8575 * source. NOTE: Then the target is unlocked going into vnop_rename,
8576 * so not to cause locking problems. There is a single reference on tvp.
8578 * NOTE - that fvp == tvp also occurs if they are hard linked and
8579 * that correct behaviour then is just to return success without doing
8582 * XXX filesystem should take care of this itself, perhaps...
8584 if (fvp
== tvp
&& fdvp
== tdvp
) {
8585 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
8586 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
8587 fromnd
->ni_cnd
.cn_namelen
)) {
8588 vn_authorize_skipped
= TRUE
;
8593 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
8595 * we're holding a reference and lock
8596 * on locked_mp, but it no longer matches
8597 * what we want to do... so drop our hold
8599 mount_unlock_renames(locked_mp
);
8600 mount_drop(locked_mp
, 0);
8601 holding_mntlock
= 0;
8603 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
8605 * serialize renames that re-shape
8606 * the tree... if holding_mntlock is
8607 * set, then we're ready to go...
8609 * first need to drop the iocounts
8610 * we picked up, second take the
8611 * lock to serialize the access,
8612 * then finally start the lookup
8613 * process over with the lock held
8615 if (!holding_mntlock
) {
8617 * need to grab a reference on
8618 * the mount point before we
8619 * drop all the iocounts... once
8620 * the iocounts are gone, the mount
8623 locked_mp
= fvp
->v_mount
;
8624 mount_ref(locked_mp
, 0);
8627 * nameidone has to happen before we vnode_put(tvp)
8628 * since it may need to release the fs_nodelock on the tvp
8638 * nameidone has to happen before we vnode_put(fdvp)
8639 * since it may need to release the fs_nodelock on the fvp
8646 if (mnt_fvp
!= NULLVP
) {
8650 mount_lock_renames(locked_mp
);
8651 holding_mntlock
= 1;
8657 * when we dropped the iocounts to take
8658 * the lock, we allowed the identity of
8659 * the various vnodes to change... if they did,
8660 * we may no longer be dealing with a rename
8661 * that reshapes the tree... once we're holding
8662 * the iocounts, the vnodes can't change type
8663 * so we're free to drop the lock at this point
8666 if (holding_mntlock
) {
8667 mount_unlock_renames(locked_mp
);
8668 mount_drop(locked_mp
, 0);
8669 holding_mntlock
= 0;
8674 error
= vn_authorize_renamex_with_paths(fdvp
, mntrename
? mnt_fvp
: fvp
,
8675 &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
,
8678 if (error
== ENOENT
) {
8679 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8681 * We encountered a race where after doing the namei,
8682 * tvp stops being valid. If so, simply re-drive the rename
8683 * call from the top.
8693 /* Release the 'mnt_fvp' now that it is no longer needed. */
8694 if (mnt_fvp
!= NULLVP
) {
8699 // save these off so we can later verify that fvp is the same
8700 oname
= fvp
->v_name
;
8701 oparent
= fvp
->v_parent
;
8704 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
8705 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
8708 if (holding_mntlock
) {
8710 * we can drop our serialization
8713 mount_unlock_renames(locked_mp
);
8714 mount_drop(locked_mp
, 0);
8715 holding_mntlock
= 0;
8718 if (error
== EDATALESS
) {
8720 * If we've been here before, something has gone
8721 * horribly wrong and we should just get out lest
8722 * we spiral around the drain forever.
8724 if (flags
& VFS_RENAME_DATALESS
) {
8730 * The object we're renaming is dataless (or has a
8731 * dataless descendent) and requires materialization
8732 * before the rename occurs. But we're holding the
8733 * mount point's rename lock, so it's not safe to
8736 * In this case, we release the lock, perform the
8737 * materialization, and start the whole thing over.
8739 error
= vnode_materialize_dataless_file(fvp
,
8740 NAMESPACE_HANDLER_RENAME_OP
);
8744 * The next time around we need to tell the
8745 * file system that the materializtaion has
8748 flags
|= VFS_RENAME_DATALESS
;
8753 if (error
== EKEEPLOOKING
) {
8754 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8755 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
8756 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8760 fromnd
->ni_vp
= fvp
;
8763 goto continue_lookup
;
8767 * We may encounter a race in the VNOP where the destination didn't
8768 * exist when we did the namei, but it does by the time we go and
8769 * try to create the entry. In this case, we should re-drive this rename
8770 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8771 * but other filesystems susceptible to this race could return it, too.
8773 if (error
== ERECYCLE
) {
8774 if (retry_count
< MAX_RENAME_ERECYCLE_RETRIES
) {
8778 printf("rename retry limit due to ERECYCLE reached\n");
8784 * For compound VNOPs, the authorization callback may return
8785 * ENOENT in case of racing hardlink lookups hitting the name
8786 * cache, redrive the lookup.
8788 if (batched
&& error
== ENOENT
) {
8789 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8798 /* call out to allow 3rd party notification of rename.
8799 * Ignore result of kauth_authorize_fileop call.
8801 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8802 KAUTH_FILEOP_RENAME
,
8803 (uintptr_t)from_name
, (uintptr_t)to_name
);
8804 if (flags
& VFS_RENAME_SWAP
) {
8805 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8806 KAUTH_FILEOP_RENAME
,
8807 (uintptr_t)to_name
, (uintptr_t)from_name
);
8811 if (from_name
!= NULL
&& to_name
!= NULL
) {
8812 if (from_truncated
|| to_truncated
) {
8813 // set it here since only the from_finfo gets reported up to user space
8814 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8818 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8821 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8825 add_fsevent(FSE_RENAME
, ctx
,
8826 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8827 FSE_ARG_FINFO
, &from_finfo
,
8828 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8829 FSE_ARG_FINFO
, &to_finfo
,
8831 if (flags
& VFS_RENAME_SWAP
) {
8833 * Strictly speaking, swap is the equivalent of
8834 * *three* renames. FSEvents clients should only take
8835 * the events as a hint, so we only bother reporting
8838 add_fsevent(FSE_RENAME
, ctx
,
8839 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8840 FSE_ARG_FINFO
, &to_finfo
,
8841 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8842 FSE_ARG_FINFO
, &from_finfo
,
8846 add_fsevent(FSE_RENAME
, ctx
,
8847 FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
,
8848 FSE_ARG_FINFO
, &from_finfo
,
8849 FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
,
8853 #endif /* CONFIG_FSE */
8856 * update filesystem's mount point data
8859 char *cp
, *pathend
, *mpname
;
8865 mp
= fvp
->v_mountedhere
;
8867 if (vfs_busy(mp
, LK_NOWAIT
)) {
8871 tobuf
= zalloc(ZV_NAMEI
);
8873 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8874 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8876 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8879 /* find current mount point prefix */
8880 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8881 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8886 /* find last component of target name */
8887 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8893 /* Update f_mntonname of sub mounts */
8894 vfs_iterate(0, rename_submounts_callback
, (void *)mp
);
8896 /* append name to prefix */
8897 maxlen
= MAXPATHLEN
- (int)(pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8898 bzero(pathend
, maxlen
);
8900 strlcpy(pathend
, mpname
, maxlen
);
8902 zfree(ZV_NAMEI
, tobuf
);
8906 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
8909 * fix up name & parent pointers. note that we first
8910 * check that fvp has the same name/parent pointers it
8911 * had before the rename call... this is a 'weak' check
8914 * XXX oparent and oname may not be set in the compound vnop case
8916 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8919 update_flags
= VNODE_UPDATE_NAME
;
8922 update_flags
|= VNODE_UPDATE_PARENT
;
8925 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8929 * There are some cases (for e.g. 'fvp == tvp') when vn_authorize was
8930 * skipped earlier as no actual rename was performed.
8932 if (vn_authorize_skipped
&& error
== 0) {
8933 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
,
8934 &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
,
8936 if (error
&& error
== ENOENT
) {
8937 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8943 if (to_name
!= NULL
) {
8944 RELEASE_PATH(to_name
);
8947 if (to_name_no_firmlink
!= NULL
) {
8948 RELEASE_PATH(to_name_no_firmlink
);
8949 to_name_no_firmlink
= NULL
;
8951 if (from_name
!= NULL
) {
8952 RELEASE_PATH(from_name
);
8955 if (from_name_no_firmlink
!= NULL
) {
8956 RELEASE_PATH(from_name_no_firmlink
);
8957 from_name_no_firmlink
= NULL
;
8959 if (holding_mntlock
) {
8960 mount_unlock_renames(locked_mp
);
8961 mount_drop(locked_mp
, 0);
8962 holding_mntlock
= 0;
8966 * nameidone has to happen before we vnode_put(tdvp)
8967 * since it may need to release the fs_nodelock on the tdvp
8978 * nameidone has to happen before we vnode_put(fdvp)
8979 * since it may need to release the fs_nodelock on the fdvp
8988 if (mnt_fvp
!= NULLVP
) {
8992 * If things changed after we did the namei, then we will re-drive
8993 * this rename call from the top.
9000 kheap_free(KHEAP_TEMP
, __rename_data
, sizeof(*__rename_data
));
9005 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
9007 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
9008 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
9012 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
9014 return renameat_internal(
9015 vfs_context_current(),
9016 uap
->fromfd
, uap
->from
,
9018 UIO_USERSPACE
, uap
->flags
);
9022 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
9024 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
9025 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
9029 * Make a directory file.
9031 * Returns: 0 Success
9034 * vnode_authorize:???
/*
 * mkdir1at: create a directory at 'path' (resolved relative to 'fd') with
 * the attributes in 'vap'.
 *
 * Parameters:	ctx	VFS context
 *		path	path of the directory to create
 *		vap	attributes for the new directory; va_type is set to
 *			VDIR here and va_mode is recorded for audit
 *		fd	directory fd handed to nameiat()
 *		segflg	address space of 'path'
 *
 * Visible flow:
 *  - The lookup is a CREATE/OP_MKDIR with LOCKPARENT, WILLBEDIR and
 *    NAMEI_COMPOUNDMKDIR.
 *  - vnode_compound_mkdir_available() decides whether the file system does
 *    a compound mkdir (batched); per the fragment below, VFS does not
 *    authorise in that case.
 *  - vn_authorize_mkdir() is called; when it fails with EACCES or EPERM a
 *    second plain LOOKUP is made so that an existing target can be
 *    reported as EEXIST instead (see the fragment below).
 *  - vn_create() makes the directory; EKEEPLOOKING from it jumps back to
 *    continue_lookup.
 *  - The new vnode's name/parent identity is filled in when missing and an
 *    FSE_CREATE_DIR event is posted.
 *
 * NOTE(review): this listing is extraction-damaged (listing numbers fused
 * into the text, tokens split across lines, some lines absent); labels,
 * error returns and the cleanup sequence are not visible here.
 */
9039 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
9040 enum uio_seg segflg
)
9044 int update_flags
= 0;
9046 struct nameidata nd
;
9048 AUDIT_ARG(mode
, vap
->va_mode
);
9049 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
9051 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
9052 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
9055 error
= nameiat(&nd
, fd
);
9067 batched
= vnode_compound_mkdir_available(dvp
);
9069 VATTR_SET(vap
, va_type
, VDIR
);
9073 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
9074 * only get EXISTS or EISDIR for existing path components, and not that it could see
9075 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
9076 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
9078 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
9079 if (error
== EACCES
|| error
== EPERM
) {
9087 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
9088 * rather than EACCESS if the target exists.
9090 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
9092 error2
= nameiat(&nd
, fd
);
9106 * make the directory
9108 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
9109 if (error
== EKEEPLOOKING
) {
9111 goto continue_lookup
;
9117 // Make sure the name & parent pointers are hooked up
9118 if (vp
->v_name
== NULL
) {
9119 update_flags
|= VNODE_UPDATE_NAME
;
9121 if (vp
->v_parent
== NULLVP
) {
9122 update_flags
|= VNODE_UPDATE_PARENT
;
9126 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
9130 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
9135 * nameidone has to happen before we vnode_put(dvp)
9136 * since it may need to release the fs_nodelock on the dvp
9151 * mkdir_extended: Create a directory; with extended security (ACL).
9153 * Parameters: p Process requesting to create the directory
9154 * uap User argument descriptor (see below)
9157 * Indirect: uap->path Path of directory to create
9158 * uap->mode Access permissions to set
9159 * uap->xsecurity ACL to set
9161 * Returns: 0 Success
/*
 * mkdir_extended system call entry point.
 *
 * Visible steps: audits uap->uid and uap->gid; when uap->xsecurity is not
 * USER_ADDR_NULL, copies the file security descriptor in with
 * kauth_copyinfilesec(); sets va_mode to (uap->mode & ACCESSPERMS) with
 * the bits of p->p_fd->fd_cmask cleared; when xsecdst is non-NULL, sets
 * va_acl to &xsecdst->fsec_acl; calls mkdir1at() with AT_FDCWD; finally
 * releases a non-NULL xsecdst with kauth_filesec_free().
 */
9166 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
9169 kauth_filesec_t xsecdst
;
9170 struct vnode_attr va
;
9172 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
9175 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
9176 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
9181 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
9182 if (xsecdst
!= NULL
) {
9183 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
9186 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
9188 if (xsecdst
!= NULL
) {
9189 kauth_filesec_free(xsecdst
);
/*
 * mkdir system call entry point.
 *
 * Sets va_mode to (uap->mode & ACCESSPERMS) with the bits of the process
 * creation mask (p->p_fd->fd_cmask) cleared, then returns the result of
 * mkdir1at() for uap->path with AT_FDCWD as the directory descriptor.
 */
9195 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
9197 struct vnode_attr va
;
9200 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
9202 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat system call entry point.
 *
 * Sets va_mode to (uap->mode & ACCESSPERMS) with the bits of the process
 * creation mask (p->p_fd->fd_cmask) cleared, then returns the result of
 * mkdir1at() for uap->path with uap->fd as the directory descriptor.
 */
9207 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
9209 struct vnode_attr va
;
9212 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
9214 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
/*
 * rmdirat_internal: common directory-removal path.
 *
 * Visible steps, executed inside a do/while loop that repeats while
 * restart_flag is non-zero:
 *  - DELETE/OP_RMDIR lookup with LOCKPARENT | AUDITVNPATH1 and
 *    NAMEI_COMPOUNDRMDIR, resolved by nameiat() relative to fd;
 *  - checks for a VROOT vnode and, outside DEVELOPMENT || DEBUG builds,
 *    for a swap vnode when ctx is not the kernel context;
 *  - vn_authorize_rmdir(), with ENOENT retried while restart_count is
 *    below MAX_AUTHORIZE_ENOENT_RETRIES;
 *  - when an FSE_DELETE event is needed or fileop listeners exist, the
 *    path and the no-firmlink path are captured with safe_getpath() and
 *    safe_getpath_no_firmlink();
 *  - vn_rmdir(); EKEEPLOOKING jumps back to continue_lookup; on ENOTEMPTY
 *    with VNODE_REMOVE_DATALESS_DIR set in unlink_flags, vn_remove() is
 *    tried; under CONFIG_APPLEDOUBLE, ENOTEMPTY leads to
 *    rmdir_remove_orphaned_appleDouble() and a second vn_rmdir();
 *  - kauth_authorize_fileop(KAUTH_FILEOP_DELETE) when listeners exist,
 *    vnode_update_identity() for VISHARDLINK vnodes, and an FSE_DELETE
 *    event carrying the no-firmlink path;
 *  - the no-firmlink path buffer is released with RELEASE_PATH().
 * NOTE(review): error returns, the exact guards around each step and the
 * cleanup ordering are not present in this text; confirm against the
 * full source.
 */
9219 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
9220 enum uio_seg segflg
, int unlink_flags
)
9224 struct nameidata nd
;
9226 char *no_firmlink_path
= NULL
;
9228 int len_no_firmlink_path
= 0;
9229 int has_listeners
= 0;
9231 int truncated_path
= 0;
9232 int truncated_no_firmlink_path
= 0;
9234 struct vnode_attr va
;
9235 #endif /* CONFIG_FSE */
9236 struct vnode_attr
*vap
= NULL
;
9237 int restart_count
= 0;
9243 * This loop exists to restart rmdir in the unlikely case that two
9244 * processes are simultaneously trying to remove the same directory
9245 * containing orphaned appleDouble files.
9248 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
9249 segflg
, dirpath
, ctx
);
9250 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
9255 error
= nameiat(&nd
, fd
);
9264 batched
= vnode_compound_rmdir_available(vp
);
9266 if (vp
->v_flag
& VROOT
) {
9268 * The root of a mounted filesystem cannot be deleted.
9274 #if DEVELOPMENT || DEBUG
9276 * XXX VSWAP: Check for entitlements or special flag here
9277 * so we can restrict access appropriately.
9279 #else /* DEVELOPMENT || DEBUG */
9281 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
9285 #endif /* DEVELOPMENT || DEBUG */
9288 * Removed a check here; we used to abort if vp's vid
9289 * was not the same as what we'd seen the last time around.
9290 * I do not think that check was valid, because if we retry
9291 * and all dirents are gone, the directory could legitimately
9292 * be recycled but still be present in a situation where we would
9293 * have had permission to delete. Therefore, we won't make
9294 * an effort to preserve that check now that we may not have a
9299 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
9301 if (error
== ENOENT
) {
9302 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9313 if (!vnode_compound_rmdir_available(dvp
)) {
9314 panic("No error, but no compound rmdir?");
9319 fse_info finfo
= {0};
9321 need_event
= need_fsevent(FSE_DELETE
, dvp
);
9324 get_fse_info(vp
, &finfo
, ctx
);
9326 error
= vfs_get_notify_attributes(&va
);
9335 has_listeners
= kauth_authorize_fileop_has_listeners();
9336 if (need_event
|| has_listeners
) {
9341 len_path
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
9343 if (no_firmlink_path
== NULL
) {
9344 GET_PATH(no_firmlink_path
);
9347 len_no_firmlink_path
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
);
9349 if (truncated_no_firmlink_path
) {
9350 finfo
.mode
|= FSE_TRUNCATED_PATH
;
9355 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9358 /* Couldn't find a vnode */
9362 if (error
== EKEEPLOOKING
) {
9363 goto continue_lookup
;
9364 } else if (batched
&& error
== ENOENT
) {
9365 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
9367 * For compound VNOPs, the authorization callback
9368 * may return ENOENT in case of racing hard link lookups
9369 * redrive the lookup.
9378 * XXX There's no provision for passing flags
9379 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9380 * because it's not empty, then we try again
9381 * with VNOP_REMOVE(), passing in a special
9382 * flag that clever file systems will know
9385 if (error
== ENOTEMPTY
&&
9386 (unlink_flags
& VNODE_REMOVE_DATALESS_DIR
) != 0) {
9388 * If this fails, we want to keep the original
9391 if (vn_remove(dvp
, &vp
, &nd
,
9392 VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) {
9397 #if CONFIG_APPLEDOUBLE
9399 * Special case to remove orphaned AppleDouble
9400 * files. I don't like putting this in the kernel,
9401 * but carbon does not like putting this in carbon either,
9404 if (error
== ENOTEMPTY
) {
9405 int ad_error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
9406 if (ad_error
== EBUSY
) {
9413 * Assuming everything went well, we will try the RMDIR again
9416 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
9419 #endif /* CONFIG_APPLEDOUBLE */
9421 * Call out to allow 3rd party notification of delete.
9422 * Ignore result of kauth_authorize_fileop call.
9425 if (has_listeners
) {
9426 kauth_authorize_fileop(vfs_context_ucred(ctx
),
9427 KAUTH_FILEOP_DELETE
,
9432 if (vp
->v_flag
& VISHARDLINK
) {
9433 // see the comment in unlink1() about why we update
9434 // the parent of a hard link when it is removed
9435 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
9441 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
9443 add_fsevent(FSE_DELETE
, ctx
,
9444 FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
,
9445 FSE_ARG_FINFO
, &finfo
,
9457 if (no_firmlink_path
!= NULL
) {
9458 RELEASE_PATH(no_firmlink_path
);
9459 no_firmlink_path
= NULL
;
9463 * nameidone has to happen before we vnode_put(dvp)
9464 * since it may need to release the fs_nodelock on the dvp
9473 if (restart_flag
== 0) {
9474 wakeup_one((caddr_t
)vp
);
9477 tsleep(vp
, PVFS
, "rm AD", 1);
9478 } while (restart_flag
!= 0);
9484 * Remove a directory file.
9488 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
9490 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
9491 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0);
/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 *
 * The name length argument is parenthesized before 1 is added for the
 * terminating NUL, so an expression argument (for example a conditional
 * expression) expands with the intended precedence.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of the last byte of the record) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
/*
 * vnode_readdir64: read directory entries in struct direntry form.
 *
 * When the mounted file system advertises VFC_VFSREADDIR_EXTENDED and the
 * mount does not have MNTK_DENY_READDIREXT set, the request is passed
 * straight to VNOP_READDIR() with the caller flags.
 *
 * Otherwise a kernel bounce buffer of 3 * MIN(uio_resid(uio), 87371) / 8
 * bytes is allocated from KHEAP_DATA_BUFFERS, VNOP_READDIR() is issued
 * with flags 0 into that buffer at the caller offset, and every struct
 * dirent read is validated (header inside the buffer, DIRENT_END inside
 * the buffer, DIRENT_LEN(d_namlen) not larger than d_reclen), converted
 * into a temporary struct direntry (d_seekoff set to 0, d_reclen set to
 * DIRENT64_LEN(d_namlen)) and copied to the caller with uiomove().  The
 * caller offset is then taken from the bounce uio and both allocations
 * are freed.
 * NOTE(review): the error values returned on allocation failure and on a
 * bad dirent, and the release of the bounce uio, are not present in this
 * text; confirm against the full source.
 */
9507 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
9508 int *numdirent
, vfs_context_t ctxp
)
9510 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9511 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
9512 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
9513 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
9518 struct direntry
*entry64
;
9524 * We're here because the underlying file system does not
9525 * support direnties or we mounted denying support so we must
9526 * fall back to dirents and convert them to direntries.
9528 * Our kernel buffer needs to be smaller since re-packing will
9529 * expand each dirent. The worse case (when the name length
9530 * is 3 or less) corresponds to a struct direntry size of 32
9531 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9532 * (4-byte aligned). So having a buffer that is 3/8 the size
9533 * will prevent us from reading more than we can pack.
9535 * Since this buffer is wired memory, we will limit the
9536 * buffer size to a maximum of 32K. We would really like to
9537 * use 32K in the MIN(), but we use magic number 87371 to
9538 * prevent uio_resid() * 3 / 8 from overflowing.
9540 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
9541 bufptr
= kheap_alloc(KHEAP_DATA_BUFFERS
, bufsize
, Z_WAITOK
);
9542 if (bufptr
== NULL
) {
9546 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
9547 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
9548 auio
->uio_offset
= uio
->uio_offset
;
9550 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
9552 dep
= (struct dirent
*)bufptr
;
9553 bytesread
= bufsize
- uio_resid(auio
);
9555 entry64
= kheap_alloc(KHEAP_TEMP
, sizeof(struct direntry
), Z_WAITOK
);
9557 * Convert all the entries and copy them out to user's buffer.
9559 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
9560 /* First check that the dirent struct up to d_name is within the buffer */
9561 if ((char*)dep
+ offsetof(struct dirent
, d_name
) > ((char *)bufptr
+ bytesread
) ||
9562 /* Check that the length of the entire dirent is within the buffer */
9563 DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
9564 /* Check that the actual length including the name doesn't exceed d_reclen */
9565 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
9566 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
9567 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
9568 vp
->v_name
? vp
->v_name
: "<unknown>");
9573 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
9575 bzero(entry64
, enbufsize
);
9576 /* Convert a dirent to a dirent64. */
9577 entry64
->d_ino
= dep
->d_ino
;
9578 entry64
->d_seekoff
= 0;
9579 entry64
->d_reclen
= (uint16_t)enbufsize
;
9580 entry64
->d_namlen
= dep
->d_namlen
;
9581 entry64
->d_type
= dep
->d_type
;
9582 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
9584 /* Move to next entry. */
9585 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
9587 /* Copy entry64 to user's buffer. */
9588 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
9591 /* Update the real offset using the offset we got from VNOP_READDIR. */
9593 uio
->uio_offset
= auio
->uio_offset
;
9596 kheap_free(KHEAP_DATA_BUFFERS
, bufptr
, bufsize
);
9597 kheap_free(KHEAP_TEMP
, entry64
, sizeof(struct direntry
));
/*
 * GETDIRENTRIES_MAXBUFSIZE (128 MiB) is the largest user buffer size
 * honoured by getdirentries_common(); larger bufsize values are clamped
 * to it.
 *
 * getdirentries_common: shared implementation behind the getdirentries
 * family of system calls.
 *
 * Visible steps: works on a local copy of the current VFS context; maps
 * fd to a fileproc and vnode with fp_getfvp(); checks FREAD on the file;
 * clamps bufsize; runs mac_file_check_change_offset(); takes an iocount
 * with vnode_getwithref(); checks that the vnode is a VDIR; runs
 * mac_vnode_check_readdir(); builds a single-iovec read uio over the user
 * buffer starting at the file offset (fg_offset); reads with
 * vnode_readdir64() when VNODE_READDIR_EXTENDED is set in flags and with
 * VNOP_READDIR() otherwise; stores the resulting uio offset back into
 * fg_offset.  When nothing was transferred and the mount has MNT_UNION,
 * lookup_traverse_union() is used to switch fg_data to the resulting
 * vnode and fg_offset is reset to 0.  *bytesread receives bufsize minus
 * the residual count.
 * NOTE(review): error values, the retry after a union traversal and the
 * assignment of *offset are not present in this text; confirm against
 * the full source.
 */
9602 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9605 * Read a block of directory entries in a file system independent format.
9608 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
9609 off_t
*offset
, int *eofflag
, int flags
)
9612 struct vfs_context context
= *vfs_context_current(); /* local copy */
9613 struct fileproc
*fp
;
9615 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9617 int error
, numdirent
;
9618 char uio_buf
[UIO_SIZEOF(1)];
9620 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
9624 if ((fp
->fp_glob
->fg_flag
& FREAD
) == 0) {
9625 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9630 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
9631 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
9635 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->fp_glob
);
9640 if ((error
= vnode_getwithref(vp
))) {
9643 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9646 if (vp
->v_type
!= VDIR
) {
9647 (void)vnode_put(vp
);
9653 error
= mac_vnode_check_readdir(&context
, vp
);
9655 (void)vnode_put(vp
);
9660 loff
= fp
->fp_glob
->fg_offset
;
9661 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9662 uio_addiov(auio
, bufp
, bufsize
);
9664 if (flags
& VNODE_READDIR_EXTENDED
) {
9665 error
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
);
9666 fp
->fp_glob
->fg_offset
= uio_offset(auio
);
9668 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
);
9669 fp
->fp_glob
->fg_offset
= uio_offset(auio
);
9672 (void)vnode_put(vp
);
9676 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
9677 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
9678 struct vnode
*tvp
= vp
;
9679 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
9681 fp
->fp_glob
->fg_data
= (caddr_t
) vp
;
9682 fp
->fp_glob
->fg_offset
= 0;
9696 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries system call entry point.
 *
 * Audits uap->fd, calls getdirentries_common() with flags 0, copies the
 * resulting offset out to uap->basep as a user64_long_t for 64-bit
 * processes or as a user32_long_t otherwise, and stores the byte count
 * (truncated to int) in *retval.
 */
9704 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
9710 AUDIT_ARG(fd
, uap
->fd
);
9711 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
,
9712 &bytesread
, &offset
, &eofflag
, 0);
9715 if (proc_is64bit(p
)) {
9716 user64_long_t base
= (user64_long_t
)offset
;
9717 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
9719 user32_long_t base
= (user32_long_t
)offset
;
9720 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
9722 *retval
= (int)bytesread
;
/*
 * getdirentries64 system call entry point.
 *
 * Audits uap->fd.  When uap->bufsize is at least
 * GETDIRENTRIES64_EXTENDED_BUFSIZE, sizeof(getdirentries64_flags_t) bytes
 * are reserved at the end of the user buffer and the remainder is used
 * for entries; otherwise the whole buffer is used.  Entries are read by
 * getdirentries_common() with VNODE_READDIR_EXTENDED, the byte count is
 * stored in *retval and the offset is copied out to uap->position.  For
 * the extended buffer case a flags word (which may carry
 * GETDIRENTRIES64_EOF) is copied out at uap->buf + bufsize.
 */
9728 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
9733 user_size_t bufsize
;
9735 AUDIT_ARG(fd
, uap
->fd
);
9738 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9739 * then the kernel carves out the last 4 bytes to return extended
9740 * information to userspace (namely whether we reached EOF with this call).
9742 if (uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9743 bufsize
= uap
->bufsize
- sizeof(getdirentries64_flags_t
);
9745 bufsize
= uap
->bufsize
;
9748 error
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
,
9749 &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
);
9752 *retval
= bytesread
;
9753 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
9755 if (error
== 0 && uap
->bufsize
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) {
9756 getdirentries64_flags_t flags
= 0;
9758 flags
|= GETDIRENTRIES64_EOF
;
9760 error
= copyout(&flags
, (user_addr_t
)uap
->buf
+ bufsize
,
9769 * Set the mode mask for creation of filesystem nodes.
9770 * XXX implement xsecurity
9772 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
/*
 * umask1: common implementation for umask and umask_extended.
 *
 * Audits newmask, stores the current creation mask (fdp->fd_cmask) in
 * *retval and replaces it with newmask & ALLPERMS.  The fsec argument is
 * marked __unused.
 * NOTE(review): the initialization of fdp and any locking around the
 * update are not present in this text; confirm against the full source.
 */
9774 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
9776 struct filedesc
*fdp
;
9778 AUDIT_ARG(mask
, newmask
);
9781 *retval
= fdp
->fd_cmask
;
9782 fdp
->fd_cmask
= newmask
& ALLPERMS
;
9788 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9790 * Parameters: p Process requesting to set the umask
9791 * uap User argument descriptor (see below)
9792 * retval umask of the process (parameter p)
9794 * Indirect: uap->newmask umask to set
9795 * uap->xsecurity ACL to set
9797 * Returns: 0 Success
/*
 * umask_extended system call entry point.
 *
 * Starts with xsecdst set to KAUTH_FILESEC_NONE; when uap->xsecurity is
 * not USER_ADDR_NULL the descriptor is copied in with
 * kauth_copyinfilesec().  umask1() is then called with uap->newmask and
 * xsecdst, and a descriptor other than KAUTH_FILESEC_NONE is released
 * with kauth_filesec_free().
 */
9802 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
9805 kauth_filesec_t xsecdst
;
9807 xsecdst
= KAUTH_FILESEC_NONE
;
9808 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
9809 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
9813 xsecdst
= KAUTH_FILESEC_NONE
;
9816 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
9818 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
9819 kauth_filesec_free(xsecdst
);
9825 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
9827 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
9831 * Void all references to file by ripping underlying filesystem
/*
 * revoke system call entry point.
 *
 * Visible steps: looks the path up with a LOOKUP/OP_REVOKE nameidata
 * (FOLLOW | AUDITVNPATH1, UIO_USERSPACE); tests that the vnode is a
 * character or block device; tests for a block device that is mounted
 * on; runs mac_vnode_check_revoke(); fetches va_uid with vnode_getattr();
 * when the caller uid differs from va_uid, requires suser() to succeed;
 * calls VNOP_REVOKE(vp, REVOKEALL, ctx) when the vnode has a use count
 * above zero or is aliased.
 * NOTE(review): the error values returned by the failed checks are not
 * present in this text; confirm against the full source.
 */
9836 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
9839 struct vnode_attr va
;
9840 vfs_context_t ctx
= vfs_context_current();
9842 struct nameidata nd
;
9844 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9854 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9859 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9865 error
= mac_vnode_check_revoke(ctx
, vp
);
9872 VATTR_WANTED(&va
, va_uid
);
9873 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9876 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9877 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9880 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9881 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9890 * HFS/HFS PlUS SPECIFIC SYSTEM CALLS
9891 * The following system calls are designed to support features
9892 * which are specific to the HFS & HFS Plus volume formats
9897 * Obtain attribute information on objects in a directory while enumerating
/*
 * getdirentriesattr system call entry point.
 *
 * Visible steps: copies in the attribute list (uap->alist) and the entry
 * count (uap->count); maps the descriptor to a fileproc and vnode with
 * fp_getfvp(); checks FREAD on the file; runs
 * mac_file_check_change_offset(); takes an iocount with
 * vnode_getwithref(); checks that the vnode is a VDIR; runs
 * mac_vnode_check_readdir(); builds a single-iovec read uio over
 * uap->buffer at the file offset; authorizes
 * KAUTH_VNODE_LIST_DIRECTORY, adding KAUTH_VNODE_SEARCH when any common
 * attribute other than ATTR_CMN_NAME, or any file or directory attribute,
 * is requested; calls VNOP_READDIRATTR() with the low 32 bits of
 * uap->options.  At EOF on an MNT_UNION mount with an empty buffer,
 * lookup_traverse_union() is used to move the open file to the resulting
 * vnode (reference taken with vnode_ref_ext(), old one dropped with
 * vnode_rele_internal(), fg_offset reset to 0).  Finally fg_offset is
 * updated from the uio, count, newstate and loff are copied out, and
 * *retval is set to eofflag.
 * NOTE(review): error values and the exact branch structure are not
 * present in this text; confirm against the full source.
 */
9902 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9905 struct fileproc
*fp
;
9907 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9908 uint32_t count
= 0, savecount
= 0;
9909 uint32_t newstate
= 0;
9912 struct attrlist attributelist
;
9913 vfs_context_t ctx
= vfs_context_current();
9915 char uio_buf
[UIO_SIZEOF(1)];
9916 kauth_action_t action
;
9920 /* Get the attributes into kernel space */
9921 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9924 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9928 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9931 if ((fp
->fp_glob
->fg_flag
& FREAD
) == 0) {
9932 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9939 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9947 if ((error
= vnode_getwithref(vp
))) {
9951 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9954 if (vp
->v_type
!= VDIR
) {
9955 (void)vnode_put(vp
);
9961 error
= mac_vnode_check_readdir(ctx
, vp
);
9963 (void)vnode_put(vp
);
9968 /* set up the uio structure which will contain the users return buffer */
9969 loff
= fp
->fp_glob
->fg_offset
;
9970 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9971 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9974 * If the only item requested is file names, we can let that past with
9975 * just LIST_DIRECTORY. If they want any other attributes, that means
9976 * they need SEARCH as well.
9978 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9979 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9980 attributelist
.fileattr
|| attributelist
.dirattr
) {
9981 action
|= KAUTH_VNODE_SEARCH
;
9984 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9985 /* Believe it or not, uap->options only has 32-bits of valid
9986 * info, so truncate before extending again */
9988 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9989 (uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9993 (void) vnode_put(vp
);
9998 * If we've got the last entry of a directory in a union mount
9999 * then reset the eofflag and pretend there's still more to come.
10000 * The next call will again set eofflag and the buffer will be empty,
10001 * so traverse to the underlying directory and do the directory
10004 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
10005 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
10007 } else { // Empty buffer
10008 struct vnode
*tvp
= vp
;
10009 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
10010 vnode_ref_ext(vp
, fp
->fp_glob
->fg_flag
& O_EVTONLY
, 0);
10011 fp
->fp_glob
->fg_data
= (caddr_t
) vp
;
10012 fp
->fp_glob
->fg_offset
= 0; // reset index for new dir
10014 vnode_rele_internal(tvp
, fp
->fp_glob
->fg_flag
& O_EVTONLY
, 0, 0);
10022 (void)vnode_put(vp
);
10027 fp
->fp_glob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
10029 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
10032 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
10035 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
10039 *retval
= eofflag
; /* similar to getdirentries */
10043 return error
; /* return error earlier, an retval of 0 or 1 now */
10044 } /* end of getdirentriesattr system call */
10047 * Exchange data between two files
/*
 * exchangedata system call entry point.
 *
 * Visible steps: looks up uap->path1 and uap->path2 with namei()
 * (FOLLOW is added unless FSOPT_NOFOLLOW is set in uap->options; the
 * second lookup also uses CN_NBMOUNTLOOK); checks that both vnodes are on
 * the same mount and that both are regular files; runs
 * mac_vnode_check_exchangedata(); authorizes KAUTH_VNODE_READ_DATA |
 * KAUTH_VNODE_WRITE_DATA on both vnodes; when an FSE_EXCHANGE event is
 * needed or fileop listeners exist, captures both paths with
 * safe_getpath() and both fse_info records, flagging
 * FSE_TRUNCATED_PATH on the first record if either path was truncated;
 * calls VNOP_EXCHANGE(); afterwards notifies listeners with
 * kauth_authorize_fileop(KAUTH_FILEOP_EXCHANGE), swaps v_name and (when
 * different) v_parent between the two vnodes, posts the FSE_EXCHANGE
 * event and releases both path buffers with RELEASE_PATH().
 * NOTE(review): error values, the same-file check body and the lock taken
 * around the name swap are not present in this text; confirm against the
 * full source.
 */
10052 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
10054 struct nameidata fnd
, snd
;
10055 vfs_context_t ctx
= vfs_context_current();
10059 u_int32_t nameiflags
;
10060 char *fpath
= NULL
;
10061 char *spath
= NULL
;
10062 int flen
= 0, slen
= 0;
10063 int from_truncated
= 0, to_truncated
= 0;
10065 fse_info f_finfo
, s_finfo
;
10069 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10070 nameiflags
|= FOLLOW
;
10073 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
10074 UIO_USERSPACE
, uap
->path1
, ctx
);
10076 error
= namei(&fnd
);
10084 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
10085 UIO_USERSPACE
, uap
->path2
, ctx
);
10087 error
= namei(&snd
);
10096 * if the files are the same, return an inval error
10104 * if the files are on different volumes, return an error
10106 if (svp
->v_mount
!= fvp
->v_mount
) {
10111 /* If they're not files, return an error */
10112 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
10118 error
= mac_vnode_check_exchangedata(ctx
,
10124 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
10125 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
10131 need_fsevent(FSE_EXCHANGE
, fvp
) ||
10133 kauth_authorize_fileop_has_listeners()) {
10137 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
10138 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
10141 get_fse_info(fvp
, &f_finfo
, ctx
);
10142 get_fse_info(svp
, &s_finfo
, ctx
);
10143 if (from_truncated
|| to_truncated
) {
10144 // set it here since only the f_finfo gets reported up to user space
10145 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
10149 /* Ok, make the call */
10150 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
10153 const char *tmpname
;
10155 if (fpath
!= NULL
&& spath
!= NULL
) {
10156 /* call out to allow 3rd party notification of exchangedata.
10157 * Ignore result of kauth_authorize_fileop call.
10159 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
10160 (uintptr_t)fpath
, (uintptr_t)spath
);
10164 tmpname
= fvp
->v_name
;
10165 fvp
->v_name
= svp
->v_name
;
10166 svp
->v_name
= tmpname
;
10168 if (fvp
->v_parent
!= svp
->v_parent
) {
10171 tmp
= fvp
->v_parent
;
10172 fvp
->v_parent
= svp
->v_parent
;
10173 svp
->v_parent
= tmp
;
10175 name_cache_unlock();
10178 if (fpath
!= NULL
&& spath
!= NULL
) {
10179 add_fsevent(FSE_EXCHANGE
, ctx
,
10180 FSE_ARG_STRING
, flen
, fpath
,
10181 FSE_ARG_FINFO
, &f_finfo
,
10182 FSE_ARG_STRING
, slen
, spath
,
10183 FSE_ARG_FINFO
, &s_finfo
,
10190 if (fpath
!= NULL
) {
10191 RELEASE_PATH(fpath
);
10193 if (spath
!= NULL
) {
10194 RELEASE_PATH(spath
);
10203 * Return (in MB) the amount of freespace on the given vnode's volume.
10203 * Return (in MB) the amount of freespace on the given vnode's volume.
10205 uint32_t freespace_mb(vnode_t vp
);
10208 freespace_mb(vnode_t vp
)
10210 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
10211 return (uint32_t)(((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
10212 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
10215 #if CONFIG_SEARCHFS
/*
 * searchfs system call entry point (CONFIG_SEARCHFS builds).
 *
 * Visible steps:
 *  - copies in the fssearchblock; for 32-bit processes a
 *    user32_fssearchblock is copied in and widened field by field into
 *    the 64-bit form, and the time limit is taken from it;
 *  - checks sizeofsearchparams1 and sizeofsearchparams2 against
 *    SEARCHFS_MAX_SEARCHPARMS;
 *  - makes one KHEAP_DATA_BUFFERS allocation laid out as searchparams1,
 *    searchparams2, the return attrlist and the searchstate (plus two
 *    extra uint32_t of slack), then copies each piece in from user space;
 *  - on SRCHFS_START resets ss_union_layer, otherwise ORs ss_union_flags
 *    into uap->options; ss_union_flags is then cleared;
 *  - when ATTR_CMN_NAME is among the search attributes, validates the
 *    attrreference_t that follows the length word of searchparams1:
 *    offset not negative, length not above MAXPATHLEN, no pointer
 *    wrap-around, and offset and offset + length inside the buffer;
 *  - builds a read uio over the user return buffer, looks up uap->path
 *    with namei() (FOLLOW unless FSOPT_NOFOLLOW), switches to the volume
 *    root with VFS_ROOT(), and descends ss_union_layer levels through
 *    mnt_vnodecovered on union mounts;
 *  - runs mac_vnode_check_searchfs(), tests for maxmatches == 0, and
 *    calls VNOP_SEARCHFS(); on a union mount with a zero fserror the
 *    state is set up to search the next layer down on the following call;
 *  - copies the searchstate back out, stores nummatches with suulong()
 *    and frees the allocation.
 * NOTE(review): error values, goto targets and vnode reference handling
 * are not present in this text; confirm against the full source.
 */
10220 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
10225 struct nameidata nd
;
10226 struct user64_fssearchblock searchblock
;
10227 struct searchstate
*state
;
10228 struct attrlist
*returnattrs
;
10229 struct timeval timelimit
;
10230 void *searchparams1
, *searchparams2
;
10232 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10233 uint32_t nummatches
;
10235 uint32_t nameiflags
;
10236 vfs_context_t ctx
= vfs_context_current();
10237 char uio_buf
[UIO_SIZEOF(1)];
10239 /* Start by copying in fsearchblock parameter list */
10240 if (IS_64BIT_PROCESS(p
)) {
10241 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
10242 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
10243 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
10245 struct user32_fssearchblock tmp_searchblock
;
10247 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
10248 // munge into 64-bit version
10249 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
10250 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
10251 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
10252 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
10254 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
10255 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
10257 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
10258 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
10259 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
10260 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
10261 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
10262 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
10263 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
10269 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
10271 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
10272 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
10276 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
10277 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
10278 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
10281 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
10282 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
10283 /* assumes the size is still 556 bytes it will continue to work */
10285 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
10286 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
10288 searchparams1
= kheap_alloc(KHEAP_DATA_BUFFERS
, mallocsize
, Z_WAITOK
);
10290 /* Now set up the various pointers to the correct place in our newly allocated memory */
10292 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
10293 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
10294 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
10296 /* Now copy in the stuff given our local variables. */
10298 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
10302 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
10306 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
10310 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
10315 * When searching a union mount, need to set the
10316 * start flag at the first call on each layer to
10317 * reset state for the new volume.
10319 if (uap
->options
& SRCHFS_START
) {
10320 state
->ss_union_layer
= 0;
10322 uap
->options
|= state
->ss_union_flags
;
10324 state
->ss_union_flags
= 0;
10327 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10328 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10329 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10330 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10331 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10334 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
10335 attrreference_t
* string_ref
;
10336 u_int32_t
* start_length
;
10337 user64_size_t param_length
;
10339 /* validate searchparams1 */
10340 param_length
= searchblock
.sizeofsearchparams1
;
10341 /* skip the word that specifies length of the buffer */
10342 start_length
= (u_int32_t
*) searchparams1
;
10343 start_length
= start_length
+ 1;
10344 string_ref
= (attrreference_t
*) start_length
;
10346 /* ensure no negative offsets or too big offsets */
10347 if (string_ref
->attr_dataoffset
< 0) {
10351 if (string_ref
->attr_length
> MAXPATHLEN
) {
10356 /* Check for pointer overflow in the string ref */
10357 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
10362 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
10366 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
10372 /* set up the uio structure which will contain the users return buffer */
10373 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10374 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
10377 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10378 nameiflags
|= FOLLOW
;
10380 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
10381 UIO_USERSPACE
, uap
->path
, ctx
);
10383 error
= namei(&nd
);
10391 * Switch to the root vnode for the volume
10393 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
10401 * If it's a union mount, the path lookup takes
10402 * us to the top layer. But we may need to descend
10403 * to a lower layer. For non-union mounts the layer
10406 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
10407 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
10411 vp
= vp
->v_mount
->mnt_vnodecovered
;
10417 error
= vnode_getwithref(vp
);
10425 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
10434 * If searchblock.maxmatches == 0, then skip the search. This has happened
10435 * before and sometimes the underlying code doesnt deal with it well.
10437 if (searchblock
.maxmatches
== 0) {
10443 * Allright, we have everything we need, so lets make that call.
10445 * We keep special track of the return value from the file system:
10446 * EAGAIN is an acceptable error condition that shouldn't keep us
10447 * from copying out any results...
10450 fserror
= VNOP_SEARCHFS(vp
,
10453 &searchblock
.searchattrs
,
10454 (uint32_t)searchblock
.maxmatches
,
10458 (uint32_t)uap
->scriptcode
,
10459 (uint32_t)uap
->options
,
10461 (struct searchstate
*) &state
->ss_fsstate
,
10465 * If it's a union mount we need to be called again
10466 * to search the mounted-on filesystem.
10468 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
10469 state
->ss_union_flags
= SRCHFS_START
;
10470 state
->ss_union_layer
++; // search next layer down
10478 /* Now copy out the stuff that needs copying out. That means the number of matches, the
10479 * search state. Everything was already put into he return buffer by the vop call. */
10481 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
10485 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
10493 kheap_free(KHEAP_DATA_BUFFERS
, searchparams1
, mallocsize
);
10496 } /* end of searchfs system call */
10498 #else /* CONFIG_SEARCHFS */
/*
 * searchfs stub compiled when CONFIG_SEARCHFS is not enabled; every
 * parameter is marked __unused.
 * NOTE(review): the value returned by the stub is not present in this
 * text; confirm against the full source.
 */
10501 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
10506 #endif /* CONFIG_SEARCHFS */
10509 #if CONFIG_DATALESS_FILES
10512 * === Namespace Resolver Up-call Mechanism ===
10514 * When I/O is performed to a dataless file or directory (read, write,
10515 * lookup-in, etc.), the file system performs an upcall to the namespace
10516 * resolver (filecoordinationd) to materialize the object.
10518 * We need multiple up-calls to be in flight at once, and we need these
10519 * up-calls to be interruptible, thus the following implementation:
10521 * => The nspace_resolver_request represents the in-kernel request state.
10522 * It contains a request ID, storage space for the errno code returned
10523 * by filecoordinationd, and flags.
10525 * => The request ID is simply a global monotonically incrementing 32-bit
10526 * number. Outstanding requests are stored in a hash table, and the
10527 * hash function is extremely simple.
10529 * => When an upcall is to be made to filecoordinationd, a request structure
10530 * is allocated on the stack (it is small, and needs to live only during
10531 * the duration of the call to resolve_nspace_item_ext()). It is
10532 * initialized and inserted into the table. Some backpressure from
10533 * filecoordinationd is applied by limiting the number of entries that
10534 * can be inserted into the table (and thus limiting the number of
10535 * outstanding requests issued to filecoordinationd); waiting for an
10536 * available slot is interruptible.
10538 * => Once the request has been inserted into the table, the up-call is made
10539 * to filecoordinationd via a MiG-generated stub. The up-call returns
10540 * immediately and filecoordinationd processes the request asynchronously.
10542 * => The caller now waits for the request to complete. This is achieved by
10543 * sleeping on the address of the request structure and waiting for
10544 * filecoordinationd to mark the request structure as complete. This
10545 * is an interruptible sleep call; if interrupted, the request structure
10546 * is removed from the table and EINTR is returned to the caller. If
10547 * this occurs, an advisory up-call is made to filecoordinationd with
10548 * the request ID to indicate that the request can be aborted or
10549 * de-prioritized at the discretion of filecoordinationd.
10551 * => When filecoordinationd has completed the request, it signals completion
10552 * by writing to the vfs.nspace.complete sysctl node. Only a process
10553 * decorated as a namespace resolver can write to this sysctl node. The
10554 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10555 * The request ID is looked up in the table, and if the request is found,
10556 * the error code is stored in the request structure and a wakeup()
10557 * issued on the address of the request structure. If the request is not
10558 * found, we simply drop the completion notification, assuming that the
10559 * caller was interrupted.
10561 * => When the waiting thread wakes up, it extracts the error code from the
10562 * request structure, removes the request from the table, and returns the
10563 * error code to the calling function. Fini!
10566 struct nspace_resolver_request
{
10567 LIST_ENTRY(nspace_resolver_request
) r_hashlink
;
10570 int r_resolver_error
;
10574 #define RRF_COMPLETE 0x0001
/*
 * next_nspace_req_id: produce the ID for a new namespace-resolver request.
 *
 * Adds 1 to the function-local static counter next_req_id with
 * OSAddAtomic() and returns whatever OSAddAtomic() returns.
 * NOTE(review): presumably that is the value before the increment, and the
 * 32-bit counter simply wraps -- confirm against the OSAddAtomic contract.
 */
10577 next_nspace_req_id(void)
10579 static uint32_t next_req_id
;
10581 return OSAddAtomic(1, &next_req_id
);
10584 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10585 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10587 static LIST_HEAD(nspace_resolver_requesthead
,
10588 nspace_resolver_request
) * nspace_resolver_request_hashtbl
;
10589 static u_long nspace_resolver_request_hashmask
;
10590 static u_int nspace_resolver_request_count
;
10591 static bool nspace_resolver_request_wait_slot
;
10592 static LCK_GRP_DECLARE(nspace_resolver_request_lck_grp
, "file namespace resolver");
10593 static LCK_MTX_DECLARE(nspace_resolver_request_hash_mutex
,
10594 &nspace_resolver_request_lck_grp
);
10596 #define NSPACE_REQ_LOCK() \
10597 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10598 #define NSPACE_REQ_UNLOCK() \
10599 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10601 #define NSPACE_RESOLVER_HASH(req_id) \
10602 (&nspace_resolver_request_hashtbl[(req_id) & \
10603 nspace_resolver_request_hashmask])
/*
 * nspace_resolver_req_lookup: search the outstanding-request hash table
 * for the request whose r_req_id equals req_id.
 *
 * The bucket is selected with NSPACE_RESOLVER_HASH(req_id) and its
 * r_hashlink chain is walked with LIST_FOREACH.
 * NOTE(review): the return statements are not visible in this view;
 * other code in this file compares the result against NULL, so presumably
 * the match is returned on a hit and NULL otherwise -- confirm.
 * NOTE(review): no locking is visible here; presumably the caller holds
 * nspace_resolver_request_hash_mutex -- confirm.
 */
10605 static struct nspace_resolver_request
*
10606 nspace_resolver_req_lookup(uint32_t req_id
)
10608 struct nspace_resolver_requesthead
*bucket
;
10609 struct nspace_resolver_request
*req
;
10611 bucket
= NSPACE_RESOLVER_HASH(req_id
);
10612 LIST_FOREACH(req
, bucket
, r_hashlink
) {
10613 if (req
->r_req_id
== req_id
) {
/*
 * nspace_resolver_req_add: insert req into the outstanding-request hash
 * table, waiting for a free slot if the table is full.
 *
 * NOTE(review): msleep() is handed nspace_resolver_request_hash_mutex, so
 * that mutex is presumably held on entry -- confirm against the caller.
 * NOTE(review): the handling of a non-zero msleep() result and the return
 * statement are not visible in this view.
 */
10622 nspace_resolver_req_add(struct nspace_resolver_request
*req
)
10624 struct nspace_resolver_requesthead
*bucket
;
/*
 * Back-pressure: while the table already holds
 * NSPACE_RESOLVER_MAX_OUTSTANDING (or more) requests, record that a waiter
 * exists and sleep on the address of the request counter. PCATCH is
 * passed, matching the "waiting for a slot is interruptible" design note.
 */
10627 while (nspace_resolver_request_count
>=
10628 NSPACE_RESOLVER_MAX_OUTSTANDING
) {
10629 nspace_resolver_request_wait_slot
= true;
10630 error
= msleep(&nspace_resolver_request_count
,
10631 &nspace_resolver_request_hash_mutex
,
10632 PVFS
| PCATCH
, "nspacerq", NULL
);
/* Pick the hash bucket for this request ID. */
10638 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
/* Diagnostic check: the ID must not already be present in the table. */
10640 assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
);
10641 #endif /* DIAGNOSTIC */
/* Link the request at the head of its bucket and account for it. */
10642 LIST_INSERT_HEAD(bucket
, req
, r_hashlink
);
10643 nspace_resolver_request_count
++;
/*
 * nspace_resolver_req_remove: unlink req from the outstanding-request
 * hash table and release its slot.
 *
 * NOTE(review): no locking is visible here; presumably the caller holds
 * nspace_resolver_request_hash_mutex -- confirm.
 */
10649 nspace_resolver_req_remove(struct nspace_resolver_request
*req
)
10651 struct nspace_resolver_requesthead
*bucket
;
10653 bucket
= NSPACE_RESOLVER_HASH(req
->r_req_id
);
/* Diagnostic check: the request must currently be in the table. */
10655 assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
);
10656 #endif /* DIAGNOSTIC */
10657 LIST_REMOVE(req
, r_hashlink
);
10658 nspace_resolver_request_count
--;
/*
 * If a thread flagged that it is waiting for a free slot, clear the flag
 * and wake sleepers on the request counter address (the same address
 * nspace_resolver_req_add sleeps on).
 */
10660 if (nspace_resolver_request_wait_slot
) {
10661 nspace_resolver_request_wait_slot
= false;
10662 wakeup(&nspace_resolver_request_count
);
/*
 * nspace_resolver_req_cancel: send an advisory cancellation for request
 * req_id to filecoordinationd.
 *
 * Looks up the filecoordinationd port on the privileged host, sends
 * send_nspace_resolve_cancel() on it, logs a failed send, and releases the
 * send right with ipc_port_release_send().
 * NOTE(review): the body of the port-lookup failure branch is not visible
 * in this view; presumably it returns early -- confirm.
 */
10667 nspace_resolver_req_cancel(uint32_t req_id
)
10672 // Failures here aren't fatal -- the cancellation message
10673 // sent to the resolver is merely advisory.
10675 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
10676 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
10680 kr
= send_nspace_resolve_cancel(mp
, req_id
);
10681 if (kr
!= KERN_SUCCESS
) {
10682 os_log_error(OS_LOG_DEFAULT
,
10683 "NSPACE send_nspace_resolve_cancel failure: %d", kr
);
10686 ipc_port_release_send(mp
);
/*
 * nspace_resolver_req_wait: block until req is marked RRF_COMPLETE or the
 * sleep fails, then remove req from the table and return its
 * r_resolver_error.
 *
 * The hash mutex is released via NSPACE_REQ_UNLOCK() before returning.
 * NOTE(review): the matching lock acquisition is not visible in this view.
 * When the sleep was aborted, an advisory cancel is sent to the resolver
 * after the mutex has been dropped.
 */
10690 nspace_resolver_req_wait(struct nspace_resolver_request
*req
)
10692 bool send_cancel_message
= false;
/* Sleep on the request address until the completion flag is set. */
10697 while ((req
->r_flags
& RRF_COMPLETE
) == 0) {
10698 error
= msleep(req
, &nspace_resolver_request_hash_mutex
,
10699 PVFS
| PCATCH
, "nspace", NULL
);
/*
 * Any msleep() error other than ERESTART ends the wait with an error:
 * EINTR is recorded as EINTR. NOTE(review): the alternative value of the
 * conditional expression and the loop exit are not visible in this view.
 */
10700 if (error
&& error
!= ERESTART
) {
10701 req
->r_resolver_error
= (error
== EINTR
) ? EINTR
:
10703 send_cancel_message
= true;
/* Whatever the outcome, the request leaves the table before returning. */
10708 nspace_resolver_req_remove(req
);
10710 NSPACE_REQ_UNLOCK();
/* Tell the resolver it may abandon the request. */
10712 if (send_cancel_message
) {
10713 nspace_resolver_req_cancel(req
->r_req_id
);
10716 return req
->r_resolver_error
;
/*
 * nspace_resolver_req_mark_complete: record the resolver result in req
 * and flag the request as finished.
 *
 * Stores resolver_error in r_resolver_error and sets RRF_COMPLETE in
 * r_flags, which is the flag nspace_resolver_req_wait polls.
 * NOTE(review): the design notes describe a wakeup() on the request
 * address at completion; no such call is visible in this view -- confirm
 * where it is issued.
 */
10720 nspace_resolver_req_mark_complete(
10721 struct nspace_resolver_request
*req
,
10722 int resolver_error
)
10724 req
->r_resolver_error
= resolver_error
;
10725 req
->r_flags
|= RRF_COMPLETE
;
10730 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
, uint64_t orig_gencount
)
10732 struct nspace_resolver_request
*req
;
10736 // If we don't find the request corresponding to our req_id,
10737 // just drop the completion signal on the floor; it's likely
10738 // that the requester interrupted with a signal.
10740 req
= nspace_resolver_req_lookup(req_id
);
10742 mount_t locked_mp
= NULL
;
10744 locked_mp
= req
->r_vp
->v_mount
;
10745 mount_ref(locked_mp
, 0);
10746 mount_lock_renames(locked_mp
);
10749 // if the resolver isn't already returning an error and we have an
10750 // orig_gencount, then get an iocount on the request vnode and check
10751 // that the gencount on req->r_vp has not changed.
10753 // note: a ref was taken on req->r_vp when the request was created
10754 // and that ref will be dropped by that thread when it wakes up.
10756 if (resolver_error
== 0 &&
10757 orig_gencount
!= 0 &&
10758 vnode_getwithref(req
->r_vp
) == 0) {
10759 struct vnode_attr va
;
10760 uint64_t cur_gencount
;
10763 VATTR_WANTED(&va
, va_recursive_gencount
);
10765 if (vnode_getattr(req
->r_vp
, &va
, vfs_context_kernel()) == 0) {
10766 cur_gencount
= va
.va_recursive_gencount
;
10771 if (resolver_error
== 0 && cur_gencount
&& orig_gencount
&& cur_gencount
!= orig_gencount
) {
10772 printf("nspace.complete: gencount changed! (orig %llu cur %llu)\n", orig_gencount
, cur_gencount
);
10774 // this error will be returned to the thread that initiated the
10775 // materialization of req->r_vp.
10776 resolver_error
= EBUSY
;
10778 // note: we explicitly do not return an error to the caller (i.e.
10779 // the thread that did the materialization) because they said they
10783 vnode_put(req
->r_vp
);
10786 mount_unlock_renames(locked_mp
);
10787 mount_drop(locked_mp
, 0);
10789 nspace_resolver_req_mark_complete(req
, resolver_error
);
10792 NSPACE_REQ_UNLOCK();
10797 static struct proc
*nspace_resolver_proc
;
/*
 * nspace_resolver_get_proc_state: report whether p is the registered
 * namespace resolver.
 *
 * *is_resolver is set to 1 only when p carries P_LNSPACE_RESOLVER in
 * p_lflag AND p is the process stored in nspace_resolver_proc; otherwise
 * it is set to 0.
 * NOTE(review): the return statement is not visible in this view.
 */
10800 nspace_resolver_get_proc_state(struct proc
*p
, int *is_resolver
)
10802 *is_resolver
= ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
10803 p
== nspace_resolver_proc
) ? 1 : 0;
10808 nspace_resolver_set_proc_state(struct proc
*p
, int is_resolver
)
10810 vfs_context_t ctx
= vfs_context_current();
10814 // The system filecoordinationd runs as uid == 0. This also
10815 // has the nice side-effect of filtering out filecoordinationd
10816 // running in the simulator.
10818 if (!vfs_context_issuser(ctx
)) {
10822 error
= priv_check_cred(vfs_context_ucred(ctx
),
10823 PRIV_VFS_DATALESS_RESOLVER
, 0);
10831 if (nspace_resolver_proc
== NULL
) {
10833 p
->p_lflag
|= P_LNSPACE_RESOLVER
;
10835 nspace_resolver_proc
= p
;
10840 NSPACE_REQ_UNLOCK();
10842 // This is basically just like the exit case.
10843 // nspace_resolver_exited() will verify that the
10844 // process is the resolver, and will clear the
10846 nspace_resolver_exited(p
);
/*
 * nspace_materialization_get_proc_state: report through *is_prevented
 * whether dataless-file materialization is prevented for process p.
 *
 * The visible test is true when p is flagged P_LNSPACE_RESOLVER or when
 * P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES is clear in p_vfs_iopolicy.
 * NOTE(review): the assignments to *is_prevented and the return statement
 * are not visible in this view; presumably the true branch reports
 * "prevented" -- confirm.
 */
10853 nspace_materialization_get_proc_state(struct proc
*p
, int *is_prevented
)
10855 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) != 0 ||
10856 (p
->p_vfs_iopolicy
&
10857 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) {
/*
 * nspace_materialization_set_proc_state: set or clear the per-process
 * "materialize dataless files" iopolicy bit.
 *
 * p            - process to modify
 * is_prevented - non-zero to prevent materialization, zero to allow it
 *
 * NOTE(review): the final return statement is not visible in this view.
 */
10866 nspace_materialization_set_proc_state(struct proc
*p
, int is_prevented
)
/*
 * A process flagged as the resolver is not modified: asking for
 * "prevented" succeeds with 0, asking for "not prevented" yields EBUSY.
 */
10868 if (p
->p_lflag
& P_LNSPACE_RESOLVER
) {
10869 return is_prevented
? 0 : EBUSY
;
/* Prevent: atomically clear the materialize bit in p_vfs_iopolicy. */
10872 if (is_prevented
) {
10873 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
);
/*
 * Atomically set the materialize bit.
 * NOTE(review): presumably this is the else arm of the test above; the
 * line separating the two arms is not visible in this view.
 */
10875 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
);
/*
 * nspace_materialization_get_thread_state: report whether the current
 * thread is decorated as not wanting dataless-file materialization.
 *
 * *is_prevented is set to 1 when UT_NSPACE_NODATALESSFAULTS is set in the
 * uu_flag of the current thread, else 0.
 * NOTE(review): the return statement is not visible in this view.
 */
10881 nspace_materialization_get_thread_state(int *is_prevented
)
10883 uthread_t ut
= get_bsdthread_info(current_thread());
10885 *is_prevented
= (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0;
/*
 * nspace_materialization_set_thread_state: set or clear the
 * UT_NSPACE_NODATALESSFAULTS decoration on the current thread.
 *
 * is_prevented - non-zero sets the flag in uu_flag; the flag-clearing
 *                statement follows. NOTE(review): presumably that is the
 *                else arm; the separating line and the return statement
 *                are not visible in this view.
 */
10890 nspace_materialization_set_thread_state(int is_prevented
)
10892 uthread_t ut
= get_bsdthread_info(current_thread());
10894 if (is_prevented
) {
10895 ut
->uu_flag
|= UT_NSPACE_NODATALESSFAULTS
;
10897 ut
->uu_flag
&= ~UT_NSPACE_NODATALESSFAULTS
;
10902 /* the vfs.nspace branch */
10903 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge");
10906 sysctl_nspace_resolver(__unused
struct sysctl_oid
*oidp
,
10907 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10909 struct proc
*p
= req
->p
;
10910 int new_value
, old_value
, changed
= 0;
10913 error
= nspace_resolver_get_proc_state(p
, &old_value
);
10918 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10920 if (error
== 0 && changed
) {
10921 error
= nspace_resolver_set_proc_state(p
, new_value
);
10926 /* decorate this process as the dataless file resolver */
10927 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
,
10928 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10929 0, 0, sysctl_nspace_resolver
, "I", "");
10932 sysctl_nspace_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10933 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10935 struct proc
*p
= req
->p
;
10936 int new_value
, old_value
, changed
= 0;
10939 error
= nspace_materialization_get_proc_state(p
, &old_value
);
10944 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10946 if (error
== 0 && changed
) {
10947 error
= nspace_materialization_set_proc_state(p
, new_value
);
10952 /* decorate this process as not wanting to materialize dataless files */
10953 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
,
10954 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10955 0, 0, sysctl_nspace_prevent_materialization
, "I", "");
10958 sysctl_nspace_thread_prevent_materialization(__unused
struct sysctl_oid
*oidp
,
10959 __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
10961 int new_value
, old_value
, changed
= 0;
10964 error
= nspace_materialization_get_thread_state(&old_value
);
10969 error
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
,
10971 if (error
== 0 && changed
) {
10972 error
= nspace_materialization_set_thread_state(new_value
);
10977 /* decorate this thread as not wanting to materialize dataless files */
10978 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
,
10979 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
10980 0, 0, sysctl_nspace_thread_prevent_materialization
, "I", "");
10983 sysctl_nspace_complete(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
,
10984 __unused
int arg2
, struct sysctl_req
*req
)
10986 struct proc
*p
= req
->p
;
10987 uint32_t req_status
[2] = { 0, 0 };
10988 uint64_t gencount
= 0;
10989 int error
, is_resolver
, changed
= 0, gencount_changed
;
10991 error
= nspace_resolver_get_proc_state(p
, &is_resolver
);
10996 if (!is_resolver
) {
11000 error
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
),
11006 // get the gencount if it was passed
11007 error
= sysctl_io_opaque(req
, &gencount
, sizeof(gencount
),
11008 &gencount_changed
);
11011 // we ignore the error because the gencount was optional
11016 * req_status[0] is the req_id
11018 * req_status[1] is the errno
11020 if (error
== 0 && changed
) {
11021 nspace_resolver_req_completed(req_status
[0],
11022 (int)req_status
[1], gencount
);
11027 /* Resolver reports completed reqs here. */
11028 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
,
11029 CTLTYPE_OPAQUE
| CTLFLAG_RW
| CTLFLAG_ANYBODY
| CTLFLAG_LOCKED
,
11030 0, 0, sysctl_nspace_complete
, "-", "");
11032 #endif /* CONFIG_DATALESS_FILES */
11034 #if CONFIG_DATALESS_FILES
11035 #define __no_dataless_unused /* nothing */
11037 #define __no_dataless_unused __unused
11041 vfs_context_dataless_materialization_is_prevented(
11042 vfs_context_t
const ctx __no_dataless_unused
)
11044 #if CONFIG_DATALESS_FILES
11045 proc_t
const p
= vfs_context_proc(ctx
);
11046 thread_t
const t
= vfs_context_thread(ctx
);
11047 uthread_t
const ut
= t
? get_bsdthread_info(t
) : NULL
;
11050 * Kernel context ==> return EDEADLK, as we would with any random
11051 * process decorated as no-materialize.
11053 if (ctx
== vfs_context_kernel()) {
11058 * If the process has the dataless-manipulation entitlement,
11059 * materialization is prevented, and depending on the kind
11060 * of file system operation, things get to proceed as if the
11061 * object is not dataless.
11063 if (vfs_context_is_dataless_manipulator(ctx
)) {
11064 return EJUSTRETURN
;
11068 * Per-thread decorations override any process-wide decorations.
11069 * (Foundation uses this, and this overrides even the dataless-
11070 * manipulation entitlement so as to make API contracts consistent.)
11073 if (ut
->uu_flag
& UT_NSPACE_NODATALESSFAULTS
) {
11076 if (ut
->uu_flag
& UT_NSPACE_FORCEDATALESSFAULTS
) {
11082 * If the process's iopolicy specifies that dataless files
11083 * can be materialized, then we let it go ahead.
11085 if (p
->p_vfs_iopolicy
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) {
11088 #endif /* CONFIG_DATALESS_FILES */
11091 * The default behavior is to not materialize dataless files;
11092 * return to the caller that deadlock was detected.
11098 nspace_resolver_init(void)
11100 #if CONFIG_DATALESS_FILES
11101 nspace_resolver_request_hashtbl
=
11102 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
,
11103 M_VNODE
/* XXX */, &nspace_resolver_request_hashmask
);
11104 #endif /* CONFIG_DATALESS_FILES */
11108 nspace_resolver_exited(struct proc
*p __no_dataless_unused
)
11110 #if CONFIG_DATALESS_FILES
11111 struct nspace_resolver_requesthead
*bucket
;
11112 struct nspace_resolver_request
*req
;
11117 if ((p
->p_lflag
& P_LNSPACE_RESOLVER
) &&
11118 p
== nspace_resolver_proc
) {
11119 for (idx
= 0; idx
<= nspace_resolver_request_hashmask
; idx
++) {
11120 bucket
= &nspace_resolver_request_hashtbl
[idx
];
11121 LIST_FOREACH(req
, bucket
, r_hashlink
) {
11122 nspace_resolver_req_mark_complete(req
,
11126 nspace_resolver_proc
= NULL
;
11129 NSPACE_REQ_UNLOCK();
11130 #endif /* CONFIG_DATALESS_FILES */
/*
 * resolve_nspace_item: convenience wrapper around
 * resolve_nspace_item_ext() that passes NULL as the extra argument and
 * returns its result unchanged.
 */
11134 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
11136 return resolve_nspace_item_ext(vp
, op
, NULL
);
11139 #define DATALESS_RESOLVER_ENTITLEMENT \
11140 "com.apple.private.vfs.dataless-resolver"
11141 #define DATALESS_MANIPULATION_ENTITLEMENT \
11142 "com.apple.private.vfs.dataless-manipulation"
11145 * Return TRUE if the vfs context is associated with a process entitled
11146 * for dataless manipulation.
11148 * XXX Arguably belongs in vfs_subr.c, but is here because of the
11149 * complication around CONFIG_DATALESS_FILES.
11152 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
)
11154 #if CONFIG_DATALESS_FILES
11155 assert(ctx
->vc_thread
== current_thread());
11156 task_t
const task
= current_task();
11157 return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) ||
11158 IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
);
11161 #endif /* CONFIG_DATALESS_FILES */
11165 resolve_nspace_item_ext(
11166 struct vnode
*vp __no_dataless_unused
,
11167 uint64_t op __no_dataless_unused
,
11168 void *arg __unused
)
11170 #if CONFIG_DATALESS_FILES
11176 struct nspace_resolver_request req
;
11178 // only allow namespace events on regular files, directories and symlinks.
11179 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
11184 // if this is a snapshot event and the vnode is on a
11185 // disk image just pretend nothing happened since any
11186 // change to the disk image will cause the disk image
11187 // itself to get backed up and this avoids multi-way
11188 // deadlocks between the snapshot handler and the ever
11189 // popular diskimages-helper process. the variable
11190 // nspace_allow_virtual_devs allows this behavior to
11191 // be overridden (for use by the Mobile TimeMachine
11192 // testing infrastructure which uses disk images)
11194 if (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) {
11195 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled");
11199 error
= vfs_context_dataless_materialization_is_prevented(
11200 vfs_context_current());
11202 os_log_debug(OS_LOG_DEFAULT
,
11203 "NSPACE process/thread is decorated as no-materialization");
11207 kr
= host_get_filecoordinationd_port(host_priv_self(), &mp
);
11208 if (kr
!= KERN_SUCCESS
|| !IPC_PORT_VALID(mp
)) {
11209 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port");
11210 // Treat this like being unable to access the backing
11215 path
= zalloc(ZV_NAMEI
);
11216 path_len
= MAXPATHLEN
;
11218 error
= vn_getpath(vp
, path
, &path_len
);
11220 int xxx_rdar44371223
; /* XXX Mig bug */
11221 req
.r_req_id
= next_nspace_req_id();
11222 req
.r_resolver_error
= 0;
11225 if ((error
= vnode_ref(vp
)) == 0) { // take a ref so that the vnode doesn't go away
11228 goto out_release_port
;
11232 error
= nspace_resolver_req_add(&req
);
11233 NSPACE_REQ_UNLOCK();
11235 vnode_rele(req
.r_vp
);
11236 goto out_release_port
;
11239 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call");
11240 kr
= send_nspace_resolve_path(mp
, req
.r_req_id
,
11241 current_proc()->p_pid
, (uint32_t)(op
& 0xffffffff),
11242 path
, &xxx_rdar44371223
);
11243 if (kr
!= KERN_SUCCESS
) {
11244 // Also treat this like being unable to access
11245 // the backing store server.
11246 os_log_error(OS_LOG_DEFAULT
,
11247 "NSPACE resolve_path failure: %d", kr
);
11251 nspace_resolver_req_remove(&req
);
11252 NSPACE_REQ_UNLOCK();
11253 vnode_rele(req
.r_vp
);
11254 goto out_release_port
;
11257 // Give back the memory we allocated earlier while
11258 // we wait; we no longer need it.
11259 zfree(ZV_NAMEI
, path
);
11262 // Request has been submitted to the resolver.
11263 // Now (interruptibly) wait for completion.
11264 // Upon return, the request will have been removed
11265 // from the lookup table.
11266 error
= nspace_resolver_req_wait(&req
);
11268 vnode_rele(req
.r_vp
);
11272 if (path
!= NULL
) {
11273 zfree(ZV_NAMEI
, path
);
11275 ipc_port_release_send(mp
);
11280 #endif /* CONFIG_DATALESS_FILES */
11284 nspace_snapshot_event(__unused vnode_t vp
, __unused
time_t ctime
,
11285 __unused
uint64_t op_type
, __unused
void *arg
)
/*
 * build_volfs_path: format a "/.vol/<fsid>/<fileid>" path for vp.
 *
 * vp   - vnode whose va_fsid and va_fileid attributes are requested
 * path - output buffer
 * len  - in: size passed to snprintf(); out: snprintf() result + 1
 *
 * NOTE(review): snprintf() returns the length the full string would have
 * had, so on truncation *len can come back larger than the buffer size
 * that was passed in -- callers should not treat it as bytes written
 * without checking.
 * NOTE(review): the initialization of va and the return statement are not
 * visible in this view.
 */
11292 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
11294 struct vnode_attr va
;
11298 VATTR_WANTED(&va
, va_fsid
);
11299 VATTR_WANTED(&va
, va_fileid
);
/* Attribute fetch failed: emit a recognizable placeholder path instead. */
11301 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
11302 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
/* Attribute fetch succeeded: build the path from fsid and file ID. */
11305 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
/*
 * fsctl_bogus_command_compat: translate the IOCBASECMD() form of a known
 * fsctl selector back to the full selector constant.
 *
 * Each visible case maps IOCBASECMD(X) to X for the selectors listed
 * below.
 * NOTE(review): presumably IOCBASECMD() strips the encoded parameter
 * length so that callers passing a mis-sized command still match --
 * confirm against the ioccom definitions.
 * NOTE(review): the switch header and the result for a command that
 * matches no case are not visible in this view.
 */
11313 static unsigned long
11314 fsctl_bogus_command_compat(unsigned long cmd
)
11317 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
11318 return FSIOC_SYNC_VOLUME
;
11319 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
11320 return FSIOC_ROUTEFS_SETROUTEID
;
11321 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
11322 return FSIOC_SET_PACKAGE_EXTS
;
11323 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
11324 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
11325 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
11326 return DISK_CONDITIONER_IOC_GET
;
11327 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
11328 return DISK_CONDITIONER_IOC_SET
;
11329 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
11330 return FSIOC_FIOSEEKHOLE
;
11331 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
11332 return FSIOC_FIOSEEKDATA
;
11333 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
11334 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
11335 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
11336 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
/*
 * cas_bsdflags_setattr: issue the FSIOC_CAS_BSDFLAGS ioctl on vp with
 * FWRITE, forwarding arg and ctx, and return the VNOP_IOCTL() result.
 *
 * handle_flags() passes this function to chflags0() as its callback.
 */
11343 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
)
11345 return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
);
11348 static int __attribute__((noinline
))
11349 handle_sync_volume(vnode_t vp
, vnode_t
*arg_vp
, caddr_t data
, vfs_context_t ctx
)
11351 struct vfs_attr vfa
;
11352 mount_t mp
= vp
->v_mount
;
11356 /* record vid of vp so we can drop it below. */
11357 uint32_t vvid
= vp
->v_id
;
11360 * Then grab mount_iterref so that we can release the vnode.
11361 * Without this, a thread may call vnode_iterate_prepare then
11362 * get into a deadlock because we've never released the root vp
11364 error
= mount_iterref(mp
, 0);
11371 if (*(uint32_t*)data
& FSCTL_SYNC_WAIT
) {
11376 * If the filesystem supports multiple filesystems in a
11377 * partition (e.g. APFS volumes in a container), it knows
11378 * that the waitfor argument to VFS_SYNC is a set of flags.
11380 VFSATTR_INIT(&vfa
);
11381 VFSATTR_WANTED(&vfa
, f_capabilities
);
11382 if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) &&
11383 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
11384 ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) &&
11385 ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) {
11389 /* issue the sync for this volume */
11390 (void)sync_callback(mp
, &arg
);
11393 * Then release the mount_iterref once we're done syncing; it's not
11394 * needed for the VNOP_IOCTL below
11396 mount_iterdrop(mp
);
11398 if (arg
& FSCTL_SYNC_FULLSYNC
) {
11399 /* re-obtain vnode iocount on the root vp, if possible */
11400 error
= vnode_getwithvid(vp
, vvid
);
11402 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
11406 /* mark the argument VP as having been released */
/*
 * handle_routes: copy a route path string in from user space and hand it
 * to routefs_kernel_mount().
 *
 * udata - user address of the NUL-terminated path (at most MAXPATHLEN
 *         bytes are copied by copyinstr())
 *
 * NOTE(review): the error checks between the steps and the return
 * statement are not visible in this view.
 */
11412 static int __attribute__((noinline
))
11413 handle_routes(user_addr_t udata
)
11415 char routepath
[MAXPATHLEN
];
/* Superuser check on the current credential. */
11419 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
/* Start from a zeroed buffer before copying the user string in. */
11422 bzero(routepath
, MAXPATHLEN
);
11423 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
11427 error
= routefs_kernel_mount(routepath
);
/*
 * handle_flags: apply the new BSD flags carried in a
 * struct fsioc_cas_bsdflags request.
 *
 * data is reinterpreted as struct fsioc_cas_bsdflags; its new_flags value
 * is placed in va_flags of a vnode_attr, and chflags0() is called with
 * cas_bsdflags_setattr as the setter callback and the request itself as
 * the callback argument.
 * NOTE(review): the initialization of va and the return statement are not
 * visible in this view.
 */
11432 static int __attribute__((noinline
))
11433 handle_flags(vnode_t vp
, caddr_t data
, vfs_context_t ctx
)
11435 struct fsioc_cas_bsdflags
*cas
= (struct fsioc_cas_bsdflags
*)data
;
11436 struct vnode_attr va
;
11440 VATTR_SET(&va
, va_flags
, cas
->new_flags
);
11442 error
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
);
11446 static int __attribute__((noinline
))
11447 handle_auth(vnode_t vp
, u_long cmd
, caddr_t data
, u_long options
, vfs_context_t ctx
)
11449 struct mount
*mp
= NULL
;
11450 errno_t rootauth
= 0;
11455 * query the underlying FS and see if it reports something
11456 * sane for this vnode. If volume is authenticated via
11457 * chunklist, leave that for the caller to determine.
11459 rootauth
= VNOP_IOCTL(vp
, cmd
, data
, (int)options
, ctx
);
11465 * Make a filesystem-specific control call:
11469 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
11474 #define STK_PARAMS 128
11475 char stkbuf
[STK_PARAMS
] = {0};
11476 caddr_t data
, memp
;
11477 vnode_t vp
= *arg_vp
;
11479 if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
11483 cmd
= fsctl_bogus_command_compat(cmd
);
11485 size
= IOCPARM_LEN(cmd
);
11486 if (size
> IOCPARM_MAX
) {
11490 is64bit
= proc_is64bit(p
);
11494 if (size
> sizeof(stkbuf
)) {
11495 if ((memp
= (caddr_t
)kheap_alloc(KHEAP_TEMP
, size
, Z_WAITOK
)) == 0) {
11503 if (cmd
& IOC_IN
) {
11505 error
= copyin(udata
, data
, size
);
11508 kheap_free(KHEAP_TEMP
, memp
, size
);
11514 *(user_addr_t
*)data
= udata
;
11516 *(uint32_t *)data
= (uint32_t)udata
;
11519 } else if ((cmd
& IOC_OUT
) && size
) {
11521 * Zero the buffer so the user always
11522 * gets back something deterministic.
11525 } else if (cmd
& IOC_VOID
) {
11527 *(user_addr_t
*)data
= udata
;
11529 *(uint32_t *)data
= (uint32_t)udata
;
11533 /* Check to see if it's a generic command */
11535 case FSIOC_SYNC_VOLUME
:
11536 error
= handle_sync_volume(vp
, arg_vp
, data
, ctx
);
11539 case FSIOC_ROUTEFS_SETROUTEID
:
11541 error
= handle_routes(udata
);
11545 case FSIOC_SET_PACKAGE_EXTS
: {
11546 user_addr_t ext_strings
;
11547 uint32_t num_entries
;
11548 uint32_t max_width
;
11550 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
11554 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
11555 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
11556 // either you're 64-bit and passed a 64-bit struct or
11557 // you're 32-bit and passed a 32-bit struct. otherwise
11564 if (sizeof(user64_addr_t
) > sizeof(user_addr_t
)) {
11565 assert(((user64_package_ext_info
*)data
)->strings
<= UINT32_MAX
);
11567 ext_strings
= (user_addr_t
)((user64_package_ext_info
*)data
)->strings
;
11568 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
11569 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
11571 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
11572 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
11573 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
11575 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
11579 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
11581 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
11585 mount_lock(vp
->v_mount
);
11586 if (data
[0] != 0) {
11588 for (i
= 0; i
< MFSTYPENAMELEN
; i
++) {
11590 goto continue_copy
;
11594 * Getting here means we have a user data string which has no
11595 * NULL termination in its first MFSTYPENAMELEN bytes.
11596 * This is bogus, let's avoid strlcpy-ing the read data and
11602 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
11603 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
11604 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11605 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
11606 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
11609 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
11610 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
11612 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
11613 vp
->v_mount
->fstypename_override
[0] = '\0';
11616 mount_unlock(vp
->v_mount
);
11621 case DISK_CONDITIONER_IOC_GET
: {
11622 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11626 case DISK_CONDITIONER_IOC_SET
: {
11627 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
11631 case FSIOC_CAS_BSDFLAGS
:
11632 error
= handle_flags(vp
, data
, ctx
);
11635 case FSIOC_FD_ONLY_OPEN_ONCE
: {
11637 if (vnode_usecount(vp
) > 1) {
11638 vnode_lock_spin(vp
);
11639 if (vp
->v_lflag
& VL_HASSTREAMS
) {
11640 if (vnode_isinuse_locked(vp
, 1, 1)) {
11643 } else if (vnode_usecount(vp
) > 1) {
11651 case FSIOC_EVAL_ROOTAUTH
:
11652 error
= handle_auth(vp
, cmd
, data
, options
, ctx
);
11656 /* other, known commands shouldn't be passed down here */
11659 case F_TRIM_ACTIVE_FILE
:
11661 case F_TRANSCODEKEY
:
11662 case F_GETPROTECTIONLEVEL
:
11663 case F_GETDEFAULTPROTLEVEL
:
11664 case F_MAKECOMPRESSED
:
11665 case F_SET_GREEDY_MODE
:
11666 case F_SETSTATICCONTENT
:
11668 case F_SETBACKINGSTORE
:
11669 case F_GETPATH_MTMINFO
:
11670 case APFSIOC_REVERT_TO_SNAPSHOT
:
11671 case FSIOC_FIOSEEKHOLE
:
11672 case FSIOC_FIOSEEKDATA
:
11673 case HFS_GET_BOOT_INFO
:
11674 case HFS_SET_BOOT_INFO
:
11678 case F_BARRIERFSYNC
:
11681 case FSIOC_KERNEL_ROOTAUTH
:
11685 /* Invoke the filesystem-specific code */
11686 error
= VNOP_IOCTL(vp
, cmd
, data
, (int)options
, ctx
);
11688 } /* end switch stmt */
11691 * if no errors, copy any data to user. Size was
11692 * already set and checked above.
11694 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
11695 error
= copyout(data
, udata
, size
);
11700 kheap_free(KHEAP_TEMP
, memp
, size
);
11708 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
11711 struct nameidata nd
;
11712 uint32_t nameiflags
;
11714 vfs_context_t ctx
= vfs_context_current();
11716 AUDIT_ARG(cmd
, (int)uap
->cmd
);
11717 AUDIT_ARG(value32
, uap
->options
);
11718 /* Get the vnode for the file we are getting info on: */
11721 // if we come through fsctl() then the file is by definition not open.
11722 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11723 // lest the caller mistakenly thinks the only open is their own (but in
11724 // reality it's someone else's).
11726 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
11729 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
11730 nameiflags
|= FOLLOW
;
11732 if (uap
->cmd
== FSIOC_FIRMLINK_CTL
) {
11733 nameiflags
|= (CN_FIRMLINK_NOFOLLOW
| NOCACHE
);
11735 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
11736 UIO_USERSPACE
, uap
->path
, ctx
);
11737 if ((error
= namei(&nd
))) {
11744 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
11750 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
/*
 * ffsctl: file-descriptor-based entry point for file system control
 * requests.
 *
 * Visible steps: audit the fd, command and options; resolve uap->fd to a
 * vnode with file_vnode(); call vnode_getwithref() on it; run
 * mac_mount_check_fsctl() on its mount; then pass the request to
 * fsctl_internal().
 *
 * NOTE(review): this extract has lost lines (the return type, the
 * declarations of error and vp, every error path and the final release of
 * the vnode and file references).  Confirm against the complete source.
 */
11760 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11764 vfs_context_t ctx
= vfs_context_current();
11767 AUDIT_ARG(fd
, uap
->fd
);
11768 AUDIT_ARG(cmd
, (int)uap
->cmd
);
11769 AUDIT_ARG(value32
, uap
->options
);
11771 /* Get the vnode for the file we are getting info on: */
11772 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11776 if ((error
= vnode_getwithref(vp
))) {
/* MAC policy check on the mount that owns vp, keyed by the command. */
11782 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
/* vp is passed by address; see the comment below about it being reset. */
11789 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11793 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11800 /* end of fsctl system call */
/*
 * Entitlement string tested by xattr_entitlement_check() through
 * IOTaskHasEntitlement().
 */
11802 #define FILESEC_ACCESS_ENTITLEMENT \
11803 "com.apple.private.vfs.filesec-access"
/*
 * xattr_entitlement_check: access gate for protected extended attribute
 * names.
 *
 * attrname - attribute name; only KAUTH_FILESEC_XATTR is examined here.
 * ctx      - context used for the superuser test.
 * setting  - true for a set request, false for a get request.
 *
 * For KAUTH_FILESEC_XATTR the inner condition holds when the request is a
 * get issued by the superuser, or when the current task holds
 * FILESEC_ACCESS_ENTITLEMENT.
 *
 * NOTE(review): the return statements are not visible in this extract.
 * Callers treat a non-zero result as an error; confirm the exact values
 * against the complete source.
 */
11806 xattr_entitlement_check(const char *attrname
, vfs_context_t ctx
, bool setting
)
11808 if (strcmp(attrname
, KAUTH_FILESEC_XATTR
) == 0) {
11810 * get: root and tasks with FILESEC_ACCESS_ENTITLEMENT.
11811 * set: only tasks with FILESEC_ACCESS_ENTITLEMENT.
11813 if ((!setting
&& vfs_context_issuser(ctx
)) ||
11814 IOTaskHasEntitlement(current_task(),
11815 FILESEC_ACCESS_ENTITLEMENT
)) {
/*
 * getxattr: path-based system call that reads an extended attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOSECURITY or XATTR_NODEFAULT;
 *   - look up uap->path, requesting FOLLOW unless XATTR_NOFOLLOW is set;
 *   - copy the attribute name in from user space;
 *   - for names that xattr_protected() flags, require
 *     xattr_entitlement_check(..., false) to succeed;
 *   - special-case a size of 0xffffffff or (size_t)-1, and clamp any other
 *     size above XATTR_MAXSIZE down to XATTR_MAXSIZE;
 *   - describe the user buffer with a one-iovec uio and call vn_getxattr();
 *   - store in *retval either uap->size minus the uio residual, or the
 *     attribute size reported by vn_getxattr().
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, the bodies of the error branches, the condition choosing
 * between the two *retval stores, and the cleanup).  Confirm against the
 * complete source.
 */
11824 * Retrieve the data of an extended attribute.
11827 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11830 struct nameidata nd
;
11831 char attrname
[XATTR_MAXNAMELEN
+ 1];
11832 vfs_context_t ctx
= vfs_context_current();
11834 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11835 size_t attrsize
= 0;
11837 u_int32_t nameiflags
;
11839 char uio_buf
[UIO_SIZEOF(1)];
11841 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11845 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11846 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11847 if ((error
= namei(&nd
))) {
11853 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11857 if (xattr_protected(attrname
) &&
11858 (error
= xattr_entitlement_check(attrname
, ctx
, false)) != 0) {
11862 * the specific check for 0xffffffff is a hack to preserve
11863 * binaray compatibilty in K64 with applications that discovered
11864 * that passing in a buf pointer and a size of -1 resulted in
11865 * just the size of the indicated extended attribute being returned.
11866 * this isn't part of the documented behavior, but because of the
11867 * original implemtation's check for "uap->size > 0", this behavior
11868 * was allowed. In K32 that check turned into a signed comparison
11869 * even though uap->size is unsigned... in K64, we blow by that
11870 * check because uap->size is unsigned and doesn't get sign smeared
11871 * in the munger for a 32 bit user app. we also need to add a
11872 * check to limit the maximum size of the buffer being passed in...
11873 * unfortunately, the underlying fileystems seem to just malloc
11874 * the requested size even if the actual extended attribute is tiny.
11875 * because that malloc is for kernel wired memory, we have to put a
11876 * sane limit on it.
11878 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11879 * U64 running on K64 will yield -1 (64 bits wide)
11880 * U32/U64 running on K32 will yield -1 (32 bits wide)
11882 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
11887 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11888 uap
->size
= XATTR_MAXSIZE
;
11891 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11892 &uio_buf
[0], sizeof(uio_buf
));
11893 uio_addiov(auio
, uap
->value
, uap
->size
);
11896 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11901 *retval
= uap
->size
- uio_resid(auio
);
11903 *retval
= (user_ssize_t
)attrsize
;
/*
 * fgetxattr: file-descriptor-based system call that reads an extended
 * attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOFOLLOW, XATTR_NOSECURITY or
 *     XATTR_NODEFAULT;
 *   - resolve uap->fd with file_vnode() and call vnode_getwithref(); a
 *     failing vnode_getwithref() is followed by file_drop();
 *   - copy the attribute name in from user space and, for names that
 *     xattr_protected() flags, require xattr_entitlement_check(..., false)
 *     to succeed;
 *   - build a one-iovec uio only when uap->value is non-zero and
 *     uap->size > 0;
 *   - call vn_getxattr(), then vnode_put() and file_drop();
 *   - store in *retval either uap->size minus the uio residual, or the
 *     attribute size reported by vn_getxattr().
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies and the condition choosing between the
 * two *retval stores).  Confirm against the complete source.
 */
11910 * Retrieve the data of an extended attribute.
11913 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11916 char attrname
[XATTR_MAXNAMELEN
+ 1];
11917 vfs_context_t ctx
= vfs_context_current();
11919 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11920 size_t attrsize
= 0;
11923 char uio_buf
[UIO_SIZEOF(1)];
11925 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11929 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11932 if ((error
= vnode_getwithref(vp
))) {
11933 file_drop(uap
->fd
);
11936 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11940 if (xattr_protected(attrname
) &&
11941 (error
= xattr_entitlement_check(attrname
, ctx
, false)) != 0) {
11944 if (uap
->value
&& uap
->size
> 0) {
11945 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11946 &uio_buf
[0], sizeof(uio_buf
));
11947 uio_addiov(auio
, uap
->value
, uap
->size
);
/* This call fetches the context again instead of reusing the local ctx. */
11950 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11952 (void)vnode_put(vp
);
11953 file_drop(uap
->fd
);
11956 *retval
= uap
->size
- uio_resid(auio
);
11958 *retval
= (user_ssize_t
)attrsize
;
/*
 * setxattr: path-based system call that writes an extended attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOSECURITY or XATTR_NODEFAULT;
 *   - copy the attribute name in from user space; an EPERM result from
 *     copyinstr() is reported to the caller as ENAMETOOLONG;
 *   - for names that xattr_protected() flags, require
 *     xattr_entitlement_check(..., true) to succeed;
 *   - test for a non-zero size paired with a zero value pointer, and for a
 *     size above INT_MAX;
 *   - look up uap->path, requesting FOLLOW unless XATTR_NOFOLLOW is set;
 *   - describe the user buffer with a one-iovec UIO_WRITE uio and call
 *     vn_setxattr();
 *   - post an FSE_XATTR_MODIFIED fsevent.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the remaining add_fsevent() arguments
 * and the cleanup).  Confirm against the complete source.
 */
11964 * Set the data of an extended attribute.
11967 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11970 struct nameidata nd
;
11971 char attrname
[XATTR_MAXNAMELEN
+ 1];
11972 vfs_context_t ctx
= vfs_context_current();
11974 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11976 u_int32_t nameiflags
;
11978 char uio_buf
[UIO_SIZEOF(1)];
11980 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11984 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11986 if (error
== EPERM
) {
11987 /* if the string won't fit in attrname, copyinstr emits EPERM */
11988 return ENAMETOOLONG
;
11990 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11993 if (xattr_protected(attrname
) &&
11994 (error
= xattr_entitlement_check(attrname
, ctx
, true)) != 0) {
11997 if (uap
->size
!= 0 && uap
->value
== 0) {
12000 if (uap
->size
> INT_MAX
) {
12004 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
12005 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
12006 if ((error
= namei(&nd
))) {
12012 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
12013 &uio_buf
[0], sizeof(uio_buf
));
12014 uio_addiov(auio
, uap
->value
, uap
->size
);
12016 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
12019 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
/*
 * fsetxattr: file-descriptor-based system call that writes an extended
 * attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOFOLLOW, XATTR_NOSECURITY or
 *     XATTR_NODEFAULT;
 *   - copy the attribute name in from user space; an EPERM result from
 *     copyinstr() is reported to the caller as ENAMETOOLONG;
 *   - for names that xattr_protected() flags, require
 *     xattr_entitlement_check(..., true) to succeed;
 *   - test for a non-zero size paired with a zero value pointer, and for a
 *     size above INT_MAX;
 *   - resolve uap->fd with file_vnode() and call vnode_getwithref(); a
 *     failing vnode_getwithref() is followed by file_drop();
 *   - describe the user buffer with a one-iovec UIO_WRITE uio and call
 *     vn_setxattr();
 *   - post an FSE_XATTR_MODIFIED fsevent and drop the file reference.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the remaining add_fsevent() arguments
 * and the vnode release).  Confirm against the complete source.
 */
12030 * Set the data of an extended attribute.
12033 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
12036 char attrname
[XATTR_MAXNAMELEN
+ 1];
12037 vfs_context_t ctx
= vfs_context_current();
12039 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
12042 char uio_buf
[UIO_SIZEOF(1)];
12044 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
12048 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
12050 if (error
== EPERM
) {
12051 /* if the string won't fit in attrname, copyinstr emits EPERM */
12052 return ENAMETOOLONG
;
12054 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
12057 if (xattr_protected(attrname
) &&
12058 (error
= xattr_entitlement_check(attrname
, ctx
, true)) != 0) {
12061 if (uap
->size
!= 0 && uap
->value
== 0) {
12064 if (uap
->size
> INT_MAX
) {
12067 if ((error
= file_vnode(uap
->fd
, &vp
))) {
12070 if ((error
= vnode_getwithref(vp
))) {
12071 file_drop(uap
->fd
);
12074 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
12075 &uio_buf
[0], sizeof(uio_buf
));
12076 uio_addiov(auio
, uap
->value
, uap
->size
);
/* This call fetches the context again instead of reusing the local ctx. */
12078 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
12081 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
12087 file_drop(uap
->fd
);
/*
 * removexattr: path-based system call that removes an extended attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOSECURITY or XATTR_NODEFAULT;
 *   - copy the attribute name in from user space;
 *   - test the name with xattr_protected() (no entitlement check is made
 *     on this path, unlike the get and set calls);
 *   - look up uap->path, requesting FOLLOW unless XATTR_NOFOLLOW is set;
 *   - call vn_removexattr() and post an FSE_XATTR_REMOVED fsevent.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the remaining add_fsevent() arguments
 * and the cleanup).  Confirm against the complete source.
 */
12093 * Remove an extended attribute.
12094 * XXX Code duplication here.
12097 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
12100 struct nameidata nd
;
12101 char attrname
[XATTR_MAXNAMELEN
+ 1];
12102 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
12103 vfs_context_t ctx
= vfs_context_current();
12105 u_int32_t nameiflags
;
12108 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
12112 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
12116 if (xattr_protected(attrname
)) {
12119 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
12120 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
12121 if ((error
= namei(&nd
))) {
12127 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
12130 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
/*
 * fremovexattr: file-descriptor-based system call that removes an extended
 * attribute.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOFOLLOW, XATTR_NOSECURITY or
 *     XATTR_NODEFAULT;
 *   - copy the attribute name in from user space and test it with
 *     xattr_protected();
 *   - resolve uap->fd with file_vnode() and call vnode_getwithref(); a
 *     failing vnode_getwithref() is followed by file_drop();
 *   - call vn_removexattr(), post an FSE_XATTR_REMOVED fsevent and drop the
 *     file reference.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the remaining add_fsevent() arguments
 * and the vnode release).  Confirm against the complete source.
 */
12141 * Remove an extended attribute.
12142 * XXX Code duplication here.
12145 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
12148 char attrname
[XATTR_MAXNAMELEN
+ 1];
12152 vfs_context_t ctx
= vfs_context_current();
12155 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
12159 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
12163 if (xattr_protected(attrname
)) {
12166 if ((error
= file_vnode(uap
->fd
, &vp
))) {
12169 if ((error
= vnode_getwithref(vp
))) {
12170 file_drop(uap
->fd
);
/* This call fetches the context again instead of reusing the local ctx. */
12174 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
12177 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
12183 file_drop(uap
->fd
);
/*
 * listxattr: path-based system call that lists extended attribute names.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOSECURITY or XATTR_NODEFAULT;
 *   - look up uap->path, requesting FOLLOW unless XATTR_NOFOLLOW is set;
 *   - build a one-iovec UIO_READ uio only when uap->namebuf is non-zero and
 *     uap->bufsize > 0;
 *   - call vn_listxattr();
 *   - store in *retval either uap->bufsize minus the uio residual, or the
 *     size reported by vn_listxattr().
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the condition choosing between the two
 * *retval stores, and the cleanup).  Confirm against the complete source.
 */
12189 * Retrieve the list of extended attribute names.
12190 * XXX Code duplication here.
12193 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
12196 struct nameidata nd
;
12197 vfs_context_t ctx
= vfs_context_current();
12199 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
12200 size_t attrsize
= 0;
12201 u_int32_t nameiflags
;
12203 char uio_buf
[UIO_SIZEOF(1)];
12205 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
12209 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
12210 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
12211 if ((error
= namei(&nd
))) {
12216 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
12217 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
12218 &uio_buf
[0], sizeof(uio_buf
));
12219 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
12222 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
12226 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
12228 *retval
= (user_ssize_t
)attrsize
;
/*
 * flistxattr: file-descriptor-based system call that lists extended
 * attribute names.
 *
 * Visible steps:
 *   - test uap->options for XATTR_NOFOLLOW, XATTR_NOSECURITY or
 *     XATTR_NODEFAULT;
 *   - resolve uap->fd with file_vnode() and call vnode_getwithref(); a
 *     failing vnode_getwithref() is followed by file_drop();
 *   - build a one-iovec UIO_READ uio only when uap->namebuf is non-zero and
 *     uap->bufsize > 0;
 *   - call vn_listxattr() and drop the file reference;
 *   - store in *retval either uap->bufsize minus the uio residual, or the
 *     size reported by vn_listxattr().
 *
 * The address-space type is chosen with proc_is64bit(p) here, where the
 * neighbouring xattr calls use IS_64BIT_PROCESS(p).
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the vnode release and the condition
 * choosing between the two *retval stores).  Confirm against the complete
 * source.
 */
12234 * Retrieve the list of extended attribute names.
12235 * XXX Code duplication here.
12238 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
12242 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
12243 size_t attrsize
= 0;
12245 char uio_buf
[UIO_SIZEOF(1)];
12247 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
12251 if ((error
= file_vnode(uap
->fd
, &vp
))) {
12254 if ((error
= vnode_getwithref(vp
))) {
12255 file_drop(uap
->fd
);
12258 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
12259 auio
= uio_createwithbuffer(1, 0, spacetype
,
12260 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
12261 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
12264 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
12267 file_drop(uap
->fd
);
12269 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
12271 *retval
= (user_ssize_t
)attrsize
;
/*
 * fsgetpath_internal: build the path of the object identified by
 * (volfs_id, objid) into the caller-supplied kernel buffer.
 *
 * ctx      - context passed to the VFS and MAC calls.
 * volfs_id - volume id handed to mount_lookupby_volfsid().
 * objid    - object id handed to VFS_VGET().
 * bufsize  - size of buf; values above PAGE_SIZE are special-cased first.
 * buf      - destination buffer for build_path().
 * options  - FSOPT_ISREALFSID and FSOPT_NOFIRMLINKPATH are examined.
 * pathlen  - receives the length produced by build_path().
 *
 * Visible steps: find the mount; query its capabilities and clear
 * use_vfs_root when VOL_CAP_FMT_VOL_GROUPS is both set and valid; obtain
 * the vnode through VFS_ROOT() or VFS_VGET(); on ENOENT in a MNT_UNION
 * mount, switch to the mount of the covered vnode; run
 * mac_vnode_check_fsgetpath(); call build_path(); audit the result and,
 * when VFS_LOOKUP tracing is enabled, emit a kdebug lookup record.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies, the conditions selecting between the
 * VFS_ROOT()/VFS_VGET() calls, the retry handling and the cleanup).
 * Confirm against the complete source.
 */
12277 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
12278 vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
)
12281 struct mount
*mp
= NULL
;
12285 /* maximum number of times to retry build_path */
12286 unsigned int retries
= 0x10;
12288 if (bufsize
> PAGE_SIZE
) {
12297 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
12298 error
= ENOTSUP
; /* unexpected failure */
12304 struct vfs_attr vfsattr
;
12305 int use_vfs_root
= TRUE
;
/* The capability query runs with the kernel context, not the caller ctx. */
12307 VFSATTR_INIT(&vfsattr
);
12308 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
12309 if (!(options
& FSOPT_ISREALFSID
) &&
12310 vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 &&
12311 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
12312 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) &&
12313 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) {
12314 use_vfs_root
= FALSE
;
12318 if (use_vfs_root
) {
12319 error
= VFS_ROOT(mp
, &vp
, ctx
);
12321 error
= VFS_VGET(mp
, objid
, &vp
, ctx
);
12324 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
12327 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
12329 * If the fileid isn't found and we're in a union
12330 * mount volume, then see if the fileid is in the
12331 * mounted-on volume.
12333 struct mount
*tmp
= mp
;
12334 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
12336 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
12348 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
12355 /* Obtain the absolute path to this vnode. */
12356 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
12357 if (options
& FSOPT_NOFIRMLINKPATH
) {
12358 bpflags
|= BUILDPATH_NO_FIRMLINK
;
12360 bpflags
|= BUILDPATH_CHECK_MOVED
;
12361 error
= build_path(vp
, buf
, (int)bufsize
, &length
, bpflags
, ctx
);
12365 /* there was a race building the path, try a few more times */
12366 if (error
== EAGAIN
) {
12377 AUDIT_ARG(text
, buf
);
12379 if (kdebug_debugid_enabled(VFS_LOOKUP
) && length
> 0) {
12380 unsigned long path_words
[NUMPARMS
];
12381 size_t path_len
= sizeof(path_words
);
12383 if ((size_t)length
< path_len
) {
12384 memcpy((char *)path_words
, buf
, length
);
12385 memset((char *)path_words
+ length
, 0, path_len
- length
);
12389 memcpy((char *)path_words
, buf
+ (length
- path_len
), path_len
);
12392 kdebug_vfs_lookup(path_words
, (int)path_len
, vp
,
12393 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
12396 *pathlen
= length
; /* may be superseded by error */
/*
 * fsgetpath_extended: shared implementation behind fsgetpath() and
 * fsgetpath_ext().
 *
 * buf, bufsize - user-space destination buffer and its size.
 * user_fsid    - user-space address the fsid is copied in from.
 * objid        - object id forwarded to fsgetpath_internal().
 * options      - only FSOPT_NOFIRMLINKPATH and FSOPT_ISREALFSID pass the
 *                option-mask test below.
 * retval       - receives the path length.
 *
 * Visible steps: test the option mask; copy the fsid in; audit fsid.val[0]
 * and objid; test bufsize against PAGE_SIZE and zero; allocate a zeroed
 * temporary buffer of bufsize bytes; call fsgetpath_internal() with
 * fsid.val[0]; copy length bytes out to buf; store length in *retval; free
 * the temporary buffer.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, error-branch bodies and the trailing arguments of the
 * fsgetpath_internal() call).  Confirm against the complete source.
 */
12403 * Obtain the full pathname of a file system object by id.
12406 fsgetpath_extended(user_addr_t buf
, user_size_t bufsize
, user_addr_t user_fsid
, uint64_t objid
,
12407 uint32_t options
, user_ssize_t
*retval
)
12409 vfs_context_t ctx
= vfs_context_current();
12415 if (options
& ~(FSOPT_NOFIRMLINKPATH
| FSOPT_ISREALFSID
)) {
12419 if ((error
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
12422 AUDIT_ARG(value32
, fsid
.val
[0]);
12423 AUDIT_ARG(value64
, objid
);
12424 /* Restrict output buffer size for now. */
12426 if (bufsize
> PAGE_SIZE
|| bufsize
<= 0) {
12429 realpath
= kheap_alloc(KHEAP_TEMP
, bufsize
, Z_WAITOK
| Z_ZERO
);
12430 if (realpath
== NULL
) {
12434 error
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
,
12441 error
= copyout((caddr_t
)realpath
, buf
, length
);
12443 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
12445 kheap_free(KHEAP_TEMP
, realpath
, bufsize
);
/*
 * fsgetpath: system call entry point that forwards the buffer, buffer
 * size, fsid pointer and object id from uap to fsgetpath_extended() and
 * returns its result.
 *
 * NOTE(review): the return type line and the trailing arguments of the
 * call are not visible in this extract; confirm against the complete
 * source.
 */
12450 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
12452 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12457 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args
*uap
, user_ssize_t
*retval
)
12459 return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
,
12460 uap
->options
, retval
);
/*
 * munge_statfs: convert a struct vfsstatfs into the user-visible statfs
 * layout and copy it out to bufp.
 *
 * mp           - mount supplying the flags, type number and, when
 *                MNTK_TYPENAME_OVERRIDE is set, the type name.
 * sfsp         - source statistics.
 * bufp         - user-space destination.
 * sizep        - optional out-parameter, tested against NULL at the end.
 * is_64_bit    - NOTE(review): presumably selects between the
 *                user64_statfs and user32_statfs paths; the test itself is
 *                not visible in this extract.
 * partial_copy - when set, the copy size is reduced by the sizes of
 *                f_reserved3 and f_reserved4.
 *
 * The user64_statfs path casts the counters to user64_long_t.  The
 * user32_statfs path may scale the block counts down by a shift and scale
 * the block size up by the same shift, clipping values that still exceed
 * INT_MAX.
 *
 * NOTE(review): this extract has lost lines (the return type, several
 * declarations, the if/else structure, the loop exits and the store
 * through sizep).  Confirm against the complete source.
 */
12464 * Common routine to handle various flavors of statfs data heading out
12467 * Returns: 0 Success
12471 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
12472 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
12473 boolean_t partial_copy
)
12476 int my_size
, copy_size
;
12479 struct user64_statfs sfs
;
12480 my_size
= copy_size
= sizeof(sfs
);
12481 bzero(&sfs
, my_size
);
12482 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12483 sfs
.f_type
= (short)mp
->mnt_vtable
->vfc_typenum
;
12484 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12485 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
12486 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
12487 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
12488 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
12489 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
12490 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
12491 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
12492 sfs
.f_fsid
= sfsp
->f_fsid
;
12493 sfs
.f_owner
= sfsp
->f_owner
;
12494 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12495 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12497 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12499 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12500 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12502 if (partial_copy
) {
12503 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12505 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12507 struct user32_statfs sfs
;
12509 my_size
= copy_size
= sizeof(sfs
);
12510 bzero(&sfs
, my_size
);
12512 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
12513 sfs
.f_type
= (short)mp
->mnt_vtable
->vfc_typenum
;
12514 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
12517 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
12518 * have to fudge the numbers here in that case. We inflate the blocksize in order
12519 * to reflect the filesystem size as best we can.
12521 if ((sfsp
->f_blocks
> INT_MAX
)
12522 /* Hack for 4061702 . I think the real fix is for Carbon to
12523 * look for some volume capability and not depend on hidden
12524 * semantics agreed between a FS and carbon.
12525 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12526 * for Carbon to set bNoVolumeSizes volume attribute.
12527 * Without this the webdavfs files cannot be copied onto
12528 * disk as they look huge. This change should not affect
12529 * XSAN as they should not setting these to -1..
12531 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
12532 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
12533 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
12537 * Work out how far we have to shift the block count down to make it fit.
12538 * Note that it's possible to have to shift so far that the resulting
12539 * blocksize would be unreportably large. At that point, we will clip
12540 * any values that don't fit.
12542 * For safety's sake, we also ensure that f_iosize is never reported as
12543 * being smaller than f_bsize.
12545 for (shift
= 0; shift
< 32; shift
++) {
12546 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
12549 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
12553 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12554 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
12555 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
12556 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
12557 #undef __SHIFT_OR_CLIP
12558 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
12559 sfs
.f_iosize
= (int)lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
12561 /* filesystem is small enough to be reported honestly */
12562 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
12563 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
12564 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
12565 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
12566 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
12568 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
12569 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
12570 sfs
.f_fsid
= sfsp
->f_fsid
;
12571 sfs
.f_owner
= sfsp
->f_owner
;
12572 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
12573 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
12575 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
12577 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
12578 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
12580 if (partial_copy
) {
12581 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
12583 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
12586 if (sizep
!= NULL
) {
12593 * copy stat structure into user_stat structure.
12596 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
12598 bzero(usbp
, sizeof(*usbp
));
12600 usbp
->st_dev
= sbp
->st_dev
;
12601 usbp
->st_ino
= sbp
->st_ino
;
12602 usbp
->st_mode
= sbp
->st_mode
;
12603 usbp
->st_nlink
= sbp
->st_nlink
;
12604 usbp
->st_uid
= sbp
->st_uid
;
12605 usbp
->st_gid
= sbp
->st_gid
;
12606 usbp
->st_rdev
= sbp
->st_rdev
;
12607 #ifndef _POSIX_C_SOURCE
12608 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12609 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12610 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12611 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12612 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12613 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12615 usbp
->st_atime
= sbp
->st_atime
;
12616 usbp
->st_atimensec
= sbp
->st_atimensec
;
12617 usbp
->st_mtime
= sbp
->st_mtime
;
12618 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12619 usbp
->st_ctime
= sbp
->st_ctime
;
12620 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12622 usbp
->st_size
= sbp
->st_size
;
12623 usbp
->st_blocks
= sbp
->st_blocks
;
12624 usbp
->st_blksize
= sbp
->st_blksize
;
12625 usbp
->st_flags
= sbp
->st_flags
;
12626 usbp
->st_gen
= sbp
->st_gen
;
12627 usbp
->st_lspare
= sbp
->st_lspare
;
12628 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12629 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12633 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
12635 bzero(usbp
, sizeof(*usbp
));
12637 usbp
->st_dev
= sbp
->st_dev
;
12638 usbp
->st_ino
= sbp
->st_ino
;
12639 usbp
->st_mode
= sbp
->st_mode
;
12640 usbp
->st_nlink
= sbp
->st_nlink
;
12641 usbp
->st_uid
= sbp
->st_uid
;
12642 usbp
->st_gid
= sbp
->st_gid
;
12643 usbp
->st_rdev
= sbp
->st_rdev
;
12644 #ifndef _POSIX_C_SOURCE
12645 usbp
->st_atimespec
.tv_sec
= (user32_time_t
)sbp
->st_atimespec
.tv_sec
;
12646 usbp
->st_atimespec
.tv_nsec
= (user32_long_t
)sbp
->st_atimespec
.tv_nsec
;
12647 usbp
->st_mtimespec
.tv_sec
= (user32_time_t
)sbp
->st_mtimespec
.tv_sec
;
12648 usbp
->st_mtimespec
.tv_nsec
= (user32_long_t
)sbp
->st_mtimespec
.tv_nsec
;
12649 usbp
->st_ctimespec
.tv_sec
= (user32_time_t
)sbp
->st_ctimespec
.tv_sec
;
12650 usbp
->st_ctimespec
.tv_nsec
= (user32_long_t
)sbp
->st_ctimespec
.tv_nsec
;
12652 usbp
->st_atime
= sbp
->st_atime
;
12653 usbp
->st_atimensec
= sbp
->st_atimensec
;
12654 usbp
->st_mtime
= sbp
->st_mtime
;
12655 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12656 usbp
->st_ctime
= sbp
->st_ctime
;
12657 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12659 usbp
->st_size
= sbp
->st_size
;
12660 usbp
->st_blocks
= sbp
->st_blocks
;
12661 usbp
->st_blksize
= sbp
->st_blksize
;
12662 usbp
->st_flags
= sbp
->st_flags
;
12663 usbp
->st_gen
= sbp
->st_gen
;
12664 usbp
->st_lspare
= sbp
->st_lspare
;
12665 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12666 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12670 * copy stat64 structure into user_stat64 structure.
12673 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
12675 bzero(usbp
, sizeof(*usbp
));
12677 usbp
->st_dev
= sbp
->st_dev
;
12678 usbp
->st_ino
= sbp
->st_ino
;
12679 usbp
->st_mode
= sbp
->st_mode
;
12680 usbp
->st_nlink
= sbp
->st_nlink
;
12681 usbp
->st_uid
= sbp
->st_uid
;
12682 usbp
->st_gid
= sbp
->st_gid
;
12683 usbp
->st_rdev
= sbp
->st_rdev
;
12684 #ifndef _POSIX_C_SOURCE
12685 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
12686 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
12687 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
12688 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
12689 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
12690 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
12691 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
12692 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
12694 usbp
->st_atime
= sbp
->st_atime
;
12695 usbp
->st_atimensec
= sbp
->st_atimensec
;
12696 usbp
->st_mtime
= sbp
->st_mtime
;
12697 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12698 usbp
->st_ctime
= sbp
->st_ctime
;
12699 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12700 usbp
->st_birthtime
= sbp
->st_birthtime
;
12701 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12703 usbp
->st_size
= sbp
->st_size
;
12704 usbp
->st_blocks
= sbp
->st_blocks
;
12705 usbp
->st_blksize
= sbp
->st_blksize
;
12706 usbp
->st_flags
= sbp
->st_flags
;
12707 usbp
->st_gen
= sbp
->st_gen
;
12708 usbp
->st_lspare
= sbp
->st_lspare
;
12709 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12710 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
12714 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
12716 bzero(usbp
, sizeof(*usbp
));
12718 usbp
->st_dev
= sbp
->st_dev
;
12719 usbp
->st_ino
= sbp
->st_ino
;
12720 usbp
->st_mode
= sbp
->st_mode
;
12721 usbp
->st_nlink
= sbp
->st_nlink
;
12722 usbp
->st_uid
= sbp
->st_uid
;
12723 usbp
->st_gid
= sbp
->st_gid
;
12724 usbp
->st_rdev
= sbp
->st_rdev
;
12725 #ifndef _POSIX_C_SOURCE
12726 usbp
->st_atimespec
.tv_sec
= (user32_time_t
)sbp
->st_atimespec
.tv_sec
;
12727 usbp
->st_atimespec
.tv_nsec
= (user32_long_t
)sbp
->st_atimespec
.tv_nsec
;
12728 usbp
->st_mtimespec
.tv_sec
= (user32_time_t
)sbp
->st_mtimespec
.tv_sec
;
12729 usbp
->st_mtimespec
.tv_nsec
= (user32_long_t
)sbp
->st_mtimespec
.tv_nsec
;
12730 usbp
->st_ctimespec
.tv_sec
= (user32_time_t
)sbp
->st_ctimespec
.tv_sec
;
12731 usbp
->st_ctimespec
.tv_nsec
= (user32_long_t
)sbp
->st_ctimespec
.tv_nsec
;
12732 usbp
->st_birthtimespec
.tv_sec
= (user32_time_t
)sbp
->st_birthtimespec
.tv_sec
;
12733 usbp
->st_birthtimespec
.tv_nsec
= (user32_long_t
)sbp
->st_birthtimespec
.tv_nsec
;
12735 usbp
->st_atime
= sbp
->st_atime
;
12736 usbp
->st_atimensec
= sbp
->st_atimensec
;
12737 usbp
->st_mtime
= sbp
->st_mtime
;
12738 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
12739 usbp
->st_ctime
= sbp
->st_ctime
;
12740 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
12741 usbp
->st_birthtime
= sbp
->st_birthtime
;
12742 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
12744 usbp
->st_size
= sbp
->st_size
;
12745 usbp
->st_blocks
= sbp
->st_blocks
;
12746 usbp
->st_blksize
= sbp
->st_blksize
;
12747 usbp
->st_flags
= sbp
->st_flags
;
12748 usbp
->st_gen
= sbp
->st_gen
;
12749 usbp
->st_lspare
= sbp
->st_lspare
;
12750 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
12751 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * vnode_purge_callback: per-vnode callback handed to vnode_iterate() by
 * vfs_purge_callback().
 *
 * Calls ubc_msync() over the range 0 .. ubc_getsize(vp) with
 * UBC_PUSHALL | UBC_INVALIDATE, ignores its result, and returns
 * VNODE_RETURNED.  The cargs argument is unused.
 *
 * NOTE(review): the return-type line is not visible in this extract.
 */
12755 * Purge buffer cache for simulating cold starts
12758 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
12760 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
12762 return VNODE_RETURNED
;
/*
 * vfs_purge_callback: per-mount callback handed to vfs_iterate() by
 * vfs_purge().
 *
 * Runs vnode_purge_callback() over the vnodes of mp through
 * vnode_iterate() with VNODE_WAIT | VNODE_ITERATE_ALL, then returns
 * VFS_RETURNED.  The arg argument is unused.
 *
 * NOTE(review): the return-type line is not visible in this extract.
 */
12766 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
12768 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
12770 return VFS_RETURNED
;
/*
 * vfs_purge: system call entry point that applies vfs_purge_callback() to
 * mounts through vfs_iterate() with flags 0.
 *
 * The credential returned by kauth_cred_get() is tested with
 * kauth_cred_issuser() first.  None of the three parameters is used.
 *
 * NOTE(review): the return-type line, the body of the non-superuser branch
 * and the final return are not visible in this extract; confirm the
 * returned values against the complete source.
 */
12774 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
12776 if (!kauth_cred_issuser(kauth_cred_get())) {
12780 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
12786 * gets the vnode associated with the (unnamed) snapshot directory
12787 * for a Filesystem. The snapshot directory vnode is returned with
12788 * an iocount on it.
/*
 * Thin wrapper: forwards to VFS_VGET_SNAPDIR() on the mount of rvp
 * (vnode_mount(rvp)), passing sdvpp and ctx through unchanged, and
 * returns whatever that call returns.
 */
12791 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
12793 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
12797 * Get the snapshot vnode.
12799 * If successful, the call returns with an iocount on *rvpp, *sdvpp and
12800 * needs nameidone() on ndp.
12802 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12804 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
/*
 * Common front end used by the snapshot_* operations.  In order, it:
 *   - resolves dirfd to a vnode (*rvpp) with vnode_getfromfd();
 *   - tests that vnode with vnode_isvroot();
 *   - queries f_capabilities on its mount and tests VOL_CAP_INT_SNAPSHOT;
 *   - obtains the snapshot directory (*sdvpp) via vnode_get_snapdir();
 *   - copies the user-supplied name into a ZV_NAMEI buffer and validates it;
 *   - runs the MAC hook that matches op (CREATE or DELETE);
 *   - runs namei() on the name with ndp->ni_dvp set to *sdvpp.
 * op and pathop are handed to NDINIT() as given by the caller.
 */
12808 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12809 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12810 #if !CONFIG_TRIGGERS
12813 enum path_operation pathop
,
12819 struct vfs_attr vfa
;
12824 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12829 if (!vnode_isvroot(*rvpp
)) {
12834 /* Make sure the filesystem supports snapshots */
12835 VFSATTR_INIT(&vfa
);
12836 VFSATTR_WANTED(&vfa
, f_capabilities
);
/*
 * vfs_getattr() must succeed, f_capabilities must be reported as
 * supported, and VOL_CAP_INT_SNAPSHOT must be set in both the valid[]
 * and the capabilities[] word of VOL_CAPABILITIES_INTERFACES.
 */
12837 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12838 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12839 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12840 VOL_CAP_INT_SNAPSHOT
)) ||
12841 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12842 VOL_CAP_INT_SNAPSHOT
))) {
12847 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
/* Kernel-side copy of the name; copyinstr() is bounded by MAXPATHLEN. */
12852 name_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
12853 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12859 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12860 * (the length returned by copyinstr includes the terminating NUL)
12862 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12863 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
/*
 * Advance i to the first '/' in the name, if there is one; ending the
 * scan with i < name_len therefore means a slash was found.
 */
12867 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12870 if (i
< (int)name_len
) {
/* Only CREATE and DELETE select a MAC hook in this chain. */
12876 if (op
== CREATE
) {
12877 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12879 } else if (op
== DELETE
) {
12880 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12888 /* Check if the snapshot already exists ... */
/*
 * The path is the kernel buffer (UIO_SYSSPACE) and the starting directory
 * is supplied through ni_dvp together with the USEDVP flag.
 */
12889 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12890 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12891 ndp
->ni_dvp
= *sdvpp
;
12893 error
= namei(ndp
);
/* The ZV_NAMEI buffer is released once namei() has run. */
12895 zfree(ZV_NAMEI
, name_buf
);
12911 * create a filesystem snapshot (for supporting filesystems)
12913 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12914 * We get to the (unnamed) snapshot directory vnode and create the vnode
12915 * for the snapshot in it.
12919 * a) Passed in name for snapshot cannot have slashes.
12920 * b) name can't be "." or ".."
12922 * Since this requires superuser privileges, vnode_authorize calls are not
/*
 * Creates a snapshot entry named by the user string `name`.
 * Resolves the snapshot directory and the name through
 * vnode_get_snapshot(..., CREATE, ...), then creates a VREG node with
 * mode 0 in the snapshot directory using vn_create() with
 * VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT.  flags is unused.
 * The nameidata and vnode_attr scratch structures are taken from
 * KHEAP_TEMP and given back with kheap_free().
 */
12925 static int __attribute__((noinline
))
12926 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12929 vnode_t rvp
, snapdvp
;
12931 struct nameidata
*ndp
;
/* nameidata is heap-allocated; it is freed at the bottom of the function. */
12933 ndp
= kheap_alloc(KHEAP_TEMP
, sizeof(*ndp
), Z_WAITOK
);
12935 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, ndp
, CREATE
,
/* A vnode handed back by the lookup in ni_vp is released here. */
12942 vnode_put(ndp
->ni_vp
);
12945 struct vnode_attr
*vap
;
12946 vnode_t vp
= NULLVP
;
12948 vap
= kheap_alloc(KHEAP_TEMP
, sizeof(*vap
), Z_WAITOK
);
/* The new node is requested as a regular file (VREG) with mode 0. */
12951 VATTR_SET(vap
, va_type
, VREG
);
12952 VATTR_SET(vap
, va_mode
, 0);
12954 error
= vn_create(snapdvp
, &vp
, ndp
, vap
,
12955 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12956 if (!error
&& vp
) {
12960 kheap_free(KHEAP_TEMP
, vap
, sizeof(*vap
));
12964 vnode_put(snapdvp
);
12967 kheap_free(KHEAP_TEMP
, ndp
, sizeof(*ndp
));
12973 * Delete a Filesystem snapshot
12975 * get the vnode for the unnamed snapshot directory and the snapshot and
12976 * delete the snapshot.
/*
 * Deletes the snapshot named by the user string `name`.
 * Looks the snapshot up with vnode_get_snapshot(..., DELETE, ...) and
 * removes it from the snapshot directory via VNOP_REMOVE(), passing
 * VNODE_REMOVE_SKIP_NAMESPACE_EVENT.  ni_vp and snapdvp are then released
 * with vnode_put() and the KHEAP_TEMP nameidata is freed.
 * flags is unused.
 */
12978 static int __attribute__((noinline
))
12979 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12982 vnode_t rvp
, snapdvp
;
12984 struct nameidata
*ndp
;
12986 ndp
= kheap_alloc(KHEAP_TEMP
, sizeof(*ndp
), Z_WAITOK
);
12988 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, ndp
, DELETE
,
/* The component name produced by the lookup (ni_cnd) is reused for the remove. */
12994 error
= VNOP_REMOVE(snapdvp
, ndp
->ni_vp
, &ndp
->ni_cnd
,
12995 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12997 vnode_put(ndp
->ni_vp
);
12999 vnode_put(snapdvp
);
13002 kheap_free(KHEAP_TEMP
, ndp
, sizeof(*ndp
));
13008 * Revert a filesystem to a snapshot
13010 * Marks the filesystem to revert to the given snapshot on next mount.
/*
 * Reverts the filesystem behind dirfd to the snapshot named `name`.
 * Resolves dirfd to its mount, copies the name into a ZV_NAMEI buffer,
 * runs mac_mount_check_snapshot_revert(), and issues
 * VFS_IOCTL(VFSIOC_REVERT_SNAPSHOT) with a hand-built componentname.
 * A second attempt looks the snapshot up with vnode_get_snapshot() and
 * issues VNOP_IOCTL(APFSIOC_REVERT_TO_SNAPSHOT) on the snapshot vnode.
 * flags is unused.
 */
13012 static int __attribute__((noinline
))
13013 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
13019 struct fs_snapshot_revert_args revert_data
;
13020 struct componentname cnp
;
13024 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
13028 mp
= vnode_mount(rvp
);
13030 name_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
13031 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
/*
 * NOTE(review): name_buf is freed here and at three more points below;
 * presumably each zfree() sits on a distinct exit path -- confirm that no
 * path can reach two of them.
 */
13033 zfree(ZV_NAMEI
, name_buf
);
13039 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
13041 zfree(ZV_NAMEI
, name_buf
);
13048 * Grab mount_iterref so that we can release the vnode,
13049 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
13051 error
= mount_iterref(mp
, 0);
13054 zfree(ZV_NAMEI
, name_buf
);
/*
 * Minimal componentname: zeroed, then the path buffer and the name pointer
 * are both aimed at name_buf.
 */
13058 memset(&cnp
, 0, sizeof(cnp
));
13059 cnp
.cn_pnbuf
= (char *)name_buf
;
13060 cnp
.cn_nameiop
= LOOKUP
;
13061 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
13062 cnp
.cn_pnlen
= MAXPATHLEN
;
13063 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
/*
 * NOTE(review): name_len is the count reported by copyinstr(); if that
 * count includes the terminating NUL, cn_namelen does too -- confirm the
 * filesystem side expects that.
 */
13064 cnp
.cn_namelen
= (int)name_len
;
13065 revert_data
.sr_cnp
= &cnp
;
13067 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
/* Pairs with the mount_iterref() call made before the ioctl. */
13068 mount_iterdrop(mp
);
13069 zfree(ZV_NAMEI
, name_buf
);
13072 /* If there was any error, try again using VNOP_IOCTL */
13075 struct nameidata namend
;
13077 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
13084 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
13087 vnode_put(namend
.ni_vp
);
13088 nameidone(&namend
);
13089 vnode_put(snapdvp
);
13097 * rename a Filesystem snapshot
13099 * get the vnode for the unnamed snapshot directory and the snapshot and
13100 * rename the snapshot. This is a very specialised (and simple) case of
13101 * rename(2) (which has to deal with a lot more complications). It differs
13102 * slightly from rename(2) in that EEXIST is returned if the new name exists.
/*
 * Renames snapshot `old` to `new` inside the snapshot directory.
 * Looks the existing snapshot up with vnode_get_snapshot(..., DELETE, ...),
 * copies in and validates the new name, runs
 * mac_mount_check_snapshot_create() for it, looks the new name up in the
 * same directory with namei(), and calls VNOP_RENAME() with snapdvp as
 * both the source and destination directory and NULLVP as the target
 * vnode.  flags is unused.
 */
13104 static int __attribute__((noinline
))
13105 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
13106 __unused
uint32_t flags
, vfs_context_t ctx
)
13108 vnode_t rvp
, snapdvp
;
13110 caddr_t newname_buf
;
13113 struct nameidata
*fromnd
, *tond
;
13114 /* carving out a chunk for structs that are too big to be on stack. */
13116 struct nameidata from_node
;
13117 struct nameidata to_node
;
/* One KHEAP_TEMP allocation holds both nameidata; fromnd/tond point into it. */
13120 __rename_data
= kheap_alloc(KHEAP_TEMP
, sizeof(*__rename_data
), Z_WAITOK
);
13121 fromnd
= &__rename_data
->from_node
;
13122 tond
= &__rename_data
->to_node
;
13124 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
13129 fvp
= fromnd
->ni_vp
;
13131 newname_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
13132 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
13138 * Some sanity checks- new name can't be empty, "." or ".." or have
13140 * (the length returned by copyinstr includes the terminating NUL)
13142 * The FS rename VNOP is suppossed to handle this but we'll pick it
13145 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
13146 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
/*
 * Advance i to the first '/' in the new name, if there is one; ending the
 * scan with i < name_len therefore means a slash was found.
 */
13150 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
13153 if (i
< (int)name_len
) {
/* The new name is vetted with the snapshot *create* MAC hook. */
13159 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
/* Destination lookup starts from the snapshot directory supplied in ni_dvp. */
13166 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
13167 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
13168 tond
->ni_dvp
= snapdvp
;
13170 error
= namei(tond
);
13173 } else if (tond
->ni_vp
) {
13175 * snapshot rename behaves differently than rename(2) - if the
13176 * new name exists, EEXIST is returned.
13178 vnode_put(tond
->ni_vp
);
13183 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
13184 &tond
->ni_cnd
, ctx
);
13189 zfree(ZV_NAMEI
, newname_buf
);
13191 vnode_put(snapdvp
);
13195 kheap_free(KHEAP_TEMP
, __rename_data
, sizeof(*__rename_data
));
13200 * Mount a Filesystem snapshot
13202 * get the vnode for the unnamed snapshot directory and the snapshot and
13203 * mount the snapshot.
/*
 * Mounts the snapshot `name` on the user-supplied `directory`.
 * Looks the snapshot up with vnode_get_snapshot(..., LOOKUP, ...),
 * resolves `directory` with namei() (FOLLOW | AUDITVNPATH1 | WANTPARENT),
 * runs mac_mount_check_snapshot_mount(), and hands off to mount_common()
 * with KERNEL_MOUNT_SNAPSHOT, using the f_fstypename of the mount that
 * backs rvp.  mnt_data is unused.
 */
13205 static int __attribute__((noinline
))
13206 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
13207 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
13210 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
13211 struct fs_snapshot_mount_args smnt_data
;
13213 struct nameidata
*snapndp
, *dirndp
;
13214 /* carving out a chunk for structs that are too big to be on stack. */
13216 struct nameidata snapnd
;
13217 struct nameidata dirnd
;
13218 } * __snapshot_mount_data
;
/* One KHEAP_TEMP allocation holds both nameidata; snapndp/dirndp point into it. */
13220 __snapshot_mount_data
= kheap_alloc(KHEAP_TEMP
,
13221 sizeof(*__snapshot_mount_data
), Z_WAITOK
);
13222 snapndp
= &__snapshot_mount_data
->snapnd
;
13223 dirndp
= &__snapshot_mount_data
->dirnd
;
13225 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
13231 snapvp
= snapndp
->ni_vp
;
/* Detects rvp having no mount, or sitting on dead_mountp. */
13232 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
13237 /* Get the vnode to be covered */
13238 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
13239 UIO_USERSPACE
, directory
, ctx
);
13240 error
= namei(dirndp
);
13245 vp
= dirndp
->ni_vp
;
13246 pvp
= dirndp
->ni_dvp
;
13247 mp
= vnode_mount(rvp
);
/* Detects the covered vnode being the VROOT vnode of the MNT_ROOTFS mount. */
13249 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
13255 error
= mac_mount_check_snapshot_mount(ctx
, rvp
, vp
, &dirndp
->ni_cnd
, snapndp
->ni_cnd
.cn_nameptr
,
13256 mp
->mnt_vfsstat
.f_fstypename
);
/*
 * The source mount and the snapshot's component name travel to
 * mount_common() inside smnt_data, whose kernel address is passed via
 * CAST_USER_ADDR_T.
 */
13262 smnt_data
.sm_mp
= mp
;
13263 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
/*
 * NOTE(review): flags is declared __unused in the signature yet is read
 * here (masked down to MNT_DONTBROWSE) -- the annotation looks stale.
 */
13264 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
13265 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
13266 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
13274 vnode_put(snapdvp
);
13276 nameidone(snapndp
);
13278 kheap_free(KHEAP_TEMP
, __snapshot_mount_data
,
13279 sizeof(*__snapshot_mount_data
));
13284 * Root from a snapshot of the filesystem
13286 * Marks the filesystem to root from the given snapshot on next boot.
/*
 * Asks the filesystem behind dirfd to root from the snapshot `name`.
 * Resolves dirfd to its mount, copies the name into a ZV_NAMEI buffer,
 * takes a mount_iterref(), and issues VFS_IOCTL(VFSIOC_ROOT_SNAPSHOT)
 * with a hand-built componentname; the iterref is dropped and the buffer
 * freed afterwards.  No MAC hook is called in the visible code (see the
 * XXX note below).  flags is unused.
 */
13288 static int __attribute__((noinline
))
13289 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
13295 struct fs_snapshot_root_args root_data
;
13296 struct componentname cnp
;
13300 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
13304 mp
= vnode_mount(rvp
);
13306 name_buf
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
);
13307 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
/*
 * NOTE(review): name_buf is freed here and at two more points below;
 * presumably each zfree() sits on a distinct exit path -- confirm that no
 * path can reach two of them.
 */
13309 zfree(ZV_NAMEI
, name_buf
);
13314 // XXX MAC checks ?
13317 * Grab mount_iterref so that we can release the vnode,
13318 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
13320 error
= mount_iterref(mp
, 0);
13323 zfree(ZV_NAMEI
, name_buf
);
/*
 * Minimal componentname: zeroed, then the path buffer and the name pointer
 * are both aimed at name_buf.
 */
13327 memset(&cnp
, 0, sizeof(cnp
));
13328 cnp
.cn_pnbuf
= (char *)name_buf
;
13329 cnp
.cn_nameiop
= LOOKUP
;
13330 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
13331 cnp
.cn_pnlen
= MAXPATHLEN
;
13332 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
/*
 * NOTE(review): name_len is the count reported by copyinstr(); if that
 * count includes the terminating NUL, cn_namelen does too -- confirm the
 * filesystem side expects that.
 */
13333 cnp
.cn_namelen
= (int)name_len
;
13334 root_data
.sr_cnp
= &cnp
;
13336 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
/* Pairs with the mount_iterref() call made before the ioctl. */
13338 mount_iterdrop(mp
);
13339 zfree(ZV_NAMEI
, name_buf
);
13345 * FS snapshot operations dispatcher
13348 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
13349 __unused
int32_t *retval
)
13352 vfs_context_t ctx
= vfs_context_current();
13354 AUDIT_ARG(fd
, uap
->dirfd
);
13355 AUDIT_ARG(value32
, uap
->op
);
13357 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
13363 * Enforce user authorization for snapshot modification operations,
13364 * or if trying to root from snapshot.
13366 if (uap
->op
!= SNAPSHOT_OP_MOUNT
) {
13367 vnode_t dvp
= NULLVP
;
13368 vnode_t devvp
= NULLVP
;
13371 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
13375 mp
= vnode_mount(dvp
);
13376 devvp
= mp
->mnt_devvp
;
13378 /* get an iocount on devvp */
13379 if (devvp
== NULLVP
) {
13380 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
13381 /* for mounts which arent block devices */
13382 if (error
== ENOENT
) {
13386 error
= vnode_getwithref(devvp
);
13394 if ((vfs_context_issuser(ctx
) == 0) &&
13395 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
13407 case SNAPSHOT_OP_CREATE
:
13408 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
13410 case SNAPSHOT_OP_DELETE
:
13411 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
13413 case SNAPSHOT_OP_RENAME
:
13414 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
13417 case SNAPSHOT_OP_MOUNT
:
13418 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
13419 uap
->data
, uap
->flags
, ctx
);
13421 case SNAPSHOT_OP_REVERT
:
13422 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
13424 #if CONFIG_MNT_ROOTSNAP
13425 case SNAPSHOT_OP_ROOT
:
13426 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
13428 #endif /* CONFIG_MNT_ROOTSNAP */