2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <vfs/vfs_disk_conditioner.h>
112 #include <security/audit/audit.h>
113 #include <bsm/audit_kevents.h>
115 #include <mach/mach_types.h>
116 #include <kern/kern_types.h>
117 #include <kern/kalloc.h>
118 #include <kern/task.h>
120 #include <vm/vm_pageout.h>
121 #include <vm/vm_protos.h>
123 #include <libkern/OSAtomic.h>
124 #include <pexpert/pexpert.h>
125 #include <IOKit/IOBSD.h>
128 #include <miscfs/routefs/routefs.h>
132 #include <security/mac.h>
133 #include <security/mac_framework.h>
137 #define GET_PATH(x) \
138 (x) = get_pathbuff();
139 #define RELEASE_PATH(x) \
142 #define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144 #define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146 #endif /* CONFIG_FSE */
148 #ifndef HFS_GET_BOOT_INFO
149 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
152 #ifndef HFS_SET_BOOT_INFO
153 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
156 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
157 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
160 extern void disk_conditioner_unmount(mount_t mp
);
162 /* struct for checkdirs iteration */
167 /* callback for checkdirs iteration */
168 static int checkdirs_callback(proc_t p
, void * arg
);
170 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
171 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
172 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
173 static int getfsstat_callback(mount_t mp
, void * arg
);
174 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
175 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
176 static int sync_callback(mount_t
, void *);
177 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
178 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
179 boolean_t partial_copy
);
180 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
182 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
183 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
184 struct componentname
*cnp
, user_addr_t fsmountargs
,
185 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
187 void vfs_notify_mount(vnode_t pdvp
);
189 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
191 struct fd_vn_data
* fg_vn_data_alloc(void);
194 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
195 * Concurrent lookups (or lookups by ids) on hard links can cause the
196 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
197 * does) to return ENOENT as the path cannot be returned from the name cache
198 * alone. We have no option but to retry and hope to get one namei->reverse path
199 * generation done without an intervening lookup, lookup by id on the hard link
200 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
201 * which currently are the MAC hooks for rename, unlink and rmdir.
203 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
205 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
207 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
209 #ifdef CONFIG_IMGSRC_ACCESS
210 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
211 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
212 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
213 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
214 static void mount_end_update(mount_t mp
);
215 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
216 #endif /* CONFIG_IMGSRC_ACCESS */
219 #if CONFIG_MNT_ROOTSNAP
220 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
222 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
225 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
228 int sync_internal(void);
231 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
233 extern lck_grp_t
*fd_vn_lck_grp
;
234 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
235 extern lck_attr_t
*fd_vn_lck_attr
;
238 * incremented each time a mount or unmount operation occurs
239 * used to invalidate the cached value of the rootvp in the
240 * mount structure utilized by cache_lookup_path
242 uint32_t mount_generation
= 0;
244 /* counts number of mount and unmount operations */
245 unsigned int vfs_nummntops
= 0;
247 extern const struct fileops vnops
;
248 #if CONFIG_APPLEDOUBLE
249 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
250 #endif /* CONFIG_APPLEDOUBLE */
253 * Virtual File System System Calls
256 #if NFSCLIENT || DEVFS || ROUTEFS
258 * Private in-kernel mounting spi (NFS only, not exported)
262 vfs_iskernelmount(mount_t mp
)
264 return (mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
;
269 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
270 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
276 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
277 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
280 * Get the vnode to be covered if it's not supplied
291 char *pnbuf
= CAST_DOWN(char *, path
);
293 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
294 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
298 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
299 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
309 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
312 * Mount a file system.
316 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
318 struct __mac_mount_args muap
;
320 muap
.type
= uap
->type
;
321 muap
.path
= uap
->path
;
322 muap
.flags
= uap
->flags
;
323 muap
.data
= uap
->data
;
324 muap
.mac_p
= USER_ADDR_NULL
;
325 return __mac_mount(p
, &muap
, retval
);
329 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
331 struct componentname cn
;
332 vfs_context_t ctx
= vfs_context_current();
335 int flags
= uap
->flags
;
336 char fstypename
[MFSNAMELEN
];
337 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
341 AUDIT_ARG(fd
, uap
->fd
);
342 AUDIT_ARG(fflags
, flags
);
343 /* fstypename will get audited by mount_common */
345 /* Sanity check the flags */
346 if (flags
& (MNT_IMGSRC_BY_INDEX
| MNT_ROOTFS
)) {
350 if (flags
& MNT_UNION
) {
354 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
359 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
363 if ((error
= vnode_getwithref(vp
)) != 0) {
368 pvp
= vnode_getparent(vp
);
375 memset(&cn
, 0, sizeof(struct componentname
));
376 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
377 cn
.cn_pnlen
= MAXPATHLEN
;
379 if ((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
380 FREE(cn
.cn_pnbuf
, M_TEMP
);
387 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
389 FREE(cn
.cn_pnbuf
, M_TEMP
);
398 vfs_notify_mount(vnode_t pdvp
)
400 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
401 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
406 * Mount a file system taking into account MAC label behavior.
407 * See mount(2) man page for more information
409 * Parameters: p Process requesting the mount
410 * uap User argument descriptor (see below)
413 * Indirect: uap->type Filesystem type
414 * uap->path Path to mount
415 * uap->data Mount arguments
416 * uap->mac_p MAC info
417 * uap->flags Mount flags
423 boolean_t root_fs_upgrade_try
= FALSE
;
426 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
430 int need_nameidone
= 0;
431 vfs_context_t ctx
= vfs_context_current();
432 char fstypename
[MFSNAMELEN
];
435 char *labelstr
= NULL
;
436 int flags
= uap
->flags
;
438 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
439 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
444 * Get the fs type name from user space
446 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
452 * Get the vnode to be covered
454 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
455 UIO_USERSPACE
, uap
->path
, ctx
);
464 #ifdef CONFIG_IMGSRC_ACCESS
465 /* Mounting image source cannot be batched with other operations */
466 if (flags
== MNT_IMGSRC_BY_INDEX
) {
467 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
468 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
471 #endif /* CONFIG_IMGSRC_ACCESS */
475 * Get the label string (if any) from user space
477 if (uap
->mac_p
!= USER_ADDR_NULL
) {
482 struct user64_mac mac64
;
483 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
484 mac
.m_buflen
= mac64
.m_buflen
;
485 mac
.m_string
= mac64
.m_string
;
487 struct user32_mac mac32
;
488 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
489 mac
.m_buflen
= mac32
.m_buflen
;
490 mac
.m_string
= mac32
.m_string
;
495 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
496 (mac
.m_buflen
< 2)) {
500 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
501 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
505 AUDIT_ARG(mac_string
, labelstr
);
507 #endif /* CONFIG_MACF */
509 AUDIT_ARG(fflags
, flags
);
512 if (flags
& MNT_UNION
) {
513 /* No union mounts on release kernels */
519 if ((vp
->v_flag
& VROOT
) &&
520 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
521 if (!(flags
& MNT_UNION
)) {
525 * For a union mount on '/', treat it as fresh
526 * mount instead of update.
527 * Otherwise, union mounting on '/' used to panic the
528 * system before, since mnt_vnodecovered was found to
529 * be NULL for '/' which is required for unionlookup
530 * after it gets ENOENT on union mount.
532 flags
= (flags
& ~(MNT_UPDATE
));
536 if ((flags
& MNT_RDONLY
) == 0) {
537 /* Release kernels are not allowed to mount "/" as rw */
543 * See 7392553 for more details on why this check exists.
544 * Suffice to say: If this check is ON and something tries
545 * to mount the rootFS RW, we'll turn off the codesign
546 * bitmap optimization.
548 #if CHECK_CS_VALIDATION_BITMAP
549 if ((flags
& MNT_RDONLY
) == 0) {
550 root_fs_upgrade_try
= TRUE
;
555 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
556 labelstr
, FALSE
, ctx
);
562 FREE(labelstr
, M_MACTEMP
);
564 #endif /* CONFIG_MACF */
572 if (need_nameidone
) {
580 * common mount implementation (final stage of mounting)
583 * fstypename file system type (i.e. its vfs name)
584 * pvp parent of covered vnode
586 * cnp component name (ie path) of covered vnode
587 * flags generic mount flags
588 * fsmountargs file system specific data
589 * labelstr optional MAC label
590 * kernelmount TRUE for mounts initiated from inside the kernel
591 * ctx caller's context
594 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
595 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
596 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
599 #pragma unused(labelstr)
601 struct vnode
*devvp
= NULLVP
;
602 struct vnode
*device_vnode
= NULLVP
;
607 struct vfstable
*vfsp
= (struct vfstable
*)0;
608 struct proc
*p
= vfs_context_proc(ctx
);
610 user_addr_t devpath
= USER_ADDR_NULL
;
613 boolean_t vfsp_ref
= FALSE
;
614 boolean_t is_rwlock_locked
= FALSE
;
615 boolean_t did_rele
= FALSE
;
616 boolean_t have_usecount
= FALSE
;
619 * Process an update for an existing mount
621 if (flags
& MNT_UPDATE
) {
622 if ((vp
->v_flag
& VROOT
) == 0) {
628 /* unmount in progress return error */
630 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
636 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
637 is_rwlock_locked
= TRUE
;
639 * We only allow the filesystem to be reloaded if it
640 * is currently mounted read-only.
642 if ((flags
& MNT_RELOAD
) &&
643 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
649 * If content protection is enabled, update mounts are not
650 * allowed to turn it off.
652 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
653 ((flags
& MNT_CPROTECT
) == 0)) {
658 #ifdef CONFIG_IMGSRC_ACCESS
659 /* Can't downgrade the backer of the root FS */
660 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
661 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
665 #endif /* CONFIG_IMGSRC_ACCESS */
668 * Only root, or the user that did the original mount is
669 * permitted to update it.
671 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
672 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
676 error
= mac_mount_check_remount(ctx
, mp
);
682 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
683 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
685 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
686 flags
|= MNT_NOSUID
| MNT_NODEV
;
687 if (mp
->mnt_flag
& MNT_NOEXEC
) {
695 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
697 vfsp
= mp
->mnt_vtable
;
702 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
703 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
705 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
706 flags
|= MNT_NOSUID
| MNT_NODEV
;
707 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
) {
712 /* XXXAUDIT: Should we capture the type on the error path as well? */
713 AUDIT_ARG(text
, fstypename
);
715 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
716 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
717 vfsp
->vfc_refcount
++;
729 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
731 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
732 error
= EINVAL
; /* unsupported request */
736 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
742 * Allocate and initialize the filesystem (mount_t)
744 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
746 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
749 /* Initialize the default IO constraints */
750 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
751 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
752 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
753 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
754 mp
->mnt_devblocksize
= DEV_BSIZE
;
755 mp
->mnt_alignmentmask
= PAGE_MASK
;
756 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
759 mp
->mnt_realrootvp
= NULLVP
;
760 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
762 TAILQ_INIT(&mp
->mnt_vnodelist
);
763 TAILQ_INIT(&mp
->mnt_workerqueue
);
764 TAILQ_INIT(&mp
->mnt_newvnodes
);
766 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
767 is_rwlock_locked
= TRUE
;
768 mp
->mnt_op
= vfsp
->vfc_vfsops
;
769 mp
->mnt_vtable
= vfsp
;
770 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
771 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
772 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
773 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
774 mp
->mnt_vnodecovered
= vp
;
775 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
776 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
777 mp
->mnt_devbsdunit
= 0;
779 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
780 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
782 #if NFSCLIENT || DEVFS || ROUTEFS
784 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
786 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) {
787 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
789 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
794 * Set the mount level flags.
796 if (flags
& MNT_RDONLY
) {
797 mp
->mnt_flag
|= MNT_RDONLY
;
798 } else if (mp
->mnt_flag
& MNT_RDONLY
) {
799 // disallow read/write upgrades of file systems that
800 // had the TYPENAME_OVERRIDE feature set.
801 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
805 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
807 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
808 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
809 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
810 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
811 MNT_QUARANTINE
| MNT_CPROTECT
);
816 * On release builds of iOS based platforms, always enforce NOSUID on
817 * all mounts. We do this here because we can catch update mounts as well as
818 * non-update mounts in this case.
820 mp
->mnt_flag
|= (MNT_NOSUID
);
824 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
825 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
826 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
827 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
828 MNT_QUARANTINE
| MNT_CPROTECT
);
831 if (flags
& MNT_MULTILABEL
) {
832 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
836 mp
->mnt_flag
|= MNT_MULTILABEL
;
840 * Process device path for local file systems if requested
842 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
843 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
844 if (vfs_context_is64bit(ctx
)) {
845 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
848 fsmountargs
+= sizeof(devpath
);
851 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
854 /* munge into LP64 addr */
855 devpath
= CAST_USER_ADDR_T(tmp
);
856 fsmountargs
+= sizeof(tmp
);
859 /* Lookup device and authorize access to it */
863 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
864 if ((error
= namei(&nd
))) {
868 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
873 if (devvp
->v_type
!= VBLK
) {
877 if (major(devvp
->v_rdev
) >= nblkdev
) {
882 * If mount by non-root, then verify that user has necessary
883 * permissions on the device.
885 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
886 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
888 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
889 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
891 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) {
896 /* On first mount, preflight and open device */
897 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
898 if ((error
= vnode_ref(devvp
))) {
902 * Disallow multiple mounts of the same device.
903 * Disallow mounting of a device that is currently in use
904 * (except for root, which might share swap device for miniroot).
905 * Flush out any old buffers remaining from a previous use.
907 if ((error
= vfs_mountedon(devvp
))) {
911 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
915 if ((error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) {
919 if ((error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) {
923 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
925 error
= mac_vnode_check_open(ctx
,
927 ronly
? FREAD
: FREAD
| FWRITE
);
932 if ((error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
))) {
936 mp
->mnt_devvp
= devvp
;
937 device_vnode
= devvp
;
938 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
939 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
940 (device_vnode
= mp
->mnt_devvp
)) {
944 * If upgrade to read-write by non-root, then verify
945 * that user has necessary permissions on the device.
947 vnode_getalways(device_vnode
);
949 if (suser(vfs_context_ucred(ctx
), NULL
) &&
950 (error
= vnode_authorize(device_vnode
, NULL
,
951 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
953 vnode_put(device_vnode
);
957 /* Tell the device that we're upgrading */
958 dev
= (dev_t
)device_vnode
->v_rdev
;
961 if ((u_int
)maj
>= (u_int
)nblkdev
) {
962 panic("Volume mounted on a device with invalid major number.");
965 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
966 vnode_put(device_vnode
);
967 device_vnode
= NULLVP
;
974 if ((flags
& MNT_UPDATE
) == 0) {
975 mac_mount_label_init(mp
);
976 mac_mount_label_associate(ctx
, mp
);
979 if ((flags
& MNT_UPDATE
) != 0) {
980 error
= mac_mount_check_label_update(ctx
, mp
);
988 * Mount the filesystem.
990 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
991 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
992 (caddr_t
)fsmountargs
, 0, ctx
);
994 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
997 if (flags
& MNT_UPDATE
) {
998 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) {
999 mp
->mnt_flag
&= ~MNT_RDONLY
;
1002 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
1003 mp
->mnt_kern_flag
&= ~MNTK_WANTRDWR
;
1005 mp
->mnt_flag
= flag
; /* restore flag value */
1007 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
1008 lck_rw_done(&mp
->mnt_rwlock
);
1009 is_rwlock_locked
= FALSE
;
1011 enablequotas(mp
, ctx
);
1017 * Put the new filesystem on the mount list after root.
1020 struct vfs_attr vfsattr
;
1022 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1023 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1025 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1028 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1030 * drop reference provided by VFS_ROOT
1040 vnode_lock_spin(vp
);
1041 CLR(vp
->v_flag
, VMOUNT
);
1042 vp
->v_mountedhere
= mp
;
1046 * taking the name_cache_lock exclusively will
1047 * ensure that everyone is out of the fast path who
1048 * might be trying to use a now stale copy of
1049 * vp->v_mountedhere->mnt_realrootvp
1050 * bumping mount_generation causes the cached values
1055 name_cache_unlock();
1057 error
= vnode_ref(vp
);
1062 have_usecount
= TRUE
;
1064 error
= checkdirs(vp
, ctx
);
1066 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1070 * there is no cleanup code here so I have made it void
1071 * we need to revisit this
1073 (void)VFS_START(mp
, 0, ctx
);
1075 if (mount_list_add(mp
) != 0) {
1077 * The system is shutting down trying to umount
1078 * everything, so fail with a plausible errno.
1083 lck_rw_done(&mp
->mnt_rwlock
);
1084 is_rwlock_locked
= FALSE
;
1086 /* Check if this mounted file system supports EAs or named streams. */
1087 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1088 VFSATTR_INIT(&vfsattr
);
1089 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1090 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1091 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1092 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1093 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1094 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1095 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1098 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1099 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1100 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1103 /* Check if this file system supports path from id lookups. */
1104 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1105 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1106 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1107 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1108 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1109 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1112 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1113 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1114 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1117 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1118 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1120 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1121 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1123 /* increment the operations count */
1124 OSAddAtomic(1, &vfs_nummntops
);
1125 enablequotas(mp
, ctx
);
1128 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1131 * cache the IO attributes for the underlying physical media...
1132 * an error return indicates the underlying driver doesn't
1133 * support all the queries necessary... however, reasonable
1134 * defaults will have been set, so no reason to bail or care
1136 vfs_init_io_attributes(device_vnode
, mp
);
1139 /* Now that mount is setup, notify the listeners */
1140 vfs_notify_mount(pvp
);
1141 IOBSDMountChange(mp
, kIOMountChangeMount
);
1143 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1144 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1145 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1146 mp
->mnt_vtable
->vfc_name
, error
);
1149 vnode_lock_spin(vp
);
1150 CLR(vp
->v_flag
, VMOUNT
);
1153 mp
->mnt_vtable
->vfc_refcount
--;
1154 mount_list_unlock();
1157 vnode_rele(device_vnode
);
1158 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
| FWRITE
, ctx
);
1160 lck_rw_done(&mp
->mnt_rwlock
);
1161 is_rwlock_locked
= FALSE
;
1164 * if we get here, we have a mount structure that needs to be freed,
1165 * but since the coveredvp hasn't yet been updated to point at it,
1166 * no need to worry about other threads holding a crossref on this mp
1167 * so it's ok to just free it
1169 mount_lock_destroy(mp
);
1171 mac_mount_label_destroy(mp
);
1173 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1177 * drop I/O count on the device vp if there was one
1179 if (devpath
&& devvp
) {
1185 /* Error condition exits */
1187 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1190 * If the mount has been placed on the covered vp,
1191 * it may have been discovered by now, so we have
1192 * to treat this just like an unmount
1194 mount_lock_spin(mp
);
1195 mp
->mnt_lflag
|= MNT_LDEAD
;
1198 if (device_vnode
!= NULLVP
) {
1199 vnode_rele(device_vnode
);
1200 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
1205 vnode_lock_spin(vp
);
1208 vp
->v_mountedhere
= (mount_t
) 0;
1212 if (have_usecount
) {
1216 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
)) {
1220 if (devpath
&& devvp
) {
1224 /* Release mnt_rwlock only when it was taken */
1225 if (is_rwlock_locked
== TRUE
) {
1226 lck_rw_done(&mp
->mnt_rwlock
);
1230 if (mp
->mnt_crossref
) {
1231 mount_dropcrossref(mp
, vp
, 0);
1233 mount_lock_destroy(mp
);
1235 mac_mount_label_destroy(mp
);
1237 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
1242 vfsp
->vfc_refcount
--;
1243 mount_list_unlock();
1250 * Flush in-core data, check for competing mount attempts,
1254 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1257 #pragma unused(cnp,fsname)
1259 struct vnode_attr va
;
1264 * If the user is not root, ensure that they own the directory
1265 * onto which we are attempting to mount.
1268 VATTR_WANTED(&va
, va_uid
);
1269 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1270 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1271 (!vfs_context_issuser(ctx
)))) {
1277 if ((error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) {
1281 if ((error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) {
1285 if (vp
->v_type
!= VDIR
) {
1290 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1296 error
= mac_mount_check_mount(ctx
, vp
,
1303 vnode_lock_spin(vp
);
1304 SET(vp
->v_flag
, VMOUNT
);
1311 #if CONFIG_IMGSRC_ACCESS
1314 #define IMGSRC_DEBUG(args...) printf(args)
1316 #define IMGSRC_DEBUG(args...) do { } while(0)
1320 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1322 struct nameidata nd
;
1323 vnode_t vp
, realdevvp
;
1327 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1328 if ((error
= namei(&nd
))) {
1329 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1335 if (!vnode_isblk(vp
)) {
1336 IMGSRC_DEBUG("Not block device.\n");
1341 realdevvp
= mp
->mnt_devvp
;
1342 if (realdevvp
== NULLVP
) {
1343 IMGSRC_DEBUG("No device backs the mount.\n");
1348 error
= vnode_getwithref(realdevvp
);
1350 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1354 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1355 IMGSRC_DEBUG("Wrong dev_t.\n");
1360 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1363 * If mount by non-root, then verify that user has necessary
1364 * permissions on the device.
1366 if (!vfs_context_issuser(ctx
)) {
1367 accessmode
= KAUTH_VNODE_READ_DATA
;
1368 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1369 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1371 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1372 IMGSRC_DEBUG("Access denied.\n");
1380 vnode_put(realdevvp
);
1391 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1392 * and call checkdirs()
1395 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1399 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1401 vnode_lock_spin(vp
);
1402 CLR(vp
->v_flag
, VMOUNT
);
1403 vp
->v_mountedhere
= mp
;
1407 * taking the name_cache_lock exclusively will
1408 * insure that everyone is out of the fast path who
1409 * might be trying to use a now stale copy of
1410 * vp->v_mountedhere->mnt_realrootvp
1411 * bumping mount_generation causes the cached values
1416 name_cache_unlock();
1418 error
= vnode_ref(vp
);
1423 error
= checkdirs(vp
, ctx
);
1425 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1432 mp
->mnt_vnodecovered
= NULLVP
;
1438 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1441 vnode_lock_spin(vp
);
1442 vp
->v_mountedhere
= (mount_t
)NULL
;
1445 mp
->mnt_vnodecovered
= NULLVP
;
1449 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1453 /* unmount in progress return error */
1454 mount_lock_spin(mp
);
1455 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1460 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1463 * We only allow the filesystem to be reloaded if it
1464 * is currently mounted read-only.
1466 if ((flags
& MNT_RELOAD
) &&
1467 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1473 * Only root, or the user that did the original mount is
1474 * permitted to update it.
1476 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1477 (!vfs_context_issuser(ctx
))) {
1482 error
= mac_mount_check_remount(ctx
, mp
);
1490 lck_rw_done(&mp
->mnt_rwlock
);
1497 mount_end_update(mount_t mp
)
1499 lck_rw_done(&mp
->mnt_rwlock
);
1503 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1507 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1511 vp
= imgsrc_rootvnodes
[height
];
1512 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1521 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1522 const char *fsname
, vfs_context_t ctx
,
1523 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1527 boolean_t placed
= FALSE
;
1528 vnode_t devvp
= NULLVP
;
1529 struct vfstable
*vfsp
;
1530 user_addr_t devpath
;
1531 char *old_mntonname
;
1536 /* If we didn't imageboot, nothing to move */
1537 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1541 /* Only root can do this */
1542 if (!vfs_context_issuser(ctx
)) {
1546 IMGSRC_DEBUG("looking for root vnode.\n");
1549 * Get root vnode of filesystem we're moving.
1553 struct user64_mnt_imgsrc_args mia64
;
1554 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1556 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1560 height
= mia64
.mi_height
;
1561 flags
= mia64
.mi_flags
;
1562 devpath
= mia64
.mi_devpath
;
1564 struct user32_mnt_imgsrc_args mia32
;
1565 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1567 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1571 height
= mia32
.mi_height
;
1572 flags
= mia32
.mi_flags
;
1573 devpath
= mia32
.mi_devpath
;
1577 * For binary compatibility--assumes one level of nesting.
1580 if ((error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) {
1585 if ((error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) {
1589 /* munge into LP64 addr */
1590 devpath
= CAST_USER_ADDR_T(tmp
);
1598 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1602 error
= get_imgsrc_rootvnode(height
, &rvp
);
1604 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1608 IMGSRC_DEBUG("got root vnode.\n");
1610 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1612 /* Can only move once */
1613 mp
= vnode_mount(rvp
);
1614 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1615 IMGSRC_DEBUG("Already moved.\n");
1620 IMGSRC_DEBUG("Starting updated.\n");
1622 /* Get exclusive rwlock on mount, authorize update on mp */
1623 error
= mount_begin_update(mp
, ctx
, 0);
1625 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1630 * It can only be moved once. Flag is set under the rwlock,
1631 * so we're now safe to proceed.
1633 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1634 IMGSRC_DEBUG("Already moved [2]\n");
1639 IMGSRC_DEBUG("Preparing coveredvp.\n");
1641 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1642 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1644 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1648 IMGSRC_DEBUG("Covered vp OK.\n");
1650 /* Sanity check the name caller has provided */
1651 vfsp
= mp
->mnt_vtable
;
1652 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1653 IMGSRC_DEBUG("Wrong fs name.\n");
1658 /* Check the device vnode and update mount-from name, for local filesystems */
1659 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1660 IMGSRC_DEBUG("Local, doing device validation.\n");
1662 if (devpath
!= USER_ADDR_NULL
) {
1663 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1665 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1674 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1675 * and increment the name cache's mount generation
1678 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1679 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1686 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1687 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1689 /* Forbid future moves */
1691 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1694 /* Finally, add to mount list, completely ready to go */
1695 if (mount_list_add(mp
) != 0) {
1697 * The system is shutting down trying to umount
1698 * everything, so fail with a plausible errno.
1704 mount_end_update(mp
);
1706 FREE(old_mntonname
, M_TEMP
);
1708 vfs_notify_mount(pvp
);
1712 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1715 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1720 * Placing the mp on the vnode clears VMOUNT,
1721 * so cleanup is different after that point
1724 /* Rele the vp, clear VMOUNT and v_mountedhere */
1725 undo_place_on_covered_vp(mp
, vp
);
1727 vnode_lock_spin(vp
);
1728 CLR(vp
->v_flag
, VMOUNT
);
1732 mount_end_update(mp
);
1736 FREE(old_mntonname
, M_TEMP
);
1740 #endif /* CONFIG_IMGSRC_ACCESS */
1743 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1745 struct nameidata qnd
;
1747 char qfpath
[MAXPATHLEN
];
1748 const char *qfname
= QUOTAFILENAME
;
1749 const char *qfopsname
= QUOTAOPSNAME
;
1750 const char *qfextension
[] = INITQFNAMES
;
1752 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1753 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) {
1757 * Enable filesystem disk quotas if necessary.
1758 * We ignore errors as this should not interfere with final mount
1760 for (type
= 0; type
< MAXQUOTAS
; type
++) {
1761 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1762 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1763 CAST_USER_ADDR_T(qfpath
), ctx
);
1764 if (namei(&qnd
) != 0) {
1765 continue; /* option file to trigger quotas is not present */
1767 vnode_put(qnd
.ni_vp
);
1769 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1771 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1778 checkdirs_callback(proc_t p
, void * arg
)
1780 struct cdirargs
* cdrp
= (struct cdirargs
*)arg
;
1781 vnode_t olddp
= cdrp
->olddp
;
1782 vnode_t newdp
= cdrp
->newdp
;
1783 struct filedesc
*fdp
;
1787 int cdir_changed
= 0;
1788 int rdir_changed
= 0;
1791 * XXX Also needs to iterate each thread in the process to see if it
1792 * XXX is using a per-thread current working directory, and, if so,
1793 * XXX update that as well.
1798 if (fdp
== (struct filedesc
*)0) {
1800 return PROC_RETURNED
;
1802 fdp_cvp
= fdp
->fd_cdir
;
1803 fdp_rvp
= fdp
->fd_rdir
;
1806 if (fdp_cvp
== olddp
) {
1813 if (fdp_rvp
== olddp
) {
1820 if (cdir_changed
|| rdir_changed
) {
1822 fdp
->fd_cdir
= fdp_cvp
;
1823 fdp
->fd_rdir
= fdp_rvp
;
1826 return PROC_RETURNED
;
1832 * Scan all active processes to see if any of them have a current
1833 * or root directory onto which the new filesystem has just been
1834 * mounted. If so, replace them with the new mount point.
1837 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1842 struct cdirargs cdr
;
1844 if (olddp
->v_usecount
== 1) {
1847 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1851 panic("mount: lost mount: error %d", err
);
1858 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1859 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1861 if (rootvnode
== olddp
) {
1873 * Unmount a file system.
1875 * Note: unmount takes a path to the vnode mounted on as argument,
1876 * not special file (as before).
1880 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1885 struct nameidata nd
;
1886 vfs_context_t ctx
= vfs_context_current();
1888 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1889 UIO_USERSPACE
, uap
->path
, ctx
);
1899 error
= mac_mount_check_umount(ctx
, mp
);
1906 * Must be the root of the filesystem
1908 if ((vp
->v_flag
& VROOT
) == 0) {
1914 /* safedounmount consumes the mount ref */
1915 return safedounmount(mp
, uap
->flags
, ctx
);
1919 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1923 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1924 if (mp
== (mount_t
)0) {
1929 /* safedounmount consumes the mount ref */
1930 return safedounmount(mp
, flags
, ctx
);
1935 * The mount struct comes with a mount ref which will be consumed.
1936 * Do the actual file system unmount, prevent some common foot shooting.
1939 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1942 proc_t p
= vfs_context_proc(ctx
);
1945 * If the file system is not responding and MNT_NOBLOCK
1946 * is set and not a forced unmount then return EBUSY.
1948 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1949 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1955 * Skip authorization if the mount is tagged as permissive and
1956 * this is not a forced-unmount attempt.
1958 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1960 * Only root, or the user that did the original mount is
1961 * permitted to unmount this filesystem.
1963 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1964 (error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
1969 * Don't allow unmounting the root file system.
1971 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1972 error
= EBUSY
; /* the root is always busy */
1976 #ifdef CONFIG_IMGSRC_ACCESS
1977 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1981 #endif /* CONFIG_IMGSRC_ACCESS */
1983 return dounmount(mp
, flags
, 1, ctx
);
1991 * Do the actual file system unmount.
1994 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1996 vnode_t coveredvp
= (vnode_t
)0;
1999 int forcedunmount
= 0;
2001 struct vnode
*devvp
= NULLVP
;
2003 proc_t p
= vfs_context_proc(ctx
);
2005 int pflags_save
= 0;
2006 #endif /* CONFIG_TRIGGERS */
2009 if (!(flags
& MNT_FORCE
)) {
2010 fsevent_unmount(mp
, ctx
); /* has to come first! */
2017 * If already an unmount in progress just return EBUSY.
2018 * Even a forced unmount cannot override.
2020 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
2028 if (flags
& MNT_FORCE
) {
2030 mp
->mnt_lflag
|= MNT_LFORCE
;
2034 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2035 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2039 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2040 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2041 mp
->mnt_flag
&= ~MNT_ASYNC
;
2043 * anyone currently in the fast path that
2044 * trips over the cached rootvp will be
2045 * dumped out and forced into the slow path
2046 * to regenerate a new cached value
2048 mp
->mnt_realrootvp
= NULLVP
;
2051 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2053 * Force unmount any mounts in this filesystem.
2054 * If any unmounts fail - just leave them dangling.
2057 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2061 * taking the name_cache_lock exclusively will
2062 * insure that everyone is out of the fast path who
2063 * might be trying to use a now stale copy of
2064 * vp->v_mountedhere->mnt_realrootvp
2065 * bumping mount_generation causes the cached values
2070 name_cache_unlock();
2073 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2078 if (forcedunmount
== 0) {
2079 ubc_umount(mp
); /* release cached vnodes */
2080 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2081 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2084 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2085 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2086 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2092 /* free disk_conditioner_info structure for this mount */
2093 disk_conditioner_unmount(mp
);
2095 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2098 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2101 if (forcedunmount
) {
2102 lflags
|= FORCECLOSE
;
2104 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2105 if ((forcedunmount
== 0) && error
) {
2107 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2108 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2109 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2113 /* make sure there are no one in the mount iterations or lookup */
2114 mount_iterdrain(mp
);
2116 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2118 mount_iterreset(mp
);
2120 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2121 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2122 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2126 /* increment the operations count */
2128 OSAddAtomic(1, &vfs_nummntops
);
2131 if (mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2132 /* hold an io reference and drop the usecount before close */
2133 devvp
= mp
->mnt_devvp
;
2134 vnode_getalways(devvp
);
2136 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
| FWRITE
,
2138 vnode_clearmountedon(devvp
);
2141 lck_rw_done(&mp
->mnt_rwlock
);
2142 mount_list_remove(mp
);
2143 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2145 /* mark the mount point hook in the vp but not drop the ref yet */
2146 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2148 * The covered vnode needs special handling. Trying to get an
2149 * iocount must not block here as this may lead to deadlocks
2150 * if the Filesystem to which the covered vnode belongs is
2151 * undergoing forced unmounts. Since we hold a usecount, the
2152 * vnode cannot be reused (it can, however, still be terminated)
2154 vnode_getalways(coveredvp
);
2155 vnode_lock_spin(coveredvp
);
2158 coveredvp
->v_mountedhere
= (struct mount
*)0;
2159 CLR(coveredvp
->v_flag
, VMOUNT
);
2161 vnode_unlock(coveredvp
);
2162 vnode_put(coveredvp
);
2166 mp
->mnt_vtable
->vfc_refcount
--;
2167 mount_list_unlock();
2169 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2170 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2172 mp
->mnt_lflag
|= MNT_LDEAD
;
2174 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2176 * do the wakeup here
2177 * in case we block in mount_refdrain
2178 * which will drop the mount lock
2179 * and allow anyone blocked in vfs_busy
2180 * to wakeup and see the LDEAD state
2182 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2183 wakeup((caddr_t
)mp
);
2187 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2188 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2193 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2194 // Restore P_NOREMOTEHANG bit to its previous value
2195 if ((pflags_save
& P_NOREMOTEHANG
) == 0) {
2196 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2201 * Callback and context are set together under the mount lock, and
2202 * never cleared, so we're safe to examine them here, drop the lock,
2205 if (mp
->mnt_triggercallback
!= NULL
) {
2208 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2209 } else if (did_vflush
) {
2210 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2217 #endif /* CONFIG_TRIGGERS */
2219 lck_rw_done(&mp
->mnt_rwlock
);
2222 wakeup((caddr_t
)mp
);
2226 if ((coveredvp
!= NULLVP
)) {
2227 vnode_t pvp
= NULLVP
;
2230 * The covered vnode needs special handling. Trying to
2231 * get an iocount must not block here as this may lead
2232 * to deadlocks if the Filesystem to which the covered
2233 * vnode belongs is undergoing forced unmounts. Since we
2234 * hold a usecount, the vnode cannot be reused
2235 * (it can, however, still be terminated).
2237 vnode_getalways(coveredvp
);
2239 mount_dropcrossref(mp
, coveredvp
, 0);
2241 * We'll _try_ to detect if this really needs to be
2242 * done. The coveredvp can only be in termination (or
2243 * terminated) if the coveredvp's mount point is in a
2244 * forced unmount (or has been) since we still hold the
2247 if (!vnode_isrecycled(coveredvp
)) {
2248 pvp
= vnode_getparent(coveredvp
);
2250 if (coveredvp
->v_resolve
) {
2251 vnode_trigger_rearm(coveredvp
, ctx
);
2256 vnode_rele(coveredvp
);
2257 vnode_put(coveredvp
);
2261 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2264 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2265 mount_lock_destroy(mp
);
2267 mac_mount_label_destroy(mp
);
2269 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2271 panic("dounmount: no coveredvp");
2278 * Unmount any mounts in this filesystem.
2281 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2284 fsid_t
*fsids
, fsid
;
2286 int count
= 0, i
, m
= 0;
2291 // Get an array to hold the submounts fsids.
2292 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2294 fsids_sz
= count
* sizeof(fsid_t
);
2295 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2296 if (fsids
== NULL
) {
2297 mount_list_unlock();
2300 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2303 * Fill the array with submount fsids.
2304 * Since mounts are always added to the tail of the mount list, the
2305 * list is always in mount order.
2306 * For each mount check if the mounted-on vnode belongs to a
2307 * mount that's already added to our array of mounts to be unmounted.
2309 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2310 vp
= smp
->mnt_vnodecovered
;
2314 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2315 for (i
= 0; i
<= m
; i
++) {
2316 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2317 fsids
[i
].val
[1] == fsid
.val
[1]) {
2318 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2323 mount_list_unlock();
2325 // Unmount the submounts in reverse order. Ignore errors.
2326 for (i
= m
; i
> 0; i
--) {
2327 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2330 mount_iterdrop(smp
);
2331 (void) dounmount(smp
, flags
, 1, ctx
);
2336 FREE(fsids
, M_TEMP
);
2341 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2346 if (mp
->mnt_crossref
< 0) {
2347 panic("mount cross refs -ve");
2350 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2352 vnode_put_locked(dp
);
2356 mount_lock_destroy(mp
);
2358 mac_mount_label_destroy(mp
);
2360 FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
);
2364 vnode_put_locked(dp
);
2371 * Sync each mounted filesystem.
2377 int print_vmpage_stat
= 0;
2380 sync_callback(mount_t mp
, __unused
void *arg
)
2382 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2383 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2385 mp
->mnt_flag
&= ~MNT_ASYNC
;
2386 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2388 mp
->mnt_flag
|= MNT_ASYNC
;
2392 return VFS_RETURNED
;
2397 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2399 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2401 if (print_vmpage_stat
) {
2402 vm_countdirtypages();
2409 #endif /* DIAGNOSTIC */
2415 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2416 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
2420 sync_internal_callback(mount_t mp
, void *arg
)
2423 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2424 (mp
->mnt_flag
& MNT_LOCAL
);
2425 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2427 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) {
2428 return VFS_RETURNED
;
2429 } else if ((sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) {
2430 return VFS_RETURNED
;
2434 (void)sync_callback(mp
, NULL
);
2436 return VFS_RETURNED
;
2439 int sync_thread_state
= 0;
2440 int sync_timeout_seconds
= 5;
2442 #define SYNC_THREAD_RUN 0x0001
2443 #define SYNC_THREAD_RUNNING 0x0002
2446 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2448 sync_type_t sync_type
;
2450 lck_mtx_lock(sync_mtx_lck
);
2451 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2452 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2453 lck_mtx_unlock(sync_mtx_lck
);
2455 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2456 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2457 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2458 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2460 lck_mtx_lock(sync_mtx_lck
);
2463 * This wakeup _has_ to be issued before the lock is released otherwise
2464 * we may end up waking up a thread in sync_internal which is
2465 * expecting a wakeup from a thread it just created and not from this
2466 * thread which is about to exit.
2468 wakeup(&sync_thread_state
);
2469 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2470 lck_mtx_unlock(sync_mtx_lck
);
2472 if (print_vmpage_stat
) {
2473 vm_countdirtypages();
2480 #endif /* DIAGNOSTIC */
2483 struct timeval sync_timeout_last_print
= {0, 0};
2486 * An in-kernel sync for power management to call.
2487 * This function always returns within sync_timeout seconds.
2489 __private_extern__
int
2494 int thread_created
= FALSE
;
2495 struct timespec ts
= {sync_timeout_seconds
, 0};
2497 lck_mtx_lock(sync_mtx_lck
);
2498 sync_thread_state
|= SYNC_THREAD_RUN
;
2499 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2502 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2503 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2504 if (kr
!= KERN_SUCCESS
) {
2505 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2506 lck_mtx_unlock(sync_mtx_lck
);
2507 printf("sync_thread failed\n");
2510 thread_created
= TRUE
;
2513 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2514 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2519 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2520 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2521 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2525 if (thread_created
) {
2526 thread_deallocate(thd
);
2530 } /* end of sync_internal call */
2533 * Change filesystem quotas.
2537 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2540 int error
, quota_cmd
, quota_status
= 0;
2543 struct nameidata nd
;
2544 vfs_context_t ctx
= vfs_context_current();
2545 struct dqblk my_dqblk
= {};
2547 AUDIT_ARG(uid
, uap
->uid
);
2548 AUDIT_ARG(cmd
, uap
->cmd
);
2549 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2555 mp
= nd
.ni_vp
->v_mount
;
2556 vnode_put(nd
.ni_vp
);
2559 /* copyin any data we will need for downstream code */
2560 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2562 switch (quota_cmd
) {
2564 /* uap->arg specifies a file from which to take the quotas */
2565 fnamelen
= MAXPATHLEN
;
2566 datap
= kalloc(MAXPATHLEN
);
2567 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2570 /* uap->arg is a pointer to a dqblk structure. */
2571 datap
= (caddr_t
) &my_dqblk
;
2575 /* uap->arg is a pointer to a dqblk structure. */
2576 datap
= (caddr_t
) &my_dqblk
;
2577 if (proc_is64bit(p
)) {
2578 struct user_dqblk my_dqblk64
;
2579 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
));
2581 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2584 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
));
2588 /* uap->arg is a pointer to an integer */
2589 datap
= (caddr_t
) "a_status
;
2597 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2600 switch (quota_cmd
) {
2602 if (datap
!= NULL
) {
2603 kfree(datap
, MAXPATHLEN
);
2607 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2609 if (proc_is64bit(p
)) {
2610 struct user_dqblk my_dqblk64
;
2612 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2613 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2614 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
));
2616 error
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
));
2621 /* uap->arg is a pointer to an integer */
2623 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2634 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2641 * Get filesystem statistics.
2643 * Returns: 0 Success
2645 * vfs_update_vfsstat:???
2646 * munge_statfs:EFAULT
2650 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2653 struct vfsstatfs
*sp
;
2655 struct nameidata nd
;
2656 vfs_context_t ctx
= vfs_context_current();
2659 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2660 UIO_USERSPACE
, uap
->path
, ctx
);
2667 sp
= &mp
->mnt_vfsstat
;
2671 error
= mac_mount_check_stat(ctx
, mp
);
2677 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2683 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2689 * Get filesystem statistics.
2693 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2697 struct vfsstatfs
*sp
;
2700 AUDIT_ARG(fd
, uap
->fd
);
2702 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2706 error
= vnode_getwithref(vp
);
2712 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2721 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2727 sp
= &mp
->mnt_vfsstat
;
2728 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2732 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2742 * Common routine to handle copying of statfs64 data to user space
2745 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2748 struct statfs64 sfs
;
2750 bzero(&sfs
, sizeof(sfs
));
2752 sfs
.f_bsize
= sfsp
->f_bsize
;
2753 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2754 sfs
.f_blocks
= sfsp
->f_blocks
;
2755 sfs
.f_bfree
= sfsp
->f_bfree
;
2756 sfs
.f_bavail
= sfsp
->f_bavail
;
2757 sfs
.f_files
= sfsp
->f_files
;
2758 sfs
.f_ffree
= sfsp
->f_ffree
;
2759 sfs
.f_fsid
= sfsp
->f_fsid
;
2760 sfs
.f_owner
= sfsp
->f_owner
;
2761 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2762 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2763 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2764 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2765 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2767 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2769 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2770 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2772 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2778 * Get file system statistics in 64-bit mode
2781 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2784 struct vfsstatfs
*sp
;
2786 struct nameidata nd
;
2787 vfs_context_t ctxp
= vfs_context_current();
2790 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2791 UIO_USERSPACE
, uap
->path
, ctxp
);
2798 sp
= &mp
->mnt_vfsstat
;
2802 error
= mac_mount_check_stat(ctxp
, mp
);
2808 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2814 error
= statfs64_common(mp
, sp
, uap
->buf
);
2821 * Get file system statistics in 64-bit mode
2824 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2828 struct vfsstatfs
*sp
;
2831 AUDIT_ARG(fd
, uap
->fd
);
2833 if ((error
= file_vnode(uap
->fd
, &vp
))) {
2837 error
= vnode_getwithref(vp
);
2843 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2852 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2858 sp
= &mp
->mnt_vfsstat
;
2859 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2863 error
= statfs64_common(mp
, sp
, uap
->buf
);
2872 struct getfsstat_struct
{
2883 getfsstat_callback(mount_t mp
, void * arg
)
2885 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2886 struct vfsstatfs
*sp
;
2888 vfs_context_t ctx
= vfs_context_current();
2890 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2892 error
= mac_mount_check_stat(ctx
, mp
);
2894 fstp
->error
= error
;
2895 return VFS_RETURNED_DONE
;
2898 sp
= &mp
->mnt_vfsstat
;
2900 * If MNT_NOWAIT is specified, do not refresh the
2901 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2903 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2904 (error
= vfs_update_vfsstat(mp
, ctx
,
2906 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2907 return VFS_RETURNED
;
2911 * Need to handle LP64 version of struct statfs
2913 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2915 fstp
->error
= error
;
2916 return VFS_RETURNED_DONE
;
2918 fstp
->sfsp
+= my_size
;
2922 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2924 fstp
->error
= error
;
2925 return VFS_RETURNED_DONE
;
2932 return VFS_RETURNED
;
2936 * Get statistics on all filesystems.
2939 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2941 struct __mac_getfsstat_args muap
;
2943 muap
.buf
= uap
->buf
;
2944 muap
.bufsize
= uap
->bufsize
;
2945 muap
.mac
= USER_ADDR_NULL
;
2947 muap
.flags
= uap
->flags
;
2949 return __mac_getfsstat(p
, &muap
, retval
);
2953 * __mac_getfsstat: Get MAC-related file system statistics
2955 * Parameters: p (ignored)
2956 * uap User argument descriptor (see below)
2957 * retval Count of file system statistics (N stats)
2959 * Indirect: uap->bufsize Buffer size
2960 * uap->macsize MAC info size
2961 * uap->buf Buffer where information will be returned
2963 * uap->flags File system flags
2966 * Returns: 0 Success
2971 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2975 size_t count
, maxcount
, bufsize
, macsize
;
2976 struct getfsstat_struct fst
;
2978 bufsize
= (size_t) uap
->bufsize
;
2979 macsize
= (size_t) uap
->macsize
;
2981 if (IS_64BIT_PROCESS(p
)) {
2982 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2984 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2992 if (uap
->mac
!= USER_ADDR_NULL
) {
2997 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2998 if (count
!= maxcount
) {
3002 /* Copy in the array */
3003 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
3008 error
= copyin(uap
->mac
, mp0
, macsize
);
3010 FREE(mp0
, M_MACTEMP
);
3014 /* Normalize to an array of user_addr_t */
3015 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
3017 FREE(mp0
, M_MACTEMP
);
3021 for (i
= 0; i
< count
; i
++) {
3022 if (IS_64BIT_PROCESS(p
)) {
3023 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
3025 mp
[i
] = (user_addr_t
)mp0
[i
];
3028 FREE(mp0
, M_MACTEMP
);
3035 fst
.flags
= uap
->flags
;
3038 fst
.maxcount
= maxcount
;
3041 vfs_iterate(0, getfsstat_callback
, &fst
);
3044 FREE(mp
, M_MACTEMP
);
3048 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3052 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3053 *retval
= fst
.maxcount
;
3055 *retval
= fst
.count
;
3061 getfsstat64_callback(mount_t mp
, void * arg
)
3063 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3064 struct vfsstatfs
*sp
;
3067 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3069 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3071 fstp
->error
= error
;
3072 return VFS_RETURNED_DONE
;
3075 sp
= &mp
->mnt_vfsstat
;
3077 * If MNT_NOWAIT is specified, do not refresh the fsstat
3078 * cache. MNT_WAIT overrides MNT_NOWAIT.
3080 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3081 * getfsstat, since the constants are out of the same
3084 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
3085 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3086 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
3087 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3088 return VFS_RETURNED
;
3091 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
3093 fstp
->error
= error
;
3094 return VFS_RETURNED_DONE
;
3096 fstp
->sfsp
+= sizeof(struct statfs64
);
3099 return VFS_RETURNED
;
3103 * Get statistics on all file systems in 64 bit mode.
3106 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3109 int count
, maxcount
;
3110 struct getfsstat_struct fst
;
3112 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3118 fst
.flags
= uap
->flags
;
3121 fst
.maxcount
= maxcount
;
3123 vfs_iterate(0, getfsstat64_callback
, &fst
);
3126 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3130 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
) {
3131 *retval
= fst
.maxcount
;
3133 *retval
= fst
.count
;
3140 * gets the associated vnode with the file descriptor passed.
3144 * ctx - vfs context of caller
3145 * fd - file descriptor for which vnode is required.
3146 * vpp - Pointer to pointer to vnode to be returned.
3148 * The vnode is returned with an iocount so any vnode obtained
3149 * by this call needs a vnode_put
3153 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3157 struct fileproc
*fp
;
3158 proc_t p
= vfs_context_proc(ctx
);
3162 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3167 error
= vnode_getwithref(vp
);
3169 (void)fp_drop(p
, fd
, fp
, 0);
3173 (void)fp_drop(p
, fd
, fp
, 0);
3179 * Wrapper function around namei to start lookup from a directory
3180 * specified by a file descriptor ni_dirfd.
3182 * In addition to all the errors returned by namei, this call can
3183 * return ENOTDIR if the file descriptor does not refer to a directory.
3184 * and EBADF if the file descriptor is not valid.
3187 nameiat(struct nameidata
*ndp
, int dirfd
)
3189 if ((dirfd
!= AT_FDCWD
) &&
3190 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3191 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3195 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3196 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3201 c
= *((char *)(ndp
->ni_dirp
));
3207 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3213 if (vnode_vtype(dvp_at
) != VDIR
) {
3218 ndp
->ni_dvp
= dvp_at
;
3219 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3221 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3231 * Change current working directory to a given file descriptor.
3235 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3237 struct filedesc
*fdp
= p
->p_fd
;
3243 vfs_context_t ctx
= vfs_context_current();
3245 AUDIT_ARG(fd
, uap
->fd
);
3246 if (per_thread
&& uap
->fd
== -1) {
3248 * Switching back from per-thread to per process CWD; verify we
3249 * in fact have one before proceeding. The only success case
3250 * for this code path is to return 0 preemptively after zapping
3251 * the thread structure contents.
3253 thread_t th
= vfs_context_thread(ctx
);
3255 uthread_t uth
= get_bsdthread_info(th
);
3257 uth
->uu_cdir
= NULLVP
;
3258 if (tvp
!= NULLVP
) {
3266 if ((error
= file_vnode(uap
->fd
, &vp
))) {
3269 if ((error
= vnode_getwithref(vp
))) {
3274 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3276 if (vp
->v_type
!= VDIR
) {
3282 error
= mac_vnode_check_chdir(ctx
, vp
);
3287 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3292 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3293 if (vfs_busy(mp
, LK_NOWAIT
)) {
3297 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3308 if ((error
= vnode_ref(vp
))) {
3314 thread_t th
= vfs_context_thread(ctx
);
3316 uthread_t uth
= get_bsdthread_info(th
);
3319 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3345 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3347 return common_fchdir(p
, uap
, 0);
3351 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3353 return common_fchdir(p
, (void *)uap
, 1);
3357 * Change current working directory (".").
3359 * Returns: 0 Success
3360 * change_dir:ENOTDIR
3362 * vnode_ref:ENOENT No such file or directory
3366 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3368 struct filedesc
*fdp
= p
->p_fd
;
3370 struct nameidata nd
;
3372 vfs_context_t ctx
= vfs_context_current();
3374 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3375 UIO_USERSPACE
, uap
->path
, ctx
);
3376 error
= change_dir(&nd
, ctx
);
3380 if ((error
= vnode_ref(nd
.ni_vp
))) {
3381 vnode_put(nd
.ni_vp
);
3385 * drop the iocount we picked up in change_dir
3387 vnode_put(nd
.ni_vp
);
3390 thread_t th
= vfs_context_thread(ctx
);
3392 uthread_t uth
= get_bsdthread_info(th
);
3394 uth
->uu_cdir
= nd
.ni_vp
;
3395 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3397 vnode_rele(nd
.ni_vp
);
3403 fdp
->fd_cdir
= nd
.ni_vp
;
3418 * Change current working directory (".") for the entire process
3420 * Parameters: p Process requesting the call
3421 * uap User argument descriptor (see below)
3424 * Indirect parameters: uap->path Directory path
3426 * Returns: 0 Success
3427 * common_chdir: ENOTDIR
3428 * common_chdir: ENOENT No such file or directory
3433 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3435 return common_chdir(p
, (void *)uap
, 0);
3441 * Change current working directory (".") for a single thread
3443 * Parameters: p Process requesting the call
3444 * uap User argument descriptor (see below)
3447 * Indirect parameters: uap->path Directory path
3449 * Returns: 0 Success
3450 * common_chdir: ENOTDIR
3451 * common_chdir: ENOENT No such file or directory
3456 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3458 return common_chdir(p
, (void *)uap
, 1);
3463 * Change notion of root (``/'') directory.
3467 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3469 struct filedesc
*fdp
= p
->p_fd
;
3471 struct nameidata nd
;
3473 vfs_context_t ctx
= vfs_context_current();
3475 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
))) {
3479 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3480 UIO_USERSPACE
, uap
->path
, ctx
);
3481 error
= change_dir(&nd
, ctx
);
3487 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3490 vnode_put(nd
.ni_vp
);
3495 if ((error
= vnode_ref(nd
.ni_vp
))) {
3496 vnode_put(nd
.ni_vp
);
3499 vnode_put(nd
.ni_vp
);
3503 fdp
->fd_rdir
= nd
.ni_vp
;
3504 fdp
->fd_flags
|= FD_CHROOT
;
3515 * Common routine for chroot and chdir.
3517 * Returns: 0 Success
3518 * ENOTDIR Not a directory
3519 * namei:??? [anything namei can return]
3520 * vnode_authorize:??? [anything vnode_authorize can return]
3523 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3528 if ((error
= namei(ndp
))) {
3534 if (vp
->v_type
!= VDIR
) {
3540 error
= mac_vnode_check_chdir(ctx
, vp
);
3547 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3557 * Allocate the vnode data (for directories) associated with the file glob.
3560 fg_vn_data_alloc(void)
3562 struct fd_vn_data
*fvdata
;
3564 /* Allocate per fd vnode data */
3565 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3566 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3567 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3572 * Free the vnode data (for directories) associated with the file glob.
3575 fg_vn_data_free(void *fgvndata
)
3577 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3579 if (fvdata
->fv_buf
) {
3580 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3582 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3583 FREE(fvdata
, M_FD_VN_DATA
);
3587 * Check permissions, allocate an open file structure,
3588 * and call the device open routine if any.
3590 * Returns: 0 Success
3601 * XXX Need to implement uid, gid
3604 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3605 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3608 proc_t p
= vfs_context_proc(ctx
);
3609 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3610 struct fileproc
*fp
;
3613 int type
, indx
, error
;
3615 struct vfs_context context
;
3619 if ((oflags
& O_ACCMODE
) == O_ACCMODE
) {
3623 flags
= FFLAGS(uflags
);
3624 CLR(flags
, FENCRYPTED
);
3625 CLR(flags
, FUNENCRYPTED
);
3627 AUDIT_ARG(fflags
, oflags
);
3628 AUDIT_ARG(mode
, vap
->va_mode
);
3630 if ((error
= falloc_withalloc(p
,
3631 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3634 uu
->uu_dupfd
= -indx
- 1;
3636 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3637 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)) { /* XXX from fdopen */
3638 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3639 fp_drop(p
, indx
, NULL
, 0);
3644 if (error
== ERESTART
) {
3647 fp_free(p
, indx
, fp
);
3653 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3654 fp
->f_fglob
->fg_ops
= &vnops
;
3655 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3657 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3658 lf
.l_whence
= SEEK_SET
;
3661 if (flags
& O_EXLOCK
) {
3662 lf
.l_type
= F_WRLCK
;
3664 lf
.l_type
= F_RDLCK
;
3667 if ((flags
& FNONBLOCK
) == 0) {
3671 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3677 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) {
3680 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3683 #if DEVELOPMENT || DEBUG
3685 * XXX VSWAP: Check for entitlements or special flag here
3686 * so we can restrict access appropriately.
3688 #else /* DEVELOPMENT || DEBUG */
3690 if (vnode_isswap(vp
) && (flags
& (FWRITE
| O_TRUNC
)) && (ctx
!= vfs_context_kernel())) {
3691 /* block attempt to write/truncate swapfile */
3695 #endif /* DEVELOPMENT || DEBUG */
3697 /* try to truncate by setting the size attribute */
3698 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) {
3703 * For directories we hold some additional information in the fd.
3705 if (vnode_vtype(vp
) == VDIR
) {
3706 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3708 fp
->f_fglob
->fg_vn_data
= NULL
;
3714 * The first terminal open (without a O_NOCTTY) by a session leader
3715 * results in it being set as the controlling terminal.
3717 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3718 !(flags
& O_NOCTTY
)) {
3721 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3722 (caddr_t
)&tmp
, ctx
);
3726 if (flags
& O_CLOEXEC
) {
3727 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3729 if (flags
& O_CLOFORK
) {
3730 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3732 procfdtbl_releasefd(p
, indx
, NULL
);
3734 #if CONFIG_SECLUDED_MEMORY
3735 if (secluded_for_filecache
&&
3736 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3737 vnode_vtype(vp
) == VREG
) {
3738 memory_object_control_t moc
;
3740 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3742 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3743 /* nothing to do... */
3744 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3745 /* writable -> no longer eligible for secluded pages */
3746 memory_object_mark_eligible_for_secluded(moc
,
3748 } else if (secluded_for_filecache
== 1) {
3749 char pathname
[32] = { 0, };
3751 /* XXX FBDP: better way to detect /Applications/ ? */
3752 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3753 copyinstr(ndp
->ni_dirp
,
3758 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3763 pathname
[sizeof(pathname
) - 1] = '\0';
3764 if (strncmp(pathname
,
3766 strlen("/Applications/")) == 0 &&
3768 "/Applications/Camera.app/",
3769 strlen("/Applications/Camera.app/")) != 0) {
3772 * AND from "/Applications/"
3773 * AND not from "/Applications/Camera.app/"
3774 * ==> eligible for secluded
3776 memory_object_mark_eligible_for_secluded(moc
,
3779 } else if (secluded_for_filecache
== 2) {
3781 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3783 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3785 /* not implemented... */
3787 if (!strncmp(vp
->v_name
,
3788 DYLD_SHARED_CACHE_NAME
,
3789 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3790 !strncmp(vp
->v_name
,
3792 strlen(vp
->v_name
)) ||
3793 !strncmp(vp
->v_name
,
3795 strlen(vp
->v_name
)) ||
3796 !strncmp(vp
->v_name
,
3798 strlen(vp
->v_name
)) ||
3799 !strncmp(vp
->v_name
,
3801 strlen(vp
->v_name
)) ||
3802 !strncmp(vp
->v_name
,
3804 strlen(vp
->v_name
)) ||
3805 !strncmp(vp
->v_name
,
3807 strlen(vp
->v_name
))) {
3809 * This file matters when launching Camera:
3810 * do not store its contents in the secluded
3811 * pool that will be drained on Camera launch.
3813 memory_object_mark_eligible_for_secluded(moc
,
3818 #endif /* CONFIG_SECLUDED_MEMORY */
3820 fp_drop(p
, indx
, fp
, 1);
3827 context
= *vfs_context_current();
3828 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3830 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3831 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3832 lf
.l_whence
= SEEK_SET
;
3835 lf
.l_type
= F_UNLCK
;
3838 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3841 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3843 fp_free(p
, indx
, fp
);
3849 * While most of the *at syscall handlers can call nameiat() which
3850 * is a wrapper around namei, the use of namei and initialisation
3851 * of nameidata are far removed and in different functions - namei
3852 * gets called in vn_open_auth for open1. So we'll just do here what
3856 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3857 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3860 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3864 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3865 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3870 c
= *((char *)(ndp
->ni_dirp
));
3876 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3882 if (vnode_vtype(dvp_at
) != VDIR
) {
3887 ndp
->ni_dvp
= dvp_at
;
3888 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3889 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3896 return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
);
3900 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3902 * Parameters: p Process requesting the open
3903 * uap User argument descriptor (see below)
3904 * retval Pointer to an area to receive the
3905 * return value from the system call
3907 * Indirect: uap->path Path to open (same as 'open')
3908 * uap->flags Flags to open (same as 'open')
3909 * uap->uid UID to set, if creating
3910 * uap->gid GID to set, if creating
3911 * uap->mode File mode, if creating (same as 'open')
3912 * uap->xsecurity ACL to set, if creating
3914 * Returns: 0 Success
3917 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3919 * XXX: We should enumerate the possible errno values here, and where
3920 * in the code they originated.
3923 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3925 struct filedesc
*fdp
= p
->p_fd
;
3927 kauth_filesec_t xsecdst
;
3928 struct vnode_attr va
;
3929 struct nameidata nd
;
3932 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3935 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3936 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
3941 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3942 VATTR_SET(&va
, va_mode
, cmode
);
3943 if (uap
->uid
!= KAUTH_UID_NONE
) {
3944 VATTR_SET(&va
, va_uid
, uap
->uid
);
3946 if (uap
->gid
!= KAUTH_GID_NONE
) {
3947 VATTR_SET(&va
, va_gid
, uap
->gid
);
3949 if (xsecdst
!= NULL
) {
3950 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3953 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3954 uap
->path
, vfs_context_current());
3956 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3957 fileproc_alloc_init
, NULL
, retval
);
3958 if (xsecdst
!= NULL
) {
3959 kauth_filesec_free(xsecdst
);
3966 * Go through the data-protected atomically controlled open (2)
3968 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3971 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
)
3973 int flags
= uap
->flags
;
3974 int class = uap
->class;
3975 int dpflags
= uap
->dpflags
;
3978 * Follow the same path as normal open(2)
3979 * Look up the item if it exists, and acquire the vnode.
3981 struct filedesc
*fdp
= p
->p_fd
;
3982 struct vnode_attr va
;
3983 struct nameidata nd
;
3988 /* Mask off all but regular access permissions */
3989 cmode
= ((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3990 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3992 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3993 uap
->path
, vfs_context_current());
3996 * Initialize the extra fields in vnode_attr to pass down our
3998 * 1. target cprotect class.
3999 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4001 if (flags
& O_CREAT
) {
4002 /* lower level kernel code validates that the class is valid before applying it. */
4003 if (class != PROTECTION_CLASS_DEFAULT
) {
4005 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4006 * file behave the same as open (2)
4008 VATTR_SET(&va
, va_dataprotect_class
, class);
4012 if (dpflags
& (O_DP_GETRAWENCRYPTED
| O_DP_GETRAWUNENCRYPTED
)) {
4013 if (flags
& (O_RDWR
| O_WRONLY
)) {
4014 /* Not allowed to write raw encrypted bytes */
4017 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
4018 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
4020 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
4021 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
4025 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
4026 fileproc_alloc_init
, NULL
, retval
);
4032 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
4033 int fd
, enum uio_seg segflg
, int *retval
)
4035 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
4036 struct vnode_attr va
;
4037 struct nameidata nd
;
4041 /* Mask off all but regular access permissions */
4042 cmode
= ((mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
4043 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
4045 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
4048 return open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
4053 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
4055 __pthread_testcancel(1);
4056 return open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
);
4060 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
4063 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4064 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
);
4068 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
4071 return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
4072 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
);
4076 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
4078 __pthread_testcancel(1);
4079 return openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
);
4083 * openbyid_np: open a file given a file system id and a file system object id
4084 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4085 * for file systems that don't support object ids, it is a node id (uint64_t).
4087 * Parameters: p Process requesting the open
4088 * uap User argument descriptor (see below)
4089 * retval Pointer to an area to receive the
4090 * return value from the system call
4092 * Indirect: uap->path Path to open (same as 'open')
4094 * uap->fsid id of target file system
4095 * uap->objid id of target file system object
4096 * uap->flags Flags to open (same as 'open')
4098 * Returns: 0 Success
4102 * XXX: We should enumerate the possible errno values here, and where
4103 * in the code they originated.
4106 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4112 int buflen
= MAXPATHLEN
;
4114 vfs_context_t ctx
= vfs_context_current();
4116 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4120 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4124 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4125 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4129 AUDIT_ARG(value32
, fsid
.val
[0]);
4130 AUDIT_ARG(value64
, objid
);
4132 /* resolve path from fsid, objid */
4134 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4139 error
= fsgetpath_internal(
4140 ctx
, fsid
.val
[0], objid
,
4141 buflen
, buf
, &pathlen
);
4147 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4155 error
= openat_internal(
4156 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4165 * Create a special file.
4167 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4170 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4172 struct vnode_attr va
;
4173 vfs_context_t ctx
= vfs_context_current();
4175 struct nameidata nd
;
4179 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4180 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4182 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4183 if ((uap
->mode
& S_IFMT
) == S_IFIFO
) {
4184 return mkfifo1(ctx
, uap
->path
, &va
);
4187 AUDIT_ARG(mode
, uap
->mode
);
4188 AUDIT_ARG(value32
, uap
->dev
);
4190 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
4193 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4194 UIO_USERSPACE
, uap
->path
, ctx
);
4207 switch (uap
->mode
& S_IFMT
) {
4209 VATTR_SET(&va
, va_type
, VCHR
);
4212 VATTR_SET(&va
, va_type
, VBLK
);
4220 error
= mac_vnode_check_create(ctx
,
4221 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4227 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4231 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) {
4236 int update_flags
= 0;
4238 // Make sure the name & parent pointers are hooked up
4239 if (vp
->v_name
== NULL
) {
4240 update_flags
|= VNODE_UPDATE_NAME
;
4242 if (vp
->v_parent
== NULLVP
) {
4243 update_flags
|= VNODE_UPDATE_PARENT
;
4247 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4251 add_fsevent(FSE_CREATE_FILE
, ctx
,
4259 * nameidone has to happen before we vnode_put(dvp)
4260 * since it may need to release the fs_nodelock on the dvp
4273 * Create a named pipe.
4275 * Returns: 0 Success
4278 * vnode_authorize:???
4282 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4286 struct nameidata nd
;
4288 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4289 UIO_USERSPACE
, upath
, ctx
);
4297 /* check that this is a new file and authorize addition */
4302 VATTR_SET(vap
, va_type
, VFIFO
);
4304 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
4308 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4311 * nameidone has to happen before we vnode_put(dvp)
4312 * since it may need to release the fs_nodelock on the dvp
4326 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4328 * Parameters: p Process requesting the open
4329 * uap User argument descriptor (see below)
4332 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4333 * uap->uid UID to set
4334 * uap->gid GID to set
4335 * uap->mode File mode to set (same as 'mkfifo')
4336 * uap->xsecurity ACL to set, if creating
4338 * Returns: 0 Success
4341 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4343 * XXX: We should enumerate the possible errno values here, and where
4344 * in the code they originated.
4347 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4350 kauth_filesec_t xsecdst
;
4351 struct vnode_attr va
;
4353 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4355 xsecdst
= KAUTH_FILESEC_NONE
;
4356 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4357 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
4363 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4364 if (uap
->uid
!= KAUTH_UID_NONE
) {
4365 VATTR_SET(&va
, va_uid
, uap
->uid
);
4367 if (uap
->gid
!= KAUTH_GID_NONE
) {
4368 VATTR_SET(&va
, va_gid
, uap
->gid
);
4370 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4371 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4374 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4376 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
4377 kauth_filesec_free(xsecdst
);
4384 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4386 struct vnode_attr va
;
4389 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4391 return mkfifo1(vfs_context_current(), uap
->path
, &va
);
4396 my_strrchr(char *p
, int ch
)
4400 for (save
= NULL
;; ++p
) {
4411 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4414 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4416 int ret
, len
= _len
;
4418 *truncated_path
= 0;
4419 ret
= vn_getpath(dvp
, path
, &len
);
4420 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4422 path
[len
- 1] = '/';
4423 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
- len
) + 1;
4424 if (len
> MAXPATHLEN
) {
4427 // the string got truncated!
4428 *truncated_path
= 1;
4429 ptr
= my_strrchr(path
, '/');
4431 *ptr
= '\0'; // chop off the string at the last directory component
4433 len
= strlen(path
) + 1;
4436 } else if (ret
== 0) {
4437 *truncated_path
= 1;
4438 } else if (ret
!= 0) {
4439 struct vnode
*mydvp
= dvp
;
4441 if (ret
!= ENOSPC
) {
4442 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4443 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4445 *truncated_path
= 1;
4448 if (mydvp
->v_parent
!= NULL
) {
4449 mydvp
= mydvp
->v_parent
;
4450 } else if (mydvp
->v_mount
) {
4451 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4454 // no parent and no mount point? only thing is to punt and say "/" changed
4455 strlcpy(path
, "/", _len
);
4460 if (mydvp
== NULL
) {
4465 ret
= vn_getpath(mydvp
, path
, &len
);
4466 } while (ret
== ENOSPC
);
4474 * Make a hard file link.
4476 * Returns: 0 Success
4481 * vnode_authorize:???
4486 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4487 user_addr_t link
, int flag
, enum uio_seg segflg
)
4489 vnode_t vp
, dvp
, lvp
;
4490 struct nameidata nd
;
4496 int need_event
, has_listeners
, need_kpath2
;
4497 char *target_path
= NULL
;
4500 vp
= dvp
= lvp
= NULLVP
;
4502 /* look up the object we are linking to */
4503 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4504 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4507 error
= nameiat(&nd
, fd1
);
4516 * Normally, linking to directories is not supported.
4517 * However, some file systems may have limited support.
4519 if (vp
->v_type
== VDIR
) {
4520 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4521 error
= EPERM
; /* POSIX */
4525 /* Linking to a directory requires ownership. */
4526 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4527 struct vnode_attr dva
;
4530 VATTR_WANTED(&dva
, va_uid
);
4531 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4532 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4533 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4540 /* lookup the target node */
4544 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4545 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4547 error
= nameiat(&nd
, fd2
);
4555 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) {
4560 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4561 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) {
4565 /* target node must not exist */
4566 if (lvp
!= NULLVP
) {
4570 /* cannot link across mountpoints */
4571 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4576 /* authorize creation of the target node */
4577 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
4581 /* and finally make the link */
4582 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4588 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4592 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4596 has_listeners
= kauth_authorize_fileop_has_listeners();
4600 if (AUDIT_RECORD_EXISTS()) {
4605 if (need_event
|| has_listeners
|| need_kpath2
) {
4606 char *link_to_path
= NULL
;
4607 int len
, link_name_len
;
4609 /* build the path to the new link file */
4610 GET_PATH(target_path
);
4611 if (target_path
== NULL
) {
4616 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4618 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
);
4620 if (has_listeners
) {
4621 /* build the path to file we are linking to */
4622 GET_PATH(link_to_path
);
4623 if (link_to_path
== NULL
) {
4628 link_name_len
= MAXPATHLEN
;
4629 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4631 * Call out to allow 3rd party notification of link creation.
4632 * Ignore result of kauth_authorize_fileop call.
4634 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4635 (uintptr_t)link_to_path
,
4636 (uintptr_t)target_path
);
4638 if (link_to_path
!= NULL
) {
4639 RELEASE_PATH(link_to_path
);
4644 /* construct fsevent */
4645 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4647 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4650 // build the path to the destination of the link
4651 add_fsevent(FSE_CREATE_FILE
, ctx
,
4652 FSE_ARG_STRING
, len
, target_path
,
4653 FSE_ARG_FINFO
, &finfo
,
4657 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4658 FSE_ARG_VNODE
, vp
->v_parent
,
4666 * nameidone has to happen before we vnode_put(dvp)
4667 * since it may need to release the fs_nodelock on the dvp
4670 if (target_path
!= NULL
) {
4671 RELEASE_PATH(target_path
);
4685 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4687 return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4688 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
);
4692 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4694 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
) {
4698 return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4699 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
);
4703 * Make a symbolic link.
4705 * We could add support for ACLs here too...
4709 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4710 user_addr_t link
, enum uio_seg segflg
)
4712 struct vnode_attr va
;
4715 struct nameidata nd
;
4721 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4722 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4723 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4725 path
= (char *)path_data
;
4730 AUDIT_ARG(text
, path
); /* This is the link string */
4732 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4735 error
= nameiat(&nd
, fd
);
4742 p
= vfs_context_proc(ctx
);
4744 VATTR_SET(&va
, va_type
, VLNK
);
4745 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4748 error
= mac_vnode_check_create(ctx
,
4749 dvp
, &nd
.ni_cnd
, &va
);
4762 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4764 /* get default ownership, etc. */
4766 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4769 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4773 if (error
== 0 && vp
) {
4774 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4778 /* do fallback attribute handling */
4779 if (error
== 0 && vp
) {
4780 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4784 int update_flags
= 0;
4786 /* check if a new vnode was created, else try to get one */
4788 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4790 nd
.ni_op
= OP_LOOKUP
;
4792 nd
.ni_cnd
.cn_flags
= 0;
4793 error
= nameiat(&nd
, fd
);
4801 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4802 /* call out to allow 3rd party notification of symlink creation.
4803 * Ignore result of kauth_authorize_fileop call.
4805 if (kauth_authorize_fileop_has_listeners() &&
4807 char *new_link_path
= NULL
;
4810 /* build the path to the new link file */
4811 new_link_path
= get_pathbuff();
4813 vn_getpath(dvp
, new_link_path
, &len
);
4814 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4815 new_link_path
[len
- 1] = '/';
4816 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
- len
);
4819 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4820 (uintptr_t)path
, (uintptr_t)new_link_path
);
4821 if (new_link_path
!= NULL
) {
4822 release_pathbuff(new_link_path
);
4826 // Make sure the name & parent pointers are hooked up
4827 if (vp
->v_name
== NULL
) {
4828 update_flags
|= VNODE_UPDATE_NAME
;
4830 if (vp
->v_parent
== NULLVP
) {
4831 update_flags
|= VNODE_UPDATE_PARENT
;
4835 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4839 add_fsevent(FSE_CREATE_FILE
, ctx
,
4847 * nameidone has to happen before we vnode_put(dvp)
4848 * since it may need to release the fs_nodelock on the dvp
4857 if (path
&& (path
!= (char *)path_data
)) {
4858 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4865 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4867 return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4868 uap
->link
, UIO_USERSPACE
);
4872 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4873 __unused
int32_t *retval
)
4875 return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4876 uap
->path2
, UIO_USERSPACE
);
4880 * Delete a whiteout from the filesystem.
4881 * No longer supported.
4884 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4890 * Delete a name from the filesystem.
4894 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4895 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4897 struct nameidata nd
;
4900 struct componentname
*cnp
;
4905 struct vnode_attr va
;
4912 struct vnode_attr
*vap
;
4914 int retry_count
= 0;
4917 cn_flags
= LOCKPARENT
;
4918 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
)) {
4919 cn_flags
|= AUDITVNPATH1
;
4921 /* If a starting dvp is passed, it trumps any fd passed. */
4927 /* unlink or delete is allowed on rsrc forks and named streams */
4928 cn_flags
|= CN_ALLOWRSRCFORK
;
4939 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4941 nd
.ni_dvp
= start_dvp
;
4942 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4946 error
= nameiat(&nd
, fd
);
4955 /* With Carbon delete semantics, busy files cannot be deleted */
4956 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4957 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4960 /* Skip any potential upcalls if told to. */
4961 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4962 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4966 batched
= vnode_compound_remove_available(vp
);
4968 * The root of a mounted filesystem cannot be deleted.
4970 if (vp
->v_flag
& VROOT
) {
4974 #if DEVELOPMENT || DEBUG
4976 * XXX VSWAP: Check for entitlements or special flag here
4977 * so we can restrict access appropriately.
4979 #else /* DEVELOPMENT || DEBUG */
4981 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
4985 #endif /* DEVELOPMENT || DEBUG */
4988 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4990 if (error
== ENOENT
) {
4991 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4992 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5003 if (!vnode_compound_remove_available(dvp
)) {
5004 panic("No vp, but no compound remove?");
5009 need_event
= need_fsevent(FSE_DELETE
, dvp
);
5012 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
5013 /* XXX need to get these data in batched VNOP */
5014 get_fse_info(vp
, &finfo
, ctx
);
5017 error
= vfs_get_notify_attributes(&va
);
5026 has_listeners
= kauth_authorize_fileop_has_listeners();
5027 if (need_event
|| has_listeners
) {
5035 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
5039 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
) {
5040 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
5044 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
5046 if (error
== EKEEPLOOKING
) {
5048 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5051 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
5052 panic("EKEEPLOOKING, but continue flag not set?");
5055 if (vnode_isdir(vp
)) {
5059 goto continue_lookup
;
5060 } else if (error
== ENOENT
&& batched
) {
5061 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
5062 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
5064 * For compound VNOPs, the authorization callback may
5065 * return ENOENT in case of racing hardlink lookups
5066 * hitting the name cache, redrive the lookup.
5076 * Call out to allow 3rd party notification of delete.
5077 * Ignore result of kauth_authorize_fileop call.
5080 if (has_listeners
) {
5081 kauth_authorize_fileop(vfs_context_ucred(ctx
),
5082 KAUTH_FILEOP_DELETE
,
5087 if (vp
->v_flag
& VISHARDLINK
) {
5089 // if a hardlink gets deleted we want to blow away the
5090 // v_parent link because the path that got us to this
5091 // instance of the link is no longer valid. this will
5092 // force the next call to get the path to ask the file
5093 // system instead of just following the v_parent link.
5095 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
5100 if (vp
->v_flag
& VISHARDLINK
) {
5101 get_fse_info(vp
, &finfo
, ctx
);
5103 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
5105 if (truncated_path
) {
5106 finfo
.mode
|= FSE_TRUNCATED_PATH
;
5108 add_fsevent(FSE_DELETE
, ctx
,
5109 FSE_ARG_STRING
, len
, path
,
5110 FSE_ARG_FINFO
, &finfo
,
5122 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5123 * will cause its shadow file to go away if necessary.
5125 if (vp
&& (vnode_isnamedstream(vp
)) &&
5126 (vp
->v_parent
!= NULLVP
) &&
5127 vnode_isshadow(vp
)) {
5132 * nameidone has to happen before we vnode_put(dvp)
5133 * since it may need to release the fs_nodelock on the dvp
5149 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
5150 enum uio_seg segflg
, int unlink_flags
)
5152 return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5157 * Delete a name from the filesystem using Carbon semantics.
5160 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5162 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5163 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
);
5167 * Delete a name from the filesystem using POSIX semantics.
5170 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5172 return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5173 uap
->path
, UIO_USERSPACE
, 0);
5177 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5179 if (uap
->flag
& ~AT_REMOVEDIR
) {
5183 if (uap
->flag
& AT_REMOVEDIR
) {
5184 return rmdirat_internal(vfs_context_current(), uap
->fd
,
5185 uap
->path
, UIO_USERSPACE
);
5187 return unlinkat_internal(vfs_context_current(), uap
->fd
,
5188 NULLVP
, uap
->path
, UIO_USERSPACE
, 0);
5193 * Reposition read/write file offset.
5196 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5198 struct fileproc
*fp
;
5200 struct vfs_context
*ctx
;
5201 off_t offset
= uap
->offset
, file_size
;
5204 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
5205 if (error
== ENOTSUP
) {
5210 if (vnode_isfifo(vp
)) {
5216 ctx
= vfs_context_current();
5218 if (uap
->whence
== L_INCR
&& uap
->offset
== 0) {
5219 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5222 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5230 if ((error
= vnode_getwithref(vp
))) {
5235 switch (uap
->whence
) {
5237 offset
+= fp
->f_fglob
->fg_offset
;
5240 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0) {
5243 offset
+= file_size
;
5248 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5251 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5257 if (uap
->offset
> 0 && offset
< 0) {
5258 /* Incremented/relative move past max size */
5262 * Allow negative offsets on character devices, per
5263 * POSIX 1003.1-2001. Most likely for writing disk
5266 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5267 /* Decremented/relative move before start */
5271 fp
->f_fglob
->fg_offset
= offset
;
5272 *retval
= fp
->f_fglob
->fg_offset
;
5278 * An lseek can affect whether data is "available to read." Use
5279 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5281 post_event_if_success(vp
, error
, NOTE_NONE
);
5282 (void)vnode_put(vp
);
5289 * Check access permissions.
5291 * Returns: 0 Success
5292 * vnode_authorize:???
5295 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5297 kauth_action_t action
;
5301 * If just the regular access bits, convert them to something
5302 * that vnode_authorize will understand.
5304 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5306 if (uflags
& R_OK
) {
5307 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5309 if (uflags
& W_OK
) {
5310 if (vnode_isdir(vp
)) {
5311 action
|= KAUTH_VNODE_ADD_FILE
|
5312 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5313 /* might want delete rights here too */
5315 action
|= KAUTH_VNODE_WRITE_DATA
;
5318 if (uflags
& X_OK
) {
5319 if (vnode_isdir(vp
)) {
5320 action
|= KAUTH_VNODE_SEARCH
;
5322 action
|= KAUTH_VNODE_EXECUTE
;
5326 /* take advantage of definition of uflags */
5327 action
= uflags
>> 8;
5331 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5337 /* action == 0 means only check for existence */
5339 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5350 * access_extended: Check access permissions in bulk.
5352 * Description: uap->entries Pointer to an array of accessx
5353 * descriptor structs, plus one or
5354 * more NUL terminated strings (see
5355 * "Notes" section below).
5356 * uap->size Size of the area pointed to by
5358 * uap->results Pointer to the results array.
5360 * Returns: 0 Success
5361 * ENOMEM Insufficient memory
5362 * EINVAL Invalid arguments
5363 * namei:EFAULT Bad address
5364 * namei:ENAMETOOLONG Filename too long
5365 * namei:ENOENT No such file or directory
5366 * namei:ELOOP Too many levels of symbolic links
5367 * namei:EBADF Bad file descriptor
5368 * namei:ENOTDIR Not a directory
5373 * uap->results Array contents modified
5375 * Notes: The uap->entries are structured as an arbitrary length array
5376 * of accessx descriptors, followed by one or more NUL terminated
5379 * struct accessx_descriptor[0]
5381 * struct accessx_descriptor[n]
5382 * char name_data[0];
5384 * We determine the entry count by walking the buffer containing
5385 * the uap->entries argument descriptor. For each descriptor we
5386 * see, the valid values for the offset ad_name_offset will be
5387 * in the byte range:
5389 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5391 * [ uap->entries + uap->size - 2 ]
5393 * since we must have at least one string, and the string must
5394 * be at least one character plus the NUL terminator in length.
5396 * XXX: Need to support the check-as uid argument
5399 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5401 struct accessx_descriptor
*input
= NULL
;
5402 errno_t
*result
= NULL
;
5405 unsigned int desc_max
, desc_actual
, i
, j
;
5406 struct vfs_context context
;
5407 struct nameidata nd
;
5411 #define ACCESSX_MAX_DESCR_ON_STACK 10
5412 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5414 context
.vc_ucred
= NULL
;
5417 * Validate parameters; if valid, copy the descriptor array and string
5418 * arguments into local memory. Before proceeding, the following
5419 * conditions must have been met:
5421 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5422 * o There must be sufficient room in the request for at least one
5423 * descriptor and a one byte NUL terminated string.
5424 * o The allocation of local storage must not fail.
5426 if (uap
->size
> ACCESSX_MAX_TABLESIZE
) {
5429 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2)) {
5432 if (uap
->size
<= sizeof(stack_input
)) {
5433 input
= stack_input
;
5435 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5436 if (input
== NULL
) {
5441 error
= copyin(uap
->entries
, input
, uap
->size
);
5446 AUDIT_ARG(opaque
, input
, uap
->size
);
5449 * Force NUL termination of the copyin buffer to avoid namei() running
5450 * off the end. If the caller passes us bogus data, they may get a
5453 ((char *)input
)[uap
->size
- 1] = 0;
5456 * Access is defined as checking against the process' real identity,
5457 * even if operations are checking the effective identity. This
5458 * requires that we use a local vfs context.
5460 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5461 context
.vc_thread
= current_thread();
5464 * Find out how many entries we have, so we can allocate the result
5465 * array by walking the list and adjusting the count downward by the
5466 * earliest string offset we see.
5468 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5469 desc_actual
= desc_max
;
5470 for (i
= 0; i
< desc_actual
; i
++) {
5472 * Take the offset to the name string for this entry and
5473 * convert to an input array index, which would be one off
5474 * the end of the array if this entry was the lowest-addressed
5477 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5480 * An offset greater than the max allowable offset is an error.
5481 * It is also an error for any valid entry to point
5482 * to a location prior to the end of the current entry, if
5483 * it's not a reference to the string of the previous entry.
5485 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5490 /* Also do not let ad_name_offset point to something beyond the size of the input */
5491 if (input
[i
].ad_name_offset
>= uap
->size
) {
5497 * An offset of 0 means use the previous descriptor's offset;
5498 * this is used to chain multiple requests for the same file
5499 * to avoid multiple lookups.
5502 /* This is not valid for the first entry */
5511 * If the offset of the string for this descriptor is before
5512 * what we believe is the current actual last descriptor,
5513 * then we need to adjust our estimate downward; this permits
5514 * the string table following the last descriptor to be out
5515 * of order relative to the descriptor list.
5517 if (j
< desc_actual
) {
5523 * We limit the actual number of descriptors we are willing to process
5524 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5525 * requested does not exceed this limit, we allocate the result array.
5527 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5531 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5532 if (result
== NULL
) {
5538 * Do the work by iterating over the descriptor entries we know to
5539 * at least appear to contain valid data.
5542 for (i
= 0; i
< desc_actual
; i
++) {
5544 * If the ad_name_offset is 0, then we use the previous
5545 * results to make the check; otherwise, we are looking up
5548 if (input
[i
].ad_name_offset
!= 0) {
5549 /* discard old vnodes */
5560 * Scan forward in the descriptor list to see if we
5561 * need the parent vnode. We will need it if we are
5562 * deleting, since we must have rights to remove
5563 * entries in the parent directory, as well as the
5564 * rights to delete the object itself.
5566 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5567 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++) {
5568 if (input
[j
].ad_flags
& _DELETE_OK
) {
5573 niopts
= FOLLOW
| AUDITVNPATH1
;
5575 /* need parent for vnode_authorize for deletion test */
5577 niopts
|= WANTPARENT
;
5581 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5582 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5595 * Handle lookup errors.
5605 /* run this access check */
5606 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5609 /* fatal lookup error */
5615 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5617 /* copy out results */
5618 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5621 if (input
&& input
!= stack_input
) {
5622 FREE(input
, M_TEMP
);
5625 FREE(result
, M_TEMP
);
5633 if (IS_VALID_CRED(context
.vc_ucred
)) {
5634 kauth_cred_unref(&context
.vc_ucred
);
5641 * Returns: 0 Success
5642 * namei:EFAULT Bad address
5643 * namei:ENAMETOOLONG Filename too long
5644 * namei:ENOENT No such file or directory
5645 * namei:ELOOP Too many levels of symbolic links
5646 * namei:EBADF Bad file descriptor
5647 * namei:ENOTDIR Not a directory
5652 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5653 int flag
, enum uio_seg segflg
)
5656 struct nameidata nd
;
5658 struct vfs_context context
;
5660 int is_namedstream
= 0;
5664 * Unless the AT_EACCESS option is used, Access is defined as checking
5665 * against the process' real identity, even if operations are checking
5666 * the effective identity. So we need to tweak the credential
5667 * in the context for that case.
5669 if (!(flag
& AT_EACCESS
)) {
5670 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5672 context
.vc_ucred
= ctx
->vc_ucred
;
5674 context
.vc_thread
= ctx
->vc_thread
;
5677 niopts
= FOLLOW
| AUDITVNPATH1
;
5678 /* need parent for vnode_authorize for deletion test */
5679 if (amode
& _DELETE_OK
) {
5680 niopts
|= WANTPARENT
;
5682 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5686 /* access(F_OK) calls are allowed for resource forks. */
5687 if (amode
== F_OK
) {
5688 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5691 error
= nameiat(&nd
, fd
);
5697 /* Grab reference on the shadow stream file vnode to
5698 * force an inactive on release which will mark it
5701 if (vnode_isnamedstream(nd
.ni_vp
) &&
5702 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5703 vnode_isshadow(nd
.ni_vp
)) {
5705 vnode_ref(nd
.ni_vp
);
5709 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5712 if (is_namedstream
) {
5713 vnode_rele(nd
.ni_vp
);
5717 vnode_put(nd
.ni_vp
);
5718 if (amode
& _DELETE_OK
) {
5719 vnode_put(nd
.ni_dvp
);
5724 if (!(flag
& AT_EACCESS
)) {
5725 kauth_cred_unref(&context
.vc_ucred
);
5731 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5733 return faccessat_internal(vfs_context_current(), AT_FDCWD
,
5734 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
);
5738 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5739 __unused
int32_t *retval
)
5741 if (uap
->flag
& ~AT_EACCESS
) {
5745 return faccessat_internal(vfs_context_current(), uap
->fd
,
5746 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
);
5750 * Returns: 0 Success
5757 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5758 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5759 enum uio_seg segflg
, int fd
, int flag
)
5761 struct nameidata nd
;
5768 struct user64_stat user64_sb
;
5769 struct user32_stat user32_sb
;
5770 struct user64_stat64 user64_sb64
;
5771 struct user32_stat64 user32_sb64
;
5775 kauth_filesec_t fsec
;
5776 size_t xsecurity_bufsize
;
5779 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5780 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5784 int is_namedstream
= 0;
5785 /* stat calls are allowed for resource forks. */
5786 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5788 error
= nameiat(&nd
, fd
);
5792 fsec
= KAUTH_FILESEC_NONE
;
5794 statptr
= (void *)&source
;
5797 /* Grab reference on the shadow stream file vnode to
5798 * force an inactive on release which will mark it
5801 if (vnode_isnamedstream(nd
.ni_vp
) &&
5802 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5803 vnode_isshadow(nd
.ni_vp
)) {
5805 vnode_ref(nd
.ni_vp
);
5809 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5812 if (is_namedstream
) {
5813 vnode_rele(nd
.ni_vp
);
5816 vnode_put(nd
.ni_vp
);
5822 /* Zap spare fields */
5823 if (isstat64
!= 0) {
5824 source
.sb64
.st_lspare
= 0;
5825 source
.sb64
.st_qspare
[0] = 0LL;
5826 source
.sb64
.st_qspare
[1] = 0LL;
5827 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5828 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5829 my_size
= sizeof(dest
.user64_sb64
);
5830 sbp
= (caddr_t
)&dest
.user64_sb64
;
5832 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5833 my_size
= sizeof(dest
.user32_sb64
);
5834 sbp
= (caddr_t
)&dest
.user32_sb64
;
5837 * Check if we raced (post lookup) against the last unlink of a file.
5839 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5840 source
.sb64
.st_nlink
= 1;
5843 source
.sb
.st_lspare
= 0;
5844 source
.sb
.st_qspare
[0] = 0LL;
5845 source
.sb
.st_qspare
[1] = 0LL;
5846 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5847 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5848 my_size
= sizeof(dest
.user64_sb
);
5849 sbp
= (caddr_t
)&dest
.user64_sb
;
5851 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5852 my_size
= sizeof(dest
.user32_sb
);
5853 sbp
= (caddr_t
)&dest
.user32_sb
;
5857 * Check if we raced (post lookup) against the last unlink of a file.
5859 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5860 source
.sb
.st_nlink
= 1;
5863 if ((error
= copyout(sbp
, ub
, my_size
)) != 0) {
5867 /* caller wants extended security information? */
5868 if (xsecurity
!= USER_ADDR_NULL
) {
5869 /* did we get any? */
5870 if (fsec
== KAUTH_FILESEC_NONE
) {
5871 if (susize(xsecurity_size
, 0) != 0) {
5876 /* find the user buffer size */
5877 xsecurity_bufsize
= fusize(xsecurity_size
);
5879 /* copy out the actual data size */
5880 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5885 /* if the caller supplied enough room, copy out to it */
5886 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
)) {
5887 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5892 if (fsec
!= KAUTH_FILESEC_NONE
) {
5893 kauth_filesec_free(fsec
);
5899 * stat_extended: Get file status; with extended security (ACL).
5901 * Parameters: p (ignored)
5902 * uap User argument descriptor (see below)
5905 * Indirect: uap->path Path of file to get status from
5906 * uap->ub User buffer (holds file status info)
5907 * uap->xsecurity ACL to get (extended security)
5908 * uap->xsecurity_size Size of ACL
5910 * Returns: 0 Success
5915 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5916 __unused
int32_t *retval
)
5918 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5919 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5924 * Returns: 0 Success
5925 * fstatat_internal:??? [see fstatat_internal() in this file]
5928 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5930 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5931 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0);
5935 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5937 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5938 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0);
5942 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5944 * Parameters: p (ignored)
5945 * uap User argument descriptor (see below)
5948 * Indirect: uap->path Path of file to get status from
5949 * uap->ub User buffer (holds file status info)
5950 * uap->xsecurity ACL to get (extended security)
5951 * uap->xsecurity_size Size of ACL
5953 * Returns: 0 Success
5958 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5960 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5961 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5966 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5968 * Parameters: p (ignored)
5969 * uap User argument descriptor (see below)
5972 * Indirect: uap->path Path of file to get status from
5973 * uap->ub User buffer (holds file status info)
5974 * uap->xsecurity ACL to get (extended security)
5975 * uap->xsecurity_size Size of ACL
5977 * Returns: 0 Success
5982 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5984 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5985 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5986 AT_SYMLINK_NOFOLLOW
);
5990 * Get file status; this version does not follow links.
5993 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5995 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5996 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6000 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
6002 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6003 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
);
6007 * lstat64_extended: Get file status; can handle large inode numbers; does not
6008 * follow links; with extended security (ACL).
6010 * Parameters: p (ignored)
6011 * uap User argument descriptor (see below)
6014 * Indirect: uap->path Path of file to get status from
6015 * uap->ub User buffer (holds file status info)
6016 * uap->xsecurity ACL to get (extended security)
6017 * uap->xsecurity_size Size of ACL
6019 * Returns: 0 Success
6024 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
6026 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6027 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
6028 AT_SYMLINK_NOFOLLOW
);
6032 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
6034 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6038 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6039 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6043 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
6044 __unused
int32_t *retval
)
6046 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6050 return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
6051 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
);
6055 * Get configurable pathname variables.
6057 * Returns: 0 Success
6061 * Notes: Global implementation constants are intended to be
6062 * implemented in this function directly; all other constants
6063 * are per-FS implementation, and therefore must be handled in
6064 * each respective FS, instead.
6066 * XXX We implement some things globally right now that should actually be
6067 * XXX per-FS; we will need to deal with this at some point.
6071 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
6074 struct nameidata nd
;
6075 vfs_context_t ctx
= vfs_context_current();
6077 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
6078 UIO_USERSPACE
, uap
->path
, ctx
);
6084 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
6086 vnode_put(nd
.ni_vp
);
6092 * Return target name of a symbolic link.
6096 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
6097 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
6103 struct nameidata nd
;
6104 char uio_buf
[UIO_SIZEOF(1)];
6106 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
6109 error
= nameiat(&nd
, fd
);
6117 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
6118 &uio_buf
[0], sizeof(uio_buf
));
6119 uio_addiov(auio
, buf
, bufsize
);
6120 if (vp
->v_type
!= VLNK
) {
6124 error
= mac_vnode_check_readlink(ctx
, vp
);
6127 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
6131 error
= VNOP_READLINK(vp
, auio
, ctx
);
6136 *retval
= bufsize
- (int)uio_resid(auio
);
6141 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
6143 enum uio_seg procseg
;
6145 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6146 return readlinkat_internal(vfs_context_current(), AT_FDCWD
,
6147 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
6148 uap
->count
, procseg
, retval
);
6152 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
6154 enum uio_seg procseg
;
6156 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6157 return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6158 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
);
6162 * Change file flags.
6164 * NOTE: this will vnode_put() `vp'
6167 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
6169 struct vnode_attr va
;
6170 kauth_action_t action
;
6174 VATTR_SET(&va
, va_flags
, flags
);
6177 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
6183 /* request authorisation, disregard immutability */
6184 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6188 * Request that the auth layer disregard those file flags it's allowed to when
6189 * authorizing this operation; we need to do this in order to be able to
6190 * clear immutable flags.
6192 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) {
6195 error
= vnode_setattr(vp
, &va
, ctx
);
6199 mac_vnode_notify_setflags(ctx
, vp
, flags
);
6203 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6212 * Change flags of a file given a path name.
6216 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6219 vfs_context_t ctx
= vfs_context_current();
6221 struct nameidata nd
;
6223 AUDIT_ARG(fflags
, uap
->flags
);
6224 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6225 UIO_USERSPACE
, uap
->path
, ctx
);
6233 /* we don't vnode_put() here because chflags1 does internally */
6234 error
= chflags1(vp
, uap
->flags
, ctx
);
6240 * Change flags of a file given a file descriptor.
6244 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6249 AUDIT_ARG(fd
, uap
->fd
);
6250 AUDIT_ARG(fflags
, uap
->flags
);
6251 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6255 if ((error
= vnode_getwithref(vp
))) {
6260 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6262 /* we don't vnode_put() here because chflags1 does internally */
6263 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6270 * Change security information on a filesystem object.
6272 * Returns: 0 Success
6273 * EPERM Operation not permitted
6274 * vnode_authattr:??? [anything vnode_authattr can return]
6275 * vnode_authorize:??? [anything vnode_authorize can return]
6276 * vnode_setattr:??? [anything vnode_setattr can return]
6278 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6279 * translated to EPERM before being returned.
6282 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6284 kauth_action_t action
;
6287 AUDIT_ARG(mode
, vap
->va_mode
);
6288 /* XXX audit new args */
6291 /* chmod calls are not allowed for resource forks. */
6292 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6298 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6299 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) {
6303 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6304 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6305 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6306 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1))) {
6311 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6312 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) {
6317 /* make sure that the caller is allowed to set this security information */
6318 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6319 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6320 if (error
== EACCES
) {
6326 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0) {
6331 if (VATTR_IS_ACTIVE(vap
, va_mode
)) {
6332 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6335 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6336 mac_vnode_notify_setowner(ctx
, vp
,
6337 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6338 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6341 if (VATTR_IS_ACTIVE(vap
, va_acl
)) {
6342 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6351 * Change mode of a file given a path name.
6353 * Returns: 0 Success
6354 * namei:??? [anything namei can return]
6355 * chmod_vnode:??? [anything chmod_vnode can return]
6358 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6359 int fd
, int flag
, enum uio_seg segflg
)
6361 struct nameidata nd
;
6364 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6365 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6367 if ((error
= nameiat(&nd
, fd
))) {
6370 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6371 vnode_put(nd
.ni_vp
);
6377 * chmod_extended: Change the mode of a file given a path name; with extended
6378 * argument list (including extended security (ACL)).
6380 * Parameters: p Process requesting the open
6381 * uap User argument descriptor (see below)
6384 * Indirect: uap->path Path to object (same as 'chmod')
6385 * uap->uid UID to set
6386 * uap->gid GID to set
6387 * uap->mode File mode to set (same as 'chmod')
6388 * uap->xsecurity ACL to set (or delete)
6390 * Returns: 0 Success
6393 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6395 * XXX: We should enummerate the possible errno values here, and where
6396 * in the code they originated.
6399 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6402 struct vnode_attr va
;
6403 kauth_filesec_t xsecdst
;
6405 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6408 if (uap
->mode
!= -1) {
6409 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6411 if (uap
->uid
!= KAUTH_UID_NONE
) {
6412 VATTR_SET(&va
, va_uid
, uap
->uid
);
6414 if (uap
->gid
!= KAUTH_GID_NONE
) {
6415 VATTR_SET(&va
, va_gid
, uap
->gid
);
6419 switch (uap
->xsecurity
) {
6420 /* explicit remove request */
6421 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6422 VATTR_SET(&va
, va_acl
, NULL
);
6425 case USER_ADDR_NULL
:
6428 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6431 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6432 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6435 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6438 if (xsecdst
!= NULL
) {
6439 kauth_filesec_free(xsecdst
);
6445 * Returns: 0 Success
6446 * chmodat:??? [anything chmodat can return]
6449 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6450 int flag
, enum uio_seg segflg
)
6452 struct vnode_attr va
;
6455 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6457 return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
);
6461 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6463 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6464 AT_FDCWD
, 0, UIO_USERSPACE
);
6468 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6470 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6474 return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6475 uap
->fd
, uap
->flag
, UIO_USERSPACE
);
6479 * Change mode of a file given a file descriptor.
6482 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6489 if ((error
= file_vnode(fd
, &vp
)) != 0) {
6492 if ((error
= vnode_getwithref(vp
)) != 0) {
6496 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6498 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6499 (void)vnode_put(vp
);
6506 * fchmod_extended: Change mode of a file given a file descriptor; with
6507 * extended argument list (including extended security (ACL)).
6509 * Parameters: p Process requesting to change file mode
6510 * uap User argument descriptor (see below)
6513 * Indirect: uap->mode File mode to set (same as 'chmod')
6514 * uap->uid UID to set
6515 * uap->gid GID to set
6516 * uap->xsecurity ACL to set (or delete)
6517 * uap->fd File descriptor of file to change mode
6519 * Returns: 0 Success
6524 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6527 struct vnode_attr va
;
6528 kauth_filesec_t xsecdst
;
6530 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6533 if (uap
->mode
!= -1) {
6534 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6536 if (uap
->uid
!= KAUTH_UID_NONE
) {
6537 VATTR_SET(&va
, va_uid
, uap
->uid
);
6539 if (uap
->gid
!= KAUTH_GID_NONE
) {
6540 VATTR_SET(&va
, va_gid
, uap
->gid
);
6544 switch (uap
->xsecurity
) {
6545 case USER_ADDR_NULL
:
6546 VATTR_SET(&va
, va_acl
, NULL
);
6548 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6549 VATTR_SET(&va
, va_acl
, NULL
);
6552 case CAST_USER_ADDR_T(-1):
6555 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
6558 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6561 error
= fchmod1(p
, uap
->fd
, &va
);
6564 switch (uap
->xsecurity
) {
6565 case USER_ADDR_NULL
:
6566 case CAST_USER_ADDR_T(-1):
6569 if (xsecdst
!= NULL
) {
6570 kauth_filesec_free(xsecdst
);
6577 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6579 struct vnode_attr va
;
6582 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6584 return fchmod1(p
, uap
->fd
, &va
);
6589 * Set ownership given a path name.
6593 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6594 gid_t gid
, int flag
, enum uio_seg segflg
)
6597 struct vnode_attr va
;
6599 struct nameidata nd
;
6601 kauth_action_t action
;
6603 AUDIT_ARG(owner
, uid
, gid
);
6605 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6606 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6608 error
= nameiat(&nd
, fd
);
6617 if (uid
!= (uid_t
)VNOVAL
) {
6618 VATTR_SET(&va
, va_uid
, uid
);
6620 if (gid
!= (gid_t
)VNOVAL
) {
6621 VATTR_SET(&va
, va_gid
, gid
);
6625 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6631 /* preflight and authorize attribute changes */
6632 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6635 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6638 error
= vnode_setattr(vp
, &va
, ctx
);
6642 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6648 * EACCES is only allowed from namei(); permissions failure should
6649 * return EPERM, so we need to translate the error code.
6651 if (error
== EACCES
) {
6660 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6662 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6663 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
);
6667 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6669 return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6670 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
);
6674 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6676 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
) {
6680 return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6681 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
);
6685 * Set ownership given a file descriptor.
6689 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6691 struct vnode_attr va
;
6692 vfs_context_t ctx
= vfs_context_current();
6695 kauth_action_t action
;
6697 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6698 AUDIT_ARG(fd
, uap
->fd
);
6700 if ((error
= file_vnode(uap
->fd
, &vp
))) {
6704 if ((error
= vnode_getwithref(vp
))) {
6708 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6711 if (uap
->uid
!= VNOVAL
) {
6712 VATTR_SET(&va
, va_uid
, uap
->uid
);
6714 if (uap
->gid
!= VNOVAL
) {
6715 VATTR_SET(&va
, va_gid
, uap
->gid
);
6719 /* chown calls are not allowed for resource forks. */
6720 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6727 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6733 /* preflight and authorize attribute changes */
6734 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6737 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6738 if (error
== EACCES
) {
6743 error
= vnode_setattr(vp
, &va
, ctx
);
6747 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6752 (void)vnode_put(vp
);
6758 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6762 if (usrtvp
== USER_ADDR_NULL
) {
6763 struct timeval old_tv
;
6764 /* XXX Y2038 bug because of microtime argument */
6766 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6769 if (IS_64BIT_PROCESS(current_proc())) {
6770 struct user64_timeval tv
[2];
6771 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6775 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6776 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6778 struct user32_timeval tv
[2];
6779 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6783 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6784 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6791 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6795 struct vnode_attr va
;
6796 kauth_action_t action
;
6798 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6801 VATTR_SET(&va
, va_access_time
, ts
[0]);
6802 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6804 va
.va_vaflags
|= VA_UTIMES_NULL
;
6808 /* utimes calls are not allowed for resource forks. */
6809 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6816 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6821 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6822 if (!nullflag
&& error
== EACCES
) {
6828 /* since we may not need to auth anything, check here */
6829 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6830 if (!nullflag
&& error
== EACCES
) {
6835 error
= vnode_setattr(vp
, &va
, ctx
);
6839 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6848 * Set the access and modification times of a file.
6852 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6854 struct timespec ts
[2];
6857 struct nameidata nd
;
6858 vfs_context_t ctx
= vfs_context_current();
6861 * AUDIT: Needed to change the order of operations to do the
6862 * name lookup first because auditing wants the path.
6864 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6865 UIO_USERSPACE
, uap
->path
, ctx
);
6873 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6874 * the current time instead.
6877 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
6881 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6884 vnode_put(nd
.ni_vp
);
6889 * Set the access and modification times of a file.
6893 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6895 struct timespec ts
[2];
6900 AUDIT_ARG(fd
, uap
->fd
);
6902 if ((error
= getutimes(usrtvp
, ts
)) != 0) {
6905 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
6908 if ((error
= vnode_getwithref(vp
))) {
6913 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6920 * Truncate a file given its path name.
6924 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6927 struct vnode_attr va
;
6928 vfs_context_t ctx
= vfs_context_current();
6930 struct nameidata nd
;
6931 kauth_action_t action
;
6933 if (uap
->length
< 0) {
6936 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6937 UIO_USERSPACE
, uap
->path
, ctx
);
6938 if ((error
= namei(&nd
))) {
6946 VATTR_SET(&va
, va_data_size
, uap
->length
);
6949 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6955 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6958 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6961 error
= vnode_setattr(vp
, &va
, ctx
);
6965 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6975 * Truncate a file given a file descriptor.
6979 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6981 vfs_context_t ctx
= vfs_context_current();
6982 struct vnode_attr va
;
6984 struct fileproc
*fp
;
6988 AUDIT_ARG(fd
, uap
->fd
);
6989 if (uap
->length
< 0) {
6993 if ((error
= fp_lookup(p
, fd
, &fp
, 0))) {
6997 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6999 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
7008 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
7010 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
7011 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7016 if ((error
= vnode_getwithref(vp
)) != 0) {
7020 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7023 error
= mac_vnode_check_truncate(ctx
,
7024 fp
->f_fglob
->fg_cred
, vp
);
7026 (void)vnode_put(vp
);
7031 VATTR_SET(&va
, va_data_size
, uap
->length
);
7032 error
= vnode_setattr(vp
, &va
, ctx
);
7036 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
7040 (void)vnode_put(vp
);
7048 * Sync an open file with synchronized I/O _file_ integrity completion
7052 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
7054 __pthread_testcancel(1);
7055 return fsync_common(p
, uap
, MNT_WAIT
);
7060 * Sync an open file with synchronized I/O _file_ integrity completion
7062 * Notes: This is a legacy support function that does not test for
7063 * thread cancellation points.
7067 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
7069 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
);
7074 * Sync an open file with synchronized I/O _data_ integrity completion
7078 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
7080 __pthread_testcancel(1);
7081 return fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
);
7088 * Common fsync code to support both synchronized I/O file integrity completion
7089 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7091 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7092 * will only guarantee that the file data contents are retrievable. If
7093 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
7094 * includes additional metadata unnecessary for retrieving the file data
7095 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7098 * Parameters: p The process
7099 * uap->fd The descriptor to synchronize
7100 * flags The data integrity flags
7102 * Returns: int Success
7103 * fp_getfvp:EBADF Bad file descriptor
7104 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7105 * VNOP_FSYNC:??? unspecified
7107 * Notes: We use struct fsync_args because it is a short name, and all
7108 * caller argument structures are otherwise identical.
7111 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
7114 struct fileproc
*fp
;
7115 vfs_context_t ctx
= vfs_context_current();
7118 AUDIT_ARG(fd
, uap
->fd
);
7120 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
7123 if ((error
= vnode_getwithref(vp
))) {
7128 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7130 error
= VNOP_FSYNC(vp
, flags
, ctx
);
7133 /* Sync resource fork shadow file if necessary. */
7135 (vp
->v_flag
& VISNAMEDSTREAM
) &&
7136 (vp
->v_parent
!= NULLVP
) &&
7137 vnode_isshadow(vp
) &&
7138 (fp
->f_flags
& FP_WRITTEN
)) {
7139 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
7143 (void)vnode_put(vp
);
7149 * Duplicate files. Source must be a file, target must be a file or
7152 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7153 * perform inheritance correctly.
7157 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
7159 vnode_t tvp
, fvp
, tdvp
, sdvp
;
7160 struct nameidata fromnd
, tond
;
7162 vfs_context_t ctx
= vfs_context_current();
7164 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
7165 struct vnode_attr va
;
7168 /* Check that the flags are valid. */
7170 if (uap
->flags
& ~CPF_MASK
) {
7174 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
7175 UIO_USERSPACE
, uap
->from
, ctx
);
7176 if ((error
= namei(&fromnd
))) {
7181 NDINIT(&tond
, CREATE
, OP_LINK
,
7182 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7183 UIO_USERSPACE
, uap
->to
, ctx
);
7184 if ((error
= namei(&tond
))) {
7191 if (!(uap
->flags
& CPF_OVERWRITE
)) {
7197 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
7202 /* This calls existing MAC hooks for open */
7203 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
7210 * See unlinkat_internal for an explanation of the potential
7211 * ENOENT from the MAC hook but the gist is that the MAC hook
7212 * can fail because vn_getpath isn't able to return the full
7213 * path. We choose to ignore this failure.
7215 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
7216 if (error
&& error
!= ENOENT
) {
7224 VATTR_SET(&va
, va_type
, fvp
->v_type
);
7225 /* Mask off all but regular access permissions */
7226 VATTR_SET(&va
, va_mode
,
7227 ((((uap
->mode
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
7228 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
7232 #endif /* CONFIG_MACF */
7234 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) {
7242 * If source is the same as the destination (that is the
7243 * same inode number) then there is nothing to do.
7244 * (fixed to have POSIX semantics - CSM 3/2/98)
7250 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
7253 sdvp
= tond
.ni_startdir
;
7255 * nameidone has to happen before we vnode_put(tdvp)
7256 * since it may need to release the fs_nodelock on the tdvp
7276 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7279 * Helper function for doing clones. The caller is expected to provide an
7280 * iocounted source vnode and release it.
7283 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7284 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7287 struct nameidata tond
;
7290 boolean_t free_src_acl
;
7291 boolean_t attr_cleanup
;
7293 kauth_action_t action
;
7294 struct componentname
*cnp
;
7296 struct vnode_attr va
;
7297 struct vnode_attr nva
;
7298 uint32_t vnop_flags
;
7300 v_type
= vnode_vtype(fvp
);
7305 action
= KAUTH_VNODE_ADD_FILE
;
7308 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7309 fvp
->v_mountedhere
) {
7312 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7318 AUDIT_ARG(fd2
, dst_dirfd
);
7319 AUDIT_ARG(value32
, flags
);
7321 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7322 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7323 UIO_USERSPACE
, dst
, ctx
);
7324 if ((error
= nameiat(&tond
, dst_dirfd
))) {
7331 free_src_acl
= FALSE
;
7332 attr_cleanup
= FALSE
;
7339 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7345 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) {
7349 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) {
7353 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7354 if (data_read_authorised
) {
7355 action
&= ~KAUTH_VNODE_READ_DATA
;
7357 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
))) {
7362 * certain attributes may need to be changed from the source, we ask for
7366 VATTR_WANTED(&va
, va_uid
);
7367 VATTR_WANTED(&va
, va_gid
);
7368 VATTR_WANTED(&va
, va_mode
);
7369 VATTR_WANTED(&va
, va_flags
);
7370 VATTR_WANTED(&va
, va_acl
);
7372 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0) {
7377 VATTR_SET(&nva
, va_type
, v_type
);
7378 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7379 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7380 free_src_acl
= TRUE
;
7383 /* Handle ACL inheritance, initialize vap. */
7384 if (v_type
== VLNK
) {
7385 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7387 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7391 attr_cleanup
= TRUE
;
7394 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7396 * We've got initial values for all security parameters,
7397 * If we are superuser, then we can change owners to be the
7398 * same as the source. Both superuser and the owner have default
7399 * WRITE_SECURITY privileges so all other fields can be taken
7400 * from source as well.
7402 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7403 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) {
7404 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7406 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) {
7407 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7410 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7413 if (VATTR_IS_SUPPORTED(&va
, va_mode
)) {
7414 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
7416 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7417 VATTR_SET(&nva
, va_flags
,
7418 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7419 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7422 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7424 if (!error
&& tvp
) {
7425 int update_flags
= 0;
7428 #endif /* CONFIG_FSE */
7431 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7432 VNODE_LABEL_CREATE
, ctx
);
7435 * If some of the requested attributes weren't handled by the
7436 * VNOP, use our fallback code.
7438 if (!VATTR_ALL_SUPPORTED(&va
)) {
7439 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7442 // Make sure the name & parent pointers are hooked up
7443 if (tvp
->v_name
== NULL
) {
7444 update_flags
|= VNODE_UPDATE_NAME
;
7446 if (tvp
->v_parent
== NULLVP
) {
7447 update_flags
|= VNODE_UPDATE_PARENT
;
7451 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7452 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7456 switch (vnode_vtype(tvp
)) {
7460 fsevent
= FSE_CREATE_FILE
;
7463 fsevent
= FSE_CREATE_DIR
;
7469 if (need_fsevent(fsevent
, tvp
)) {
7471 * The following is a sequence of three explicit events.
7472 * A pair of FSE_CLONE events representing the source and destination
7473 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7474 * fseventsd may coalesce the destination clone and create events
7475 * into a single event resulting in the following sequence for a client
7477 * FSE_CLONE | FSE_CREATE (dst)
7479 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7481 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7484 #endif /* CONFIG_FSE */
7489 vn_attribute_cleanup(&nva
, defaulted
);
7491 if (free_src_acl
&& va
.va_acl
) {
7492 kauth_acl_free(va
.va_acl
);
7503 * clone files or directories, target must not exist.
7507 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7508 __unused
int32_t *retval
)
7511 struct nameidata fromnd
;
7514 vfs_context_t ctx
= vfs_context_current();
7516 /* Check that the flags are valid. */
7517 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7521 AUDIT_ARG(fd
, uap
->src_dirfd
);
7523 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7524 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7525 UIO_USERSPACE
, uap
->src
, ctx
);
7526 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
))) {
7533 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
7541 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7542 __unused
int32_t *retval
)
7545 struct fileproc
*fp
;
7547 vfs_context_t ctx
= vfs_context_current();
7549 /* Check that the flags are valid. */
7550 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
)) {
7554 AUDIT_ARG(fd
, uap
->src_fd
);
7555 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7560 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7561 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7566 if ((error
= vnode_getwithref(fvp
))) {
7570 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7572 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7577 file_drop(uap
->src_fd
);
7582 * Rename files. Source and destination must either both be directories,
7583 * or both not be directories. If target is a directory, it must be empty.
7587 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7588 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7590 if (flags
& ~VFS_RENAME_FLAGS_MASK
) {
7594 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) {
7600 struct nameidata
*fromnd
, *tond
;
7608 const char *oname
= NULL
;
7609 char *from_name
= NULL
, *to_name
= NULL
;
7610 int from_len
= 0, to_len
= 0;
7611 int holding_mntlock
;
7612 mount_t locked_mp
= NULL
;
7613 vnode_t oparent
= NULLVP
;
7615 fse_info from_finfo
, to_finfo
;
7617 int from_truncated
= 0, to_truncated
;
7619 struct vnode_attr
*fvap
, *tvap
;
7621 /* carving out a chunk for structs that are too big to be on stack. */
7623 struct nameidata from_node
, to_node
;
7624 struct vnode_attr fv_attr
, tv_attr
;
7626 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7627 fromnd
= &__rename_data
->from_node
;
7628 tond
= &__rename_data
->to_node
;
7630 holding_mntlock
= 0;
7639 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7641 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7643 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7645 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7648 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7649 if ((error
= nameiat(fromnd
, fromfd
))) {
7652 fdvp
= fromnd
->ni_dvp
;
7653 fvp
= fromnd
->ni_vp
;
7655 if (fvp
&& fvp
->v_type
== VDIR
) {
7656 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7660 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7661 if ((error
= nameiat(tond
, tofd
))) {
7663 * Translate error code for rename("dir1", "dir2/.").
7665 if (error
== EISDIR
&& fvp
->v_type
== VDIR
) {
7670 tdvp
= tond
->ni_dvp
;
7674 #if DEVELOPMENT || DEBUG
7676 * XXX VSWAP: Check for entitlements or special flag here
7677 * so we can restrict access appropriately.
7679 #else /* DEVELOPMENT || DEBUG */
7681 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7686 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7690 #endif /* DEVELOPMENT || DEBUG */
7692 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7697 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7702 batched
= vnode_compound_rename_available(fdvp
);
7705 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7708 get_fse_info(fvp
, &from_finfo
, ctx
);
7710 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7715 fvap
= &__rename_data
->fv_attr
;
7719 get_fse_info(tvp
, &to_finfo
, ctx
);
7720 } else if (batched
) {
7721 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7726 tvap
= &__rename_data
->tv_attr
;
7731 #endif /* CONFIG_FSE */
7733 has_listeners
= kauth_authorize_fileop_has_listeners();
7737 if (AUDIT_RECORD_EXISTS()) {
7742 if (need_event
|| has_listeners
) {
7743 if (from_name
== NULL
) {
7744 GET_PATH(from_name
);
7745 if (from_name
== NULL
) {
7751 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7754 if (need_event
|| need_kpath2
|| has_listeners
) {
7755 if (to_name
== NULL
) {
7757 if (to_name
== NULL
) {
7763 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7764 if (to_name
&& need_kpath2
) {
7765 AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
);
7770 * Claim: this check will never reject a valid rename.
7771 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7772 * Suppose fdvp and tdvp are not on the same mount.
7773 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7774 * then you can't move it to within another dir on the same mountpoint.
7775 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7777 * If this check passes, then we are safe to pass these vnodes to the same FS.
7779 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7783 goto skipped_lookup
;
7787 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
7789 if (error
== ENOENT
) {
7790 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7791 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7793 * We encountered a race where after doing the namei, tvp stops
7794 * being valid. If so, simply re-drive the rename call from the
7806 * If the source and destination are the same (i.e. they're
7807 * links to the same vnode) and the target file system is
7808 * case sensitive, then there is nothing to do.
7810 * XXX Come back to this.
7816 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7817 * then assume that this file system is case sensitive.
7819 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7820 pathconf_val
!= 0) {
7826 * Allow the renaming of mount points.
7827 * - target must not exist
7828 * - target must reside in the same directory as source
7829 * - union mounts cannot be renamed
7830 * - "/" cannot be renamed
7832 * XXX Handle this in VFS after a continued lookup (if we missed
7833 * in the cache to start off)
7835 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7836 * we'll skip past here. The file system is responsible for
7837 * checking that @tvp is not a descendent of @fvp and vice versa
7838 * so it should always return EINVAL if either @tvp or @fvp is the
7841 if ((fvp
->v_flag
& VROOT
) &&
7842 (fvp
->v_type
== VDIR
) &&
7844 (fvp
->v_mountedhere
== NULL
) &&
7846 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7847 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7850 /* switch fvp to the covered vnode */
7851 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7852 if ((vnode_getwithref(coveredvp
))) {
7862 * Check for cross-device rename.
7864 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7865 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7871 * If source is the same as the destination (that is the
7872 * same inode number) then there is nothing to do...
7873 * EXCEPT if the underlying file system supports case
7874 * insensitivity and is case preserving. In this case
7875 * the file system needs to handle the special case of
7876 * getting the same vnode as target (fvp) and source (tvp).
7878 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7879 * and _PC_CASE_PRESERVING can have this exception, and they need to
7880 * handle the special case of getting the same vnode as target and
7881 * source. NOTE: Then the target is unlocked going into vnop_rename,
7882 * so not to cause locking problems. There is a single reference on tvp.
7884 * NOTE - that fvp == tvp also occurs if they are hard linked and
7885 * that correct behaviour then is just to return success without doing
7888 * XXX filesystem should take care of this itself, perhaps...
7890 if (fvp
== tvp
&& fdvp
== tdvp
) {
7891 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7892 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7893 fromnd
->ni_cnd
.cn_namelen
)) {
7898 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7900 * we're holding a reference and lock
7901 * on locked_mp, but it no longer matches
7902 * what we want to do... so drop our hold
7904 mount_unlock_renames(locked_mp
);
7905 mount_drop(locked_mp
, 0);
7906 holding_mntlock
= 0;
7908 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7910 * serialize renames that re-shape
7911 * the tree... if holding_mntlock is
7912 * set, then we're ready to go...
7914 * first need to drop the iocounts
7915 * we picked up, second take the
7916 * lock to serialize the access,
7917 * then finally start the lookup
7918 * process over with the lock held
7920 if (!holding_mntlock
) {
7922 * need to grab a reference on
7923 * the mount point before we
7924 * drop all the iocounts... once
7925 * the iocounts are gone, the mount
7928 locked_mp
= fvp
->v_mount
;
7929 mount_ref(locked_mp
, 0);
7932 * nameidone has to happen before we vnode_put(tvp)
7933 * since it may need to release the fs_nodelock on the tvp
7943 * nameidone has to happen before we vnode_put(fdvp)
7944 * since it may need to release the fs_nodelock on the fvp
7951 mount_lock_renames(locked_mp
);
7952 holding_mntlock
= 1;
7958 * when we dropped the iocounts to take
7959 * the lock, we allowed the identity of
7960 * the various vnodes to change... if they did,
7961 * we may no longer be dealing with a rename
7962 * that reshapes the tree... once we're holding
7963 * the iocounts, the vnodes can't change type
7964 * so we're free to drop the lock at this point
7967 if (holding_mntlock
) {
7968 mount_unlock_renames(locked_mp
);
7969 mount_drop(locked_mp
, 0);
7970 holding_mntlock
= 0;
7974 // save these off so we can later verify that fvp is the same
7975 oname
= fvp
->v_name
;
7976 oparent
= fvp
->v_parent
;
7979 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7980 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7983 if (holding_mntlock
) {
7985 * we can drop our serialization
7988 mount_unlock_renames(locked_mp
);
7989 mount_drop(locked_mp
, 0);
7990 holding_mntlock
= 0;
7993 if (error
== EKEEPLOOKING
) {
7994 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7995 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7996 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8000 fromnd
->ni_vp
= fvp
;
8003 goto continue_lookup
;
8007 * We may encounter a race in the VNOP where the destination didn't
8008 * exist when we did the namei, but it does by the time we go and
8009 * try to create the entry. In this case, we should re-drive this rename
8010 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8011 * but other filesystems susceptible to this race could return it, too.
8013 if (error
== ERECYCLE
) {
8018 * For compound VNOPs, the authorization callback may return
8019 * ENOENT in case of racing hardlink lookups hitting the name
8020 * cache, redrive the lookup.
8022 if (batched
&& error
== ENOENT
) {
8023 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8024 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8033 /* call out to allow 3rd party notification of rename.
8034 * Ignore result of kauth_authorize_fileop call.
8036 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8037 KAUTH_FILEOP_RENAME
,
8038 (uintptr_t)from_name
, (uintptr_t)to_name
);
8039 if (flags
& VFS_RENAME_SWAP
) {
8040 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8041 KAUTH_FILEOP_RENAME
,
8042 (uintptr_t)to_name
, (uintptr_t)from_name
);
8046 if (from_name
!= NULL
&& to_name
!= NULL
) {
8047 if (from_truncated
|| to_truncated
) {
8048 // set it here since only the from_finfo gets reported up to user space
8049 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8053 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
8056 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
8060 add_fsevent(FSE_RENAME
, ctx
,
8061 FSE_ARG_STRING
, from_len
, from_name
,
8062 FSE_ARG_FINFO
, &from_finfo
,
8063 FSE_ARG_STRING
, to_len
, to_name
,
8064 FSE_ARG_FINFO
, &to_finfo
,
8066 if (flags
& VFS_RENAME_SWAP
) {
8068 * Strictly speaking, swap is the equivalent of
8069 * *three* renames. FSEvents clients should only take
8070 * the events as a hint, so we only bother reporting
8073 add_fsevent(FSE_RENAME
, ctx
,
8074 FSE_ARG_STRING
, to_len
, to_name
,
8075 FSE_ARG_FINFO
, &to_finfo
,
8076 FSE_ARG_STRING
, from_len
, from_name
,
8077 FSE_ARG_FINFO
, &from_finfo
,
8081 add_fsevent(FSE_RENAME
, ctx
,
8082 FSE_ARG_STRING
, from_len
, from_name
,
8083 FSE_ARG_FINFO
, &from_finfo
,
8084 FSE_ARG_STRING
, to_len
, to_name
,
8088 #endif /* CONFIG_FSE */
8091 * update filesystem's mount point data
8094 char *cp
, *pathend
, *mpname
;
8100 mp
= fvp
->v_mountedhere
;
8102 if (vfs_busy(mp
, LK_NOWAIT
)) {
8106 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
8108 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
8109 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
8111 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
8114 /* find current mount point prefix */
8115 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
8116 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
8121 /* find last component of target name */
8122 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
8127 /* append name to prefix */
8128 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
8129 bzero(pathend
, maxlen
);
8130 strlcpy(pathend
, mpname
, maxlen
);
8132 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
8137 * fix up name & parent pointers. note that we first
8138 * check that fvp has the same name/parent pointers it
8139 * had before the rename call... this is a 'weak' check
8142 * XXX oparent and oname may not be set in the compound vnop case
8144 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
8147 update_flags
= VNODE_UPDATE_NAME
;
8150 update_flags
|= VNODE_UPDATE_PARENT
;
8153 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
8156 if (to_name
!= NULL
) {
8157 RELEASE_PATH(to_name
);
8160 if (from_name
!= NULL
) {
8161 RELEASE_PATH(from_name
);
8164 if (holding_mntlock
) {
8165 mount_unlock_renames(locked_mp
);
8166 mount_drop(locked_mp
, 0);
8167 holding_mntlock
= 0;
8171 * nameidone has to happen before we vnode_put(tdvp)
8172 * since it may need to release the fs_nodelock on the tdvp
8183 * nameidone has to happen before we vnode_put(fdvp)
8184 * since it may need to release the fs_nodelock on the fdvp
8195 * If things changed after we did the namei, then we will re-drive
8196 * this rename call from the top.
8203 FREE(__rename_data
, M_TEMP
);
8208 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
8210 return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
8211 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0);
8215 renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
8217 return renameat_internal(
8218 vfs_context_current(),
8219 uap
->fromfd
, uap
->from
,
8221 UIO_USERSPACE
, uap
->flags
);
8225 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
8227 return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
8228 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0);
8232 * Make a directory file.
8234 * Returns: 0 Success
8237 * vnode_authorize:???
8242 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
8243 enum uio_seg segflg
)
8247 int update_flags
= 0;
8249 struct nameidata nd
;
8251 AUDIT_ARG(mode
, vap
->va_mode
);
8252 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
8254 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
8255 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
8258 error
= nameiat(&nd
, fd
);
8270 batched
= vnode_compound_mkdir_available(dvp
);
8272 VATTR_SET(vap
, va_type
, VDIR
);
8276 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8277 * only get EXISTS or EISDIR for existing path components, and not that it could see
8278 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8279 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8281 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
8282 if (error
== EACCES
|| error
== EPERM
) {
8290 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8291 * rather than EACCESS if the target exists.
8293 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
8295 error2
= nameiat(&nd
, fd
);
8309 * make the directory
8311 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
8312 if (error
== EKEEPLOOKING
) {
8314 goto continue_lookup
;
8320 // Make sure the name & parent pointers are hooked up
8321 if (vp
->v_name
== NULL
) {
8322 update_flags
|= VNODE_UPDATE_NAME
;
8324 if (vp
->v_parent
== NULLVP
) {
8325 update_flags
|= VNODE_UPDATE_PARENT
;
8329 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8333 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8338 * nameidone has to happen before we vnode_put(dvp)
8339 * since it may need to release the fs_nodelock on the dvp
8354 * mkdir_extended: Create a directory; with extended security (ACL).
8356 * Parameters: p Process requesting to create the directory
8357 * uap User argument descriptor (see below)
8360 * Indirect: uap->path Path of directory to create
8361 * uap->mode Access permissions to set
8362 * uap->xsecurity ACL to set
8364 * Returns: 0 Success
8369 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8372 kauth_filesec_t xsecdst
;
8373 struct vnode_attr va
;
8375 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8378 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8379 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) {
8384 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8385 if (xsecdst
!= NULL
) {
8386 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8389 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8391 if (xsecdst
!= NULL
) {
8392 kauth_filesec_free(xsecdst
);
8398 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8400 struct vnode_attr va
;
8403 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8405 return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8410 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8412 struct vnode_attr va
;
8415 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8417 return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
8422 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8423 enum uio_seg segflg
)
8427 struct nameidata nd
;
8430 int has_listeners
= 0;
8434 struct vnode_attr va
;
8435 #endif /* CONFIG_FSE */
8436 struct vnode_attr
*vap
= NULL
;
8437 int restart_count
= 0;
8443 * This loop exists to restart rmdir in the unlikely case that two
8444 * processes are simultaneously trying to remove the same directory
8445 * containing orphaned appleDouble files.
8448 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8449 segflg
, dirpath
, ctx
);
8450 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8455 error
= nameiat(&nd
, fd
);
8464 batched
= vnode_compound_rmdir_available(vp
);
8466 if (vp
->v_flag
& VROOT
) {
8468 * The root of a mounted filesystem cannot be deleted.
8474 #if DEVELOPMENT || DEBUG
8476 * XXX VSWAP: Check for entitlements or special flag here
8477 * so we can restrict access appropriately.
8479 #else /* DEVELOPMENT || DEBUG */
8481 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8485 #endif /* DEVELOPMENT || DEBUG */
8488 * Removed a check here; we used to abort if vp's vid
8489 * was not the same as what we'd seen the last time around.
8490 * I do not think that check was valid, because if we retry
8491 * and all dirents are gone, the directory could legitimately
8492 * be recycled but still be present in a situation where we would
8493 * have had permission to delete. Therefore, we won't make
8494 * an effort to preserve that check now that we may not have a
8499 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
8501 if (error
== ENOENT
) {
8502 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8503 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8514 if (!vnode_compound_rmdir_available(dvp
)) {
8515 panic("No error, but no compound rmdir?");
8522 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8525 get_fse_info(vp
, &finfo
, ctx
);
8527 error
= vfs_get_notify_attributes(&va
);
8536 has_listeners
= kauth_authorize_fileop_has_listeners();
8537 if (need_event
|| has_listeners
) {
8546 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
8549 finfo
.mode
|= FSE_TRUNCATED_PATH
;
8554 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8557 /* Couldn't find a vnode */
8561 if (error
== EKEEPLOOKING
) {
8562 goto continue_lookup
;
8563 } else if (batched
&& error
== ENOENT
) {
8564 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8565 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8567 * For compound VNOPs, the authorization callback
8568 * may return ENOENT in case of racing hard link lookups
8569 * redrive the lookup.
8576 #if CONFIG_APPLEDOUBLE
8578 * Special case to remove orphaned AppleDouble
8579 * files. I don't like putting this in the kernel,
8580 * but carbon does not like putting this in carbon either,
8583 if (error
== ENOTEMPTY
) {
8584 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8585 if (error
== EBUSY
) {
8591 * Assuming everything went well, we will try the RMDIR again
8594 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8597 #endif /* CONFIG_APPLEDOUBLE */
8599 * Call out to allow 3rd party notification of delete.
8600 * Ignore result of kauth_authorize_fileop call.
8603 if (has_listeners
) {
8604 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8605 KAUTH_FILEOP_DELETE
,
8610 if (vp
->v_flag
& VISHARDLINK
) {
8611 // see the comment in unlink1() about why we update
8612 // the parent of a hard link when it is removed
8613 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8619 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8621 add_fsevent(FSE_DELETE
, ctx
,
8622 FSE_ARG_STRING
, len
, path
,
8623 FSE_ARG_FINFO
, &finfo
,
8635 * nameidone has to happen before we vnode_put(dvp)
8636 * since it may need to release the fs_nodelock on the dvp
8645 if (restart_flag
== 0) {
8646 wakeup_one((caddr_t
)vp
);
8649 tsleep(vp
, PVFS
, "rm AD", 1);
8650 } while (restart_flag
!= 0);
8656 * Remove a directory file.
8660 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8662 return rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8663 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
);
8666 /* Get direntry length padded to 8 byte alignment */
8667 #define DIRENT64_LEN(namlen) \
8668 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8670 /* Get dirent length padded to 4 byte alignment */
8671 #define DIRENT_LEN(namelen) \
8672 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
8674 /* Get the end of this dirent */
8675 #define DIRENT_END(dep) \
8676 (((char *)(dep)) + (dep)->d_reclen - 1)
8679 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8680 int *numdirent
, vfs_context_t ctxp
)
8682 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8683 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8684 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8685 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8690 struct direntry
*entry64
;
8696 * We're here because the underlying file system does not
8697 * support direnties or we mounted denying support so we must
8698 * fall back to dirents and convert them to direntries.
8700 * Our kernel buffer needs to be smaller since re-packing will
8701 * expand each dirent. The worse case (when the name length
8702 * is 3 or less) corresponds to a struct direntry size of 32
8703 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8704 * (4-byte aligned). So having a buffer that is 3/8 the size
8705 * will prevent us from reading more than we can pack.
8707 * Since this buffer is wired memory, we will limit the
8708 * buffer size to a maximum of 32K. We would really like to
8709 * use 32K in the MIN(), but we use magic number 87371 to
8710 * prevent uio_resid() * 3 / 8 from overflowing.
8712 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8713 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8714 if (bufptr
== NULL
) {
8718 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8719 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8720 auio
->uio_offset
= uio
->uio_offset
;
8722 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8724 dep
= (struct dirent
*)bufptr
;
8725 bytesread
= bufsize
- uio_resid(auio
);
8727 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8730 * Convert all the entries and copy them out to user's buffer.
8732 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8733 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
8735 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
8736 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
8737 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
8738 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
8739 vp
->v_name
? vp
->v_name
: "<unknown>");
8744 bzero(entry64
, enbufsize
);
8745 /* Convert a dirent to a dirent64. */
8746 entry64
->d_ino
= dep
->d_ino
;
8747 entry64
->d_seekoff
= 0;
8748 entry64
->d_reclen
= enbufsize
;
8749 entry64
->d_namlen
= dep
->d_namlen
;
8750 entry64
->d_type
= dep
->d_type
;
8751 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8753 /* Move to next entry. */
8754 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8756 /* Copy entry64 to user's buffer. */
8757 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8760 /* Update the real offset using the offset we got from VNOP_READDIR. */
8762 uio
->uio_offset
= auio
->uio_offset
;
8765 FREE(bufptr
, M_TEMP
);
8766 FREE(entry64
, M_TEMP
);
8771 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8774 * Read a block of directory entries in a file system independent format.
8777 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8778 off_t
*offset
, int flags
)
8781 struct vfs_context context
= *vfs_context_current(); /* local copy */
8782 struct fileproc
*fp
;
8784 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8786 int error
, eofflag
, numdirent
;
8787 char uio_buf
[UIO_SIZEOF(1)];
8789 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
8793 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8794 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8799 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
) {
8800 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8804 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8809 if ((error
= vnode_getwithref(vp
))) {
8812 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8815 if (vp
->v_type
!= VDIR
) {
8816 (void)vnode_put(vp
);
8822 error
= mac_vnode_check_readdir(&context
, vp
);
8824 (void)vnode_put(vp
);
8829 loff
= fp
->f_fglob
->fg_offset
;
8830 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8831 uio_addiov(auio
, bufp
, bufsize
);
8833 if (flags
& VNODE_READDIR_EXTENDED
) {
8834 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8835 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8837 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8838 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8841 (void)vnode_put(vp
);
8845 if ((user_ssize_t
)bufsize
== uio_resid(auio
)) {
8846 if (union_dircheckp
) {
8847 error
= union_dircheckp(&vp
, fp
, &context
);
8852 (void)vnode_put(vp
);
8857 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8858 struct vnode
*tvp
= vp
;
8859 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8861 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8862 fp
->f_fglob
->fg_offset
= 0;
8876 *bytesread
= bufsize
- uio_resid(auio
);
8884 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8890 AUDIT_ARG(fd
, uap
->fd
);
8891 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
8894 if (proc_is64bit(p
)) {
8895 user64_long_t base
= (user64_long_t
)offset
;
8896 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8898 user32_long_t base
= (user32_long_t
)offset
;
8899 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8901 *retval
= bytesread
;
8907 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8913 AUDIT_ARG(fd
, uap
->fd
);
8914 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8917 *retval
= bytesread
;
8918 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
8925 * Set the mode mask for creation of filesystem nodes.
8926 * XXX implement xsecurity
8928 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8930 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8932 struct filedesc
*fdp
;
8934 AUDIT_ARG(mask
, newmask
);
8937 *retval
= fdp
->fd_cmask
;
8938 fdp
->fd_cmask
= newmask
& ALLPERMS
;
8944 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8946 * Parameters: p Process requesting to set the umask
8947 * uap User argument descriptor (see below)
8948 * retval umask of the process (parameter p)
8950 * Indirect: uap->newmask umask to set
8951 * uap->xsecurity ACL to set
8953 * Returns: 0 Success
8958 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8961 kauth_filesec_t xsecdst
;
8963 xsecdst
= KAUTH_FILESEC_NONE
;
8964 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8965 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) {
8969 xsecdst
= KAUTH_FILESEC_NONE
;
8972 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
8974 if (xsecdst
!= KAUTH_FILESEC_NONE
) {
8975 kauth_filesec_free(xsecdst
);
8981 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8983 return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
);
8987 * Void all references to file by ripping underlying filesystem
8992 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8995 struct vnode_attr va
;
8996 vfs_context_t ctx
= vfs_context_current();
8998 struct nameidata nd
;
9000 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
9010 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
9015 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
9021 error
= mac_vnode_check_revoke(ctx
, vp
);
9028 VATTR_WANTED(&va
, va_uid
);
9029 if ((error
= vnode_getattr(vp
, &va
, ctx
))) {
9032 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
9033 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
9036 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
))) {
9037 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
9046 * HFS/HFS PlUS SPECIFIC SYSTEM CALLS
9047 * The following system calls are designed to support features
9048 * which are specific to the HFS & HFS Plus volume formats
9053 * Obtain attribute information on objects in a directory while enumerating
9058 getdirentriesattr(proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
9061 struct fileproc
*fp
;
9063 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9064 uint32_t count
= 0, savecount
= 0;
9065 uint32_t newstate
= 0;
9068 struct attrlist attributelist
;
9069 vfs_context_t ctx
= vfs_context_current();
9071 char uio_buf
[UIO_SIZEOF(1)];
9072 kauth_action_t action
;
9076 /* Get the attributes into kernel space */
9077 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
9080 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
9084 if ((error
= fp_getfvp(p
, fd
, &fp
, &vp
))) {
9087 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
9088 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
9095 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
9103 if ((error
= vnode_getwithref(vp
))) {
9107 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
9110 if (vp
->v_type
!= VDIR
) {
9111 (void)vnode_put(vp
);
9117 error
= mac_vnode_check_readdir(ctx
, vp
);
9119 (void)vnode_put(vp
);
9124 /* set up the uio structure which will contain the users return buffer */
9125 loff
= fp
->f_fglob
->fg_offset
;
9126 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9127 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
9130 * If the only item requested is file names, we can let that past with
9131 * just LIST_DIRECTORY. If they want any other attributes, that means
9132 * they need SEARCH as well.
9134 action
= KAUTH_VNODE_LIST_DIRECTORY
;
9135 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
9136 attributelist
.fileattr
|| attributelist
.dirattr
) {
9137 action
|= KAUTH_VNODE_SEARCH
;
9140 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
9141 /* Believe it or not, uap->options only has 32-bits of valid
9142 * info, so truncate before extending again */
9144 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
9145 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
9149 (void) vnode_put(vp
);
9154 * If we've got the last entry of a directory in a union mount
9155 * then reset the eofflag and pretend there's still more to come.
9156 * The next call will again set eofflag and the buffer will be empty,
9157 * so traverse to the underlying directory and do the directory
9160 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
9161 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
9163 } else { // Empty buffer
9164 struct vnode
*tvp
= vp
;
9165 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
9166 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
9167 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
9168 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
9170 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
9178 (void)vnode_put(vp
);
9183 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
9185 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) {
9188 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) {
9191 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) {
9195 *retval
= eofflag
; /* similar to getdirentries */
9199 return error
; /* return error earlier, an retval of 0 or 1 now */
9200 } /* end of getdirentriesattr system call */
9203 * Exchange data between two files
9208 exchangedata(__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
9210 struct nameidata fnd
, snd
;
9211 vfs_context_t ctx
= vfs_context_current();
9215 u_int32_t nameiflags
;
9218 int flen
= 0, slen
= 0;
9219 int from_truncated
= 0, to_truncated
= 0;
9221 fse_info f_finfo
, s_finfo
;
9225 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9226 nameiflags
|= FOLLOW
;
9229 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
9230 UIO_USERSPACE
, uap
->path1
, ctx
);
9232 error
= namei(&fnd
);
9240 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
9241 UIO_USERSPACE
, uap
->path2
, ctx
);
9243 error
= namei(&snd
);
9252 * if the files are the same, return an inval error
9260 * if the files are on different volumes, return an error
9262 if (svp
->v_mount
!= fvp
->v_mount
) {
9267 /* If they're not files, return an error */
9268 if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
9274 error
= mac_vnode_check_exchangedata(ctx
,
9280 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
9281 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) {
9287 need_fsevent(FSE_EXCHANGE
, fvp
) ||
9289 kauth_authorize_fileop_has_listeners()) {
9292 if (fpath
== NULL
|| spath
== NULL
) {
9297 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
9298 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
9301 get_fse_info(fvp
, &f_finfo
, ctx
);
9302 get_fse_info(svp
, &s_finfo
, ctx
);
9303 if (from_truncated
|| to_truncated
) {
9304 // set it here since only the f_finfo gets reported up to user space
9305 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
9309 /* Ok, make the call */
9310 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
9313 const char *tmpname
;
9315 if (fpath
!= NULL
&& spath
!= NULL
) {
9316 /* call out to allow 3rd party notification of exchangedata.
9317 * Ignore result of kauth_authorize_fileop call.
9319 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
9320 (uintptr_t)fpath
, (uintptr_t)spath
);
9324 tmpname
= fvp
->v_name
;
9325 fvp
->v_name
= svp
->v_name
;
9326 svp
->v_name
= tmpname
;
9328 if (fvp
->v_parent
!= svp
->v_parent
) {
9331 tmp
= fvp
->v_parent
;
9332 fvp
->v_parent
= svp
->v_parent
;
9333 svp
->v_parent
= tmp
;
9335 name_cache_unlock();
9338 if (fpath
!= NULL
&& spath
!= NULL
) {
9339 add_fsevent(FSE_EXCHANGE
, ctx
,
9340 FSE_ARG_STRING
, flen
, fpath
,
9341 FSE_ARG_FINFO
, &f_finfo
,
9342 FSE_ARG_STRING
, slen
, spath
,
9343 FSE_ARG_FINFO
, &s_finfo
,
9350 if (fpath
!= NULL
) {
9351 RELEASE_PATH(fpath
);
9353 if (spath
!= NULL
) {
9354 RELEASE_PATH(spath
);
9363 * Return (in MB) the amount of freespace on the given vnode's volume.
9365 uint32_t freespace_mb(vnode_t vp
);
9368 freespace_mb(vnode_t vp
)
9370 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9371 return ((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9372 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20;
9380 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9385 struct nameidata nd
;
9386 struct user64_fssearchblock searchblock
;
9387 struct searchstate
*state
;
9388 struct attrlist
*returnattrs
;
9389 struct timeval timelimit
;
9390 void *searchparams1
, *searchparams2
;
9392 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9393 uint32_t nummatches
;
9395 uint32_t nameiflags
;
9396 vfs_context_t ctx
= vfs_context_current();
9397 char uio_buf
[UIO_SIZEOF(1)];
9399 /* Start by copying in fsearchblock parameter list */
9400 if (IS_64BIT_PROCESS(p
)) {
9401 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9402 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9403 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9405 struct user32_fssearchblock tmp_searchblock
;
9407 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9408 // munge into 64-bit version
9409 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9410 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9411 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9412 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9414 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9415 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9417 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9418 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9419 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9420 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9421 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9422 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9423 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9429 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9431 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9432 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
) {
9436 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9437 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
9438 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9441 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9442 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9443 /* assumes the size is still 556 bytes it will continue to work */
9445 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9446 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t));
9448 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9450 /* Now set up the various pointers to the correct place in our newly allocated memory */
9452 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9453 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9454 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
));
9456 /* Now copy in the stuff given our local variables. */
9458 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) {
9462 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) {
9466 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) {
9470 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) {
9475 * When searching a union mount, need to set the
9476 * start flag at the first call on each layer to
9477 * reset state for the new volume.
9479 if (uap
->options
& SRCHFS_START
) {
9480 state
->ss_union_layer
= 0;
9482 uap
->options
|= state
->ss_union_flags
;
9484 state
->ss_union_flags
= 0;
9487 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9488 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9489 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9490 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9491 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9494 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
9495 attrreference_t
* string_ref
;
9496 u_int32_t
* start_length
;
9497 user64_size_t param_length
;
9499 /* validate searchparams1 */
9500 param_length
= searchblock
.sizeofsearchparams1
;
9501 /* skip the word that specifies length of the buffer */
9502 start_length
= (u_int32_t
*) searchparams1
;
9503 start_length
= start_length
+ 1;
9504 string_ref
= (attrreference_t
*) start_length
;
9506 /* ensure no negative offsets or too big offsets */
9507 if (string_ref
->attr_dataoffset
< 0) {
9511 if (string_ref
->attr_length
> MAXPATHLEN
) {
9516 /* Check for pointer overflow in the string ref */
9517 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
9522 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
9526 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
9532 /* set up the uio structure which will contain the users return buffer */
9533 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9534 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
9537 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
9538 nameiflags
|= FOLLOW
;
9540 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
9541 UIO_USERSPACE
, uap
->path
, ctx
);
9551 * Switch to the root vnode for the volume
9553 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
9561 * If it's a union mount, the path lookup takes
9562 * us to the top layer. But we may need to descend
9563 * to a lower layer. For non-union mounts the layer
9566 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
9567 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0) {
9571 vp
= vp
->v_mount
->mnt_vnodecovered
;
9577 error
= vnode_getwithref(vp
);
9585 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
9594 * If searchblock.maxmatches == 0, then skip the search. This has happened
9595 * before and sometimes the underlying code doesnt deal with it well.
9597 if (searchblock
.maxmatches
== 0) {
9603 * Allright, we have everything we need, so lets make that call.
9605 * We keep special track of the return value from the file system:
9606 * EAGAIN is an acceptable error condition that shouldn't keep us
9607 * from copying out any results...
9610 fserror
= VNOP_SEARCHFS(vp
,
9613 &searchblock
.searchattrs
,
9614 (u_long
)searchblock
.maxmatches
,
9618 (u_long
)uap
->scriptcode
,
9619 (u_long
)uap
->options
,
9621 (struct searchstate
*) &state
->ss_fsstate
,
9625 * If it's a union mount we need to be called again
9626 * to search the mounted-on filesystem.
9628 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
9629 state
->ss_union_flags
= SRCHFS_START
;
9630 state
->ss_union_layer
++; // search next layer down
9638 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9639 * search state. Everything was already put into he return buffer by the vop call. */
9641 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) {
9645 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) {
9653 FREE(searchparams1
, M_TEMP
);
9656 } /* end of searchfs system call */
9658 #else /* CONFIG_SEARCHFS */
9661 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9666 #endif /* CONFIG_SEARCHFS */
9669 lck_grp_attr_t
* nspace_group_attr
;
9670 lck_attr_t
* nspace_lock_attr
;
9671 lck_grp_t
* nspace_mutex_group
;
9673 lck_mtx_t nspace_handler_lock
;
9674 lck_mtx_t nspace_handler_exclusion_lock
;
9676 time_t snapshot_timestamp
= 0;
9677 int nspace_allow_virtual_devs
= 0;
9679 void nspace_handler_init(void);
9681 typedef struct nspace_item_info
{
9691 #define MAX_NSPACE_ITEMS 128
9692 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9693 uint32_t nspace_item_idx
= 0; // also used as the sleep/wakeup rendezvous address
9694 uint32_t nspace_token_id
= 0;
9695 uint32_t nspace_handler_timeout
= 15; // seconds
9697 #define NSPACE_ITEM_NEW 0x0001
9698 #define NSPACE_ITEM_PROCESSING 0x0002
9699 #define NSPACE_ITEM_DEAD 0x0004
9700 #define NSPACE_ITEM_CANCELLED 0x0008
9701 #define NSPACE_ITEM_DONE 0x0010
9702 #define NSPACE_ITEM_RESET_TIMER 0x0020
9704 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9705 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9707 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9709 //#pragma optimization_level 0
9712 NSPACE_HANDLER_NSPACE
= 0,
9713 NSPACE_HANDLER_SNAPSHOT
= 1,
9715 NSPACE_HANDLER_COUNT
,
9719 uint64_t handler_tid
;
9720 struct proc
*handler_proc
;
9724 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
9726 /* namespace fsctl functions */
9727 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9728 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9729 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9730 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9731 static int nspace_is_special_process(struct proc
*proc
);
9732 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9733 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
9734 static int validate_namespace_args(int is64bit
, int size
);
9735 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
9739 nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9741 switch (nspace_type
) {
9742 case NSPACE_HANDLER_NSPACE
:
9743 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9744 case NSPACE_HANDLER_SNAPSHOT
:
9745 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
9747 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
9753 nspace_item_flags_for_type(nspace_type_t nspace_type
)
9755 switch (nspace_type
) {
9756 case NSPACE_HANDLER_NSPACE
:
9757 return NSPACE_ITEM_NSPACE_EVENT
;
9758 case NSPACE_HANDLER_SNAPSHOT
:
9759 return NSPACE_ITEM_SNAPSHOT_EVENT
;
9761 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
9767 nspace_open_flags_for_type(nspace_type_t nspace_type
)
9769 switch (nspace_type
) {
9770 case NSPACE_HANDLER_NSPACE
:
9771 return FREAD
| FWRITE
| O_EVTONLY
;
9772 case NSPACE_HANDLER_SNAPSHOT
:
9773 return FREAD
| O_EVTONLY
;
9775 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
9780 static inline nspace_type_t
9781 nspace_type_for_op(uint64_t op
)
9783 switch (op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9784 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9785 return NSPACE_HANDLER_NSPACE
;
9786 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9787 return NSPACE_HANDLER_SNAPSHOT
;
9789 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9790 return NSPACE_HANDLER_NSPACE
;
9795 nspace_is_special_process(struct proc
*proc
)
9798 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9799 if (proc
== nspace_handlers
[i
].handler_proc
) {
9807 nspace_handler_init(void)
9809 nspace_lock_attr
= lck_attr_alloc_init();
9810 nspace_group_attr
= lck_grp_attr_alloc_init();
9811 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9812 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9813 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9814 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
9818 nspace_proc_exit(struct proc
*p
)
9820 int i
, event_mask
= 0;
9822 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9823 if (p
== nspace_handlers
[i
].handler_proc
) {
9824 event_mask
|= nspace_item_flags_for_type(i
);
9825 nspace_handlers
[i
].handler_tid
= 0;
9826 nspace_handlers
[i
].handler_proc
= NULL
;
9830 if (event_mask
== 0) {
9834 lck_mtx_lock(&nspace_handler_lock
);
9835 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9836 // if this process was the snapshot handler, zero snapshot_timeout
9837 snapshot_timestamp
= 0;
9841 // unblock anyone that's waiting for the handler that died
9843 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9844 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9845 if (nspace_items
[i
].flags
& event_mask
) {
9846 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9847 vnode_lock_spin(nspace_items
[i
].vp
);
9848 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9849 vnode_unlock(nspace_items
[i
].vp
);
9851 nspace_items
[i
].vp
= NULL
;
9852 nspace_items
[i
].vid
= 0;
9853 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9854 nspace_items
[i
].token
= 0;
9856 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9861 wakeup((caddr_t
)&nspace_item_idx
);
9862 lck_mtx_unlock(&nspace_handler_lock
);
9867 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
9869 return resolve_nspace_item_ext(vp
, op
, NULL
);
9873 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9875 int i
, error
, keep_waiting
;
9877 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9879 // only allow namespace events on regular files, directories and symlinks.
9880 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9885 // if this is a snapshot event and the vnode is on a
9886 // disk image just pretend nothing happened since any
9887 // change to the disk image will cause the disk image
9888 // itself to get backed up and this avoids multi-way
9889 // deadlocks between the snapshot handler and the ever
9890 // popular diskimages-helper process. the variable
9891 // nspace_allow_virtual_devs allows this behavior to
9892 // be overridden (for use by the Mobile TimeMachine
9893 // testing infrastructure which uses disk images)
9895 if ((op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9896 && (vp
->v_mount
!= NULL
)
9897 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9898 && !nspace_allow_virtual_devs
) {
9902 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9903 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9907 if (nspace_is_special_process(current_proc())) {
9911 lck_mtx_lock(&nspace_handler_lock
);
9914 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9915 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
9920 if (i
>= MAX_NSPACE_ITEMS
) {
9921 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9922 if (nspace_items
[i
].flags
== 0) {
9927 nspace_items
[i
].refcount
++;
9930 if (i
>= MAX_NSPACE_ITEMS
) {
9931 ts
.tv_sec
= nspace_handler_timeout
;
9934 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
| PCATCH
, "nspace-no-space", &ts
);
9936 // an entry got free'd up, go see if we can get a slot
9939 lck_mtx_unlock(&nspace_handler_lock
);
9945 // if it didn't already exist, add it. if it did exist
9946 // we'll get woken up when someone does a wakeup() on
9947 // the slot in the nspace_items table.
9949 if (vp
!= nspace_items
[i
].vp
) {
9950 nspace_items
[i
].vp
= vp
;
9951 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9952 nspace_items
[i
].op
= op
;
9953 nspace_items
[i
].vid
= vnode_vid(vp
);
9954 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9955 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
9956 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9958 vnode_lock_spin(vp
);
9959 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9964 nspace_items
[i
].token
= 0;
9965 nspace_items
[i
].refcount
= 1;
9967 wakeup((caddr_t
)&nspace_item_idx
);
9971 // Now go to sleep until the handler does a wakeup on this
9972 // slot in the nspace_items table (or we timeout).
9975 while (keep_waiting
) {
9976 ts
.tv_sec
= nspace_handler_timeout
;
9978 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
| PCATCH
, "namespace-done", &ts
);
9980 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
9982 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9983 error
= nspace_items
[i
].token
;
9984 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
9985 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9986 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9991 } else if (error
== 0) {
9992 // hmmm, why did we get woken up?
9993 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9994 nspace_items
[i
].token
);
9997 if (--nspace_items
[i
].refcount
== 0) {
9998 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9999 nspace_items
[i
].arg
= NULL
;
10000 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
10001 nspace_items
[i
].flags
= 0; // this clears it for re-use
10003 wakeup(&nspace_token_id
);
10007 lck_mtx_unlock(&nspace_handler_lock
);
10013 nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
10015 int snapshot_error
= 0;
10021 /* Swap files are special; skip them */
10022 if (vnode_isswap(vp
)) {
10026 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
10027 // the change time is within this epoch
10030 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
10031 if (error
== EDEADLK
) {
10032 snapshot_error
= 0;
10033 } else if (error
) {
10034 if (error
== EAGAIN
) {
10035 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
10036 } else if (error
== EINTR
) {
10037 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
10038 snapshot_error
= EINTR
;
10043 return snapshot_error
;
10047 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
10051 lck_mtx_lock(&nspace_handler_lock
);
10052 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10053 if (nspace_items
[i
].vp
== vp
) {
10058 if (i
>= MAX_NSPACE_ITEMS
) {
10059 lck_mtx_unlock(&nspace_handler_lock
);
10063 *status
= nspace_items
[i
].flags
;
10064 lck_mtx_unlock(&nspace_handler_lock
);
10071 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
10073 struct vnode_attr va
;
10077 VATTR_WANTED(&va
, va_fsid
);
10078 VATTR_WANTED(&va
, va_fileid
);
10080 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
10081 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
10084 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
10093 // Note: this function does NOT check permissions on all of the
10094 // parent directories leading to this vnode. It should only be
10095 // called on behalf of a root process. Otherwise a process may
10096 // get access to a file because the file itself is readable even
10097 // though its parent directories would prevent access.
10100 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
10104 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10109 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
10115 /* compute action to be authorized */
10117 if (fmode
& FREAD
) {
10118 action
|= KAUTH_VNODE_READ_DATA
;
10120 if (fmode
& (FWRITE
| O_TRUNC
)) {
10122 * If we are writing, appending, and not truncating,
10123 * indicate that we are appending so that if the
10124 * UF_APPEND or SF_APPEND bits are set, we do not deny
10127 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
10128 action
|= KAUTH_VNODE_APPEND_DATA
;
10130 action
|= KAUTH_VNODE_WRITE_DATA
;
10134 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0) {
10140 // if the vnode is tagged VOPENEVT and the current process
10141 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
10142 // flag to the open mode so that this open won't count against
10143 // the vnode when carbon delete() does a vnode_isinuse() to see
10144 // if a file is currently in use. this allows spotlight
10145 // importers to not interfere with carbon apps that depend on
10146 // the no-delete-if-busy semantics of carbon delete().
10148 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
10149 fmode
|= O_EVTONLY
;
10152 if ((error
= VNOP_OPEN(vp
, fmode
, ctx
))) {
10155 if ((error
= vnode_ref_ext(vp
, fmode
, 0))) {
10156 VNOP_CLOSE(vp
, fmode
, ctx
);
10160 /* Call out to allow 3rd party notification of open.
10161 * Ignore result of kauth_authorize_fileop call.
10164 mac_vnode_notify_open(ctx
, vp
, fmode
);
10166 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
10174 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
10181 lck_mtx_lock(&nspace_handler_exclusion_lock
);
10182 if (nspace_handlers
[nspace_type
].handler_busy
) {
10183 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
10187 nspace_handlers
[nspace_type
].handler_busy
= 1;
10188 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
10191 * Any process that gets here will be one of the namespace handlers.
10192 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
10193 * as we can cause deadlocks to occur, because the namespace handler may prevent
10194 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
10197 curtask
= current_task();
10198 bsd_set_dependency_capable(curtask
);
10200 lck_mtx_lock(&nspace_handler_lock
);
10201 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
10202 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
10203 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
10206 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
10207 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
10211 while (error
== 0) {
10212 /* Try to find matching namespace item */
10213 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10214 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
10215 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
10221 if (i
>= MAX_NSPACE_ITEMS
) {
10222 /* Nothing is there yet. Wait for wake up and retry */
10223 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
| PCATCH
, "namespace-items", 0);
10224 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
10225 /* Prevent infinite loop if snapshot handler exited */
10232 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
10233 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
10234 nspace_items
[i
].token
= ++nspace_token_id
;
10236 assert(nspace_items
[i
].vp
);
10237 struct fileproc
*fp
;
10240 struct proc
*p
= current_proc();
10241 vfs_context_t ctx
= vfs_context_current();
10242 struct vnode_attr va
;
10243 bool vn_get_succsessful
= false;
10244 bool vn_open_successful
= false;
10245 bool fp_alloc_successful
= false;
10248 * Use vnode pointer to acquire a file descriptor for
10249 * hand-off to userland
10251 fmode
= nspace_open_flags_for_type(nspace_type
);
10252 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
10256 vn_get_succsessful
= true;
10258 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
10262 vn_open_successful
= true;
10264 error
= falloc(p
, &fp
, &indx
, ctx
);
10268 fp_alloc_successful
= true;
10270 fp
->f_fglob
->fg_flag
= fmode
;
10271 fp
->f_fglob
->fg_ops
= &vnops
;
10272 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
10275 procfdtbl_releasefd(p
, indx
, NULL
);
10276 fp_drop(p
, indx
, fp
, 1);
10280 * All variants of the namespace handler struct support these three fields:
10281 * token, flags, and the FD pointer
10283 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
10287 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
10291 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
10297 * Handle optional fields:
10298 * extended version support an info ptr (offset, length), and the
10300 * namedata version supports a unique per-link object ID
10303 if (nhd
->infoptr
) {
10304 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
10305 uint64_t u_offset
, u_length
;
10308 u_offset
= uio_offset(uio
);
10309 u_length
= uio_resid(uio
);
10314 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
10318 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
10326 VATTR_WANTED(&va
, va_linkid
);
10327 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
10332 uint64_t linkid
= 0;
10333 if (VATTR_IS_SUPPORTED(&va
, va_linkid
)) {
10334 linkid
= (uint64_t)va
.va_linkid
;
10336 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
10340 if (fp_alloc_successful
) {
10341 fp_free(p
, indx
, fp
);
10343 if (vn_open_successful
) {
10344 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
10349 if (vn_get_succsessful
) {
10350 vnode_put(nspace_items
[i
].vp
);
10357 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
10358 vnode_lock_spin(nspace_items
[i
].vp
);
10359 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10360 vnode_unlock(nspace_items
[i
].vp
);
10362 nspace_items
[i
].vp
= NULL
;
10363 nspace_items
[i
].vid
= 0;
10364 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10365 nspace_items
[i
].token
= 0;
10367 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10370 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
10371 // just go through every snapshot event and unblock it immediately.
10372 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
10373 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10374 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
10375 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
10376 nspace_items
[i
].vp
= NULL
;
10377 nspace_items
[i
].vid
= 0;
10378 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10379 nspace_items
[i
].token
= 0;
10381 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10388 lck_mtx_unlock(&nspace_handler_lock
);
10390 lck_mtx_lock(&nspace_handler_exclusion_lock
);
10391 nspace_handlers
[nspace_type
].handler_busy
= 0;
10392 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
10398 validate_namespace_args(int is64bit
, int size
)
10401 /* Must be one of these */
10402 if (size
== sizeof(user64_namespace_handler_info
)) {
10405 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
10408 if (size
== sizeof(user64_namespace_handler_data
)) {
10413 /* 32 bit -- must be one of these */
10414 if (size
== sizeof(user32_namespace_handler_info
)) {
10417 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
10420 if (size
== sizeof(user32_namespace_handler_data
)) {
10432 process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
10435 namespace_handler_data nhd
;
10437 bzero(&nhd
, sizeof(namespace_handler_data
));
10439 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10443 error
= validate_namespace_args(is64bit
, size
);
10448 /* Copy in the userland pointers into our kernel-only struct */
10451 /* 64 bit userland structures */
10452 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
10453 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
10454 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
10456 /* If the size is greater than the standard info struct, add in extra fields */
10457 if (size
> (sizeof(user64_namespace_handler_info
))) {
10458 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
10459 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
10461 if (size
== (sizeof(user64_namespace_handler_data
))) {
10462 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
10464 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10467 /* 32 bit userland structures */
10468 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
10469 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
10470 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
10472 if (size
> (sizeof(user32_namespace_handler_info
))) {
10473 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
10474 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
10476 if (size
== (sizeof(user32_namespace_handler_data
))) {
10477 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
10479 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10483 return wait_for_namespace_event(&nhd
, nspace_type
);
/*
 * fsctl_bogus_command_compat: compatibility shim for fsctl selectors.
 *
 * Every visible case maps IOCBASECMD(<selector>) back to the complete
 * <selector> value, so a caller that passed only the base-command form of a
 * known selector is upgraded to the full encoding before dispatch.
 *
 * NOTE(review): IOCBASECMD presumably strips the encoded parameter length
 * (see sys/ioccom.h) -- confirm.  The enclosing switch and the return used
 * for unrecognised commands are not present in this listing; presumably cmd
 * is returned unchanged -- confirm against the complete source.
 */
10486 static unsigned long
10487 fsctl_bogus_command_compat(unsigned long cmd
)
10490 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10491 return FSIOC_SYNC_VOLUME
;
10492 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10493 return FSIOC_ROUTEFS_SETROUTEID
;
10494 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10495 return FSIOC_SET_PACKAGE_EXTS
;
10496 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET
):
10497 return FSIOC_NAMESPACE_HANDLER_GET
;
10498 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET
):
10499 return FSIOC_OLD_SNAPSHOT_HANDLER_GET
;
10500 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT
):
10501 return FSIOC_SNAPSHOT_HANDLER_GET_EXT
;
10502 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE
):
10503 return FSIOC_NAMESPACE_HANDLER_UPDATE
;
10504 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK
):
10505 return FSIOC_NAMESPACE_HANDLER_UNBLOCK
;
10506 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL
):
10507 return FSIOC_NAMESPACE_HANDLER_CANCEL
;
10508 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
):
10509 return FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
;
10510 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
):
10511 return FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
;
10512 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10513 return FSIOC_SET_FSTYPENAME_OVERRIDE
;
10514 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10515 return DISK_CONDITIONER_IOC_GET
;
10516 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10517 return DISK_CONDITIONER_IOC_SET
;
10518 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10519 return FSIOC_FIOSEEKHOLE
;
10520 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10521 return FSIOC_FIOSEEKDATA
;
10522 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10523 return SPOTLIGHT_IOC_GET_MOUNT_TIME
;
10524 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10525 return SPOTLIGHT_IOC_GET_LAST_MTIME
;
10532 * Make a filesystem-specific control call:
/*
 * fsctl_internal: common back end shared by fsctl() and ffsctl().
 *
 *   p        calling process; used for proc_is64bit() and privilege checks.
 *   arg_vp   in/out vnode pointer.  FSIOC_SYNC_VOLUME marks it as released
 *            (see the comment in that case), so callers must re-check it.
 *   cmd      selector; first normalised by fsctl_bogus_command_compat().
 *   udata    user address of the argument (or the argument itself for
 *            size-0 / IOC_VOID commands, where it is stored into data).
 *   options  passed through to VNOP_IOCTL for filesystem-specific commands.
 *   ctx      VFS context for vnode operations.
 *
 * Flow visible in this listing: derive the argument size with IOCPARM_LEN,
 * stage the argument in stkbuf or a kalloc'd buffer, copyin() for IOC_IN,
 * dispatch the generic selectors, fall back to VNOP_IOCTL, and finally
 * copyout() the buffer when the command is IOC_OUT and no error occurred.
 *
 * NOTE(review): error returns, the switch header, break statements and the
 * buffer free are on lines that are absent from this listing.
 */
10536 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10541 #define STK_PARAMS 128
10542 char stkbuf
[STK_PARAMS
] = {0};
10543 caddr_t data
, memp
;
10544 vnode_t vp
= *arg_vp
;
10546 cmd
= fsctl_bogus_command_compat(cmd
);
/* The argument size is encoded in the command word itself. */
10548 size
= IOCPARM_LEN(cmd
);
10549 if (size
> IOCPARM_MAX
) {
10553 is64bit
= proc_is64bit(p
);
/* Arguments that do not fit in stkbuf are staged in kalloc'd memory. */
10557 if (size
> sizeof(stkbuf
)) {
10558 if ((memp
= (caddr_t
)kalloc(size
)) == 0) {
10566 if (cmd
& IOC_IN
) {
10568 error
= copyin(udata
, data
, size
);
10577 *(user_addr_t
*)data
= udata
;
10579 *(uint32_t *)data
= (uint32_t)udata
;
10582 } else if ((cmd
& IOC_OUT
) && size
) {
10584 * Zero the buffer so the user always
10585 * gets back something deterministic.
10588 } else if (cmd
& IOC_VOID
) {
10590 *(user_addr_t
*)data
= udata
;
10592 *(uint32_t *)data
= (uint32_t)udata
;
10596 /* Check to see if it's a generic command */
10598 case FSIOC_SYNC_VOLUME
: {
10599 mount_t mp
= vp
->v_mount
;
10600 int arg
= *(uint32_t*)data
;
10602 /* record vid of vp so we can drop it below. */
10603 uint32_t vvid
= vp
->v_id
;
10606 * Then grab mount_iterref so that we can release the vnode.
10607 * Without this, a thread may call vnode_iterate_prepare then
10608 * get into a deadlock because we've never released the root vp
10610 error
= mount_iterref(mp
, 0);
10616 /* issue the sync for this volume */
10617 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
10620 * Then release the mount_iterref once we're done syncing; it's not
10621 * needed for the VNOP_IOCTL below
10623 mount_iterdrop(mp
);
10625 if (arg
& FSCTL_SYNC_FULLSYNC
) {
10626 /* re-obtain vnode iocount on the root vp, if possible */
10627 error
= vnode_getwithvid(vp
, vvid
);
10629 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
10633 /* mark the argument VP as having been released */
10638 case FSIOC_ROUTEFS_SETROUTEID
: {
10640 char routepath
[MAXPATHLEN
];
10643 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10646 bzero(routepath
, MAXPATHLEN
);
10647 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
10651 error
= routefs_kernel_mount(routepath
);
10659 case FSIOC_SET_PACKAGE_EXTS
: {
10660 user_addr_t ext_strings
;
10661 uint32_t num_entries
;
10662 uint32_t max_width
;
10664 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) {
10668 if ((is64bit
&& size
!= sizeof(user64_package_ext_info
))
10669 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
10670 // either you're 64-bit and passed a 64-bit struct or
10671 // you're 32-bit and passed a 32-bit struct. otherwise
10678 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
10679 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
10680 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
10682 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
10683 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
10684 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
10686 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
10690 /* namespace handlers */
10691 case FSIOC_NAMESPACE_HANDLER_GET
: {
10692 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10696 /* Snapshot handlers */
10697 case FSIOC_OLD_SNAPSHOT_HANDLER_GET
: {
10698 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10702 case FSIOC_SNAPSHOT_HANDLER_GET_EXT
: {
10703 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10707 case FSIOC_NAMESPACE_HANDLER_UPDATE
: {
10708 uint32_t token
, val
;
10711 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10715 if (!nspace_is_special_process(p
)) {
/*
 * Two 32-bit words are read from the staged argument.
 * NOTE(review): relies on the selector encoding a size of at least
 * 8 bytes; the same pattern is used by UNBLOCK and CANCEL -- confirm.
 */
10720 token
= ((uint32_t *)data
)[0];
10721 val
= ((uint32_t *)data
)[1];
10723 lck_mtx_lock(&nspace_handler_lock
);
10725 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10726 if (nspace_items
[i
].token
== token
) {
10727 break; /* exit for loop, not case stmt */
10731 if (i
>= MAX_NSPACE_ITEMS
) {
10735 // if this bit is set, when resolve_nspace_item() times out
10736 // it will loop and go back to sleep.
10738 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10741 lck_mtx_unlock(&nspace_handler_lock
);
10744 printf("nspace-handler-update: did not find token %u\n", token
);
10749 case FSIOC_NAMESPACE_HANDLER_UNBLOCK
: {
10750 uint32_t token
, val
;
10753 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10757 if (!nspace_is_special_process(p
)) {
10762 token
= ((uint32_t *)data
)[0];
10763 val
= ((uint32_t *)data
)[1];
10765 lck_mtx_lock(&nspace_handler_lock
);
10767 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10768 if (nspace_items
[i
].token
== token
) {
10769 break; /* exit for loop, not case statement */
10773 if (i
>= MAX_NSPACE_ITEMS
) {
10774 printf("nspace-handler-unblock: did not find token %u\n", token
);
/* val == 0 additionally clears VNEEDSSNAPSHOT on the item's vnode. */
10777 if (val
== 0 && nspace_items
[i
].vp
) {
10778 vnode_lock_spin(nspace_items
[i
].vp
);
10779 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10780 vnode_unlock(nspace_items
[i
].vp
);
/* Reset the slot, mark it done, then wake waiters keyed on &vp. */
10783 nspace_items
[i
].vp
= NULL
;
10784 nspace_items
[i
].arg
= NULL
;
10785 nspace_items
[i
].op
= 0;
10786 nspace_items
[i
].vid
= 0;
10787 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10788 nspace_items
[i
].token
= 0;
10790 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10793 lck_mtx_unlock(&nspace_handler_lock
);
10797 case FSIOC_NAMESPACE_HANDLER_CANCEL
: {
10798 uint32_t token
, val
;
10801 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10805 if (!nspace_is_special_process(p
)) {
10810 token
= ((uint32_t *)data
)[0];
10811 val
= ((uint32_t *)data
)[1];
10813 lck_mtx_lock(&nspace_handler_lock
);
10815 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
10816 if (nspace_items
[i
].token
== token
) {
10817 break; /* exit for loop, not case stmt */
10821 if (i
>= MAX_NSPACE_ITEMS
) {
10822 printf("nspace-handler-cancel: did not find token %u\n", token
);
10825 if (nspace_items
[i
].vp
) {
10826 vnode_lock_spin(nspace_items
[i
].vp
);
10827 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10828 vnode_unlock(nspace_items
[i
].vp
);
/*
 * Unlike UNBLOCK (which zeroes the token), CANCEL stores the
 * caller-supplied val in the token field and flags the item cancelled.
 */
10831 nspace_items
[i
].vp
= NULL
;
10832 nspace_items
[i
].arg
= NULL
;
10833 nspace_items
[i
].vid
= 0;
10834 nspace_items
[i
].token
= val
;
10835 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10836 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10838 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10841 lck_mtx_unlock(&nspace_handler_lock
);
10845 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10846 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10850 // we explicitly do not do the namespace_handler_proc check here
10852 lck_mtx_lock(&nspace_handler_lock
);
10853 snapshot_timestamp
= ((uint32_t *)data
)[0];
10854 wakeup(&nspace_item_idx
);
10855 lck_mtx_unlock(&nspace_handler_lock
);
10856 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10860 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10862 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10866 lck_mtx_lock(&nspace_handler_lock
);
10867 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10868 lck_mtx_unlock(&nspace_handler_lock
);
10869 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10870 nspace_allow_virtual_devs
? "" : " NOT");
10875 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
10877 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
/*
 * A non-empty name installs the override under the mount lock; the
 * statements from original line 10890 onward clear it again
 * (presumably the else branch -- the else line is not in this listing).
 */
10881 mount_lock(vp
->v_mount
);
10882 if (data
[0] != 0) {
10883 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10884 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10885 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10886 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10887 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10890 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10891 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10893 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10894 vp
->v_mount
->fstypename_override
[0] = '\0';
10896 mount_unlock(vp
->v_mount
);
10901 case DISK_CONDITIONER_IOC_GET
: {
10902 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10906 case DISK_CONDITIONER_IOC_SET
: {
10907 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10911 case FSIOC_FD_ONLY_OPEN_ONCE
: {
10912 if (vnode_usecount(vp
) > 1) {
10921 /* other, known commands shouldn't be passed down here */
10924 case F_TRIM_ACTIVE_FILE
:
10926 case F_TRANSCODEKEY
:
10927 case F_GETPROTECTIONLEVEL
:
10928 case F_GETDEFAULTPROTLEVEL
:
10929 case F_MAKECOMPRESSED
:
10930 case F_SET_GREEDY_MODE
:
10931 case F_SETSTATICCONTENT
:
10933 case F_SETBACKINGSTORE
:
10934 case F_GETPATH_MTMINFO
:
10935 case APFSIOC_REVERT_TO_SNAPSHOT
:
10936 case FSIOC_FIOSEEKHOLE
:
10937 case FSIOC_FIOSEEKDATA
:
10938 case HFS_GET_BOOT_INFO
:
10939 case HFS_SET_BOOT_INFO
:
10943 case F_BARRIERFSYNC
:
10949 /* Invoke the filesystem-specific code */
10950 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
10952 } /* end switch stmt */
10955 * if no errors, copy any data to user. Size was
10956 * already set and checked above.
10958 if (error
== 0 && (cmd
& IOC_OUT
) && size
) {
10959 error
= copyout(data
, udata
, size
);
/*
 * fsctl: path-based entry point for filesystem control calls.
 *
 * Records the command and options for auditing, refuses
 * FSIOC_FD_ONLY_OPEN_ONCE (see the comment below), resolves uap->path with
 * namei() -- following a trailing symlink unless FSOPT_NOFOLLOW is set --
 * runs mac_mount_check_fsctl() against the vnode's mount and then hands the
 * request to fsctl_internal().
 *
 * NOTE(review): the error returns, the assignment of vp from the lookup and
 * the final vnode release are on lines absent from this listing.
 */
10972 fsctl(proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10975 struct nameidata nd
;
10978 vfs_context_t ctx
= vfs_context_current();
10980 AUDIT_ARG(cmd
, uap
->cmd
);
10981 AUDIT_ARG(value32
, uap
->options
);
10982 /* Get the vnode for the file we are getting info on: */
10985 // if we come through fsctl() then the file is by definition not open.
10986 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
10987 // lest the caller mistakenly thinks the only open is their own (but in
10988 // reality it's someone else's).
10990 if (uap
->cmd
== FSIOC_FD_ONLY_OPEN_ONCE
) {
10993 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) {
10994 nameiflags
|= FOLLOW
;
10996 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10997 UIO_USERSPACE
, uap
->path
, ctx
);
10998 if ((error
= namei(&nd
))) {
11005 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
/* vp is passed by address: fsctl_internal() may release it. */
11011 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
/*
 * ffsctl: descriptor-based entry point for filesystem control calls.
 *
 * Records fd, command and options for auditing, converts uap->fd into a
 * vnode with file_vnode(), takes a reference with vnode_getwithref(), runs
 * mac_mount_check_fsctl() against the vnode's mount and then hands the
 * request to fsctl_internal().
 *
 * NOTE(review): error returns and the file_drop()/vnode release calls are on
 * lines absent from this listing.
 */
11021 ffsctl(proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
11025 vfs_context_t ctx
= vfs_context_current();
11028 AUDIT_ARG(fd
, uap
->fd
);
11029 AUDIT_ARG(cmd
, uap
->cmd
);
11030 AUDIT_ARG(value32
, uap
->options
);
11032 /* Get the vnode for the file we are getting info on: */
11033 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11037 if ((error
= vnode_getwithref(vp
))) {
11043 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
11050 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
11054 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11061 /* end of fsctl system call */
11064 * Retrieve the data of an extended attribute.
/*
 * getxattr: path-based read of an extended attribute.
 *
 * Tests uap->options for XATTR_NOSECURITY / XATTR_NODEFAULT up front,
 * resolves uap->path (following symlinks unless XATTR_NOFOLLOW is set),
 * copies in the attribute name, and applies the xattr_protected() check:
 * a protected name passes only for a superuser context asking for
 * "com.apple.system.Security".  The user buffer is described by a one-iovec
 * uio built in the on-stack uio_buf, with uap->size clamped to
 * XATTR_MAXSIZE.
 *
 * *retval is set either to the number of bytes transferred
 * (uap->size - uio_resid) or to the attribute size reported by
 * vn_getxattr().
 * NOTE(review): the conditions selecting between those two assignments, and
 * all error returns, are on lines absent from this listing.
 */
11067 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
11070 struct nameidata nd
;
11071 char attrname
[XATTR_MAXNAMELEN
+ 1];
11072 vfs_context_t ctx
= vfs_context_current();
11074 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11075 size_t attrsize
= 0;
11077 u_int32_t nameiflags
;
11079 char uio_buf
[UIO_SIZEOF(1)];
11081 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11085 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11086 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11087 if ((error
= namei(&nd
))) {
11093 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11097 if (xattr_protected(attrname
)) {
11098 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
11104 * the specific check for 0xffffffff is a hack to preserve
11105 * binaray compatibilty in K64 with applications that discovered
11106 * that passing in a buf pointer and a size of -1 resulted in
11107 * just the size of the indicated extended attribute being returned.
11108 * this isn't part of the documented behavior, but because of the
11109 * original implemtation's check for "uap->size > 0", this behavior
11110 * was allowed. In K32 that check turned into a signed comparison
11111 * even though uap->size is unsigned... in K64, we blow by that
11112 * check because uap->size is unsigned and doesn't get sign smeared
11113 * in the munger for a 32 bit user app. we also need to add a
11114 * check to limit the maximum size of the buffer being passed in...
11115 * unfortunately, the underlying fileystems seem to just malloc
11116 * the requested size even if the actual extended attribute is tiny.
11117 * because that malloc is for kernel wired memory, we have to put a
11118 * sane limit on it.
11120 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11121 * U64 running on K64 will yield -1 (64 bits wide)
11122 * U32/U64 running on K32 will yield -1 (32 bits wide)
11124 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1) {
/* Clamp the caller's size so the filesystem cannot be asked for more than XATTR_MAXSIZE. */
11129 if (uap
->size
> (size_t)XATTR_MAXSIZE
) {
11130 uap
->size
= XATTR_MAXSIZE
;
11133 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11134 &uio_buf
[0], sizeof(uio_buf
));
11135 uio_addiov(auio
, uap
->value
, uap
->size
);
11138 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
11143 *retval
= uap
->size
- uio_resid(auio
);
11145 *retval
= (user_ssize_t
)attrsize
;
11152 * Retrieve the data of an extended attribute.
/*
 * fgetxattr: descriptor-based read of an extended attribute.
 *
 * Tests uap->options for XATTR_NOFOLLOW / XATTR_NOSECURITY / XATTR_NODEFAULT,
 * obtains the vnode behind uap->fd (file_vnode + vnode_getwithref), copies
 * in the attribute name, applies xattr_protected(), and when a buffer and a
 * non-zero size are supplied builds a one-iovec read uio before calling
 * vn_getxattr().  *retval is set from the uio residual or from attrsize.
 *
 * NOTE(review): unlike getxattr(), no clamp of uap->size to XATTR_MAXSIZE is
 * visible between the size test and uio_addiov() -- confirm whether that is
 * intentional.  Error returns are on lines absent from this listing.
 */
11155 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
11158 char attrname
[XATTR_MAXNAMELEN
+ 1];
11160 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11161 size_t attrsize
= 0;
11164 char uio_buf
[UIO_SIZEOF(1)];
11166 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11170 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11173 if ((error
= vnode_getwithref(vp
))) {
11174 file_drop(uap
->fd
);
11177 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11181 if (xattr_protected(attrname
)) {
11185 if (uap
->value
&& uap
->size
> 0) {
11186 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
11187 &uio_buf
[0], sizeof(uio_buf
));
11188 uio_addiov(auio
, uap
->value
, uap
->size
);
11191 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
/* Release the vnode reference and the descriptor hold taken above. */
11193 (void)vnode_put(vp
);
11194 file_drop(uap
->fd
);
11197 *retval
= uap
->size
- uio_resid(auio
);
11199 *retval
= (user_ssize_t
)attrsize
;
11205 * Set the data of an extended attribute.
/*
 * setxattr: path-based write of an extended attribute.
 *
 * Tests uap->options for XATTR_NOSECURITY / XATTR_NODEFAULT, copies in the
 * attribute name (translating copyinstr's EPERM into ENAMETOOLONG), applies
 * xattr_protected(), tests for a non-zero size paired with a null value
 * pointer, resolves uap->path (following symlinks unless XATTR_NOFOLLOW is
 * set), wraps the user buffer in a one-iovec write uio and calls
 * vn_setxattr().  An FSE_XATTR_MODIFIED fsevent is posted afterwards.
 *
 * NOTE(review): the error returns, the condition guarding add_fsevent() and
 * its remaining arguments are on lines absent from this listing.
 */
11208 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
11211 struct nameidata nd
;
11212 char attrname
[XATTR_MAXNAMELEN
+ 1];
11213 vfs_context_t ctx
= vfs_context_current();
11215 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11217 u_int32_t nameiflags
;
11219 char uio_buf
[UIO_SIZEOF(1)];
11221 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11225 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11227 if (error
== EPERM
) {
11228 /* if the string won't fit in attrname, copyinstr emits EPERM */
11229 return ENAMETOOLONG
;
11231 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11234 if (xattr_protected(attrname
)) {
/* A non-zero size with a null value pointer is tested before any lookup is attempted. */
11237 if (uap
->size
!= 0 && uap
->value
== 0) {
11241 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11242 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11243 if ((error
= namei(&nd
))) {
11249 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11250 &uio_buf
[0], sizeof(uio_buf
));
11251 uio_addiov(auio
, uap
->value
, uap
->size
);
11253 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
11256 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11267 * Set the data of an extended attribute.
/*
 * fsetxattr: descriptor-based write of an extended attribute.
 *
 * Same validation sequence as setxattr() (option mask, attribute-name
 * copyin with EPERM -> ENAMETOOLONG translation, xattr_protected(), size
 * without value), except that XATTR_NOFOLLOW is also part of the option
 * mask and the vnode comes from uap->fd via file_vnode() and
 * vnode_getwithref().  The user buffer is wrapped in a one-iovec write uio
 * and passed to vn_setxattr(); an FSE_XATTR_MODIFIED fsevent is posted and
 * the descriptor hold is dropped with file_drop().
 *
 * NOTE(review): ctx is initialised from vfs_context_current() but the
 * vn_setxattr() call fetches the current context again instead of reusing
 * it; harmless, but inconsistent with setxattr().  Error returns are on
 * lines absent from this listing.
 */
11270 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
11273 char attrname
[XATTR_MAXNAMELEN
+ 1];
11275 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11278 char uio_buf
[UIO_SIZEOF(1)];
11280 vfs_context_t ctx
= vfs_context_current();
11283 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11287 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11289 if (error
== EPERM
) {
11290 /* if the string won't fit in attrname, copyinstr emits EPERM */
11291 return ENAMETOOLONG
;
11293 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11296 if (xattr_protected(attrname
)) {
11299 if (uap
->size
!= 0 && uap
->value
== 0) {
11302 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11305 if ((error
= vnode_getwithref(vp
))) {
11306 file_drop(uap
->fd
);
11309 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
11310 &uio_buf
[0], sizeof(uio_buf
));
11311 uio_addiov(auio
, uap
->value
, uap
->size
);
11313 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
11316 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
11322 file_drop(uap
->fd
);
11328 * Remove an extended attribute.
11329 * XXX Code duplication here.
/*
 * removexattr: path-based removal of an extended attribute.
 *
 * Tests uap->options for XATTR_NOSECURITY / XATTR_NODEFAULT, copies in the
 * attribute name, applies xattr_protected(), resolves uap->path (following
 * symlinks unless XATTR_NOFOLLOW is set) and calls vn_removexattr().  An
 * FSE_XATTR_REMOVED fsevent is posted afterwards.
 *
 * NOTE(review): error returns, the vnode release and the remaining
 * add_fsevent() arguments are on lines absent from this listing.
 */
11332 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
11335 struct nameidata nd
;
11336 char attrname
[XATTR_MAXNAMELEN
+ 1];
11337 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11338 vfs_context_t ctx
= vfs_context_current();
11340 u_int32_t nameiflags
;
11343 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11347 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11351 if (xattr_protected(attrname
)) {
11354 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11355 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11356 if ((error
= namei(&nd
))) {
11362 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
11365 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11376 * Remove an extended attribute.
11377 * XXX Code duplication here.
/*
 * fremovexattr: descriptor-based removal of an extended attribute.
 *
 * Tests uap->options for XATTR_NOFOLLOW / XATTR_NOSECURITY /
 * XATTR_NODEFAULT, copies in the attribute name, applies xattr_protected(),
 * obtains the vnode behind uap->fd (file_vnode + vnode_getwithref) and
 * calls vn_removexattr().  An FSE_XATTR_REMOVED fsevent is posted and the
 * descriptor hold is dropped with file_drop().
 *
 * NOTE(review): ctx is initialised from vfs_context_current() but the
 * vn_removexattr() call fetches the current context again instead of
 * reusing it; harmless, but inconsistent with removexattr().  Error returns
 * are on lines absent from this listing.
 */
11380 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
11383 char attrname
[XATTR_MAXNAMELEN
+ 1];
11387 vfs_context_t ctx
= vfs_context_current();
11390 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11394 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
11398 if (xattr_protected(attrname
)) {
11401 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11404 if ((error
= vnode_getwithref(vp
))) {
11405 file_drop(uap
->fd
);
11409 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
11412 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11418 file_drop(uap
->fd
);
11424 * Retrieve the list of extended attribute names.
11425 * XXX Code duplication here.
/*
 * listxattr: path-based listing of extended attribute names.
 *
 * Tests uap->options for XATTR_NOSECURITY / XATTR_NODEFAULT, resolves
 * uap->path (following symlinks unless XATTR_NOFOLLOW is set) and, when a
 * name buffer with non-zero size is supplied, wraps it in a one-iovec read
 * uio at offset 0 before calling vn_listxattr().
 *
 * *retval is set either to the bytes transferred
 * (uap->bufsize - uio_resid) or to the size reported in attrsize.
 * NOTE(review): the condition choosing between the two, and error returns,
 * are on lines absent from this listing.
 */
11428 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11431 struct nameidata nd
;
11432 vfs_context_t ctx
= vfs_context_current();
11434 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11435 size_t attrsize
= 0;
11436 u_int32_t nameiflags
;
11438 char uio_buf
[UIO_SIZEOF(1)];
11440 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11444 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11445 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11446 if ((error
= namei(&nd
))) {
11451 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11452 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11453 &uio_buf
[0], sizeof(uio_buf
));
11454 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11457 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11461 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11463 *retval
= (user_ssize_t
)attrsize
;
11469 * Retrieve the list of extended attribute names.
11470 * XXX Code duplication here.
/*
 * flistxattr: descriptor-based listing of extended attribute names.
 *
 * Tests uap->options for XATTR_NOFOLLOW / XATTR_NOSECURITY /
 * XATTR_NODEFAULT, obtains the vnode behind uap->fd (file_vnode +
 * vnode_getwithref) and, when a name buffer with non-zero size is supplied,
 * wraps it in a one-iovec read uio at offset 0 before calling
 * vn_listxattr().  The descriptor hold is dropped with file_drop() and
 * *retval is set from the uio residual or from attrsize.
 *
 * This function derives spacetype from proc_is64bit(p) whereas listxattr()
 * uses IS_64BIT_PROCESS(p).
 * NOTE(review): presumably equivalent -- confirm.  Error returns are on
 * lines absent from this listing.
 */
11473 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11477 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11478 size_t attrsize
= 0;
11480 char uio_buf
[UIO_SIZEOF(1)];
11482 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
)) {
11486 if ((error
= file_vnode(uap
->fd
, &vp
))) {
11489 if ((error
= vnode_getwithref(vp
))) {
11490 file_drop(uap
->fd
);
11493 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11494 auio
= uio_createwithbuffer(1, 0, spacetype
,
11495 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11496 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11499 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11502 file_drop(uap
->fd
);
11504 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11506 *retval
= (user_ssize_t
)attrsize
;
/*
 * fsgetpath_internal: build the path of the object identified by
 * (volfs_id, objid) into a caller-supplied kernel buffer.
 *
 *   ctx       VFS context used for the lookup, MAC check and path build.
 *   volfs_id  volume id handed to mount_lookupby_volfsid().
 *   objid     object id handed to VFS_VGET() (VFS_ROOT() is the alternative
 *             call; its selecting condition is not in this listing).
 *   bufsize   size of buf; tested against PAGE_SIZE.
 *   buf       kernel buffer that receives the path.
 *   pathlen   out: the length produced by build_path().
 *
 * For a union mount, an ENOENT from the lookup is retried against the mount
 * of the covered vnode.  A build_path() EAGAIN (a race while building) is
 * retried a bounded number of times (retries starts at 0x10).  When kdebug
 * is enabled, the tail of the path is copied into dbg_parms and emitted via
 * kdebug_vfs_lookup().
 *
 * NOTE(review): error returns, labels/gotos and vnode/mount release calls
 * are on lines absent from this listing.
 */
11512 fsgetpath_internal(
11513 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11514 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
11517 struct mount
*mp
= NULL
;
11521 /* maximum number of times to retry build_path */
11522 unsigned int retries
= 0x10;
11524 if (bufsize
> PAGE_SIZE
) {
11533 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11534 error
= ENOTSUP
; /* unexpected failure */
11540 error
= VFS_ROOT(mp
, &vp
, ctx
);
11542 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11545 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11547 * If the fileid isn't found and we're in a union
11548 * mount volume, then see if the fileid is in the
11549 * mounted-on volume.
11551 struct mount
*tmp
= mp
;
11552 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11554 if (vfs_busy(mp
, LK_NOWAIT
) == 0) {
11566 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11573 /* Obtain the absolute path to this vnode. */
/* BUILDPATH_CHECKACCESS is requested whenever vfs_context_suser() returns non-zero. */
11574 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11575 bpflags
|= BUILDPATH_CHECK_MOVED
;
11576 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11580 /* there was a race building the path, try a few more times */
11581 if (error
== EAGAIN
) {
11592 AUDIT_ARG(text
, buf
);
11594 if (kdebug_enable
) {
11595 long dbg_parms
[NUMPARMS
];
11598 dbg_namelen
= (int)sizeof(dbg_parms
);
/* Short paths are copied whole and zero-padded; long paths contribute only their last dbg_namelen bytes. */
11600 if (length
< dbg_namelen
) {
11601 memcpy((char *)dbg_parms
, buf
, length
);
11602 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11604 dbg_namelen
= length
;
11606 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11609 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11610 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
/* NOTE(review): the value is cast to user_ssize_t but stored through an int pointer. */
11613 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11620 * Obtain the full pathname of a file system object by id.
/*
 * fsgetpath: system call wrapper around fsgetpath_internal().
 *
 * Copies in the fsid from user space, records fsid.val[0] and the object id
 * for auditing, tests uap->bufsize against PAGE_SIZE, allocates a zeroed
 * temporary buffer of uap->bufsize bytes (M_WAITOK | M_ZERO), asks
 * fsgetpath_internal() to fill it using fsid.val[0] as the volume id, then
 * copies `length` bytes out to uap->buf and reports `length` through
 * *retval.  The temporary buffer is released with FREE().
 *
 * NOTE(review): error returns and the jump to the cleanup path are on lines
 * absent from this listing.
 */
11623 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
11625 vfs_context_t ctx
= vfs_context_current();
11631 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11634 AUDIT_ARG(value32
, fsid
.val
[0]);
11635 AUDIT_ARG(value64
, uap
->objid
);
11636 /* Restrict output buffer size for now. */
11638 if (uap
->bufsize
> PAGE_SIZE
) {
11641 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
11642 if (realpath
== NULL
) {
11646 error
= fsgetpath_internal(
11647 ctx
, fsid
.val
[0], uap
->objid
,
11648 uap
->bufsize
, realpath
, &length
);
11654 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
11656 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11659 FREE(realpath
, M_TEMP
);
11665 * Common routine to handle various flavors of statfs data heading out
11668 * Returns: 0 Success
/*
 * munge_statfs: convert a kernel vfsstatfs into the user-visible statfs
 * layout and copy it out to bufp.
 *
 *   mp            mount supplying f_flags (masked with MNT_VISFLAGMASK),
 *                 f_type and the optional fstypename override.
 *   sfsp          kernel statistics to convert.
 *   bufp          user destination address.
 *   sizep         optional out parameter; only its NULL test is visible in
 *                 this listing (presumably receives my_size -- confirm).
 *   is_64_bit     presumably selects between the user64_statfs and
 *                 user32_statfs layouts built below; the selecting test is
 *                 not in this listing -- confirm.
 *   partial_copy  when set, f_reserved3 and f_reserved4 are left out of the
 *                 copyout length.
 *
 * The 64-bit layout copies the counters directly.  The 32-bit layout cannot
 * represent block counts above INT_MAX, so for such filesystems the block
 * counts are shifted down and the block size is shifted up by the same
 * amount, clipping whatever still does not fit.
 */
11672 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
11673 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
11674 boolean_t partial_copy
)
11677 int my_size
, copy_size
;
11680 struct user64_statfs sfs
;
11681 my_size
= copy_size
= sizeof(sfs
);
11682 bzero(&sfs
, my_size
);
11683 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11684 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11685 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11686 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
11687 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
11688 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
11689 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
11690 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
11691 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
11692 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
11693 sfs
.f_fsid
= sfsp
->f_fsid
;
11694 sfs
.f_owner
= sfsp
->f_owner
;
11695 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11696 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11698 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11700 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11701 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11703 if (partial_copy
) {
11704 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11706 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11708 struct user32_statfs sfs
;
11710 my_size
= copy_size
= sizeof(sfs
);
11711 bzero(&sfs
, my_size
);
11713 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11714 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11715 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11718 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11719 * have to fudge the numbers here in that case. We inflate the blocksize in order
11720 * to reflect the filesystem size as best we can.
11722 if ((sfsp
->f_blocks
> INT_MAX
)
11723 /* Hack for 4061702 . I think the real fix is for Carbon to
11724 * look for some volume capability and not depend on hidden
11725 * semantics agreed between a FS and carbon.
11726 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11727 * for Carbon to set bNoVolumeSizes volume attribute.
11728 * Without this the webdavfs files cannot be copied onto
11729 * disk as they look huge. This change should not affect
11730 * XSAN as they should not setting these to -1..
11732 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
11733 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
11734 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
11738 * Work out how far we have to shift the block count down to make it fit.
11739 * Note that it's possible to have to shift so far that the resulting
11740 * blocksize would be unreportably large. At that point, we will clip
11741 * any values that don't fit.
11743 * For safety's sake, we also ensure that f_iosize is never reported as
11744 * being smaller than f_bsize.
/*
 * NOTE(review): if f_bsize were 0 the second test below could never stop the
 * loop early, and (shift + 1) would reach 32 -- confirm f_bsize is non-zero
 * and wide enough for that shift.
 */
11746 for (shift
= 0; shift
< 32; shift
++) {
11747 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
) {
11750 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
) {
11754 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11755 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
11756 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
11757 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
11758 #undef __SHIFT_OR_CLIP
11759 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
/*
 * NOTE(review): the maximum is taken against the unshifted sfsp->f_bsize,
 * so the reported f_iosize can still be smaller than the shifted f_bsize
 * stored just above -- confirm whether that matches the stated intent.
 */
11760 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
11762 /* filesystem is small enough to be reported honestly */
11763 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
11764 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
11765 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
11766 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
11767 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
11769 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
11770 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
11771 sfs
.f_fsid
= sfsp
->f_fsid
;
11772 sfs
.f_owner
= sfsp
->f_owner
;
11773 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11774 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11776 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11778 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11779 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11781 if (partial_copy
) {
11782 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11784 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11787 if (sizep
!= NULL
) {
11794 * copy stat structure into user_stat structure.
/*
 * munge_user64_stat: fill a user64_stat from a kernel struct stat.
 *
 *   sbp   source kernel stat.
 *   usbp  destination; zeroed first, then populated field by field.
 *
 * Timestamps are copied through the st_*timespec members when
 * _POSIX_C_SOURCE is not defined; the st_*time / st_*timensec assignments
 * that follow are presumably the alternative preprocessor branch (the
 * #else / #endif lines are not in this listing -- confirm).
 */
11797 munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11799 bzero(usbp
, sizeof(*usbp
));
11801 usbp
->st_dev
= sbp
->st_dev
;
11802 usbp
->st_ino
= sbp
->st_ino
;
11803 usbp
->st_mode
= sbp
->st_mode
;
11804 usbp
->st_nlink
= sbp
->st_nlink
;
11805 usbp
->st_uid
= sbp
->st_uid
;
11806 usbp
->st_gid
= sbp
->st_gid
;
11807 usbp
->st_rdev
= sbp
->st_rdev
;
11808 #ifndef _POSIX_C_SOURCE
11809 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11810 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11811 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11812 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11813 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11814 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11816 usbp
->st_atime
= sbp
->st_atime
;
11817 usbp
->st_atimensec
= sbp
->st_atimensec
;
11818 usbp
->st_mtime
= sbp
->st_mtime
;
11819 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11820 usbp
->st_ctime
= sbp
->st_ctime
;
11821 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11823 usbp
->st_size
= sbp
->st_size
;
11824 usbp
->st_blocks
= sbp
->st_blocks
;
11825 usbp
->st_blksize
= sbp
->st_blksize
;
11826 usbp
->st_flags
= sbp
->st_flags
;
11827 usbp
->st_gen
= sbp
->st_gen
;
11828 usbp
->st_lspare
= sbp
->st_lspare
;
11829 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11830 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * munge_user32_stat: fill a user32_stat from a kernel struct stat.
 *
 *   sbp   source kernel stat.
 *   usbp  destination; zeroed first, then populated field by field.
 *
 * Mirrors munge_user64_stat() assignment for assignment; only the
 * destination structure type differs.
 * NOTE(review): any narrowing on assignment depends on the user32_stat
 * field types, which are not visible here.  As in the 64-bit variant the
 * #else / #endif lines of the timestamp conditional are not in this
 * listing.
 */
11834 munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11836 bzero(usbp
, sizeof(*usbp
));
11838 usbp
->st_dev
= sbp
->st_dev
;
11839 usbp
->st_ino
= sbp
->st_ino
;
11840 usbp
->st_mode
= sbp
->st_mode
;
11841 usbp
->st_nlink
= sbp
->st_nlink
;
11842 usbp
->st_uid
= sbp
->st_uid
;
11843 usbp
->st_gid
= sbp
->st_gid
;
11844 usbp
->st_rdev
= sbp
->st_rdev
;
11845 #ifndef _POSIX_C_SOURCE
11846 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11847 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11848 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11849 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11850 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11851 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11853 usbp
->st_atime
= sbp
->st_atime
;
11854 usbp
->st_atimensec
= sbp
->st_atimensec
;
11855 usbp
->st_mtime
= sbp
->st_mtime
;
11856 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11857 usbp
->st_ctime
= sbp
->st_ctime
;
11858 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11860 usbp
->st_size
= sbp
->st_size
;
11861 usbp
->st_blocks
= sbp
->st_blocks
;
11862 usbp
->st_blksize
= sbp
->st_blksize
;
11863 usbp
->st_flags
= sbp
->st_flags
;
11864 usbp
->st_gen
= sbp
->st_gen
;
11865 usbp
->st_lspare
= sbp
->st_lspare
;
11866 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11867 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11871 * copy stat64 structure into user_stat64 structure.
11874 munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11876 bzero(usbp
, sizeof(*usbp
));
11878 usbp
->st_dev
= sbp
->st_dev
;
11879 usbp
->st_ino
= sbp
->st_ino
;
11880 usbp
->st_mode
= sbp
->st_mode
;
11881 usbp
->st_nlink
= sbp
->st_nlink
;
11882 usbp
->st_uid
= sbp
->st_uid
;
11883 usbp
->st_gid
= sbp
->st_gid
;
11884 usbp
->st_rdev
= sbp
->st_rdev
;
11885 #ifndef _POSIX_C_SOURCE
11886 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11887 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11888 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11889 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11890 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11891 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11892 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11893 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11895 usbp
->st_atime
= sbp
->st_atime
;
11896 usbp
->st_atimensec
= sbp
->st_atimensec
;
11897 usbp
->st_mtime
= sbp
->st_mtime
;
11898 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11899 usbp
->st_ctime
= sbp
->st_ctime
;
11900 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11901 usbp
->st_birthtime
= sbp
->st_birthtime
;
11902 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11904 usbp
->st_size
= sbp
->st_size
;
11905 usbp
->st_blocks
= sbp
->st_blocks
;
11906 usbp
->st_blksize
= sbp
->st_blksize
;
11907 usbp
->st_flags
= sbp
->st_flags
;
11908 usbp
->st_gen
= sbp
->st_gen
;
11909 usbp
->st_lspare
= sbp
->st_lspare
;
11910 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11911 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11915 munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11917 bzero(usbp
, sizeof(*usbp
));
11919 usbp
->st_dev
= sbp
->st_dev
;
11920 usbp
->st_ino
= sbp
->st_ino
;
11921 usbp
->st_mode
= sbp
->st_mode
;
11922 usbp
->st_nlink
= sbp
->st_nlink
;
11923 usbp
->st_uid
= sbp
->st_uid
;
11924 usbp
->st_gid
= sbp
->st_gid
;
11925 usbp
->st_rdev
= sbp
->st_rdev
;
11926 #ifndef _POSIX_C_SOURCE
11927 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11928 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11929 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11930 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11931 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11932 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11933 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11934 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11936 usbp
->st_atime
= sbp
->st_atime
;
11937 usbp
->st_atimensec
= sbp
->st_atimensec
;
11938 usbp
->st_mtime
= sbp
->st_mtime
;
11939 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11940 usbp
->st_ctime
= sbp
->st_ctime
;
11941 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11942 usbp
->st_birthtime
= sbp
->st_birthtime
;
11943 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11945 usbp
->st_size
= sbp
->st_size
;
11946 usbp
->st_blocks
= sbp
->st_blocks
;
11947 usbp
->st_blksize
= sbp
->st_blksize
;
11948 usbp
->st_flags
= sbp
->st_flags
;
11949 usbp
->st_gen
= sbp
->st_gen
;
11950 usbp
->st_lspare
= sbp
->st_lspare
;
11951 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11952 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * vnode_purge_callback(vp, cargs)
 *
 * Per-vnode callback; vfs_purge_callback() hands it to vnode_iterate().
 * Calls ubc_msync() on vp from offset 0 up to ubc_getsize(vp) with
 * UBC_PUSHALL | UBC_INVALIDATE, passing NULL for resid_off.  The value
 * returned by ubc_msync() is not examined, and VNODE_RETURNED is returned
 * unconditionally.  cargs is marked __unused.
 *
 * NOTE(review): the return type and the braces are not visible in this
 * listing; confirm them against the full file.
 */
11956 * Purge buffer cache for simulating cold starts
11959 vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11961 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11963 return VNODE_RETURNED
;
/*
 * vfs_purge_callback(mp, arg)
 *
 * Per-mount callback; vfs_purge() hands it to vfs_iterate().  Runs
 * vnode_iterate() over mp with VNODE_WAIT | VNODE_ITERATE_ALL, applying
 * vnode_purge_callback to each vnode with a NULL callback argument.  The
 * value returned by vnode_iterate() is not examined, and VFS_RETURNED is
 * returned unconditionally.  arg is marked __unused.
 *
 * NOTE(review): the return type and the braces are not visible in this
 * listing; confirm them against the full file.
 */
11967 vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11969 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11971 return VFS_RETURNED
;
/*
 * vfs_purge(p, uap, retval)
 *
 * All three parameters are marked __unused.  The visible steps are:
 *   1. a test of kauth_cred_issuser(kauth_cred_get()); the body of the
 *      failing branch is not shown in this listing (presumably it returns
 *      an error to the caller -- TODO confirm);
 *   2. vfs_iterate() with flags 0, applying vfs_purge_callback to each
 *      mount with a NULL argument.
 *
 * NOTE(review): the return type, the braces and the final return
 * statement are missing from this listing.
 */
11975 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11977 if (!kauth_cred_issuser(kauth_cred_get())) {
11981 vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
);
/*
 * vnode_get_snapdir(rvp, sdvpp, ctx)
 *
 * Thin wrapper: returns whatever VFS_VGET_SNAPDIR() yields for the mount
 * of rvp (obtained with vnode_mount()), forwarding sdvpp and ctx
 * unchanged.  No argument checking is done here.
 *
 *   rvp   - vnode whose mount is queried.
 *   sdvpp - out-parameter passed straight through to VFS_VGET_SNAPDIR().
 *   ctx   - VFS context passed straight through.
 *
 * NOTE(review): the return type is not visible in this listing.
 */
11987 * gets the vnode associated with the (unnamed) snapshot directory
11988 * for a Filesystem. The snapshot directory vnode is returned with
11989 * an iocount on it.
11992 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11994 return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
/*
 * vnode_get_snapshot(dirfd, rvpp, sdvpp, name, ndp, op, pathop, ...)
 *
 * Visible sequence of operations:
 *   1. vnode_getfromfd() turns dirfd into *rvpp.
 *   2. *rvpp is tested with vnode_isvroot().
 *   3. vfs_getattr() is asked for f_capabilities; the check fails unless
 *      VOL_CAP_INT_SNAPSHOT is set in BOTH the valid[] and the
 *      capabilities[] words at index VOL_CAPABILITIES_INTERFACES.
 *   4. vnode_get_snapdir() fills *sdvpp.
 *   5. A MAXPATHLEN buffer is MALLOCed (M_TEMP, M_WAITOK) and the name is
 *      copied in from user space with copyinstr().
 *   6. The name is rejected by the sanity test when name_len is 1, or is
 *      2 with a leading dot, or is 3 with two leading dots; it is then
 *      scanned for a slash.
 *   7. For op == CREATE or op == DELETE the matching
 *      mac_mount_check_snapshot_*() hook is called.
 *   8. ndp is set up with NDINIT (USEDVP | NOCACHE | AUDITVNPATH1,
 *      UIO_SYSSPACE, pointing at the kernel copy of the name),
 *      ndp->ni_dvp is set to *sdvpp, and namei() is run.
 *   9. The name buffer is released with FREE().
 *
 * NOTE(review): every error branch body, the trailing parameters after
 * pathop, the local declarations other than vfa, and the final return are
 * missing from this listing, so the cleanup performed on failure cannot
 * be described from here.  The fragment at listing line 12011 suggests
 * that pathop is conditionally marked unused -- confirm.
 */
11998 * Get the snapshot vnode.
12000 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12001 * needs nameidone() on ndp.
12003 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12005 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12009 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
12010 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
12011 #if !CONFIG_TRIGGERS
12014 enum path_operation pathop
,
12020 struct vfs_attr vfa
;
12025 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
12030 if (!vnode_isvroot(*rvpp
)) {
12035 /* Make sure the filesystem supports snapshots */
12036 VFSATTR_INIT(&vfa
);
12037 VFSATTR_WANTED(&vfa
, f_capabilities
);
12038 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
12039 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
12040 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
12041 VOL_CAP_INT_SNAPSHOT
)) ||
12042 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
12043 VOL_CAP_INT_SNAPSHOT
))) {
12048 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
12053 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12054 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12060 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12061 * (the length returned by copyinstr includes the terminating NUL)
12063 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
12064 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
12068 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++) {
12071 if (i
< (int)name_len
) {
12077 if (op
== CREATE
) {
12078 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
12080 } else if (op
== DELETE
) {
12081 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
12089 /* Check if the snapshot already exists ... */
12090 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
12091 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
12092 ndp
->ni_dvp
= *sdvpp
;
12094 error
= namei(ndp
);
12096 FREE(name_buf
, M_TEMP
);
/*
 * snapshot_create(dirfd, name, flags, ...)
 *
 * Visible sequence of operations:
 *   1. vnode_get_snapshot() is called with op CREATE; it fills rvp,
 *      snapdvp and the nameidata namend.
 *   2. If namend.ni_vp is set after the lookup, that vnode is released
 *      with vnode_put() (presumably the call then fails because the name
 *      is already taken -- the error assignment is not visible; confirm).
 *   3. Otherwise a vnode_attr is prepared with va_type = VREG and
 *      va_mode = 0, and vn_create() is called in snapdvp with
 *      VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT.
 *   4. When vn_create() reports no error and hands back a vnode, a
 *      branch is taken whose body is not shown here.
 *   5. nameidone(&namend) and vnode_put(snapdvp) are called.
 *
 * flags is marked __unused.
 *
 * NOTE(review): the remaining parameters, the initialisation of va, the
 * release of rvp and the return statement are missing from this listing.
 */
12112 * create a filesystem snapshot (for supporting filesystems)
12114 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12115 * We get to the (unnamed) snapshot directory vnode and create the vnode
12116 * for the snapshot in it.
12120 * a) Passed in name for snapshot cannot have slashes.
12121 * b) name can't be "." or ".."
12123 * Since this requires superuser privileges, vnode_authorize calls are not
12127 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12130 vnode_t rvp
, snapdvp
;
12132 struct nameidata namend
;
12134 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
12140 if (namend
.ni_vp
) {
12141 vnode_put(namend
.ni_vp
);
12144 struct vnode_attr va
;
12145 vnode_t vp
= NULLVP
;
12148 VATTR_SET(&va
, va_type
, VREG
);
12149 VATTR_SET(&va
, va_mode
, 0);
12151 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
12152 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
12153 if (!error
&& vp
) {
12158 nameidone(&namend
);
12159 vnode_put(snapdvp
);
/*
 * snapshot_delete(dirfd, name, flags, ...)
 *
 * Visible sequence of operations:
 *   1. vnode_get_snapshot() is called with op DELETE; it fills rvp,
 *      snapdvp and the nameidata namend.
 *   2. VNOP_REMOVE() is issued on namend.ni_vp inside snapdvp, using the
 *      component name from namend.ni_cnd and the flag
 *      VNODE_REMOVE_SKIP_NAMESPACE_EVENT.
 *   3. namend.ni_vp is released with vnode_put(), nameidone() is called
 *      on namend, and snapdvp is released with vnode_put().
 *
 * flags is marked __unused.
 *
 * NOTE(review): the remaining parameters, the check of the lookup result,
 * the release of rvp and the return statement are missing from this
 * listing.
 */
12165 * Delete a Filesystem snapshot
12167 * get the vnode for the unnamed snapshot directory and the snapshot and
12168 * delete the snapshot.
12171 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12174 vnode_t rvp
, snapdvp
;
12176 struct nameidata namend
;
12178 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
/* Remove the snapshot vnode found by the lookup from the snapshot directory. */
12184 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
12185 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
12187 vnode_put(namend
.ni_vp
);
12188 nameidone(&namend
);
12189 vnode_put(snapdvp
);
/*
 * snapshot_revert(dirfd, name, flags, ...)
 *
 * Visible sequence of operations:
 *   1. vnode_getfromfd() turns dirfd into rvp; mp = vnode_mount(rvp).
 *   2. A MAXPATHLEN buffer is MALLOCed (M_TEMP, M_WAITOK) and the name is
 *      copied in from user space with copyinstr().
 *   3. mac_mount_check_snapshot_revert() is consulted for (mp, name_buf).
 *   4. mount_iterref(mp, 0) is taken.
 *   5. A componentname is zeroed and filled in by hand (LOOKUP,
 *      ISLASTCN | HASBUF, buffer and name both pointing at name_buf,
 *      cn_namelen = name_len) and attached to revert_data.sr_cnp.
 *   6. VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, ...) is issued, followed by
 *      mount_iterdrop(mp) and FREE(name_buf).
 *   7. Per the in-line comment, on error the operation is retried through
 *      vnode_get_snapshot() (op LOOKUP) and
 *      VNOP_IOCTL(APFSIOC_REVERT_TO_SNAPSHOT) on the snapshot vnode,
 *      after which ni_vp, the nameidata and snapdvp are released.
 *
 * FREE(name_buf, M_TEMP) appears four times: three on paths whose
 * conditions are not shown, and once after the VFS_IOCTL call.
 *
 * flags is marked __unused.
 *
 * NOTE(review): cn_namelen is set from name_len, which according to the
 * comment in vnode_get_snapshot() counts the terminating NUL as returned
 * by copyinstr; whether an adjustment happens on a line missing from this
 * listing cannot be told from here -- confirm.
 * NOTE(review): the remaining parameters, most local declarations, every
 * error test, the vnode_put() calls on rvp and the return statement are
 * missing from this listing.
 */
12196 * Revert a filesystem to a snapshot
12198 * Marks the filesystem to revert to the given snapshot on next mount.
12201 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12207 struct fs_snapshot_revert_args revert_data
;
12208 struct componentname cnp
;
12212 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12216 mp
= vnode_mount(rvp
);
12218 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12219 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12221 FREE(name_buf
, M_TEMP
);
12227 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
12229 FREE(name_buf
, M_TEMP
);
12236 * Grab mount_iterref so that we can release the vnode,
12237 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12239 error
= mount_iterref(mp
, 0);
12242 FREE(name_buf
, M_TEMP
);
/* Hand-built component name describing the snapshot for the VFS ioctl. */
12246 memset(&cnp
, 0, sizeof(cnp
));
12247 cnp
.cn_pnbuf
= (char *)name_buf
;
12248 cnp
.cn_nameiop
= LOOKUP
;
12249 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12250 cnp
.cn_pnlen
= MAXPATHLEN
;
12251 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12252 cnp
.cn_namelen
= (int)name_len
;
12253 revert_data
.sr_cnp
= &cnp
;
12255 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
12256 mount_iterdrop(mp
);
12257 FREE(name_buf
, M_TEMP
);
12260 /* If there was any error, try again using VNOP_IOCTL */
12263 struct nameidata namend
;
12265 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
12272 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
12275 vnode_put(namend
.ni_vp
);
12276 nameidone(&namend
);
12277 vnode_put(snapdvp
);
/*
 * snapshot_rename(dirfd, old, new, flags, ctx)
 *
 * Visible sequence of operations:
 *   1. A heap block holding two struct nameidata (from_node, to_node) is
 *      MALLOCed (M_TEMP, M_WAITOK); fromnd and tond point into it.
 *   2. vnode_get_snapshot() looks up the old name with op DELETE, filling
 *      rvp, snapdvp and fromnd; fvp is taken from fromnd->ni_vp.
 *   3. A MAXPATHLEN buffer is MALLOCed and the new name is copied in from
 *      user space with copyinstr().
 *   4. The new name goes through the same sanity test and slash scan as
 *      in vnode_get_snapshot().
 *   5. mac_mount_check_snapshot_create() is consulted for the mount of
 *      rvp.
 *   6. tond is set up with NDINIT (RENAME, OP_RENAME,
 *      USEDVP | NOCACHE | AUDITVNPATH2, UIO_SYSSPACE), tond->ni_dvp is
 *      set to snapdvp, and namei() is run.
 *   7. If the lookup produced tond->ni_vp, that vnode is released with
 *      vnode_put(); the in-line comment states that EEXIST is returned
 *      in that case.
 *   8. VNOP_RENAME() is issued with snapdvp as both source and target
 *      directory and NULLVP as the target vnode.
 *   9. newname_buf is freed, snapdvp is released, and the nameidata block
 *      is freed.
 *
 * flags is marked __unused.
 *
 * NOTE(review): several local declarations, every error branch body, the
 * nameidone() calls, the release of rvp and fvp, and the return statement
 * are missing from this listing.
 */
12285 * rename a Filesystem snapshot
12287 * get the vnode for the unnamed snapshot directory and the snapshot and
12288 * rename the snapshot. This is a very specialised (and simple) case of
12289 * rename(2) (which has to deal with a lot more complications). It differs
12290 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12293 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
12294 __unused
uint32_t flags
, vfs_context_t ctx
)
12296 vnode_t rvp
, snapdvp
;
12298 caddr_t newname_buf
;
12301 struct nameidata
*fromnd
, *tond
;
12302 /* carving out a chunk for structs that are too big to be on stack. */
12304 struct nameidata from_node
;
12305 struct nameidata to_node
;
12308 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
12309 fromnd
= &__rename_data
->from_node
;
12310 tond
= &__rename_data
->to_node
;
12312 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
12317 fvp
= fromnd
->ni_vp
;
12319 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12320 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
12326 * Some sanity checks- new name can't be empty, "." or ".." or have
12328 * (the length returned by copyinstr includes the terminating NUL)
12330 * The FS rename VNOP is suppossed to handle this but we'll pick it
12333 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
12334 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
12338 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++) {
12341 if (i
< (int)name_len
) {
12347 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
12354 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
12355 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
12356 tond
->ni_dvp
= snapdvp
;
12358 error
= namei(tond
);
12361 } else if (tond
->ni_vp
) {
12363 * snapshot rename behaves differently than rename(2) - if the
12364 * new name exists, EEXIST is returned.
12366 vnode_put(tond
->ni_vp
);
12371 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
12372 &tond
->ni_cnd
, ctx
);
12377 FREE(newname_buf
, M_TEMP
);
12379 vnode_put(snapdvp
);
12383 FREE(__rename_data
, M_TEMP
);
/*
 * snapshot_mount(dirfd, name, directory, mnt_data, flags, ctx)
 *
 * Visible sequence of operations:
 *   1. A heap block holding two struct nameidata (snapnd, dirnd) is
 *      MALLOCed; snapndp and dirndp point into it.
 *   2. vnode_get_snapshot() looks up the snapshot with op LOOKUP, filling
 *      rvp, snapdvp and snapndp; snapvp is taken from snapndp->ni_vp.
 *   3. The mount of rvp is tested against NULL and dead_mountp.
 *   4. The directory to be covered is looked up from user space with
 *      NDINIT (LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
 *      UIO_USERSPACE) and namei(); vp and pvp are its ni_vp and ni_dvp.
 *   5. A vnode that is both VROOT and on an MNT_ROOTFS mount is singled
 *      out (the branch body is not shown).
 *   6. Otherwise mount_common() is called with the filesystem type name
 *      of the mount of rvp, a fs_snapshot_mount_args carrying that mount
 *      and the component name of the snapshot, the user flags masked to
 *      MNT_DONTBROWSE, and the internal flag KERNEL_MOUNT_SNAPSHOT.
 *   7. snapdvp is released, nameidone(snapndp) is called, and the
 *      nameidata block is freed.
 *
 * mnt_data is marked __unused.
 *
 * NOTE(review): flags is also marked __unused in the signature, yet it is
 * read in the mount_common() call (flags & MNT_DONTBROWSE); the annotation
 * looks stale.
 * NOTE(review): the error branch bodies, the release of vp, pvp, snapvp
 * and rvp, the nameidone() for dirndp and the return statement are
 * missing from this listing.
 */
12388 * Mount a Filesystem snapshot
12390 * get the vnode for the unnamed snapshot directory and the snapshot and
12391 * mount the snapshot.
12394 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
12395 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
12397 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
12399 struct nameidata
*snapndp
, *dirndp
;
12400 /* carving out a chunk for structs that are too big to be on stack. */
12402 struct nameidata snapnd
;
12403 struct nameidata dirnd
;
12404 } * __snapshot_mount_data
;
12406 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
12408 snapndp
= &__snapshot_mount_data
->snapnd
;
12409 dirndp
= &__snapshot_mount_data
->dirnd
;
12411 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
12417 snapvp
= snapndp
->ni_vp
;
12418 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
12423 /* Get the vnode to be covered */
12424 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
12425 UIO_USERSPACE
, directory
, ctx
);
12426 error
= namei(dirndp
);
12431 vp
= dirndp
->ni_vp
;
12432 pvp
= dirndp
->ni_dvp
;
12434 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
12437 mount_t mp
= vnode_mount(rvp
);
12438 struct fs_snapshot_mount_args smnt_data
;
/* Describe the snapshot to the filesystem: its mount and its name. */
12440 smnt_data
.sm_mp
= mp
;
12441 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
12442 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
12443 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
12444 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12452 vnode_put(snapdvp
);
12454 nameidone(snapndp
);
12456 FREE(__snapshot_mount_data
, M_TEMP
);
/*
 * snapshot_root(dirfd, name, flags, ...)
 *
 * Visible sequence of operations:
 *   1. vnode_getfromfd() turns dirfd into rvp; mp = vnode_mount(rvp).
 *   2. A MAXPATHLEN buffer is MALLOCed (M_TEMP, M_WAITOK) and the name is
 *      copied in from user space with copyinstr().
 *   3. No MAC hook is called here; the in-line XXX comment marks that as
 *      an open question.
 *   4. mount_iterref(mp, 0) is taken.
 *   5. A componentname is zeroed and filled in by hand (LOOKUP,
 *      ISLASTCN | HASBUF, buffer and name both pointing at name_buf,
 *      cn_namelen = name_len) and attached to root_data.sr_cnp.
 *   6. VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, ...) is issued, followed by
 *      mount_iterdrop(mp) and FREE(name_buf).
 *
 * FREE(name_buf, M_TEMP) appears three times: two on paths whose
 * conditions are not shown, and once after the VFS_IOCTL call.
 *
 * flags is marked __unused.
 *
 * NOTE(review): the remaining parameters, most local declarations, every
 * error test, the vnode_put() on rvp and the return statement are missing
 * from this listing.
 */
12461 * Root from a snapshot of the filesystem
12463 * Marks the filesystem to root from the given snapshot on next boot.
12466 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12472 struct fs_snapshot_root_args root_data
;
12473 struct componentname cnp
;
12477 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12481 mp
= vnode_mount(rvp
);
12483 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12484 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12486 FREE(name_buf
, M_TEMP
);
12491 // XXX MAC checks ?
12494 * Grab mount_iterref so that we can release the vnode,
12495 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12497 error
= mount_iterref(mp
, 0);
12500 FREE(name_buf
, M_TEMP
);
/* Hand-built component name describing the snapshot for the VFS ioctl. */
12504 memset(&cnp
, 0, sizeof(cnp
));
12505 cnp
.cn_pnbuf
= (char *)name_buf
;
12506 cnp
.cn_nameiop
= LOOKUP
;
12507 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12508 cnp
.cn_pnlen
= MAXPATHLEN
;
12509 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12510 cnp
.cn_namelen
= (int)name_len
;
12511 root_data
.sr_cnp
= &cnp
;
12513 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12515 mount_iterdrop(mp
);
12516 FREE(name_buf
, M_TEMP
);
12522 * FS snapshot operations dispatcher
12525 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12526 __unused
int32_t *retval
)
12529 vfs_context_t ctx
= vfs_context_current();
12531 AUDIT_ARG(fd
, uap
->dirfd
);
12532 AUDIT_ARG(value32
, uap
->op
);
12534 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12540 * Enforce user authorization for snapshot modification operations
12542 if ((uap
->op
!= SNAPSHOT_OP_MOUNT
) &&
12543 (uap
->op
!= SNAPSHOT_OP_ROOT
)) {
12544 vnode_t dvp
= NULLVP
;
12545 vnode_t devvp
= NULLVP
;
12548 error
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
);
12552 mp
= vnode_mount(dvp
);
12553 devvp
= mp
->mnt_devvp
;
12555 /* get an iocount on devvp */
12556 if (devvp
== NULLVP
) {
12557 error
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
);
12558 /* for mounts which aren't block devices */
12559 if (error
== ENOENT
) {
12563 error
= vnode_getwithref(devvp
);
12571 if ((vfs_context_issuser(ctx
) == 0) &&
12572 (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) {
12584 case SNAPSHOT_OP_CREATE
:
12585 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12587 case SNAPSHOT_OP_DELETE
:
12588 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12590 case SNAPSHOT_OP_RENAME
:
12591 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12594 case SNAPSHOT_OP_MOUNT
:
12595 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12596 uap
->data
, uap
->flags
, ctx
);
12598 case SNAPSHOT_OP_REVERT
:
12599 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12601 #if CONFIG_MNT_ROOTSNAP
12602 case SNAPSHOT_OP_ROOT
:
12603 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12605 #endif /* CONFIG_MNT_ROOTSNAP */