2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <vfs/vfs_disk_conditioner.h>
112 #include <security/audit/audit.h>
113 #include <bsm/audit_kevents.h>
115 #include <mach/mach_types.h>
116 #include <kern/kern_types.h>
117 #include <kern/kalloc.h>
118 #include <kern/task.h>
120 #include <vm/vm_pageout.h>
121 #include <vm/vm_protos.h>
123 #include <libkern/OSAtomic.h>
124 #include <pexpert/pexpert.h>
125 #include <IOKit/IOBSD.h>
128 #include <miscfs/routefs/routefs.h>
132 #include <security/mac.h>
133 #include <security/mac_framework.h>
/*
 * Path buffer helpers: GET_PATH(x) stores a path buffer in x and
 * RELEASE_PATH(x) gives it back.  Two variants are visible: one takes the
 * buffer from get_pathbuff(), the other allocates MAXPATHLEN bytes with
 * MALLOC_ZONE() and frees them with FREE_ZONE(), both using M_NAMEI.
 * The closing #endif names CONFIG_FSE as the option that selects between
 * the two variants.
 * NOTE(review): the selecting conditional lines and the body of the first
 * RELEASE_PATH are not visible in this listing.
 */
137 #define GET_PATH(x) \
138 (x) = get_pathbuff();
139 #define RELEASE_PATH(x) \
142 #define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144 #define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146 #endif /* CONFIG_FSE */
/*
 * Fallback values for filesystem-specific request codes.  Each one is
 * guarded by #ifndef, so it takes effect only when no included header has
 * already defined the name.
 */
148 #ifndef HFS_GET_BOOT_INFO
149 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
152 #ifndef HFS_SET_BOOT_INFO
153 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
156 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
157 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
160 extern void disk_conditioner_unmount(mount_t mp
);
162 /* struct for checkdirs iteration */
167 /* callback for checkdirs iteration */
168 static int checkdirs_callback(proc_t p
, void * arg
);
170 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
171 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
172 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
173 static int getfsstat_callback(mount_t mp
, void * arg
);
174 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
175 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
176 static int sync_callback(mount_t
, void *);
177 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
178 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
179 boolean_t partial_copy
);
180 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
182 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
183 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
184 struct componentname
*cnp
, user_addr_t fsmountargs
,
185 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
187 void vfs_notify_mount(vnode_t pdvp
);
189 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
191 struct fd_vn_data
* fg_vn_data_alloc(void);
194 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
195 * Concurrent lookups (or lookups by ids) on hard links can cause the
196 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
197 * does) to return ENOENT as the path cannot be returned from the name cache
198 * alone. We have no option but to retry and hope to get one namei->reverse path
199 * generation done without an intervening lookup, lookup by id on the hard link
200 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
201 * which currently are the MAC hooks for rename, unlink and rmdir.
203 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
205 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
207 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
209 #ifdef CONFIG_IMGSRC_ACCESS
210 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
211 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
212 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
213 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
214 static void mount_end_update(mount_t mp
);
215 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
216 #endif /* CONFIG_IMGSRC_ACCESS */
219 #if CONFIG_MNT_ROOTSNAP
220 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
);
222 static int snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
));
225 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
228 int sync_internal(void);
231 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
233 extern lck_grp_t
*fd_vn_lck_grp
;
234 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
235 extern lck_attr_t
*fd_vn_lck_attr
;
238 * incremented each time a mount or unmount operation occurs
239 * used to invalidate the cached value of the rootvp in the
240 * mount structure utilized by cache_lookup_path
242 uint32_t mount_generation
= 0;
244 /* counts number of mount and unmount operations */
245 unsigned int vfs_nummntops
=0;
247 extern const struct fileops vnops
;
248 #if CONFIG_APPLEDOUBLE
249 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
250 #endif /* CONFIG_APPLEDOUBLE */
253 * Virtual File System System Calls
256 #if NFSCLIENT || DEVFS || ROUTEFS
258 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount
 *
 * Report whether a mount carries the MNTK_KERNEL_MOUNT flag.
 *
 * Parameters:	mp	Mount to test.
 *
 * Returns:	TRUE when MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag,
 *		FALSE otherwise.
 */
262 vfs_iskernelmount(mount_t mp
)
264 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount
 *
 * Mount a filesystem on behalf of the kernel by handing the request to
 * mount_common() with kernelmount set to TRUE and no MAC label string.
 *
 * Parameters:	fstype		Filesystem type name.
 *		pvp		Passed to mount_common() as pvp.
 *		vp		Passed to mount_common() as vp.
 *		path		Kernel-space path of the mount point.  It is
 *				given to NDINIT() as a UIO_SYSSPACE address and
 *				is also installed as the component name buffer.
 *		data		Filesystem-specific mount arguments.
 *		datalen		Not used.
 *		syscall_flags	Passed to mount_common() as flags.
 *		kern_flags	Passed to mount_common() as internal_flags.
 *				NOTE(review): declared __unused although it is
 *				forwarded below.
 *		ctx		Caller context.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * branch structure and the cleanup and return path) are not visible in
 * this listing; the notes here cover only the statements shown.
 */
269 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
270 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
/* path is a kernel address, hence UIO_SYSSPACE rather than UIO_USERSPACE */
276 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
277 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
280 * Get the vnode to be covered if it's not supplied
290 char *pnbuf
= CAST_DOWN(char *, path
);
292 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
293 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
297 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
298 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
308 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
311 * Mount a file system.
/*
 * mount
 *
 * Entry point for mount requests that carry no MAC label: repackage the
 * arguments as a struct __mac_mount_args with mac_p set to USER_ADDR_NULL
 * and let __mac_mount() do the work.
 *
 * Parameters:	p	Calling process, forwarded to __mac_mount().
 *		uap	Arguments; type, path, flags and data are copied.
 *		retval	Forwarded to __mac_mount().
 *			NOTE(review): declared __unused although it is
 *			forwarded below.
 *
 * Returns:	Whatever __mac_mount() returns.
 */
315 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
317 struct __mac_mount_args muap
;
319 muap
.type
= uap
->type
;
320 muap
.path
= uap
->path
;
321 muap
.flags
= uap
->flags
;
322 muap
.data
= uap
->data
;
/* this entry point never supplies a MAC label */
323 muap
.mac_p
= USER_ADDR_NULL
;
324 return (__mac_mount(p
, &muap
, retval
));
/*
 * fmount
 *
 * Mount a filesystem on the vnode referred to by a file descriptor
 * rather than by a path.
 *
 * Parameters:	p	Not used.
 *		uap	Arguments:
 *			  uap->type	User address of the filesystem type name.
 *			  uap->fd	Descriptor of the vnode to be covered.
 *			  uap->flags	Mount flags.
 *			  uap->data	Filesystem-specific mount arguments.
 *		retval	Not used.
 *
 * Visible steps: audit the descriptor and the flags; test the flags for
 * MNT_IMGSRC_BY_INDEX, MNT_ROOTFS and MNT_UNION; copy in the type name;
 * resolve the descriptor with file_vnode() and vnode_getwithref(); fetch
 * the parent with vnode_getparent(); build a componentname whose path
 * buffer is filled by vn_getpath(); call mount_common() as a non-kernel
 * mount with internal_flags 0 and a NULL label.
 *
 * NOTE(review): the statements executed when a check fails, and the
 * release of vp and pvp, are not visible in this listing.
 */
328 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
330 struct componentname cn
;
331 vfs_context_t ctx
= vfs_context_current();
334 int flags
= uap
->flags
;
335 char fstypename
[MFSNAMELEN
];
336 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
340 AUDIT_ARG(fd
, uap
->fd
);
341 AUDIT_ARG(fflags
, flags
);
342 /* fstypename will get audited by mount_common */
344 /* Sanity check the flags */
345 if (flags
& (MNT_IMGSRC_BY_INDEX
|MNT_ROOTFS
)) {
349 if (flags
& MNT_UNION
) {
353 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
358 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
362 if ((error
= vnode_getwithref(vp
)) != 0) {
367 pvp
= vnode_getparent(vp
);
/* Build a componentname holding the path of vp for mount_common(). */
374 memset(&cn
, 0, sizeof(struct componentname
));
375 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
376 cn
.cn_pnlen
= MAXPATHLEN
;
378 if((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
379 FREE(cn
.cn_pnbuf
, M_TEMP
);
386 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
/* the path buffer is released once mount_common() has returned */
388 FREE(cn
.cn_pnbuf
, M_TEMP
);
/*
 * vfs_notify_mount
 *
 * Announce a mount: signal the VQ_MOUNT filesystem event and post
 * NOTE_WRITE on pdvp.
 *
 * Parameters:	pdvp	Vnode on which NOTE_WRITE is posted.  mount_common()
 *			passes its pvp argument here.
 */
397 vfs_notify_mount(vnode_t pdvp
)
399 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
400 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
405 * Mount a file system taking into account MAC label behavior.
406 * See mount(2) man page for more information
408 * Parameters: p Process requesting the mount
409 * uap User argument descriptor (see below)
412 * Indirect: uap->type Filesystem type
413 * uap->path Path to mount
414 * uap->data Mount arguments
415 * uap->mac_p MAC info
416 * uap->flags Mount flags
/*
 * Set to TRUE by __mac_mount() when a request without MNT_RDONLY is seen;
 * the assignment is compiled only under CHECK_CS_VALIDATION_BITMAP.
 * NOTE(review): the assignment appears to sit inside the branch taken when
 * the covered vnode is the root of the root filesystem - confirm against
 * the full source, since the enclosing braces are not visible here.
 */
422 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount
 *
 * Visible flow: copy in the filesystem type name; look up uap->path with
 * FOLLOW | AUDITVNPATH1 | WANTPARENT; hand MNT_IMGSRC_BY_INDEX requests to
 * relocate_imageboot_source(); copy in the optional MAC label string;
 * audit the flags; apply the special cases for the root filesystem; call
 * mount_common() as a non-kernel mount with internal_flags 0; free the
 * label string.
 *
 * NOTE(review): the error branches, the cleanup labels and the return
 * statement are not visible in this listing.
 */
425 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
429 int need_nameidone
= 0;
430 vfs_context_t ctx
= vfs_context_current();
431 char fstypename
[MFSNAMELEN
];
434 char *labelstr
= NULL
;
435 int flags
= uap
->flags
;
437 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
438 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
443 * Get the fs type name from user space
445 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
450 * Get the vnode to be covered
452 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
453 UIO_USERSPACE
, uap
->path
, ctx
);
462 #ifdef CONFIG_IMGSRC_ACCESS
463 /* Mounting image source cannot be batched with other operations */
464 if (flags
== MNT_IMGSRC_BY_INDEX
) {
/*
 * NOTE(review): the last argument repeats the comparison made by the
 * enclosing test, so it is always true on this path.
 */
465 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
466 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
469 #endif /* CONFIG_IMGSRC_ACCESS */
473 * Get the label string (if any) from user space
475 if (uap
->mac_p
!= USER_ADDR_NULL
) {
/*
 * Two layouts of the user structure are copied in, user64_mac and
 * user32_mac; either way the fields end up in the local mac.
 */
480 struct user64_mac mac64
;
481 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
482 mac
.m_buflen
= mac64
.m_buflen
;
483 mac
.m_string
= mac64
.m_string
;
485 struct user32_mac mac32
;
486 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
487 mac
.m_buflen
= mac32
.m_buflen
;
488 mac
.m_string
= mac32
.m_string
;
/* accepted label buffer lengths run from 2 to MAC_MAX_LABEL_BUF_LEN */
492 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
493 (mac
.m_buflen
< 2)) {
497 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
498 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
502 AUDIT_ARG(mac_string
, labelstr
);
504 #endif /* CONFIG_MACF */
506 AUDIT_ARG(fflags
, flags
);
509 if (flags
& MNT_UNION
) {
510 /* No union mounts on release kernels */
516 if ((vp
->v_flag
& VROOT
) &&
517 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
518 if (!(flags
& MNT_UNION
)) {
523 * For a union mount on '/', treat it as fresh
524 * mount instead of update.
525 * Otherwise, union mouting on '/' used to panic the
526 * system before, since mnt_vnodecovered was found to
527 * be NULL for '/' which is required for unionlookup
528 * after it gets ENOENT on union mount.
530 flags
= (flags
& ~(MNT_UPDATE
));
534 if ((flags
& MNT_RDONLY
) == 0) {
535 /* Release kernels are not allowed to mount "/" as rw */
541 * See 7392553 for more details on why this check exists.
542 * Suffice to say: If this check is ON and something tries
543 * to mount the rootFS RW, we'll turn off the codesign
544 * bitmap optimization.
546 #if CHECK_CS_VALIDATION_BITMAP
547 if ((flags
& MNT_RDONLY
) == 0 ) {
548 root_fs_upgrade_try
= TRUE
;
553 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
554 labelstr
, FALSE
, ctx
);
560 FREE(labelstr
, M_MACTEMP
);
561 #endif /* CONFIG_MACF */
569 if (need_nameidone
) {
577 * common mount implementation (final stage of mounting)
580 * fstypename file system type (i.e. its vfs name)
581 * pvp parent of covered vnode
583 * cnp component name (ie path) of covered vnode
584 * flags generic mount flags
585 * fsmountargs file system specific data
586 * labelstr optional MAC label
587 * kernelmount TRUE for mounts initiated from inside the kernel
588 * ctx caller's context
/*
 * mount_common
 *
 * Shared back end called by kernel_mount(), fmount() and __mac_mount().
 *
 * Phases visible in this listing:
 *   1. MNT_UPDATE request: test that vp is a filesystem root (VROOT) and
 *      that no unmount is in progress (MNT_LUNMOUNT); take mnt_rwlock
 *      exclusively; apply the MNT_RELOAD, MNT_CPROTECT, MNTK_BACKS_ROOT
 *      and ownership checks; record MNT_RELOAD, MNT_FORCE and MNT_UPDATE
 *      in mp->mnt_flag.
 *   2. Fresh mount: find the vfstable entry whose name matches fstypename
 *      and increment its vfc_refcount; run prepare_coveredvp(); allocate,
 *      zero and initialise the struct mount; take its mnt_rwlock
 *      exclusively.
 *   3. Fold the caller flags into mp->mnt_flag.
 *   4. For VFC_VFSLOCALARGS filesystems, unless KERNEL_MOUNT_SNAPSHOT is
 *      set: copy in the device path, look it up, authorise and open the
 *      device, or reopen it read-write when a read-only mount is being
 *      upgraded.
 *   5. MAC label setup or check, then VFS_MOUNT() or the
 *      VFSIOC_MOUNT_SNAPSHOT ioctl.
 *   6. On success: finish the update, or attach the mount to vp, add it
 *      to the mount list, probe the volume capabilities and notify
 *      listeners.
 *   7. On failure: undo whatever had been set up.
 *
 * NOTE(review): many lines (local declarations, goto targets, braces and
 * error assignments) are absent from this listing, so the exact branch
 * structure cannot be confirmed from here.
 */
591 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
592 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
593 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
596 #pragma unused(labelstr)
598 struct vnode
*devvp
= NULLVP
;
599 struct vnode
*device_vnode
= NULLVP
;
604 struct vfstable
*vfsp
= (struct vfstable
*)0;
605 struct proc
*p
= vfs_context_proc(ctx
);
607 user_addr_t devpath
= USER_ADDR_NULL
;
610 boolean_t vfsp_ref
= FALSE
;
611 boolean_t is_rwlock_locked
= FALSE
;
612 boolean_t did_rele
= FALSE
;
613 boolean_t have_usecount
= FALSE
;
616 * Process an update for an existing mount
618 if (flags
& MNT_UPDATE
) {
619 if ((vp
->v_flag
& VROOT
) == 0) {
625 /* unmount in progress return error */
627 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
633 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
634 is_rwlock_locked
= TRUE
;
636 * We only allow the filesystem to be reloaded if it
637 * is currently mounted read-only.
639 if ((flags
& MNT_RELOAD
) &&
640 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
646 * If content protection is enabled, update mounts are not
647 * allowed to turn it off.
649 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
650 ((flags
& MNT_CPROTECT
) == 0)) {
655 #ifdef CONFIG_IMGSRC_ACCESS
656 /* Can't downgrade the backer of the root FS */
657 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
658 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
662 #endif /* CONFIG_IMGSRC_ACCESS */
665 * Only root, or the user that did the original mount is
666 * permitted to update it.
668 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
669 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
673 error
= mac_mount_check_remount(ctx
, mp
);
679 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
680 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
682 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
683 flags
|= MNT_NOSUID
| MNT_NODEV
;
684 if (mp
->mnt_flag
& MNT_NOEXEC
)
691 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
693 vfsp
= mp
->mnt_vtable
;
698 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
699 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
701 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
702 flags
|= MNT_NOSUID
| MNT_NODEV
;
703 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
707 /* XXXAUDIT: Should we capture the type on the error path as well? */
708 AUDIT_ARG(text
, fstypename
);
710 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
711 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
712 vfsp
->vfc_refcount
++;
723 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
725 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
726 error
= EINVAL
; /* unsupported request */
730 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
736 * Allocate and initialize the filesystem (mount_t)
738 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
740 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
743 /* Initialize the default IO constraints */
744 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
745 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
746 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
747 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
748 mp
->mnt_devblocksize
= DEV_BSIZE
;
749 mp
->mnt_alignmentmask
= PAGE_MASK
;
750 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
753 mp
->mnt_realrootvp
= NULLVP
;
754 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
756 TAILQ_INIT(&mp
->mnt_vnodelist
);
757 TAILQ_INIT(&mp
->mnt_workerqueue
);
758 TAILQ_INIT(&mp
->mnt_newvnodes
);
760 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
761 is_rwlock_locked
= TRUE
;
762 mp
->mnt_op
= vfsp
->vfc_vfsops
;
763 mp
->mnt_vtable
= vfsp
;
764 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
765 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
766 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
767 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
768 mp
->mnt_vnodecovered
= vp
;
769 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
770 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
771 mp
->mnt_devbsdunit
= 0;
773 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
774 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
776 #if NFSCLIENT || DEVFS || ROUTEFS
778 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
779 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
780 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
781 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
786 * Set the mount level flags.
788 if (flags
& MNT_RDONLY
)
789 mp
->mnt_flag
|= MNT_RDONLY
;
790 else if (mp
->mnt_flag
& MNT_RDONLY
) {
791 // disallow read/write upgrades of file systems that
792 // had the TYPENAME_OVERRIDE feature set.
793 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
797 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
799 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
800 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
801 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
802 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
803 MNT_QUARANTINE
| MNT_CPROTECT
);
808 * On release builds of iOS based platforms, always enforce NOSUID on
809 * all mounts. We do this here because we can catch update mounts as well as
810 * non-update mounts in this case.
812 mp
->mnt_flag
|= (MNT_NOSUID
);
816 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
817 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
818 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
819 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
820 MNT_QUARANTINE
| MNT_CPROTECT
);
823 if (flags
& MNT_MULTILABEL
) {
824 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
828 mp
->mnt_flag
|= MNT_MULTILABEL
;
832 * Process device path for local file systems if requested
834 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
835 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
836 if (vfs_context_is64bit(ctx
)) {
837 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
839 fsmountargs
+= sizeof(devpath
);
842 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
844 /* munge into LP64 addr */
845 devpath
= CAST_USER_ADDR_T(tmp
);
846 fsmountargs
+= sizeof(tmp
);
849 /* Lookup device and authorize access to it */
853 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
854 if ( (error
= namei(&nd
)) )
857 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
862 if (devvp
->v_type
!= VBLK
) {
866 if (major(devvp
->v_rdev
) >= nblkdev
) {
871 * If mount by non-root, then verify that user has necessary
872 * permissions on the device.
874 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
875 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
877 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
878 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
879 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
883 /* On first mount, preflight and open device */
884 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
885 if ( (error
= vnode_ref(devvp
)) )
888 * Disallow multiple mounts of the same device.
889 * Disallow mounting of a device that is currently in use
890 * (except for root, which might share swap device for miniroot).
891 * Flush out any old buffers remaining from a previous use.
893 if ( (error
= vfs_mountedon(devvp
)) )
896 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
900 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
904 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
/* ronly picks the open mode here and the matching close mode on failure */
907 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
909 error
= mac_vnode_check_open(ctx
,
911 ronly
? FREAD
: FREAD
|FWRITE
);
915 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
918 mp
->mnt_devvp
= devvp
;
919 device_vnode
= devvp
;
921 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
922 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
923 (device_vnode
= mp
->mnt_devvp
)) {
927 * If upgrade to read-write by non-root, then verify
928 * that user has necessary permissions on the device.
930 vnode_getalways(device_vnode
);
932 if (suser(vfs_context_ucred(ctx
), NULL
) &&
933 (error
= vnode_authorize(device_vnode
, NULL
,
934 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
936 vnode_put(device_vnode
);
940 /* Tell the device that we're upgrading */
941 dev
= (dev_t
)device_vnode
->v_rdev
;
/* maj indexes bdevsw[] below, so an out-of-range value is fatal */
944 if ((u_int
)maj
>= (u_int
)nblkdev
)
945 panic("Volume mounted on a device with invalid major number.");
947 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
948 vnode_put(device_vnode
);
949 device_vnode
= NULLVP
;
956 if ((flags
& MNT_UPDATE
) == 0) {
957 mac_mount_label_init(mp
);
958 mac_mount_label_associate(ctx
, mp
);
961 if ((flags
& MNT_UPDATE
) != 0) {
962 error
= mac_mount_check_label_update(ctx
, mp
);
969 * Mount the filesystem.
971 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
972 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
973 (caddr_t
)fsmountargs
, 0, ctx
);
975 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
978 if (flags
& MNT_UPDATE
) {
979 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
980 mp
->mnt_flag
&= ~MNT_RDONLY
;
982 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
983 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
985 mp
->mnt_flag
= flag
; /* restore flag value */
986 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
987 lck_rw_done(&mp
->mnt_rwlock
);
988 is_rwlock_locked
= FALSE
;
990 enablequotas(mp
, ctx
);
995 * Put the new filesystem on the mount list after root.
998 struct vfs_attr vfsattr
;
1000 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
1001 error
= VFS_ROOT(mp
, &rvp
, ctx
);
1003 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1006 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1008 * drop reference provided by VFS_ROOT
1017 vnode_lock_spin(vp
);
1018 CLR(vp
->v_flag
, VMOUNT
);
1019 vp
->v_mountedhere
= mp
;
1023 * taking the name_cache_lock exclusively will
1024 * insure that everyone is out of the fast path who
1025 * might be trying to use a now stale copy of
1026 * vp->v_mountedhere->mnt_realrootvp
1027 * bumping mount_generation causes the cached values
1032 name_cache_unlock();
1034 error
= vnode_ref(vp
);
1039 have_usecount
= TRUE
;
1041 error
= checkdirs(vp
, ctx
);
1043 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1047 * there is no cleanup code here so I have made it void
1048 * we need to revisit this
1050 (void)VFS_START(mp
, 0, ctx
);
1052 if (mount_list_add(mp
) != 0) {
1054 * The system is shutting down trying to umount
1055 * everything, so fail with a plausible errno.
1060 lck_rw_done(&mp
->mnt_rwlock
);
1061 is_rwlock_locked
= FALSE
;
1063 /* Check if this mounted file system supports EAs or named streams. */
1064 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1065 VFSATTR_INIT(&vfsattr
);
1066 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
/* the sizeof length includes the terminating NUL, so only an exact name match is skipped */
1067 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1068 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1069 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1070 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1071 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1072 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1075 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1076 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1077 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1080 /* Check if this file system supports path from id lookups. */
1081 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1082 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1083 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1084 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1085 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1086 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1089 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1090 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1091 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1094 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1095 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1097 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1098 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1100 /* increment the operations count */
1101 OSAddAtomic(1, &vfs_nummntops
);
1102 enablequotas(mp
, ctx
);
1105 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1108 * cache the IO attributes for the underlying physical media...
1109 * an error return indicates the underlying driver doesn't
1110 * support all the queries necessary... however, reasonable
1111 * defaults will have been set, so no reason to bail or care
1113 vfs_init_io_attributes(device_vnode
, mp
);
1116 /* Now that mount is setup, notify the listeners */
1117 vfs_notify_mount(pvp
);
1118 IOBSDMountChange(mp
, kIOMountChangeMount
);
1121 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1122 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1123 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1124 mp
->mnt_vtable
->vfc_name
, error
);
1127 vnode_lock_spin(vp
);
1128 CLR(vp
->v_flag
, VMOUNT
);
1131 mp
->mnt_vtable
->vfc_refcount
--;
1132 mount_list_unlock();
1134 if (device_vnode
) {
1135 vnode_rele(device_vnode
);
1136 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1138 lck_rw_done(&mp
->mnt_rwlock
);
1139 is_rwlock_locked
= FALSE
;
1142 * if we get here, we have a mount structure that needs to be freed,
1143 * but since the coveredvp hasn't yet been updated to point at it,
1144 * no need to worry about other threads holding a crossref on this mp
1145 * so it's ok to just free it
1147 mount_lock_destroy(mp
);
1149 mac_mount_label_destroy(mp
);
1151 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1155 * drop I/O count on the device vp if there was one
1157 if (devpath
&& devvp
)
1162 /* Error condition exits */
1164 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1167 * If the mount has been placed on the covered vp,
1168 * it may have been discovered by now, so we have
1169 * to treat this just like an unmount
1171 mount_lock_spin(mp
);
1172 mp
->mnt_lflag
|= MNT_LDEAD
;
1175 if (device_vnode
!= NULLVP
) {
1176 vnode_rele(device_vnode
);
1177 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1182 vnode_lock_spin(vp
);
1185 vp
->v_mountedhere
= (mount_t
) 0;
1189 if (have_usecount
) {
1193 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1196 if (devpath
&& devvp
)
1199 /* Release mnt_rwlock only when it was taken */
1200 if (is_rwlock_locked
== TRUE
) {
1201 lck_rw_done(&mp
->mnt_rwlock
);
1205 if (mp
->mnt_crossref
)
1206 mount_dropcrossref(mp
, vp
, 0);
1208 mount_lock_destroy(mp
);
1210 mac_mount_label_destroy(mp
);
1212 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1217 vfsp
->vfc_refcount
--;
1218 mount_list_unlock();
1225 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp
 *
 * Ready a vnode for being covered by a new mount and mark it VMOUNT.
 *
 * Parameters:	vp		Vnode to be covered.
 *		ctx		Caller context; supplies the credential.
 *		cnp, fsname	Listed in the pragma unused below.
 *				NOTE(review): presumably consumed only by the
 *				mac_mount_check_mount() call, whose argument
 *				list is cut off in this listing - confirm.
 *		skip_auth	NOTE(review): its use is not visible in this
 *				listing; mount_common() passes TRUE when
 *				KERNEL_MOUNT_NOAUTH is set, which suggests it
 *				bypasses the ownership check - confirm.
 *
 * Visible steps, in order: fetch va_uid and compare it with the caller
 * uid unless the caller is superuser; flush in-core data with
 * VNOP_FSYNC() and buf_invalidateblks(); test that vp is a directory
 * (VDIR); test whether vp is already marked VMOUNT with a mount attached;
 * run the MAC mount check; set VMOUNT under the vnode spin lock.
 *
 * NOTE(review): the statements executed when a test fails are not
 * visible in this listing.
 */
1229 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1232 #pragma unused(cnp,fsname)
1234 struct vnode_attr va
;
1239 * If the user is not root, ensure that they own the directory
1240 * onto which we are attempting to mount.
1243 VATTR_WANTED(&va
, va_uid
);
1244 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1245 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1246 (!vfs_context_issuser(ctx
)))) {
1252 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1255 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1258 if (vp
->v_type
!= VDIR
) {
/* a vnode already flagged VMOUNT with a mount attached is a competing mount */
1263 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1269 error
= mac_mount_check_mount(ctx
, vp
,
1275 vnode_lock_spin(vp
);
1276 SET(vp
->v_flag
, VMOUNT
);
1283 #if CONFIG_IMGSRC_ACCESS
1286 #define IMGSRC_DEBUG(args...) printf(args)
1288 #define IMGSRC_DEBUG(args...) do { } while(0)
1292 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1294 struct nameidata nd
;
1295 vnode_t vp
, realdevvp
;
1299 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1300 if ( (error
= namei(&nd
)) ) {
1301 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1307 if (!vnode_isblk(vp
)) {
1308 IMGSRC_DEBUG("Not block device.\n");
1313 realdevvp
= mp
->mnt_devvp
;
1314 if (realdevvp
== NULLVP
) {
1315 IMGSRC_DEBUG("No device backs the mount.\n");
1320 error
= vnode_getwithref(realdevvp
);
1322 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1326 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1327 IMGSRC_DEBUG("Wrong dev_t.\n");
1332 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1335 * If mount by non-root, then verify that user has necessary
1336 * permissions on the device.
1338 if (!vfs_context_issuser(ctx
)) {
1339 accessmode
= KAUTH_VNODE_READ_DATA
;
1340 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1341 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1342 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1343 IMGSRC_DEBUG("Access denied.\n");
1351 vnode_put(realdevvp
);
1362 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1363 * and call checkdirs()
1366 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1370 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1372 vnode_lock_spin(vp
);
1373 CLR(vp
->v_flag
, VMOUNT
);
1374 vp
->v_mountedhere
= mp
;
1378 * taking the name_cache_lock exclusively will
1379 * ensure that everyone is out of the fast path who
1380 * might be trying to use a now stale copy of
1381 * vp->v_mountedhere->mnt_realrootvp
1382 * bumping mount_generation causes the cached values
1387 name_cache_unlock();
1389 error
= vnode_ref(vp
);
1394 error
= checkdirs(vp
, ctx
);
1396 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1403 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach mount `mp` from the vnode `vp` it covers.
 *
 * The statements visible here take the vnode spin lock on vp, reset
 * vp->v_mountedhere to NULL, and then reset mp->mnt_vnodecovered to NULLVP.
 *
 * Parameters:	mp	mount being removed from the covered vnode
 *		vp	covered vnode
 */
1409 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1412 vnode_lock_spin(vp
);
1413 vp
->v_mountedhere
= (mount_t
)NULL
;
1416 mp
->mnt_vnodecovered
= NULLVP
;
1420 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1424 /* unmount in progress return error */
1425 mount_lock_spin(mp
);
1426 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1431 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1434 * We only allow the filesystem to be reloaded if it
1435 * is currently mounted read-only.
1437 if ((flags
& MNT_RELOAD
) &&
1438 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1444 * Only root, or the user that did the original mount is
1445 * permitted to update it.
1447 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1448 (!vfs_context_issuser(ctx
))) {
1453 error
= mac_mount_check_remount(ctx
, mp
);
1461 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update: finish an update of mount `mp` by releasing
 * mp->mnt_rwlock with lck_rw_done().  mount_begin_update() is the routine
 * in this file that takes that lock exclusively.
 */
1468 mount_end_update(mount_t mp
)
1470 lck_rw_done(&mp
->mnt_rwlock
);
1474 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1478 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1482 vp
= imgsrc_rootvnodes
[height
];
1483 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1492 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1493 const char *fsname
, vfs_context_t ctx
,
1494 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1498 boolean_t placed
= FALSE
;
1499 vnode_t devvp
= NULLVP
;
1500 struct vfstable
*vfsp
;
1501 user_addr_t devpath
;
1502 char *old_mntonname
;
1507 /* If we didn't imageboot, nothing to move */
1508 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1512 /* Only root can do this */
1513 if (!vfs_context_issuser(ctx
)) {
1517 IMGSRC_DEBUG("looking for root vnode.\n");
1520 * Get root vnode of filesystem we're moving.
1524 struct user64_mnt_imgsrc_args mia64
;
1525 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1527 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1531 height
= mia64
.mi_height
;
1532 flags
= mia64
.mi_flags
;
1533 devpath
= mia64
.mi_devpath
;
1535 struct user32_mnt_imgsrc_args mia32
;
1536 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1538 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1542 height
= mia32
.mi_height
;
1543 flags
= mia32
.mi_flags
;
1544 devpath
= mia32
.mi_devpath
;
1548 * For binary compatibility--assumes one level of nesting.
1551 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1555 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1558 /* munge into LP64 addr */
1559 devpath
= CAST_USER_ADDR_T(tmp
);
1567 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1571 error
= get_imgsrc_rootvnode(height
, &rvp
);
1573 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1577 IMGSRC_DEBUG("got root vnode.\n");
1579 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1581 /* Can only move once */
1582 mp
= vnode_mount(rvp
);
1583 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1584 IMGSRC_DEBUG("Already moved.\n");
1589 IMGSRC_DEBUG("Starting updated.\n");
1591 /* Get exclusive rwlock on mount, authorize update on mp */
1592 error
= mount_begin_update(mp
, ctx
, 0);
1594 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1599 * It can only be moved once. Flag is set under the rwlock,
1600 * so we're now safe to proceed.
1602 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1603 IMGSRC_DEBUG("Already moved [2]\n");
1608 IMGSRC_DEBUG("Preparing coveredvp.\n");
1610 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1611 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1613 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1617 IMGSRC_DEBUG("Covered vp OK.\n");
1619 /* Sanity check the name caller has provided */
1620 vfsp
= mp
->mnt_vtable
;
1621 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1622 IMGSRC_DEBUG("Wrong fs name.\n");
1627 /* Check the device vnode and update mount-from name, for local filesystems */
1628 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1629 IMGSRC_DEBUG("Local, doing device validation.\n");
1631 if (devpath
!= USER_ADDR_NULL
) {
1632 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1634 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1643 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1644 * and increment the name cache's mount generation
1647 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1648 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1655 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1656 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1658 /* Forbid future moves */
1660 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1663 /* Finally, add to mount list, completely ready to go */
1664 if (mount_list_add(mp
) != 0) {
1666 * The system is shutting down trying to umount
1667 * everything, so fail with a plausible errno.
1673 mount_end_update(mp
);
1675 FREE(old_mntonname
, M_TEMP
);
1677 vfs_notify_mount(pvp
);
1681 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1684 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1689 * Placing the mp on the vnode clears VMOUNT,
1690 * so cleanup is different after that point
1693 /* Rele the vp, clear VMOUNT and v_mountedhere */
1694 undo_place_on_covered_vp(mp
, vp
);
1696 vnode_lock_spin(vp
);
1697 CLR(vp
->v_flag
, VMOUNT
);
1701 mount_end_update(mp
);
1705 FREE(old_mntonname
, M_TEMP
);
1709 #endif /* CONFIG_IMGSRC_ACCESS */
1712 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1714 struct nameidata qnd
;
1716 char qfpath
[MAXPATHLEN
];
1717 const char *qfname
= QUOTAFILENAME
;
1718 const char *qfopsname
= QUOTAOPSNAME
;
1719 const char *qfextension
[] = INITQFNAMES
;
1721 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1722 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1726 * Enable filesystem disk quotas if necessary.
1727 * We ignore errors as this should not interfere with final mount
1729 for (type
=0; type
< MAXQUOTAS
; type
++) {
1730 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1731 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1732 CAST_USER_ADDR_T(qfpath
), ctx
);
1733 if (namei(&qnd
) != 0)
1734 continue; /* option file to trigger quotas is not present */
1735 vnode_put(qnd
.ni_vp
);
1737 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1739 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1746 checkdirs_callback(proc_t p
, void * arg
)
1748 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1749 vnode_t olddp
= cdrp
->olddp
;
1750 vnode_t newdp
= cdrp
->newdp
;
1751 struct filedesc
*fdp
;
1755 int cdir_changed
= 0;
1756 int rdir_changed
= 0;
1759 * XXX Also needs to iterate each thread in the process to see if it
1760 * XXX is using a per-thread current working directory, and, if so,
1761 * XXX update that as well.
1766 if (fdp
== (struct filedesc
*)0) {
1768 return(PROC_RETURNED
);
1770 fdp_cvp
= fdp
->fd_cdir
;
1771 fdp_rvp
= fdp
->fd_rdir
;
1774 if (fdp_cvp
== olddp
) {
1781 if (fdp_rvp
== olddp
) {
1788 if (cdir_changed
|| rdir_changed
) {
1790 fdp
->fd_cdir
= fdp_cvp
;
1791 fdp
->fd_rdir
= fdp_rvp
;
1794 return(PROC_RETURNED
);
1800 * Scan all active processes to see if any of them have a current
1801 * or root directory onto which the new filesystem has just been
1802 * mounted. If so, replace them with the new mount point.
1805 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1810 struct cdirargs cdr
;
1812 if (olddp
->v_usecount
== 1)
1814 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1818 panic("mount: lost mount: error %d", err
);
1825 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1826 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1828 if (rootvnode
== olddp
) {
1840 * Unmount a file system.
1842 * Note: unmount takes a path to the vnode mounted on as argument,
1843 * not special file (as before).
1847 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1852 struct nameidata nd
;
1853 vfs_context_t ctx
= vfs_context_current();
1855 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1856 UIO_USERSPACE
, uap
->path
, ctx
);
1865 error
= mac_mount_check_umount(ctx
, mp
);
1872 * Must be the root of the filesystem
1874 if ((vp
->v_flag
& VROOT
) == 0) {
1880 /* safedounmount consumes the mount ref */
1881 return (safedounmount(mp
, uap
->flags
, ctx
));
/*
 * vfs_unmountbyfsid: unmount the filesystem identified by `fsid`.
 *
 * Looks the mount up with mount_list_lookupby_fsid(fsid, 0, 1) and passes
 * it, together with the caller's `flags` and `ctx`, to safedounmount(),
 * whose result is returned.  The body of the "lookup returned no mount"
 * branch is not visible in this view.
 */
1885 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1889 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1890 if (mp
== (mount_t
)0) {
1895 /* safedounmount consumes the mount ref */
1896 return(safedounmount(mp
, flags
, ctx
));
1901 * The mount struct comes with a mount ref which will be consumed.
1902 * Do the actual file system unmount, prevent some common foot shooting.
1905 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1908 proc_t p
= vfs_context_proc(ctx
);
1911 * If the file system is not responding and MNT_NOBLOCK
1912 * is set and not a forced unmount then return EBUSY.
1914 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1915 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1921 * Skip authorization if the mount is tagged as permissive and
1922 * this is not a forced-unmount attempt.
1924 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1926 * Only root, or the user that did the original mount is
1927 * permitted to unmount this filesystem.
1929 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1930 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1934 * Don't allow unmounting the root file system.
1936 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1937 error
= EBUSY
; /* the root is always busy */
1941 #ifdef CONFIG_IMGSRC_ACCESS
1942 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1946 #endif /* CONFIG_IMGSRC_ACCESS */
1948 return (dounmount(mp
, flags
, 1, ctx
));
1956 * Do the actual file system unmount.
1959 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1961 vnode_t coveredvp
= (vnode_t
)0;
1964 int forcedunmount
= 0;
1966 struct vnode
*devvp
= NULLVP
;
1968 proc_t p
= vfs_context_proc(ctx
);
1970 int pflags_save
= 0;
1971 #endif /* CONFIG_TRIGGERS */
1974 if (!(flags
& MNT_FORCE
)) {
1975 fsevent_unmount(mp
, ctx
); /* has to come first! */
1982 * If already an unmount in progress just return EBUSY.
1983 * Even a forced unmount cannot override.
1985 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1992 if (flags
& MNT_FORCE
) {
1994 mp
->mnt_lflag
|= MNT_LFORCE
;
1998 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1999 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
2002 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
2003 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
2004 mp
->mnt_flag
&=~ MNT_ASYNC
;
2006 * anyone currently in the fast path that
2007 * trips over the cached rootvp will be
2008 * dumped out and forced into the slow path
2009 * to regenerate a new cached value
2011 mp
->mnt_realrootvp
= NULLVP
;
2014 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2016 * Force unmount any mounts in this filesystem.
2017 * If any unmounts fail - just leave them dangling.
2020 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2024 * taking the name_cache_lock exclusively will
2025 * ensure that everyone is out of the fast path who
2026 * might be trying to use a now stale copy of
2027 * vp->v_mountedhere->mnt_realrootvp
2028 * bumping mount_generation causes the cached values
2033 name_cache_unlock();
2036 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2040 if (forcedunmount
== 0) {
2041 ubc_umount(mp
); /* release cached vnodes */
2042 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2043 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2046 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2047 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2048 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2054 /* free disk_conditioner_info structure for this mount */
2055 disk_conditioner_unmount(mp
);
2057 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2060 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2064 lflags
|= FORCECLOSE
;
2065 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2066 if ((forcedunmount
== 0) && error
) {
2068 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2069 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2070 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2074 /* make sure no one is in the mount iterations or lookup */
2075 mount_iterdrain(mp
);
2077 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2079 mount_iterreset(mp
);
2081 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2082 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2083 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2087 /* increment the operations count */
2089 OSAddAtomic(1, &vfs_nummntops
);
2091 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2092 /* hold an io reference and drop the usecount before close */
2093 devvp
= mp
->mnt_devvp
;
2094 vnode_getalways(devvp
);
2096 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
2098 vnode_clearmountedon(devvp
);
2101 lck_rw_done(&mp
->mnt_rwlock
);
2102 mount_list_remove(mp
);
2103 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2105 /* mark the mount point hook in the vp but not drop the ref yet */
2106 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2108 * The covered vnode needs special handling. Trying to get an
2109 * iocount must not block here as this may lead to deadlocks
2110 * if the Filesystem to which the covered vnode belongs is
2111 * undergoing forced unmounts. Since we hold a usecount, the
2112 * vnode cannot be reused (it can, however, still be terminated)
2114 vnode_getalways(coveredvp
);
2115 vnode_lock_spin(coveredvp
);
2118 coveredvp
->v_mountedhere
= (struct mount
*)0;
2119 CLR(coveredvp
->v_flag
, VMOUNT
);
2121 vnode_unlock(coveredvp
);
2122 vnode_put(coveredvp
);
2126 mp
->mnt_vtable
->vfc_refcount
--;
2127 mount_list_unlock();
2129 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2130 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2132 mp
->mnt_lflag
|= MNT_LDEAD
;
2134 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2136 * do the wakeup here
2137 * in case we block in mount_refdrain
2138 * which will drop the mount lock
2139 * and allow anyone blocked in vfs_busy
2140 * to wakeup and see the LDEAD state
2142 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2143 wakeup((caddr_t
)mp
);
2147 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2148 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2153 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2154 // Restore P_NOREMOTEHANG bit to its previous value
2155 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2156 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2160 * Callback and context are set together under the mount lock, and
2161 * never cleared, so we're safe to examine them here, drop the lock,
2164 if (mp
->mnt_triggercallback
!= NULL
) {
2167 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2168 } else if (did_vflush
) {
2169 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2176 #endif /* CONFIG_TRIGGERS */
2178 lck_rw_done(&mp
->mnt_rwlock
);
2181 wakeup((caddr_t
)mp
);
2184 if ((coveredvp
!= NULLVP
)) {
2185 vnode_t pvp
= NULLVP
;
2188 * The covered vnode needs special handling. Trying to
2189 * get an iocount must not block here as this may lead
2190 * to deadlocks if the Filesystem to which the covered
2191 * vnode belongs is undergoing forced unmounts. Since we
2192 * hold a usecount, the vnode cannot be reused
2193 * (it can, however, still be terminated).
2195 vnode_getalways(coveredvp
);
2197 mount_dropcrossref(mp
, coveredvp
, 0);
2199 * We'll _try_ to detect if this really needs to be
2200 * done. The coveredvp can only be in termination (or
2201 * terminated) if the coveredvp's mount point is in a
2202 * forced unmount (or has been) since we still hold the
2205 if (!vnode_isrecycled(coveredvp
)) {
2206 pvp
= vnode_getparent(coveredvp
);
2208 if (coveredvp
->v_resolve
) {
2209 vnode_trigger_rearm(coveredvp
, ctx
);
2214 vnode_rele(coveredvp
);
2215 vnode_put(coveredvp
);
2219 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2222 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2223 mount_lock_destroy(mp
);
2225 mac_mount_label_destroy(mp
);
2227 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2229 panic("dounmount: no coveredvp");
2235 * Unmount any mounts in this filesystem.
2238 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2241 fsid_t
*fsids
, fsid
;
2243 int count
= 0, i
, m
= 0;
2248 // Get an array to hold the submounts fsids.
2249 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2251 fsids_sz
= count
* sizeof(fsid_t
);
2252 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2253 if (fsids
== NULL
) {
2254 mount_list_unlock();
2257 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2260 * Fill the array with submount fsids.
2261 * Since mounts are always added to the tail of the mount list, the
2262 * list is always in mount order.
2263 * For each mount check if the mounted-on vnode belongs to a
2264 * mount that's already added to our array of mounts to be unmounted.
2266 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2267 vp
= smp
->mnt_vnodecovered
;
2270 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2271 for (i
= 0; i
<= m
; i
++) {
2272 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2273 fsids
[i
].val
[1] == fsid
.val
[1]) {
2274 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2279 mount_list_unlock();
2281 // Unmount the submounts in reverse order. Ignore errors.
2282 for (i
= m
; i
> 0; i
--) {
2283 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2286 mount_iterdrop(smp
);
2287 (void) dounmount(smp
, flags
, 1, ctx
);
2292 FREE(fsids
, M_TEMP
);
2296 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2301 if (mp
->mnt_crossref
< 0)
2302 panic("mount cross refs -ve");
2304 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2307 vnode_put_locked(dp
);
2310 mount_lock_destroy(mp
);
2312 mac_mount_label_destroy(mp
);
2314 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2318 vnode_put_locked(dp
);
2324 * Sync each mounted filesystem.
2330 int print_vmpage_stat
=0;
/*
 * sync_callback: per-mount callback that syncs one filesystem.
 *
 * For a mount that is not MNT_RDONLY: remembers whether MNT_ASYNC was set,
 * clears MNT_ASYNC, and calls VFS_SYNC() with the kernel context, using
 * MNT_WAIT when `arg` is non-NULL and MNT_NOWAIT otherwise.  MNT_ASYNC is
 * then set again.
 * NOTE(review): the saved `asyncflag` is presumably what guards that
 * restore; the guarding line is not visible in this view -- confirm.
 *
 * NOTE(review): `arg` is tagged __unused but is read to choose the wait
 * mode; the attribute is misleading.
 *
 * Returns VFS_RETURNED.
 */
2333 sync_callback(mount_t mp
, __unused
void *arg
)
2335 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2336 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2338 mp
->mnt_flag
&= ~MNT_ASYNC
;
2339 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2341 mp
->mnt_flag
|= MNT_ASYNC
;
2344 return (VFS_RETURNED
);
/*
 * sync: system call entry point; all three arguments are unused.
 *
 * Runs sync_callback over the mounts via vfs_iterate(LK_NOWAIT, ...).
 * The callback argument is NULL, so sync_callback issues VFS_SYNC with
 * MNT_NOWAIT.  When print_vmpage_stat is non-zero, vm_countdirtypages()
 * is also called.
 */
2349 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2351 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2353 if (print_vmpage_stat
) {
2354 vm_countdirtypages();
2360 #endif /* DIAGNOSTIC */
2366 SYNC_ONLY_RELIABLE_MEDIA
= 1,
2367 SYNC_ONLY_UNRELIABLE_MEDIA
= 2
2371 sync_internal_callback(mount_t mp
, void *arg
)
2374 int is_reliable
= !(mp
->mnt_kern_flag
& MNTK_VIRTUALDEV
) &&
2375 (mp
->mnt_flag
& MNT_LOCAL
);
2376 sync_type_t sync_type
= *((sync_type_t
*)arg
);
2378 if ((sync_type
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
)
2379 return (VFS_RETURNED
);
2380 else if ((sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
)
2381 return (VFS_RETURNED
);
2384 (void)sync_callback(mp
, NULL
);
2386 return (VFS_RETURNED
);
2389 int sync_thread_state
= 0;
2390 int sync_timeout_seconds
= 5;
2392 #define SYNC_THREAD_RUN 0x0001
2393 #define SYNC_THREAD_RUNNING 0x0002
2396 sync_thread(__unused
void *arg
, __unused wait_result_t wr
)
2398 sync_type_t sync_type
;
2400 lck_mtx_lock(sync_mtx_lck
);
2401 while (sync_thread_state
& SYNC_THREAD_RUN
) {
2402 sync_thread_state
&= ~SYNC_THREAD_RUN
;
2403 lck_mtx_unlock(sync_mtx_lck
);
2405 sync_type
= SYNC_ONLY_RELIABLE_MEDIA
;
2406 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2407 sync_type
= SYNC_ONLY_UNRELIABLE_MEDIA
;
2408 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
);
2410 lck_mtx_lock(sync_mtx_lck
);
2413 * This wakeup _has_ to be issued before the lock is released otherwise
2414 * we may end up waking up a thread in sync_internal which is
2415 * expecting a wakeup from a thread it just created and not from this
2416 * thread which is about to exit.
2418 wakeup(&sync_thread_state
);
2419 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2420 lck_mtx_unlock(sync_mtx_lck
);
2422 if (print_vmpage_stat
) {
2423 vm_countdirtypages();
2429 #endif /* DIAGNOSTIC */
2432 struct timeval sync_timeout_last_print
= {0, 0};
2435 * An in-kernel sync for power management to call.
2436 * This function always returns within sync_timeout seconds.
2438 __private_extern__
int
2443 int thread_created
= FALSE
;
2444 struct timespec ts
= {sync_timeout_seconds
, 0};
2446 lck_mtx_lock(sync_mtx_lck
);
2447 sync_thread_state
|= SYNC_THREAD_RUN
;
2448 if (!(sync_thread_state
& SYNC_THREAD_RUNNING
)) {
2451 sync_thread_state
|= SYNC_THREAD_RUNNING
;
2452 kr
= kernel_thread_start(sync_thread
, NULL
, &thd
);
2453 if (kr
!= KERN_SUCCESS
) {
2454 sync_thread_state
&= ~SYNC_THREAD_RUNNING
;
2455 lck_mtx_unlock(sync_mtx_lck
);
2456 printf("sync_thread failed\n");
2459 thread_created
= TRUE
;
2462 error
= msleep((caddr_t
)&sync_thread_state
, sync_mtx_lck
,
2463 (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2468 if (now
.tv_sec
- sync_timeout_last_print
.tv_sec
> 120) {
2469 printf("sync timed out: %d sec\n", sync_timeout_seconds
);
2470 sync_timeout_last_print
.tv_sec
= now
.tv_sec
;
2475 thread_deallocate(thd
);
2478 } /* end of sync_internal call */
2481 * Change filesystem quotas.
2485 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2488 int error
, quota_cmd
, quota_status
= 0;
2491 struct nameidata nd
;
2492 vfs_context_t ctx
= vfs_context_current();
2493 struct dqblk my_dqblk
= {};
2495 AUDIT_ARG(uid
, uap
->uid
);
2496 AUDIT_ARG(cmd
, uap
->cmd
);
2497 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2502 mp
= nd
.ni_vp
->v_mount
;
2503 vnode_put(nd
.ni_vp
);
2506 /* copyin any data we will need for downstream code */
2507 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2509 switch (quota_cmd
) {
2511 /* uap->arg specifies a file from which to take the quotas */
2512 fnamelen
= MAXPATHLEN
;
2513 datap
= kalloc(MAXPATHLEN
);
2514 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2517 /* uap->arg is a pointer to a dqblk structure. */
2518 datap
= (caddr_t
) &my_dqblk
;
2522 /* uap->arg is a pointer to a dqblk structure. */
2523 datap
= (caddr_t
) &my_dqblk
;
2524 if (proc_is64bit(p
)) {
2525 struct user_dqblk my_dqblk64
;
2526 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2528 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2532 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2536 /* uap->arg is a pointer to an integer */
2537 datap
= (caddr_t
) "a_status
;
2545 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2548 switch (quota_cmd
) {
2551 kfree(datap
, MAXPATHLEN
);
2554 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2556 if (proc_is64bit(p
)) {
2557 struct user_dqblk my_dqblk64
;
2559 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2560 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2561 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2564 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2569 /* uap->arg is a pointer to an integer */
2571 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl (stub variant): ignores all of its arguments and returns
 * EOPNOTSUPP.
 * NOTE(review): presumably the definition used when quota support is
 * compiled out; the preprocessor guard is not visible in this view.
 */
2582 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2584 return (EOPNOTSUPP
);
2589 * Get filesystem statistics.
2591 * Returns: 0 Success
2593 * vfs_update_vfsstat:???
2594 * munge_statfs:EFAULT
2598 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2601 struct vfsstatfs
*sp
;
2603 struct nameidata nd
;
2604 vfs_context_t ctx
= vfs_context_current();
2607 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2608 UIO_USERSPACE
, uap
->path
, ctx
);
2614 sp
= &mp
->mnt_vfsstat
;
2618 error
= mac_mount_check_stat(ctx
, mp
);
2623 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2629 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2635 * Get filesystem statistics.
2639 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2643 struct vfsstatfs
*sp
;
2646 AUDIT_ARG(fd
, uap
->fd
);
2648 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2651 error
= vnode_getwithref(vp
);
2657 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2666 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2671 sp
= &mp
->mnt_vfsstat
;
2672 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2676 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2686 * Common routine to handle copying of statfs64 data to user space
2689 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2692 struct statfs64 sfs
;
2694 bzero(&sfs
, sizeof(sfs
));
2696 sfs
.f_bsize
= sfsp
->f_bsize
;
2697 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2698 sfs
.f_blocks
= sfsp
->f_blocks
;
2699 sfs
.f_bfree
= sfsp
->f_bfree
;
2700 sfs
.f_bavail
= sfsp
->f_bavail
;
2701 sfs
.f_files
= sfsp
->f_files
;
2702 sfs
.f_ffree
= sfsp
->f_ffree
;
2703 sfs
.f_fsid
= sfsp
->f_fsid
;
2704 sfs
.f_owner
= sfsp
->f_owner
;
2705 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2706 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2707 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2708 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2709 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2711 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2713 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2714 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2716 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2722 * Get file system statistics in 64-bit mode
2725 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2728 struct vfsstatfs
*sp
;
2730 struct nameidata nd
;
2731 vfs_context_t ctxp
= vfs_context_current();
2734 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2735 UIO_USERSPACE
, uap
->path
, ctxp
);
2741 sp
= &mp
->mnt_vfsstat
;
2745 error
= mac_mount_check_stat(ctxp
, mp
);
2750 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2756 error
= statfs64_common(mp
, sp
, uap
->buf
);
2763 * Get file system statistics in 64-bit mode
2766 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2770 struct vfsstatfs
*sp
;
2773 AUDIT_ARG(fd
, uap
->fd
);
2775 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2778 error
= vnode_getwithref(vp
);
2784 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2793 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2798 sp
= &mp
->mnt_vfsstat
;
2799 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2803 error
= statfs64_common(mp
, sp
, uap
->buf
);
2812 struct getfsstat_struct
{
2823 getfsstat_callback(mount_t mp
, void * arg
)
2826 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2827 struct vfsstatfs
*sp
;
2829 vfs_context_t ctx
= vfs_context_current();
2831 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2833 error
= mac_mount_check_stat(ctx
, mp
);
2835 fstp
->error
= error
;
2836 return(VFS_RETURNED_DONE
);
2839 sp
= &mp
->mnt_vfsstat
;
2841 * If MNT_NOWAIT is specified, do not refresh the
2842 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2844 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2845 (error
= vfs_update_vfsstat(mp
, ctx
,
2847 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2848 return(VFS_RETURNED
);
2852 * Need to handle LP64 version of struct statfs
2854 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2856 fstp
->error
= error
;
2857 return(VFS_RETURNED_DONE
);
2859 fstp
->sfsp
+= my_size
;
2863 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2865 fstp
->error
= error
;
2866 return(VFS_RETURNED_DONE
);
2873 return(VFS_RETURNED
);
2877 * Get statistics on all filesystems.
2880 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2882 struct __mac_getfsstat_args muap
;
2884 muap
.buf
= uap
->buf
;
2885 muap
.bufsize
= uap
->bufsize
;
2886 muap
.mac
= USER_ADDR_NULL
;
2888 muap
.flags
= uap
->flags
;
2890 return (__mac_getfsstat(p
, &muap
, retval
));
2894 * __mac_getfsstat: Get MAC-related file system statistics
2896 * Parameters: p (ignored)
2897 * uap User argument descriptor (see below)
2898 * retval Count of file system statistics (N stats)
2900 * Indirect: uap->bufsize Buffer size
2901 * uap->macsize MAC info size
2902 * uap->buf Buffer where information will be returned
2904 * uap->flags File system flags
2907 * Returns: 0 Success
2912 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2916 size_t count
, maxcount
, bufsize
, macsize
;
2917 struct getfsstat_struct fst
;
2919 bufsize
= (size_t) uap
->bufsize
;
2920 macsize
= (size_t) uap
->macsize
;
2922 if (IS_64BIT_PROCESS(p
)) {
2923 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2926 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2934 if (uap
->mac
!= USER_ADDR_NULL
) {
2939 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2940 if (count
!= maxcount
)
2943 /* Copy in the array */
2944 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2949 error
= copyin(uap
->mac
, mp0
, macsize
);
2951 FREE(mp0
, M_MACTEMP
);
2955 /* Normalize to an array of user_addr_t */
2956 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2958 FREE(mp0
, M_MACTEMP
);
2962 for (i
= 0; i
< count
; i
++) {
2963 if (IS_64BIT_PROCESS(p
))
2964 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2966 mp
[i
] = (user_addr_t
)mp0
[i
];
2968 FREE(mp0
, M_MACTEMP
);
2975 fst
.flags
= uap
->flags
;
2978 fst
.maxcount
= maxcount
;
2981 vfs_iterate(0, getfsstat_callback
, &fst
);
2984 FREE(mp
, M_MACTEMP
);
2987 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2991 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2992 *retval
= fst
.maxcount
;
2994 *retval
= fst
.count
;
2999 getfsstat64_callback(mount_t mp
, void * arg
)
3001 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
3002 struct vfsstatfs
*sp
;
3005 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
3007 error
= mac_mount_check_stat(vfs_context_current(), mp
);
3009 fstp
->error
= error
;
3010 return(VFS_RETURNED_DONE
);
3013 sp
= &mp
->mnt_vfsstat
;
3015 * If MNT_NOWAIT is specified, do not refresh the fsstat
3016 * cache. MNT_WAIT overrides MNT_NOWAIT.
3018 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3019 * getfsstat, since the constants are out of the same
3022 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
3023 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
3024 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
3025 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
3026 return(VFS_RETURNED
);
3029 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
3031 fstp
->error
= error
;
3032 return(VFS_RETURNED_DONE
);
3034 fstp
->sfsp
+= sizeof(struct statfs64
);
3037 return(VFS_RETURNED
);
3041 * Get statistics on all file systems in 64 bit mode.
3044 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
3047 int count
, maxcount
;
3048 struct getfsstat_struct fst
;
3050 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
3056 fst
.flags
= uap
->flags
;
3059 fst
.maxcount
= maxcount
;
3061 vfs_iterate(0, getfsstat64_callback
, &fst
);
3064 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3068 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
3069 *retval
= fst
.maxcount
;
3071 *retval
= fst
.count
;
3077 * gets the associated vnode with the file descriptor passed.
3081 * ctx - vfs context of caller
3082 * fd - file descriptor for which vnode is required.
3083 * vpp - Pointer to pointer to vnode to be returned.
3085 * The vnode is returned with an iocount so any vnode obtained
3086 * by this call needs a vnode_put
3090 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3094 struct fileproc
*fp
;
3095 proc_t p
= vfs_context_proc(ctx
);
3099 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3103 error
= vnode_getwithref(vp
);
3105 (void)fp_drop(p
, fd
, fp
, 0);
3109 (void)fp_drop(p
, fd
, fp
, 0);
3115 * Wrapper function around namei to start lookup from a directory
3116 * specified by a file descriptor ni_dirfd.
3118 * In addition to all the errors returned by namei, this call can
3119 * return ENOTDIR if the file descriptor does not refer to a directory.
3120 * and EBADF if the file descriptor is not valid.
3123 nameiat(struct nameidata
*ndp
, int dirfd
)
3125 if ((dirfd
!= AT_FDCWD
) &&
3126 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3127 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3131 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3132 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3136 c
= *((char *)(ndp
->ni_dirp
));
3142 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3147 if (vnode_vtype(dvp_at
) != VDIR
) {
3152 ndp
->ni_dvp
= dvp_at
;
3153 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3155 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3161 return (namei(ndp
));
3165 * Change current working directory to a given file descriptor.
3169 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3171 struct filedesc
*fdp
= p
->p_fd
;
3177 vfs_context_t ctx
= vfs_context_current();
3179 AUDIT_ARG(fd
, uap
->fd
);
3180 if (per_thread
&& uap
->fd
== -1) {
3182 * Switching back from per-thread to per process CWD; verify we
3183 * in fact have one before proceeding. The only success case
3184 * for this code path is to return 0 preemptively after zapping
3185 * the thread structure contents.
3187 thread_t th
= vfs_context_thread(ctx
);
3189 uthread_t uth
= get_bsdthread_info(th
);
3191 uth
->uu_cdir
= NULLVP
;
3192 if (tvp
!= NULLVP
) {
3200 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
3202 if ( (error
= vnode_getwithref(vp
)) ) {
3207 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3209 if (vp
->v_type
!= VDIR
) {
3215 error
= mac_vnode_check_chdir(ctx
, vp
);
3219 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3223 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3224 if (vfs_busy(mp
, LK_NOWAIT
)) {
3228 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3237 if ( (error
= vnode_ref(vp
)) )
3242 thread_t th
= vfs_context_thread(ctx
);
3244 uthread_t uth
= get_bsdthread_info(th
);
3247 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3272 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3274 return common_fchdir(p
, uap
, 0);
3278 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3280 return common_fchdir(p
, (void *)uap
, 1);
3284 * Change current working directory (".").
3286 * Returns: 0 Success
3287 * change_dir:ENOTDIR
3289 * vnode_ref:ENOENT No such file or directory
3293 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3295 struct filedesc
*fdp
= p
->p_fd
;
3297 struct nameidata nd
;
3299 vfs_context_t ctx
= vfs_context_current();
3301 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3302 UIO_USERSPACE
, uap
->path
, ctx
);
3303 error
= change_dir(&nd
, ctx
);
3306 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3307 vnode_put(nd
.ni_vp
);
3311 * drop the iocount we picked up in change_dir
3313 vnode_put(nd
.ni_vp
);
3316 thread_t th
= vfs_context_thread(ctx
);
3318 uthread_t uth
= get_bsdthread_info(th
);
3320 uth
->uu_cdir
= nd
.ni_vp
;
3321 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3323 vnode_rele(nd
.ni_vp
);
3329 fdp
->fd_cdir
= nd
.ni_vp
;
3343 * Change current working directory (".") for the entire process
3345 * Parameters: p Process requesting the call
3346 * uap User argument descriptor (see below)
3349 * Indirect parameters: uap->path Directory path
3351 * Returns: 0 Success
3352 * common_chdir: ENOTDIR
3353 * common_chdir: ENOENT No such file or directory
3358 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3360 return common_chdir(p
, (void *)uap
, 0);
3366 * Change current working directory (".") for a single thread
3368 * Parameters: p Process requesting the call
3369 * uap User argument descriptor (see below)
3372 * Indirect parameters: uap->path Directory path
3374 * Returns: 0 Success
3375 * common_chdir: ENOTDIR
3376 * common_chdir: ENOENT No such file or directory
3381 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3383 return common_chdir(p
, (void *)uap
, 1);
3388 * Change notion of root (``/'') directory.
3392 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3394 struct filedesc
*fdp
= p
->p_fd
;
3396 struct nameidata nd
;
3398 vfs_context_t ctx
= vfs_context_current();
3400 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3403 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3404 UIO_USERSPACE
, uap
->path
, ctx
);
3405 error
= change_dir(&nd
, ctx
);
3410 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3413 vnode_put(nd
.ni_vp
);
3418 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3419 vnode_put(nd
.ni_vp
);
3422 vnode_put(nd
.ni_vp
);
3426 fdp
->fd_rdir
= nd
.ni_vp
;
3427 fdp
->fd_flags
|= FD_CHROOT
;
3437 * Common routine for chroot and chdir.
3439 * Returns: 0 Success
3440 * ENOTDIR Not a directory
3441 * namei:??? [anything namei can return]
3442 * vnode_authorize:??? [anything vnode_authorize can return]
3445 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3450 if ((error
= namei(ndp
)))
3455 if (vp
->v_type
!= VDIR
) {
3461 error
= mac_vnode_check_chdir(ctx
, vp
);
3468 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3478 * Free the vnode data (for directories) associated with the file glob.
3481 fg_vn_data_alloc(void)
3483 struct fd_vn_data
*fvdata
;
3485 /* Allocate per fd vnode data */
3486 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3487 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3488 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3493 * Free the vnode data (for directories) associated with the file glob.
3496 fg_vn_data_free(void *fgvndata
)
3498 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3501 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3502 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3503 FREE(fvdata
, M_FD_VN_DATA
);
3507 * Check permissions, allocate an open file structure,
3508 * and call the device open routine if any.
3510 * Returns: 0 Success
3521 * XXX Need to implement uid, gid
3524 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3525 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3528 proc_t p
= vfs_context_proc(ctx
);
3529 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3530 struct fileproc
*fp
;
3533 int type
, indx
, error
;
3535 struct vfs_context context
;
3539 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3542 flags
= FFLAGS(uflags
);
3543 CLR(flags
, FENCRYPTED
);
3544 CLR(flags
, FUNENCRYPTED
);
3546 AUDIT_ARG(fflags
, oflags
);
3547 AUDIT_ARG(mode
, vap
->va_mode
);
3549 if ((error
= falloc_withalloc(p
,
3550 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3553 uu
->uu_dupfd
= -indx
- 1;
3555 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3556 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3557 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3558 fp_drop(p
, indx
, NULL
, 0);
3563 if (error
== ERESTART
)
3565 fp_free(p
, indx
, fp
);
3571 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3572 fp
->f_fglob
->fg_ops
= &vnops
;
3573 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3575 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3576 lf
.l_whence
= SEEK_SET
;
3579 if (flags
& O_EXLOCK
)
3580 lf
.l_type
= F_WRLCK
;
3582 lf
.l_type
= F_RDLCK
;
3584 if ((flags
& FNONBLOCK
) == 0)
3587 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3592 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3594 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3597 #if DEVELOPMENT || DEBUG
3599 * XXX VSWAP: Check for entitlements or special flag here
3600 * so we can restrict access appropriately.
3602 #else /* DEVELOPMENT || DEBUG */
3604 if (vnode_isswap(vp
) && (flags
& (FWRITE
| O_TRUNC
)) && (ctx
!= vfs_context_kernel())) {
3605 /* block attempt to write/truncate swapfile */
3609 #endif /* DEVELOPMENT || DEBUG */
3611 /* try to truncate by setting the size attribute */
3612 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3616 * For directories we hold some additional information in the fd.
3618 if (vnode_vtype(vp
) == VDIR
) {
3619 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3621 fp
->f_fglob
->fg_vn_data
= NULL
;
3627 * The first terminal open (without a O_NOCTTY) by a session leader
3628 * results in it being set as the controlling terminal.
3630 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3631 !(flags
& O_NOCTTY
)) {
3634 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3635 (caddr_t
)&tmp
, ctx
);
3639 if (flags
& O_CLOEXEC
)
3640 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3641 if (flags
& O_CLOFORK
)
3642 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3643 procfdtbl_releasefd(p
, indx
, NULL
);
3645 #if CONFIG_SECLUDED_MEMORY
3646 if (secluded_for_filecache
&&
3647 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3648 vnode_vtype(vp
) == VREG
) {
3649 memory_object_control_t moc
;
3651 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3653 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3654 /* nothing to do... */
3655 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3656 /* writable -> no longer eligible for secluded pages */
3657 memory_object_mark_eligible_for_secluded(moc
,
3659 } else if (secluded_for_filecache
== 1) {
3660 char pathname
[32] = { 0, };
3662 /* XXX FBDP: better way to detect /Applications/ ? */
3663 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3664 copyinstr(ndp
->ni_dirp
,
3669 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3674 pathname
[sizeof (pathname
) - 1] = '\0';
3675 if (strncmp(pathname
,
3677 strlen("/Applications/")) == 0 &&
3679 "/Applications/Camera.app/",
3680 strlen("/Applications/Camera.app/")) != 0) {
3683 * AND from "/Applications/"
3684 * AND not from "/Applications/Camera.app/"
3685 * ==> eligible for secluded
3687 memory_object_mark_eligible_for_secluded(moc
,
3690 } else if (secluded_for_filecache
== 2) {
3692 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3694 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3696 /* not implemented... */
3698 if (!strncmp(vp
->v_name
,
3699 DYLD_SHARED_CACHE_NAME
,
3700 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3701 !strncmp(vp
->v_name
,
3703 strlen(vp
->v_name
)) ||
3704 !strncmp(vp
->v_name
,
3706 strlen(vp
->v_name
)) ||
3707 !strncmp(vp
->v_name
,
3709 strlen(vp
->v_name
)) ||
3710 !strncmp(vp
->v_name
,
3712 strlen(vp
->v_name
)) ||
3713 !strncmp(vp
->v_name
,
3715 strlen(vp
->v_name
)) ||
3716 !strncmp(vp
->v_name
,
3718 strlen(vp
->v_name
))) {
3720 * This file matters when launching Camera:
3721 * do not store its contents in the secluded
3722 * pool that will be drained on Camera launch.
3724 memory_object_mark_eligible_for_secluded(moc
,
3729 #endif /* CONFIG_SECLUDED_MEMORY */
3731 fp_drop(p
, indx
, fp
, 1);
3738 context
= *vfs_context_current();
3739 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3741 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3742 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3743 lf
.l_whence
= SEEK_SET
;
3746 lf
.l_type
= F_UNLCK
;
3749 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3752 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3754 fp_free(p
, indx
, fp
);
3760 * While most of the *at syscall handlers can call nameiat() which
3761 * is a wrapper around namei, the use of namei and initialisation
3762 * of nameidata are far removed and in different functions - namei
3763 * gets called in vn_open_auth for open1. So we'll just do here what
3767 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3768 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3771 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3775 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3776 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3780 c
= *((char *)(ndp
->ni_dirp
));
3786 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3791 if (vnode_vtype(dvp_at
) != VDIR
) {
3796 ndp
->ni_dvp
= dvp_at
;
3797 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3798 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3805 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3809 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3811 * Parameters: p Process requesting the open
3812 * uap User argument descriptor (see below)
3813 * retval Pointer to an area to receive the
3814 * return calue from the system call
3816 * Indirect: uap->path Path to open (same as 'open')
3817 * uap->flags Flags to open (same as 'open'
3818 * uap->uid UID to set, if creating
3819 * uap->gid GID to set, if creating
3820 * uap->mode File mode, if creating (same as 'open')
3821 * uap->xsecurity ACL to set, if creating
3823 * Returns: 0 Success
3826 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3828 * XXX: We should enummerate the possible errno values here, and where
3829 * in the code they originated.
3832 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3834 struct filedesc
*fdp
= p
->p_fd
;
3836 kauth_filesec_t xsecdst
;
3837 struct vnode_attr va
;
3838 struct nameidata nd
;
3841 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3844 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3845 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3849 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3850 VATTR_SET(&va
, va_mode
, cmode
);
3851 if (uap
->uid
!= KAUTH_UID_NONE
)
3852 VATTR_SET(&va
, va_uid
, uap
->uid
);
3853 if (uap
->gid
!= KAUTH_GID_NONE
)
3854 VATTR_SET(&va
, va_gid
, uap
->gid
);
3855 if (xsecdst
!= NULL
)
3856 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3858 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3859 uap
->path
, vfs_context_current());
3861 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3862 fileproc_alloc_init
, NULL
, retval
);
3863 if (xsecdst
!= NULL
)
3864 kauth_filesec_free(xsecdst
);
3870 * Go through the data-protected atomically controlled open (2)
3872 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3874 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3875 int flags
= uap
->flags
;
3876 int class = uap
->class;
3877 int dpflags
= uap
->dpflags
;
3880 * Follow the same path as normal open(2)
3881 * Look up the item if it exists, and acquire the vnode.
3883 struct filedesc
*fdp
= p
->p_fd
;
3884 struct vnode_attr va
;
3885 struct nameidata nd
;
3890 /* Mask off all but regular access permissions */
3891 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3892 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3894 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3895 uap
->path
, vfs_context_current());
3898 * Initialize the extra fields in vnode_attr to pass down our
3900 * 1. target cprotect class.
3901 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3903 if (flags
& O_CREAT
) {
3904 /* lower level kernel code validates that the class is valid before applying it. */
3905 if (class != PROTECTION_CLASS_DEFAULT
) {
3907 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3908 * file behave the same as open (2)
3910 VATTR_SET(&va
, va_dataprotect_class
, class);
3914 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3915 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3916 /* Not allowed to write raw encrypted bytes */
3919 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3920 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3922 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3923 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3927 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3928 fileproc_alloc_init
, NULL
, retval
);
3934 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3935 int fd
, enum uio_seg segflg
, int *retval
)
3937 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3938 struct vnode_attr va
;
3939 struct nameidata nd
;
3943 /* Mask off all but regular access permissions */
3944 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3945 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3947 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3950 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3955 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3957 __pthread_testcancel(1);
3958 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3962 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3965 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3966 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3970 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3973 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3974 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3978 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3980 __pthread_testcancel(1);
3981 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3985 * openbyid_np: open a file given a file system id and a file system object id
3986 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3987 * file systems that don't support object ids it is a node id (uint64_t).
3989 * Parameters: p Process requesting the open
3990 * uap User argument descriptor (see below)
3991 * retval Pointer to an area to receive the
3992 * return calue from the system call
3994 * Indirect: uap->path Path to open (same as 'open')
3996 * uap->fsid id of target file system
3997 * uap->objid id of target file system object
3998 * uap->flags Flags to open (same as 'open')
4000 * Returns: 0 Success
4004 * XXX: We should enummerate the possible errno values here, and where
4005 * in the code they originated.
4008 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
4014 int buflen
= MAXPATHLEN
;
4016 vfs_context_t ctx
= vfs_context_current();
4018 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
4022 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
4026 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4027 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
4031 AUDIT_ARG(value32
, fsid
.val
[0]);
4032 AUDIT_ARG(value64
, objid
);
4034 /*resolve path from fsis, objid*/
4036 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
4041 error
= fsgetpath_internal(
4042 ctx
, fsid
.val
[0], objid
,
4043 buflen
, buf
, &pathlen
);
4049 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
4057 error
= openat_internal(
4058 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
4067 * Create a special file.
4069 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4072 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4074 struct vnode_attr va
;
4075 vfs_context_t ctx
= vfs_context_current();
4077 struct nameidata nd
;
4081 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4082 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4084 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4085 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
4086 return(mkfifo1(ctx
, uap
->path
, &va
));
4088 AUDIT_ARG(mode
, uap
->mode
);
4089 AUDIT_ARG(value32
, uap
->dev
);
4091 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
4093 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4094 UIO_USERSPACE
, uap
->path
, ctx
);
4106 switch (uap
->mode
& S_IFMT
) {
4108 VATTR_SET(&va
, va_type
, VCHR
);
4111 VATTR_SET(&va
, va_type
, VBLK
);
4119 error
= mac_vnode_check_create(ctx
,
4120 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4125 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4128 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
4132 int update_flags
= 0;
4134 // Make sure the name & parent pointers are hooked up
4135 if (vp
->v_name
== NULL
)
4136 update_flags
|= VNODE_UPDATE_NAME
;
4137 if (vp
->v_parent
== NULLVP
)
4138 update_flags
|= VNODE_UPDATE_PARENT
;
4141 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4144 add_fsevent(FSE_CREATE_FILE
, ctx
,
4152 * nameidone has to happen before we vnode_put(dvp)
4153 * since it may need to release the fs_nodelock on the dvp
4165 * Create a named pipe.
4167 * Returns: 0 Success
4170 * vnode_authorize:???
4174 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4178 struct nameidata nd
;
4180 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4181 UIO_USERSPACE
, upath
, ctx
);
4188 /* check that this is a new file and authorize addition */
4193 VATTR_SET(vap
, va_type
, VFIFO
);
4195 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
4198 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4201 * nameidone has to happen before we vnode_put(dvp)
4202 * since it may need to release the fs_nodelock on the dvp
4215 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4217 * Parameters: p Process requesting the open
4218 * uap User argument descriptor (see below)
4221 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4222 * uap->uid UID to set
4223 * uap->gid GID to set
4224 * uap->mode File mode to set (same as 'mkfifo')
4225 * uap->xsecurity ACL to set, if creating
4227 * Returns: 0 Success
4230 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4232 * XXX: We should enummerate the possible errno values here, and where
4233 * in the code they originated.
4236 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4239 kauth_filesec_t xsecdst
;
4240 struct vnode_attr va
;
4242 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4244 xsecdst
= KAUTH_FILESEC_NONE
;
4245 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4246 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
4251 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4252 if (uap
->uid
!= KAUTH_UID_NONE
)
4253 VATTR_SET(&va
, va_uid
, uap
->uid
);
4254 if (uap
->gid
!= KAUTH_GID_NONE
)
4255 VATTR_SET(&va
, va_gid
, uap
->gid
);
4256 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4257 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4259 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4261 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4262 kauth_filesec_free(xsecdst
);
4268 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4270 struct vnode_attr va
;
4273 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4275 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
4280 my_strrchr(char *p
, int ch
)
4284 for (save
= NULL
;; ++p
) {
4293 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4296 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4298 int ret
, len
= _len
;
4300 *truncated_path
= 0;
4301 ret
= vn_getpath(dvp
, path
, &len
);
4302 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4305 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4306 if (len
> MAXPATHLEN
) {
4309 // the string got truncated!
4310 *truncated_path
= 1;
4311 ptr
= my_strrchr(path
, '/');
4313 *ptr
= '\0'; // chop off the string at the last directory component
4315 len
= strlen(path
) + 1;
4318 } else if (ret
== 0) {
4319 *truncated_path
= 1;
4320 } else if (ret
!= 0) {
4321 struct vnode
*mydvp
=dvp
;
4323 if (ret
!= ENOSPC
) {
4324 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4325 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4327 *truncated_path
= 1;
4330 if (mydvp
->v_parent
!= NULL
) {
4331 mydvp
= mydvp
->v_parent
;
4332 } else if (mydvp
->v_mount
) {
4333 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4336 // no parent and no mount point? only thing is to punt and say "/" changed
4337 strlcpy(path
, "/", _len
);
4342 if (mydvp
== NULL
) {
4347 ret
= vn_getpath(mydvp
, path
, &len
);
4348 } while (ret
== ENOSPC
);
4356 * Make a hard file link.
4358 * Returns: 0 Success
4363 * vnode_authorize:???
4368 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4369 user_addr_t link
, int flag
, enum uio_seg segflg
)
4371 vnode_t vp
, dvp
, lvp
;
4372 struct nameidata nd
;
4378 int need_event
, has_listeners
;
4379 char *target_path
= NULL
;
4382 vp
= dvp
= lvp
= NULLVP
;
4384 /* look up the object we are linking to */
4385 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4386 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4389 error
= nameiat(&nd
, fd1
);
4397 * Normally, linking to directories is not supported.
4398 * However, some file systems may have limited support.
4400 if (vp
->v_type
== VDIR
) {
4401 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4402 error
= EPERM
; /* POSIX */
4406 /* Linking to a directory requires ownership. */
4407 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4408 struct vnode_attr dva
;
4411 VATTR_WANTED(&dva
, va_uid
);
4412 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4413 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4414 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4421 /* lookup the target node */
4425 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4426 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4428 error
= nameiat(&nd
, fd2
);
4435 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4439 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4440 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4443 /* target node must not exist */
4444 if (lvp
!= NULLVP
) {
4448 /* cannot link across mountpoints */
4449 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4454 /* authorize creation of the target node */
4455 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4458 /* and finally make the link */
4459 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4464 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4468 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4472 has_listeners
= kauth_authorize_fileop_has_listeners();
4474 if (need_event
|| has_listeners
) {
4475 char *link_to_path
= NULL
;
4476 int len
, link_name_len
;
4478 /* build the path to the new link file */
4479 GET_PATH(target_path
);
4480 if (target_path
== NULL
) {
4485 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4487 if (has_listeners
) {
4488 /* build the path to file we are linking to */
4489 GET_PATH(link_to_path
);
4490 if (link_to_path
== NULL
) {
4495 link_name_len
= MAXPATHLEN
;
4496 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4498 * Call out to allow 3rd party notification of link.
4499 * Ignore result of kauth_authorize_fileop call.
4501 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4502 (uintptr_t)link_to_path
,
4503 (uintptr_t)target_path
);
4505 if (link_to_path
!= NULL
) {
4506 RELEASE_PATH(link_to_path
);
4511 /* construct fsevent */
4512 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4514 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4517 // build the path to the destination of the link
4518 add_fsevent(FSE_CREATE_FILE
, ctx
,
4519 FSE_ARG_STRING
, len
, target_path
,
4520 FSE_ARG_FINFO
, &finfo
,
4524 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4525 FSE_ARG_VNODE
, vp
->v_parent
,
4533 * nameidone has to happen before we vnode_put(dvp)
4534 * since it may need to release the fs_nodelock on the dvp
4537 if (target_path
!= NULL
) {
4538 RELEASE_PATH(target_path
);
4550 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4552 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4553 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4557 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4559 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4562 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4563 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4567 * Make a symbolic link.
4569 * We could add support for ACLs here too...
4573 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4574 user_addr_t link
, enum uio_seg segflg
)
4576 struct vnode_attr va
;
4579 struct nameidata nd
;
4585 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4586 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4587 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4589 path
= (char *)path_data
;
4593 AUDIT_ARG(text
, path
); /* This is the link string */
4595 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4598 error
= nameiat(&nd
, fd
);
4604 p
= vfs_context_proc(ctx
);
4606 VATTR_SET(&va
, va_type
, VLNK
);
4607 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4610 error
= mac_vnode_check_create(ctx
,
4611 dvp
, &nd
.ni_cnd
, &va
);
4624 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4625 /* get default ownership, etc. */
4627 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4629 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4632 if (error
== 0 && vp
)
4633 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4636 /* do fallback attribute handling */
4637 if (error
== 0 && vp
)
4638 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4641 int update_flags
= 0;
4643 /*check if a new vnode was created, else try to get one*/
4645 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4647 nd
.ni_op
= OP_LOOKUP
;
4649 nd
.ni_cnd
.cn_flags
= 0;
4650 error
= nameiat(&nd
, fd
);
4657 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4658 /* call out to allow 3rd party notification of symlink.
4659 * Ignore result of kauth_authorize_fileop call.
4661 if (kauth_authorize_fileop_has_listeners() &&
4663 char *new_link_path
= NULL
;
4666 /* build the path to the new link file */
4667 new_link_path
= get_pathbuff();
4669 vn_getpath(dvp
, new_link_path
, &len
);
4670 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4671 new_link_path
[len
- 1] = '/';
4672 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4675 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4676 (uintptr_t)path
, (uintptr_t)new_link_path
);
4677 if (new_link_path
!= NULL
)
4678 release_pathbuff(new_link_path
);
4681 // Make sure the name & parent pointers are hooked up
4682 if (vp
->v_name
== NULL
)
4683 update_flags
|= VNODE_UPDATE_NAME
;
4684 if (vp
->v_parent
== NULLVP
)
4685 update_flags
|= VNODE_UPDATE_PARENT
;
4688 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4691 add_fsevent(FSE_CREATE_FILE
, ctx
,
4699 * nameidone has to happen before we vnode_put(dvp)
4700 * since it may need to release the fs_nodelock on the dvp
4708 if (path
&& (path
!= (char *)path_data
))
4709 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4715 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4717 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4718 uap
->link
, UIO_USERSPACE
));
4722 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4723 __unused
int32_t *retval
)
4725 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4726 uap
->path2
, UIO_USERSPACE
));
4730 * Delete a whiteout from the filesystem.
4731 * No longer supported.
4734 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4740 * Delete a name from the filesystem.
4744 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4745 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4747 struct nameidata nd
;
4750 struct componentname
*cnp
;
4755 struct vnode_attr va
;
4762 struct vnode_attr
*vap
;
4764 int retry_count
= 0;
4767 cn_flags
= LOCKPARENT
;
4768 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4769 cn_flags
|= AUDITVNPATH1
;
4770 /* If a starting dvp is passed, it trumps any fd passed. */
4775 /* unlink or delete is allowed on rsrc forks and named streams */
4776 cn_flags
|= CN_ALLOWRSRCFORK
;
4787 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4789 nd
.ni_dvp
= start_dvp
;
4790 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4794 error
= nameiat(&nd
, fd
);
4802 /* With Carbon delete semantics, busy files cannot be deleted */
4803 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4804 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4807 /* Skip any potential upcalls if told to. */
4808 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4809 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4813 batched
= vnode_compound_remove_available(vp
);
4815 * The root of a mounted filesystem cannot be deleted.
4817 if (vp
->v_flag
& VROOT
) {
4821 #if DEVELOPMENT || DEBUG
4823 * XXX VSWAP: Check for entitlements or special flag here
4824 * so we can restrict access appropriately.
4826 #else /* DEVELOPMENT || DEBUG */
4828 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
4832 #endif /* DEVELOPMENT || DEBUG */
4835 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4837 if (error
== ENOENT
) {
4838 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4839 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4850 if (!vnode_compound_remove_available(dvp
)) {
4851 panic("No vp, but no compound remove?");
4856 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4859 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4860 /* XXX need to get these data in batched VNOP */
4861 get_fse_info(vp
, &finfo
, ctx
);
4864 error
= vfs_get_notify_attributes(&va
);
4873 has_listeners
= kauth_authorize_fileop_has_listeners();
4874 if (need_event
|| has_listeners
) {
4882 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4886 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4887 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4891 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4893 if (error
== EKEEPLOOKING
) {
4895 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4898 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4899 panic("EKEEPLOOKING, but continue flag not set?");
4902 if (vnode_isdir(vp
)) {
4906 goto continue_lookup
;
4907 } else if (error
== ENOENT
&& batched
) {
4908 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4909 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4911 * For compound VNOPs, the authorization callback may
4912 * return ENOENT in case of racing hardlink lookups
4913 * hitting the name cache, redrive the lookup.
4923 * Call out to allow 3rd party notification of delete.
4924 * Ignore result of kauth_authorize_fileop call.
4927 if (has_listeners
) {
4928 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4929 KAUTH_FILEOP_DELETE
,
4934 if (vp
->v_flag
& VISHARDLINK
) {
4936 // if a hardlink gets deleted we want to blow away the
4937 // v_parent link because the path that got us to this
4938 // instance of the link is no longer valid. this will
4939 // force the next call to get the path to ask the file
4940 // system instead of just following the v_parent link.
4942 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4947 if (vp
->v_flag
& VISHARDLINK
) {
4948 get_fse_info(vp
, &finfo
, ctx
);
4950 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4952 if (truncated_path
) {
4953 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4955 add_fsevent(FSE_DELETE
, ctx
,
4956 FSE_ARG_STRING
, len
, path
,
4957 FSE_ARG_FINFO
, &finfo
,
4968 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4969 * will cause its shadow file to go away if necessary.
4971 if (vp
&& (vnode_isnamedstream(vp
)) &&
4972 (vp
->v_parent
!= NULLVP
) &&
4973 vnode_isshadow(vp
)) {
4978 * nameidone has to happen before we vnode_put(dvp)
4979 * since it may need to release the fs_nodelock on the dvp
4995 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4996 enum uio_seg segflg
, int unlink_flags
)
4998 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
5003 * Delete a name from the filesystem using Carbon semantics.
5006 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
5008 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5009 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
5013 * Delete a name from the filesystem using POSIX semantics.
5016 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
5018 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
5019 uap
->path
, UIO_USERSPACE
, 0));
5023 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
5025 if (uap
->flag
& ~AT_REMOVEDIR
)
5028 if (uap
->flag
& AT_REMOVEDIR
)
5029 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
5030 uap
->path
, UIO_USERSPACE
));
5032 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
5033 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
5037 * Reposition read/write file offset.
5040 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
5042 struct fileproc
*fp
;
5044 struct vfs_context
*ctx
;
5045 off_t offset
= uap
->offset
, file_size
;
5048 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
5049 if (error
== ENOTSUP
)
5053 if (vnode_isfifo(vp
)) {
5059 ctx
= vfs_context_current();
5061 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
5062 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
5065 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5072 if ( (error
= vnode_getwithref(vp
)) ) {
5077 switch (uap
->whence
) {
5079 offset
+= fp
->f_fglob
->fg_offset
;
5082 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
5084 offset
+= file_size
;
5089 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5092 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5098 if (uap
->offset
> 0 && offset
< 0) {
5099 /* Incremented/relative move past max size */
5103 * Allow negative offsets on character devices, per
5104 * POSIX 1003.1-2001. Most likely for writing disk
5107 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5108 /* Decremented/relative move before start */
5112 fp
->f_fglob
->fg_offset
= offset
;
5113 *retval
= fp
->f_fglob
->fg_offset
;
5119 * An lseek can affect whether data is "available to read." Use
5120 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5122 post_event_if_success(vp
, error
, NOTE_NONE
);
5123 (void)vnode_put(vp
);
5130 * Check access permissions.
5132 * Returns: 0 Success
5133 * vnode_authorize:???
5136 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5138 kauth_action_t action
;
5142 * If just the regular access bits, convert them to something
5143 * that vnode_authorize will understand.
5145 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5148 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5149 if (uflags
& W_OK
) {
5150 if (vnode_isdir(vp
)) {
5151 action
|= KAUTH_VNODE_ADD_FILE
|
5152 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5153 /* might want delete rights here too */
5155 action
|= KAUTH_VNODE_WRITE_DATA
;
5158 if (uflags
& X_OK
) {
5159 if (vnode_isdir(vp
)) {
5160 action
|= KAUTH_VNODE_SEARCH
;
5162 action
|= KAUTH_VNODE_EXECUTE
;
5166 /* take advantage of definition of uflags */
5167 action
= uflags
>> 8;
5171 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5176 /* action == 0 means only check for existence */
5178 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5189 * access_extended: Check access permissions in bulk.
5191 * Description: uap->entries Pointer to an array of accessx
5192 * descriptor structs, plus one or
5193 * more NUL terminated strings (see
5194 * "Notes" section below).
5195 * uap->size Size of the area pointed to by
5197 * uap->results Pointer to the results array.
5199 * Returns: 0 Success
5200 * ENOMEM Insufficient memory
5201 * EINVAL Invalid arguments
5202 * namei:EFAULT Bad address
5203 * namei:ENAMETOOLONG Filename too long
5204 * namei:ENOENT No such file or directory
5205 * namei:ELOOP Too many levels of symbolic links
5206 * namei:EBADF Bad file descriptor
5207 * namei:ENOTDIR Not a directory
5212 * uap->results Array contents modified
5214 * Notes: The uap->entries are structured as an arbitrary length array
5215 * of accessx descriptors, followed by one or more NUL terminated
5218 * struct accessx_descriptor[0]
5220 * struct accessx_descriptor[n]
5221 * char name_data[0];
5223 * We determine the entry count by walking the buffer containing
5224 * the uap->entries argument descriptor. For each descriptor we
5225 * see, the valid values for the offset ad_name_offset will be
5226 * in the byte range:
5228 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5230 * [ uap->entries + uap->size - 2 ]
5232 * since we must have at least one string, and the string must
5233 * be at least one character plus the NUL terminator in length.
5235 * XXX: Need to support the check-as uid argument
5238 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5240 struct accessx_descriptor
*input
= NULL
;
5241 errno_t
*result
= NULL
;
5244 unsigned int desc_max
, desc_actual
, i
, j
;
5245 struct vfs_context context
;
5246 struct nameidata nd
;
5250 #define ACCESSX_MAX_DESCR_ON_STACK 10
5251 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5253 context
.vc_ucred
= NULL
;
5256 * Validate parameters; if valid, copy the descriptor array and string
5257 * arguments into local memory. Before proceeding, the following
5258 * conditions must have been met:
5260 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5261 * o There must be sufficient room in the request for at least one
5262 * descriptor and a one byte NUL terminated string.
5263 * o The allocation of local storage must not fail.
5265 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
5267 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
5269 if (uap
->size
<= sizeof (stack_input
)) {
5270 input
= stack_input
;
5272 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5273 if (input
== NULL
) {
5278 error
= copyin(uap
->entries
, input
, uap
->size
);
5282 AUDIT_ARG(opaque
, input
, uap
->size
);
5285 * Force NUL termination of the copyin buffer to avoid namei() running
5286 * off the end. If the caller passes us bogus data, they may get a
5289 ((char *)input
)[uap
->size
- 1] = 0;
5292 * Access is defined as checking against the process' real identity,
5293 * even if operations are checking the effective identity. This
5294 * requires that we use a local vfs context.
5296 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5297 context
.vc_thread
= current_thread();
5300 * Find out how many entries we have, so we can allocate the result
5301 * array by walking the list and adjusting the count downward by the
5302 * earliest string offset we see.
5304 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5305 desc_actual
= desc_max
;
5306 for (i
= 0; i
< desc_actual
; i
++) {
5308 * Take the offset to the name string for this entry and
5309 * convert to an input array index, which would be one off
5310 * the end of the array if this entry was the lowest-addressed
5313 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5316 * An offset greater than the max allowable offset is an error.
5317 * It is also an error for any valid entry to point
5318 * to a location prior to the end of the current entry, if
5319 * it's not a reference to the string of the previous entry.
5321 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5326 /* Also do not let ad_name_offset point to something beyond the size of the input */
5327 if (input
[i
].ad_name_offset
>= uap
->size
) {
5333 * An offset of 0 means use the previous descriptor's offset;
5334 * this is used to chain multiple requests for the same file
5335 * to avoid multiple lookups.
5338 /* This is not valid for the first entry */
5347 * If the offset of the string for this descriptor is before
5348 * what we believe is the current actual last descriptor,
5349 * then we need to adjust our estimate downward; this permits
5350 * the string table following the last descriptor to be out
5351 * of order relative to the descriptor list.
5353 if (j
< desc_actual
)
5358 * We limit the actual number of descriptors we are willing to process
5359 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5360 * requested does not exceed this limit,
5362 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5366 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
| M_ZERO
);
5367 if (result
== NULL
) {
5373 * Do the work by iterating over the descriptor entries we know to
5374 * at least appear to contain valid data.
5377 for (i
= 0; i
< desc_actual
; i
++) {
5379 * If the ad_name_offset is 0, then we use the previous
5380 * results to make the check; otherwise, we are looking up
5383 if (input
[i
].ad_name_offset
!= 0) {
5384 /* discard old vnodes */
5395 * Scan forward in the descriptor list to see if we
5396 * need the parent vnode. We will need it if we are
5397 * deleting, since we must have rights to remove
5398 * entries in the parent directory, as well as the
5399 * rights to delete the object itself.
5401 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5402 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5403 if (input
[j
].ad_flags
& _DELETE_OK
)
5406 niopts
= FOLLOW
| AUDITVNPATH1
;
5408 /* need parent for vnode_authorize for deletion test */
5410 niopts
|= WANTPARENT
;
5413 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5414 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5426 * Handle lookup errors.
5436 /* run this access check */
5437 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5440 /* fatal lookup error */
5446 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5448 /* copy out results */
5449 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5452 if (input
&& input
!= stack_input
)
5453 FREE(input
, M_TEMP
);
5455 FREE(result
, M_TEMP
);
5460 if (IS_VALID_CRED(context
.vc_ucred
))
5461 kauth_cred_unref(&context
.vc_ucred
);
5467 * Returns: 0 Success
5468 * namei:EFAULT Bad address
5469 * namei:ENAMETOOLONG Filename too long
5470 * namei:ENOENT No such file or directory
5471 * namei:ELOOP Too many levels of symbolic links
5472 * namei:EBADF Bad file descriptor
5473 * namei:ENOTDIR Not a directory
5478 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5479 int flag
, enum uio_seg segflg
)
5482 struct nameidata nd
;
5484 struct vfs_context context
;
5486 int is_namedstream
= 0;
5490 * Unless the AT_EACCESS option is used, Access is defined as checking
5491 * against the process' real identity, even if operations are checking
5492 * the effective identity. So we need to tweak the credential
5493 * in the context for that case.
5495 if (!(flag
& AT_EACCESS
))
5496 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5498 context
.vc_ucred
= ctx
->vc_ucred
;
5499 context
.vc_thread
= ctx
->vc_thread
;
5502 niopts
= FOLLOW
| AUDITVNPATH1
;
5503 /* need parent for vnode_authorize for deletion test */
5504 if (amode
& _DELETE_OK
)
5505 niopts
|= WANTPARENT
;
5506 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5510 /* access(F_OK) calls are allowed for resource forks. */
5512 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5514 error
= nameiat(&nd
, fd
);
5519 /* Grab reference on the shadow stream file vnode to
5520 * force an inactive on release which will mark it
5523 if (vnode_isnamedstream(nd
.ni_vp
) &&
5524 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5525 vnode_isshadow(nd
.ni_vp
)) {
5527 vnode_ref(nd
.ni_vp
);
5531 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5534 if (is_namedstream
) {
5535 vnode_rele(nd
.ni_vp
);
5539 vnode_put(nd
.ni_vp
);
5540 if (amode
& _DELETE_OK
)
5541 vnode_put(nd
.ni_dvp
);
5545 if (!(flag
& AT_EACCESS
))
5546 kauth_cred_unref(&context
.vc_ucred
);
5551 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5553 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5554 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5558 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5559 __unused
int32_t *retval
)
5561 if (uap
->flag
& ~AT_EACCESS
)
5564 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5565 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5569 * Returns: 0 Success
5576 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5577 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5578 enum uio_seg segflg
, int fd
, int flag
)
5580 struct nameidata nd
;
5587 struct user64_stat user64_sb
;
5588 struct user32_stat user32_sb
;
5589 struct user64_stat64 user64_sb64
;
5590 struct user32_stat64 user32_sb64
;
5594 kauth_filesec_t fsec
;
5595 size_t xsecurity_bufsize
;
5598 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5599 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5603 int is_namedstream
= 0;
5604 /* stat calls are allowed for resource forks. */
5605 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5607 error
= nameiat(&nd
, fd
);
5610 fsec
= KAUTH_FILESEC_NONE
;
5612 statptr
= (void *)&source
;
5615 /* Grab reference on the shadow stream file vnode to
5616 * force an inactive on release which will mark it
5619 if (vnode_isnamedstream(nd
.ni_vp
) &&
5620 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5621 vnode_isshadow(nd
.ni_vp
)) {
5623 vnode_ref(nd
.ni_vp
);
5627 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5630 if (is_namedstream
) {
5631 vnode_rele(nd
.ni_vp
);
5634 vnode_put(nd
.ni_vp
);
5639 /* Zap spare fields */
5640 if (isstat64
!= 0) {
5641 source
.sb64
.st_lspare
= 0;
5642 source
.sb64
.st_qspare
[0] = 0LL;
5643 source
.sb64
.st_qspare
[1] = 0LL;
5644 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5645 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5646 my_size
= sizeof(dest
.user64_sb64
);
5647 sbp
= (caddr_t
)&dest
.user64_sb64
;
5649 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5650 my_size
= sizeof(dest
.user32_sb64
);
5651 sbp
= (caddr_t
)&dest
.user32_sb64
;
5654 * Check if we raced (post lookup) against the last unlink of a file.
5656 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5657 source
.sb64
.st_nlink
= 1;
5660 source
.sb
.st_lspare
= 0;
5661 source
.sb
.st_qspare
[0] = 0LL;
5662 source
.sb
.st_qspare
[1] = 0LL;
5663 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5664 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5665 my_size
= sizeof(dest
.user64_sb
);
5666 sbp
= (caddr_t
)&dest
.user64_sb
;
5668 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5669 my_size
= sizeof(dest
.user32_sb
);
5670 sbp
= (caddr_t
)&dest
.user32_sb
;
5674 * Check if we raced (post lookup) against the last unlink of a file.
5676 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5677 source
.sb
.st_nlink
= 1;
5680 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5683 /* caller wants extended security information? */
5684 if (xsecurity
!= USER_ADDR_NULL
) {
5686 /* did we get any? */
5687 if (fsec
== KAUTH_FILESEC_NONE
) {
5688 if (susize(xsecurity_size
, 0) != 0) {
5693 /* find the user buffer size */
5694 xsecurity_bufsize
= fusize(xsecurity_size
);
5696 /* copy out the actual data size */
5697 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5702 /* if the caller supplied enough room, copy out to it */
5703 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5704 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5708 if (fsec
!= KAUTH_FILESEC_NONE
)
5709 kauth_filesec_free(fsec
);
5714 * stat_extended: Get file status; with extended security (ACL).
5716 * Parameters: p (ignored)
5717 * uap User argument descriptor (see below)
5720 * Indirect: uap->path Path of file to get status from
5721 * uap->ub User buffer (holds file status info)
5722 * uap->xsecurity ACL to get (extended security)
5723 * uap->xsecurity_size Size of ACL
5725 * Returns: 0 Success
5730 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5731 __unused
int32_t *retval
)
5733 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5734 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5739 * Returns: 0 Success
5740 * fstatat_internal:??? [see fstatat_internal() in this file]
5743 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5745 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5746 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5750 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5752 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5753 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5757 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5759 * Parameters: p (ignored)
5760 * uap User argument descriptor (see below)
5763 * Indirect: uap->path Path of file to get status from
5764 * uap->ub User buffer (holds file status info)
5765 * uap->xsecurity ACL to get (extended security)
5766 * uap->xsecurity_size Size of ACL
5768 * Returns: 0 Success
5773 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5775 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5776 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5781 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5783 * Parameters: p (ignored)
5784 * uap User argument descriptor (see below)
5787 * Indirect: uap->path Path of file to get status from
5788 * uap->ub User buffer (holds file status info)
5789 * uap->xsecurity ACL to get (extended security)
5790 * uap->xsecurity_size Size of ACL
5792 * Returns: 0 Success
5797 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5799 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5800 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5801 AT_SYMLINK_NOFOLLOW
));
5805 * Get file status; this version does not follow links.
5808 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5810 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5811 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5815 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5817 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5818 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5822 * lstat64_extended: Get file status; can handle large inode numbers; does not
5823 * follow links; with extended security (ACL).
5825 * Parameters: p (ignored)
5826 * uap User argument descriptor (see below)
5829 * Indirect: uap->path Path of file to get status from
5830 * uap->ub User buffer (holds file status info)
5831 * uap->xsecurity ACL to get (extended security)
5832 * uap->xsecurity_size Size of ACL
5834 * Returns: 0 Success
5839 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5841 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5842 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5843 AT_SYMLINK_NOFOLLOW
));
5847 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5849 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5852 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5853 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5857 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5858 __unused
int32_t *retval
)
5860 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5863 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5864 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5868 * Get configurable pathname variables.
5870 * Returns: 0 Success
5874 * Notes: Global implementation constants are intended to be
5875 * implemented in this function directly; all other constants
5876 * are per-FS implementation, and therefore must be handled in
5877 * each respective FS, instead.
5879 * XXX We implement some things globally right now that should actually be
5880 * XXX per-FS; we will need to deal with this at some point.
5884 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5887 struct nameidata nd
;
5888 vfs_context_t ctx
= vfs_context_current();
5890 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5891 UIO_USERSPACE
, uap
->path
, ctx
);
5896 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5898 vnode_put(nd
.ni_vp
);
5904 * Return target name of a symbolic link.
/*
 * readlinkat_internal: common implementation behind readlink/readlinkat.
 *
 *   ctx      caller's VFS context
 *   fd       directory descriptor the lookup is relative to (passed to
 *            nameiat)
 *   path     path of the symbolic link
 *   seg      address space of 'path'
 *   buf      destination buffer for the link target
 *   bufsize  capacity of 'buf'
 *   bufseg   address space of 'buf'
 *
 * The link itself is looked up (NOFOLLOW), a single-iovec read uio is
 * built over the caller's buffer, the vnode is verified to be a VLNK,
 * MAC and kauth READ_DATA checks are applied, and VNOP_READLINK fills
 * the buffer.  On completion *retval receives the number of bytes that
 * were produced (bufsize minus the uio residual).
 */
5908 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5909 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5915 struct nameidata nd
;
/* backing storage for a uio with exactly one iovec */
5916 char uio_buf
[ UIO_SIZEOF(1) ];
5918 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5921 error
= nameiat(&nd
, fd
);
5928 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5929 &uio_buf
[0], sizeof(uio_buf
));
5930 uio_addiov(auio
, buf
, bufsize
);
/* only symbolic links can be read with this call */
5931 if (vp
->v_type
!= VLNK
) {
5935 error
= mac_vnode_check_readlink(ctx
, vp
);
5938 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5941 error
= VNOP_READLINK(vp
, auio
, ctx
);
/*
 * bytes transferred = requested size - what is left in the uio.
 * NOTE(review): bufsize is size_t while the residual is narrowed to int;
 * confirm very large bufsize values cannot reach this point.
 */
5945 *retval
= bufsize
- (int)uio_resid(auio
);
5950 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5952 enum uio_seg procseg
;
5954 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5955 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5956 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5957 uap
->count
, procseg
, retval
));
5961 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5963 enum uio_seg procseg
;
5965 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5966 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5967 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5971 * Change file flags.
5973 * NOTE: this will vnode_put() `vp'
/*
 * chflags1: apply the file flags 'flags' to vnode 'vp' on behalf of 'ctx'.
 *
 * Sequence visible here: stage va_flags in a vnode_attr, run the MAC
 * setflags check, let vnode_authattr() compute the kauth action needed,
 * authorize that action with KAUTH_VNODE_NOIMMUTABLE or'ed in (so that
 * immutable flags can themselves be cleared), call vnode_setattr(), and
 * notify MAC.  A successful setattr that did not mark va_flags as
 * supported is singled out by the final check.
 */
5976 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5978 struct vnode_attr va
;
5979 kauth_action_t action
;
5983 VATTR_SET(&va
, va_flags
, flags
);
5986 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5991 /* request authorisation, disregard immutability */
5992 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5995 * Request that the auth layer disregard those file flags it's allowed to when
5996 * authorizing this operation; we need to do this in order to be able to
5997 * clear immutable flags.
/* vnode_authorize is only consulted when vnode_authattr asked for an action */
5999 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
6001 error
= vnode_setattr(vp
, &va
, ctx
);
6005 mac_vnode_notify_setflags(ctx
, vp
, flags
);
/* setattr reported success but the filesystem did not take va_flags */
6008 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6017 * Change flags of a file given a path name.
/*
 * chflags: path-based front end for chflags1().
 *
 * Audits the requested flags, prepares a symlink-following lookup of the
 * user-space path uap->path, and passes the resulting vnode together
 * with uap->flags to chflags1(), which also releases the vnode.
 */
6021 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
6024 vfs_context_t ctx
= vfs_context_current();
6026 struct nameidata nd
;
6028 AUDIT_ARG(fflags
, uap
->flags
);
6029 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6030 UIO_USERSPACE
, uap
->path
, ctx
);
6037 /* we don't vnode_put() here because chflags1 does internally */
6038 error
= chflags1(vp
, uap
->flags
, ctx
);
6044 * Change flags of a file given a file descriptor.
/*
 * fchflags: descriptor-based front end for chflags1().
 *
 * Audits the descriptor and flags, maps uap->fd to its vnode with
 * file_vnode(), takes a vnode reference with vnode_getwithref(), audits
 * the vnode path, and hands the vnode plus uap->flags to chflags1(),
 * which releases the vnode reference itself.
 */
6048 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
6053 AUDIT_ARG(fd
, uap
->fd
);
6054 AUDIT_ARG(fflags
, uap
->flags
);
6055 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6058 if ((error
= vnode_getwithref(vp
))) {
6063 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6065 /* we don't vnode_put() here because chflags1 does internally */
6066 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6073 * Change security information on a filesystem object.
6075 * Returns: 0 Success
6076 * EPERM Operation not permitted
6077 * vnode_authattr:??? [anything vnode_authattr can return]
6078 * vnode_authorize:??? [anything vnode_authorize can return]
6079 * vnode_setattr:??? [anything vnode_setattr can return]
6081 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6082 * translated to EPERM before being returned.
/*
 * chmod_vnode: apply the security attributes staged in 'vap' (any of
 * mode, uid, gid, ACL that are marked active) to vnode 'vp'.
 *
 * Order of operations visible here:
 *   1. audit the requested mode;
 *   2. single out named-stream (resource fork) vnodes;
 *   3. MAC pre-checks, one per active attribute group (mode, owner, ACL);
 *      an inactive uid/gid is passed to the owner hook as -1;
 *   4. vnode_authattr() + vnode_authorize() for the kauth decision, with
 *      EACCES singled out for translation;
 *   5. vnode_setattr();
 *   6. MAC notifications mirroring the pre-checks in step 3.
 */
6085 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6087 kauth_action_t action
;
6090 AUDIT_ARG(mode
, vap
->va_mode
);
6091 /* XXX audit new args */
6094 /* chmod calls are not allowed for resource forks. */
6095 if (vp
->v_flag
& VISNAMEDSTREAM
) {
/* MAC pre-check: mode, only when the caller is actually setting it */
6101 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6102 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
/* MAC pre-check: ownership; -1 stands in for "not being changed" */
6105 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6106 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6107 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6108 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1)))
/* MAC pre-check: ACL */
6112 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6113 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
)))
6117 /* make sure that the caller is allowed to set this security information */
6118 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6119 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6120 if (error
== EACCES
)
6125 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0)
/* post-change MAC notifications, one per attribute group that was set */
6129 if (VATTR_IS_ACTIVE(vap
, va_mode
))
6130 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6132 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
))
6133 mac_vnode_notify_setowner(ctx
, vp
,
6134 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6135 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6137 if (VATTR_IS_ACTIVE(vap
, va_acl
))
6138 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6146 * Change mode of a file given a path name.
6148 * Returns: 0 Success
6149 * namei:??? [anything namei can return]
6150 * chmod_vnode:??? [anything chmod_vnode can return]
/*
 * chmodat: resolve 'path' relative to directory descriptor 'fd' and apply
 * the attributes in 'vap' to the vnode found, via chmod_vnode().
 *
 * 'flag' selects symlink handling: AT_SYMLINK_NOFOLLOW maps to a NOFOLLOW
 * lookup, anything else to FOLLOW.  'segflg' describes the address space
 * of 'path'.  The vnode returned by the lookup is released with
 * vnode_put() after chmod_vnode() returns.
 */
6153 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6154 int fd
, int flag
, enum uio_seg segflg
)
6156 struct nameidata nd
;
6159 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6160 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6162 if ((error
= nameiat(&nd
, fd
)))
6164 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6165 vnode_put(nd
.ni_vp
);
6171 * chmod_extended: Change the mode of a file given a path name; with extended
6172 * argument list (including extended security (ACL)).
6174 * Parameters: p Process requesting the open
6175 * uap User argument descriptor (see below)
6178 * Indirect: uap->path Path to object (same as 'chmod')
6179 * uap->uid UID to set
6180 * uap->gid GID to set
6181 * uap->mode File mode to set (same as 'chmod')
6182 * uap->xsecurity ACL to set (or delete)
6184 * Returns: 0 Success
6187 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6189 * XXX: We should enumerate the possible errno values here, and where
6190 * in the code they originated.
/*
 * chmod_extended: path-based chmod that can also change owner, group and
 * ACL in the same call.
 *
 * Each of mode / uid / gid is staged in 'va' only when the caller passed
 * something other than its "leave alone" sentinel (-1, KAUTH_UID_NONE,
 * KAUTH_GID_NONE).  uap->xsecurity is decoded by the switch: the value 1
 * (_FILESEC_REMOVE_ACL) stages a NULL ACL, USER_ADDR_NULL has its own
 * case, and any other address is copied in with kauth_copyinfilesec()
 * and its ACL staged.  The change is applied through chmodat() relative
 * to AT_FDCWD, and a copied-in filesec is freed afterwards.
 */
6193 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6196 struct vnode_attr va
;
6197 kauth_filesec_t xsecdst
;
6199 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* stage only the attributes the caller actually asked to change */
6202 if (uap
->mode
!= -1)
6203 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6204 if (uap
->uid
!= KAUTH_UID_NONE
)
6205 VATTR_SET(&va
, va_uid
, uap
->uid
);
6206 if (uap
->gid
!= KAUTH_GID_NONE
)
6207 VATTR_SET(&va
, va_gid
, uap
->gid
);
6210 switch(uap
->xsecurity
) {
6211 /* explicit remove request */
6212 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6213 VATTR_SET(&va
, va_acl
, NULL
);
6216 case USER_ADDR_NULL
:
6219 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6221 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6222 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6225 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
/* release the kernel copy of the caller's filesec, if one was made */
6228 if (xsecdst
!= NULL
)
6229 kauth_filesec_free(xsecdst
);
6234 * Returns: 0 Success
6235 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: mode-only helper shared by chmod and fchmodat.
 *
 * Stages 'mode' (masked with ALLPERMS) as the only attribute in a local
 * vnode_attr and forwards everything else untouched to chmodat().
 */
6238 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6239 int flag
, enum uio_seg segflg
)
6241 struct vnode_attr va
;
6244 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6246 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
6250 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6252 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6253 AT_FDCWD
, 0, UIO_USERSPACE
));
/*
 * fchmodat: change the mode of uap->path, resolved relative to the
 * directory descriptor uap->fd.
 *
 * AT_SYMLINK_NOFOLLOW is the only flag bit recognised; any other bit in
 * uap->flag is caught by the check below.  The request is then passed to
 * fchmodat_internal().
 */
6257 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6259 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6262 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6263 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
6267 * Change mode of a file given a file descriptor.
/*
 * fchmod1: descriptor-based counterpart of chmodat().
 *
 * Maps 'fd' to its vnode with file_vnode(), takes a reference with
 * vnode_getwithref(), audits the vnode path, applies the attributes in
 * 'vap' through chmod_vnode(), and drops the vnode reference.
 */
6270 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6277 if ((error
= file_vnode(fd
, &vp
)) != 0)
6279 if ((error
= vnode_getwithref(vp
)) != 0) {
6283 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6285 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6286 (void)vnode_put(vp
);
6293 * fchmod_extended: Change mode of a file given a file descriptor; with
6294 * extended argument list (including extended security (ACL)).
6296 * Parameters: p Process requesting to change file mode
6297 * uap User argument descriptor (see below)
6300 * Indirect: uap->mode File mode to set (same as 'chmod')
6301 * uap->uid UID to set
6302 * uap->gid GID to set
6303 * uap->xsecurity ACL to set (or delete)
6304 * uap->fd File descriptor of file to change mode
6306 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based chmod that can also change owner,
 * group and ACL in the same call.
 *
 * mode / uid / gid are staged in 'va' only when they differ from their
 * "leave alone" sentinels (-1, KAUTH_UID_NONE, KAUTH_GID_NONE).  The
 * first switch decodes uap->xsecurity: USER_ADDR_NULL and the value 1
 * (_FILESEC_REMOVE_ACL) both stage a NULL ACL, the value -1 has its own
 * case, and any other address is copied in with kauth_copyinfilesec()
 * and its ACL staged.  The change is applied with fchmod1(); a second
 * switch on the same value precedes freeing a copied-in filesec.
 */
6311 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6314 struct vnode_attr va
;
6315 kauth_filesec_t xsecdst
;
6317 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* stage only the attributes the caller actually asked to change */
6320 if (uap
->mode
!= -1)
6321 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6322 if (uap
->uid
!= KAUTH_UID_NONE
)
6323 VATTR_SET(&va
, va_uid
, uap
->uid
);
6324 if (uap
->gid
!= KAUTH_GID_NONE
)
6325 VATTR_SET(&va
, va_gid
, uap
->gid
);
6328 switch(uap
->xsecurity
) {
6329 case USER_ADDR_NULL
:
6330 VATTR_SET(&va
, va_acl
, NULL
);
6332 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6333 VATTR_SET(&va
, va_acl
, NULL
);
6336 case CAST_USER_ADDR_T(-1):
6339 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6341 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6344 error
= fchmod1(p
, uap
->fd
, &va
);
/* cleanup: sentinel values never produced a copied-in filesec */
6347 switch(uap
->xsecurity
) {
6348 case USER_ADDR_NULL
:
6349 case CAST_USER_ADDR_T(-1):
6352 if (xsecdst
!= NULL
)
6353 kauth_filesec_free(xsecdst
);
/*
 * fchmod: change the mode of the file open on uap->fd.
 *
 * Stages uap->mode (masked with ALLPERMS) as the only attribute in a
 * local vnode_attr and lets fchmod1() apply it.
 */
6359 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6361 struct vnode_attr va
;
6364 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6366 return(fchmod1(p
, uap
->fd
, &va
));
6371 * Set ownership given a path name.
/*
 * fchownat_internal: common implementation behind chown, lchown and
 * fchownat.
 *
 *   ctx     caller's VFS context
 *   fd      directory descriptor the lookup is relative to
 *   path    path of the object whose ownership changes
 *   uid     new owner, or VNOVAL to leave the owner alone
 *   gid     new group, or VNOVAL to leave the group alone
 *   flag    AT_SYMLINK_NOFOLLOW selects a NOFOLLOW lookup, otherwise FOLLOW
 *   segflg  address space of 'path'
 *
 * After the lookup the requested ids are staged in a vnode_attr, the MAC
 * setowner check runs, vnode_authattr()/vnode_authorize() make the kauth
 * decision, vnode_setattr() applies the change and MAC is notified.
 * An EACCES result is singled out at the end for translation (see the
 * existing note below).
 */
6375 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6376 gid_t gid
, int flag
, enum uio_seg segflg
)
6379 struct vnode_attr va
;
6381 struct nameidata nd
;
6383 kauth_action_t action
;
6385 AUDIT_ARG(owner
, uid
, gid
);
6387 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6388 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6390 error
= nameiat(&nd
, fd
);
/* VNOVAL (cast to the id type) means "do not change this id" */
6398 if (uid
!= (uid_t
)VNOVAL
)
6399 VATTR_SET(&va
, va_uid
, uid
);
6400 if (gid
!= (gid_t
)VNOVAL
)
6401 VATTR_SET(&va
, va_gid
, gid
);
6404 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6409 /* preflight and authorize attribute changes */
6410 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6412 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6414 error
= vnode_setattr(vp
, &va
, ctx
);
6418 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6423 * EACCES is only allowed from namei(); permissions failure should
6424 * return EPERM, so we need to translate the error code.
6426 if (error
== EACCES
)
6434 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6436 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6437 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
6441 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6443 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6444 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
/*
 * fchownat: change owner and group of uap->path, resolved relative to
 * the directory descriptor uap->fd.
 *
 * AT_SYMLINK_NOFOLLOW is the only flag bit recognised; any other bit in
 * uap->flag is caught by the check below.  The request is then passed to
 * fchownat_internal().
 */
6448 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6450 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6453 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6454 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6458 * Set ownership given a file descriptor.
/*
 * fchown: change owner and group of the file open on uap->fd.
 *
 * Maps the descriptor to its vnode (file_vnode + vnode_getwithref),
 * stages uap->uid / uap->gid unless they equal VNOVAL, singles out
 * named-stream (resource fork) vnodes, runs the MAC setowner check and
 * the kauth vnode_authattr()/vnode_authorize() pair (EACCES from the
 * latter is singled out for translation), applies the change with
 * vnode_setattr(), notifies MAC and drops the vnode reference.
 */
6462 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6464 struct vnode_attr va
;
6465 vfs_context_t ctx
= vfs_context_current();
6468 kauth_action_t action
;
6470 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6471 AUDIT_ARG(fd
, uap
->fd
);
6473 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6476 if ( (error
= vnode_getwithref(vp
)) ) {
6480 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/*
 * VNOVAL means "do not change this id".
 * NOTE(review): fchownat_internal compares against (uid_t)VNOVAL and
 * (gid_t)VNOVAL; the comparisons here carry no cast -- confirm the two
 * spellings agree for the types of uap->uid / uap->gid.
 */
6483 if (uap
->uid
!= VNOVAL
)
6484 VATTR_SET(&va
, va_uid
, uap
->uid
);
6485 if (uap
->gid
!= VNOVAL
)
6486 VATTR_SET(&va
, va_gid
, uap
->gid
);
6489 /* chown calls are not allowed for resource forks. */
6490 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6497 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6502 /* preflight and authorize attribute changes */
6503 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6505 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6506 if (error
== EACCES
)
6510 error
= vnode_setattr(vp
, &va
, ctx
);
6514 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6518 (void)vnode_put(vp
);
/*
 * getutimes: produce the two timespecs (tsp[0], tsp[1]) used by the
 * utimes family.
 *
 * When usrtvp is USER_ADDR_NULL a local timeval is converted into
 * tsp[0].  Otherwise a two-element timeval array is copied in from user
 * space -- user64_timeval for 64-bit processes, user32_timeval for the
 * rest -- and each element is converted with TIMEVAL_TO_TIMESPEC.
 */
6524 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6528 if (usrtvp
== USER_ADDR_NULL
) {
6529 struct timeval old_tv
;
6530 /* XXX Y2038 bug because of microtime argument */
6532 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
/* user-supplied times: layout depends on the caller's address width */
6535 if (IS_64BIT_PROCESS(current_proc())) {
6536 struct user64_timeval tv
[2];
6537 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6540 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6541 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6543 struct user32_timeval tv
[2];
6544 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6547 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6548 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: set access time (ts[0]) and modification time (ts[1]) on
 * vnode 'vp'.
 *
 * Both times are staged in a vnode_attr (VA_UTIMES_NULL may be added to
 * va_vaflags), named-stream (resource fork) vnodes are singled out, and
 * the usual MAC check -> vnode_authattr -> vnode_authorize ->
 * vnode_setattr -> MAC notify sequence follows.  An EACCES from either
 * authorization step is singled out when 'nullflag' is not set.
 */
6555 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6559 struct vnode_attr va
;
6560 kauth_action_t action
;
6562 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6565 VATTR_SET(&va
, va_access_time
, ts
[0]);
6566 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6568 va
.va_vaflags
|= VA_UTIMES_NULL
;
6571 /* utimes calls are not allowed for resource forks. */
6572 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6579 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6583 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6584 if (!nullflag
&& error
== EACCES
)
6589 /* since we may not need to auth anything, check here */
6590 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6591 if (!nullflag
&& error
== EACCES
)
6595 error
= vnode_setattr(vp
, &va
, ctx
);
6599 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6607 * Set the access and modification times of a file.
/*
 * utimes: set access and modification times of the file named by
 * uap->path.
 *
 * The path lookup (symlink-following, user-space path) is prepared
 * first, the times are then obtained with getutimes(), and setutimes()
 * applies them to the vnode found.  The last setutimes() argument tells
 * it whether the caller passed a NULL time pointer.  The vnode is
 * released with vnode_put().
 */
6611 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6613 struct timespec ts
[2];
6616 struct nameidata nd
;
6617 vfs_context_t ctx
= vfs_context_current();
6620 * AUDIT: Needed to change the order of operations to do the
6621 * name lookup first because auditing wants the path.
6623 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6624 UIO_USERSPACE
, uap
->path
, ctx
);
6631 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6632 * the current time instead.
6635 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6638 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6641 vnode_put(nd
.ni_vp
);
6646 * Set the access and modification times of a file.
/*
 * futimes: set access and modification times of the file open on
 * uap->fd.
 *
 * The times are obtained with getutimes() before the descriptor is
 * mapped to its vnode (file_vnode + vnode_getwithref); setutimes() then
 * applies them.  The last setutimes() argument tells it whether the
 * caller passed a null time pointer.
 */
6650 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6652 struct timespec ts
[2];
6657 AUDIT_ARG(fd
, uap
->fd
);
6659 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6661 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6663 if((error
= vnode_getwithref(vp
))) {
/*
 * NOTE(review): the null test is spelled "usrtvp == 0" here, whereas
 * utimes writes "usrtvp == USER_ADDR_NULL"; presumably equivalent, but
 * the named constant would be the consistent spelling.
 */
6668 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6675 * Truncate a file given its path name.
/*
 * truncate: set the data size of the file named by uap->path to
 * uap->length.
 *
 * A negative length is singled out up front.  The path is looked up
 * (symlink-following, user-space path), the new size is staged as
 * va_data_size, the MAC truncate check runs with NOCRED as the file
 * credential, vnode_authattr()/vnode_authorize() make the kauth
 * decision, vnode_setattr() applies the size and MAC is notified.
 */
6679 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6682 struct vnode_attr va
;
6683 vfs_context_t ctx
= vfs_context_current();
6685 struct nameidata nd
;
6686 kauth_action_t action
;
6688 if (uap
->length
< 0)
6690 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6691 UIO_USERSPACE
, uap
->path
, ctx
);
6692 if ((error
= namei(&nd
)))
6699 VATTR_SET(&va
, va_data_size
, uap
->length
);
6702 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6707 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
/* vnode_authorize is only consulted when vnode_authattr asked for an action */
6709 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6711 error
= vnode_setattr(vp
, &va
, ctx
);
6715 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6724 * Truncate a file given a file descriptor.
/*
 * ftruncate: set the data size of the object open on uap->fd to
 * uap->length.
 *
 * A negative length is singled out up front.  The descriptor is resolved
 * with fp_lookup() and dispatched on its file type: one case hands the
 * request to pshm_truncate(); for vnode-backed descriptors the vnode is
 * taken from fg_data.  The descriptor must have been opened with FWRITE.
 * After vnode_getwithref() the MAC truncate check runs with the
 * credential saved in the file (fg_cred), the new size is applied with
 * vnode_setattr(), MAC is notified and the vnode reference is dropped.
 */
6728 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6730 vfs_context_t ctx
= vfs_context_current();
6731 struct vnode_attr va
;
6733 struct fileproc
*fp
;
6737 AUDIT_ARG(fd
, uap
->fd
);
6738 if (uap
->length
< 0)
6741 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6745 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6747 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6756 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
/* the descriptor must be open for writing */
6758 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6759 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6764 if ((error
= vnode_getwithref(vp
)) != 0) {
6768 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6771 error
= mac_vnode_check_truncate(ctx
,
6772 fp
->f_fglob
->fg_cred
, vp
);
6774 (void)vnode_put(vp
);
6779 VATTR_SET(&va
, va_data_size
, uap
->length
);
6780 error
= vnode_setattr(vp
, &va
, ctx
);
6784 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
6787 (void)vnode_put(vp
);
6795 * Sync an open file with synchronized I/O _file_ integrity completion
6799 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6801 __pthread_testcancel(1);
6802 return(fsync_common(p
, uap
, MNT_WAIT
));
6807 * Sync an open file with synchronized I/O _file_ integrity completion
6809 * Notes: This is a legacy support function that does not test for
6810 * thread cancellation points.
6814 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6816 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6821 * Sync an open file with synchronized I/O _data_ integrity completion
6825 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6827 __pthread_testcancel(1);
6828 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6835 * Common fsync code to support both synchronized I/O file integrity completion
6836 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6838 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6839 * will only guarantee that the file data contents are retrievable. If
6840 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6841 * includes additional metadata unnecessary for retrieving the file data
6842 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6845 * Parameters: p The process
6846 * uap->fd The descriptor to synchronize
6847 * flags The data integrity flags
6849 * Returns: int Success
6850 * fp_getfvp:EBADF Bad file descriptor
6851 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6852 * VNOP_FSYNC:??? unspecified
6854 * Notes: We use struct fsync_args because it is a short name, and all
6855 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared body of fsync, fsync_nocancel and fdatasync.
 *
 * Resolves uap->fd to its fileproc and vnode with fp_getfvp(), takes a
 * vnode reference, audits the vnode path and issues VNOP_FSYNC with the
 * caller-selected 'flags'.  If the vnode is a named stream with a
 * parent, is a shadow file, and the descriptor has been written through
 * (FP_WRITTEN), the stream is additionally flushed to its parent with
 * vnode_flushnamedstream().  The vnode reference is dropped at the end.
 */
6858 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6861 struct fileproc
*fp
;
6862 vfs_context_t ctx
= vfs_context_current();
6865 AUDIT_ARG(fd
, uap
->fd
);
6867 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6869 if ( (error
= vnode_getwithref(vp
)) ) {
6874 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6876 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6879 /* Sync resource fork shadow file if necessary. */
6881 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6882 (vp
->v_parent
!= NULLVP
) &&
6883 vnode_isshadow(vp
) &&
6884 (fp
->f_flags
& FP_WRITTEN
)) {
/* best effort: the result of the named-stream flush is discarded */
6885 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6889 (void)vnode_put(vp
);
6895 * Duplicate files. Source must be a file, target must be a file or
6898 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6899 * perform inheritance correctly.
/*
 * copyfile: copy the file uap->from to uap->to inside the filesystem,
 * via VNOP_COPYFILE.
 *
 * Visible flow: reject flag bits outside CPF_MASK; look up the source;
 * look up the destination as a CREATE operation with parent and leaf
 * locked; test CPF_OVERWRITE when deciding how to treat the destination;
 * single out directories on either side; authorize opening the source
 * for reading; authorize unlinking the destination (an ENOENT from that
 * step is tolerated, see the existing note); run the MAC create check
 * with a mode derived from uap->mode and the process umask; authorize
 * adding a file to the destination directory; and finally call
 * VNOP_COPYFILE with uap->mode and uap->flags.
 */
6903 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6905 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6906 struct nameidata fromnd
, tond
;
6908 vfs_context_t ctx
= vfs_context_current();
6910 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
6911 struct vnode_attr va
;
6914 /* Check that the flags are valid. */
6916 if (uap
->flags
& ~CPF_MASK
) {
6920 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6921 UIO_USERSPACE
, uap
->from
, ctx
);
6922 if ((error
= namei(&fromnd
)))
6926 NDINIT(&tond
, CREATE
, OP_LINK
,
6927 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6928 UIO_USERSPACE
, uap
->to
, ctx
);
6929 if ((error
= namei(&tond
))) {
6936 if (!(uap
->flags
& CPF_OVERWRITE
)) {
/* neither the source nor an existing target may be a directory */
6942 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6947 /* This calls existing MAC hooks for open */
6948 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
6955 * See unlinkat_internal for an explanation of the potential
6956 * ENOENT from the MAC hook but the gist is that the MAC hook
6957 * can fail because vn_getpath isn't able to return the full
6958 * path. We choose to ignore this failure.
6960 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
6961 if (error
&& error
!= ENOENT
)
6968 VATTR_SET(&va
, va_type
, fvp
->v_type
);
6969 /* Mask off all but regular access permissions */
6970 VATTR_SET(&va
, va_mode
,
6971 ((((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
6972 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
6975 #endif /* CONFIG_MACF */
6977 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6983 * If source is the same as the destination (that is the
6984 * same inode number) then there is nothing to do.
6985 * (fixed to have POSIX semantics - CSM 3/2/98)
6990 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6992 sdvp
= tond
.ni_startdir
;
6994 * nameidone has to happen before we vnode_put(tdvp)
6995 * since it may need to release the fs_nodelock on the tdvp
7013 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7016 * Helper function for doing clones. The caller is expected to provide an
7017 * iocounted source vnode and release it.
/*
 * clonefile_internal: create a clone of the already-referenced source
 * vnode 'fvp' at path 'dst' (resolved relative to 'dst_dirfd').
 *
 *   fvp                   source vnode; the caller holds and releases the
 *                         iocount (see the note above this function)
 *   data_read_authorised  when set, KAUTH_VNODE_READ_DATA is removed from
 *                         the rights checked on the source
 *   dst_dirfd, dst        destination directory descriptor and path
 *   flags                 CLONE_NOFOLLOW / CLONE_NOOWNERCOPY
 *   ctx                   caller's VFS context
 *
 * Visible flow: choose the kauth action from the source type (add file
 * vs. add subdirectory; mount points and volume roots are singled out),
 * look up the destination parent, compare the mounts of source and
 * destination parent, run MAC and kauth checks on both sides, fetch
 * uid/gid/mode/flags/ACL from the source, build the attribute set 'nva'
 * for the new object, call VNOP_CLONEFILE, then label the new vnode,
 * apply fallback attributes, hook up its name/parent identity and post
 * fsevents.  Attribute state and a source ACL copy are released at the
 * end.
 */
7020 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
7021 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
7024 struct nameidata tond
;
7027 boolean_t free_src_acl
;
7028 boolean_t attr_cleanup
;
7030 kauth_action_t action
;
7031 struct componentname
*cnp
;
7033 struct vnode_attr va
;
7034 struct vnode_attr nva
;
7035 uint32_t vnop_flags
;
7037 v_type
= vnode_vtype(fvp
);
7042 action
= KAUTH_VNODE_ADD_FILE
;
7045 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
7046 fvp
->v_mountedhere
) {
7049 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
7055 AUDIT_ARG(fd2
, dst_dirfd
);
7056 AUDIT_ARG(value32
, flags
);
7058 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7059 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
7060 UIO_USERSPACE
, dst
, ctx
);
7061 if ((error
= nameiat(&tond
, dst_dirfd
)))
7067 free_src_acl
= FALSE
;
7068 attr_cleanup
= FALSE
;
/* source and destination parent are compared by mount */
7075 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7081 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
)))
7084 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
)))
/* rights needed on the source; READ_DATA is dropped if already vetted */
7087 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7088 if (data_read_authorised
)
7089 action
&= ~KAUTH_VNODE_READ_DATA
;
7090 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
)))
7094 * certain attributes may need to be changed from the source, we ask for
7098 VATTR_WANTED(&va
, va_uid
);
7099 VATTR_WANTED(&va
, va_gid
);
7100 VATTR_WANTED(&va
, va_mode
);
7101 VATTR_WANTED(&va
, va_flags
);
7102 VATTR_WANTED(&va
, va_acl
);
7104 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0)
7108 VATTR_SET(&nva
, va_type
, v_type
);
/* carry the source ACL over; remember that it must be freed later */
7109 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7110 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7111 free_src_acl
= TRUE
;
7114 /* Handle ACL inheritance, initialize vap. */
7115 if (v_type
== VLNK
) {
7116 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7118 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7121 attr_cleanup
= TRUE
;
7124 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7126 * We've got initial values for all security parameters,
7127 * If we are superuser, then we can change owners to be the
7128 * same as the source. Both superuser and the owner have default
7129 * WRITE_SECURITY privileges so all other fields can be taken
7130 * from source as well.
7132 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7133 if (VATTR_IS_SUPPORTED(&va
, va_uid
))
7134 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7135 if (VATTR_IS_SUPPORTED(&va
, va_gid
))
7136 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7138 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7141 if (VATTR_IS_SUPPORTED(&va
, va_mode
))
7142 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
/*
 * File flags: everything from the source except UF_DATAVAULT and
 * SF_RESTRICTED, which keep whatever value nva already carried.
 */
7143 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7144 VATTR_SET(&nva
, va_flags
,
7145 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7146 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7149 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7151 if (!error
&& tvp
) {
7152 int update_flags
= 0;
7155 #endif /* CONFIG_FSE */
7158 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7159 VNODE_LABEL_CREATE
, ctx
);
7162 * If some of the requested attributes weren't handled by the
7163 * VNOP, use our fallback code.
/*
 * NOTE(review): the attributes handed to VNOP_CLONEFILE and to
 * vnode_setattr_fallback() live in nva, yet the supported-check below
 * inspects va (the getattr result for the source).  Confirm whether
 * VATTR_ALL_SUPPORTED(&nva) was intended.
 */
7165 if (!VATTR_ALL_SUPPORTED(&va
))
7166 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7168 // Make sure the name & parent pointers are hooked up
7169 if (tvp
->v_name
== NULL
)
7170 update_flags
|= VNODE_UPDATE_NAME
;
7171 if (tvp
->v_parent
== NULLVP
)
7172 update_flags
|= VNODE_UPDATE_PARENT
;
7175 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7176 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7180 switch (vnode_vtype(tvp
)) {
7184 fsevent
= FSE_CREATE_FILE
;
7187 fsevent
= FSE_CREATE_DIR
;
7193 if (need_fsevent(fsevent
, tvp
)) {
7195 * The following is a sequence of three explicit events.
7196 * A pair of FSE_CLONE events representing the source and destination
7197 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7198 * fseventsd may coalesce the destination clone and create events
7199 * into a single event resulting in the following sequence for a client
7201 * FSE_CLONE | FSE_CREATE (dst)
7203 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7205 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7208 #endif /* CONFIG_FSE */
7213 vn_attribute_cleanup(&nva
, defaulted
);
/* release the ACL obtained from the source by vnode_getattr */
7214 if (free_src_acl
&& va
.va_acl
)
7215 kauth_acl_free(va
.va_acl
);
7224 * clone files or directories, target must not exist.
/*
 * clonefileat: clone the object named by uap->src (relative to
 * uap->src_dirfd) to uap->dst (relative to uap->dst_dirfd).
 *
 * Only CLONE_NOFOLLOW and CLONE_NOOWNERCOPY are accepted in uap->flags;
 * other bits are caught by the check below.  The source is looked up
 * honouring CLONE_NOFOLLOW, and clonefile_internal() is called with
 * data_read_authorised == FALSE, i.e. read access to the source data
 * still has to be authorized there.
 */
7228 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7229 __unused
int32_t *retval
)
7232 struct nameidata fromnd
;
7235 vfs_context_t ctx
= vfs_context_current();
7237 /* Check that the flags are valid. */
7238 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
))
7241 AUDIT_ARG(fd
, uap
->src_dirfd
);
7243 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7244 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7245 UIO_USERSPACE
, uap
->src
, ctx
);
7246 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
)))
7252 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: clone the object open on uap->src_fd to uap->dst
 * (relative to uap->dst_dirfd).
 *
 * Only CLONE_NOFOLLOW and CLONE_NOOWNERCOPY are accepted in uap->flags.
 * The source descriptor is resolved with fp_getfvp() and must have been
 * opened with FREAD; because of that check clonefile_internal() is
 * called with data_read_authorised == TRUE.  A vnode reference is taken
 * around the clone and the descriptor is released with file_drop().
 */
7260 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7261 __unused
int32_t *retval
)
7264 struct fileproc
*fp
;
7266 vfs_context_t ctx
= vfs_context_current();
7268 /* Check that the flags are valid. */
7269 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
))
7272 AUDIT_ARG(fd
, uap
->src_fd
);
7273 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
/* the source descriptor must be open for reading */
7277 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7278 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7283 if ((error
= vnode_getwithref(fvp
)))
7286 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7288 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7293 file_drop(uap
->src_fd
);
7298 * Rename files. Source and destination must either both be directories,
7299 * or both not be directories. If target is a directory, it must be empty.
7303 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7304 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7306 if (flags
& ~VFS_RENAME_FLAGS_MASK
)
7309 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
))
7314 struct nameidata
*fromnd
, *tond
;
7320 const char *oname
= NULL
;
7321 char *from_name
= NULL
, *to_name
= NULL
;
7322 int from_len
=0, to_len
=0;
7323 int holding_mntlock
;
7324 mount_t locked_mp
= NULL
;
7325 vnode_t oparent
= NULLVP
;
7327 fse_info from_finfo
, to_finfo
;
7329 int from_truncated
=0, to_truncated
;
7331 struct vnode_attr
*fvap
, *tvap
;
7333 /* carving out a chunk for structs that are too big to be on stack. */
7335 struct nameidata from_node
, to_node
;
7336 struct vnode_attr fv_attr
, tv_attr
;
7338 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7339 fromnd
= &__rename_data
->from_node
;
7340 tond
= &__rename_data
->to_node
;
7342 holding_mntlock
= 0;
7351 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7353 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7355 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7357 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7360 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7361 if ( (error
= nameiat(fromnd
, fromfd
)) )
7363 fdvp
= fromnd
->ni_dvp
;
7364 fvp
= fromnd
->ni_vp
;
7366 if (fvp
&& fvp
->v_type
== VDIR
)
7367 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7370 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7371 if ( (error
= nameiat(tond
, tofd
)) ) {
7373 * Translate error code for rename("dir1", "dir2/.").
7375 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
7379 tdvp
= tond
->ni_dvp
;
7383 #if DEVELOPMENT || DEBUG
7385 * XXX VSWAP: Check for entitlements or special flag here
7386 * so we can restrict access appropriately.
7388 #else /* DEVELOPMENT || DEBUG */
7390 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7395 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7399 #endif /* DEVELOPMENT || DEBUG */
7401 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7406 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7411 batched
= vnode_compound_rename_available(fdvp
);
7414 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7417 get_fse_info(fvp
, &from_finfo
, ctx
);
7419 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7424 fvap
= &__rename_data
->fv_attr
;
7428 get_fse_info(tvp
, &to_finfo
, ctx
);
7429 } else if (batched
) {
7430 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7435 tvap
= &__rename_data
->tv_attr
;
7440 #endif /* CONFIG_FSE */
7442 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
7443 if (from_name
== NULL
) {
7444 GET_PATH(from_name
);
7445 if (from_name
== NULL
) {
7451 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7453 if (to_name
== NULL
) {
7455 if (to_name
== NULL
) {
7461 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7465 * Claim: this check will never reject a valid rename.
7466 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7467 * Suppose fdvp and tdvp are not on the same mount.
7468 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7469 * then you can't move it to within another dir on the same mountpoint.
7470 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7472 * If this check passes, then we are safe to pass these vnodes to the same FS.
7474 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7478 goto skipped_lookup
;
7482 error
= vn_authorize_renamex_with_paths(fdvp
, fvp
, &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, flags
, NULL
);
7484 if (error
== ENOENT
) {
7485 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7486 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7488 * We encountered a race where after doing the namei, tvp stops
7489 * being valid. If so, simply re-drive the rename call from the
7501 * If the source and destination are the same (i.e. they're
7502 * links to the same vnode) and the target file system is
7503 * case sensitive, then there is nothing to do.
7505 * XXX Come back to this.
7511 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7512 * then assume that this file system is case sensitive.
7514 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7515 pathconf_val
!= 0) {
7521 * Allow the renaming of mount points.
7522 * - target must not exist
7523 * - target must reside in the same directory as source
7524 * - union mounts cannot be renamed
7525 * - "/" cannot be renamed
7527 * XXX Handle this in VFS after a continued lookup (if we missed
7528 * in the cache to start off)
7530 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7531 * we'll skip past here. The file system is responsible for
7532 * checking that @tvp is not a descendent of @fvp and vice versa
7533 * so it should always return EINVAL if either @tvp or @fvp is the
7536 if ((fvp
->v_flag
& VROOT
) &&
7537 (fvp
->v_type
== VDIR
) &&
7539 (fvp
->v_mountedhere
== NULL
) &&
7541 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7542 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7545 /* switch fvp to the covered vnode */
7546 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7547 if ( (vnode_getwithref(coveredvp
)) ) {
7557 * Check for cross-device rename.
7559 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7560 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7566 * If source is the same as the destination (that is the
7567 * same inode number) then there is nothing to do...
7568 * EXCEPT if the underlying file system supports case
7569 * insensitivity and is case preserving. In this case
7570 * the file system needs to handle the special case of
7571 * getting the same vnode as target (fvp) and source (tvp).
7573 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7574 * and _PC_CASE_PRESERVING can have this exception, and they need to
7575 * handle the special case of getting the same vnode as target and
7576 * source. NOTE: Then the target is unlocked going into vnop_rename,
7577 * so not to cause locking problems. There is a single reference on tvp.
7579 * NOTE - that fvp == tvp also occurs if they are hard linked and
7580 * that correct behaviour then is just to return success without doing
7583 * XXX filesystem should take care of this itself, perhaps...
7585 if (fvp
== tvp
&& fdvp
== tdvp
) {
7586 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7587 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7588 fromnd
->ni_cnd
.cn_namelen
)) {
7593 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7595 * we're holding a reference and lock
7596 * on locked_mp, but it no longer matches
7597 * what we want to do... so drop our hold
7599 mount_unlock_renames(locked_mp
);
7600 mount_drop(locked_mp
, 0);
7601 holding_mntlock
= 0;
7603 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7605 * serialize renames that re-shape
7606 * the tree... if holding_mntlock is
7607 * set, then we're ready to go...
7609 * first need to drop the iocounts
7610 * we picked up, second take the
7611 * lock to serialize the access,
7612 * then finally start the lookup
7613 * process over with the lock held
7615 if (!holding_mntlock
) {
7617 * need to grab a reference on
7618 * the mount point before we
7619 * drop all the iocounts... once
7620 * the iocounts are gone, the mount
7623 locked_mp
= fvp
->v_mount
;
7624 mount_ref(locked_mp
, 0);
7627 * nameidone has to happen before we vnode_put(tvp)
7628 * since it may need to release the fs_nodelock on the tvp
7637 * nameidone has to happen before we vnode_put(fdvp)
7638 * since it may need to release the fs_nodelock on the fvp
7645 mount_lock_renames(locked_mp
);
7646 holding_mntlock
= 1;
7652 * when we dropped the iocounts to take
7653 * the lock, we allowed the identity of
7654 * the various vnodes to change... if they did,
7655 * we may no longer be dealing with a rename
7656 * that reshapes the tree... once we're holding
7657 * the iocounts, the vnodes can't change type
7658 * so we're free to drop the lock at this point
7661 if (holding_mntlock
) {
7662 mount_unlock_renames(locked_mp
);
7663 mount_drop(locked_mp
, 0);
7664 holding_mntlock
= 0;
7668 // save these off so we can later verify that fvp is the same
7669 oname
= fvp
->v_name
;
7670 oparent
= fvp
->v_parent
;
7673 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7674 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7677 if (holding_mntlock
) {
7679 * we can drop our serialization
7682 mount_unlock_renames(locked_mp
);
7683 mount_drop(locked_mp
, 0);
7684 holding_mntlock
= 0;
7687 if (error
== EKEEPLOOKING
) {
7688 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7689 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7690 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7694 fromnd
->ni_vp
= fvp
;
7697 goto continue_lookup
;
7701 * We may encounter a race in the VNOP where the destination didn't
7702 * exist when we did the namei, but it does by the time we go and
7703 * try to create the entry. In this case, we should re-drive this rename
7704 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7705 * but other filesystems susceptible to this race could return it, too.
7707 if (error
== ERECYCLE
) {
7712 * For compound VNOPs, the authorization callback may return
7713 * ENOENT in case of racing hardlink lookups hitting the name
7714 * cache, redrive the lookup.
7716 if (batched
&& error
== ENOENT
) {
7717 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7718 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7727 /* call out to allow 3rd party notification of rename.
7728 * Ignore result of kauth_authorize_fileop call.
7730 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7731 KAUTH_FILEOP_RENAME
,
7732 (uintptr_t)from_name
, (uintptr_t)to_name
);
7733 if (flags
& VFS_RENAME_SWAP
) {
7734 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7735 KAUTH_FILEOP_RENAME
,
7736 (uintptr_t)to_name
, (uintptr_t)from_name
);
7740 if (from_name
!= NULL
&& to_name
!= NULL
) {
7741 if (from_truncated
|| to_truncated
) {
7742 // set it here since only the from_finfo gets reported up to user space
7743 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7747 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
7750 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
7754 add_fsevent(FSE_RENAME
, ctx
,
7755 FSE_ARG_STRING
, from_len
, from_name
,
7756 FSE_ARG_FINFO
, &from_finfo
,
7757 FSE_ARG_STRING
, to_len
, to_name
,
7758 FSE_ARG_FINFO
, &to_finfo
,
7760 if (flags
& VFS_RENAME_SWAP
) {
7762 * Strictly speaking, swap is the equivalent of
7763 * *three* renames. FSEvents clients should only take
7764 * the events as a hint, so we only bother reporting
7767 add_fsevent(FSE_RENAME
, ctx
,
7768 FSE_ARG_STRING
, to_len
, to_name
,
7769 FSE_ARG_FINFO
, &to_finfo
,
7770 FSE_ARG_STRING
, from_len
, from_name
,
7771 FSE_ARG_FINFO
, &from_finfo
,
7775 add_fsevent(FSE_RENAME
, ctx
,
7776 FSE_ARG_STRING
, from_len
, from_name
,
7777 FSE_ARG_FINFO
, &from_finfo
,
7778 FSE_ARG_STRING
, to_len
, to_name
,
7782 #endif /* CONFIG_FSE */
7785 * update filesystem's mount point data
7788 char *cp
, *pathend
, *mpname
;
7794 mp
= fvp
->v_mountedhere
;
7796 if (vfs_busy(mp
, LK_NOWAIT
)) {
7800 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7802 if (UIO_SEG_IS_USER_SPACE(segflg
))
7803 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7805 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7807 /* find current mount point prefix */
7808 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7809 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7813 /* find last component of target name */
7814 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7818 /* append name to prefix */
7819 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7820 bzero(pathend
, maxlen
);
7821 strlcpy(pathend
, mpname
, maxlen
);
7823 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7828 * fix up name & parent pointers. note that we first
7829 * check that fvp has the same name/parent pointers it
7830 * had before the rename call... this is a 'weak' check
7833 * XXX oparent and oname may not be set in the compound vnop case
7835 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7838 update_flags
= VNODE_UPDATE_NAME
;
7841 update_flags
|= VNODE_UPDATE_PARENT
;
7843 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7846 if (to_name
!= NULL
) {
7847 RELEASE_PATH(to_name
);
7850 if (from_name
!= NULL
) {
7851 RELEASE_PATH(from_name
);
7854 if (holding_mntlock
) {
7855 mount_unlock_renames(locked_mp
);
7856 mount_drop(locked_mp
, 0);
7857 holding_mntlock
= 0;
7861 * nameidone has to happen before we vnode_put(tdvp)
7862 * since it may need to release the fs_nodelock on the tdvp
7872 * nameidone has to happen before we vnode_put(fdvp)
7873 * since it may need to release the fs_nodelock on the fdvp
7883 * If things changed after we did the namei, then we will re-drive
7884 * this rename call from the top.
7891 FREE(__rename_data
, M_TEMP
);
7896 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7898 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7899 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
/*
 * renameatx_np: system call entry point that forwards to
 * renameat_internal() with the current VFS context, the caller's source
 * descriptor/path, user-space path addressing and uap->flags.
 *
 * NOTE(review): the argument line between the source pair and
 * UIO_USERSPACE (presumably the destination descriptor/path) is not
 * present in this listing -- confirm against the full file.
 */
7902 int renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
7904 return renameat_internal(
7905 vfs_context_current(),
7906 uap
->fromfd
, uap
->from
,
7908 UIO_USERSPACE
, uap
->flags
);
7912 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7914 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7915 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
/*
 * mkdir1at: common directory-creation path.
 *
 * Looks up `path` (addressed per `segflg`, relative to `fd`) for CREATE
 * with the parent locked, marks the lookup as a compound-mkdir candidate,
 * authorizes the operation, creates the directory with vn_create() using
 * the attributes in `vap` (va_type is forced to VDIR here), fixes up the
 * new vnode's name/parent identity if either is unset, and posts an
 * FSE_CREATE_DIR event.
 *
 * NOTE(review): this listing omits a number of lines (local declarations,
 * labels, error returns, cleanup calls); the comments below describe only
 * what is visible.
 */
7919 * Make a directory file.
7921 * Returns: 0 Success
7924 * vnode_authorize:???
7929 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7930 enum uio_seg segflg
)
7934 int update_flags
= 0;
7936 struct nameidata nd
;
7938 AUDIT_ARG(mode
, vap
->va_mode
);
7939 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7941 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7942 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
/* Resolve the path relative to fd. */
7945 error
= nameiat(&nd
, fd
);
/* Record whether the parent directory offers a compound mkdir VNOP. */
7956 batched
= vnode_compound_mkdir_available(dvp
);
7958 VATTR_SET(vap
, va_type
, VDIR
);
7962 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7963 * only get EXISTS or EISDIR for existing path components, and not that it could see
7964 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7965 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7967 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
/* Permission failures get a second, plain LOOKUP (see the note below). */
7968 if (error
== EACCES
|| error
== EPERM
) {
7976 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7977 * rather than EACCESS if the target exists.
7979 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7981 error2
= nameiat(&nd
, fd
);
7995 * make the directory
7997 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
/* EKEEPLOOKING from vn_create() sends control back to the lookup. */
7998 if (error
== EKEEPLOOKING
) {
8000 goto continue_lookup
;
8006 // Make sure the name & parent pointers are hooked up
8007 if (vp
->v_name
== NULL
)
8008 update_flags
|= VNODE_UPDATE_NAME
;
8009 if (vp
->v_parent
== NULLVP
)
8010 update_flags
|= VNODE_UPDATE_PARENT
;
8013 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
8016 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
8021 * nameidone has to happen before we vnode_put(dvp)
8022 * since it may need to release the fs_nodelock on the dvp
/*
 * mkdir_extended: system call entry point for mkdir with an optional ACL.
 *
 * When uap->xsecurity is non-null the file security descriptor is copied
 * in with kauth_copyinfilesec().  The requested mode is limited to
 * ACCESSPERMS and has the process's fd_cmask bits cleared; the ACL from
 * the copied descriptor is attached as va_acl when one is present.  The
 * directory is then created through mkdir1at() relative to AT_FDCWD and
 * any copied-in descriptor is released.
 *
 * NOTE(review): initialisation of `va`/`xsecdst`, the error return after
 * the copy-in, and the last mkdir1at() argument are not visible in this
 * listing.
 */
8035 * mkdir_extended: Create a directory; with extended security (ACL).
8037 * Parameters: p Process requesting to create the directory
8038 * uap User argument descriptor (see below)
8041 * Indirect: uap->path Path of directory to create
8042 * uap->mode Access permissions to set
8043 * uap->xsecurity ACL to set
8045 * Returns: 0 Success
8050 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
8053 kauth_filesec_t xsecdst
;
8054 struct vnode_attr va
;
8056 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
8059 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
8060 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
/* Apply the process umask to the permission bits requested by the caller. */
8064 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8065 if (xsecdst
!= NULL
)
8066 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
8068 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8070 if (xsecdst
!= NULL
)
8071 kauth_filesec_free(xsecdst
);
/*
 * mkdir: system call entry point.
 *
 * Builds a vnode_attr whose va_mode is uap->mode limited to ACCESSPERMS
 * with the process's fd_cmask bits cleared, then creates uap->path
 * relative to AT_FDCWD through mkdir1at() and returns its result.
 *
 * NOTE(review): the initialisation of `va` and the final mkdir1at()
 * argument are not visible in this listing.
 */
8076 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8078 struct vnode_attr va
;
8081 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8083 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: system call entry point.
 *
 * Same as mkdir above, except that uap->path is resolved relative to the
 * descriptor uap->fd instead of AT_FDCWD.
 *
 * NOTE(review): the initialisation of `va` and the final mkdir1at()
 * argument are not visible in this listing.
 */
8088 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8090 struct vnode_attr va
;
8093 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8095 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
/*
 * rmdirat_internal: common directory-removal path.
 *
 * Looks up `dirpath` (addressed per `segflg`, relative to `fd`) for DELETE
 * with the parent locked and the lookup flagged as a compound-rmdir
 * candidate.  The visible checks are: the target must not be a filesystem
 * root (VROOT); on non-DEVELOPMENT/DEBUG builds a swap vnode may only be
 * handled by the kernel context; and vn_authorize_rmdir() must succeed.
 * File-system-event information and the path are gathered when an event
 * is needed or kauth file-op listeners exist.  After vn_rmdir(), kauth
 * listeners are notified (KAUTH_FILEOP_DELETE), a removed hard link has
 * its parent identity reset, and an FSE_DELETE event is posted.  The
 * whole sequence sits in a do/while loop that repeats while restart_flag
 * is non-zero.
 *
 * NOTE(review): many lines (declarations, labels, error returns, cleanup)
 * are not present in this listing; the comments describe only what is
 * visible.
 */
8100 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8101 enum uio_seg segflg
)
8105 struct nameidata nd
;
8108 int has_listeners
= 0;
8112 struct vnode_attr va
;
8113 #endif /* CONFIG_FSE */
8114 struct vnode_attr
*vap
= NULL
;
8115 int restart_count
= 0;
8121 * This loop exists to restart rmdir in the unlikely case that two
8122 * processes are simultaneously trying to remove the same directory
8123 * containing orphaned appleDouble files.
8126 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8127 segflg
, dirpath
, ctx
);
8128 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8133 error
= nameiat(&nd
, fd
);
/* Record whether the target offers a compound rmdir VNOP. */
8141 batched
= vnode_compound_rmdir_available(vp
);
8143 if (vp
->v_flag
& VROOT
) {
8145 * The root of a mounted filesystem cannot be deleted.
8151 #if DEVELOPMENT || DEBUG
8153 * XXX VSWAP: Check for entitlements or special flag here
8154 * so we can restrict access appropriately.
8156 #else /* DEVELOPMENT || DEBUG */
8158 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8162 #endif /* DEVELOPMENT || DEBUG */
8165 * Removed a check here; we used to abort if vp's vid
8166 * was not the same as what we'd seen the last time around.
8167 * I do not think that check was valid, because if we retry
8168 * and all dirents are gone, the directory could legitimately
8169 * be recycled but still be present in a situation where we would
8170 * have had permission to delete. Therefore, we won't make
8171 * an effort to preserve that check now that we may not have a
8176 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
/* ENOENT from authorization is retried, bounded by MAX_AUTHORIZE_ENOENT_RETRIES. */
8178 if (error
== ENOENT
) {
8179 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8180 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8191 if (!vnode_compound_rmdir_available(dvp
)) {
8192 panic("No error, but no compound rmdir?");
8199 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8202 get_fse_info(vp
, &finfo
, ctx
);
8204 error
= vfs_get_notify_attributes(&va
);
8213 has_listeners
= kauth_authorize_fileop_has_listeners();
8214 if (need_event
|| has_listeners
) {
8223 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
8226 finfo
.mode
|= FSE_TRUNCATED_PATH
;
8231 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8234 /* Couldn't find a vnode */
8238 if (error
== EKEEPLOOKING
) {
8239 goto continue_lookup
;
8240 } else if (batched
&& error
== ENOENT
) {
8241 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8242 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8244 * For compound VNOPs, the authorization callback
8245 * may return ENOENT in case of racing hard link lookups
8246 * redrive the lookup.
8253 #if CONFIG_APPLEDOUBLE
8255 * Special case to remove orphaned AppleDouble
8256 * files. I don't like putting this in the kernel,
8257 * but carbon does not like putting this in carbon either,
8260 if (error
== ENOTEMPTY
) {
8261 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8262 if (error
== EBUSY
) {
8268 * Assuming everything went well, we will try the RMDIR again
8271 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8273 #endif /* CONFIG_APPLEDOUBLE */
8275 * Call out to allow 3rd party notification of delete.
8276 * Ignore result of kauth_authorize_fileop call.
8279 if (has_listeners
) {
8280 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8281 KAUTH_FILEOP_DELETE
,
8286 if (vp
->v_flag
& VISHARDLINK
) {
8287 // see the comment in unlink1() about why we update
8288 // the parent of a hard link when it is removed
8289 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8295 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8297 add_fsevent(FSE_DELETE
, ctx
,
8298 FSE_ARG_STRING
, len
, path
,
8299 FSE_ARG_FINFO
, &finfo
,
8311 * nameidone has to happen before we vnode_put(dvp)
8312 * since it may need to release the fs_nodelock on the dvp
/*
 * restart_flag is written by rmdir_remove_orphaned_appleDouble() above.
 * When it is clear one waiter on vp is woken; the tsleep() on vp uses the
 * wait message "rm AD" and a timeout of 1.
 * NOTE(review): the branch structure around tsleep() is not visible here.
 */
8320 if (restart_flag
== 0) {
8321 wakeup_one((caddr_t
)vp
);
8324 tsleep(vp
, PVFS
, "rm AD", 1);
8326 } while (restart_flag
!= 0);
8333 * Remove a directory file.
8337 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8339 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8340 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * Record-size helpers used when converting between the legacy
 * `struct dirent` layout and the extended `struct direntry` layout.
 * Every macro parameter is parenthesized so that an argument containing
 * a low-precedence operator (for example a conditional expression)
 * expands to the intended value.
 */

/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/* Get dirent length padded to 4 byte alignment */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of the record's last byte) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
/*
 * vnode_readdir64: read directory entries from `vp` into `uio` in the
 * extended (struct direntry) format.
 *
 * If the filesystem advertises VFC_VFSREADDIR_EXTENDED and the mount does
 * not have MNTK_DENY_READDIREXT set, the request is passed straight to
 * VNOP_READDIR().  Otherwise legacy struct dirent records are read into a
 * temporary kernel buffer, each record is validated, converted into a
 * struct direntry and moved to the caller's uio with uiomove().
 *
 * NOTE(review): declarations, error returns and closing braces are not
 * all present in this listing.
 */
8356 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8357 int *numdirent
, vfs_context_t ctxp
)
8359 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8360 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8361 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8362 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8367 struct direntry
*entry64
;
8373 * We're here because the underlying file system does not
8374 * support direnties or we mounted denying support so we must
8375 * fall back to dirents and convert them to direntries.
8377 * Our kernel buffer needs to be smaller since re-packing will
8378 * expand each dirent. The worse case (when the name length
8379 * is 3 or less) corresponds to a struct direntry size of 32
8380 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8381 * (4-byte aligned). So having a buffer that is 3/8 the size
8382 * will prevent us from reading more than we can pack.
8384 * Since this buffer is wired memory, we will limit the
8385 * buffer size to a maximum of 32K. We would really like to
8386 * use 32K in the MIN(), but we use magic number 87371 to
8387 * prevent uio_resid() * 3 / 8 from overflowing.
8389 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8390 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8391 if (bufptr
== NULL
) {
/* Kernel-space uio over the temporary buffer, starting at the caller's offset. */
8395 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8396 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8397 auio
->uio_offset
= uio
->uio_offset
;
/* The legacy read is issued with flags 0, not the caller's flags. */
8399 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8401 dep
= (struct dirent
*)bufptr
;
8402 bytesread
= bufsize
- uio_resid(auio
);
/* Scratch record, reused for each converted entry. */
8404 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8407 * Convert all the entries and copy them out to user's buffer.
8409 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8410 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
/*
 * Reject a record that extends past the bytes actually read, or whose
 * d_reclen is smaller than its name length requires.
 */
8412 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
8413 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
8414 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
8415 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
8416 vp
->v_name
? vp
->v_name
: "<unknown>");
8421 bzero(entry64
, enbufsize
);
8422 /* Convert a dirent to a dirent64. */
8423 entry64
->d_ino
= dep
->d_ino
;
8424 entry64
->d_seekoff
= 0;
8425 entry64
->d_reclen
= enbufsize
;
8426 entry64
->d_namlen
= dep
->d_namlen
;
8427 entry64
->d_type
= dep
->d_type
;
/* Copies d_namlen + 1 bytes, i.e. one byte beyond the name itself. */
8428 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8430 /* Move to next entry. */
8431 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8433 /* Copy entry64 to user's buffer. */
8434 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8437 /* Update the real offset using the offset we got from VNOP_READDIR. */
8439 uio
->uio_offset
= auio
->uio_offset
;
8442 FREE(bufptr
, M_TEMP
);
8443 FREE(entry64
, M_TEMP
);
/*
 * Upper bound (128 * 1024 * 1024 bytes) that getdirentries_common() applies
 * to the caller-supplied buffer size.
 */
8448 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
/*
 * getdirentries_common: shared implementation behind getdirentries() and
 * getdirentries64().
 *
 * Resolves `fd` to its fileproc and vnode, requires the file to be open
 * for reading, clamps `bufsize`, runs the MAC offset-change and readdir
 * checks, and requires the vnode to be a directory.  A user-space uio is
 * built over `bufp` at the file's current offset; entries are read with
 * vnode_readdir64() when VNODE_READDIR_EXTENDED is set in `flags`, or
 * with VNOP_READDIR() otherwise, and the resulting offset is stored back
 * in the file.  When nothing was read, union handling may switch the
 * file to another directory vnode.  On the visible success path
 * *bytesread receives the number of bytes placed in the buffer.
 *
 * NOTE(review): declarations, error returns, labels and the store to
 * *offset are not visible in this listing.
 */
8451 * Read a block of directory entries in a file system independent format.
8454 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8455 off_t
*offset
, int flags
)
8458 struct vfs_context context
= *vfs_context_current(); /* local copy */
8459 struct fileproc
*fp
;
8461 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8463 int error
, eofflag
, numdirent
;
8464 char uio_buf
[ UIO_SIZEOF(1) ];
8466 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
/* The descriptor must have been opened for reading. */
8470 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8471 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
/* Clamp oversized requests instead of rejecting them. */
8476 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
8477 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8480 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8484 if ( (error
= vnode_getwithref(vp
)) ) {
8487 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8490 if (vp
->v_type
!= VDIR
) {
8491 (void)vnode_put(vp
);
8497 error
= mac_vnode_check_readdir(&context
, vp
);
8499 (void)vnode_put(vp
);
/* Build the uio on the stack buffer, starting at the file's current offset. */
8504 loff
= fp
->f_fglob
->fg_offset
;
8505 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8506 uio_addiov(auio
, bufp
, bufsize
);
8508 if (flags
& VNODE_READDIR_EXTENDED
) {
8509 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8510 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8512 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8513 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8516 (void)vnode_put(vp
);
/* An untouched residual count means no entries were returned. */
8520 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
8521 if (union_dircheckp
) {
8522 error
= union_dircheckp(&vp
, fp
, &context
);
8526 (void)vnode_put(vp
);
8531 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8532 struct vnode
*tvp
= vp
;
/* On a successful traversal the file is repointed at the new vnode and rewound. */
8533 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8535 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8536 fp
->f_fglob
->fg_offset
= 0;
8550 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: system call entry point for the legacy dirent format.
 *
 * Calls getdirentries_common() with flags 0, copies the resulting offset
 * out to uap->basep as a user64_long_t or a user32_long_t depending on
 * proc_is64bit(p), and stores the byte count in *retval.
 *
 * NOTE(review): the local declarations, the success test around the
 * copy-out and the return statement are not visible in this listing.
 */
8558 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8564 AUDIT_ARG(fd
, uap
->fd
);
8565 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
/* The width of the value written to basep follows the calling process. */
8568 if (proc_is64bit(p
)) {
8569 user64_long_t base
= (user64_long_t
)offset
;
8570 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8572 user32_long_t base
= (user32_long_t
)offset
;
8573 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8575 *retval
= bytesread
;
/*
 * getdirentries64: system call entry point for the extended direntry
 * format.
 *
 * Calls getdirentries_common() with VNODE_READDIR_EXTENDED, stores the
 * byte count in *retval and copies the resulting off_t offset out to
 * uap->position.
 *
 * NOTE(review): the local declarations, the success test and the return
 * statement are not visible in this listing.
 */
8581 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8587 AUDIT_ARG(fd
, uap
->fd
);
8588 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8591 *retval
= bytesread
;
8592 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
/*
 * umask1: common implementation of the umask system calls.
 *
 * Reports the previous creation mask through *retval and installs
 * `newmask` limited to the ALLPERMS bits.  The `fsec` argument is marked
 * __unused; UMASK_NOXSECURITY is the placeholder value umask() passes
 * for it.
 *
 * NOTE(review): the assignment of `fdp` and the return statement are not
 * visible in this listing; presumably fdp is the process's file
 * descriptor table -- confirm.
 */
8599 * Set the mode mask for creation of filesystem nodes.
8600 * XXX implement xsecurity
8602 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8604 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8606 struct filedesc
*fdp
;
8608 AUDIT_ARG(mask
, newmask
);
/* Hand back the old mask before overwriting it. */
8611 *retval
= fdp
->fd_cmask
;
8612 fdp
->fd_cmask
= newmask
& ALLPERMS
;
/*
 * umask_extended: system call entry point for umask with an optional ACL.
 *
 * xsecdst starts as KAUTH_FILESEC_NONE; when uap->xsecurity is non-null
 * the descriptor is copied in with kauth_copyinfilesec().  The mask
 * itself is applied by umask1(), and a copied-in descriptor is freed
 * afterwards.
 *
 * NOTE(review): the declaration of `ciferror`, the error return after
 * the copy-in, the else branch and the final return are not visible in
 * this listing.
 */
8618 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8620 * Parameters: p Process requesting to set the umask
8621 * uap User argument descriptor (see below)
8622 * retval umask of the process (parameter p)
8624 * Indirect: uap->newmask umask to set
8625 * uap->xsecurity ACL to set
8627 * Returns: 0 Success
8632 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8635 kauth_filesec_t xsecdst
;
8637 xsecdst
= KAUTH_FILESEC_NONE
;
8638 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8639 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
8642 xsecdst
= KAUTH_FILESEC_NONE
;
8645 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
/* Only a descriptor that was actually copied in needs freeing. */
8647 if (xsecdst
!= KAUTH_FILESEC_NONE
)
8648 kauth_filesec_free(xsecdst
);
8653 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8655 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
/*
 * revoke: system call entry point.
 *
 * Looks up the path (following symlinks) and, as far as this listing
 * shows, proceeds only for character or block device vnodes; a block
 * device that is mounted on is singled out by a separate check.  After
 * the MAC revoke check, the vnode's owner uid is fetched: the caller
 * must either own the vnode or pass suser().  VNOP_REVOKE() with
 * REVOKEALL is issued when the vnode has a use count or is aliased.
 *
 * NOTE(review): the error codes assigned by the failing checks, the
 * namei() call and the cleanup path are not visible in this listing.
 */
8659 * Void all references to file by ripping underlying filesystem
8664 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8667 struct vnode_attr va
;
8668 vfs_context_t ctx
= vfs_context_current();
8670 struct nameidata nd
;
8672 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
/* Only character and block devices pass this check. */
8681 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
8686 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
8692 error
= mac_vnode_check_revoke(ctx
, vp
);
8698 VATTR_WANTED(&va
, va_uid
);
8699 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
/* suser() is consulted only when the caller is not the owner. */
8701 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
8702 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
8704 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
8705 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
/*
 * getdirentriesattr: system call entry point that enumerates a directory
 * and returns attributes for each entry.
 *
 * Copies in the attribute list and the requested entry count, resolves
 * the descriptor to its fileproc/vnode, requires it to be open for
 * reading and to be a directory, and runs the MAC offset-change and
 * readdir checks.  A user-space uio is built over uap->buffer at the
 * file's current offset.  The caller needs KAUTH_VNODE_LIST_DIRECTORY,
 * plus KAUTH_VNODE_SEARCH when anything other than the name is
 * requested.  After VNOP_READDIRATTR(), union-mount handling may
 * repoint the file at another directory; the file offset is updated and
 * count, newstate and the starting offset are copied out.  *retval
 * receives eofflag.
 *
 * NOTE(review): declarations, error returns, labels and some argument
 * lines are not visible in this listing.
 */
8713 * HFS/HFS PlUS SPECIFIC SYSTEM CALLS
8714 * The following system calls are designed to support features
8715 * which are specific to the HFS & HFS Plus volume formats
8720 * Obtain attribute information on objects in a directory while enumerating
8725 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
8728 struct fileproc
*fp
;
8730 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8731 uint32_t count
= 0, savecount
= 0;
8732 uint32_t newstate
= 0;
8735 struct attrlist attributelist
;
8736 vfs_context_t ctx
= vfs_context_current();
8738 char uio_buf
[ UIO_SIZEOF(1) ];
8739 kauth_action_t action
;
8743 /* Get the attributes into kernel space */
8744 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
8747 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
8751 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
/* The descriptor must have been opened for reading. */
8754 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8755 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8762 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
8769 if ( (error
= vnode_getwithref(vp
)) )
8772 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8775 if (vp
->v_type
!= VDIR
) {
8776 (void)vnode_put(vp
);
8782 error
= mac_vnode_check_readdir(ctx
, vp
);
8784 (void)vnode_put(vp
);
8789 /* set up the uio structure which will contain the users return buffer */
8790 loff
= fp
->f_fglob
->fg_offset
;
8791 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8792 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
8795 * If the only item requested is file names, we can let that past with
8796 * just LIST_DIRECTORY. If they want any other attributes, that means
8797 * they need SEARCH as well.
8799 action
= KAUTH_VNODE_LIST_DIRECTORY
;
8800 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
8801 attributelist
.fileattr
|| attributelist
.dirattr
)
8802 action
|= KAUTH_VNODE_SEARCH
;
8804 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
8806 /* Believe it or not, uap->options only has 32-bits of valid
8807 * info, so truncate before extending again */
/* `count` is both the requested maximum (by value) and the returned total (by address). */
8809 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8810 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8814 (void) vnode_put(vp
);
8819 * If we've got the last entry of a directory in a union mount
8820 * then reset the eofflag and pretend there's still more to come.
8821 * The next call will again set eofflag and the buffer will be empty,
8822 * so traverse to the underlying directory and do the directory
/* `&` binds tighter than `&&`: eofflag set AND the mount has MNT_UNION. */
8825 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8826 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8828 } else { // Empty buffer
8829 struct vnode
*tvp
= vp
;
8830 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
/* Reference the new vnode, repoint the file at it, then release the old one. */
8831 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8832 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8833 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8835 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8843 (void)vnode_put(vp
);
8847 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8849 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8851 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
/* basep receives loff, the offset captured before the read. */
8853 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8856 *retval
= eofflag
; /* similar to getdirentries */
8860 return (error
); /* return error earlier, an retval of 0 or 1 now */
8862 } /* end of getdirentriesattr system call */
/*
 * exchangedata: system call entry point that swaps the data of two files.
 *
 * Looks up uap->path1 and uap->path2 (following symlinks unless
 * FSOPT_NOFOLLOW is set in uap->options).  The visible checks require
 * both vnodes to be on the same mount and to be regular files, and the
 * caller to hold read and write data rights on both.  When an
 * FSE_EXCHANGE event is needed or kauth file-op listeners exist, both
 * paths and their fse_info are captured first.  After VNOP_EXCHANGE(),
 * listeners are notified, the two vnodes' cached names (and parents,
 * when they differ) are swapped, and an FSE_EXCHANGE event is posted.
 *
 * NOTE(review): declarations, the same-file test, error assignments,
 * cleanup and several argument lines are not visible in this listing.
 */
8865 * Exchange data between two files
8870 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8873 struct nameidata fnd
, snd
;
8874 vfs_context_t ctx
= vfs_context_current();
8878 u_int32_t nameiflags
;
8882 int from_truncated
=0, to_truncated
=0;
8884 fse_info f_finfo
, s_finfo
;
8888 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8890 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8891 UIO_USERSPACE
, uap
->path1
, ctx
);
8893 error
= namei(&fnd
);
8900 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8901 UIO_USERSPACE
, uap
->path2
, ctx
);
8903 error
= namei(&snd
);
8912 * if the files are the same, return an inval error
8920 * if the files are on different volumes, return an error
8922 if (svp
->v_mount
!= fvp
->v_mount
) {
8927 /* If they're not files, return an error */
8928 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8934 error
= mac_vnode_check_exchangedata(ctx
,
/* Both files need read and write data access; the first failure wins. */
8939 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8940 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8945 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8947 kauth_authorize_fileop_has_listeners()) {
8950 if (fpath
== NULL
|| spath
== NULL
) {
8955 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8956 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8959 get_fse_info(fvp
, &f_finfo
, ctx
);
8960 get_fse_info(svp
, &s_finfo
, ctx
);
8961 if (from_truncated
|| to_truncated
) {
8962 // set it here since only the f_finfo gets reported up to user space
8963 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8967 /* Ok, make the call */
8968 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8971 const char *tmpname
;
8973 if (fpath
!= NULL
&& spath
!= NULL
) {
8974 /* call out to allow 3rd party notification of exchangedata.
8975 * Ignore result of kauth_authorize_fileop call.
8977 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8978 (uintptr_t)fpath
, (uintptr_t)spath
);
/* Swap the cached names of the two vnodes. */
8982 tmpname
= fvp
->v_name
;
8983 fvp
->v_name
= svp
->v_name
;
8984 svp
->v_name
= tmpname
;
/* Parents are swapped only when they differ. */
8986 if (fvp
->v_parent
!= svp
->v_parent
) {
8989 tmp
= fvp
->v_parent
;
8990 fvp
->v_parent
= svp
->v_parent
;
8991 svp
->v_parent
= tmp
;
8993 name_cache_unlock();
8996 if (fpath
!= NULL
&& spath
!= NULL
) {
8997 add_fsevent(FSE_EXCHANGE
, ctx
,
8998 FSE_ARG_STRING
, flen
, fpath
,
8999 FSE_ARG_FINFO
, &f_finfo
,
9000 FSE_ARG_STRING
, slen
, spath
,
9001 FSE_ARG_FINFO
, &s_finfo
,
9009 RELEASE_PATH(fpath
);
9011 RELEASE_PATH(spath
);
9019 * Return (in MB) the amount of freespace on the given vnode's volume.
9021 uint32_t freespace_mb(vnode_t vp
);
9024 freespace_mb(vnode_t vp
)
9026 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
9027 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
9028 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
9036 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
9041 struct nameidata nd
;
9042 struct user64_fssearchblock searchblock
;
9043 struct searchstate
*state
;
9044 struct attrlist
*returnattrs
;
9045 struct timeval timelimit
;
9046 void *searchparams1
,*searchparams2
;
9048 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9049 uint32_t nummatches
;
9051 uint32_t nameiflags
;
9052 vfs_context_t ctx
= vfs_context_current();
9053 char uio_buf
[ UIO_SIZEOF(1) ];
9055 /* Start by copying in fsearchblock parameter list */
9056 if (IS_64BIT_PROCESS(p
)) {
9057 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
9058 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
9059 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
9062 struct user32_fssearchblock tmp_searchblock
;
9064 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
9065 // munge into 64-bit version
9066 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
9067 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
9068 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
9069 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9071 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9072 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9074 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9075 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9076 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9077 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9078 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9079 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9080 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9085 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9087 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9088 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
9091 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9092 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9093 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9096 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9097 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9098 /* assumes the size is still 556 bytes it will continue to work */
9100 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9101 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
9103 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9105 /* Now set up the various pointers to the correct place in our newly allocated memory */
9107 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9108 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9109 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
9111 /* Now copy in the stuff given our local variables. */
9113 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
9116 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
9119 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
9122 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
9126 * When searching a union mount, need to set the
9127 * start flag at the first call on each layer to
9128 * reset state for the new volume.
9130 if (uap
->options
& SRCHFS_START
)
9131 state
->ss_union_layer
= 0;
9133 uap
->options
|= state
->ss_union_flags
;
9134 state
->ss_union_flags
= 0;
9137 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9138 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9139 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9140 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9141 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9144 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
9145 attrreference_t
* string_ref
;
9146 u_int32_t
* start_length
;
9147 user64_size_t param_length
;
9149 /* validate searchparams1 */
9150 param_length
= searchblock
.sizeofsearchparams1
;
9151 /* skip the word that specifies length of the buffer */
9152 start_length
= (u_int32_t
*) searchparams1
;
9153 start_length
= start_length
+1;
9154 string_ref
= (attrreference_t
*) start_length
;
9156 /* ensure no negative offsets or too big offsets */
9157 if (string_ref
->attr_dataoffset
< 0 ) {
9161 if (string_ref
->attr_length
> MAXPATHLEN
) {
9166 /* Check for pointer overflow in the string ref */
9167 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
9172 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
9176 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
9182 /* set up the uio structure which will contain the users return buffer */
9183 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9184 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
9187 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
9188 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
9189 UIO_USERSPACE
, uap
->path
, ctx
);
9198 * Switch to the root vnode for the volume
9200 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
9207 * If it's a union mount, the path lookup takes
9208 * us to the top layer. But we may need to descend
9209 * to a lower layer. For non-union mounts the layer
9212 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
9213 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
9216 vp
= vp
->v_mount
->mnt_vnodecovered
;
9222 error
= vnode_getwithref(vp
);
9229 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
9238 * If searchblock.maxmatches == 0, then skip the search. This has happened
9239 * before and sometimes the underlying code doesn't deal with it well.
9241 if (searchblock
.maxmatches
== 0) {
9247 * All right, we have everything we need, so let's make that call.
9249 * We keep special track of the return value from the file system:
9250 * EAGAIN is an acceptable error condition that shouldn't keep us
9251 * from copying out any results...
9254 fserror
= VNOP_SEARCHFS(vp
,
9257 &searchblock
.searchattrs
,
9258 (u_long
)searchblock
.maxmatches
,
9262 (u_long
)uap
->scriptcode
,
9263 (u_long
)uap
->options
,
9265 (struct searchstate
*) &state
->ss_fsstate
,
9269 * If it's a union mount we need to be called again
9270 * to search the mounted-on filesystem.
9272 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
9273 state
->ss_union_flags
= SRCHFS_START
;
9274 state
->ss_union_layer
++; // search next layer down
9282 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9283 search state. Everything was already put into the return buffer by the vop call. */
9285 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
9288 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
9295 FREE(searchparams1
,M_TEMP
);
9300 } /* end of searchfs system call */
9302 #else /* CONFIG_SEARCHFS */
9305 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9310 #endif /* CONFIG_SEARCHFS */
9313 lck_grp_attr_t
* nspace_group_attr
;
9314 lck_attr_t
* nspace_lock_attr
;
9315 lck_grp_t
* nspace_mutex_group
;
9317 lck_mtx_t nspace_handler_lock
;
9318 lck_mtx_t nspace_handler_exclusion_lock
;
9320 time_t snapshot_timestamp
=0;
9321 int nspace_allow_virtual_devs
=0;
9323 void nspace_handler_init(void);
9325 typedef struct nspace_item_info
{
9335 #define MAX_NSPACE_ITEMS 128
9336 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9337 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
9338 uint32_t nspace_token_id
=0;
9339 uint32_t nspace_handler_timeout
= 15; // seconds
/*
 * State bits kept in nspace_items[].flags.
 *
 * NEW         - stored when a waiter fills a slot; a handler looks for it
 *               when scanning for work and clears it on pick-up.
 * PROCESSING  - set by the handler at the moment it clears NEW.
 * DEAD        - not referenced in this part of the file.
 * CANCELLED   - when the waiter sees this it takes the slot's token value
 *               as its error code.
 * DONE        - stored when an item is retired (handler process exit, or
 *               the handler finishing with the item), just before the
 *               waiter on that slot is woken.
 * RESET_TIMER - checked by the waiter after a timed-out sleep and cleared
 *               there.
 */
9341 #define NSPACE_ITEM_NEW 0x0001
9342 #define NSPACE_ITEM_PROCESSING 0x0002
9343 #define NSPACE_ITEM_DEAD 0x0004
9344 #define NSPACE_ITEM_CANCELLED 0x0008
9345 #define NSPACE_ITEM_DONE 0x0010
9346 #define NSPACE_ITEM_RESET_TIMER 0x0020
/* Event-type bits: one per handler type (see nspace_item_flags_for_type). */
9348 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9349 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
/* Mask covering every event-type bit. */
9351 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9353 //#pragma optimization_level 0
9356 NSPACE_HANDLER_NSPACE
= 0,
9357 NSPACE_HANDLER_SNAPSHOT
= 1,
9359 NSPACE_HANDLER_COUNT
,
9363 uint64_t handler_tid
;
9364 struct proc
*handler_proc
;
9368 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
9370 /* namespace fsctl functions */
9371 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9372 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9373 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9374 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9375 static int nspace_is_special_process(struct proc
*proc
);
9376 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9377 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
9378 static int validate_namespace_args (int is64bit
, int size
);
9379 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
/*
 * Report whether an item's flags belong to the given handler type.
 *
 * Only the event-type bits (NSPACE_ITEM_ALL_EVENT_TYPES) of event_flags are
 * looked at, and they must equal exactly the one bit tied to the handler:
 * NSPACE_ITEM_NSPACE_EVENT for NSPACE_HANDLER_NSPACE and
 * NSPACE_ITEM_SNAPSHOT_EVENT for NSPACE_HANDLER_SNAPSHOT.  An item carrying
 * both event bits therefore matches neither handler.
 *
 * For the two known types the result is the value of that comparison
 * (1 on a match, 0 otherwise).  Any other nspace_type is reported with
 * printf.
 */
9382 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9384 switch(nspace_type
) {
9385 case NSPACE_HANDLER_NSPACE
:
9386 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9387 case NSPACE_HANDLER_SNAPSHOT
:
9388 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
/* NOTE(review): the format says %u but the argument is cast to int. */
9390 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
/*
 * Give the NSPACE_ITEM_* event-type bit that marks items destined for the
 * given handler type: NSPACE_ITEM_NSPACE_EVENT for NSPACE_HANDLER_NSPACE,
 * NSPACE_ITEM_SNAPSHOT_EVENT for NSPACE_HANDLER_SNAPSHOT.  Any other
 * nspace_type is reported with printf.
 */
9395 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
9397 switch(nspace_type
) {
9398 case NSPACE_HANDLER_NSPACE
:
9399 return NSPACE_ITEM_NSPACE_EVENT
;
9400 case NSPACE_HANDLER_SNAPSHOT
:
9401 return NSPACE_ITEM_SNAPSHOT_EVENT
;
/* NOTE(review): the format says %u but the argument is cast to int. */
9403 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
/*
 * Give the open mode used when a vnode is opened on behalf of the given
 * handler type.  Both modes include O_EVTONLY; the namespace handler gets
 * read and write access (FREAD | FWRITE), the snapshot handler read access
 * only (FREAD).  Any other nspace_type is reported with printf.
 */
9408 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
9410 switch(nspace_type
) {
9411 case NSPACE_HANDLER_NSPACE
:
9412 return FREAD
| FWRITE
| O_EVTONLY
;
9413 case NSPACE_HANDLER_SNAPSHOT
:
9414 return FREAD
| O_EVTONLY
;
/* NOTE(review): the format says %u but the argument is cast to int. */
9416 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
9421 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
9423 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9424 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9425 return NSPACE_HANDLER_NSPACE
;
9426 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9427 return NSPACE_HANDLER_SNAPSHOT
;
9429 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9430 return NSPACE_HANDLER_NSPACE
;
/*
 * Check whether proc is currently registered as one of the namespace
 * handlers, by comparing it with the handler_proc field of every
 * nspace_handlers[] entry (indices 0 .. NSPACE_HANDLER_COUNT - 1).
 */
9434 static inline int nspace_is_special_process(struct proc
*proc
)
9437 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9438 if (proc
== nspace_handlers
[i
].handler_proc
)
9445 nspace_handler_init(void)
9447 nspace_lock_attr
= lck_attr_alloc_init();
9448 nspace_group_attr
= lck_grp_attr_alloc_init();
9449 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9450 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9451 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9452 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
/*
 * Tear down namespace-handler state that belongs to process p.
 *
 * Every nspace_handlers[] entry whose handler_proc is p has its
 * handler_tid and handler_proc cleared, and the event-type bit for that
 * entry is collected into event_mask.  The case where p owned no entry
 * (event_mask == 0) is tested for before the item table is examined.
 *
 * With nspace_handler_lock held, snapshot_timestamp is zeroed if p was the
 * snapshot handler, and the item table is walked: an item that is NEW or
 * PROCESSING and carries one of p's event bits gets VNEEDSSNAPSHOT cleared
 * on its vnode (if set), is reset to NSPACE_ITEM_DONE with vp, vid and
 * token cleared, and the thread sleeping on that slot's vp address is
 * woken.  Last, sleepers on nspace_item_idx are woken and the lock is
 * dropped.
 */
9456 nspace_proc_exit(struct proc
*p
)
9458 int i
, event_mask
= 0;
/* Unregister p from every handler slot it occupies. */
9460 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9461 if (p
== nspace_handlers
[i
].handler_proc
) {
9462 event_mask
|= nspace_item_flags_for_type(i
);
9463 nspace_handlers
[i
].handler_tid
= 0;
9464 nspace_handlers
[i
].handler_proc
= NULL
;
9468 if (event_mask
== 0) {
9472 lck_mtx_lock(&nspace_handler_lock
);
9473 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9474 // if this process was the snapshot handler, zero snapshot_timestamp
9475 snapshot_timestamp
= 0;
9479 // unblock anyone that's waiting for the handler that died
9481 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9482 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9484 if ( nspace_items
[i
].flags
& event_mask
) {
/* Drop the needs-snapshot mark under the vnode lock before retiring the item. */
9486 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9487 vnode_lock_spin(nspace_items
[i
].vp
);
9488 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9489 vnode_unlock(nspace_items
[i
].vp
);
9491 nspace_items
[i
].vp
= NULL
;
9492 nspace_items
[i
].vid
= 0;
9493 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9494 nspace_items
[i
].token
= 0;
/* The waiter sleeps on the address of the slot's vp field. */
9496 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9501 wakeup((caddr_t
)&nspace_item_idx
);
9502 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument: vp and op are forwarded unchanged and NULL is passed as
 * arg.  The result of resolve_nspace_item_ext() is returned as is.
 */
9507 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
9509 return resolve_nspace_item_ext(vp
, op
, NULL
);
9513 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9515 int i
, error
, keep_waiting
;
9517 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9519 // only allow namespace events on regular files, directories and symlinks.
9520 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9525 // if this is a snapshot event and the vnode is on a
9526 // disk image just pretend nothing happened since any
9527 // change to the disk image will cause the disk image
9528 // itself to get backed up and this avoids multi-way
9529 // deadlocks between the snapshot handler and the ever
9530 // popular diskimages-helper process. the variable
9531 // nspace_allow_virtual_devs allows this behavior to
9532 // be overridden (for use by the Mobile TimeMachine
9533 // testing infrastructure which uses disk images)
9535 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9536 && (vp
->v_mount
!= NULL
)
9537 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9538 && !nspace_allow_virtual_devs
) {
9543 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9544 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9548 if (nspace_is_special_process(current_proc())) {
9552 lck_mtx_lock(&nspace_handler_lock
);
9555 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9556 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
9561 if (i
>= MAX_NSPACE_ITEMS
) {
9562 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9563 if (nspace_items
[i
].flags
== 0) {
9568 nspace_items
[i
].refcount
++;
9571 if (i
>= MAX_NSPACE_ITEMS
) {
9572 ts
.tv_sec
= nspace_handler_timeout
;
9575 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
9577 // an entry got free'd up, go see if we can get a slot
9580 lck_mtx_unlock(&nspace_handler_lock
);
9586 // if it didn't already exist, add it. if it did exist
9587 // we'll get woken up when someone does a wakeup() on
9588 // the slot in the nspace_items table.
9590 if (vp
!= nspace_items
[i
].vp
) {
9591 nspace_items
[i
].vp
= vp
;
9592 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9593 nspace_items
[i
].op
= op
;
9594 nspace_items
[i
].vid
= vnode_vid(vp
);
9595 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9596 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
9597 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9599 vnode_lock_spin(vp
);
9600 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9605 nspace_items
[i
].token
= 0;
9606 nspace_items
[i
].refcount
= 1;
9608 wakeup((caddr_t
)&nspace_item_idx
);
9612 // Now go to sleep until the handler does a wakeup on this
9613 // slot in the nspace_items table (or we timeout).
9616 while(keep_waiting
) {
9617 ts
.tv_sec
= nspace_handler_timeout
;
9619 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
9621 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
9623 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9624 error
= nspace_items
[i
].token
;
9625 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
9626 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9627 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9632 } else if (error
== 0) {
9633 // hmmm, why did we get woken up?
9634 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9635 nspace_items
[i
].token
);
9638 if (--nspace_items
[i
].refcount
== 0) {
9639 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9640 nspace_items
[i
].arg
= NULL
;
9641 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
9642 nspace_items
[i
].flags
= 0; // this clears it for re-use
9644 wakeup(&nspace_token_id
);
9648 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Raise a snapshot event for vp when its change time calls for one.
 *
 * Swap files are special-cased first.  The event is raised only when both
 * ctime and snapshot_timestamp are non-zero and either ctime is no later
 * than snapshot_timestamp or vnode_needssnapshots(vp) is true.  Raising it
 * means calling resolve_nspace_item_ext() with
 * NAMESPACE_HANDLER_SNAPSHOT_EVENT or'ed into op_type and arg passed
 * through.
 *
 * The value returned is snapshot_error, which starts at 0 and is set to
 * EINTR when the resolve call reports EINTR.  EAGAIN from the resolve call
 * is logged with printf.
 */
9653 int nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
9655 int snapshot_error
= 0;
9661 /* Swap files are special; skip them */
9662 if (vnode_isswap(vp
)) {
9666 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
9667 // the change time is within this epoch
9670 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
9671 if (error
== EDEADLK
) {
9674 if (error
== EAGAIN
) {
9675 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9676 } else if (error
== EINTR
) {
9677 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9678 snapshot_error
= EINTR
;
9683 return snapshot_error
;
/*
 * Look up vp in the nspace_items table and report that slot's flags.
 *
 * The table is scanned under nspace_handler_lock for a slot whose vp field
 * equals vp.  If the scan runs off the end of the table (i reaches
 * MAX_NSPACE_ITEMS) the lock is released on that path.  Otherwise *status
 * receives the slot's flags word (NSPACE_ITEM_* bits) and the lock is then
 * released.
 */
9687 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
9691 lck_mtx_lock(&nspace_handler_lock
);
9692 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9693 if (nspace_items
[i
].vp
== vp
) {
/* i == MAX_NSPACE_ITEMS here means no slot refers to vp. */
9698 if (i
>= MAX_NSPACE_ITEMS
) {
9699 lck_mtx_unlock(&nspace_handler_lock
);
9703 *status
= nspace_items
[i
].flags
;
9704 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.
 *
 * On entry *len is the size handed to snprintf as the buffer limit; on
 * exit it holds snprintf's return value plus one.  The fsid and fileid
 * come from vnode_getattr() issued with the kernel context
 * (vfs_context_kernel()); if that call fails a fixed placeholder path is
 * written instead.
 *
 * NOTE(review): snprintf reports the length the complete string would have
 * needed, so when the output is truncated *len can come back larger than
 * the buffer size that was passed in -- confirm callers tolerate that.
 */
9711 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
9713 struct vnode_attr va
;
9717 VATTR_WANTED(&va
, va_fsid
);
9718 VATTR_WANTED(&va
, va_fileid
);
9720 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
9721 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
9724 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
9733 // Note: this function does NOT check permissions on all of the
9734 // parent directories leading to this vnode. It should only be
9735 // called on behalf of a root process. Otherwise a process may
9736 // get access to a file because the file itself is readable even
9737 // though its parent directories would prevent access.
/*
 * Open an already-resolved vnode with the given file mode.
 *
 * Steps, in the order they appear: superuser check (suser), MAC open
 * check, translation of fmode into a kauth action (READ_DATA for FREAD;
 * for FWRITE or O_TRUNC the write-side bits WRITE_DATA and, when O_APPEND
 * is set without O_TRUNC, APPEND_DATA), vnode_authorize(), VNOP_OPEN(),
 * vnode_ref_ext() -- with VNOP_CLOSE() undoing the open if taking the
 * reference fails -- and finally the open notifications
 * (mac_vnode_notify_open, kauth_authorize_fileop with KAUTH_FILEOP_OPEN).
 */
9740 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
9744 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9749 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
9754 /* compute action to be authorized */
9756 if (fmode
& FREAD
) {
9757 action
|= KAUTH_VNODE_READ_DATA
;
9759 if (fmode
& (FWRITE
| O_TRUNC
)) {
9761 * If we are writing, appending, and not truncating,
9762 * indicate that we are appending so that if the
9763 * UF_APPEND or SF_APPEND bits are set, we do not deny
9766 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
9767 action
|= KAUTH_VNODE_APPEND_DATA
;
9769 action
|= KAUTH_VNODE_WRITE_DATA
;
9773 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
9778 // if the vnode is tagged VOPENEVT and the current process
9779 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9780 // flag to the open mode so that this open won't count against
9781 // the vnode when carbon delete() does a vnode_isinuse() to see
9782 // if a file is currently in use. this allows spotlight
9783 // importers to not interfere with carbon apps that depend on
9784 // the no-delete-if-busy semantics of carbon delete().
9786 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
9790 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
/* Taking the usecount reference failed: close what VNOP_OPEN opened. */
9793 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
9794 VNOP_CLOSE(vp
, fmode
, ctx
);
9798 /* Call out to allow 3rd party notification of open.
9799 * Ignore result of kauth_authorize_fileop call.
9802 mac_vnode_notify_open(ctx
, vp
, fmode
);
9804 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
9812 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
9819 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9820 if (nspace_handlers
[nspace_type
].handler_busy
) {
9821 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9825 nspace_handlers
[nspace_type
].handler_busy
= 1;
9826 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9829 * Any process that gets here will be one of the namespace handlers.
9830 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9831 * as we can cause deadlocks to occur, because the namespace handler may prevent
9832 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9835 curtask
= current_task();
9836 bsd_set_dependency_capable (curtask
);
9838 lck_mtx_lock(&nspace_handler_lock
);
9839 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9840 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
9841 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
9844 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9845 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9849 while (error
== 0) {
9851 /* Try to find matching namespace item */
9852 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9853 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9854 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9860 if (i
>= MAX_NSPACE_ITEMS
) {
9861 /* Nothing is there yet. Wait for wake up and retry */
9862 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9863 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9864 /* Prevent infinite loop if snapshot handler exited */
9871 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9872 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9873 nspace_items
[i
].token
= ++nspace_token_id
;
9875 assert(nspace_items
[i
].vp
);
9876 struct fileproc
*fp
;
9879 struct proc
*p
= current_proc();
9880 vfs_context_t ctx
= vfs_context_current();
9881 struct vnode_attr va
;
9882 bool vn_get_succsessful
= false;
9883 bool vn_open_successful
= false;
9884 bool fp_alloc_successful
= false;
9887 * Use vnode pointer to acquire a file descriptor for
9888 * hand-off to userland
9890 fmode
= nspace_open_flags_for_type(nspace_type
);
9891 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9892 if (error
) goto cleanup
;
9893 vn_get_succsessful
= true;
9895 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9896 if (error
) goto cleanup
;
9897 vn_open_successful
= true;
9899 error
= falloc(p
, &fp
, &indx
, ctx
);
9900 if (error
) goto cleanup
;
9901 fp_alloc_successful
= true;
9903 fp
->f_fglob
->fg_flag
= fmode
;
9904 fp
->f_fglob
->fg_ops
= &vnops
;
9905 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9908 procfdtbl_releasefd(p
, indx
, NULL
);
9909 fp_drop(p
, indx
, fp
, 1);
9913 * All variants of the namespace handler struct support these three fields:
9914 * token, flags, and the FD pointer
9916 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9917 if (error
) goto cleanup
;
9918 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9919 if (error
) goto cleanup
;
9920 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9921 if (error
) goto cleanup
;
9924 * Handle optional fields:
9925 * extended version support an info ptr (offset, length), and the
9927 * namedata version supports a unique per-link object ID
9931 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9932 uint64_t u_offset
, u_length
;
9935 u_offset
= uio_offset(uio
);
9936 u_length
= uio_resid(uio
);
9941 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9942 if (error
) goto cleanup
;
9943 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
9944 if (error
) goto cleanup
;
9949 VATTR_WANTED(&va
, va_linkid
);
9950 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9951 if (error
) goto cleanup
;
9953 uint64_t linkid
= 0;
9954 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9955 linkid
= (uint64_t)va
.va_linkid
;
9957 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
9961 if (fp_alloc_successful
) fp_free(p
, indx
, fp
);
9962 if (vn_open_successful
) vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9966 if (vn_get_succsessful
) vnode_put(nspace_items
[i
].vp
);
9972 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9973 vnode_lock_spin(nspace_items
[i
].vp
);
9974 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9975 vnode_unlock(nspace_items
[i
].vp
);
9977 nspace_items
[i
].vp
= NULL
;
9978 nspace_items
[i
].vid
= 0;
9979 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9980 nspace_items
[i
].token
= 0;
9982 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9985 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9986 // just go through every snapshot event and unblock it immediately.
9987 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9988 for(i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9989 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9990 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9991 nspace_items
[i
].vp
= NULL
;
9992 nspace_items
[i
].vid
= 0;
9993 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9994 nspace_items
[i
].token
= 0;
9996 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10003 lck_mtx_unlock(&nspace_handler_lock
);
10005 lck_mtx_lock(&nspace_handler_exclusion_lock
);
10006 nspace_handlers
[nspace_type
].handler_busy
= 0;
10007 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
/*
 * Check that size is the size of one of the known namespace-handler
 * argument structures.
 *
 * The user64_* sizes (info, info_ext, data) are tested first and the
 * user32_* sizes second; is64bit is presumably what selects between the
 * two lists.
 *
 * NOTE(review): size is a signed int here, while process_namespace_fsctl()
 * passes a u_int and every comparison is against a sizeof value.
 */
10012 static inline int validate_namespace_args (int is64bit
, int size
) {
10015 /* Must be one of these */
10016 if (size
== sizeof(user64_namespace_handler_info
)) {
10019 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
10022 if (size
== sizeof(user64_namespace_handler_data
)) {
10028 /* 32 bit -- must be one of these */
10029 if (size
== sizeof(user32_namespace_handler_info
)) {
10032 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
10035 if (size
== sizeof(user32_namespace_handler_data
)) {
/*
 * Turn a namespace-handler fsctl argument structure into a
 * namespace_handler_data and wait for an event on behalf of the caller.
 *
 * nspace_type - which handler the caller is acting as; passed on to
 *               wait_for_namespace_event().
 * is64bit     - passed to validate_namespace_args(); the fields are read
 *               through the user64_* or the user32_* structure layouts.
 * size        - size of the structure in data; checked by
 *               validate_namespace_args() and used to decide which
 *               optional fields are present.
 * data        - the caller's argument structure, read in place through
 *               casts.
 *
 * A superuser check (suser) comes first.  token, flags and fdptr are always
 * taken from data; infoptr is taken when size reaches the *_info_ext
 * layout, and objid only when size equals the full *_data layout.  Fields
 * that are not taken stay zero from the initial bzero.  Returns the result
 * of wait_for_namespace_event().
 */
10047 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
10050 namespace_handler_data nhd
;
10052 bzero (&nhd
, sizeof(namespace_handler_data
));
10054 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10058 error
= validate_namespace_args (is64bit
, size
);
10063 /* Copy in the userland pointers into our kernel-only struct */
10066 /* 64 bit userland structures */
10067 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
10068 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
10069 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
10071 /* If the size is greater than the standard info struct, add in extra fields */
10072 if (size
> (sizeof(user64_namespace_handler_info
))) {
10073 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
10074 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
10076 if (size
== (sizeof(user64_namespace_handler_data
))) {
10077 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
10079 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10083 /* 32 bit userland structures */
10084 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
10085 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
10086 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
10088 if (size
> (sizeof(user32_namespace_handler_info
))) {
10089 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
10090 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
/* NOTE(review): objid uses a plain (user_addr_t) cast; the other 32-bit fields use CAST_USER_ADDR_T. */
10092 if (size
== (sizeof(user32_namespace_handler_data
))) {
10093 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
10095 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10099 return wait_for_namespace_event(&nhd
, nspace_type
);
/*
 * Map a command value back to its full FSIOC_ / DISK_CONDITIONER_ /
 * SPOTLIGHT_ constant.
 *
 * Each case label is IOCBASECMD(X) for one known command X and returns X
 * itself, so a caller that passed only the base form of a command gets the
 * complete encoding back.
 *
 * NOTE(review): presumably IOCBASECMD() drops the parameter-length field of
 * the command -- confirm against sys/ioccom.h.
 */
10102 static unsigned long
10103 fsctl_bogus_command_compat(unsigned long cmd
)
10107 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10108 return (FSIOC_SYNC_VOLUME
);
10109 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10110 return (FSIOC_ROUTEFS_SETROUTEID
);
10111 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10112 return (FSIOC_SET_PACKAGE_EXTS
);
10113 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET
):
10114 return (FSIOC_NAMESPACE_HANDLER_GET
);
10115 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET
):
10116 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET
);
10117 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT
):
10118 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT
);
10119 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE
):
10120 return (FSIOC_NAMESPACE_HANDLER_UPDATE
);
10121 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK
):
10122 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK
);
10123 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL
):
10124 return (FSIOC_NAMESPACE_HANDLER_CANCEL
);
10125 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
):
10126 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
);
10127 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
):
10128 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
);
10129 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10130 return (FSIOC_SET_FSTYPENAME_OVERRIDE
);
10131 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10132 return (DISK_CONDITIONER_IOC_GET
);
10133 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10134 return (DISK_CONDITIONER_IOC_SET
);
10135 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10136 return (FSIOC_FIOSEEKHOLE
);
10137 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10138 return (FSIOC_FIOSEEKDATA
);
10139 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10140 return (SPOTLIGHT_IOC_GET_MOUNT_TIME
);
10141 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10142 return (SPOTLIGHT_IOC_GET_LAST_MTIME
);
10149 * Make a filesystem-specific control call:
10153 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10158 #define STK_PARAMS 128
10159 char stkbuf
[STK_PARAMS
] = {0};
10160 caddr_t data
, memp
;
10161 vnode_t vp
= *arg_vp
;
10163 cmd
= fsctl_bogus_command_compat(cmd
);
10165 size
= IOCPARM_LEN(cmd
);
10166 if (size
> IOCPARM_MAX
) return (EINVAL
);
10168 is64bit
= proc_is64bit(p
);
10172 if (size
> sizeof (stkbuf
)) {
10173 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
10179 if (cmd
& IOC_IN
) {
10181 error
= copyin(udata
, data
, size
);
10184 kfree (memp
, size
);
10190 *(user_addr_t
*)data
= udata
;
10193 *(uint32_t *)data
= (uint32_t)udata
;
10196 } else if ((cmd
& IOC_OUT
) && size
) {
10198 * Zero the buffer so the user always
10199 * gets back something deterministic.
10202 } else if (cmd
& IOC_VOID
) {
10204 *(user_addr_t
*)data
= udata
;
10207 *(uint32_t *)data
= (uint32_t)udata
;
10211 /* Check to see if it's a generic command */
10214 case FSIOC_SYNC_VOLUME
: {
10215 mount_t mp
= vp
->v_mount
;
10216 int arg
= *(uint32_t*)data
;
10218 /* record vid of vp so we can drop it below. */
10219 uint32_t vvid
= vp
->v_id
;
10222 * Then grab mount_iterref so that we can release the vnode.
10223 * Without this, a thread may call vnode_iterate_prepare then
10224 * get into a deadlock because we've never released the root vp
10226 error
= mount_iterref (mp
, 0);
10232 /* issue the sync for this volume */
10233 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
10236 * Then release the mount_iterref once we're done syncing; it's not
10237 * needed for the VNOP_IOCTL below
10239 mount_iterdrop(mp
);
10241 if (arg
& FSCTL_SYNC_FULLSYNC
) {
10242 /* re-obtain vnode iocount on the root vp, if possible */
10243 error
= vnode_getwithvid (vp
, vvid
);
10245 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
10249 /* mark the argument VP as having been released */
10254 case FSIOC_ROUTEFS_SETROUTEID
: {
10256 char routepath
[MAXPATHLEN
];
10259 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10262 bzero(routepath
, MAXPATHLEN
);
10263 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
10267 error
= routefs_kernel_mount(routepath
);
10275 case FSIOC_SET_PACKAGE_EXTS
: {
10276 user_addr_t ext_strings
;
10277 uint32_t num_entries
;
10278 uint32_t max_width
;
10280 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0)))
10283 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
10284 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
10286 // either you're 64-bit and passed a 64-bit struct or
10287 // you're 32-bit and passed a 32-bit struct. otherwise
10294 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
10295 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
10296 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
10298 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
10299 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
10300 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
10302 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
10306 /* namespace handlers */
10307 case FSIOC_NAMESPACE_HANDLER_GET
: {
10308 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10312 /* Snapshot handlers */
10313 case FSIOC_OLD_SNAPSHOT_HANDLER_GET
: {
10314 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10318 case FSIOC_SNAPSHOT_HANDLER_GET_EXT
: {
10319 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10323 case FSIOC_NAMESPACE_HANDLER_UPDATE
: {
10324 uint32_t token
, val
;
10327 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10331 if (!nspace_is_special_process(p
)) {
10336 token
= ((uint32_t *)data
)[0];
10337 val
= ((uint32_t *)data
)[1];
10339 lck_mtx_lock(&nspace_handler_lock
);
10341 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10342 if (nspace_items
[i
].token
== token
) {
10343 break; /* exit for loop, not case stmt */
10347 if (i
>= MAX_NSPACE_ITEMS
) {
10351 // if this bit is set, when resolve_nspace_item() times out
10352 // it will loop and go back to sleep.
10354 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10357 lck_mtx_unlock(&nspace_handler_lock
);
10360 printf("nspace-handler-update: did not find token %u\n", token
);
10365 case FSIOC_NAMESPACE_HANDLER_UNBLOCK
: {
10366 uint32_t token
, val
;
10369 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10373 if (!nspace_is_special_process(p
)) {
10378 token
= ((uint32_t *)data
)[0];
10379 val
= ((uint32_t *)data
)[1];
10381 lck_mtx_lock(&nspace_handler_lock
);
10383 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10384 if (nspace_items
[i
].token
== token
) {
10385 break; /* exit for loop, not case statement */
10389 if (i
>= MAX_NSPACE_ITEMS
) {
10390 printf("nspace-handler-unblock: did not find token %u\n", token
);
10393 if (val
== 0 && nspace_items
[i
].vp
) {
10394 vnode_lock_spin(nspace_items
[i
].vp
);
10395 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10396 vnode_unlock(nspace_items
[i
].vp
);
10399 nspace_items
[i
].vp
= NULL
;
10400 nspace_items
[i
].arg
= NULL
;
10401 nspace_items
[i
].op
= 0;
10402 nspace_items
[i
].vid
= 0;
10403 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10404 nspace_items
[i
].token
= 0;
10406 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10409 lck_mtx_unlock(&nspace_handler_lock
);
10413 case FSIOC_NAMESPACE_HANDLER_CANCEL
: {
10414 uint32_t token
, val
;
10417 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10421 if (!nspace_is_special_process(p
)) {
10426 token
= ((uint32_t *)data
)[0];
10427 val
= ((uint32_t *)data
)[1];
10429 lck_mtx_lock(&nspace_handler_lock
);
10431 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10432 if (nspace_items
[i
].token
== token
) {
10433 break; /* exit for loop, not case stmt */
10437 if (i
>= MAX_NSPACE_ITEMS
) {
10438 printf("nspace-handler-cancel: did not find token %u\n", token
);
10441 if (nspace_items
[i
].vp
) {
10442 vnode_lock_spin(nspace_items
[i
].vp
);
10443 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10444 vnode_unlock(nspace_items
[i
].vp
);
10447 nspace_items
[i
].vp
= NULL
;
10448 nspace_items
[i
].arg
= NULL
;
10449 nspace_items
[i
].vid
= 0;
10450 nspace_items
[i
].token
= val
;
10451 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10452 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10454 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10457 lck_mtx_unlock(&nspace_handler_lock
);
10461 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10462 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10466 // we explicitly do not do the namespace_handler_proc check here
10468 lck_mtx_lock(&nspace_handler_lock
);
10469 snapshot_timestamp
= ((uint32_t *)data
)[0];
10470 wakeup(&nspace_item_idx
);
10471 lck_mtx_unlock(&nspace_handler_lock
);
10472 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10477 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10479 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10483 lck_mtx_lock(&nspace_handler_lock
);
10484 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10485 lck_mtx_unlock(&nspace_handler_lock
);
10486 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10487 nspace_allow_virtual_devs
? "" : " NOT");
10493 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
10495 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10499 mount_lock(vp
->v_mount
);
10500 if (data
[0] != 0) {
10501 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10502 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10503 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10504 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10505 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10508 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10509 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10511 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10512 vp
->v_mount
->fstypename_override
[0] = '\0';
10514 mount_unlock(vp
->v_mount
);
10519 case DISK_CONDITIONER_IOC_GET
: {
10520 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10524 case DISK_CONDITIONER_IOC_SET
: {
10525 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10530 /* other, known commands shouldn't be passed down here */
10533 case F_TRIM_ACTIVE_FILE
:
10535 case F_TRANSCODEKEY
:
10536 case F_GETPROTECTIONLEVEL
:
10537 case F_GETDEFAULTPROTLEVEL
:
10538 case F_MAKECOMPRESSED
:
10539 case F_SET_GREEDY_MODE
:
10540 case F_SETSTATICCONTENT
:
10542 case F_SETBACKINGSTORE
:
10543 case F_GETPATH_MTMINFO
:
10544 case APFSIOC_REVERT_TO_SNAPSHOT
:
10545 case FSIOC_FIOSEEKHOLE
:
10546 case FSIOC_FIOSEEKDATA
:
10547 case HFS_GET_BOOT_INFO
:
10548 case HFS_SET_BOOT_INFO
:
10552 case F_BARRIERFSYNC
:
10558 /* Invoke the filesystem-specific code */
10559 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
10562 } /* end switch stmt */
10565 * if no errors, copy any data to user. Size was
10566 * already set and checked above.
10568 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
10569 error
= copyout(data
, udata
, size
);
10581 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10584 struct nameidata nd
;
10587 vfs_context_t ctx
= vfs_context_current();
10589 AUDIT_ARG(cmd
, uap
->cmd
);
10590 AUDIT_ARG(value32
, uap
->options
);
10591 /* Get the vnode for the file we are getting info on: */
10593 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
10594 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10595 UIO_USERSPACE
, uap
->path
, ctx
);
10596 if ((error
= namei(&nd
))) goto done
;
10601 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
10607 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10616 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
10620 vfs_context_t ctx
= vfs_context_current();
10623 AUDIT_ARG(fd
, uap
->fd
);
10624 AUDIT_ARG(cmd
, uap
->cmd
);
10625 AUDIT_ARG(value32
, uap
->options
);
10627 /* Get the vnode for the file we are getting info on: */
10628 if ((error
= file_vnode(uap
->fd
, &vp
)))
10631 if ((error
= vnode_getwithref(vp
))) {
10637 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
10644 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10648 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10655 /* end of fsctl system call */
10658 * Retrieve the data of an extended attribute.
10661 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
10664 struct nameidata nd
;
10665 char attrname
[XATTR_MAXNAMELEN
+1];
10666 vfs_context_t ctx
= vfs_context_current();
10668 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10669 size_t attrsize
= 0;
10671 u_int32_t nameiflags
;
10673 char uio_buf
[ UIO_SIZEOF(1) ];
10675 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10678 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10679 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10680 if ((error
= namei(&nd
))) {
10686 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10690 if (xattr_protected(attrname
)) {
10691 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
10697 * the specific check for 0xffffffff is a hack to preserve
10698 * binary compatibility in K64 with applications that discovered
10699 * that passing in a buf pointer and a size of -1 resulted in
10700 * just the size of the indicated extended attribute being returned.
10701 * this isn't part of the documented behavior, but because of the
10702 * original implementation's check for "uap->size > 0", this behavior
10703 * was allowed. In K32 that check turned into a signed comparison
10704 * even though uap->size is unsigned... in K64, we blow by that
10705 * check because uap->size is unsigned and doesn't get sign smeared
10706 * in the munger for a 32 bit user app. we also need to add a
10707 * check to limit the maximum size of the buffer being passed in...
10708 * unfortunately, the underlying filesystems seem to just malloc
10709 * the requested size even if the actual extended attribute is tiny.
10710 * because that malloc is for kernel wired memory, we have to put a
10711 * sane limit on it.
10713 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10714 * U64 running on K64 will yield -1 (64 bits wide)
10715 * U32/U64 running on K32 will yield -1 (32 bits wide)
10717 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
10721 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
10722 uap
->size
= XATTR_MAXSIZE
;
10724 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10725 &uio_buf
[0], sizeof(uio_buf
));
10726 uio_addiov(auio
, uap
->value
, uap
->size
);
10729 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
10734 *retval
= uap
->size
- uio_resid(auio
);
10736 *retval
= (user_ssize_t
)attrsize
;
10743 * Retrieve the data of an extended attribute.
10746 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
10749 char attrname
[XATTR_MAXNAMELEN
+1];
10751 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10752 size_t attrsize
= 0;
10755 char uio_buf
[ UIO_SIZEOF(1) ];
10757 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10760 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10763 if ( (error
= vnode_getwithref(vp
)) ) {
10764 file_drop(uap
->fd
);
10767 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10771 if (xattr_protected(attrname
)) {
10775 if (uap
->value
&& uap
->size
> 0) {
10776 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10777 &uio_buf
[0], sizeof(uio_buf
));
10778 uio_addiov(auio
, uap
->value
, uap
->size
);
10781 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10783 (void)vnode_put(vp
);
10784 file_drop(uap
->fd
);
10787 *retval
= uap
->size
- uio_resid(auio
);
10789 *retval
= (user_ssize_t
)attrsize
;
10795 * Set the data of an extended attribute.
10798 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
10801 struct nameidata nd
;
10802 char attrname
[XATTR_MAXNAMELEN
+1];
10803 vfs_context_t ctx
= vfs_context_current();
10805 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10807 u_int32_t nameiflags
;
10809 char uio_buf
[ UIO_SIZEOF(1) ];
10811 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10814 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10816 if (error
== EPERM
) {
10817 /* if the string won't fit in attrname, copyinstr emits EPERM */
10818 return (ENAMETOOLONG
);
10820 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10823 if (xattr_protected(attrname
))
10825 if (uap
->size
!= 0 && uap
->value
== 0) {
10829 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10830 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10831 if ((error
= namei(&nd
))) {
10837 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10838 &uio_buf
[0], sizeof(uio_buf
));
10839 uio_addiov(auio
, uap
->value
, uap
->size
);
10841 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
10844 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10855 * Set the data of an extended attribute.
10858 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
10861 char attrname
[XATTR_MAXNAMELEN
+1];
10863 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10866 char uio_buf
[ UIO_SIZEOF(1) ];
10868 vfs_context_t ctx
= vfs_context_current();
10871 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10874 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10876 if (error
== EPERM
) {
10877 /* if the string won't fit in attrname, copyinstr emits EPERM */
10878 return (ENAMETOOLONG
);
10880 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10883 if (xattr_protected(attrname
))
10885 if (uap
->size
!= 0 && uap
->value
== 0) {
10888 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10891 if ( (error
= vnode_getwithref(vp
)) ) {
10892 file_drop(uap
->fd
);
10895 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10896 &uio_buf
[0], sizeof(uio_buf
));
10897 uio_addiov(auio
, uap
->value
, uap
->size
);
10899 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
10902 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10908 file_drop(uap
->fd
);
10914 * Remove an extended attribute.
10915 * XXX Code duplication here.
10918 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
10921 struct nameidata nd
;
10922 char attrname
[XATTR_MAXNAMELEN
+1];
10923 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10924 vfs_context_t ctx
= vfs_context_current();
10926 u_int32_t nameiflags
;
10929 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10932 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10936 if (xattr_protected(attrname
))
10938 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10939 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10940 if ((error
= namei(&nd
))) {
10946 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10949 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10960 * Remove an extended attribute.
10961 * XXX Code duplication here.
10964 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10967 char attrname
[XATTR_MAXNAMELEN
+1];
10971 vfs_context_t ctx
= vfs_context_current();
10974 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10977 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10981 if (xattr_protected(attrname
))
10983 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10986 if ( (error
= vnode_getwithref(vp
)) ) {
10987 file_drop(uap
->fd
);
10991 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10994 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
11000 file_drop(uap
->fd
);
11006 * Retrieve the list of extended attribute names.
11007 * XXX Code duplication here.
11010 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
11013 struct nameidata nd
;
11014 vfs_context_t ctx
= vfs_context_current();
11016 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11017 size_t attrsize
= 0;
11018 u_int32_t nameiflags
;
11020 char uio_buf
[ UIO_SIZEOF(1) ];
11022 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
11025 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
11026 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
11027 if ((error
= namei(&nd
))) {
11032 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11033 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
11034 &uio_buf
[0], sizeof(uio_buf
));
11035 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11038 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
11042 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11044 *retval
= (user_ssize_t
)attrsize
;
11050 * Retrieve the list of extended attribute names.
11051 * XXX Code duplication here.
11054 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
11058 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
11059 size_t attrsize
= 0;
11061 char uio_buf
[ UIO_SIZEOF(1) ];
11063 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
11066 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
11069 if ( (error
= vnode_getwithref(vp
)) ) {
11070 file_drop(uap
->fd
);
11073 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11074 auio
= uio_createwithbuffer(1, 0, spacetype
,
11075 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11076 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11079 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11082 file_drop(uap
->fd
);
11084 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11086 *retval
= (user_ssize_t
)attrsize
;
11091 static int fsgetpath_internal(
11092 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11093 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
11096 struct mount
*mp
= NULL
;
11100 /* maximum number of times to retry build_path */
11101 unsigned int retries
= 0x10;
11103 if (bufsize
> PAGE_SIZE
) {
11112 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11113 error
= ENOTSUP
; /* unexpected failure */
11119 error
= VFS_ROOT(mp
, &vp
, ctx
);
11121 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11124 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11126 * If the fileid isn't found and we're in a union
11127 * mount volume, then see if the fileid is in the
11128 * mounted-on volume.
11130 struct mount
*tmp
= mp
;
11131 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11133 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
11144 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11151 /* Obtain the absolute path to this vnode. */
11152 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11153 bpflags
|= BUILDPATH_CHECK_MOVED
;
11154 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11158 /* there was a race building the path, try a few more times */
11159 if (error
== EAGAIN
) {
11169 AUDIT_ARG(text
, buf
);
11171 if (kdebug_enable
) {
11172 long dbg_parms
[NUMPARMS
];
11175 dbg_namelen
= (int)sizeof(dbg_parms
);
11177 if (length
< dbg_namelen
) {
11178 memcpy((char *)dbg_parms
, buf
, length
);
11179 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11181 dbg_namelen
= length
;
11183 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11186 kdebug_vfs_lookup(dbg_parms
, dbg_namelen
, (void *)vp
,
11187 KDBG_VFS_LOOKUP_FLAG_LOOKUP
);
11190 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11197 * Obtain the full pathname of a file system object by id.
11200 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
11202 vfs_context_t ctx
= vfs_context_current();
11208 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11211 AUDIT_ARG(value32
, fsid
.val
[0]);
11212 AUDIT_ARG(value64
, uap
->objid
);
11213 /* Restrict output buffer size for now. */
11215 if (uap
->bufsize
> PAGE_SIZE
) {
11218 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
| M_ZERO
);
11219 if (realpath
== NULL
) {
11223 error
= fsgetpath_internal(
11224 ctx
, fsid
.val
[0], uap
->objid
,
11225 uap
->bufsize
, realpath
, &length
);
11231 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
11233 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11236 FREE(realpath
, M_TEMP
);
11242 * Common routine to handle various flavors of statfs data heading out
11245 * Returns: 0 Success
11249 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
11250 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
11251 boolean_t partial_copy
)
11254 int my_size
, copy_size
;
11257 struct user64_statfs sfs
;
11258 my_size
= copy_size
= sizeof(sfs
);
11259 bzero(&sfs
, my_size
);
11260 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11261 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11262 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11263 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
11264 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
11265 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
11266 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
11267 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
11268 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
11269 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
11270 sfs
.f_fsid
= sfsp
->f_fsid
;
11271 sfs
.f_owner
= sfsp
->f_owner
;
11272 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11273 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11275 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11277 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11278 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11280 if (partial_copy
) {
11281 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11283 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11286 struct user32_statfs sfs
;
11288 my_size
= copy_size
= sizeof(sfs
);
11289 bzero(&sfs
, my_size
);
11291 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11292 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11293 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11296 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11297 * have to fudge the numbers here in that case. We inflate the blocksize in order
11298 * to reflect the filesystem size as best we can.
11300 if ((sfsp
->f_blocks
> INT_MAX
)
11301 /* Hack for 4061702. I think the real fix is for Carbon to
11302 * look for some volume capability and not depend on hidden
11303 * semantics agreed between a FS and Carbon.
11304 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11305 * for Carbon to set the bNoVolumeSizes volume attribute.
11306 * Without this the webdavfs files cannot be copied onto
11307 * disk as they look huge. This change should not affect
11308 * XSAN as they should not be setting these to -1.
11310 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
11311 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
11312 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
11316 * Work out how far we have to shift the block count down to make it fit.
11317 * Note that it's possible to have to shift so far that the resulting
11318 * blocksize would be unreportably large. At that point, we will clip
11319 * any values that don't fit.
11321 * For safety's sake, we also ensure that f_iosize is never reported as
11322 * being smaller than f_bsize.
11324 for (shift
= 0; shift
< 32; shift
++) {
11325 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
11327 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
11330 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11331 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
11332 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
11333 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
11334 #undef __SHIFT_OR_CLIP
11335 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
11336 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
11338 /* filesystem is small enough to be reported honestly */
11339 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
11340 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
11341 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
11342 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
11343 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
11345 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
11346 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
11347 sfs
.f_fsid
= sfsp
->f_fsid
;
11348 sfs
.f_owner
= sfsp
->f_owner
;
11349 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11350 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11352 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11354 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11355 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11357 if (partial_copy
) {
11358 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11360 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11363 if (sizep
!= NULL
) {
11370 * copy stat structure into user_stat structure.
11372 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11374 bzero(usbp
, sizeof(*usbp
));
11376 usbp
->st_dev
= sbp
->st_dev
;
11377 usbp
->st_ino
= sbp
->st_ino
;
11378 usbp
->st_mode
= sbp
->st_mode
;
11379 usbp
->st_nlink
= sbp
->st_nlink
;
11380 usbp
->st_uid
= sbp
->st_uid
;
11381 usbp
->st_gid
= sbp
->st_gid
;
11382 usbp
->st_rdev
= sbp
->st_rdev
;
11383 #ifndef _POSIX_C_SOURCE
11384 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11385 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11386 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11387 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11388 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11389 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11391 usbp
->st_atime
= sbp
->st_atime
;
11392 usbp
->st_atimensec
= sbp
->st_atimensec
;
11393 usbp
->st_mtime
= sbp
->st_mtime
;
11394 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11395 usbp
->st_ctime
= sbp
->st_ctime
;
11396 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11398 usbp
->st_size
= sbp
->st_size
;
11399 usbp
->st_blocks
= sbp
->st_blocks
;
11400 usbp
->st_blksize
= sbp
->st_blksize
;
11401 usbp
->st_flags
= sbp
->st_flags
;
11402 usbp
->st_gen
= sbp
->st_gen
;
11403 usbp
->st_lspare
= sbp
->st_lspare
;
11404 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11405 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11408 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11410 bzero(usbp
, sizeof(*usbp
));
11412 usbp
->st_dev
= sbp
->st_dev
;
11413 usbp
->st_ino
= sbp
->st_ino
;
11414 usbp
->st_mode
= sbp
->st_mode
;
11415 usbp
->st_nlink
= sbp
->st_nlink
;
11416 usbp
->st_uid
= sbp
->st_uid
;
11417 usbp
->st_gid
= sbp
->st_gid
;
11418 usbp
->st_rdev
= sbp
->st_rdev
;
11419 #ifndef _POSIX_C_SOURCE
11420 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11421 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11422 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11423 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11424 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11425 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11427 usbp
->st_atime
= sbp
->st_atime
;
11428 usbp
->st_atimensec
= sbp
->st_atimensec
;
11429 usbp
->st_mtime
= sbp
->st_mtime
;
11430 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11431 usbp
->st_ctime
= sbp
->st_ctime
;
11432 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11434 usbp
->st_size
= sbp
->st_size
;
11435 usbp
->st_blocks
= sbp
->st_blocks
;
11436 usbp
->st_blksize
= sbp
->st_blksize
;
11437 usbp
->st_flags
= sbp
->st_flags
;
11438 usbp
->st_gen
= sbp
->st_gen
;
11439 usbp
->st_lspare
= sbp
->st_lspare
;
11440 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11441 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11445 * copy stat64 structure into user_stat64 structure.
11447 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11449 bzero(usbp
, sizeof(*usbp
));
11451 usbp
->st_dev
= sbp
->st_dev
;
11452 usbp
->st_ino
= sbp
->st_ino
;
11453 usbp
->st_mode
= sbp
->st_mode
;
11454 usbp
->st_nlink
= sbp
->st_nlink
;
11455 usbp
->st_uid
= sbp
->st_uid
;
11456 usbp
->st_gid
= sbp
->st_gid
;
11457 usbp
->st_rdev
= sbp
->st_rdev
;
11458 #ifndef _POSIX_C_SOURCE
11459 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11460 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11461 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11462 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11463 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11464 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11465 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11466 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11468 usbp
->st_atime
= sbp
->st_atime
;
11469 usbp
->st_atimensec
= sbp
->st_atimensec
;
11470 usbp
->st_mtime
= sbp
->st_mtime
;
11471 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11472 usbp
->st_ctime
= sbp
->st_ctime
;
11473 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11474 usbp
->st_birthtime
= sbp
->st_birthtime
;
11475 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11477 usbp
->st_size
= sbp
->st_size
;
11478 usbp
->st_blocks
= sbp
->st_blocks
;
11479 usbp
->st_blksize
= sbp
->st_blksize
;
11480 usbp
->st_flags
= sbp
->st_flags
;
11481 usbp
->st_gen
= sbp
->st_gen
;
11482 usbp
->st_lspare
= sbp
->st_lspare
;
11483 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11484 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11487 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11489 bzero(usbp
, sizeof(*usbp
));
11491 usbp
->st_dev
= sbp
->st_dev
;
11492 usbp
->st_ino
= sbp
->st_ino
;
11493 usbp
->st_mode
= sbp
->st_mode
;
11494 usbp
->st_nlink
= sbp
->st_nlink
;
11495 usbp
->st_uid
= sbp
->st_uid
;
11496 usbp
->st_gid
= sbp
->st_gid
;
11497 usbp
->st_rdev
= sbp
->st_rdev
;
11498 #ifndef _POSIX_C_SOURCE
11499 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11500 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11501 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11502 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11503 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11504 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11505 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11506 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11508 usbp
->st_atime
= sbp
->st_atime
;
11509 usbp
->st_atimensec
= sbp
->st_atimensec
;
11510 usbp
->st_mtime
= sbp
->st_mtime
;
11511 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11512 usbp
->st_ctime
= sbp
->st_ctime
;
11513 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11514 usbp
->st_birthtime
= sbp
->st_birthtime
;
11515 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11517 usbp
->st_size
= sbp
->st_size
;
11518 usbp
->st_blocks
= sbp
->st_blocks
;
11519 usbp
->st_blksize
= sbp
->st_blksize
;
11520 usbp
->st_flags
= sbp
->st_flags
;
11521 usbp
->st_gen
= sbp
->st_gen
;
11522 usbp
->st_lspare
= sbp
->st_lspare
;
11523 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11524 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11528 * Purge buffer cache for simulating cold starts
11530 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11532 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11534 return VNODE_RETURNED
;
11537 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11539 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11541 return VFS_RETURNED
;
11545 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11547 if (!kauth_cred_issuser(kauth_cred_get()))
11550 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);
11556 * gets the vnode associated with the (unnamed) snapshot directory
11557 * for a Filesystem. The snapshot directory vnode is returned with
11558 * an iocount on it.
11561 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11563 return (VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
));
11567 * Get the snapshot vnode.
11569 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11570 * needs nameidone() on ndp.
11572 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11574 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11578 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
11579 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
11580 #if !CONFIG_TRIGGERS
11583 enum path_operation pathop
,
11589 struct vfs_attr vfa
;
11594 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
11598 if (!vnode_isvroot(*rvpp
)) {
11603 /* Make sure the filesystem supports snapshots */
11604 VFSATTR_INIT(&vfa
);
11605 VFSATTR_WANTED(&vfa
, f_capabilities
);
11606 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
11607 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
11608 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
11609 VOL_CAP_INT_SNAPSHOT
)) ||
11610 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
11611 VOL_CAP_INT_SNAPSHOT
))) {
11616 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
11620 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11621 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11626 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11627 * (the length returned by copyinstr includes the terminating NUL)
11629 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
11630 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
11634 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++);
11635 if (i
< (int)name_len
) {
11641 if (op
== CREATE
) {
11642 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
11644 } else if (op
== DELETE
) {
11645 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
11652 /* Check if the snapshot already exists ... */
11653 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
11654 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
11655 ndp
->ni_dvp
= *sdvpp
;
11657 error
= namei(ndp
);
11659 FREE(name_buf
, M_TEMP
);
11675 * create a filesystem snapshot (for supporting filesystems)
11677 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11678 * We get to the (unnamed) snapshot directory vnode and create the vnode
11679 * for the snapshot in it.
11683 * a) Passed in name for snapshot cannot have slashes.
11684 * b) name can't be "." or ".."
11686 * Since this requires superuser privileges, vnode_authorize calls are not
11690 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11693 vnode_t rvp
, snapdvp
;
11695 struct nameidata namend
;
11697 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
11702 if (namend
.ni_vp
) {
11703 vnode_put(namend
.ni_vp
);
11706 struct vnode_attr va
;
11707 vnode_t vp
= NULLVP
;
11710 VATTR_SET(&va
, va_type
, VREG
);
11711 VATTR_SET(&va
, va_mode
, 0);
11713 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
11714 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
11719 nameidone(&namend
);
11720 vnode_put(snapdvp
);
11726 * Delete a Filesystem snapshot
11728 * get the vnode for the unnamed snapshot directory and the snapshot and
11729 * delete the snapshot.
11732 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11735 vnode_t rvp
, snapdvp
;
11737 struct nameidata namend
;
11739 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
11744 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
11745 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
11747 vnode_put(namend
.ni_vp
);
11748 nameidone(&namend
);
11749 vnode_put(snapdvp
);
11756 * Revert a filesystem to a snapshot
11758 * Marks the filesystem to revert to the given snapshot on next mount.
11761 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11767 struct fs_snapshot_revert_args revert_data
;
11768 struct componentname cnp
;
11772 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11776 mp
= vnode_mount(rvp
);
11778 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11779 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11781 FREE(name_buf
, M_TEMP
);
11787 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
11789 FREE(name_buf
, M_TEMP
);
11796 * Grab mount_iterref so that we can release the vnode,
11797 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11799 error
= mount_iterref (mp
, 0);
11802 FREE(name_buf
, M_TEMP
);
11806 memset(&cnp
, 0, sizeof(cnp
));
11807 cnp
.cn_pnbuf
= (char *)name_buf
;
11808 cnp
.cn_nameiop
= LOOKUP
;
11809 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11810 cnp
.cn_pnlen
= MAXPATHLEN
;
11811 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11812 cnp
.cn_namelen
= (int)name_len
;
11813 revert_data
.sr_cnp
= &cnp
;
11815 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
11816 mount_iterdrop(mp
);
11817 FREE(name_buf
, M_TEMP
);
11820 /* If there was any error, try again using VNOP_IOCTL */
11823 struct nameidata namend
;
11825 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
11832 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
11835 vnode_put(namend
.ni_vp
);
11836 nameidone(&namend
);
11837 vnode_put(snapdvp
);
11845 * rename a Filesystem snapshot
11847 * get the vnode for the unnamed snapshot directory and the snapshot and
11848 * rename the snapshot. This is a very specialised (and simple) case of
11849 * rename(2) (which has to deal with a lot more complications). It differs
11850 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11853 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
11854 __unused
uint32_t flags
, vfs_context_t ctx
)
11856 vnode_t rvp
, snapdvp
;
11858 caddr_t newname_buf
;
11861 struct nameidata
*fromnd
, *tond
;
11862 /* carving out a chunk for structs that are too big to be on stack. */
11864 struct nameidata from_node
;
11865 struct nameidata to_node
;
11868 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
11869 fromnd
= &__rename_data
->from_node
;
11870 tond
= &__rename_data
->to_node
;
11872 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
11876 fvp
= fromnd
->ni_vp
;
11878 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11879 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
11884 * Some sanity checks- new name can't be empty, "." or ".." or have
11886 * (the length returned by copyinstr includes the terminating NUL)
11888 * The FS rename VNOP is suppossed to handle this but we'll pick it
11891 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
11892 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
11896 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++);
11897 if (i
< (int)name_len
) {
11903 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
11909 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
11910 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
11911 tond
->ni_dvp
= snapdvp
;
11913 error
= namei(tond
);
11916 } else if (tond
->ni_vp
) {
11918 * snapshot rename behaves differently than rename(2) - if the
11919 * new name exists, EEXIST is returned.
11921 vnode_put(tond
->ni_vp
);
11926 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
11927 &tond
->ni_cnd
, ctx
);
11932 FREE(newname_buf
, M_TEMP
);
11934 vnode_put(snapdvp
);
11938 FREE(__rename_data
, M_TEMP
);
11943 * Mount a Filesystem snapshot
11945 * get the vnode for the unnamed snapshot directory and the snapshot and
11946 * mount the snapshot.
11949 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
11950 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
11952 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
11954 struct nameidata
*snapndp
, *dirndp
;
11955 /* carving out a chunk for structs that are too big to be on stack. */
11957 struct nameidata snapnd
;
11958 struct nameidata dirnd
;
11959 } * __snapshot_mount_data
;
11961 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
11963 snapndp
= &__snapshot_mount_data
->snapnd
;
11964 dirndp
= &__snapshot_mount_data
->dirnd
;
11966 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
11971 snapvp
= snapndp
->ni_vp
;
11972 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
11977 /* Get the vnode to be covered */
11978 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
11979 UIO_USERSPACE
, directory
, ctx
);
11980 error
= namei(dirndp
);
11984 vp
= dirndp
->ni_vp
;
11985 pvp
= dirndp
->ni_dvp
;
11987 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
11990 mount_t mp
= vnode_mount(rvp
);
11991 struct fs_snapshot_mount_args smnt_data
;
11993 smnt_data
.sm_mp
= mp
;
11994 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
11995 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
11996 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
11997 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
12005 vnode_put(snapdvp
);
12007 nameidone(snapndp
);
12009 FREE(__snapshot_mount_data
, M_TEMP
);
12014 * Root from a snapshot of the filesystem
12016 * Marks the filesystem to root from the given snapshot on next boot.
12019 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
12025 struct fs_snapshot_root_args root_data
;
12026 struct componentname cnp
;
12030 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
12034 mp
= vnode_mount(rvp
);
12036 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
12037 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
12039 FREE(name_buf
, M_TEMP
);
12044 // XXX MAC checks ?
12047 * Grab mount_iterref so that we can release the vnode,
12048 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12050 error
= mount_iterref (mp
, 0);
12053 FREE(name_buf
, M_TEMP
);
12057 memset(&cnp
, 0, sizeof(cnp
));
12058 cnp
.cn_pnbuf
= (char *)name_buf
;
12059 cnp
.cn_nameiop
= LOOKUP
;
12060 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
12061 cnp
.cn_pnlen
= MAXPATHLEN
;
12062 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
12063 cnp
.cn_namelen
= (int)name_len
;
12064 root_data
.sr_cnp
= &cnp
;
12066 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
12068 mount_iterdrop(mp
);
12069 FREE(name_buf
, M_TEMP
);
/*
 * FS snapshot operations dispatcher
 */
12078 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12079 __unused
int32_t *retval
)
12082 vfs_context_t ctx
= vfs_context_current();
12084 AUDIT_ARG(fd
, uap
->dirfd
);
12085 AUDIT_ARG(value32
, uap
->op
);
12087 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12092 case SNAPSHOT_OP_CREATE
:
12093 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12095 case SNAPSHOT_OP_DELETE
:
12096 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12098 case SNAPSHOT_OP_RENAME
:
12099 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12102 case SNAPSHOT_OP_MOUNT
:
12103 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12104 uap
->data
, uap
->flags
, ctx
);
12106 case SNAPSHOT_OP_REVERT
:
12107 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12109 #if CONFIG_MNT_ROOTSNAP
12110 case SNAPSHOT_OP_ROOT
:
12111 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12113 #endif /* CONFIG_MNT_ROOTSNAP */