2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <vfs/vfs_disk_conditioner.h>
112 #include <security/audit/audit.h>
113 #include <bsm/audit_kevents.h>
115 #include <mach/mach_types.h>
116 #include <kern/kern_types.h>
117 #include <kern/kalloc.h>
118 #include <kern/task.h>
120 #include <vm/vm_pageout.h>
121 #include <vm/vm_protos.h>
123 #include <libkern/OSAtomic.h>
124 #include <pexpert/pexpert.h>
125 #include <IOKit/IOBSD.h>
128 #include <miscfs/routefs/routefs.h>
132 #include <security/mac.h>
133 #include <security/mac_framework.h>
/*
 * Path buffer helpers. Two variants of GET_PATH/RELEASE_PATH appear here,
 * selected by the CONFIG_FSE conditional that the closing #endif names:
 * one obtains the buffer from get_pathbuff(), the other allocates
 * MAXPATHLEN bytes with MALLOC_ZONE (M_NAMEI) and releases them with
 * FREE_ZONE. Each expansion already ends in a semicolon.
 * NOTE(review): the opening conditional, the alternative branch marker and
 * the body of the first RELEASE_PATH are not present in this copy.
 */
137 #define GET_PATH(x) \
138 (x) = get_pathbuff();
139 #define RELEASE_PATH(x) \
142 #define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144 #define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146 #endif /* CONFIG_FSE */
148 #ifndef HFS_GET_BOOT_INFO
149 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
152 #ifndef HFS_SET_BOOT_INFO
153 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
156 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
157 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
160 extern void disk_conditioner_unmount(mount_t mp
);
162 /* struct for checkdirs iteration */
167 /* callback for checkdirs iteration */
168 static int checkdirs_callback(proc_t p
, void * arg
);
170 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
171 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
172 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
173 static int getfsstat_callback(mount_t mp
, void * arg
);
174 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
175 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
176 static int sync_callback(mount_t
, void *);
177 static void hibernate_sync_thread(void *, __unused wait_result_t
);
178 static int hibernate_sync_async(int);
179 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
180 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
181 boolean_t partial_copy
);
182 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
184 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
185 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
186 struct componentname
*cnp
, user_addr_t fsmountargs
,
187 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
189 void vfs_notify_mount(vnode_t pdvp
);
191 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
193 struct fd_vn_data
* fg_vn_data_alloc(void);
196 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}.
197 * Concurrent lookups (or lookups by ids) on hard links can cause the
198 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
199 * does) to return ENOENT as the path cannot be returned from the name cache
200 * alone. We have no option but to retry and hope to get one namei->reverse path
201 * generation done without an intervening lookup or lookup by id on the hard
202 * link item. This is only an issue for MAC hooks which cannot reenter the
203 * filesystem, which currently are the MAC hooks for rename, unlink and rmdir.
205 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
207 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
209 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
211 #ifdef CONFIG_IMGSRC_ACCESS
212 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
213 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
214 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
215 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
216 static void mount_end_update(mount_t mp
);
217 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
218 #endif /* CONFIG_IMGSRC_ACCESS */
220 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
223 int sync_internal(void);
226 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
228 extern lck_grp_t
*fd_vn_lck_grp
;
229 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
230 extern lck_attr_t
*fd_vn_lck_attr
;
233 * incremented each time a mount or unmount operation occurs
234 * used to invalidate the cached value of the rootvp in the
235 * mount structure utilized by cache_lookup_path
237 uint32_t mount_generation
= 0;
239 /* counts number of mount and unmount operations */
240 unsigned int vfs_nummntops
=0;
242 extern const struct fileops vnops
;
243 #if CONFIG_APPLEDOUBLE
244 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
245 #endif /* CONFIG_APPLEDOUBLE */
248 * Virtual File System System Calls
251 #if NFSCLIENT || DEVFS || ROUTEFS
253 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount: report whether mp carries the MNTK_KERNEL_MOUNT flag.
 *
 * Returns TRUE when MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag and
 * FALSE otherwise.
 */
257 vfs_iskernelmount(mount_t mp
)
259 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount: in-kernel mount entry point.
 *
 * Sets up a nameidata for path (a kernel-space string, UIO_SYSSPACE) and
 * hands the request to mount_common() with no MAC label string (NULL) and
 * kernelmount = TRUE. data is passed through as the filesystem-specific
 * mount arguments and syscall_flags as the generic mount flags.
 *
 * NOTE(review): kern_flags is annotated __unused in the parameter list but
 * is forwarded to mount_common() as its internal_flags argument; the
 * annotation looks stale - confirm.
 */
264 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
265 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
/* Describe a LOOKUP of path that follows symlinks and also wants the parent. */
271 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
272 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
275 * Get the vnode to be covered if it's not supplied
/*
 * Use the caller-supplied path string directly as the component name
 * buffer; the recorded length includes the terminating NUL.
 */
285 char *pnbuf
= CAST_DOWN(char *, path
);
287 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
288 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
/* Final stage shared with the system call paths. */
292 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
293 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
303 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
306 * Mount a file system.
/*
 * mount: system call entry without a MAC label.
 *
 * Repackages type, path, flags and data from uap into a __mac_mount_args
 * whose mac_p is USER_ADDR_NULL, then returns the result of __mac_mount().
 */
310 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
312 struct __mac_mount_args muap
;
314 muap
.type
= uap
->type
;
315 muap
.path
= uap
->path
;
316 muap
.flags
= uap
->flags
;
317 muap
.data
= uap
->data
;
/* No label supplied by this entry point. */
318 muap
.mac_p
= USER_ADDR_NULL
;
319 return (__mac_mount(p
, &muap
, retval
));
/*
 * fmount: mount a filesystem on the vnode referred to by file descriptor
 * uap->fd.
 *
 * Visible steps: audit the descriptor and flags, test the flags, copy the
 * filesystem type name in from uap->type, resolve uap->fd to a vnode and
 * its parent, build a componentname holding the path of that vnode, and
 * call mount_common() with internal flags 0, a NULL label string and
 * kernelmount = FALSE.
 *
 * NOTE(review): error returns and cleanup between the visible statements
 * are not present in this copy, so they are not described here.
 */
323 fmount(__unused proc_t p
, struct fmount_args
*uap
, __unused
int32_t *retval
)
325 struct componentname cn
;
326 vfs_context_t ctx
= vfs_context_current();
329 int flags
= uap
->flags
;
330 char fstypename
[MFSNAMELEN
];
331 char *labelstr
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */
335 AUDIT_ARG(fd
, uap
->fd
);
336 AUDIT_ARG(fflags
, flags
);
337 /* fstypename will get audited by mount_common */
339 /* Sanity check the flags */
/*
 * MNT_IMGSRC_BY_INDEX, MNT_ROOTFS and MNT_UNION are each tested below; the
 * bodies of the two checks are not visible in this copy.
 */
340 if (flags
& (MNT_IMGSRC_BY_INDEX
|MNT_ROOTFS
)) {
344 if (flags
& MNT_UNION
) {
/* Bring the filesystem type name in from user space (at most MFSNAMELEN). */
348 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
/* Resolve the descriptor to a vnode, take a reference on it, find its parent. */
353 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0) {
357 if ((error
= vnode_getwithref(vp
)) != 0) {
362 pvp
= vnode_getparent(vp
);
/*
 * Build the componentname by hand: zero it, allocate a MAXPATHLEN buffer
 * and let vn_getpath() fill in the path of vp.
 */
369 memset(&cn
, 0, sizeof(struct componentname
));
370 MALLOC(cn
.cn_pnbuf
, char *, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
371 cn
.cn_pnlen
= MAXPATHLEN
;
373 if((error
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) {
/* Path buffer released when vn_getpath() fails. */
374 FREE(cn
.cn_pnbuf
, M_TEMP
);
381 error
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
);
/* Path buffer released after the mount attempt. */
383 FREE(cn
.cn_pnbuf
, M_TEMP
);
/*
 * vfs_notify_mount: announce a completed mount.
 *
 * Raises the VQ_MOUNT event through vfs_event_signal() and posts
 * NOTE_WRITE on pdvp through lock_vnode_and_post(). mount_common() calls
 * this with the parent of the covered vnode.
 */
392 vfs_notify_mount(vnode_t pdvp
)
394 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
395 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
400 * Mount a file system taking into account MAC label behavior.
401 * See mount(2) man page for more information
403 * Parameters: p Process requesting the mount
404 * uap User argument descriptor (see below)
407 * Indirect: uap->type Filesystem type
408 * uap->path Path to mount
409 * uap->data Mount arguments
410 * uap->mac_p MAC info
411 * uap->flags Mount flags
/*
 * Starts out FALSE. __mac_mount() sets it to TRUE, in code guarded by
 * CHECK_CS_VALIDATION_BITMAP, when the requested flags do not include
 * MNT_RDONLY (presumably only for requests on the root filesystem -
 * confirm against the full function).
 */
417 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount: system call entry for mounting with an optional MAC label.
 *
 * Visible steps: copy the filesystem type name in from uap->type, set up a
 * lookup of uap->path, optionally copy in the label string described by
 * uap->mac_p, apply the MNT_UNION and root filesystem flag handling, and
 * call mount_common() with internal flags 0 and kernelmount = FALSE.
 *
 * NOTE(review): this copy is missing lines (braces, gotos, labels, some
 * declarations and the namei call), so the comments added below describe
 * only the statements that are visible.
 */
420 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
424 int need_nameidone
= 0;
425 vfs_context_t ctx
= vfs_context_current();
426 char fstypename
[MFSNAMELEN
];
429 char *labelstr
= NULL
;
430 int flags
= uap
->flags
;
/*
 * NOTE(review): is_64bit is declared under
 * #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF, while its use in the image
 * source path below is guarded by #ifdef CONFIG_IMGSRC_ACCESS. The two
 * forms disagree when the macro is defined as 0 - confirm which is meant.
 */
432 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
433 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
438 * Get the fs type name from user space
440 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
445 * Get the vnode to be covered
447 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
448 UIO_USERSPACE
, uap
->path
, ctx
);
457 #ifdef CONFIG_IMGSRC_ACCESS
458 /* Mounting image source cannot be batched with other operations */
459 if (flags
== MNT_IMGSRC_BY_INDEX
) {
/*
 * NOTE(review): the last argument repeats the enclosing condition, so it
 * is always true on this path.
 */
460 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
461 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
464 #endif /* CONFIG_IMGSRC_ACCESS */
468 * Get the label string (if any) from user space
470 if (uap
->mac_p
!= USER_ADDR_NULL
) {
/*
 * Two layouts of the user structure are handled, user64_mac and
 * user32_mac; in both cases only m_buflen and m_string are carried over
 * into mac. The condition that selects between them is not visible in
 * this copy.
 */
475 struct user64_mac mac64
;
476 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
477 mac
.m_buflen
= mac64
.m_buflen
;
478 mac
.m_string
= mac64
.m_string
;
480 struct user32_mac mac32
;
481 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
482 mac
.m_buflen
= mac32
.m_buflen
;
483 mac
.m_string
= mac32
.m_string
;
/* Bounds test on the user-supplied label length, ahead of the allocation. */
487 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
488 (mac
.m_buflen
< 2)) {
/*
 * labelstr is allocated with type M_MACTEMP here and released with
 * FREE(labelstr, M_MACTEMP) after mount_common() has been called.
 */
492 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
493 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
497 AUDIT_ARG(mac_string
, labelstr
);
499 #endif /* CONFIG_MACF */
501 AUDIT_ARG(fflags
, flags
);
504 if (flags
& MNT_UNION
) {
505 /* No union mounts on release kernels */
/* Special handling when vp is the root vnode of the MNT_ROOTFS mount. */
511 if ((vp
->v_flag
& VROOT
) &&
512 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
513 if (!(flags
& MNT_UNION
)) {
518 * For a union mount on '/', treat it as fresh
519 * mount instead of update.
520 * Otherwise, union mouting on '/' used to panic the
521 * system before, since mnt_vnodecovered was found to
522 * be NULL for '/' which is required for unionlookup
523 * after it gets ENOENT on union mount.
525 flags
= (flags
& ~(MNT_UPDATE
));
529 if ((flags
& MNT_RDONLY
) == 0) {
530 /* Release kernels are not allowed to mount "/" as rw */
536 * See 7392553 for more details on why this check exists.
537 * Suffice to say: If this check is ON and something tries
538 * to mount the rootFS RW, we'll turn off the codesign
539 * bitmap optimization.
541 #if CHECK_CS_VALIDATION_BITMAP
542 if ((flags
& MNT_RDONLY
) == 0 ) {
543 root_fs_upgrade_try
= TRUE
;
/* Regular mount request: no internal flags and kernelmount = FALSE. */
548 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
549 labelstr
, FALSE
, ctx
);
555 FREE(labelstr
, M_MACTEMP
);
556 #endif /* CONFIG_MACF */
564 if (need_nameidone
) {
572 * common mount implementation (final stage of mounting)
575 * fstypename file system type (i.e. its vfs name)
576 * pvp parent of covered vnode
578 * cnp component name (ie path) of covered vnode
579 * flags generic mount flags
580 * fsmountargs file system specific data
581 * labelstr optional MAC label
582 * kernelmount TRUE for mounts initiated from inside the kernel
583 * ctx caller's context
/*
 * mount_common: shared back end for kernel_mount(), fmount() and
 * __mac_mount().
 *
 * Visible behaviour: with MNT_UPDATE set in flags an existing mount is
 * checked and modified while its mnt_rwlock is held exclusively; otherwise
 * the vfsconf list is searched for fstypename, the covered vnode is
 * prepared with prepare_coveredvp(), and a new struct mount is allocated
 * and initialised. The filesystem is then asked to mount through
 * VFS_MOUNT() or, for KERNEL_MOUNT_SNAPSHOT, through VFS_IOCTL(), followed
 * by post-mount bookkeeping or error unwinding.
 *
 * NOTE(review): this copy of the function is missing lines (braces, gotos,
 * labels and some declarations), so the comments added below describe only
 * the statements that are visible.
 */
586 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
587 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
588 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
591 #pragma unused(labelstr)
593 struct vnode
*devvp
= NULLVP
;
594 struct vnode
*device_vnode
= NULLVP
;
599 struct vfstable
*vfsp
= (struct vfstable
*)0;
600 struct proc
*p
= vfs_context_proc(ctx
);
602 user_addr_t devpath
= USER_ADDR_NULL
;
605 boolean_t vfsp_ref
= FALSE
;
606 boolean_t is_rwlock_locked
= FALSE
;
607 boolean_t did_rele
= FALSE
;
608 boolean_t have_usecount
= FALSE
;
611 * Process an update for an existing mount
613 if (flags
& MNT_UPDATE
) {
614 if ((vp
->v_flag
& VROOT
) == 0) {
620 /* unmount in progress return error */
622 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
628 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
629 is_rwlock_locked
= TRUE
;
631 * We only allow the filesystem to be reloaded if it
632 * is currently mounted read-only.
634 if ((flags
& MNT_RELOAD
) &&
635 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
641 * If content protection is enabled, update mounts are not
642 * allowed to turn it off.
644 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
645 ((flags
& MNT_CPROTECT
) == 0)) {
650 #ifdef CONFIG_IMGSRC_ACCESS
651 /* Can't downgrade the backer of the root FS */
652 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
653 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
657 #endif /* CONFIG_IMGSRC_ACCESS */
660 * Only root, or the user that did the original mount is
661 * permitted to update it.
663 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
664 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
668 error
= mac_mount_check_remount(ctx
, mp
);
674 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
675 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
677 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
678 flags
|= MNT_NOSUID
| MNT_NODEV
;
679 if (mp
->mnt_flag
& MNT_NOEXEC
)
/* Record the requested MNT_RELOAD, MNT_FORCE and MNT_UPDATE bits on the mount. */
686 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
688 vfsp
= mp
->mnt_vtable
;
693 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
694 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
696 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
697 flags
|= MNT_NOSUID
| MNT_NODEV
;
698 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
702 /* XXXAUDIT: Should we capture the type on the error path as well? */
703 AUDIT_ARG(text
, fstypename
);
/*
 * Walk the vfsconf list for an entry whose vfc_name matches fstypename
 * and bump its vfc_refcount when found.
 */
705 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
706 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
707 vfsp
->vfc_refcount
++;
718 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
720 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
721 error
= EINVAL
; /* unsupported request */
/*
 * Check and mark the covered vnode; the last argument (skip_auth) is true
 * when KERNEL_MOUNT_NOAUTH is set in internal_flags.
 */
725 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
731 * Allocate and initialize the filesystem (mount_t)
733 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
735 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
738 /* Initialize the default IO constraints */
739 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
740 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
741 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
742 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
743 mp
->mnt_devblocksize
= DEV_BSIZE
;
744 mp
->mnt_alignmentmask
= PAGE_MASK
;
745 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
748 mp
->mnt_realrootvp
= NULLVP
;
749 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
751 TAILQ_INIT(&mp
->mnt_vnodelist
);
752 TAILQ_INIT(&mp
->mnt_workerqueue
);
753 TAILQ_INIT(&mp
->mnt_newvnodes
);
/*
 * The new mount starts out with mnt_rwlock held exclusively;
 * is_rwlock_locked tells the exit path that the lock must be released.
 */
755 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
756 is_rwlock_locked
= TRUE
;
757 mp
->mnt_op
= vfsp
->vfc_vfsops
;
758 mp
->mnt_vtable
= vfsp
;
759 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
760 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
761 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
762 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
763 mp
->mnt_vnodecovered
= vp
;
764 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
765 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
766 mp
->mnt_devbsdunit
= 0;
768 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
769 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
771 #if NFSCLIENT || DEVFS || ROUTEFS
773 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
774 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
775 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
776 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
781 * Set the mount level flags.
783 if (flags
& MNT_RDONLY
)
784 mp
->mnt_flag
|= MNT_RDONLY
;
785 else if (mp
->mnt_flag
& MNT_RDONLY
) {
786 // disallow read/write upgrades of file systems that
787 // had the TYPENAME_OVERRIDE feature set.
788 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
792 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
/*
 * Clear the caller-controllable behaviour flags first; the same set is
 * copied back in from flags further down.
 */
794 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
795 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
796 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
797 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
798 MNT_QUARANTINE
| MNT_CPROTECT
);
803 * On release builds of iOS based platforms, always enforce NOSUID on
804 * all mounts. We do this here because we can catch update mounts as well as
805 * non-update mounts in this case.
807 mp
->mnt_flag
|= (MNT_NOSUID
);
811 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
812 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
813 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
814 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
815 MNT_QUARANTINE
| MNT_CPROTECT
);
818 if (flags
& MNT_MULTILABEL
) {
819 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
823 mp
->mnt_flag
|= MNT_MULTILABEL
;
827 * Process device path for local file systems if requested
829 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
830 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
831 if (vfs_context_is64bit(ctx
)) {
832 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
834 fsmountargs
+= sizeof(devpath
);
837 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
839 /* munge into LP64 addr */
840 devpath
= CAST_USER_ADDR_T(tmp
);
841 fsmountargs
+= sizeof(tmp
);
844 /* Lookup device and authorize access to it */
848 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
849 if ( (error
= namei(&nd
)) )
852 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
857 if (devvp
->v_type
!= VBLK
) {
861 if (major(devvp
->v_rdev
) >= nblkdev
) {
866 * If mount by non-root, then verify that user has necessary
867 * permissions on the device.
869 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
870 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
872 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
873 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
874 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
878 /* On first mount, preflight and open device */
879 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
880 if ( (error
= vnode_ref(devvp
)) )
883 * Disallow multiple mounts of the same device.
884 * Disallow mounting of a device that is currently in use
885 * (except for root, which might share swap device for miniroot).
886 * Flush out any old buffers remaining from a previous use.
888 if ( (error
= vfs_mountedon(devvp
)) )
891 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
895 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
899 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
/*
 * ronly records whether the mount is read-only; it selects FREAD versus
 * FREAD|FWRITE for the MAC open check, for VNOP_OPEN and for the matching
 * VNOP_CLOSE on the failure path.
 */
902 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
904 error
= mac_vnode_check_open(ctx
,
906 ronly
? FREAD
: FREAD
|FWRITE
);
910 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
913 mp
->mnt_devvp
= devvp
;
914 device_vnode
= devvp
;
/*
 * Otherwise: a read-only mount flagged MNTK_WANTRDWR that already has a
 * backing device (mnt_devvp) is being upgraded to read-write.
 */
916 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
917 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
918 (device_vnode
= mp
->mnt_devvp
)) {
922 * If upgrade to read-write by non-root, then verify
923 * that user has necessary permissions on the device.
925 vnode_getalways(device_vnode
);
927 if (suser(vfs_context_ucred(ctx
), NULL
) &&
928 (error
= vnode_authorize(device_vnode
, NULL
,
929 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
931 vnode_put(device_vnode
);
935 /* Tell the device that we're upgrading */
936 dev
= (dev_t
)device_vnode
->v_rdev
;
939 if ((u_int
)maj
>= (u_int
)nblkdev
)
940 panic("Volume mounted on a device with invalid major number.");
942 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
943 vnode_put(device_vnode
);
944 device_vnode
= NULLVP
;
951 if ((flags
& MNT_UPDATE
) == 0) {
952 mac_mount_label_init(mp
);
953 mac_mount_label_associate(ctx
, mp
);
956 if ((flags
& MNT_UPDATE
) != 0) {
957 error
= mac_mount_check_label_update(ctx
, mp
);
964 * Mount the filesystem.
/*
 * Snapshot mounts (KERNEL_MOUNT_SNAPSHOT) are issued through VFS_IOCTL
 * with VFSIOC_MOUNT_SNAPSHOT; the regular path calls VFS_MOUNT().
 */
966 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
967 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
968 (caddr_t
)fsmountargs
, 0, ctx
);
970 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
973 if (flags
& MNT_UPDATE
) {
974 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
975 mp
->mnt_flag
&= ~MNT_RDONLY
;
977 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
978 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
980 mp
->mnt_flag
= flag
; /* restore flag value */
981 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
982 lck_rw_done(&mp
->mnt_rwlock
);
983 is_rwlock_locked
= FALSE
;
985 enablequotas(mp
, ctx
);
990 * Put the new filesystem on the mount list after root.
993 struct vfs_attr vfsattr
;
995 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
996 error
= VFS_ROOT(mp
, &rvp
, ctx
);
998 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
1001 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
1003 * drop reference provided by VFS_ROOT
1012 vnode_lock_spin(vp
);
1013 CLR(vp
->v_flag
, VMOUNT
);
1014 vp
->v_mountedhere
= mp
;
1018 * taking the name_cache_lock exclusively will
1019 * insure that everyone is out of the fast path who
1020 * might be trying to use a now stale copy of
1021 * vp->v_mountedhere->mnt_realrootvp
1022 * bumping mount_generation causes the cached values
1027 name_cache_unlock();
1029 error
= vnode_ref(vp
);
1034 have_usecount
= TRUE
;
1036 error
= checkdirs(vp
, ctx
);
1038 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1042 * there is no cleanup code here so I have made it void
1043 * we need to revisit this
1045 (void)VFS_START(mp
, 0, ctx
);
1047 if (mount_list_add(mp
) != 0) {
1049 * The system is shutting down trying to umount
1050 * everything, so fail with a plausible errno.
1055 lck_rw_done(&mp
->mnt_rwlock
);
1056 is_rwlock_locked
= FALSE
;
1058 /* Check if this mounted file system supports EAs or named streams. */
1059 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1060 VFSATTR_INIT(&vfsattr
);
1061 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
1062 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
1063 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
1064 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
1065 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
1066 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
1067 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1070 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
1071 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
1072 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
1075 /* Check if this file system supports path from id lookups. */
1076 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
1077 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
1078 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1079 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
1080 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1081 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
1084 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
1085 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
1086 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
/*
 * The next two flags come from the vfstable registration (vfc_vfsflags)
 * rather than from the volume capabilities queried above.
 */
1089 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1090 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1092 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1093 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1095 /* increment the operations count */
1096 OSAddAtomic(1, &vfs_nummntops
);
1097 enablequotas(mp
, ctx
);
1100 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1103 * cache the IO attributes for the underlying physical media...
1104 * an error return indicates the underlying driver doesn't
1105 * support all the queries necessary... however, reasonable
1106 * defaults will have been set, so no reason to bail or care
1108 vfs_init_io_attributes(device_vnode
, mp
);
1111 /* Now that mount is setup, notify the listeners */
1112 vfs_notify_mount(pvp
);
1113 IOBSDMountChange(mp
, kIOMountChangeMount
);
1116 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1117 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1118 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1119 mp
->mnt_vtable
->vfc_name
, error
);
1122 vnode_lock_spin(vp
);
1123 CLR(vp
->v_flag
, VMOUNT
);
1126 mp
->mnt_vtable
->vfc_refcount
--;
1127 mount_list_unlock();
1129 if (device_vnode
) {
1130 vnode_rele(device_vnode
);
1131 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1133 lck_rw_done(&mp
->mnt_rwlock
);
1134 is_rwlock_locked
= FALSE
;
1137 * if we get here, we have a mount structure that needs to be freed,
1138 * but since the coveredvp hasn't yet been updated to point at it,
1139 * no need to worry about other threads holding a crossref on this mp
1140 * so it's ok to just free it
1142 mount_lock_destroy(mp
);
1144 mac_mount_label_destroy(mp
);
1146 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1150 * drop I/O count on the device vp if there was one
1152 if (devpath
&& devvp
)
1157 /* Error condition exits */
/* The result of the forced unmount is deliberately discarded here. */
1159 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1162 * If the mount has been placed on the covered vp,
1163 * it may have been discovered by now, so we have
1164 * to treat this just like an unmount
1166 mount_lock_spin(mp
);
1167 mp
->mnt_lflag
|= MNT_LDEAD
;
1170 if (device_vnode
!= NULLVP
) {
1171 vnode_rele(device_vnode
);
1172 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1177 vnode_lock_spin(vp
);
1180 vp
->v_mountedhere
= (mount_t
) 0;
1184 if (have_usecount
) {
1188 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1191 if (devpath
&& devvp
)
1194 /* Release mnt_rwlock only when it was taken */
1195 if (is_rwlock_locked
== TRUE
) {
1196 lck_rw_done(&mp
->mnt_rwlock
);
1200 if (mp
->mnt_crossref
)
1201 mount_dropcrossref(mp
, vp
, 0);
1203 mount_lock_destroy(mp
);
1205 mac_mount_label_destroy(mp
);
1207 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
/*
 * Presumably pairs with the vfc_refcount increment made when the
 * filesystem type was looked up - confirm against the guarding condition,
 * which is not visible in this copy.
 */
1212 vfsp
->vfc_refcount
--;
1213 mount_list_unlock();
1220 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: check vp as a mount point and mark it VMOUNT.
 *
 * Visible checks, in order: the uid of vp must match the caller unless the
 * caller is the superuser; vp is flushed with VNOP_FSYNC() and
 * buf_invalidateblks(); v_type is compared against VDIR; VMOUNT together
 * with a non-NULL v_mountedhere is tested; mac_mount_check_mount() is
 * consulted. VMOUNT is then set on vp after taking the vnode spin lock.
 *
 * NOTE(review): the error values, the use of skip_auth and the unlock are
 * on lines not present in this copy, so they are not described here.
 */
1224 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1227 #pragma unused(cnp,fsname)
1229 struct vnode_attr va
;
1234 * If the user is not root, ensure that they own the directory
1235 * onto which we are attempting to mount.
/* Only va_uid is requested from the filesystem. */
1238 VATTR_WANTED(&va
, va_uid
);
1239 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1240 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1241 (!vfs_context_issuser(ctx
)))) {
/* Push out and invalidate buffered data for vp before it is covered. */
1247 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1250 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1253 if (vp
->v_type
!= VDIR
) {
/* Test for a mount already attached to (or being attached to) vp. */
1258 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1264 error
= mac_mount_check_mount(ctx
, vp
,
/* Mark vp as having a mount in progress. */
1270 vnode_lock_spin(vp
);
1271 SET(vp
->v_flag
, VMOUNT
);
1278 #if CONFIG_IMGSRC_ACCESS
1281 #define IMGSRC_DEBUG(args...) printf(args)
1283 #define IMGSRC_DEBUG(args...) do { } while(0)
1287 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1289 struct nameidata nd
;
1290 vnode_t vp
, realdevvp
;
1294 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1295 if ( (error
= namei(&nd
)) ) {
1296 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1302 if (!vnode_isblk(vp
)) {
1303 IMGSRC_DEBUG("Not block device.\n");
1308 realdevvp
= mp
->mnt_devvp
;
1309 if (realdevvp
== NULLVP
) {
1310 IMGSRC_DEBUG("No device backs the mount.\n");
1315 error
= vnode_getwithref(realdevvp
);
1317 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1321 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1322 IMGSRC_DEBUG("Wrong dev_t.\n");
1327 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1330 * If mount by non-root, then verify that user has necessary
1331 * permissions on the device.
1333 if (!vfs_context_issuser(ctx
)) {
1334 accessmode
= KAUTH_VNODE_READ_DATA
;
1335 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1336 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1337 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1338 IMGSRC_DEBUG("Access denied.\n");
1346 vnode_put(realdevvp
);
1357 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1358 * and call checkdirs()
1361 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1365 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1367 vnode_lock_spin(vp
);
1368 CLR(vp
->v_flag
, VMOUNT
);
1369 vp
->v_mountedhere
= mp
;
1373 * taking the name_cache_lock exclusively will
1374 * ensure that everyone is out of the fast path who
1375 * might be trying to use a now stale copy of
1376 * vp->v_mountedhere->mnt_realrootvp
1377 * bumping mount_generation causes the cached values
1382 name_cache_unlock();
1384 error
= vnode_ref(vp
);
1389 error
= checkdirs(vp
, ctx
);
1391 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1398 mp
->mnt_vnodecovered
= NULLVP
;
1404 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1407 vnode_lock_spin(vp
);
1408 vp
->v_mountedhere
= (mount_t
)NULL
;
1411 mp
->mnt_vnodecovered
= NULLVP
;
1415 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1419 /* unmount in progress return error */
1420 mount_lock_spin(mp
);
1421 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1426 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1429 * We only allow the filesystem to be reloaded if it
1430 * is currently mounted read-only.
1432 if ((flags
& MNT_RELOAD
) &&
1433 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1439 * Only root, or the user that did the original mount is
1440 * permitted to update it.
1442 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1443 (!vfs_context_issuser(ctx
))) {
1448 error
= mac_mount_check_remount(ctx
, mp
);
1456 lck_rw_done(&mp
->mnt_rwlock
);
1463 mount_end_update(mount_t mp
)
1465 lck_rw_done(&mp
->mnt_rwlock
);
1469 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1473 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1477 vp
= imgsrc_rootvnodes
[height
];
1478 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1487 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1488 const char *fsname
, vfs_context_t ctx
,
1489 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1493 boolean_t placed
= FALSE
;
1494 vnode_t devvp
= NULLVP
;
1495 struct vfstable
*vfsp
;
1496 user_addr_t devpath
;
1497 char *old_mntonname
;
1502 /* If we didn't imageboot, nothing to move */
1503 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1507 /* Only root can do this */
1508 if (!vfs_context_issuser(ctx
)) {
1512 IMGSRC_DEBUG("looking for root vnode.\n");
1515 * Get root vnode of filesystem we're moving.
1519 struct user64_mnt_imgsrc_args mia64
;
1520 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1522 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1526 height
= mia64
.mi_height
;
1527 flags
= mia64
.mi_flags
;
1528 devpath
= mia64
.mi_devpath
;
1530 struct user32_mnt_imgsrc_args mia32
;
1531 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1533 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1537 height
= mia32
.mi_height
;
1538 flags
= mia32
.mi_flags
;
1539 devpath
= mia32
.mi_devpath
;
1543 * For binary compatibility--assumes one level of nesting.
1546 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1550 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1553 /* munge into LP64 addr */
1554 devpath
= CAST_USER_ADDR_T(tmp
);
1562 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1566 error
= get_imgsrc_rootvnode(height
, &rvp
);
1568 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1572 IMGSRC_DEBUG("got root vnode.\n");
1574 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1576 /* Can only move once */
1577 mp
= vnode_mount(rvp
);
1578 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1579 IMGSRC_DEBUG("Already moved.\n");
1584 IMGSRC_DEBUG("Starting updated.\n");
1586 /* Get exclusive rwlock on mount, authorize update on mp */
1587 error
= mount_begin_update(mp
, ctx
, 0);
1589 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1594 * It can only be moved once. Flag is set under the rwlock,
1595 * so we're now safe to proceed.
1597 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1598 IMGSRC_DEBUG("Already moved [2]\n");
1603 IMGSRC_DEBUG("Preparing coveredvp.\n");
1605 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1606 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1608 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1612 IMGSRC_DEBUG("Covered vp OK.\n");
1614 /* Sanity check the name caller has provided */
1615 vfsp
= mp
->mnt_vtable
;
1616 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1617 IMGSRC_DEBUG("Wrong fs name.\n");
1622 /* Check the device vnode and update mount-from name, for local filesystems */
1623 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1624 IMGSRC_DEBUG("Local, doing device validation.\n");
1626 if (devpath
!= USER_ADDR_NULL
) {
1627 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1629 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1638 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1639 * and increment the name cache's mount generation
1642 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1643 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1650 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1651 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1653 /* Forbid future moves */
1655 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1658 /* Finally, add to mount list, completely ready to go */
1659 if (mount_list_add(mp
) != 0) {
1661 * The system is shutting down trying to umount
1662 * everything, so fail with a plausible errno.
1668 mount_end_update(mp
);
1670 FREE(old_mntonname
, M_TEMP
);
1672 vfs_notify_mount(pvp
);
1676 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1679 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1684 * Placing the mp on the vnode clears VMOUNT,
1685 * so cleanup is different after that point
1688 /* Rele the vp, clear VMOUNT and v_mountedhere */
1689 undo_place_on_covered_vp(mp
, vp
);
1691 vnode_lock_spin(vp
);
1692 CLR(vp
->v_flag
, VMOUNT
);
1696 mount_end_update(mp
);
1700 FREE(old_mntonname
, M_TEMP
);
1704 #endif /* CONFIG_IMGSRC_ACCESS */
1707 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1709 struct nameidata qnd
;
1711 char qfpath
[MAXPATHLEN
];
1712 const char *qfname
= QUOTAFILENAME
;
1713 const char *qfopsname
= QUOTAOPSNAME
;
1714 const char *qfextension
[] = INITQFNAMES
;
1716 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1717 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1721 * Enable filesystem disk quotas if necessary.
1722 * We ignore errors as this should not interfere with final mount
1724 for (type
=0; type
< MAXQUOTAS
; type
++) {
1725 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1726 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1727 CAST_USER_ADDR_T(qfpath
), ctx
);
1728 if (namei(&qnd
) != 0)
1729 continue; /* option file to trigger quotas is not present */
1730 vnode_put(qnd
.ni_vp
);
1732 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1734 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1741 checkdirs_callback(proc_t p
, void * arg
)
1743 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1744 vnode_t olddp
= cdrp
->olddp
;
1745 vnode_t newdp
= cdrp
->newdp
;
1746 struct filedesc
*fdp
;
1750 int cdir_changed
= 0;
1751 int rdir_changed
= 0;
1754 * XXX Also needs to iterate each thread in the process to see if it
1755 * XXX is using a per-thread current working directory, and, if so,
1756 * XXX update that as well.
1761 if (fdp
== (struct filedesc
*)0) {
1763 return(PROC_RETURNED
);
1765 fdp_cvp
= fdp
->fd_cdir
;
1766 fdp_rvp
= fdp
->fd_rdir
;
1769 if (fdp_cvp
== olddp
) {
1776 if (fdp_rvp
== olddp
) {
1783 if (cdir_changed
|| rdir_changed
) {
1785 fdp
->fd_cdir
= fdp_cvp
;
1786 fdp
->fd_rdir
= fdp_rvp
;
1789 return(PROC_RETURNED
);
1795 * Scan all active processes to see if any of them have a current
1796 * or root directory onto which the new filesystem has just been
1797 * mounted. If so, replace them with the new mount point.
1800 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1805 struct cdirargs cdr
;
1807 if (olddp
->v_usecount
== 1)
1809 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1813 panic("mount: lost mount: error %d", err
);
1820 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1821 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1823 if (rootvnode
== olddp
) {
1835 * Unmount a file system.
1837 * Note: unmount takes a path to the vnode mounted on as argument,
1838 * not special file (as before).
1842 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1847 struct nameidata nd
;
1848 vfs_context_t ctx
= vfs_context_current();
1850 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1851 UIO_USERSPACE
, uap
->path
, ctx
);
1860 error
= mac_mount_check_umount(ctx
, mp
);
1867 * Must be the root of the filesystem
1869 if ((vp
->v_flag
& VROOT
) == 0) {
1875 /* safedounmount consumes the mount ref */
1876 return (safedounmount(mp
, uap
->flags
, ctx
));
1880 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1884 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1885 if (mp
== (mount_t
)0) {
1890 /* safedounmount consumes the mount ref */
1891 return(safedounmount(mp
, flags
, ctx
));
1896 * The mount struct comes with a mount ref which will be consumed.
1897 * Do the actual file system unmount, prevent some common foot shooting.
1900 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1903 proc_t p
= vfs_context_proc(ctx
);
1906 * If the file system is not responding and MNT_NOBLOCK
1907 * is set and not a forced unmount then return EBUSY.
1909 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1910 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1916 * Skip authorization if the mount is tagged as permissive and
1917 * this is not a forced-unmount attempt.
1919 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1921 * Only root, or the user that did the original mount is
1922 * permitted to unmount this filesystem.
1924 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1925 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1929 * Don't allow unmounting the root file system.
1931 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1932 error
= EBUSY
; /* the root is always busy */
1936 #ifdef CONFIG_IMGSRC_ACCESS
1937 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1941 #endif /* CONFIG_IMGSRC_ACCESS */
1943 return (dounmount(mp
, flags
, 1, ctx
));
1951 * Do the actual file system unmount.
1954 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1956 vnode_t coveredvp
= (vnode_t
)0;
1959 int forcedunmount
= 0;
1961 struct vnode
*devvp
= NULLVP
;
1963 proc_t p
= vfs_context_proc(ctx
);
1965 int pflags_save
= 0;
1966 #endif /* CONFIG_TRIGGERS */
1969 if (!(flags
& MNT_FORCE
)) {
1970 fsevent_unmount(mp
, ctx
); /* has to come first! */
1977 * If already an unmount in progress just return EBUSY.
1978 * Even a forced unmount cannot override.
1980 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1987 if (flags
& MNT_FORCE
) {
1989 mp
->mnt_lflag
|= MNT_LFORCE
;
1993 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1994 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1997 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1998 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1999 mp
->mnt_flag
&=~ MNT_ASYNC
;
2001 * anyone currently in the fast path that
2002 * trips over the cached rootvp will be
2003 * dumped out and forced into the slow path
2004 * to regenerate a new cached value
2006 mp
->mnt_realrootvp
= NULLVP
;
2009 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
2011 * Force unmount any mounts in this filesystem.
2012 * If any unmounts fail - just leave them dangling.
2015 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
2019 * taking the name_cache_lock exclusively will
2020 * ensure that everyone is out of the fast path who
2021 * might be trying to use a now stale copy of
2022 * vp->v_mountedhere->mnt_realrootvp
2023 * bumping mount_generation causes the cached values
2028 name_cache_unlock();
2031 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2035 if (forcedunmount
== 0) {
2036 ubc_umount(mp
); /* release cached vnodes */
2037 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2038 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
2041 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2042 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2043 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2049 /* free disk_conditioner_info structure for this mount */
2050 disk_conditioner_unmount(mp
);
2052 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
2055 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
2059 lflags
|= FORCECLOSE
;
2060 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
2061 if ((forcedunmount
== 0) && error
) {
2063 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2064 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2065 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2069 /* make sure no one is in the mount iterations or lookup */
2070 mount_iterdrain(mp
);
2072 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
2074 mount_iterreset(mp
);
2076 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
2077 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
2078 mp
->mnt_lflag
&= ~MNT_LFORCE
;
2082 /* increment the operations count */
2084 OSAddAtomic(1, &vfs_nummntops
);
2086 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
2087 /* hold an io reference and drop the usecount before close */
2088 devvp
= mp
->mnt_devvp
;
2089 vnode_getalways(devvp
);
2091 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
2093 vnode_clearmountedon(devvp
);
2096 lck_rw_done(&mp
->mnt_rwlock
);
2097 mount_list_remove(mp
);
2098 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2100 /* mark the mount point hook in the vp but not drop the ref yet */
2101 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2103 * The covered vnode needs special handling. Trying to get an
2104 * iocount must not block here as this may lead to deadlocks
2105 * if the Filesystem to which the covered vnode belongs is
2106 * undergoing forced unmounts. Since we hold a usecount, the
2107 * vnode cannot be reused (it can, however, still be terminated)
2109 vnode_getalways(coveredvp
);
2110 vnode_lock_spin(coveredvp
);
2113 coveredvp
->v_mountedhere
= (struct mount
*)0;
2114 CLR(coveredvp
->v_flag
, VMOUNT
);
2116 vnode_unlock(coveredvp
);
2117 vnode_put(coveredvp
);
2121 mp
->mnt_vtable
->vfc_refcount
--;
2122 mount_list_unlock();
2124 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2125 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2127 mp
->mnt_lflag
|= MNT_LDEAD
;
2129 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2131 * do the wakeup here
2132 * in case we block in mount_refdrain
2133 * which will drop the mount lock
2134 * and allow anyone blocked in vfs_busy
2135 * to wakeup and see the LDEAD state
2137 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2138 wakeup((caddr_t
)mp
);
2142 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2143 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2148 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2149 // Restore P_NOREMOTEHANG bit to its previous value
2150 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2151 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2155 * Callback and context are set together under the mount lock, and
2156 * never cleared, so we're safe to examine them here, drop the lock,
2159 if (mp
->mnt_triggercallback
!= NULL
) {
2162 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2163 } else if (did_vflush
) {
2164 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2171 #endif /* CONFIG_TRIGGERS */
2173 lck_rw_done(&mp
->mnt_rwlock
);
2176 wakeup((caddr_t
)mp
);
2179 if ((coveredvp
!= NULLVP
)) {
2180 vnode_t pvp
= NULLVP
;
2183 * The covered vnode needs special handling. Trying to
2184 * get an iocount must not block here as this may lead
2185 * to deadlocks if the Filesystem to which the covered
2186 * vnode belongs is undergoing forced unmounts. Since we
2187 * hold a usecount, the vnode cannot be reused
2188 * (it can, however, still be terminated).
2190 vnode_getalways(coveredvp
);
2192 mount_dropcrossref(mp
, coveredvp
, 0);
2194 * We'll _try_ to detect if this really needs to be
2195 * done. The coveredvp can only be in termination (or
2196 * terminated) if the coveredvp's mount point is in a
2197 * forced unmount (or has been) since we still hold the
2200 if (!vnode_isrecycled(coveredvp
)) {
2201 pvp
= vnode_getparent(coveredvp
);
2203 if (coveredvp
->v_resolve
) {
2204 vnode_trigger_rearm(coveredvp
, ctx
);
2209 vnode_rele(coveredvp
);
2210 vnode_put(coveredvp
);
2214 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2217 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2218 mount_lock_destroy(mp
);
2220 mac_mount_label_destroy(mp
);
2222 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2224 panic("dounmount: no coveredvp");
2230 * Unmount any mounts in this filesystem.
2233 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2236 fsid_t
*fsids
, fsid
;
2238 int count
= 0, i
, m
= 0;
2243 // Get an array to hold the submounts fsids.
2244 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2246 fsids_sz
= count
* sizeof(fsid_t
);
2247 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2248 if (fsids
== NULL
) {
2249 mount_list_unlock();
2252 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2255 * Fill the array with submount fsids.
2256 * Since mounts are always added to the tail of the mount list, the
2257 * list is always in mount order.
2258 * For each mount check if the mounted-on vnode belongs to a
2259 * mount that's already added to our array of mounts to be unmounted.
2261 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2262 vp
= smp
->mnt_vnodecovered
;
2265 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2266 for (i
= 0; i
<= m
; i
++) {
2267 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2268 fsids
[i
].val
[1] == fsid
.val
[1]) {
2269 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2274 mount_list_unlock();
2276 // Unmount the submounts in reverse order. Ignore errors.
2277 for (i
= m
; i
> 0; i
--) {
2278 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2281 mount_iterdrop(smp
);
2282 (void) dounmount(smp
, flags
, 1, ctx
);
2287 FREE(fsids
, M_TEMP
);
2291 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2296 if (mp
->mnt_crossref
< 0)
2297 panic("mount cross refs -ve");
2299 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2302 vnode_put_locked(dp
);
2305 mount_lock_destroy(mp
);
2307 mac_mount_label_destroy(mp
);
2309 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2313 vnode_put_locked(dp
);
2319 * Sync each mounted filesystem.
2325 int print_vmpage_stat
=0;
2326 int sync_timeout
= 60; // Sync time limit (sec)
2330 sync_callback(mount_t mp
, __unused
void *arg
)
2332 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2333 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2335 mp
->mnt_flag
&= ~MNT_ASYNC
;
2336 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2338 mp
->mnt_flag
|= MNT_ASYNC
;
2341 return (VFS_RETURNED
);
2346 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2348 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2350 if (print_vmpage_stat
) {
2351 vm_countdirtypages();
2357 #endif /* DIAGNOSTIC */
2362 hibernate_sync_thread(void *arg
, __unused wait_result_t wr
)
2364 int *timeout
= (int *) arg
;
2366 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2369 wakeup((caddr_t
) timeout
);
2370 if (print_vmpage_stat
) {
2371 vm_countdirtypages();
2377 #endif /* DIAGNOSTIC */
2381 * Sync in a separate thread so we can time out if it blocks.
2384 hibernate_sync_async(int timeout
)
2388 struct timespec ts
= {timeout
, 0};
2390 lck_mtx_lock(sync_mtx_lck
);
2391 if (kernel_thread_start(hibernate_sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2392 printf("hibernate_sync_thread failed\n");
2393 lck_mtx_unlock(sync_mtx_lck
);
2397 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "hibernate_sync_thread", &ts
);
2399 printf("sync timed out: %d sec\n", timeout
);
2401 thread_deallocate(thd
);
2407 * An in-kernel sync for power management to call.
2409 __private_extern__
int
2412 (void) hibernate_sync_async(sync_timeout
);
2415 } /* end of sync_internal call */
2418 * Change filesystem quotas.
2422 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2425 int error
, quota_cmd
, quota_status
;
2428 struct nameidata nd
;
2429 vfs_context_t ctx
= vfs_context_current();
2430 struct dqblk my_dqblk
;
2432 AUDIT_ARG(uid
, uap
->uid
);
2433 AUDIT_ARG(cmd
, uap
->cmd
);
2434 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2439 mp
= nd
.ni_vp
->v_mount
;
2440 vnode_put(nd
.ni_vp
);
2443 /* copyin any data we will need for downstream code */
2444 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2446 switch (quota_cmd
) {
2448 /* uap->arg specifies a file from which to take the quotas */
2449 fnamelen
= MAXPATHLEN
;
2450 datap
= kalloc(MAXPATHLEN
);
2451 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2454 /* uap->arg is a pointer to a dqblk structure. */
2455 datap
= (caddr_t
) &my_dqblk
;
2459 /* uap->arg is a pointer to a dqblk structure. */
2460 datap
= (caddr_t
) &my_dqblk
;
2461 if (proc_is64bit(p
)) {
2462 struct user_dqblk my_dqblk64
;
2463 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2465 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2469 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2473 /* uap->arg is a pointer to an integer */
2474 datap
= (caddr_t
) "a_status
;
2482 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2485 switch (quota_cmd
) {
2488 kfree(datap
, MAXPATHLEN
);
2491 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2493 if (proc_is64bit(p
)) {
2494 struct user_dqblk my_dqblk64
;
2496 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
));
2497 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2498 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2501 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2506 /* uap->arg is a pointer to an integer */
2508 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2519 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2521 return (EOPNOTSUPP
);
2526 * Get filesystem statistics.
2528 * Returns: 0 Success
2530 * vfs_update_vfsstat:???
2531 * munge_statfs:EFAULT
2535 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2538 struct vfsstatfs
*sp
;
2540 struct nameidata nd
;
2541 vfs_context_t ctx
= vfs_context_current();
2544 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2545 UIO_USERSPACE
, uap
->path
, ctx
);
2551 sp
= &mp
->mnt_vfsstat
;
2555 error
= mac_mount_check_stat(ctx
, mp
);
2560 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2566 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2572 * Get filesystem statistics.
2576 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2580 struct vfsstatfs
*sp
;
2583 AUDIT_ARG(fd
, uap
->fd
);
2585 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2588 error
= vnode_getwithref(vp
);
2594 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2603 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2608 sp
= &mp
->mnt_vfsstat
;
2609 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2613 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2623 * Common routine to handle copying of statfs64 data to user space
2626 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2629 struct statfs64 sfs
;
2631 bzero(&sfs
, sizeof(sfs
));
2633 sfs
.f_bsize
= sfsp
->f_bsize
;
2634 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2635 sfs
.f_blocks
= sfsp
->f_blocks
;
2636 sfs
.f_bfree
= sfsp
->f_bfree
;
2637 sfs
.f_bavail
= sfsp
->f_bavail
;
2638 sfs
.f_files
= sfsp
->f_files
;
2639 sfs
.f_ffree
= sfsp
->f_ffree
;
2640 sfs
.f_fsid
= sfsp
->f_fsid
;
2641 sfs
.f_owner
= sfsp
->f_owner
;
2642 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2643 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2644 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2645 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2646 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2648 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2650 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2651 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2653 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2659 * Get file system statistics in 64-bit mode
2662 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2665 struct vfsstatfs
*sp
;
2667 struct nameidata nd
;
2668 vfs_context_t ctxp
= vfs_context_current();
2671 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2672 UIO_USERSPACE
, uap
->path
, ctxp
);
2678 sp
= &mp
->mnt_vfsstat
;
2682 error
= mac_mount_check_stat(ctxp
, mp
);
2687 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2693 error
= statfs64_common(mp
, sp
, uap
->buf
);
2700 * Get file system statistics in 64-bit mode
2703 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2707 struct vfsstatfs
*sp
;
2710 AUDIT_ARG(fd
, uap
->fd
);
2712 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2715 error
= vnode_getwithref(vp
);
2721 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2730 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2735 sp
= &mp
->mnt_vfsstat
;
2736 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2740 error
= statfs64_common(mp
, sp
, uap
->buf
);
2749 struct getfsstat_struct
{
2760 getfsstat_callback(mount_t mp
, void * arg
)
2763 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2764 struct vfsstatfs
*sp
;
2766 vfs_context_t ctx
= vfs_context_current();
2768 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2770 error
= mac_mount_check_stat(ctx
, mp
);
2772 fstp
->error
= error
;
2773 return(VFS_RETURNED_DONE
);
2776 sp
= &mp
->mnt_vfsstat
;
2778 * If MNT_NOWAIT is specified, do not refresh the
2779 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2781 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2782 (error
= vfs_update_vfsstat(mp
, ctx
,
2784 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2785 return(VFS_RETURNED
);
2789 * Need to handle LP64 version of struct statfs
2791 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2793 fstp
->error
= error
;
2794 return(VFS_RETURNED_DONE
);
2796 fstp
->sfsp
+= my_size
;
2800 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2802 fstp
->error
= error
;
2803 return(VFS_RETURNED_DONE
);
2810 return(VFS_RETURNED
);
2814 * Get statistics on all filesystems.
2817 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2819 struct __mac_getfsstat_args muap
;
2821 muap
.buf
= uap
->buf
;
2822 muap
.bufsize
= uap
->bufsize
;
2823 muap
.mac
= USER_ADDR_NULL
;
2825 muap
.flags
= uap
->flags
;
2827 return (__mac_getfsstat(p
, &muap
, retval
));
2831 * __mac_getfsstat: Get MAC-related file system statistics
2833 * Parameters: p (ignored)
2834 * uap User argument descriptor (see below)
2835 * retval Count of file system statistics (N stats)
2837 * Indirect: uap->bufsize Buffer size
2838 * uap->macsize MAC info size
2839 * uap->buf Buffer where information will be returned
2841 * uap->flags File system flags
2844 * Returns: 0 Success
2849 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2853 size_t count
, maxcount
, bufsize
, macsize
;
2854 struct getfsstat_struct fst
;
2856 bufsize
= (size_t) uap
->bufsize
;
2857 macsize
= (size_t) uap
->macsize
;
2859 if (IS_64BIT_PROCESS(p
)) {
2860 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2863 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2871 if (uap
->mac
!= USER_ADDR_NULL
) {
2876 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2877 if (count
!= maxcount
)
2880 /* Copy in the array */
2881 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2886 error
= copyin(uap
->mac
, mp0
, macsize
);
2888 FREE(mp0
, M_MACTEMP
);
2892 /* Normalize to an array of user_addr_t */
2893 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2895 FREE(mp0
, M_MACTEMP
);
2899 for (i
= 0; i
< count
; i
++) {
2900 if (IS_64BIT_PROCESS(p
))
2901 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2903 mp
[i
] = (user_addr_t
)mp0
[i
];
2905 FREE(mp0
, M_MACTEMP
);
2912 fst
.flags
= uap
->flags
;
2915 fst
.maxcount
= maxcount
;
2918 vfs_iterate(0, getfsstat_callback
, &fst
);
2921 FREE(mp
, M_MACTEMP
);
2924 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2928 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2929 *retval
= fst
.maxcount
;
2931 *retval
= fst
.count
;
2936 getfsstat64_callback(mount_t mp
, void * arg
)
2938 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2939 struct vfsstatfs
*sp
;
2942 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2944 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2946 fstp
->error
= error
;
2947 return(VFS_RETURNED_DONE
);
2950 sp
= &mp
->mnt_vfsstat
;
2952 * If MNT_NOWAIT is specified, do not refresh the fsstat
2953 * cache. MNT_WAIT overrides MNT_NOWAIT.
2955 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2956 * getfsstat, since the constants are out of the same
2959 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2960 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2961 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2962 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2963 return(VFS_RETURNED
);
2966 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2968 fstp
->error
= error
;
2969 return(VFS_RETURNED_DONE
);
2971 fstp
->sfsp
+= sizeof(struct statfs64
);
2974 return(VFS_RETURNED
);
2978 * Get statistics on all file systems in 64 bit mode.
2981 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2984 int count
, maxcount
;
2985 struct getfsstat_struct fst
;
2987 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2993 fst
.flags
= uap
->flags
;
2996 fst
.maxcount
= maxcount
;
2998 vfs_iterate(0, getfsstat64_callback
, &fst
);
3001 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
3005 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
3006 *retval
= fst
.maxcount
;
3008 *retval
= fst
.count
;
3014 * gets the associated vnode with the file descriptor passed.
3018 * ctx - vfs context of caller
3019 * fd - file descriptor for which vnode is required.
3020 * vpp - Pointer to pointer to vnode to be returned.
3022 * The vnode is returned with an iocount so any vnode obtained
3023 * by this call needs a vnode_put
3027 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
3031 struct fileproc
*fp
;
3032 proc_t p
= vfs_context_proc(ctx
);
3036 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
3040 error
= vnode_getwithref(vp
);
3042 (void)fp_drop(p
, fd
, fp
, 0);
3046 (void)fp_drop(p
, fd
, fp
, 0);
3052 * Wrapper function around namei to start lookup from a directory
3053 * specified by a file descriptor ni_dirfd.
3055 * In addition to all the errors returned by namei, this call can
3056 * return ENOTDIR if the file descriptor does not refer to a directory,
3057 * and EBADF if the file descriptor is not valid.
3060 nameiat(struct nameidata
*ndp
, int dirfd
)
3062 if ((dirfd
!= AT_FDCWD
) &&
3063 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
3064 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3068 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3069 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3073 c
= *((char *)(ndp
->ni_dirp
));
3079 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3084 if (vnode_vtype(dvp_at
) != VDIR
) {
3089 ndp
->ni_dvp
= dvp_at
;
3090 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3092 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3098 return (namei(ndp
));
3102 * Change current working directory to a given file descriptor.
3106 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3108 struct filedesc
*fdp
= p
->p_fd
;
3114 vfs_context_t ctx
= vfs_context_current();
3116 AUDIT_ARG(fd
, uap
->fd
);
3117 if (per_thread
&& uap
->fd
== -1) {
3119 * Switching back from per-thread to per process CWD; verify we
3120 * in fact have one before proceeding. The only success case
3121 * for this code path is to return 0 preemptively after zapping
3122 * the thread structure contents.
3124 thread_t th
= vfs_context_thread(ctx
);
3126 uthread_t uth
= get_bsdthread_info(th
);
3128 uth
->uu_cdir
= NULLVP
;
3129 if (tvp
!= NULLVP
) {
3137 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
3139 if ( (error
= vnode_getwithref(vp
)) ) {
3144 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3146 if (vp
->v_type
!= VDIR
) {
3152 error
= mac_vnode_check_chdir(ctx
, vp
);
3156 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3160 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3161 if (vfs_busy(mp
, LK_NOWAIT
)) {
3165 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3174 if ( (error
= vnode_ref(vp
)) )
3179 thread_t th
= vfs_context_thread(ctx
);
3181 uthread_t uth
= get_bsdthread_info(th
);
3184 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3209 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3211 return common_fchdir(p
, uap
, 0);
3215 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3217 return common_fchdir(p
, (void *)uap
, 1);
3221 * Change current working directory (".").
3223 * Returns: 0 Success
3224 * change_dir:ENOTDIR
3226 * vnode_ref:ENOENT No such file or directory
3230 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3232 struct filedesc
*fdp
= p
->p_fd
;
3234 struct nameidata nd
;
3236 vfs_context_t ctx
= vfs_context_current();
3238 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3239 UIO_USERSPACE
, uap
->path
, ctx
);
3240 error
= change_dir(&nd
, ctx
);
3243 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3244 vnode_put(nd
.ni_vp
);
3248 * drop the iocount we picked up in change_dir
3250 vnode_put(nd
.ni_vp
);
3253 thread_t th
= vfs_context_thread(ctx
);
3255 uthread_t uth
= get_bsdthread_info(th
);
3257 uth
->uu_cdir
= nd
.ni_vp
;
3258 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3260 vnode_rele(nd
.ni_vp
);
3266 fdp
->fd_cdir
= nd
.ni_vp
;
3280 * Change current working directory (".") for the entire process
3282 * Parameters: p Process requesting the call
3283 * uap User argument descriptor (see below)
3286 * Indirect parameters: uap->path Directory path
3288 * Returns: 0 Success
3289 * common_chdir: ENOTDIR
3290 * common_chdir: ENOENT No such file or directory
3295 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3297 return common_chdir(p
, (void *)uap
, 0);
3303 * Change current working directory (".") for a single thread
3305 * Parameters: p Process requesting the call
3306 * uap User argument descriptor (see below)
3309 * Indirect parameters: uap->path Directory path
3311 * Returns: 0 Success
3312 * common_chdir: ENOTDIR
3313 * common_chdir: ENOENT No such file or directory
3318 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3320 return common_chdir(p
, (void *)uap
, 1);
3325 * Change notion of root (``/'') directory.
3329 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3331 struct filedesc
*fdp
= p
->p_fd
;
3333 struct nameidata nd
;
3335 vfs_context_t ctx
= vfs_context_current();
3337 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3340 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3341 UIO_USERSPACE
, uap
->path
, ctx
);
3342 error
= change_dir(&nd
, ctx
);
3347 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3350 vnode_put(nd
.ni_vp
);
3355 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3356 vnode_put(nd
.ni_vp
);
3359 vnode_put(nd
.ni_vp
);
3363 fdp
->fd_rdir
= nd
.ni_vp
;
3364 fdp
->fd_flags
|= FD_CHROOT
;
3374 * Common routine for chroot and chdir.
3376 * Returns: 0 Success
3377 * ENOTDIR Not a directory
3378 * namei:??? [anything namei can return]
3379 * vnode_authorize:??? [anything vnode_authorize can return]
3382 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3387 if ((error
= namei(ndp
)))
3392 if (vp
->v_type
!= VDIR
) {
3398 error
= mac_vnode_check_chdir(ctx
, vp
);
3405 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3415 * Allocate the vnode data (for directories) associated with the file glob.
3418 fg_vn_data_alloc(void)
3420 struct fd_vn_data
*fvdata
;
3422 /* Allocate per fd vnode data */
3423 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3424 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3425 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3430 * Free the vnode data (for directories) associated with the file glob.
3433 fg_vn_data_free(void *fgvndata
)
3435 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3438 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3439 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3440 FREE(fvdata
, M_FD_VN_DATA
);
3444 * Check permissions, allocate an open file structure,
3445 * and call the device open routine if any.
3447 * Returns: 0 Success
3458 * XXX Need to implement uid, gid
3461 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3462 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3465 proc_t p
= vfs_context_proc(ctx
);
3466 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3467 struct fileproc
*fp
;
3470 int type
, indx
, error
;
3472 struct vfs_context context
;
3476 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3479 flags
= FFLAGS(uflags
);
3480 CLR(flags
, FENCRYPTED
);
3481 CLR(flags
, FUNENCRYPTED
);
3483 AUDIT_ARG(fflags
, oflags
);
3484 AUDIT_ARG(mode
, vap
->va_mode
);
3486 if ((error
= falloc_withalloc(p
,
3487 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3490 uu
->uu_dupfd
= -indx
- 1;
3492 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3493 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3494 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3495 fp_drop(p
, indx
, NULL
, 0);
3500 if (error
== ERESTART
)
3502 fp_free(p
, indx
, fp
);
3508 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3509 fp
->f_fglob
->fg_ops
= &vnops
;
3510 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3512 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3513 lf
.l_whence
= SEEK_SET
;
3516 if (flags
& O_EXLOCK
)
3517 lf
.l_type
= F_WRLCK
;
3519 lf
.l_type
= F_RDLCK
;
3521 if ((flags
& FNONBLOCK
) == 0)
3524 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3529 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3531 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3534 #if DEVELOPMENT || DEBUG
3536 * XXX VSWAP: Check for entitlements or special flag here
3537 * so we can restrict access appropriately.
3539 #else /* DEVELOPMENT || DEBUG */
3541 if (vnode_isswap(vp
) && (flags
& (FWRITE
| O_TRUNC
)) && (ctx
!= vfs_context_kernel())) {
3542 /* block attempt to write/truncate swapfile */
3546 #endif /* DEVELOPMENT || DEBUG */
3548 /* try to truncate by setting the size attribute */
3549 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3553 * For directories we hold some additional information in the fd.
3555 if (vnode_vtype(vp
) == VDIR
) {
3556 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3558 fp
->f_fglob
->fg_vn_data
= NULL
;
3564 * The first terminal open (without a O_NOCTTY) by a session leader
3565 * results in it being set as the controlling terminal.
3567 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3568 !(flags
& O_NOCTTY
)) {
3571 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3572 (caddr_t
)&tmp
, ctx
);
3576 if (flags
& O_CLOEXEC
)
3577 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3578 if (flags
& O_CLOFORK
)
3579 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3580 procfdtbl_releasefd(p
, indx
, NULL
);
3582 #if CONFIG_SECLUDED_MEMORY
3583 if (secluded_for_filecache
&&
3584 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3585 vnode_vtype(vp
) == VREG
) {
3586 memory_object_control_t moc
;
3588 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3590 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3591 /* nothing to do... */
3592 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3593 /* writable -> no longer eligible for secluded pages */
3594 memory_object_mark_eligible_for_secluded(moc
,
3596 } else if (secluded_for_filecache
== 1) {
3597 char pathname
[32] = { 0, };
3599 /* XXX FBDP: better way to detect /Applications/ ? */
3600 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3601 copyinstr(ndp
->ni_dirp
,
3606 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3611 pathname
[sizeof (pathname
) - 1] = '\0';
3612 if (strncmp(pathname
,
3614 strlen("/Applications/")) == 0 &&
3616 "/Applications/Camera.app/",
3617 strlen("/Applications/Camera.app/")) != 0) {
3620 * AND from "/Applications/"
3621 * AND not from "/Applications/Camera.app/"
3622 * ==> eligible for secluded
3624 memory_object_mark_eligible_for_secluded(moc
,
3627 } else if (secluded_for_filecache
== 2) {
3629 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3631 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3633 /* not implemented... */
3635 if (!strncmp(vp
->v_name
,
3636 DYLD_SHARED_CACHE_NAME
,
3637 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3638 !strncmp(vp
->v_name
,
3640 strlen(vp
->v_name
)) ||
3641 !strncmp(vp
->v_name
,
3643 strlen(vp
->v_name
)) ||
3644 !strncmp(vp
->v_name
,
3646 strlen(vp
->v_name
)) ||
3647 !strncmp(vp
->v_name
,
3649 strlen(vp
->v_name
))) {
3651 * This file matters when launching Camera:
3652 * do not store its contents in the secluded
3653 * pool that will be drained on Camera launch.
3655 memory_object_mark_eligible_for_secluded(moc
,
3660 #endif /* CONFIG_SECLUDED_MEMORY */
3662 fp_drop(p
, indx
, fp
, 1);
3669 context
= *vfs_context_current();
3670 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3672 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3673 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3674 lf
.l_whence
= SEEK_SET
;
3677 lf
.l_type
= F_UNLCK
;
3680 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3683 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3685 fp_free(p
, indx
, fp
);
3691 * While most of the *at syscall handlers can call nameiat() which
3692 * is a wrapper around namei, the use of namei and initialisation
3693 * of nameidata are far removed and in different functions - namei
3694 * gets called in vn_open_auth for open1. So we'll just do here what
3698 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3699 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3702 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3706 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3707 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3711 c
= *((char *)(ndp
->ni_dirp
));
3717 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3722 if (vnode_vtype(dvp_at
) != VDIR
) {
3727 ndp
->ni_dvp
= dvp_at
;
3728 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3729 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3736 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3740 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3742 * Parameters: p Process requesting the open
3743 * uap User argument descriptor (see below)
3744 * retval Pointer to an area to receive the
3745 * return value from the system call
3747 * Indirect: uap->path Path to open (same as 'open')
3748 * uap->flags Flags to open (same as 'open')
3749 * uap->uid UID to set, if creating
3750 * uap->gid GID to set, if creating
3751 * uap->mode File mode, if creating (same as 'open')
3752 * uap->xsecurity ACL to set, if creating
3754 * Returns: 0 Success
3757 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3759 * XXX: We should enumerate the possible errno values here, and where
3760 * in the code they originated.
3763 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3765 struct filedesc
*fdp
= p
->p_fd
;
3767 kauth_filesec_t xsecdst
;
3768 struct vnode_attr va
;
3769 struct nameidata nd
;
3772 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3775 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3776 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3780 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3781 VATTR_SET(&va
, va_mode
, cmode
);
3782 if (uap
->uid
!= KAUTH_UID_NONE
)
3783 VATTR_SET(&va
, va_uid
, uap
->uid
);
3784 if (uap
->gid
!= KAUTH_GID_NONE
)
3785 VATTR_SET(&va
, va_gid
, uap
->gid
);
3786 if (xsecdst
!= NULL
)
3787 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3789 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3790 uap
->path
, vfs_context_current());
3792 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3793 fileproc_alloc_init
, NULL
, retval
);
3794 if (xsecdst
!= NULL
)
3795 kauth_filesec_free(xsecdst
);
3801 * Go through the data-protected atomically controlled open (2)
3803 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3805 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3806 int flags
= uap
->flags
;
3807 int class = uap
->class;
3808 int dpflags
= uap
->dpflags
;
3811 * Follow the same path as normal open(2)
3812 * Look up the item if it exists, and acquire the vnode.
3814 struct filedesc
*fdp
= p
->p_fd
;
3815 struct vnode_attr va
;
3816 struct nameidata nd
;
3821 /* Mask off all but regular access permissions */
3822 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3823 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3825 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3826 uap
->path
, vfs_context_current());
3829 * Initialize the extra fields in vnode_attr to pass down our
3831 * 1. target cprotect class.
3832 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3834 if (flags
& O_CREAT
) {
3835 /* lower level kernel code validates that the class is valid before applying it. */
3836 if (class != PROTECTION_CLASS_DEFAULT
) {
3838 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3839 * file behave the same as open (2)
3841 VATTR_SET(&va
, va_dataprotect_class
, class);
3845 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3846 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3847 /* Not allowed to write raw encrypted bytes */
3850 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3851 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3853 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3854 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3858 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3859 fileproc_alloc_init
, NULL
, retval
);
3865 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3866 int fd
, enum uio_seg segflg
, int *retval
)
3868 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3869 struct vnode_attr va
;
3870 struct nameidata nd
;
3874 /* Mask off all but regular access permissions */
3875 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3876 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3878 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3881 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3886 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3888 __pthread_testcancel(1);
3889 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3893 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3896 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3897 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3901 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3904 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3905 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3909 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3911 __pthread_testcancel(1);
3912 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3916 * openbyid_np: open a file given a file system id and a file system object id
3917 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3918 * on file systems that don't support object ids, it is a node id (uint64_t).
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3922 * retval Pointer to an area to receive the
3923 * return value from the system call
3925 * Indirect: uap->path Path to open (same as 'open')
3927 * uap->fsid id of target file system
3928 * uap->objid id of target file system object
3929 * uap->oflags Flags to open (same as 'open')
3931 * Returns: 0 Success
3935 * XXX: We should enumerate the possible errno values here, and where
3936 * in the code they originated.
3939 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3945 int buflen
= MAXPATHLEN
;
3947 vfs_context_t ctx
= vfs_context_current();
3949 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
3953 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3957 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3958 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3962 AUDIT_ARG(value32
, fsid
.val
[0]);
3963 AUDIT_ARG(value64
, objid
);
3965 /* resolve path from fsid, objid */
3967 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3972 error
= fsgetpath_internal(
3973 ctx
, fsid
.val
[0], objid
,
3974 buflen
, buf
, &pathlen
);
3980 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3988 error
= openat_internal(
3989 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3998 * Create a special file.
4000 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
4003 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
4005 struct vnode_attr va
;
4006 vfs_context_t ctx
= vfs_context_current();
4008 struct nameidata nd
;
4012 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4013 VATTR_SET(&va
, va_rdev
, uap
->dev
);
4015 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4016 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
4017 return(mkfifo1(ctx
, uap
->path
, &va
));
4019 AUDIT_ARG(mode
, uap
->mode
);
4020 AUDIT_ARG(value32
, uap
->dev
);
4022 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
4024 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
4025 UIO_USERSPACE
, uap
->path
, ctx
);
4037 switch (uap
->mode
& S_IFMT
) {
4039 VATTR_SET(&va
, va_type
, VCHR
);
4042 VATTR_SET(&va
, va_type
, VBLK
);
4050 error
= mac_vnode_check_create(ctx
,
4051 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
4056 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4059 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
4063 int update_flags
= 0;
4065 // Make sure the name & parent pointers are hooked up
4066 if (vp
->v_name
== NULL
)
4067 update_flags
|= VNODE_UPDATE_NAME
;
4068 if (vp
->v_parent
== NULLVP
)
4069 update_flags
|= VNODE_UPDATE_PARENT
;
4072 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4075 add_fsevent(FSE_CREATE_FILE
, ctx
,
4083 * nameidone has to happen before we vnode_put(dvp)
4084 * since it may need to release the fs_nodelock on the dvp
4096 * Create a named pipe.
4098 * Returns: 0 Success
4101 * vnode_authorize:???
4105 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4109 struct nameidata nd
;
4111 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4112 UIO_USERSPACE
, upath
, ctx
);
4119 /* check that this is a new file and authorize addition */
4124 VATTR_SET(vap
, va_type
, VFIFO
);
4126 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
4129 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4132 * nameidone has to happen before we vnode_put(dvp)
4133 * since it may need to release the fs_nodelock on the dvp
4146 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4148 * Parameters: p Process requesting the open
4149 * uap User argument descriptor (see below)
4152 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4153 * uap->uid UID to set
4154 * uap->gid GID to set
4155 * uap->mode File mode to set (same as 'mkfifo')
4156 * uap->xsecurity ACL to set, if creating
4158 * Returns: 0 Success
4161 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4163 * XXX: We should enumerate the possible errno values here, and where
4164 * in the code they originated.
4167 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4170 kauth_filesec_t xsecdst
;
4171 struct vnode_attr va
;
4173 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4175 xsecdst
= KAUTH_FILESEC_NONE
;
4176 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4177 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
4182 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4183 if (uap
->uid
!= KAUTH_UID_NONE
)
4184 VATTR_SET(&va
, va_uid
, uap
->uid
);
4185 if (uap
->gid
!= KAUTH_GID_NONE
)
4186 VATTR_SET(&va
, va_gid
, uap
->gid
);
4187 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4188 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4190 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4192 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4193 kauth_filesec_free(xsecdst
);
4199 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4201 struct vnode_attr va
;
4204 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4206 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
4211 my_strrchr(char *p
, int ch
)
4215 for (save
= NULL
;; ++p
) {
4224 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4227 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4229 int ret
, len
= _len
;
4231 *truncated_path
= 0;
4232 ret
= vn_getpath(dvp
, path
, &len
);
4233 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4236 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4237 if (len
> MAXPATHLEN
) {
4240 // the string got truncated!
4241 *truncated_path
= 1;
4242 ptr
= my_strrchr(path
, '/');
4244 *ptr
= '\0'; // chop off the string at the last directory component
4246 len
= strlen(path
) + 1;
4249 } else if (ret
== 0) {
4250 *truncated_path
= 1;
4251 } else if (ret
!= 0) {
4252 struct vnode
*mydvp
=dvp
;
4254 if (ret
!= ENOSPC
) {
4255 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4256 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4258 *truncated_path
= 1;
4261 if (mydvp
->v_parent
!= NULL
) {
4262 mydvp
= mydvp
->v_parent
;
4263 } else if (mydvp
->v_mount
) {
4264 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4267 // no parent and no mount point? only thing is to punt and say "/" changed
4268 strlcpy(path
, "/", _len
);
4273 if (mydvp
== NULL
) {
4278 ret
= vn_getpath(mydvp
, path
, &len
);
4279 } while (ret
== ENOSPC
);
4287 * Make a hard file link.
4289 * Returns: 0 Success
4294 * vnode_authorize:???
4299 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4300 user_addr_t link
, int flag
, enum uio_seg segflg
)
4302 vnode_t vp
, dvp
, lvp
;
4303 struct nameidata nd
;
4309 int need_event
, has_listeners
;
4310 char *target_path
= NULL
;
4313 vp
= dvp
= lvp
= NULLVP
;
4315 /* look up the object we are linking to */
4316 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4317 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4320 error
= nameiat(&nd
, fd1
);
4328 * Normally, linking to directories is not supported.
4329 * However, some file systems may have limited support.
4331 if (vp
->v_type
== VDIR
) {
4332 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4333 error
= EPERM
; /* POSIX */
4337 /* Linking to a directory requires ownership. */
4338 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4339 struct vnode_attr dva
;
4342 VATTR_WANTED(&dva
, va_uid
);
4343 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4344 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4345 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4352 /* lookup the target node */
4356 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4357 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4359 error
= nameiat(&nd
, fd2
);
4366 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4370 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4371 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4374 /* target node must not exist */
4375 if (lvp
!= NULLVP
) {
4379 /* cannot link across mountpoints */
4380 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4385 /* authorize creation of the target node */
4386 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4389 /* and finally make the link */
4390 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4395 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4399 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4403 has_listeners
= kauth_authorize_fileop_has_listeners();
4405 if (need_event
|| has_listeners
) {
4406 char *link_to_path
= NULL
;
4407 int len
, link_name_len
;
4409 /* build the path to the new link file */
4410 GET_PATH(target_path
);
4411 if (target_path
== NULL
) {
4416 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4418 if (has_listeners
) {
4419 /* build the path to file we are linking to */
4420 GET_PATH(link_to_path
);
4421 if (link_to_path
== NULL
) {
4426 link_name_len
= MAXPATHLEN
;
4427 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4429 * Call out to allow 3rd party notification of link.
4430 * Ignore result of kauth_authorize_fileop call.
4432 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4433 (uintptr_t)link_to_path
,
4434 (uintptr_t)target_path
);
4436 if (link_to_path
!= NULL
) {
4437 RELEASE_PATH(link_to_path
);
4442 /* construct fsevent */
4443 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4445 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4448 // build the path to the destination of the link
4449 add_fsevent(FSE_CREATE_FILE
, ctx
,
4450 FSE_ARG_STRING
, len
, target_path
,
4451 FSE_ARG_FINFO
, &finfo
,
4455 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4456 FSE_ARG_VNODE
, vp
->v_parent
,
4464 * nameidone has to happen before we vnode_put(dvp)
4465 * since it may need to release the fs_nodelock on the dvp
4468 if (target_path
!= NULL
) {
4469 RELEASE_PATH(target_path
);
4481 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4483 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4484 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4488 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4490 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4493 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4494 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4498 * Make a symbolic link.
4500 * We could add support for ACLs here too...
4504 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4505 user_addr_t link
, enum uio_seg segflg
)
4507 struct vnode_attr va
;
4510 struct nameidata nd
;
4516 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4517 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4518 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4520 path
= (char *)path_data
;
4524 AUDIT_ARG(text
, path
); /* This is the link string */
4526 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4529 error
= nameiat(&nd
, fd
);
4535 p
= vfs_context_proc(ctx
);
4537 VATTR_SET(&va
, va_type
, VLNK
);
4538 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4541 error
= mac_vnode_check_create(ctx
,
4542 dvp
, &nd
.ni_cnd
, &va
);
4555 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4556 /* get default ownership, etc. */
4558 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4560 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4563 if (error
== 0 && vp
)
4564 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4567 /* do fallback attribute handling */
4568 if (error
== 0 && vp
)
4569 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4572 int update_flags
= 0;
4574 /* check if a new vnode was created, else try to get one */
4576 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4578 nd
.ni_op
= OP_LOOKUP
;
4580 nd
.ni_cnd
.cn_flags
= 0;
4581 error
= nameiat(&nd
, fd
);
4588 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4589 /* call out to allow 3rd party notification of symlink creation.
4590 * Ignore result of kauth_authorize_fileop call.
4592 if (kauth_authorize_fileop_has_listeners() &&
4594 char *new_link_path
= NULL
;
4597 /* build the path to the new link file */
4598 new_link_path
= get_pathbuff();
4600 vn_getpath(dvp
, new_link_path
, &len
);
4601 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4602 new_link_path
[len
- 1] = '/';
4603 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4606 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4607 (uintptr_t)path
, (uintptr_t)new_link_path
);
4608 if (new_link_path
!= NULL
)
4609 release_pathbuff(new_link_path
);
4612 // Make sure the name & parent pointers are hooked up
4613 if (vp
->v_name
== NULL
)
4614 update_flags
|= VNODE_UPDATE_NAME
;
4615 if (vp
->v_parent
== NULLVP
)
4616 update_flags
|= VNODE_UPDATE_PARENT
;
4619 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4622 add_fsevent(FSE_CREATE_FILE
, ctx
,
4630 * nameidone has to happen before we vnode_put(dvp)
4631 * since it may need to release the fs_nodelock on the dvp
4639 if (path
&& (path
!= (char *)path_data
))
4640 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4646 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4648 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4649 uap
->link
, UIO_USERSPACE
));
4653 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4654 __unused
int32_t *retval
)
4656 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4657 uap
->path2
, UIO_USERSPACE
));
4661 * Delete a whiteout from the filesystem.
4662 * No longer supported.
4665 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4671 * Delete a name from the filesystem.
4675 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4676 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4678 struct nameidata nd
;
4681 struct componentname
*cnp
;
4686 struct vnode_attr va
;
4693 struct vnode_attr
*vap
;
4695 int retry_count
= 0;
4698 cn_flags
= LOCKPARENT
;
4699 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4700 cn_flags
|= AUDITVNPATH1
;
4701 /* If a starting dvp is passed, it trumps any fd passed. */
4706 /* unlink or delete is allowed on rsrc forks and named streams */
4707 cn_flags
|= CN_ALLOWRSRCFORK
;
4718 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4720 nd
.ni_dvp
= start_dvp
;
4721 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4725 error
= nameiat(&nd
, fd
);
4733 /* With Carbon delete semantics, busy files cannot be deleted */
4734 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4735 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4738 /* Skip any potential upcalls if told to. */
4739 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4740 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4744 batched
= vnode_compound_remove_available(vp
);
4746 * The root of a mounted filesystem cannot be deleted.
4748 if (vp
->v_flag
& VROOT
) {
4752 #if DEVELOPMENT || DEBUG
4754 * XXX VSWAP: Check for entitlements or special flag here
4755 * so we can restrict access appropriately.
4757 #else /* DEVELOPMENT || DEBUG */
4759 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
4763 #endif /* DEVELOPMENT || DEBUG */
4766 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4768 if (error
== ENOENT
) {
4769 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4770 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4781 if (!vnode_compound_remove_available(dvp
)) {
4782 panic("No vp, but no compound remove?");
4787 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4790 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4791 /* XXX need to get these data in batched VNOP */
4792 get_fse_info(vp
, &finfo
, ctx
);
4795 error
= vfs_get_notify_attributes(&va
);
4804 has_listeners
= kauth_authorize_fileop_has_listeners();
4805 if (need_event
|| has_listeners
) {
4813 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4817 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4818 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4822 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4824 if (error
== EKEEPLOOKING
) {
4826 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4829 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4830 panic("EKEEPLOOKING, but continue flag not set?");
4833 if (vnode_isdir(vp
)) {
4837 goto continue_lookup
;
4838 } else if (error
== ENOENT
&& batched
) {
4839 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4840 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4842 * For compound VNOPs, the authorization callback may
4843 * return ENOENT in case of racing hardlink lookups
4844 * hitting the name cache, redrive the lookup.
4854 * Call out to allow 3rd party notification of delete.
4855 * Ignore result of kauth_authorize_fileop call.
4858 if (has_listeners
) {
4859 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4860 KAUTH_FILEOP_DELETE
,
4865 if (vp
->v_flag
& VISHARDLINK
) {
4867 // if a hardlink gets deleted we want to blow away the
4868 // v_parent link because the path that got us to this
4869 // instance of the link is no longer valid. this will
4870 // force the next call to get the path to ask the file
4871 // system instead of just following the v_parent link.
4873 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4878 if (vp
->v_flag
& VISHARDLINK
) {
4879 get_fse_info(vp
, &finfo
, ctx
);
4881 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4883 if (truncated_path
) {
4884 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4886 add_fsevent(FSE_DELETE
, ctx
,
4887 FSE_ARG_STRING
, len
, path
,
4888 FSE_ARG_FINFO
, &finfo
,
4899 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4900 * will cause its shadow file to go away if necessary.
4902 if (vp
&& (vnode_isnamedstream(vp
)) &&
4903 (vp
->v_parent
!= NULLVP
) &&
4904 vnode_isshadow(vp
)) {
4909 * nameidone has to happen before we vnode_put(dvp)
4910 * since it may need to release the fs_nodelock on the dvp
4926 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4927 enum uio_seg segflg
, int unlink_flags
)
4929 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4934 * Delete a name from the filesystem using Carbon semantics.
4937 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4939 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4940 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4944 * Delete a name from the filesystem using POSIX semantics.
4947 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4949 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4950 uap
->path
, UIO_USERSPACE
, 0));
4954 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4956 if (uap
->flag
& ~AT_REMOVEDIR
)
4959 if (uap
->flag
& AT_REMOVEDIR
)
4960 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4961 uap
->path
, UIO_USERSPACE
));
4963 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4964 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4968 * Reposition read/write file offset.
4971 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4973 struct fileproc
*fp
;
4975 struct vfs_context
*ctx
;
4976 off_t offset
= uap
->offset
, file_size
;
4979 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4980 if (error
== ENOTSUP
)
4984 if (vnode_isfifo(vp
)) {
4990 ctx
= vfs_context_current();
4992 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4993 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4996 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
5003 if ( (error
= vnode_getwithref(vp
)) ) {
5008 switch (uap
->whence
) {
5010 offset
+= fp
->f_fglob
->fg_offset
;
5013 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
5015 offset
+= file_size
;
5020 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
5023 error
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
5029 if (uap
->offset
> 0 && offset
< 0) {
5030 /* Incremented/relative move past max size */
5034 * Allow negative offsets on character devices, per
5035 * POSIX 1003.1-2001. Most likely for writing disk
5038 if (offset
< 0 && vp
->v_type
!= VCHR
) {
5039 /* Decremented/relative move before start */
5043 fp
->f_fglob
->fg_offset
= offset
;
5044 *retval
= fp
->f_fglob
->fg_offset
;
5050 * An lseek can affect whether data is "available to read." Use
5051 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5053 post_event_if_success(vp
, error
, NOTE_NONE
);
5054 (void)vnode_put(vp
);
5061 * Check access permissions.
5063 * Returns: 0 Success
5064 * vnode_authorize:???
5067 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
5069 kauth_action_t action
;
5073 * If just the regular access bits, convert them to something
5074 * that vnode_authorize will understand.
5076 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
5079 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5080 if (uflags
& W_OK
) {
5081 if (vnode_isdir(vp
)) {
5082 action
|= KAUTH_VNODE_ADD_FILE
|
5083 KAUTH_VNODE_ADD_SUBDIRECTORY
;
5084 /* might want delete rights here too */
5086 action
|= KAUTH_VNODE_WRITE_DATA
;
5089 if (uflags
& X_OK
) {
5090 if (vnode_isdir(vp
)) {
5091 action
|= KAUTH_VNODE_SEARCH
;
5093 action
|= KAUTH_VNODE_EXECUTE
;
5097 /* take advantage of definition of uflags */
5098 action
= uflags
>> 8;
5102 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5107 /* action == 0 means only check for existence */
5109 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5120 * access_extended: Check access permissions in bulk.
5122 * Description: uap->entries Pointer to an array of accessx
5123 * descriptor structs, plus one or
5124 * more NULL terminated strings (see
5125 * "Notes" section below).
5126 * uap->size Size of the area pointed to by
5128 * uap->results Pointer to the results array.
5130 * Returns: 0 Success
5131 * ENOMEM Insufficient memory
5132 * EINVAL Invalid arguments
5133 * namei:EFAULT Bad address
5134 * namei:ENAMETOOLONG Filename too long
5135 * namei:ENOENT No such file or directory
5136 * namei:ELOOP Too many levels of symbolic links
5137 * namei:EBADF Bad file descriptor
5138 * namei:ENOTDIR Not a directory
5143 * uap->results Array contents modified
5145 * Notes: The uap->entries are structured as an arbitrary length array
5146 * of accessx descriptors, followed by one or more NULL terminated
5149 * struct accessx_descriptor[0]
5151 * struct accessx_descriptor[n]
5152 * char name_data[0];
5154 * We determine the entry count by walking the buffer containing
5155 * the uap->entries argument descriptor. For each descriptor we
5156 * see, the valid values for the offset ad_name_offset will be
5157 * in the byte range:
5159 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5161 * [ uap->entries + uap->size - 2 ]
5163 * since we must have at least one string, and the string must
5164 * be at least one character plus the NULL terminator in length.
5166 * XXX: Need to support the check-as uid argument
5169 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5171 struct accessx_descriptor
*input
= NULL
;
5172 errno_t
*result
= NULL
;
5175 unsigned int desc_max
, desc_actual
, i
, j
;
5176 struct vfs_context context
;
5177 struct nameidata nd
;
5181 #define ACCESSX_MAX_DESCR_ON_STACK 10
5182 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5184 context
.vc_ucred
= NULL
;
5187 * Validate parameters; if valid, copy the descriptor array and string
5188 * arguments into local memory. Before proceeding, the following
5189 * conditions must have been met:
5191 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5192 * o There must be sufficient room in the request for at least one
5193 * descriptor and a one-byte NUL-terminated string.
5194 * o The allocation of local storage must not fail.
5196 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
5198 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
5200 if (uap
->size
<= sizeof (stack_input
)) {
5201 input
= stack_input
;
5203 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5204 if (input
== NULL
) {
5209 error
= copyin(uap
->entries
, input
, uap
->size
);
5213 AUDIT_ARG(opaque
, input
, uap
->size
);
5216 * Force NUL termination of the copyin buffer to avoid namei() running
5217 * off the end. If the caller passes us bogus data, they may get a
5220 ((char *)input
)[uap
->size
- 1] = 0;
5223 * Access is defined as checking against the process' real identity,
5224 * even if operations are checking the effective identity. This
5225 * requires that we use a local vfs context.
5227 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5228 context
.vc_thread
= current_thread();
5231 * Find out how many entries we have, so we can allocate the result
5232 * array by walking the list and adjusting the count downward by the
5233 * earliest string offset we see.
5235 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5236 desc_actual
= desc_max
;
5237 for (i
= 0; i
< desc_actual
; i
++) {
5239 * Take the offset to the name string for this entry and
5240 * convert to an input array index, which would be one off
5241 * the end of the array if this entry was the lowest-addressed
5244 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5247 * An offset greater than the max allowable offset is an error.
5248 * It is also an error for any valid entry to point
5249 * to a location prior to the end of the current entry, if
5250 * it's not a reference to the string of the previous entry.
5252 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5257 /* Also do not let ad_name_offset point to something beyond the size of the input */
5258 if (input
[i
].ad_name_offset
>= uap
->size
) {
5264 * An offset of 0 means use the previous descriptor's offset;
5265 * this is used to chain multiple requests for the same file
5266 * to avoid multiple lookups.
5269 /* This is not valid for the first entry */
5278 * If the offset of the string for this descriptor is before
5279 * what we believe is the current actual last descriptor,
5280 * then we need to adjust our estimate downward; this permits
5281 * the string table following the last descriptor to be out
5282 * of order relative to the descriptor list.
5284 if (j
< desc_actual
)
5289 * We limit the actual number of descriptors we are willing to process
5290 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5291 * requested does not exceed this limit,
5293 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5297 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5298 if (result
== NULL
) {
5304 * Do the work by iterating over the descriptor entries we know to
5305 * at least appear to contain valid data.
5308 for (i
= 0; i
< desc_actual
; i
++) {
5310 * If the ad_name_offset is 0, then we use the previous
5311 * results to make the check; otherwise, we are looking up
5314 if (input
[i
].ad_name_offset
!= 0) {
5315 /* discard old vnodes */
5326 * Scan forward in the descriptor list to see if we
5327 * need the parent vnode. We will need it if we are
5328 * deleting, since we must have rights to remove
5329 * entries in the parent directory, as well as the
5330 * rights to delete the object itself.
5332 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5333 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5334 if (input
[j
].ad_flags
& _DELETE_OK
)
5337 niopts
= FOLLOW
| AUDITVNPATH1
;
5339 /* need parent for vnode_authorize for deletion test */
5341 niopts
|= WANTPARENT
;
5344 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5345 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5357 * Handle lookup errors.
5367 /* run this access check */
5368 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5371 /* fatal lookup error */
5377 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5379 /* copy out results */
5380 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5383 if (input
&& input
!= stack_input
)
5384 FREE(input
, M_TEMP
);
5386 FREE(result
, M_TEMP
);
5391 if (IS_VALID_CRED(context
.vc_ucred
))
5392 kauth_cred_unref(&context
.vc_ucred
);
5398 * Returns: 0 Success
5399 * namei:EFAULT Bad address
5400 * namei:ENAMETOOLONG Filename too long
5401 * namei:ENOENT No such file or directory
5402 * namei:ELOOP Too many levels of symbolic links
5403 * namei:EBADF Bad file descriptor
5404 * namei:ENOTDIR Not a directory
5409 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5410 int flag
, enum uio_seg segflg
)
5413 struct nameidata nd
;
5415 struct vfs_context context
;
5417 int is_namedstream
= 0;
5421 * Unless the AT_EACCESS option is used, Access is defined as checking
5422 * against the process' real identity, even if operations are checking
5423 * the effective identity. So we need to tweak the credential
5424 * in the context for that case.
5426 if (!(flag
& AT_EACCESS
))
5427 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5429 context
.vc_ucred
= ctx
->vc_ucred
;
5430 context
.vc_thread
= ctx
->vc_thread
;
5433 niopts
= FOLLOW
| AUDITVNPATH1
;
5434 /* need parent for vnode_authorize for deletion test */
5435 if (amode
& _DELETE_OK
)
5436 niopts
|= WANTPARENT
;
5437 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5441 /* access(F_OK) calls are allowed for resource forks. */
5443 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5445 error
= nameiat(&nd
, fd
);
5450 /* Grab reference on the shadow stream file vnode to
5451 * force an inactive on release which will mark it
5454 if (vnode_isnamedstream(nd
.ni_vp
) &&
5455 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5456 vnode_isshadow(nd
.ni_vp
)) {
5458 vnode_ref(nd
.ni_vp
);
5462 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5465 if (is_namedstream
) {
5466 vnode_rele(nd
.ni_vp
);
5470 vnode_put(nd
.ni_vp
);
5471 if (amode
& _DELETE_OK
)
5472 vnode_put(nd
.ni_dvp
);
5476 if (!(flag
& AT_EACCESS
))
5477 kauth_cred_unref(&context
.vc_ucred
);
5482 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5484 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5485 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5489 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5490 __unused
int32_t *retval
)
5492 if (uap
->flag
& ~AT_EACCESS
)
5495 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5496 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5500 * Returns: 0 Success
5507 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5508 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5509 enum uio_seg segflg
, int fd
, int flag
)
5511 struct nameidata nd
;
5518 struct user64_stat user64_sb
;
5519 struct user32_stat user32_sb
;
5520 struct user64_stat64 user64_sb64
;
5521 struct user32_stat64 user32_sb64
;
5525 kauth_filesec_t fsec
;
5526 size_t xsecurity_bufsize
;
5529 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5530 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5534 int is_namedstream
= 0;
5535 /* stat calls are allowed for resource forks. */
5536 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5538 error
= nameiat(&nd
, fd
);
5541 fsec
= KAUTH_FILESEC_NONE
;
5543 statptr
= (void *)&source
;
5546 /* Grab reference on the shadow stream file vnode to
5547 * force an inactive on release which will mark it
5550 if (vnode_isnamedstream(nd
.ni_vp
) &&
5551 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5552 vnode_isshadow(nd
.ni_vp
)) {
5554 vnode_ref(nd
.ni_vp
);
5558 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5561 if (is_namedstream
) {
5562 vnode_rele(nd
.ni_vp
);
5565 vnode_put(nd
.ni_vp
);
5570 /* Zap spare fields */
5571 if (isstat64
!= 0) {
5572 source
.sb64
.st_lspare
= 0;
5573 source
.sb64
.st_qspare
[0] = 0LL;
5574 source
.sb64
.st_qspare
[1] = 0LL;
5575 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5576 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5577 my_size
= sizeof(dest
.user64_sb64
);
5578 sbp
= (caddr_t
)&dest
.user64_sb64
;
5580 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5581 my_size
= sizeof(dest
.user32_sb64
);
5582 sbp
= (caddr_t
)&dest
.user32_sb64
;
5585 * Check if we raced (post lookup) against the last unlink of a file.
5587 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5588 source
.sb64
.st_nlink
= 1;
5591 source
.sb
.st_lspare
= 0;
5592 source
.sb
.st_qspare
[0] = 0LL;
5593 source
.sb
.st_qspare
[1] = 0LL;
5594 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5595 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5596 my_size
= sizeof(dest
.user64_sb
);
5597 sbp
= (caddr_t
)&dest
.user64_sb
;
5599 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5600 my_size
= sizeof(dest
.user32_sb
);
5601 sbp
= (caddr_t
)&dest
.user32_sb
;
5605 * Check if we raced (post lookup) against the last unlink of a file.
5607 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5608 source
.sb
.st_nlink
= 1;
5611 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5614 /* caller wants extended security information? */
5615 if (xsecurity
!= USER_ADDR_NULL
) {
5617 /* did we get any? */
5618 if (fsec
== KAUTH_FILESEC_NONE
) {
5619 if (susize(xsecurity_size
, 0) != 0) {
5624 /* find the user buffer size */
5625 xsecurity_bufsize
= fusize(xsecurity_size
);
5627 /* copy out the actual data size */
5628 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5633 /* if the caller supplied enough room, copy out to it */
5634 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5635 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5639 if (fsec
!= KAUTH_FILESEC_NONE
)
5640 kauth_filesec_free(fsec
);
5645 * stat_extended: Get file status; with extended security (ACL).
5647 * Parameters: p (ignored)
5648 * uap User argument descriptor (see below)
5651 * Indirect: uap->path Path of file to get status from
5652 * uap->ub User buffer (holds file status info)
5653 * uap->xsecurity ACL to get (extended security)
5654 * uap->xsecurity_size Size of ACL
5656 * Returns: 0 Success
5661 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5662 __unused
int32_t *retval
)
5664 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5665 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5670 * Returns: 0 Success
5671 * fstatat_internal:??? [see fstatat_internal() in this file]
5674 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5676 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5677 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5681 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5683 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5684 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5688 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5690 * Parameters: p (ignored)
5691 * uap User argument descriptor (see below)
5694 * Indirect: uap->path Path of file to get status from
5695 * uap->ub User buffer (holds file status info)
5696 * uap->xsecurity ACL to get (extended security)
5697 * uap->xsecurity_size Size of ACL
5699 * Returns: 0 Success
5704 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5706 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5707 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5712 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5714 * Parameters: p (ignored)
5715 * uap User argument descriptor (see below)
5718 * Indirect: uap->path Path of file to get status from
5719 * uap->ub User buffer (holds file status info)
5720 * uap->xsecurity ACL to get (extended security)
5721 * uap->xsecurity_size Size of ACL
5723 * Returns: 0 Success
5728 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5730 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5731 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5732 AT_SYMLINK_NOFOLLOW
));
5736 * Get file status; this version does not follow links.
5739 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5741 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5742 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5746 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5748 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5749 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5753 * lstat64_extended: Get file status; can handle large inode numbers; does not
5754 * follow links; with extended security (ACL).
5756 * Parameters: p (ignored)
5757 * uap User argument descriptor (see below)
5760 * Indirect: uap->path Path of file to get status from
5761 * uap->ub User buffer (holds file status info)
5762 * uap->xsecurity ACL to get (extended security)
5763 * uap->xsecurity_size Size of ACL
5765 * Returns: 0 Success
5770 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5772 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5773 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5774 AT_SYMLINK_NOFOLLOW
));
5778 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5780 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5783 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5784 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5788 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5789 __unused
int32_t *retval
)
5791 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5794 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5795 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5799 * Get configurable pathname variables.
5801 * Returns: 0 Success
5805 * Notes: Global implementation constants are intended to be
5806 * implemented in this function directly; all other constants
5807 * are per-FS implementation, and therefore must be handled in
5808 * each respective FS, instead.
5810 * XXX We implement some things globally right now that should actually be
5811 * XXX per-FS; we will need to deal with this at some point.
5815 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5818 struct nameidata nd
;
5819 vfs_context_t ctx
= vfs_context_current();
5821 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5822 UIO_USERSPACE
, uap
->path
, ctx
);
5827 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5829 vnode_put(nd
.ni_vp
);
5835 * Return target name of a symbolic link.
5839 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5840 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5846 struct nameidata nd
;
5847 char uio_buf
[ UIO_SIZEOF(1) ];
5849 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5852 error
= nameiat(&nd
, fd
);
5859 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5860 &uio_buf
[0], sizeof(uio_buf
));
5861 uio_addiov(auio
, buf
, bufsize
);
5862 if (vp
->v_type
!= VLNK
) {
5866 error
= mac_vnode_check_readlink(ctx
, vp
);
5869 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5872 error
= VNOP_READLINK(vp
, auio
, ctx
);
5876 *retval
= bufsize
- (int)uio_resid(auio
);
5881 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5883 enum uio_seg procseg
;
5885 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5886 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5887 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5888 uap
->count
, procseg
, retval
));
5892 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5894 enum uio_seg procseg
;
5896 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5897 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5898 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5902 * Change file flags.
5904 * NOTE: this will vnode_put() `vp'
5907 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5909 struct vnode_attr va
;
5910 kauth_action_t action
;
5914 VATTR_SET(&va
, va_flags
, flags
);
5917 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5922 /* request authorisation, disregard immutability */
5923 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5926 * Request that the auth layer disregard those file flags it's allowed to when
5927 * authorizing this operation; we need to do this in order to be able to
5928 * clear immutable flags.
5930 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5932 error
= vnode_setattr(vp
, &va
, ctx
);
5936 mac_vnode_notify_setflags(ctx
, vp
, flags
);
5939 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5948 * Change flags of a file given a path name.
5952 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5955 vfs_context_t ctx
= vfs_context_current();
5957 struct nameidata nd
;
5959 AUDIT_ARG(fflags
, uap
->flags
);
5960 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5961 UIO_USERSPACE
, uap
->path
, ctx
);
5968 /* we don't vnode_put() here because chflags1 does internally */
5969 error
= chflags1(vp
, uap
->flags
, ctx
);
5975 * Change flags of a file given a file descriptor.
5979 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5984 AUDIT_ARG(fd
, uap
->fd
);
5985 AUDIT_ARG(fflags
, uap
->flags
);
5986 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5989 if ((error
= vnode_getwithref(vp
))) {
5994 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5996 /* we don't vnode_put() here because chflags1 does internally */
5997 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
6004 * Change security information on a filesystem object.
6006 * Returns: 0 Success
6007 * EPERM Operation not permitted
6008 * vnode_authattr:??? [anything vnode_authattr can return]
6009 * vnode_authorize:??? [anything vnode_authorize can return]
6010 * vnode_setattr:??? [anything vnode_setattr can return]
6012 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6013 * translated to EPERM before being returned.
6016 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
6018 kauth_action_t action
;
6021 AUDIT_ARG(mode
, vap
->va_mode
);
6022 /* XXX audit new args */
6025 /* chmod calls are not allowed for resource forks. */
6026 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6032 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
6033 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
6036 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
6037 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
6038 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6039 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1)))
6043 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
6044 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
)))
6048 /* make sure that the caller is allowed to set this security information */
6049 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
6050 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6051 if (error
== EACCES
)
6056 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0)
6060 if (VATTR_IS_ACTIVE(vap
, va_mode
))
6061 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
6063 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
))
6064 mac_vnode_notify_setowner(ctx
, vp
,
6065 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
6066 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
6068 if (VATTR_IS_ACTIVE(vap
, va_acl
))
6069 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
6077 * Change mode of a file given a path name.
6079 * Returns: 0 Success
6080 * namei:??? [anything namei can return]
6081 * chmod_vnode:??? [anything chmod_vnode can return]
6084 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
6085 int fd
, int flag
, enum uio_seg segflg
)
6087 struct nameidata nd
;
6090 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6091 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
6093 if ((error
= nameiat(&nd
, fd
)))
6095 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
6096 vnode_put(nd
.ni_vp
);
6102 * chmod_extended: Change the mode of a file given a path name; with extended
6103 * argument list (including extended security (ACL)).
6105 * Parameters: p Process requesting to change file mode
6106 * uap User argument descriptor (see below)
6109 * Indirect: uap->path Path to object (same as 'chmod')
6110 * uap->uid UID to set
6111 * uap->gid GID to set
6112 * uap->mode File mode to set (same as 'chmod')
6113 * uap->xsecurity ACL to set (or delete)
6115 * Returns: 0 Success
6118 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6120 * XXX: We should enumerate the possible errno values here, and where
6121 * in the code they originated.
6124 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6127 struct vnode_attr va
;
6128 kauth_filesec_t xsecdst
;
6130 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6133 if (uap
->mode
!= -1)
6134 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6135 if (uap
->uid
!= KAUTH_UID_NONE
)
6136 VATTR_SET(&va
, va_uid
, uap
->uid
);
6137 if (uap
->gid
!= KAUTH_GID_NONE
)
6138 VATTR_SET(&va
, va_gid
, uap
->gid
);
6141 switch(uap
->xsecurity
) {
6142 /* explicit remove request */
6143 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6144 VATTR_SET(&va
, va_acl
, NULL
);
6147 case USER_ADDR_NULL
:
6150 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6152 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6153 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6156 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6159 if (xsecdst
!= NULL
)
6160 kauth_filesec_free(xsecdst
);
6165 * Returns: 0 Success
6166 * chmodat:??? [anything chmodat can return]
6169 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6170 int flag
, enum uio_seg segflg
)
6172 struct vnode_attr va
;
6175 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6177 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
6181 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6183 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6184 AT_FDCWD
, 0, UIO_USERSPACE
));
6188 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6190 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6193 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6194 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
6198 * Change mode of a file given a file descriptor.
6201 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6208 if ((error
= file_vnode(fd
, &vp
)) != 0)
6210 if ((error
= vnode_getwithref(vp
)) != 0) {
6214 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6216 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6217 (void)vnode_put(vp
);
6224 * fchmod_extended: Change mode of a file given a file descriptor; with
6225 * extended argument list (including extended security (ACL)).
6227 * Parameters: p Process requesting to change file mode
6228 * uap User argument descriptor (see below)
6231 * Indirect: uap->mode File mode to set (same as 'chmod')
6232 * uap->uid UID to set
6233 * uap->gid GID to set
6234 * uap->xsecurity ACL to set (or delete)
6235 * uap->fd File descriptor of file to change mode
6237 * Returns: 0 Success
6242 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6245 struct vnode_attr va
;
6246 kauth_filesec_t xsecdst
;
6248 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6251 if (uap
->mode
!= -1)
6252 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6253 if (uap
->uid
!= KAUTH_UID_NONE
)
6254 VATTR_SET(&va
, va_uid
, uap
->uid
);
6255 if (uap
->gid
!= KAUTH_GID_NONE
)
6256 VATTR_SET(&va
, va_gid
, uap
->gid
);
6259 switch(uap
->xsecurity
) {
6260 case USER_ADDR_NULL
:
6261 VATTR_SET(&va
, va_acl
, NULL
);
6263 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6264 VATTR_SET(&va
, va_acl
, NULL
);
6267 case CAST_USER_ADDR_T(-1):
6270 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6272 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6275 error
= fchmod1(p
, uap
->fd
, &va
);
6278 switch(uap
->xsecurity
) {
6279 case USER_ADDR_NULL
:
6280 case CAST_USER_ADDR_T(-1):
6283 if (xsecdst
!= NULL
)
6284 kauth_filesec_free(xsecdst
);
6290 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6292 struct vnode_attr va
;
6295 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6297 return(fchmod1(p
, uap
->fd
, &va
));
6302 * Set ownership given a path name.
6306 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6307 gid_t gid
, int flag
, enum uio_seg segflg
)
6310 struct vnode_attr va
;
6312 struct nameidata nd
;
6314 kauth_action_t action
;
6316 AUDIT_ARG(owner
, uid
, gid
);
6318 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6319 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6321 error
= nameiat(&nd
, fd
);
6329 if (uid
!= (uid_t
)VNOVAL
)
6330 VATTR_SET(&va
, va_uid
, uid
);
6331 if (gid
!= (gid_t
)VNOVAL
)
6332 VATTR_SET(&va
, va_gid
, gid
);
6335 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6340 /* preflight and authorize attribute changes */
6341 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6343 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6345 error
= vnode_setattr(vp
, &va
, ctx
);
6349 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6354 * EACCES is only allowed from namei(); permissions failure should
6355 * return EPERM, so we need to translate the error code.
6357 if (error
== EACCES
)
6365 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6367 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6368 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
6372 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6374 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6375 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
6379 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6381 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6384 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6385 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6389 * Set ownership given a file descriptor.
6393 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6395 struct vnode_attr va
;
6396 vfs_context_t ctx
= vfs_context_current();
6399 kauth_action_t action
;
6401 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6402 AUDIT_ARG(fd
, uap
->fd
);
6404 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6407 if ( (error
= vnode_getwithref(vp
)) ) {
6411 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6414 if (uap
->uid
!= VNOVAL
)
6415 VATTR_SET(&va
, va_uid
, uap
->uid
);
6416 if (uap
->gid
!= VNOVAL
)
6417 VATTR_SET(&va
, va_gid
, uap
->gid
);
6420 /* chown calls are not allowed for resource forks. */
6421 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6428 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6433 /* preflight and authorize attribute changes */
6434 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6436 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6437 if (error
== EACCES
)
6441 error
= vnode_setattr(vp
, &va
, ctx
);
6445 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6449 (void)vnode_put(vp
);
6455 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6459 if (usrtvp
== USER_ADDR_NULL
) {
6460 struct timeval old_tv
;
6461 /* XXX Y2038 bug because of microtime argument */
6463 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6466 if (IS_64BIT_PROCESS(current_proc())) {
6467 struct user64_timeval tv
[2];
6468 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6471 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6472 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6474 struct user32_timeval tv
[2];
6475 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6478 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6479 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6486 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6490 struct vnode_attr va
;
6491 kauth_action_t action
;
6493 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6496 VATTR_SET(&va
, va_access_time
, ts
[0]);
6497 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6499 va
.va_vaflags
|= VA_UTIMES_NULL
;
6502 /* utimes calls are not allowed for resource forks. */
6503 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6510 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6514 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6515 if (!nullflag
&& error
== EACCES
)
6520 /* since we may not need to auth anything, check here */
6521 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6522 if (!nullflag
&& error
== EACCES
)
6526 error
= vnode_setattr(vp
, &va
, ctx
);
6530 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6538 * Set the access and modification times of a file.
6542 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6544 struct timespec ts
[2];
6547 struct nameidata nd
;
6548 vfs_context_t ctx
= vfs_context_current();
6551 * AUDIT: Needed to change the order of operations to do the
6552 * name lookup first because auditing wants the path.
6554 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6555 UIO_USERSPACE
, uap
->path
, ctx
);
6562 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6563 * the current time instead.
6566 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6569 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6572 vnode_put(nd
.ni_vp
);
6577 * Set the access and modification times of a file.
6581 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6583 struct timespec ts
[2];
6588 AUDIT_ARG(fd
, uap
->fd
);
6590 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6592 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6594 if((error
= vnode_getwithref(vp
))) {
6599 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6606 * Truncate a file given its path name.
6610 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6613 struct vnode_attr va
;
6614 vfs_context_t ctx
= vfs_context_current();
6616 struct nameidata nd
;
6617 kauth_action_t action
;
6619 if (uap
->length
< 0)
6621 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6622 UIO_USERSPACE
, uap
->path
, ctx
);
6623 if ((error
= namei(&nd
)))
6630 VATTR_SET(&va
, va_data_size
, uap
->length
);
6633 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6638 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6640 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6642 error
= vnode_setattr(vp
, &va
, ctx
);
6646 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6655 * Truncate a file given a file descriptor.
6659 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6661 vfs_context_t ctx
= vfs_context_current();
6662 struct vnode_attr va
;
6664 struct fileproc
*fp
;
6668 AUDIT_ARG(fd
, uap
->fd
);
6669 if (uap
->length
< 0)
6672 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6676 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6678 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6687 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
6689 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6690 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6695 if ((error
= vnode_getwithref(vp
)) != 0) {
6699 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6702 error
= mac_vnode_check_truncate(ctx
,
6703 fp
->f_fglob
->fg_cred
, vp
);
6705 (void)vnode_put(vp
);
6710 VATTR_SET(&va
, va_data_size
, uap
->length
);
6711 error
= vnode_setattr(vp
, &va
, ctx
);
6715 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
6718 (void)vnode_put(vp
);
6726 * Sync an open file with synchronized I/O _file_ integrity completion
6730 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6732 __pthread_testcancel(1);
6733 return(fsync_common(p
, uap
, MNT_WAIT
));
6738 * Sync an open file with synchronized I/O _file_ integrity completion
6740 * Notes: This is a legacy support function that does not test for
6741 * thread cancellation points.
6745 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6747 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6752 * Sync an open file with synchronized I/O _data_ integrity completion
6756 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6758 __pthread_testcancel(1);
6759 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6766 * Common fsync code to support both synchronized I/O file integrity completion
6767 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6769 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6770 * will only guarantee that the file data contents are retrievable. If
6771 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6772 * includes additional metadata unnecessary for retrieving the file data
6773 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6776 * Parameters: p The process
6777 * uap->fd The descriptor to synchronize
6778 * flags The data integrity flags
6780 * Returns: int Success
6781 * fp_getfvp:EBADF Bad file descriptor
6782 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6783 * VNOP_FSYNC:??? unspecified
6785 * Notes: We use struct fsync_args because it is a short name, and all
6786 * caller argument structures are otherwise identical.
6789 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6792 struct fileproc
*fp
;
6793 vfs_context_t ctx
= vfs_context_current();
6796 AUDIT_ARG(fd
, uap
->fd
);
6798 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6800 if ( (error
= vnode_getwithref(vp
)) ) {
6805 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6807 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6810 /* Sync resource fork shadow file if necessary. */
6812 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6813 (vp
->v_parent
!= NULLVP
) &&
6814 vnode_isshadow(vp
) &&
6815 (fp
->f_flags
& FP_WRITTEN
)) {
6816 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6820 (void)vnode_put(vp
);
6826 * Duplicate files. Source must be a file, target must be a file or
6829 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6830 * perform inheritance correctly.
6834 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6836 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6837 struct nameidata fromnd
, tond
;
6839 vfs_context_t ctx
= vfs_context_current();
6841 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
6842 struct vnode_attr va
;
6845 /* Check that the flags are valid. */
6847 if (uap
->flags
& ~CPF_MASK
) {
6851 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6852 UIO_USERSPACE
, uap
->from
, ctx
);
6853 if ((error
= namei(&fromnd
)))
6857 NDINIT(&tond
, CREATE
, OP_LINK
,
6858 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6859 UIO_USERSPACE
, uap
->to
, ctx
);
6860 if ((error
= namei(&tond
))) {
6867 if (!(uap
->flags
& CPF_OVERWRITE
)) {
6873 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6878 /* This calls existing MAC hooks for open */
6879 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
6886 * See unlinkat_internal for an explanation of the potential
6887 * ENOENT from the MAC hook but the gist is that the MAC hook
6888 * can fail because vn_getpath isn't able to return the full
6889 * path. We choose to ignore this failure.
6891 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
6892 if (error
&& error
!= ENOENT
)
6899 VATTR_SET(&va
, va_type
, fvp
->v_type
);
6900 /* Mask off all but regular access permissions */
6901 VATTR_SET(&va
, va_mode
,
6902 ((((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
6903 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
6906 #endif /* CONFIG_MACF */
6908 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6914 * If source is the same as the destination (that is the
6915 * same inode number) then there is nothing to do.
6916 * (fixed to have POSIX semantics - CSM 3/2/98)
6921 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6923 sdvp
= tond
.ni_startdir
;
6925 * nameidone has to happen before we vnode_put(tdvp)
6926 * since it may need to release the fs_nodelock on the tdvp
6944 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6947 * Helper function for doing clones. The caller is expected to provide an
6948 * iocounted source vnode and release it.
6951 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
6952 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
6955 struct nameidata tond
;
6958 boolean_t free_src_acl
;
6959 boolean_t attr_cleanup
;
6961 kauth_action_t action
;
6962 struct componentname
*cnp
;
6964 struct vnode_attr va
;
6965 struct vnode_attr nva
;
6966 uint32_t vnop_flags
;
6968 v_type
= vnode_vtype(fvp
);
6973 action
= KAUTH_VNODE_ADD_FILE
;
6976 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
6977 fvp
->v_mountedhere
) {
6980 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
6986 AUDIT_ARG(fd2
, dst_dirfd
);
6987 AUDIT_ARG(value32
, flags
);
6989 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6990 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
6991 UIO_USERSPACE
, dst
, ctx
);
6992 if ((error
= nameiat(&tond
, dst_dirfd
)))
6998 free_src_acl
= FALSE
;
6999 attr_cleanup
= FALSE
;
7006 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
7012 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
)))
7015 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
)))
7018 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
7019 if (data_read_authorised
)
7020 action
&= ~KAUTH_VNODE_READ_DATA
;
7021 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
)))
7025 * certain attributes may need to be changed from the source, we ask for
7029 VATTR_WANTED(&va
, va_uid
);
7030 VATTR_WANTED(&va
, va_gid
);
7031 VATTR_WANTED(&va
, va_mode
);
7032 VATTR_WANTED(&va
, va_flags
);
7033 VATTR_WANTED(&va
, va_acl
);
7035 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0)
7039 VATTR_SET(&nva
, va_type
, v_type
);
7040 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
7041 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
7042 free_src_acl
= TRUE
;
7045 /* Handle ACL inheritance, initialize vap. */
7046 if (v_type
== VLNK
) {
7047 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
7049 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
7052 attr_cleanup
= TRUE
;
7055 vnop_flags
= VNODE_CLONEFILE_DEFAULT
;
7057 * We've got initial values for all security parameters.
7058 * If we are superuser, then we can change owners to be the
7059 * same as the source. Both superuser and the owner have default
7060 * WRITE_SECURITY privileges so all other fields can be taken
7061 * from source as well.
7063 if (!(flags
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) {
7064 if (VATTR_IS_SUPPORTED(&va
, va_uid
))
7065 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
7066 if (VATTR_IS_SUPPORTED(&va
, va_gid
))
7067 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
7069 vnop_flags
|= VNODE_CLONEFILE_NOOWNERCOPY
;
7072 if (VATTR_IS_SUPPORTED(&va
, va_mode
))
7073 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
7074 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
7075 VATTR_SET(&nva
, va_flags
,
7076 ((va
.va_flags
& ~(UF_DATAVAULT
| SF_RESTRICTED
)) | /* Turn off from source */
7077 (nva
.va_flags
& (UF_DATAVAULT
| SF_RESTRICTED
))));
7080 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
);
7082 if (!error
&& tvp
) {
7083 int update_flags
= 0;
7086 #endif /* CONFIG_FSE */
7089 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
7090 VNODE_LABEL_CREATE
, ctx
);
7093 * If some of the requested attributes weren't handled by the
7094 * VNOP, use our fallback code.
7096 if (!VATTR_ALL_SUPPORTED(&va
))
7097 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
7099 // Make sure the name & parent pointers are hooked up
7100 if (tvp
->v_name
== NULL
)
7101 update_flags
|= VNODE_UPDATE_NAME
;
7102 if (tvp
->v_parent
== NULLVP
)
7103 update_flags
|= VNODE_UPDATE_PARENT
;
7106 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7107 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7111 switch (vnode_vtype(tvp
)) {
7115 fsevent
= FSE_CREATE_FILE
;
7118 fsevent
= FSE_CREATE_DIR
;
7124 if (need_fsevent(fsevent
, tvp
)) {
7126 * The following is a sequence of three explicit events.
7127 * A pair of FSE_CLONE events representing the source and destination
7128 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7129 * fseventsd may coalesce the destination clone and create events
7130 * into a single event resulting in the following sequence for a client
7132 * FSE_CLONE | FSE_CREATE (dst)
7134 add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
,
7136 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7139 #endif /* CONFIG_FSE */
7144 vn_attribute_cleanup(&nva
, defaulted
);
7145 if (free_src_acl
&& va
.va_acl
)
7146 kauth_acl_free(va
.va_acl
);
7155 * clone files or directories, target must not exist.
7159 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7160 __unused
int32_t *retval
)
7163 struct nameidata fromnd
;
7166 vfs_context_t ctx
= vfs_context_current();
7168 /* Check that the flags are valid. */
7169 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
))
7172 AUDIT_ARG(fd
, uap
->src_dirfd
);
7174 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7175 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7176 UIO_USERSPACE
, uap
->src
, ctx
);
7177 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
)))
7183 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
7191 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7192 __unused
int32_t *retval
)
7195 struct fileproc
*fp
;
7197 vfs_context_t ctx
= vfs_context_current();
7199 /* Check that the flags are valid. */
7200 if (uap
->flags
& ~(CLONE_NOFOLLOW
| CLONE_NOOWNERCOPY
))
7203 AUDIT_ARG(fd
, uap
->src_fd
);
7204 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7208 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7209 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7214 if ((error
= vnode_getwithref(fvp
)))
7217 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7219 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7224 file_drop(uap
->src_fd
);
7229 * Rename files. Source and destination must either both be directories,
7230 * or both not be directories. If target is a directory, it must be empty.
7234 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7235 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7237 if (flags
& ~VFS_RENAME_FLAGS_MASK
)
7240 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
))
7245 struct nameidata
*fromnd
, *tond
;
7251 const char *oname
= NULL
;
7252 char *from_name
= NULL
, *to_name
= NULL
;
7253 int from_len
=0, to_len
=0;
7254 int holding_mntlock
;
7255 mount_t locked_mp
= NULL
;
7256 vnode_t oparent
= NULLVP
;
7258 fse_info from_finfo
, to_finfo
;
7260 int from_truncated
=0, to_truncated
;
7262 struct vnode_attr
*fvap
, *tvap
;
7264 /* carving out a chunk for structs that are too big to be on stack. */
7266 struct nameidata from_node
, to_node
;
7267 struct vnode_attr fv_attr
, tv_attr
;
7269 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7270 fromnd
= &__rename_data
->from_node
;
7271 tond
= &__rename_data
->to_node
;
7273 holding_mntlock
= 0;
7282 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7284 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7286 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7288 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7291 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7292 if ( (error
= nameiat(fromnd
, fromfd
)) )
7294 fdvp
= fromnd
->ni_dvp
;
7295 fvp
= fromnd
->ni_vp
;
7297 if (fvp
&& fvp
->v_type
== VDIR
)
7298 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7301 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7302 if ( (error
= nameiat(tond
, tofd
)) ) {
7304 * Translate error code for rename("dir1", "dir2/.").
7306 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
7310 tdvp
= tond
->ni_dvp
;
7314 #if DEVELOPMENT || DEBUG
7316 * XXX VSWAP: Check for entitlements or special flag here
7317 * so we can restrict access appropriately.
7319 #else /* DEVELOPMENT || DEBUG */
7321 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7326 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7330 #endif /* DEVELOPMENT || DEBUG */
7332 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7337 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7342 batched
= vnode_compound_rename_available(fdvp
);
7345 * Claim: this check will never reject a valid rename.
7346 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7347 * Suppose fdvp and tdvp are not on the same mount.
7348 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7349 * then you can't move it to within another dir on the same mountpoint.
7350 * If fvp sits atop a vnode on the same mount as tdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7352 * If this check passes, then we are safe to pass these vnodes to the same FS.
7354 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7358 goto skipped_lookup
;
7362 error
= vn_authorize_renamex(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, flags
, NULL
);
7364 if (error
== ENOENT
) {
7365 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7366 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7368 * We encountered a race where after doing the namei, tvp stops
7369 * being valid. If so, simply re-drive the rename call from the
7381 * If the source and destination are the same (i.e. they're
7382 * links to the same vnode) and the target file system is
7383 * case sensitive, then there is nothing to do.
7385 * XXX Come back to this.
7391 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7392 * then assume that this file system is case sensitive.
7394 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7395 pathconf_val
!= 0) {
7401 * Allow the renaming of mount points.
7402 * - target must not exist
7403 * - target must reside in the same directory as source
7404 * - union mounts cannot be renamed
7405 * - "/" cannot be renamed
7407 * XXX Handle this in VFS after a continued lookup (if we missed
7408 * in the cache to start off)
7410 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7411 * we'll skip past here. The file system is responsible for
7412 * checking that @tvp is not a descendent of @fvp and vice versa
7413 * so it should always return EINVAL if either @tvp or @fvp is the
7416 if ((fvp
->v_flag
& VROOT
) &&
7417 (fvp
->v_type
== VDIR
) &&
7419 (fvp
->v_mountedhere
== NULL
) &&
7421 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7422 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7425 /* switch fvp to the covered vnode */
7426 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7427 if ( (vnode_getwithref(coveredvp
)) ) {
7437 * Check for cross-device rename.
7439 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7440 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7446 * If source is the same as the destination (that is the
7447 * same inode number) then there is nothing to do...
7448 * EXCEPT if the underlying file system supports case
7449 * insensitivity and is case preserving. In this case
7450 * the file system needs to handle the special case of
7451 * getting the same vnode as target (fvp) and source (tvp).
7453 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7454 * and _PC_CASE_PRESERVING can have this exception, and they need to
7455 * handle the special case of getting the same vnode as target and
7456 * source. NOTE: Then the target is unlocked going into vnop_rename,
7457 * so not to cause locking problems. There is a single reference on tvp.
7459 * NOTE - that fvp == tvp also occurs if they are hard linked and
7460 * that correct behaviour then is just to return success without doing
7463 * XXX filesystem should take care of this itself, perhaps...
7465 if (fvp
== tvp
&& fdvp
== tdvp
) {
7466 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7467 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7468 fromnd
->ni_cnd
.cn_namelen
)) {
7473 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7475 * we're holding a reference and lock
7476 * on locked_mp, but it no longer matches
7477 * what we want to do... so drop our hold
7479 mount_unlock_renames(locked_mp
);
7480 mount_drop(locked_mp
, 0);
7481 holding_mntlock
= 0;
7483 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7485 * serialize renames that re-shape
7486 * the tree... if holding_mntlock is
7487 * set, then we're ready to go...
7489 * first need to drop the iocounts
7490 * we picked up, second take the
7491 * lock to serialize the access,
7492 * then finally start the lookup
7493 * process over with the lock held
7495 if (!holding_mntlock
) {
7497 * need to grab a reference on
7498 * the mount point before we
7499 * drop all the iocounts... once
7500 * the iocounts are gone, the mount
7503 locked_mp
= fvp
->v_mount
;
7504 mount_ref(locked_mp
, 0);
7507 * nameidone has to happen before we vnode_put(tvp)
7508 * since it may need to release the fs_nodelock on the tvp
7517 * nameidone has to happen before we vnode_put(fdvp)
7518 * since it may need to release the fs_nodelock on the fvp
7525 mount_lock_renames(locked_mp
);
7526 holding_mntlock
= 1;
7532 * when we dropped the iocounts to take
7533 * the lock, we allowed the identity of
7534 * the various vnodes to change... if they did,
7535 * we may no longer be dealing with a rename
7536 * that reshapes the tree... once we're holding
7537 * the iocounts, the vnodes can't change type
7538 * so we're free to drop the lock at this point
7541 if (holding_mntlock
) {
7542 mount_unlock_renames(locked_mp
);
7543 mount_drop(locked_mp
, 0);
7544 holding_mntlock
= 0;
7548 // save these off so we can later verify that fvp is the same
7549 oname
= fvp
->v_name
;
7550 oparent
= fvp
->v_parent
;
7554 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7557 get_fse_info(fvp
, &from_finfo
, ctx
);
7559 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7564 fvap
= &__rename_data
->fv_attr
;
7568 get_fse_info(tvp
, &to_finfo
, ctx
);
7569 } else if (batched
) {
7570 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7575 tvap
= &__rename_data
->tv_attr
;
7580 #endif /* CONFIG_FSE */
7582 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
7583 if (from_name
== NULL
) {
7584 GET_PATH(from_name
);
7585 if (from_name
== NULL
) {
7591 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7593 if (to_name
== NULL
) {
7595 if (to_name
== NULL
) {
7601 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7603 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7604 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7607 if (holding_mntlock
) {
7609 * we can drop our serialization
7612 mount_unlock_renames(locked_mp
);
7613 mount_drop(locked_mp
, 0);
7614 holding_mntlock
= 0;
7617 if (error
== EKEEPLOOKING
) {
7618 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7619 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7620 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7624 fromnd
->ni_vp
= fvp
;
7627 goto continue_lookup
;
7631 * We may encounter a race in the VNOP where the destination didn't
7632 * exist when we did the namei, but it does by the time we go and
7633 * try to create the entry. In this case, we should re-drive this rename
7634 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7635 * but other filesystems susceptible to this race could return it, too.
7637 if (error
== ERECYCLE
) {
7642 * For compound VNOPs, the authorization callback may return
7643 * ENOENT in case of racing hardlink lookups hitting the name
7644 * cache, redrive the lookup.
7646 if (batched
&& error
== ENOENT
) {
7647 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7648 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7657 /* call out to allow 3rd party notification of rename.
7658 * Ignore result of kauth_authorize_fileop call.
7660 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7661 KAUTH_FILEOP_RENAME
,
7662 (uintptr_t)from_name
, (uintptr_t)to_name
);
7663 if (flags
& VFS_RENAME_SWAP
) {
7664 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7665 KAUTH_FILEOP_RENAME
,
7666 (uintptr_t)to_name
, (uintptr_t)from_name
);
7670 if (from_name
!= NULL
&& to_name
!= NULL
) {
7671 if (from_truncated
|| to_truncated
) {
7672 // set it here since only the from_finfo gets reported up to user space
7673 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7677 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
7680 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
7684 add_fsevent(FSE_RENAME
, ctx
,
7685 FSE_ARG_STRING
, from_len
, from_name
,
7686 FSE_ARG_FINFO
, &from_finfo
,
7687 FSE_ARG_STRING
, to_len
, to_name
,
7688 FSE_ARG_FINFO
, &to_finfo
,
7690 if (flags
& VFS_RENAME_SWAP
) {
7692 * Strictly speaking, swap is the equivalent of
7693 * *three* renames. FSEvents clients should only take
7694 * the events as a hint, so we only bother reporting
7697 add_fsevent(FSE_RENAME
, ctx
,
7698 FSE_ARG_STRING
, to_len
, to_name
,
7699 FSE_ARG_FINFO
, &to_finfo
,
7700 FSE_ARG_STRING
, from_len
, from_name
,
7701 FSE_ARG_FINFO
, &from_finfo
,
7705 add_fsevent(FSE_RENAME
, ctx
,
7706 FSE_ARG_STRING
, from_len
, from_name
,
7707 FSE_ARG_FINFO
, &from_finfo
,
7708 FSE_ARG_STRING
, to_len
, to_name
,
7712 #endif /* CONFIG_FSE */
7715 * update filesystem's mount point data
7718 char *cp
, *pathend
, *mpname
;
7724 mp
= fvp
->v_mountedhere
;
7726 if (vfs_busy(mp
, LK_NOWAIT
)) {
7730 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7732 if (UIO_SEG_IS_USER_SPACE(segflg
))
7733 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7735 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7737 /* find current mount point prefix */
7738 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7739 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7743 /* find last component of target name */
7744 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7748 /* append name to prefix */
7749 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7750 bzero(pathend
, maxlen
);
7751 strlcpy(pathend
, mpname
, maxlen
);
7753 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7758 * fix up name & parent pointers. note that we first
7759 * check that fvp has the same name/parent pointers it
7760 * had before the rename call... this is a 'weak' check
7763 * XXX oparent and oname may not be set in the compound vnop case
7765 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7768 update_flags
= VNODE_UPDATE_NAME
;
7771 update_flags
|= VNODE_UPDATE_PARENT
;
7773 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7776 if (to_name
!= NULL
) {
7777 RELEASE_PATH(to_name
);
7780 if (from_name
!= NULL
) {
7781 RELEASE_PATH(from_name
);
7784 if (holding_mntlock
) {
7785 mount_unlock_renames(locked_mp
);
7786 mount_drop(locked_mp
, 0);
7787 holding_mntlock
= 0;
7791 * nameidone has to happen before we vnode_put(tdvp)
7792 * since it may need to release the fs_nodelock on the tdvp
7802 * nameidone has to happen before we vnode_put(fdvp)
7803 * since it may need to release the fs_nodelock on the fdvp
7813 * If things changed after we did the namei, then we will re-drive
7814 * this rename call from the top.
7821 FREE(__rename_data
, M_TEMP
);
7826 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7828 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7829 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
7832 int renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
7834 return renameat_internal(
7835 vfs_context_current(),
7836 uap
->fromfd
, uap
->from
,
7838 UIO_USERSPACE
, uap
->flags
);
7842 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7844 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7845 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7849 * Make a directory file.
7851 * Returns: 0 Success
7854 * vnode_authorize:???
7859 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7860 enum uio_seg segflg
)
7864 int update_flags
= 0;
7866 struct nameidata nd
;
7868 AUDIT_ARG(mode
, vap
->va_mode
);
7869 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7871 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7872 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7875 error
= nameiat(&nd
, fd
);
7886 batched
= vnode_compound_mkdir_available(dvp
);
7888 VATTR_SET(vap
, va_type
, VDIR
);
7892 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7893 * only get EXISTS or EISDIR for existing path components, and not that it could see
7894 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7895 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7897 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7898 if (error
== EACCES
|| error
== EPERM
) {
7906 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7907 * rather than EACCES if the target exists.
7909 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7911 error2
= nameiat(&nd
, fd
);
7925 * make the directory
7927 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7928 if (error
== EKEEPLOOKING
) {
7930 goto continue_lookup
;
7936 // Make sure the name & parent pointers are hooked up
7937 if (vp
->v_name
== NULL
)
7938 update_flags
|= VNODE_UPDATE_NAME
;
7939 if (vp
->v_parent
== NULLVP
)
7940 update_flags
|= VNODE_UPDATE_PARENT
;
7943 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7946 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7951 * nameidone has to happen before we vnode_put(dvp)
7952 * since it may need to release the fs_nodelock on the dvp
7965 * mkdir_extended: Create a directory; with extended security (ACL).
7967 * Parameters: p Process requesting to create the directory
7968 * uap User argument descriptor (see below)
7971 * Indirect: uap->path Path of directory to create
7972 * uap->mode Access permissions to set
7973 * uap->xsecurity ACL to set
7975 * Returns: 0 Success
7980 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7983 kauth_filesec_t xsecdst
;
7984 struct vnode_attr va
;
7986 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7989 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7990 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7994 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7995 if (xsecdst
!= NULL
)
7996 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7998 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8000 if (xsecdst
!= NULL
)
8001 kauth_filesec_free(xsecdst
);
8006 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
8008 struct vnode_attr va
;
8011 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8013 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
8018 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
8020 struct vnode_attr va
;
8023 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
8025 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
8030 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
8031 enum uio_seg segflg
)
8035 struct nameidata nd
;
8038 int has_listeners
= 0;
8042 struct vnode_attr va
;
8043 #endif /* CONFIG_FSE */
8044 struct vnode_attr
*vap
= NULL
;
8045 int restart_count
= 0;
8051 * This loop exists to restart rmdir in the unlikely case that two
8052 * processes are simultaneously trying to remove the same directory
8053 * containing orphaned appleDouble files.
8056 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
8057 segflg
, dirpath
, ctx
);
8058 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
8063 error
= nameiat(&nd
, fd
);
8071 batched
= vnode_compound_rmdir_available(vp
);
8073 if (vp
->v_flag
& VROOT
) {
8075 * The root of a mounted filesystem cannot be deleted.
8081 #if DEVELOPMENT || DEBUG
8083 * XXX VSWAP: Check for entitlements or special flag here
8084 * so we can restrict access appropriately.
8086 #else /* DEVELOPMENT || DEBUG */
8088 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
8092 #endif /* DEVELOPMENT || DEBUG */
8095 * Removed a check here; we used to abort if vp's vid
8096 * was not the same as what we'd seen the last time around.
8097 * I do not think that check was valid, because if we retry
8098 * and all dirents are gone, the directory could legitimately
8099 * be recycled but still be present in a situation where we would
8100 * have had permission to delete. Therefore, we won't make
8101 * an effort to preserve that check now that we may not have a
8106 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
8108 if (error
== ENOENT
) {
8109 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8110 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8121 if (!vnode_compound_rmdir_available(dvp
)) {
8122 panic("No error, but no compound rmdir?");
8129 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8132 get_fse_info(vp
, &finfo
, ctx
);
8134 error
= vfs_get_notify_attributes(&va
);
8143 has_listeners
= kauth_authorize_fileop_has_listeners();
8144 if (need_event
|| has_listeners
) {
8153 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
8156 finfo
.mode
|= FSE_TRUNCATED_PATH
;
8161 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8164 /* Couldn't find a vnode */
8168 if (error
== EKEEPLOOKING
) {
8169 goto continue_lookup
;
8170 } else if (batched
&& error
== ENOENT
) {
8171 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8172 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8174 * For compound VNOPs, the authorization callback
8175 * may return ENOENT in case of racing hard link lookups
8176 * redrive the lookup.
8183 #if CONFIG_APPLEDOUBLE
8185 * Special case to remove orphaned AppleDouble
8186 * files. I don't like putting this in the kernel,
8187 * but carbon does not like putting this in carbon either,
8190 if (error
== ENOTEMPTY
) {
8191 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8192 if (error
== EBUSY
) {
8198 * Assuming everything went well, we will try the RMDIR again
8201 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8203 #endif /* CONFIG_APPLEDOUBLE */
8205 * Call out to allow 3rd party notification of delete.
8206 * Ignore result of kauth_authorize_fileop call.
8209 if (has_listeners
) {
8210 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8211 KAUTH_FILEOP_DELETE
,
8216 if (vp
->v_flag
& VISHARDLINK
) {
8217 // see the comment in unlink1() about why we update
8218 // the parent of a hard link when it is removed
8219 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8225 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8227 add_fsevent(FSE_DELETE
, ctx
,
8228 FSE_ARG_STRING
, len
, path
,
8229 FSE_ARG_FINFO
, &finfo
,
8241 * nameidone has to happen before we vnode_put(dvp)
8242 * since it may need to release the fs_nodelock on the dvp
8250 if (restart_flag
== 0) {
8251 wakeup_one((caddr_t
)vp
);
8254 tsleep(vp
, PVFS
, "rm AD", 1);
8256 } while (restart_flag
!= 0);
8263 * Remove a directory file.
8267 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8269 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8270 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
8273 /* Get direntry length padded to 8 byte alignment */
8274 #define DIRENT64_LEN(namlen) \
8275 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8277 /* Get dirent length padded to 4 byte alignment */
8278 #define DIRENT_LEN(namelen) \
8279 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
8281 /* Get the end of this dirent */
8282 #define DIRENT_END(dep) \
8283 (((char *)(dep)) + (dep)->d_reclen - 1)
8286 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8287 int *numdirent
, vfs_context_t ctxp
)
8289 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8290 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8291 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8292 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8297 struct direntry
*entry64
;
8303 * We're here because the underlying file system does not
8304 * support direntries or we mounted denying support so we must
8305 * fall back to dirents and convert them to direntries.
8307 * Our kernel buffer needs to be smaller since re-packing will
8308 * expand each dirent. The worst case (when the name length
8309 * is 3 or less) corresponds to a struct direntry size of 32
8310 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8311 * (4-byte aligned). So having a buffer that is 3/8 the size
8312 * will prevent us from reading more than we can pack.
8314 * Since this buffer is wired memory, we will limit the
8315 * buffer size to a maximum of 32K. We would really like to
8316 * use 32K in the MIN(), but we use magic number 87371 to
8317 * prevent uio_resid() * 3 / 8 from overflowing.
8319 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8320 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8321 if (bufptr
== NULL
) {
8325 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8326 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8327 auio
->uio_offset
= uio
->uio_offset
;
8329 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8331 dep
= (struct dirent
*)bufptr
;
8332 bytesread
= bufsize
- uio_resid(auio
);
8334 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8337 * Convert all the entries and copy them out to user's buffer.
8339 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8340 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
8342 if (DIRENT_END(dep
) > ((char *)bufptr
+ bytesread
) ||
8343 DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) {
8344 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
,
8345 vp
->v_mount
->mnt_vfsstat
.f_mntonname
,
8346 vp
->v_name
? vp
->v_name
: "<unknown>");
8351 bzero(entry64
, enbufsize
);
8352 /* Convert a dirent to a dirent64. */
8353 entry64
->d_ino
= dep
->d_ino
;
8354 entry64
->d_seekoff
= 0;
8355 entry64
->d_reclen
= enbufsize
;
8356 entry64
->d_namlen
= dep
->d_namlen
;
8357 entry64
->d_type
= dep
->d_type
;
8358 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8360 /* Move to next entry. */
8361 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8363 /* Copy entry64 to user's buffer. */
8364 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8367 /* Update the real offset using the offset we got from VNOP_READDIR. */
8369 uio
->uio_offset
= auio
->uio_offset
;
8372 FREE(bufptr
, M_TEMP
);
8373 FREE(entry64
, M_TEMP
);
8378 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8381 * Read a block of directory entries in a file system independent format.
8384 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8385 off_t
*offset
, int flags
)
8388 struct vfs_context context
= *vfs_context_current(); /* local copy */
8389 struct fileproc
*fp
;
8391 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8393 int error
, eofflag
, numdirent
;
8394 char uio_buf
[ UIO_SIZEOF(1) ];
8396 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
8400 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8401 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8406 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
8407 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8410 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8414 if ( (error
= vnode_getwithref(vp
)) ) {
8417 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8420 if (vp
->v_type
!= VDIR
) {
8421 (void)vnode_put(vp
);
8427 error
= mac_vnode_check_readdir(&context
, vp
);
8429 (void)vnode_put(vp
);
8434 loff
= fp
->f_fglob
->fg_offset
;
8435 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8436 uio_addiov(auio
, bufp
, bufsize
);
8438 if (flags
& VNODE_READDIR_EXTENDED
) {
8439 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8440 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8442 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8443 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8446 (void)vnode_put(vp
);
8450 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
8451 if (union_dircheckp
) {
8452 error
= union_dircheckp(&vp
, fp
, &context
);
8456 (void)vnode_put(vp
);
8461 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8462 struct vnode
*tvp
= vp
;
8463 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8465 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8466 fp
->f_fglob
->fg_offset
= 0;
8480 *bytesread
= bufsize
- uio_resid(auio
);
8488 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8494 AUDIT_ARG(fd
, uap
->fd
);
8495 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
8498 if (proc_is64bit(p
)) {
8499 user64_long_t base
= (user64_long_t
)offset
;
8500 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8502 user32_long_t base
= (user32_long_t
)offset
;
8503 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8505 *retval
= bytesread
;
8511 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8517 AUDIT_ARG(fd
, uap
->fd
);
8518 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8521 *retval
= bytesread
;
8522 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
8529 * Set the mode mask for creation of filesystem nodes.
8530 * XXX implement xsecurity
8532 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8534 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8536 struct filedesc
*fdp
;
8538 AUDIT_ARG(mask
, newmask
);
8541 *retval
= fdp
->fd_cmask
;
8542 fdp
->fd_cmask
= newmask
& ALLPERMS
;
8548 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8550 * Parameters: p Process requesting to set the umask
8551 * uap User argument descriptor (see below)
8552 * retval umask of the process (parameter p)
8554 * Indirect: uap->newmask umask to set
8555 * uap->xsecurity ACL to set
8557 * Returns: 0 Success
8562 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8565 kauth_filesec_t xsecdst
;
8567 xsecdst
= KAUTH_FILESEC_NONE
;
8568 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8569 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
8572 xsecdst
= KAUTH_FILESEC_NONE
;
8575 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
8577 if (xsecdst
!= KAUTH_FILESEC_NONE
)
8578 kauth_filesec_free(xsecdst
);
8583 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8585 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
8589 * Void all references to file by ripping underlying filesystem
8594 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8597 struct vnode_attr va
;
8598 vfs_context_t ctx
= vfs_context_current();
8600 struct nameidata nd
;
8602 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
8611 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
8616 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
8622 error
= mac_vnode_check_revoke(ctx
, vp
);
8628 VATTR_WANTED(&va
, va_uid
);
8629 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
8631 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
8632 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
8634 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
8635 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
8643 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
8644 * The following system calls are designed to support features
8645 * which are specific to the HFS & HFS Plus volume formats
8650 * Obtain attribute information on objects in a directory while enumerating
8655 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
8658 struct fileproc
*fp
;
8660 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8661 uint32_t count
, savecount
;
8665 struct attrlist attributelist
;
8666 vfs_context_t ctx
= vfs_context_current();
8668 char uio_buf
[ UIO_SIZEOF(1) ];
8669 kauth_action_t action
;
8673 /* Get the attributes into kernel space */
8674 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
8677 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
8681 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
8684 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8685 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8692 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
8699 if ( (error
= vnode_getwithref(vp
)) )
8702 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8705 if (vp
->v_type
!= VDIR
) {
8706 (void)vnode_put(vp
);
8712 error
= mac_vnode_check_readdir(ctx
, vp
);
8714 (void)vnode_put(vp
);
8719 /* set up the uio structure which will contain the users return buffer */
8720 loff
= fp
->f_fglob
->fg_offset
;
8721 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8722 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
8725 * If the only item requested is file names, we can let that past with
8726 * just LIST_DIRECTORY. If they want any other attributes, that means
8727 * they need SEARCH as well.
8729 action
= KAUTH_VNODE_LIST_DIRECTORY
;
8730 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
8731 attributelist
.fileattr
|| attributelist
.dirattr
)
8732 action
|= KAUTH_VNODE_SEARCH
;
8734 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
8736 /* Believe it or not, uap->options only has 32-bits of valid
8737 * info, so truncate before extending again */
8739 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8740 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8744 (void) vnode_put(vp
);
8749 * If we've got the last entry of a directory in a union mount
8750 * then reset the eofflag and pretend there's still more to come.
8751 * The next call will again set eofflag and the buffer will be empty,
8752 * so traverse to the underlying directory and do the directory
8755 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8756 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8758 } else { // Empty buffer
8759 struct vnode
*tvp
= vp
;
8760 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8761 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8762 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8763 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8765 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8773 (void)vnode_put(vp
);
8777 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8779 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8781 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8783 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8786 *retval
= eofflag
; /* similar to getdirentries */
8790 return (error
); /* return error earlier, an retval of 0 or 1 now */
8792 } /* end of getdirentriesattr system call */
8795 * Exchange data between two files
// exchangedata: system call that exchanges the data of the two files named by
// uap->path1 and uap->path2 through VNOP_EXCHANGE.
// Visible flow: look up both paths (following symlinks unless FSOPT_NOFOLLOW
// is set in uap->options), require both vnodes to be on the same mount and to
// be regular files, run the MAC check and vnode_authorize() for read+write
// data on both, optionally capture both paths for fsevents / kauth listeners,
// perform the exchange, then swap the v_name / v_parent fields of the two
// vnodes and post an FSE_EXCHANGE event.
// NOTE(review): this listing omits some original lines (braces, error exits,
// several declarations); confirm details against the full source.
8800 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8803 struct nameidata fnd
, snd
;
8804 vfs_context_t ctx
= vfs_context_current();
8808 u_int32_t nameiflags
;
8812 int from_truncated
=0, to_truncated
=0;
8814 fse_info f_finfo
, s_finfo
;
// Follow symlinks unless the caller passed FSOPT_NOFOLLOW.
8818 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
// Look up the first path (audited as path 1).
8820 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8821 UIO_USERSPACE
, uap
->path1
, ctx
);
8823 error
= namei(&fnd
);
// Look up the second path (audited as path 2); note the extra CN_NBMOUNTLOOK flag.
8830 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8831 UIO_USERSPACE
, uap
->path2
, ctx
);
8833 error
= namei(&snd
);
8842 * if the files are the same, return an inval error
8850 * if the files are on different volumes, return an error
8852 if (svp
->v_mount
!= fvp
->v_mount
) {
8857 /* If they're not files, return an error */
8858 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8864 error
= mac_vnode_check_exchangedata(ctx
,
// The caller needs read and write data access on both files.
8869 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8870 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8875 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8877 kauth_authorize_fileop_has_listeners()) {
8880 if (fpath
== NULL
|| spath
== NULL
) {
// Capture both paths, recording whether either had to be truncated.
8885 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8886 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8889 get_fse_info(fvp
, &f_finfo
, ctx
);
8890 get_fse_info(svp
, &s_finfo
, ctx
);
8891 if (from_truncated
|| to_truncated
) {
8892 // set it here since only the f_finfo gets reported up to user space
8893 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8897 /* Ok, make the call */
8898 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8901 const char *tmpname
;
8903 if (fpath
!= NULL
&& spath
!= NULL
) {
8904 /* call out to allow 3rd party notification of exchangedata.
8905 * Ignore result of kauth_authorize_fileop call.
8907 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8908 (uintptr_t)fpath
, (uintptr_t)spath
);
// Swap the v_name pointers of the two vnodes.
8912 tmpname
= fvp
->v_name
;
8913 fvp
->v_name
= svp
->v_name
;
8914 svp
->v_name
= tmpname
;
// Swap the v_parent pointers as well when they differ.
8916 if (fvp
->v_parent
!= svp
->v_parent
) {
8919 tmp
= fvp
->v_parent
;
8920 fvp
->v_parent
= svp
->v_parent
;
8921 svp
->v_parent
= tmp
;
8923 name_cache_unlock();
// Post the fsevent only if both paths were captured above.
8926 if (fpath
!= NULL
&& spath
!= NULL
) {
8927 add_fsevent(FSE_EXCHANGE
, ctx
,
8928 FSE_ARG_STRING
, flen
, fpath
,
8929 FSE_ARG_FINFO
, &f_finfo
,
8930 FSE_ARG_STRING
, slen
, spath
,
8931 FSE_ARG_FINFO
, &s_finfo
,
8939 RELEASE_PATH(fpath
);
8941 RELEASE_PATH(spath
);
8949 * Return (in MB) the amount of freespace on the given vnode's volume.
// freespace_mb: refresh the vfsstat of vp's mount, then return
// f_bavail * f_bsize shifted right by 20 bits (bytes divided by 2^20, i.e. MB).
// The product is computed as uint64_t; the declared return type is uint32_t,
// so the result is narrowed on return.
8951 uint32_t freespace_mb(vnode_t vp
);
8954 freespace_mb(vnode_t vp
)
8956 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8957 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8958 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
// searchfs: system call that runs a filesystem-level search via VNOP_SEARCHFS.
// Visible flow:
//  - copy in the caller's fssearchblock (directly for 64-bit processes,
//    field-by-field widened from the user32 layout otherwise);
//  - reject parameter sizes above SEARCHFS_MAX_SEARCHPARMS;
//  - allocate one buffer that holds searchparams1, searchparams2, the
//    returned attrlist and the searchstate, and copy each in from user space;
//  - fold the union-mount state kept in the searchstate into uap->options;
//  - when ATTR_CMN_NAME is searched, bounds-check the attrreference_t found
//    in searchparams1;
//  - build a uio over the caller's return buffer, look up uap->path, switch
//    to the volume root, descend ss_union_layer union layers, run the MAC
//    check and call VNOP_SEARCHFS;
//  - copy the searchstate and the match count back out and free the buffer.
// NOTE(review): error exits and several declarations are not part of this
// listing; confirm them against the full source.
8966 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8971 struct nameidata nd
;
8972 struct user64_fssearchblock searchblock
;
8973 struct searchstate
*state
;
8974 struct attrlist
*returnattrs
;
8975 struct timeval timelimit
;
8976 void *searchparams1
,*searchparams2
;
8978 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8979 uint32_t nummatches
;
8981 uint32_t nameiflags
;
8982 vfs_context_t ctx
= vfs_context_current();
8983 char uio_buf
[ UIO_SIZEOF(1) ];
8985 /* Start by copying in fsearchblock parameter list */
8986 if (IS_64BIT_PROCESS(p
)) {
8987 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8988 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8989 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8992 struct user32_fssearchblock tmp_searchblock
;
8994 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8995 // munge into 64-bit version
8996 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8997 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8998 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8999 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
9001 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9002 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9004 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
9005 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
9006 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
9007 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
9008 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
9009 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
9010 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
9015 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9017 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
9018 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
9021 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9022 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9023 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9026 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9027 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9028 /* assumes the size is still 556 bytes it will continue to work */
9030 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
9031 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
9033 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
9035 /* Now set up the various pointers to the correct place in our newly allocated memory */
// Layout: [searchparams1][searchparams2][struct attrlist][struct searchstate].
9037 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
9038 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
9039 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
9041 /* Now copy in the stuff given our local variables. */
9043 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
9046 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
9049 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
9052 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
9056 * When searching a union mount, need to set the
9057 * start flag at the first call on each layer to
9058 * reset state for the new volume.
9060 if (uap
->options
& SRCHFS_START
)
9061 state
->ss_union_layer
= 0;
9063 uap
->options
|= state
->ss_union_flags
;
9064 state
->ss_union_flags
= 0;
9067 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9068 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9069 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9070 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9071 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9074 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
9075 attrreference_t
* string_ref
;
9076 u_int32_t
* start_length
;
9077 user64_size_t param_length
;
9079 /* validate searchparams1 */
9080 param_length
= searchblock
.sizeofsearchparams1
;
9081 /* skip the word that specifies length of the buffer */
9082 start_length
= (u_int32_t
*) searchparams1
;
9083 start_length
= start_length
+1;
9084 string_ref
= (attrreference_t
*) start_length
;
9086 /* ensure no negative offsets or too big offsets */
9087 if (string_ref
->attr_dataoffset
< 0 ) {
9091 if (string_ref
->attr_length
> MAXPATHLEN
) {
9096 /* Check for pointer overflow in the string ref */
9097 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
9102 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
9106 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
9112 /* set up the uio structure which will contain the users return buffer */
9113 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9114 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
9117 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
9118 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
9119 UIO_USERSPACE
, uap
->path
, ctx
);
9128 * Switch to the root vnode for the volume
9130 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
9137 * If it's a union mount, the path lookup takes
9138 * us to the top layer. But we may need to descend
9139 * to a lower layer. For non-union mounts the layer
9142 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
9143 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
9146 vp
= vp
->v_mount
->mnt_vnodecovered
;
9152 error
= vnode_getwithref(vp
);
9159 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
9168 * If searchblock.maxmatches == 0, then skip the search. This has happened
9169 * before and sometimes the underlying code doesnt deal with it well.
9171 if (searchblock
.maxmatches
== 0) {
9177 * Allright, we have everything we need, so lets make that call.
9179 * We keep special track of the return value from the file system:
9180 * EAGAIN is an acceptable error condition that shouldn't keep us
9181 * from copying out any results...
9184 fserror
= VNOP_SEARCHFS(vp
,
9187 &searchblock
.searchattrs
,
9188 (u_long
)searchblock
.maxmatches
,
9192 (u_long
)uap
->scriptcode
,
9193 (u_long
)uap
->options
,
9195 (struct searchstate
*) &state
->ss_fsstate
,
9199 * If it's a union mount we need to be called again
9200 * to search the mounted-on filesystem.
9202 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
9203 state
->ss_union_flags
= SRCHFS_START
;
9204 state
->ss_union_layer
++; // search next layer down
9212 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9213 search state. Everything was already put into the return buffer by the vop call. */
9215 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
9218 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
// searchparams1 is the base of the single allocation that also backs
// searchparams2, returnattrs and state, so this one FREE releases them all.
9225 FREE(searchparams1
,M_TEMP
);
9230 } /* end of searchfs system call */
9232 #else /* CONFIG_SEARCHFS */
// searchfs variant compiled in the #else branch of CONFIG_SEARCHFS: every
// parameter is marked __unused.
// NOTE(review): the body (and therefore the returned error) is not part of
// this listing.
9235 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9240 #endif /* CONFIG_SEARCHFS */
// Lock group/attribute objects and the two mutexes used by the namespace
// handler code; all of them are set up in nspace_handler_init().
9243 lck_grp_attr_t
* nspace_group_attr
;
9244 lck_attr_t
* nspace_lock_attr
;
9245 lck_grp_t
* nspace_mutex_group
;
9247 lck_mtx_t nspace_handler_lock
;
9248 lck_mtx_t nspace_handler_exclusion_lock
;
// Both of these globals start out as 0.
9250 time_t snapshot_timestamp
=0;
9251 int nspace_allow_virtual_devs
=0;
9253 void nspace_handler_init(void);
9255 typedef struct nspace_item_info
{
// Fixed-size table of MAX_NSPACE_ITEMS nspace_item_info slots.
9265 #define MAX_NSPACE_ITEMS 128
9266 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9267 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
9268 uint32_t nspace_token_id
=0;
9269 uint32_t nspace_handler_timeout
= 15; // seconds
// State bits kept in the flags field of an nspace_items[] slot.
9271 #define NSPACE_ITEM_NEW 0x0001
9272 #define NSPACE_ITEM_PROCESSING 0x0002
9273 #define NSPACE_ITEM_DEAD 0x0004
9274 #define NSPACE_ITEM_CANCELLED 0x0008
9275 #define NSPACE_ITEM_DONE 0x0010
9276 #define NSPACE_ITEM_RESET_TIMER 0x0020
// Event-type bits kept in the same flags field; nspace_flags_matches_handler()
// compares the masked value against exactly one of them.
9278 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9279 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9281 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9283 //#pragma optimization_level 0
// Handler types; NSPACE_HANDLER_COUNT sizes the nspace_handlers[] array below.
9286 NSPACE_HANDLER_NSPACE
= 0,
9287 NSPACE_HANDLER_SNAPSHOT
= 1,
9289 NSPACE_HANDLER_COUNT
,
9293 uint64_t handler_tid
;
9294 struct proc
*handler_proc
;
// One handler record per handler type.
9298 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
9300 /* namespace fsctl functions */
// Forward declarations for the file-local helpers defined further down.
9301 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9302 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9303 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9304 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9305 static int nspace_is_special_process(struct proc
*proc
);
9306 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9307 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
// NOTE(review): size is declared int here, while process_namespace_fsctl()
// passes its u_int size argument to this function.
9308 static int validate_namespace_args (int is64bit
, int size
);
9309 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
// nspace_flags_matches_handler: report whether an item's event_flags belong
// to the given handler type. The flags are masked with
// NSPACE_ITEM_ALL_EVENT_TYPES and must equal exactly the event bit of that
// type. Any other nspace_type value is logged with printf.
// NOTE(review): the printf uses %u with an (int) argument; the value returned
// on that path is not part of this listing.
9312 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9314 switch(nspace_type
) {
9315 case NSPACE_HANDLER_NSPACE
:
9316 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9317 case NSPACE_HANDLER_SNAPSHOT
:
9318 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
9320 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
// nspace_item_flags_for_type: map a handler type to the NSPACE_ITEM_*_EVENT
// bit stored in an item's flags. Any other nspace_type value is logged with
// printf.
// NOTE(review): the printf uses %u with an (int) argument; the value returned
// on that path is not part of this listing.
9325 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
9327 switch(nspace_type
) {
9328 case NSPACE_HANDLER_NSPACE
:
9329 return NSPACE_ITEM_NSPACE_EVENT
;
9330 case NSPACE_HANDLER_SNAPSHOT
:
9331 return NSPACE_ITEM_SNAPSHOT_EVENT
;
9333 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
// nspace_open_flags_for_type: choose the open mode used when a handler of the
// given type is handed a vnode: read/write plus O_EVTONLY for the namespace
// handler, read-only plus O_EVTONLY for the snapshot handler. Any other
// nspace_type value is logged with printf.
// NOTE(review): the printf uses %u with an (int) argument; the value returned
// on that path is not part of this listing.
9338 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
9340 switch(nspace_type
) {
9341 case NSPACE_HANDLER_NSPACE
:
9342 return FREAD
| FWRITE
| O_EVTONLY
;
9343 case NSPACE_HANDLER_SNAPSHOT
:
9344 return FREAD
| O_EVTONLY
;
9346 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
// nspace_type_for_op: derive the handler type from the event-type bits of op
// (op masked with NAMESPACE_HANDLER_EVENT_TYPE_MASK). A mask value matching
// neither case is logged and treated as NSPACE_HANDLER_NSPACE.
9351 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
9353 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9354 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9355 return NSPACE_HANDLER_NSPACE
;
9356 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9357 return NSPACE_HANDLER_SNAPSHOT
;
9359 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9360 return NSPACE_HANDLER_NSPACE
;
// nspace_is_special_process: scan nspace_handlers[] for an entry whose
// handler_proc is the given proc, i.e. test whether proc is currently
// registered as one of the handlers.
// NOTE(review): the return statements are not part of this listing.
9364 static inline int nspace_is_special_process(struct proc
*proc
)
9367 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9368 if (proc
== nspace_handlers
[i
].handler_proc
)
// nspace_handler_init: one-time setup of the namespace handler state.
// Allocates the lock attribute, the lock group attribute and the
// "nspace-mutex" lock group, initialises nspace_handler_lock and
// nspace_handler_exclusion_lock with them, and zeroes the nspace_items table.
9375 nspace_handler_init(void)
9377 nspace_lock_attr
= lck_attr_alloc_init();
9378 nspace_group_attr
= lck_grp_attr_alloc_init();
9379 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9380 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9381 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9382 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
// nspace_proc_exit: clean up after process p if it was registered as a
// namespace or snapshot handler.
// Every handler slot owned by p is cleared and the matching event bits are
// collected in event_mask. Then, under nspace_handler_lock, every item that
// is NEW or PROCESSING and carries one of those event bits has VNEEDSSNAPSHOT
// cleared on its vnode (if set), is reset to NSPACE_ITEM_DONE and has its
// waiter woken. A final wakeup is issued on nspace_item_idx.
// NOTE(review): the body of the event_mask == 0 check is not part of this
// listing.
9386 nspace_proc_exit(struct proc
*p
)
9388 int i
, event_mask
= 0;
// Drop p from every handler slot it owns and remember which event types it served.
9390 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9391 if (p
== nspace_handlers
[i
].handler_proc
) {
9392 event_mask
|= nspace_item_flags_for_type(i
);
9393 nspace_handlers
[i
].handler_tid
= 0;
9394 nspace_handlers
[i
].handler_proc
= NULL
;
9398 if (event_mask
== 0) {
9402 lck_mtx_lock(&nspace_handler_lock
);
9403 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9404 // if this process was the snapshot handler, zero snapshot_timestamp
9405 snapshot_timestamp
= 0;
9409 // unblock anyone that's waiting for the handler that died
9411 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9412 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9414 if ( nspace_items
[i
].flags
& event_mask
) {
9416 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9417 vnode_lock_spin(nspace_items
[i
].vp
);
9418 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9419 vnode_unlock(nspace_items
[i
].vp
);
9421 nspace_items
[i
].vp
= NULL
;
9422 nspace_items
[i
].vid
= 0;
9423 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9424 nspace_items
[i
].token
= 0;
// Waiters sleep on the address of the slot's vp field.
9426 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9431 wakeup((caddr_t
)&nspace_item_idx
);
9432 lck_mtx_unlock(&nspace_handler_lock
);
// resolve_nspace_item: convenience wrapper that calls
// resolve_nspace_item_ext() with a NULL arg and returns its result.
9437 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
9439 return resolve_nspace_item_ext(vp
, op
, NULL
);
// resolve_nspace_item_ext: queue a namespace/snapshot event for vp and block
// the caller until a handler has dealt with it.
// Visible flow: derive the handler type from op; apply the early checks
// (vnode type must be VREG, VDIR or VLNK; snapshot events on MNTK_VIRTUALDEV
// mounts unless nspace_allow_virtual_devs is set; no handler registered for
// the type; caller is itself a handler process). Under nspace_handler_lock,
// reuse the slot that already holds (vp, op) or claim a slot whose flags are
// 0, sleeping on nspace_token_id while the table is full. A newly claimed
// slot is filled in and announced with a wakeup on nspace_item_idx. The
// caller then sleeps on the address of the slot's vp field until the item is
// DONE or CANCELLED or msleep reports a timeout; the last waiter to drop the
// refcount clears the slot for re-use and wakes anyone waiting for a slot.
// NOTE(review): the bodies of the early checks and all return statements are
// not part of this listing; confirm the error codes against the full source.
9443 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9445 int i
, error
, keep_waiting
;
9447 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9449 // only allow namespace events on regular files, directories and symlinks.
9450 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9455 // if this is a snapshot event and the vnode is on a
9456 // disk image just pretend nothing happened since any
9457 // change to the disk image will cause the disk image
9458 // itself to get backed up and this avoids multi-way
9459 // deadlocks between the snapshot handler and the ever
9460 // popular diskimages-helper process. the variable
9461 // nspace_allow_virtual_devs allows this behavior to
9462 // be overridden (for use by the Mobile TimeMachine
9463 // testing infrastructure which uses disk images)
9465 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9466 && (vp
->v_mount
!= NULL
)
9467 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9468 && !nspace_allow_virtual_devs
) {
9473 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9474 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9478 if (nspace_is_special_process(current_proc())) {
9482 lck_mtx_lock(&nspace_handler_lock
);
// First pass: is there already a slot for this (vp, op) pair?
9485 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9486 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
// Second pass (only if nothing matched): look for a slot whose flags are 0.
9491 if (i
>= MAX_NSPACE_ITEMS
) {
9492 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9493 if (nspace_items
[i
].flags
== 0) {
9498 nspace_items
[i
].refcount
++;
// Table full: sleep on nspace_token_id for up to nspace_handler_timeout seconds.
9501 if (i
>= MAX_NSPACE_ITEMS
) {
9502 ts
.tv_sec
= nspace_handler_timeout
;
9505 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
9507 // an entry got freed up, go see if we can get a slot
9510 lck_mtx_unlock(&nspace_handler_lock
);
9516 // if it didn't already exist, add it. if it did exist
9517 // we'll get woken up when someone does a wakeup() on
9518 // the slot in the nspace_items table.
9520 if (vp
!= nspace_items
[i
].vp
) {
9521 nspace_items
[i
].vp
= vp
;
9522 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9523 nspace_items
[i
].op
= op
;
9524 nspace_items
[i
].vid
= vnode_vid(vp
);
9525 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9526 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
9527 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9529 vnode_lock_spin(vp
);
9530 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9535 nspace_items
[i
].token
= 0;
9536 nspace_items
[i
].refcount
= 1;
// Tell a handler sleeping on nspace_item_idx that a new item is available.
9538 wakeup((caddr_t
)&nspace_item_idx
);
9542 // Now go to sleep until the handler does a wakeup on this
9543 // slot in the nspace_items table (or we timeout).
9546 while(keep_waiting
) {
9547 ts
.tv_sec
= nspace_handler_timeout
;
9549 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
9551 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
// For a cancelled item the token field carries the error to report.
9553 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9554 error
= nspace_items
[i
].token
;
9555 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
9556 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9557 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9562 } else if (error
== 0) {
9563 // hmmm, why did we get woken up?
9564 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9565 nspace_items
[i
].token
);
// Last waiter out releases the slot and wakes threads waiting for a free one.
9568 if (--nspace_items
[i
].refcount
== 0) {
9569 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9570 nspace_items
[i
].arg
= NULL
;
9571 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
9572 nspace_items
[i
].flags
= 0; // this clears it for re-use
9574 wakeup(&nspace_token_id
);
9578 lck_mtx_unlock(&nspace_handler_lock
);
// nspace_snapshot_event: raise a snapshot namespace event for vp when its
// change time falls within the current snapshot epoch.
// Swap files get special treatment first (body not shown). The event is
// raised only when ctime and snapshot_timestamp are both non-zero and either
// ctime <= snapshot_timestamp or vnode_needssnapshots(vp) is true. Of the
// errors returned by resolve_nspace_item_ext(), the visible code logs EAGAIN
// and records EINTR in snapshot_error, which is the value returned.
// NOTE(review): the handling of EDEADLK is not part of this listing.
9583 int nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
9585 int snapshot_error
= 0;
9591 /* Swap files are special; skip them */
9592 if (vnode_isswap(vp
)) {
9596 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
9597 // the change time is within this epoch
9600 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
9601 if (error
== EDEADLK
) {
9604 if (error
== EAGAIN
) {
9605 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9606 } else if (error
== EINTR
) {
9607 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9608 snapshot_error
= EINTR
;
9613 return snapshot_error
;
// get_nspace_item_status: look up vp in the nspace_items table under
// nspace_handler_lock. If a slot holds vp, its flags are stored in *status;
// the lock is released on both the found and the not-found path.
// NOTE(review): the return values are not part of this listing.
9617 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
9621 lck_mtx_lock(&nspace_handler_lock
);
9622 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9623 if (nspace_items
[i
].vp
== vp
) {
// No slot holds vp.
9628 if (i
>= MAX_NSPACE_ITEMS
) {
9629 lck_mtx_unlock(&nspace_handler_lock
);
9633 *status
= nspace_items
[i
].flags
;
9634 lck_mtx_unlock(&nspace_handler_lock
);
// build_volfs_path: format a "/.vol/<fsid>/<fileid>" path for vp into path.
// On entry *len is the size of the path buffer; on exit it is set to the
// snprintf() result plus one. If vnode_getattr() fails, a fixed placeholder
// path is written instead.
// NOTE(review): snprintf() returns the length the full string would have had,
// so on truncation *len can end up larger than the buffer; callers should not
// treat it as the number of bytes actually stored.
9641 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
9643 struct vnode_attr va
;
9647 VATTR_WANTED(&va
, va_fsid
);
9648 VATTR_WANTED(&va
, va_fileid
);
9650 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
9651 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
9654 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
9663 // Note: this function does NOT check permissions on all of the
9664 // parent directories leading to this vnode. It should only be
9665 // called on behalf of a root process. Otherwise a process may
9666 // get access to a file because the file itself is readable even
9667 // though its parent directories would prevent access.
// vn_open_with_vp: open the vnode vp directly, with mode fmode, without a
// path lookup.
// Visible flow: require superuser credentials (suser), run the MAC open
// check, translate fmode into a kauth action (read, write or append) and
// authorize it on vp, call VNOP_OPEN, take a reference with vnode_ref_ext()
// (closing the vnode again if that fails), then notify MAC and kauth
// listeners of the open.
// NOTE(review): the error exits and the body of the VOPENEVT branch are not
// part of this listing.
9670 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
9674 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9679 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
9684 /* compute action to be authorized */
9686 if (fmode
& FREAD
) {
9687 action
|= KAUTH_VNODE_READ_DATA
;
9689 if (fmode
& (FWRITE
| O_TRUNC
)) {
9691 * If we are writing, appending, and not truncating,
9692 * indicate that we are appending so that if the
9693 * UF_APPEND or SF_APPEND bits are set, we do not deny
9696 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
9697 action
|= KAUTH_VNODE_APPEND_DATA
;
9699 action
|= KAUTH_VNODE_WRITE_DATA
;
9703 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
9708 // if the vnode is tagged VOPENEVT and the current process
9709 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9710 // flag to the open mode so that this open won't count against
9711 // the vnode when carbon delete() does a vnode_isinuse() to see
9712 // if a file is currently in use. this allows spotlight
9713 // importers to not interfere with carbon apps that depend on
9714 // the no-delete-if-busy semantics of carbon delete().
9716 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
9720 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
// Undo the open if the reference cannot be taken.
9723 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
9724 VNOP_CLOSE(vp
, fmode
, ctx
);
9728 /* Call out to allow 3rd party notification of open.
9729 * Ignore result of kauth_authorize_fileop call.
9732 mac_vnode_notify_open(ctx
, vp
, fmode
);
9734 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
// wait_for_namespace_event: called on behalf of a namespace handler to wait
// for the next item of its type and hand it to userland through the user
// addresses stored in nhd.
// Visible flow: mark the handler type busy under
// nspace_handler_exclusion_lock; register the calling thread/process as the
// handler if none is registered; then, under nspace_handler_lock, loop
// looking for an NSPACE_ITEM_NEW entry whose event type matches, sleeping on
// nspace_item_idx when there is none. The chosen item is moved to PROCESSING
// and given a fresh token; its vnode is opened and attached to a newly
// allocated file descriptor, and the token, op, fd and the optional
// info/objid fields are copied out. The cleanup code releases the fd, the
// open and the iocount according to the three *_successful flags; the visible
// code after it marks the item DONE and wakes its waiter. Finally the busy
// flag is cleared again.
// NOTE(review): several conditions, labels and the return statements are not
// part of this listing; confirm them against the full source.
9742 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
// Only one thread at a time may act as the handler for a given type.
9749 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9750 if (nspace_handlers
[nspace_type
].handler_busy
) {
9751 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9755 nspace_handlers
[nspace_type
].handler_busy
= 1;
9756 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9759 * Any process that gets here will be one of the namespace handlers.
9760 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9761 * as we can cause deadlocks to occur, because the namespace handler may prevent
9762 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9765 curtask
= current_task();
9766 bsd_set_dependency_capable (curtask
);
9768 lck_mtx_lock(&nspace_handler_lock
);
// Register the caller as the handler for this type if the slot is free.
9769 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9770 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
9771 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
9774 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9775 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9779 while (error
== 0) {
9781 /* Try to find matching namespace item */
9782 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9783 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9784 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9790 if (i
>= MAX_NSPACE_ITEMS
) {
9791 /* Nothing is there yet. Wait for wake up and retry */
9792 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9793 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9794 /* Prevent infinite loop if snapshot handler exited */
// Claim the item: NEW -> PROCESSING, and assign the next token id.
9801 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9802 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9803 nspace_items
[i
].token
= ++nspace_token_id
;
9805 assert(nspace_items
[i
].vp
);
9806 struct fileproc
*fp
;
9809 struct proc
*p
= current_proc();
9810 vfs_context_t ctx
= vfs_context_current();
9811 struct vnode_attr va
;
// These flags record how far setup got, so cleanup undoes only what succeeded.
9812 bool vn_get_succsessful
= false;
9813 bool vn_open_successful
= false;
9814 bool fp_alloc_successful
= false;
9817 * Use vnode pointer to acquire a file descriptor for
9818 * hand-off to userland
9820 fmode
= nspace_open_flags_for_type(nspace_type
);
9821 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9822 if (error
) goto cleanup
;
9823 vn_get_succsessful
= true;
9825 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9826 if (error
) goto cleanup
;
9827 vn_open_successful
= true;
9829 error
= falloc(p
, &fp
, &indx
, ctx
);
9830 if (error
) goto cleanup
;
9831 fp_alloc_successful
= true;
// Point the new file structure at the item's vnode.
9833 fp
->f_fglob
->fg_flag
= fmode
;
9834 fp
->f_fglob
->fg_ops
= &vnops
;
9835 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9838 procfdtbl_releasefd(p
, indx
, NULL
);
9839 fp_drop(p
, indx
, fp
, 1);
9843 * All variants of the namespace handler struct support these three fields:
9844 * token, flags, and the FD pointer
9846 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9847 if (error
) goto cleanup
;
9848 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9849 if (error
) goto cleanup
;
9850 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9851 if (error
) goto cleanup
;
9854 * Handle optional fields:
9855 * extended version support an info ptr (offset, length), and the
9857 * namedata version supports a unique per-link object ID
9861 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9862 uint64_t u_offset
, u_length
;
9865 u_offset
= uio_offset(uio
);
9866 u_length
= uio_resid(uio
);
// infoptr receives two consecutive uint64_t values: offset, then length.
9871 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9872 if (error
) goto cleanup
;
9873 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
9874 if (error
) goto cleanup
;
9879 VATTR_WANTED(&va
, va_linkid
);
9880 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9881 if (error
) goto cleanup
;
// linkid stays 0 when the filesystem does not support va_linkid.
9883 uint64_t linkid
= 0;
9884 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9885 linkid
= (uint64_t)va
.va_linkid
;
9887 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
// Release, in reverse order, whatever was acquired above.
9891 if (fp_alloc_successful
) fp_free(p
, indx
, fp
);
9892 if (vn_open_successful
) vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9896 if (vn_get_succsessful
) vnode_put(nspace_items
[i
].vp
);
9902 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9903 vnode_lock_spin(nspace_items
[i
].vp
);
9904 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9905 vnode_unlock(nspace_items
[i
].vp
);
9907 nspace_items
[i
].vp
= NULL
;
9908 nspace_items
[i
].vid
= 0;
9909 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9910 nspace_items
[i
].token
= 0;
9912 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9915 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9916 // just go through every snapshot event and unblock it immediately.
9917 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9918 for(i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9919 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9920 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9921 nspace_items
[i
].vp
= NULL
;
9922 nspace_items
[i
].vid
= 0;
9923 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9924 nspace_items
[i
].token
= 0;
9926 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9933 lck_mtx_unlock(&nspace_handler_lock
);
// Allow another thread to act as the handler for this type again.
9935 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9936 nspace_handlers
[nspace_type
].handler_busy
= 0;
9937 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
// validate_namespace_args: check that size equals the size of one of the
// known namespace handler structures: the user64 info / info_ext / data
// layouts, or the user32 equivalents.
// NOTE(review): the is64bit test, the bodies of the matches and the returned
// values are not part of this listing. size is a signed int compared against
// sizeof() results.
9942 static inline int validate_namespace_args (int is64bit
, int size
) {
9945 /* Must be one of these */
9946 if (size
== sizeof(user64_namespace_handler_info
)) {
9949 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9952 if (size
== sizeof(user64_namespace_handler_data
)) {
9958 /* 32 bit -- must be one of these */
9959 if (size
== sizeof(user32_namespace_handler_info
)) {
9962 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9965 if (size
== sizeof(user32_namespace_handler_data
)) {
// process_namespace_fsctl: common entry for the namespace handler fsctls.
// Requires superuser credentials, validates size with
// validate_namespace_args(), converts the user-supplied structure in data
// (64-bit or 32-bit layout) into a zero-initialised kernel
// namespace_handler_data, and then blocks in wait_for_namespace_event().
// Only the fields that exist in a structure of the given size are filled in;
// the rest stay zero from the bzero.
// NOTE(review): the is64bit branch and the error exits are not part of this
// listing.
9977 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9980 namespace_handler_data nhd
;
9982 bzero (&nhd
, sizeof(namespace_handler_data
));
9984 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9988 error
= validate_namespace_args (is64bit
, size
);
9993 /* Copy in the userland pointers into our kernel-only struct */
9996 /* 64 bit userland structures */
9997 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9998 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9999 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
10001 /* If the size is greater than the standard info struct, add in extra fields */
10002 if (size
> (sizeof(user64_namespace_handler_info
))) {
10003 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
10004 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
10006 if (size
== (sizeof(user64_namespace_handler_data
))) {
10007 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
10009 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10013 /* 32 bit userland structures */
10014 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
10015 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
10016 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
10018 if (size
> (sizeof(user32_namespace_handler_info
))) {
10019 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
10020 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
// NOTE(review): unlike the other 32-bit fields, objid is converted with a
// plain (user_addr_t) cast instead of CAST_USER_ADDR_T - confirm this is intended.
10022 if (size
== (sizeof(user32_namespace_handler_data
))) {
10023 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
10025 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10029 return wait_for_namespace_event(&nhd
, nspace_type
);
// fsctl_bogus_command_compat: compatibility shim for fsctl command values.
// For each command listed below, a value equal to IOCBASECMD(<command>) is
// translated back to the full <command> constant.
// NOTE(review): the switch header and the default path are not part of this
// listing; presumably the switch is on cmd and unlisted values are returned
// unchanged - confirm against the full source.
10032 static unsigned long
10033 fsctl_bogus_command_compat(unsigned long cmd
)
10037 case IOCBASECMD(FSIOC_SYNC_VOLUME
):
10038 return (FSIOC_SYNC_VOLUME
);
10039 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
):
10040 return (FSIOC_ROUTEFS_SETROUTEID
);
10041 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
):
10042 return (FSIOC_SET_PACKAGE_EXTS
);
10043 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET
):
10044 return (FSIOC_NAMESPACE_HANDLER_GET
);
10045 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET
):
10046 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET
);
10047 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT
):
10048 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT
);
10049 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE
):
10050 return (FSIOC_NAMESPACE_HANDLER_UPDATE
);
10051 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK
):
10052 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK
);
10053 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL
):
10054 return (FSIOC_NAMESPACE_HANDLER_CANCEL
);
10055 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
):
10056 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
);
10057 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
):
10058 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
);
10059 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
):
10060 return (FSIOC_SET_FSTYPENAME_OVERRIDE
);
10061 case IOCBASECMD(DISK_CONDITIONER_IOC_GET
):
10062 return (DISK_CONDITIONER_IOC_GET
);
10063 case IOCBASECMD(DISK_CONDITIONER_IOC_SET
):
10064 return (DISK_CONDITIONER_IOC_SET
);
10065 case IOCBASECMD(FSIOC_FIOSEEKHOLE
):
10066 return (FSIOC_FIOSEEKHOLE
);
10067 case IOCBASECMD(FSIOC_FIOSEEKDATA
):
10068 return (FSIOC_FIOSEEKDATA
);
10069 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
):
10070 return (SPOTLIGHT_IOC_GET_MOUNT_TIME
);
10071 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
):
10072 return (SPOTLIGHT_IOC_GET_LAST_MTIME
);
10079 * Make a filesystem-specific control call:
/*
 * fsctl_internal: common back end used by fsctl() and ffsctl().
 *
 * - cmd is first normalized through fsctl_bogus_command_compat().
 * - The argument size comes from IOCPARM_LEN(cmd); sizes above IOCPARM_MAX
 *   are rejected with EINVAL.  Arguments larger than the STK_PARAMS-byte
 *   stack buffer get a kalloc() allocation (ENOMEM on failure).
 * - For IOC_IN commands the caller data is copied in from udata.
 * - A set of generic commands is handled here (volume sync, routefs route
 *   id, package extensions, namespace/snapshot handler get/update/unblock/
 *   cancel, snapshot time, disk-image snapshot events, fstypename override,
 *   disk conditioner get/set); other commands go to VNOP_IOCTL().
 * - When no error occurred and the command is IOC_OUT with a non-zero size,
 *   the buffer is copied back out to udata.
 *
 * arg_vp is passed by reference; the FSIOC_SYNC_VOLUME path notes that the
 * argument vnode may be marked as released.
 */
10083 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
10088 #define STK_PARAMS 128
10089 char stkbuf
[STK_PARAMS
] = {0};
10090 caddr_t data
, memp
;
10091 vnode_t vp
= *arg_vp
;
10093 cmd
= fsctl_bogus_command_compat(cmd
);
10095 size
= IOCPARM_LEN(cmd
);
10096 if (size
> IOCPARM_MAX
) return (EINVAL
);
10098 is64bit
= proc_is64bit(p
);
10102 if (size
> sizeof (stkbuf
)) {
10103 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
10109 if (cmd
& IOC_IN
) {
10111 error
= copyin(udata
, data
, size
);
10114 kfree (memp
, size
);
10120 *(user_addr_t
*)data
= udata
;
10123 *(uint32_t *)data
= (uint32_t)udata
;
10126 } else if ((cmd
& IOC_OUT
) && size
) {
10128 * Zero the buffer so the user always
10129 * gets back something deterministic.
10132 } else if (cmd
& IOC_VOID
) {
10134 *(user_addr_t
*)data
= udata
;
10137 *(uint32_t *)data
= (uint32_t)udata
;
10141 /* Check to see if it's a generic command */
10144 case FSIOC_SYNC_VOLUME
: {
10145 mount_t mp
= vp
->v_mount
;
10146 int arg
= *(uint32_t*)data
;
10148 /* record vid of vp so we can drop it below. */
10149 uint32_t vvid
= vp
->v_id
;
10152 * Then grab mount_iterref so that we can release the vnode.
10153 * Without this, a thread may call vnode_iterate_prepare then
10154 * get into a deadlock because we've never released the root vp
10156 error
= mount_iterref (mp
, 0);
10162 /* issue the sync for this volume */
10163 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
10166 * Then release the mount_iterref once we're done syncing; it's not
10167 * needed for the VNOP_IOCTL below
10169 mount_iterdrop(mp
);
10171 if (arg
& FSCTL_SYNC_FULLSYNC
) {
10172 /* re-obtain vnode iocount on the root vp, if possible */
10173 error
= vnode_getwithvid (vp
, vvid
);
10175 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
10179 /* mark the argument VP as having been released */
10184 case FSIOC_ROUTEFS_SETROUTEID
: {
10186 char routepath
[MAXPATHLEN
];
10189 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10192 bzero(routepath
, MAXPATHLEN
);
10193 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
10197 error
= routefs_kernel_mount(routepath
);
10205 case FSIOC_SET_PACKAGE_EXTS
: {
10206 user_addr_t ext_strings
;
10207 uint32_t num_entries
;
10208 uint32_t max_width
;
10210 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0)))
10213 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
10214 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
10216 // either you're 64-bit and passed a 64-bit struct or
10217 // you're 32-bit and passed a 32-bit struct. otherwise
10224 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
10225 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
10226 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
10228 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
10229 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
10230 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
10232 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
10236 /* namespace handlers */
10237 case FSIOC_NAMESPACE_HANDLER_GET
: {
10238 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10242 /* Snapshot handlers */
10243 case FSIOC_OLD_SNAPSHOT_HANDLER_GET
: {
10244 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10248 case FSIOC_SNAPSHOT_HANDLER_GET_EXT
: {
10249 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10253 case FSIOC_NAMESPACE_HANDLER_UPDATE
: {
10254 uint32_t token
, val
;
10257 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10261 if (!nspace_is_special_process(p
)) {
10266 token
= ((uint32_t *)data
)[0];
10267 val
= ((uint32_t *)data
)[1];
10269 lck_mtx_lock(&nspace_handler_lock
);
10271 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10272 if (nspace_items
[i
].token
== token
) {
10273 break; /* exit for loop, not case stmt */
10277 if (i
>= MAX_NSPACE_ITEMS
) {
10281 // if this bit is set, when resolve_nspace_item() times out
10282 // it will loop and go back to sleep.
10284 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10287 lck_mtx_unlock(&nspace_handler_lock
);
10290 printf("nspace-handler-update: did not find token %u\n", token
);
10295 case FSIOC_NAMESPACE_HANDLER_UNBLOCK
: {
10296 uint32_t token
, val
;
10299 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10303 if (!nspace_is_special_process(p
)) {
10308 token
= ((uint32_t *)data
)[0];
10309 val
= ((uint32_t *)data
)[1];
10311 lck_mtx_lock(&nspace_handler_lock
);
10313 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10314 if (nspace_items
[i
].token
== token
) {
10315 break; /* exit for loop, not case statement */
10319 if (i
>= MAX_NSPACE_ITEMS
) {
10320 printf("nspace-handler-unblock: did not find token %u\n", token
);
10323 if (val
== 0 && nspace_items
[i
].vp
) {
10324 vnode_lock_spin(nspace_items
[i
].vp
);
10325 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10326 vnode_unlock(nspace_items
[i
].vp
);
10329 nspace_items
[i
].vp
= NULL
;
10330 nspace_items
[i
].arg
= NULL
;
10331 nspace_items
[i
].op
= 0;
10332 nspace_items
[i
].vid
= 0;
10333 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10334 nspace_items
[i
].token
= 0;
10336 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10339 lck_mtx_unlock(&nspace_handler_lock
);
10343 case FSIOC_NAMESPACE_HANDLER_CANCEL
: {
10344 uint32_t token
, val
;
10347 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10351 if (!nspace_is_special_process(p
)) {
10356 token
= ((uint32_t *)data
)[0];
10357 val
= ((uint32_t *)data
)[1];
10359 lck_mtx_lock(&nspace_handler_lock
);
10361 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10362 if (nspace_items
[i
].token
== token
) {
10363 break; /* exit for loop, not case stmt */
10367 if (i
>= MAX_NSPACE_ITEMS
) {
10368 printf("nspace-handler-cancel: did not find token %u\n", token
);
10371 if (nspace_items
[i
].vp
) {
10372 vnode_lock_spin(nspace_items
[i
].vp
);
10373 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10374 vnode_unlock(nspace_items
[i
].vp
);
10377 nspace_items
[i
].vp
= NULL
;
10378 nspace_items
[i
].arg
= NULL
;
10379 nspace_items
[i
].vid
= 0;
10380 nspace_items
[i
].token
= val
;
10381 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10382 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10384 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10387 lck_mtx_unlock(&nspace_handler_lock
);
10391 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10392 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10396 // we explicitly do not do the namespace_handler_proc check here
10398 lck_mtx_lock(&nspace_handler_lock
);
10399 snapshot_timestamp
= ((uint32_t *)data
)[0];
10400 wakeup(&nspace_item_idx
);
10401 lck_mtx_unlock(&nspace_handler_lock
);
10402 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10407 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10409 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10413 lck_mtx_lock(&nspace_handler_lock
);
10414 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10415 lck_mtx_unlock(&nspace_handler_lock
);
10416 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10417 nspace_allow_virtual_devs
? "" : " NOT");
10423 case FSIOC_SET_FSTYPENAME_OVERRIDE
:
10425 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10429 mount_lock(vp
->v_mount
);
10430 if (data
[0] != 0) {
10431 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10432 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10433 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10434 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10435 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10438 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10439 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10441 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10442 vp
->v_mount
->fstypename_override
[0] = '\0';
10444 mount_unlock(vp
->v_mount
);
10449 case DISK_CONDITIONER_IOC_GET
: {
10450 error
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10454 case DISK_CONDITIONER_IOC_SET
: {
10455 error
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info
*)data
);
10460 /* other, known commands shouldn't be passed down here */
10463 case F_TRIM_ACTIVE_FILE
:
10465 case F_TRANSCODEKEY
:
10466 case F_GETPROTECTIONLEVEL
:
10467 case F_GETDEFAULTPROTLEVEL
:
10468 case F_MAKECOMPRESSED
:
10469 case F_SET_GREEDY_MODE
:
10470 case F_SETSTATICCONTENT
:
10472 case F_SETBACKINGSTORE
:
10473 case F_GETPATH_MTMINFO
:
10474 case APFSIOC_REVERT_TO_SNAPSHOT
:
10475 case FSIOC_FIOSEEKHOLE
:
10476 case FSIOC_FIOSEEKDATA
:
10477 case HFS_GET_BOOT_INFO
:
10478 case HFS_SET_BOOT_INFO
:
10482 case F_BARRIERFSYNC
:
10488 /* Invoke the filesystem-specific code */
10489 error
= VNOP_IOCTL(vp
, cmd
, data
, options
, ctx
);
10492 } /* end switch stmt */
10495 * if no errors, copy any data to user. Size was
10496 * already set and checked above.
10498 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
10499 error
= copyout(data
, udata
, size
);
10511 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10514 struct nameidata nd
;
10517 vfs_context_t ctx
= vfs_context_current();
10519 AUDIT_ARG(cmd
, uap
->cmd
);
10520 AUDIT_ARG(value32
, uap
->options
);
10521 /* Get the vnode for the file we are getting info on: */
10523 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
10524 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10525 UIO_USERSPACE
, uap
->path
, ctx
);
10526 if ((error
= namei(&nd
))) goto done
;
10531 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
10537 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10546 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
10550 vfs_context_t ctx
= vfs_context_current();
10553 AUDIT_ARG(fd
, uap
->fd
);
10554 AUDIT_ARG(cmd
, uap
->cmd
);
10555 AUDIT_ARG(value32
, uap
->options
);
10557 /* Get the vnode for the file we are getting info on: */
10558 if ((error
= file_vnode(uap
->fd
, &vp
)))
10561 if ((error
= vnode_getwithref(vp
))) {
10567 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
10574 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10578 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10585 /* end of fsctl system call */
10588 * Retrieve the data of an extended attribute.
10591 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
10594 struct nameidata nd
;
10595 char attrname
[XATTR_MAXNAMELEN
+1];
10596 vfs_context_t ctx
= vfs_context_current();
10598 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10599 size_t attrsize
= 0;
10601 u_int32_t nameiflags
;
10603 char uio_buf
[ UIO_SIZEOF(1) ];
10605 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10608 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10609 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10610 if ((error
= namei(&nd
))) {
10616 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10619 if (xattr_protected(attrname
)) {
10620 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
10626 * the specific check for 0xffffffff is a hack to preserve
10627 * binaray compatibilty in K64 with applications that discovered
10628 * that passing in a buf pointer and a size of -1 resulted in
10629 * just the size of the indicated extended attribute being returned.
10630 * this isn't part of the documented behavior, but because of the
10631 * original implemtation's check for "uap->size > 0", this behavior
10632 * was allowed. In K32 that check turned into a signed comparison
10633 * even though uap->size is unsigned... in K64, we blow by that
10634 * check because uap->size is unsigned and doesn't get sign smeared
10635 * in the munger for a 32 bit user app. we also need to add a
10636 * check to limit the maximum size of the buffer being passed in...
10637 * unfortunately, the underlying fileystems seem to just malloc
10638 * the requested size even if the actual extended attribute is tiny.
10639 * because that malloc is for kernel wired memory, we have to put a
10640 * sane limit on it.
10642 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10643 * U64 running on K64 will yield -1 (64 bits wide)
10644 * U32/U64 running on K32 will yield -1 (32 bits wide)
10646 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
10650 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
10651 uap
->size
= XATTR_MAXSIZE
;
10653 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10654 &uio_buf
[0], sizeof(uio_buf
));
10655 uio_addiov(auio
, uap
->value
, uap
->size
);
10658 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
10663 *retval
= uap
->size
- uio_resid(auio
);
10665 *retval
= (user_ssize_t
)attrsize
;
10672 * Retrieve the data of an extended attribute.
10675 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
10678 char attrname
[XATTR_MAXNAMELEN
+1];
10680 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10681 size_t attrsize
= 0;
10684 char uio_buf
[ UIO_SIZEOF(1) ];
10686 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10689 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10692 if ( (error
= vnode_getwithref(vp
)) ) {
10693 file_drop(uap
->fd
);
10696 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10699 if (xattr_protected(attrname
)) {
10703 if (uap
->value
&& uap
->size
> 0) {
10704 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10705 &uio_buf
[0], sizeof(uio_buf
));
10706 uio_addiov(auio
, uap
->value
, uap
->size
);
10709 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10711 (void)vnode_put(vp
);
10712 file_drop(uap
->fd
);
10715 *retval
= uap
->size
- uio_resid(auio
);
10717 *retval
= (user_ssize_t
)attrsize
;
10723 * Set the data of an extended attribute.
10726 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
10729 struct nameidata nd
;
10730 char attrname
[XATTR_MAXNAMELEN
+1];
10731 vfs_context_t ctx
= vfs_context_current();
10733 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10735 u_int32_t nameiflags
;
10737 char uio_buf
[ UIO_SIZEOF(1) ];
10739 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10742 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10743 if (error
== EPERM
) {
10744 /* if the string won't fit in attrname, copyinstr emits EPERM */
10745 return (ENAMETOOLONG
);
10747 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10750 if (xattr_protected(attrname
))
10752 if (uap
->size
!= 0 && uap
->value
== 0) {
10756 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10757 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10758 if ((error
= namei(&nd
))) {
10764 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10765 &uio_buf
[0], sizeof(uio_buf
));
10766 uio_addiov(auio
, uap
->value
, uap
->size
);
10768 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
10771 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10782 * Set the data of an extended attribute.
10785 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
10788 char attrname
[XATTR_MAXNAMELEN
+1];
10790 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10793 char uio_buf
[ UIO_SIZEOF(1) ];
10795 vfs_context_t ctx
= vfs_context_current();
10798 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10801 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10802 if (error
== EPERM
) {
10803 /* if the string won't fit in attrname, copyinstr emits EPERM */
10804 return (ENAMETOOLONG
);
10806 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10809 if (xattr_protected(attrname
))
10811 if (uap
->size
!= 0 && uap
->value
== 0) {
10814 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10817 if ( (error
= vnode_getwithref(vp
)) ) {
10818 file_drop(uap
->fd
);
10821 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10822 &uio_buf
[0], sizeof(uio_buf
));
10823 uio_addiov(auio
, uap
->value
, uap
->size
);
10825 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
10828 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10834 file_drop(uap
->fd
);
10840 * Remove an extended attribute.
10841 * XXX Code duplication here.
10844 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
10847 struct nameidata nd
;
10848 char attrname
[XATTR_MAXNAMELEN
+1];
10849 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10850 vfs_context_t ctx
= vfs_context_current();
10852 u_int32_t nameiflags
;
10855 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10858 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10862 if (xattr_protected(attrname
))
10864 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10865 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10866 if ((error
= namei(&nd
))) {
10872 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10875 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10886 * Remove an extended attribute.
10887 * XXX Code duplication here.
10890 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10893 char attrname
[XATTR_MAXNAMELEN
+1];
10897 vfs_context_t ctx
= vfs_context_current();
10900 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10903 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10907 if (xattr_protected(attrname
))
10909 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10912 if ( (error
= vnode_getwithref(vp
)) ) {
10913 file_drop(uap
->fd
);
10917 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10920 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10926 file_drop(uap
->fd
);
10932 * Retrieve the list of extended attribute names.
10933 * XXX Code duplication here.
10936 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10939 struct nameidata nd
;
10940 vfs_context_t ctx
= vfs_context_current();
10942 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10943 size_t attrsize
= 0;
10944 u_int32_t nameiflags
;
10946 char uio_buf
[ UIO_SIZEOF(1) ];
10948 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10951 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10952 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10953 if ((error
= namei(&nd
))) {
10958 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10959 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10960 &uio_buf
[0], sizeof(uio_buf
));
10961 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10964 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10968 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10970 *retval
= (user_ssize_t
)attrsize
;
10976 * Retrieve the list of extended attribute names.
10977 * XXX Code duplication here.
10980 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10984 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10985 size_t attrsize
= 0;
10987 char uio_buf
[ UIO_SIZEOF(1) ];
10989 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10992 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10995 if ( (error
= vnode_getwithref(vp
)) ) {
10996 file_drop(uap
->fd
);
10999 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
11000 auio
= uio_createwithbuffer(1, 0, spacetype
,
11001 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
11002 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
11005 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
11008 file_drop(uap
->fd
);
11010 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
11012 *retval
= (user_ssize_t
)attrsize
;
11017 static int fsgetpath_internal(
11018 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
11019 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
11022 struct mount
*mp
= NULL
;
11026 /* maximum number of times to retry build_path */
11027 unsigned int retries
= 0x10;
11029 if (bufsize
> PAGE_SIZE
) {
11038 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
11039 error
= ENOTSUP
; /* unexpected failure */
11045 error
= VFS_ROOT(mp
, &vp
, ctx
);
11047 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
11050 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
11052 * If the fileid isn't found and we're in a union
11053 * mount volume, then see if the fileid is in the
11054 * mounted-on volume.
11056 struct mount
*tmp
= mp
;
11057 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
11059 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
11070 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
11077 /* Obtain the absolute path to this vnode. */
11078 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
11079 bpflags
|= BUILDPATH_CHECK_MOVED
;
11080 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
11084 /* there was a race building the path, try a few more times */
11085 if (error
== EAGAIN
) {
11095 AUDIT_ARG(text
, buf
);
11097 if (kdebug_enable
) {
11098 long dbg_parms
[NUMPARMS
];
11101 dbg_namelen
= (int)sizeof(dbg_parms
);
11103 if (length
< dbg_namelen
) {
11104 memcpy((char *)dbg_parms
, buf
, length
);
11105 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
11107 dbg_namelen
= length
;
11109 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
11112 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
11115 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
11122 * Obtain the full pathname of a file system object by id.
11125 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
11127 vfs_context_t ctx
= vfs_context_current();
11133 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
11136 AUDIT_ARG(value32
, fsid
.val
[0]);
11137 AUDIT_ARG(value64
, uap
->objid
);
11138 /* Restrict output buffer size for now. */
11140 if (uap
->bufsize
> PAGE_SIZE
) {
11143 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
11144 if (realpath
== NULL
) {
11148 error
= fsgetpath_internal(
11149 ctx
, fsid
.val
[0], uap
->objid
,
11150 uap
->bufsize
, realpath
, &length
);
11156 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
11158 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
11161 FREE(realpath
, M_TEMP
);
11167 * Common routine to handle various flavors of statfs data heading out
11170 * Returns: 0 Success
11174 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
11175 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
11176 boolean_t partial_copy
)
11179 int my_size
, copy_size
;
11182 struct user64_statfs sfs
;
11183 my_size
= copy_size
= sizeof(sfs
);
11184 bzero(&sfs
, my_size
);
11185 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11186 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11187 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11188 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
11189 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
11190 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
11191 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
11192 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
11193 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
11194 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
11195 sfs
.f_fsid
= sfsp
->f_fsid
;
11196 sfs
.f_owner
= sfsp
->f_owner
;
11197 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11198 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11200 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11202 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11203 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11205 if (partial_copy
) {
11206 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11208 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11211 struct user32_statfs sfs
;
11213 my_size
= copy_size
= sizeof(sfs
);
11214 bzero(&sfs
, my_size
);
11216 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11217 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11218 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11221 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11222 * have to fudge the numbers here in that case. We inflate the blocksize in order
11223 * to reflect the filesystem size as best we can.
11225 if ((sfsp
->f_blocks
> INT_MAX
)
11226 /* Hack for 4061702 . I think the real fix is for Carbon to
11227 * look for some volume capability and not depend on hidden
11228 * semantics agreed between a FS and carbon.
11229 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11230 * for Carbon to set bNoVolumeSizes volume attribute.
11231 * Without this the webdavfs files cannot be copied onto
11232 * disk as they look huge. This change should not affect
11233 * XSAN as they should not setting these to -1..
11235 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
11236 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
11237 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
11241 * Work out how far we have to shift the block count down to make it fit.
11242 * Note that it's possible to have to shift so far that the resulting
11243 * blocksize would be unreportably large. At that point, we will clip
11244 * any values that don't fit.
11246 * For safety's sake, we also ensure that f_iosize is never reported as
11247 * being smaller than f_bsize.
11249 for (shift
= 0; shift
< 32; shift
++) {
11250 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
11252 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
11255 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11256 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
11257 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
11258 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
11259 #undef __SHIFT_OR_CLIP
11260 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
11261 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
11263 /* filesystem is small enough to be reported honestly */
11264 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
11265 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
11266 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
11267 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
11268 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
11270 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
11271 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
11272 sfs
.f_fsid
= sfsp
->f_fsid
;
11273 sfs
.f_owner
= sfsp
->f_owner
;
11274 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11275 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11277 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11279 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11280 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11282 if (partial_copy
) {
11283 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11285 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11288 if (sizep
!= NULL
) {
11295 * copy stat structure into user_stat structure.
11297 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11299 bzero(usbp
, sizeof(*usbp
));
11301 usbp
->st_dev
= sbp
->st_dev
;
11302 usbp
->st_ino
= sbp
->st_ino
;
11303 usbp
->st_mode
= sbp
->st_mode
;
11304 usbp
->st_nlink
= sbp
->st_nlink
;
11305 usbp
->st_uid
= sbp
->st_uid
;
11306 usbp
->st_gid
= sbp
->st_gid
;
11307 usbp
->st_rdev
= sbp
->st_rdev
;
11308 #ifndef _POSIX_C_SOURCE
11309 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11310 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11311 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11312 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11313 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11314 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11316 usbp
->st_atime
= sbp
->st_atime
;
11317 usbp
->st_atimensec
= sbp
->st_atimensec
;
11318 usbp
->st_mtime
= sbp
->st_mtime
;
11319 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11320 usbp
->st_ctime
= sbp
->st_ctime
;
11321 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11323 usbp
->st_size
= sbp
->st_size
;
11324 usbp
->st_blocks
= sbp
->st_blocks
;
11325 usbp
->st_blksize
= sbp
->st_blksize
;
11326 usbp
->st_flags
= sbp
->st_flags
;
11327 usbp
->st_gen
= sbp
->st_gen
;
11328 usbp
->st_lspare
= sbp
->st_lspare
;
11329 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11330 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11333 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11335 bzero(usbp
, sizeof(*usbp
));
11337 usbp
->st_dev
= sbp
->st_dev
;
11338 usbp
->st_ino
= sbp
->st_ino
;
11339 usbp
->st_mode
= sbp
->st_mode
;
11340 usbp
->st_nlink
= sbp
->st_nlink
;
11341 usbp
->st_uid
= sbp
->st_uid
;
11342 usbp
->st_gid
= sbp
->st_gid
;
11343 usbp
->st_rdev
= sbp
->st_rdev
;
11344 #ifndef _POSIX_C_SOURCE
11345 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11346 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11347 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11348 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11349 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11350 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11352 usbp
->st_atime
= sbp
->st_atime
;
11353 usbp
->st_atimensec
= sbp
->st_atimensec
;
11354 usbp
->st_mtime
= sbp
->st_mtime
;
11355 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11356 usbp
->st_ctime
= sbp
->st_ctime
;
11357 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11359 usbp
->st_size
= sbp
->st_size
;
11360 usbp
->st_blocks
= sbp
->st_blocks
;
11361 usbp
->st_blksize
= sbp
->st_blksize
;
11362 usbp
->st_flags
= sbp
->st_flags
;
11363 usbp
->st_gen
= sbp
->st_gen
;
11364 usbp
->st_lspare
= sbp
->st_lspare
;
11365 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11366 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11370 * copy stat64 structure into user_stat64 structure.
11372 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11374 bzero(usbp
, sizeof(*usbp
));
11376 usbp
->st_dev
= sbp
->st_dev
;
11377 usbp
->st_ino
= sbp
->st_ino
;
11378 usbp
->st_mode
= sbp
->st_mode
;
11379 usbp
->st_nlink
= sbp
->st_nlink
;
11380 usbp
->st_uid
= sbp
->st_uid
;
11381 usbp
->st_gid
= sbp
->st_gid
;
11382 usbp
->st_rdev
= sbp
->st_rdev
;
11383 #ifndef _POSIX_C_SOURCE
11384 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11385 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11386 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11387 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11388 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11389 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11390 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11391 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11393 usbp
->st_atime
= sbp
->st_atime
;
11394 usbp
->st_atimensec
= sbp
->st_atimensec
;
11395 usbp
->st_mtime
= sbp
->st_mtime
;
11396 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11397 usbp
->st_ctime
= sbp
->st_ctime
;
11398 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11399 usbp
->st_birthtime
= sbp
->st_birthtime
;
11400 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11402 usbp
->st_size
= sbp
->st_size
;
11403 usbp
->st_blocks
= sbp
->st_blocks
;
11404 usbp
->st_blksize
= sbp
->st_blksize
;
11405 usbp
->st_flags
= sbp
->st_flags
;
11406 usbp
->st_gen
= sbp
->st_gen
;
11407 usbp
->st_lspare
= sbp
->st_lspare
;
11408 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11409 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11412 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11414 bzero(usbp
, sizeof(*usbp
));
11416 usbp
->st_dev
= sbp
->st_dev
;
11417 usbp
->st_ino
= sbp
->st_ino
;
11418 usbp
->st_mode
= sbp
->st_mode
;
11419 usbp
->st_nlink
= sbp
->st_nlink
;
11420 usbp
->st_uid
= sbp
->st_uid
;
11421 usbp
->st_gid
= sbp
->st_gid
;
11422 usbp
->st_rdev
= sbp
->st_rdev
;
11423 #ifndef _POSIX_C_SOURCE
11424 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11425 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11426 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11427 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11428 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11429 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11430 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11431 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11433 usbp
->st_atime
= sbp
->st_atime
;
11434 usbp
->st_atimensec
= sbp
->st_atimensec
;
11435 usbp
->st_mtime
= sbp
->st_mtime
;
11436 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11437 usbp
->st_ctime
= sbp
->st_ctime
;
11438 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11439 usbp
->st_birthtime
= sbp
->st_birthtime
;
11440 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11442 usbp
->st_size
= sbp
->st_size
;
11443 usbp
->st_blocks
= sbp
->st_blocks
;
11444 usbp
->st_blksize
= sbp
->st_blksize
;
11445 usbp
->st_flags
= sbp
->st_flags
;
11446 usbp
->st_gen
= sbp
->st_gen
;
11447 usbp
->st_lspare
= sbp
->st_lspare
;
11448 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11449 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11453 * Purge buffer cache for simulating cold starts
11455 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11457 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11459 return VNODE_RETURNED
;
11462 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11464 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11466 return VFS_RETURNED
;
/*
 * vfs_purge: runs vfs_purge_callback over the mounts through vfs_iterate()
 * with flags 0 and no callback argument.  All three parameters (p, uap,
 * retval) are marked __unused.
 *
 * The current credential (kauth_cred_get()) is tested with
 * kauth_cred_issuser().  The statement that this test guards is on listing
 * line 11473, which is not present here.
 * NOTE(review): presumably an early error return for callers that are not
 * superuser - confirm against the full source, together with the return
 * type and the final return value, which are also not shown.
 */
11470 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11472 if (!kauth_cred_issuser(kauth_cred_get()))
11475 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);
11481 * gets the vnode associated with the (unnamed) snapshot directory
11482 * for a Filesystem. The snapshot directory vnode is returned with
11483 * an iocount on it.
/*
 * vnode_get_snapdir: thin wrapper around VFS_VGET_SNAPDIR().
 *
 * rvp    vnode whose mount (vnode_mount(rvp)) is queried
 * sdvpp  out parameter, handed to VFS_VGET_SNAPDIR() unchanged
 * ctx    VFS context, handed through unchanged
 *
 * Returns whatever VFS_VGET_SNAPDIR() returns.
 * NOTE(review): the return type and linkage are on a listing line that is
 * not present here.
 */
11486 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11488 return (VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
));
11492 * Get the snapshot vnode.
11494 * If successful, the call returns with an iocount on *rvpp, *sdvpp and
11495 * needs nameidone() on ndp.
11497 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11499 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
/*
 * vnode_get_snapshot: look up the snapshot called name on the filesystem
 * that dirfd refers to.
 *
 * Steps visible in this listing, in order:
 *  - vnode_getfromfd() turns dirfd into *rvpp, which is then tested with
 *    vnode_isvroot();
 *  - vfs_getattr() is asked for f_capabilities, and VOL_CAP_INT_SNAPSHOT
 *    has to be set in both the valid and the capabilities words indexed by
 *    VOL_CAPABILITIES_INTERFACES;
 *  - vnode_get_snapdir() fills in *sdvpp;
 *  - name is copied from user space into a MAXPATHLEN M_TEMP buffer with
 *    copyinstr() and screened: an empty name, dot, dot-dot and any name
 *    containing a slash are singled out;
 *  - for op == CREATE or op == DELETE the matching
 *    mac_mount_check_snapshot_create() / _delete() hook is called;
 *  - namei() runs on the kernel copy of the name (UIO_SYSSPACE) with
 *    USEDVP | NOCACHE | AUDITVNPATH1 and ni_dvp preset to *sdvpp, after
 *    which the buffer is released with FREE().
 *
 * NOTE(review): the local declarations, the statements executed when each
 * check fails, and the final cleanup and return are on listing lines that
 * are not present here; confirm the error paths against the full source.
 */
11503 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
11504 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
11505 #if !CONFIG_TRIGGERS
11508 enum path_operation pathop
,
11514 struct vfs_attr vfa
;
11519 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
11523 if (!vnode_isvroot(*rvpp
)) {
11528 /* Make sure the filesystem supports snapshots */
11529 VFSATTR_INIT(&vfa
);
11530 VFSATTR_WANTED(&vfa
, f_capabilities
);
11531 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
11532 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
11533 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
11534 VOL_CAP_INT_SNAPSHOT
)) ||
11535 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
11536 VOL_CAP_INT_SNAPSHOT
))) {
11541 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
/* Kernel-side copy of the user-supplied name; released by the FREE() at 11584. */
11545 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11546 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11551 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11552 * (the length returned by copyinstr includes the terminating NUL)
11554 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
11555 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
11559 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++);
11560 if (i
< (int)name_len
) {
11566 if (op
== CREATE
) {
11567 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
11569 } else if (op
== DELETE
) {
11570 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
11577 /* Check if the snapshot already exists ... */
11578 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
11579 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
11580 ndp
->ni_dvp
= *sdvpp
;
11582 error
= namei(ndp
);
11584 FREE(name_buf
, M_TEMP
);
11600 * create a filesystem snapshot (for supporting filesystems)
11602 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11603 * We get to the (unnamed) snapshot directory vnode and create the vnode
11604 * for the snapshot in it.
11608 * a) Passed in name for snapshot cannot have slashes.
11609 * b) name can't be "." or ".."
11611 * Since this requires superuser privileges, vnode_authorize calls are not
/*
 * snapshot_create: create a snapshot entry called name in the snapshot
 * directory of the filesystem that dirfd refers to.  flags is marked
 * __unused.
 *
 * Visible flow:
 *  - vnode_get_snapshot() is called with op CREATE and fills rvp, snapdvp
 *    and namend;
 *  - if namend.ni_vp is set (the name already resolved to a vnode) that
 *    vnode is released with vnode_put();
 *  - a vnode_attr with va_type VREG and va_mode 0 is prepared and
 *    vn_create() is called in snapdvp with
 *    VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT;
 *  - nameidone() and vnode_put(snapdvp) release the lookup state.
 *
 * NOTE(review): listing lines 11629-11630 between the vnode_put() and the
 * vnode_attr declaration are not present here; presumably they set an
 * already-exists error and open the else branch - confirm.  The handling of
 * vp after vn_create(), the release of rvp and the return are likewise not
 * shown.
 */
11615 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11618 vnode_t rvp
, snapdvp
;
11620 struct nameidata namend
;
11622 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
11627 if (namend
.ni_vp
) {
11628 vnode_put(namend
.ni_vp
);
11631 struct vnode_attr va
;
11632 vnode_t vp
= NULLVP
;
/* The new entry is a regular file (VREG) with mode 0. */
11635 VATTR_SET(&va
, va_type
, VREG
);
11636 VATTR_SET(&va
, va_mode
, 0);
11638 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
11639 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
11644 nameidone(&namend
);
11645 vnode_put(snapdvp
);
11651 * Delete a Filesystem snapshot
11653 * get the vnode for the unnamed snapshot directory and the snapshot and
11654 * delete the snapshot.
/*
 * snapshot_delete: remove the snapshot called name from the snapshot
 * directory of the filesystem that dirfd refers to.  flags is marked
 * __unused.
 *
 * Visible flow:
 *  - vnode_get_snapshot() is called with op DELETE and fills rvp, snapdvp
 *    and namend;
 *  - VNOP_REMOVE() is issued on namend.ni_vp inside snapdvp, using the
 *    componentname from the lookup (namend.ni_cnd) and the flag
 *    VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
 *  - the snapshot vnode, the nameidata and snapdvp are then released with
 *    vnode_put(), nameidone() and vnode_put() in that order.
 *
 * NOTE(review): the check of the vnode_get_snapshot() result, the release
 * of rvp and the return statement are on listing lines not present here.
 */
11657 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11660 vnode_t rvp
, snapdvp
;
11662 struct nameidata namend
;
11664 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
11669 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
11670 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
11672 vnode_put(namend
.ni_vp
);
11673 nameidone(&namend
);
11674 vnode_put(snapdvp
);
11681 * Revert a filesystem to a snapshot
11683 * Marks the filesystem to revert to the given snapshot on next mount.
/*
 * snapshot_revert: ask the filesystem that dirfd refers to to revert to the
 * snapshot called name.  flags is marked __unused.
 *
 * Visible flow:
 *  - vnode_getfromfd() turns dirfd into rvp and mp = vnode_mount(rvp);
 *  - name is copied from user space into a MAXPATHLEN M_TEMP buffer;
 *  - mac_mount_check_snapshot_revert() is consulted with that name;
 *  - mount_iterref(mp, 0) is taken;
 *  - a componentname is built by hand over the buffer (LOOKUP,
 *    ISLASTCN | HASBUF, cn_pnlen MAXPATHLEN) and handed to
 *    VFS_IOCTL(VFSIOC_REVERT_SNAPSHOT) through revert_data.sr_cnp;
 *  - mount_iterdrop() and FREE() follow;
 *  - a second attempt resolves the snapshot with vnode_get_snapshot() using
 *    op LOOKUP and issues VNOP_IOCTL(APFSIOC_REVERT_TO_SNAPSHOT) on the
 *    snapshot vnode with a NULL data pointer, then releases ni_vp, the
 *    nameidata and snapdvp.
 *
 * cn_namelen is set to the length reported by copyinstr().
 * NOTE(review): the comment in vnode_get_snapshot states that this length
 * includes the terminating NUL - confirm that is what the filesystem
 * expects in cn_namelen.
 *
 * NOTE(review): the conditions around the three early FREE() calls and
 * around the second attempt, the releases of rvp and the return statements
 * are on listing lines not present here.  The comment at 11745 indicates
 * the second attempt is made when the VFS_IOCTL path reported an error.
 */
11686 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11692 struct fs_snapshot_revert_args revert_data
;
11693 struct componentname cnp
;
11697 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11701 mp
= vnode_mount(rvp
);
11703 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11704 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11706 FREE(name_buf
, M_TEMP
);
11712 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
11714 FREE(name_buf
, M_TEMP
);
11721 * Grab mount_iterref so that we can release the vnode,
11722 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11724 error
= mount_iterref (mp
, 0);
11727 FREE(name_buf
, M_TEMP
);
/* Hand-built single-component name describing name_buf. */
11731 memset(&cnp
, 0, sizeof(cnp
));
11732 cnp
.cn_pnbuf
= (char *)name_buf
;
11733 cnp
.cn_nameiop
= LOOKUP
;
11734 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11735 cnp
.cn_pnlen
= MAXPATHLEN
;
11736 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11737 cnp
.cn_namelen
= (int)name_len
;
11738 revert_data
.sr_cnp
= &cnp
;
11740 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
11741 mount_iterdrop(mp
);
11742 FREE(name_buf
, M_TEMP
);
11745 /* If there was any error, try again using VNOP_IOCTL */
11748 struct nameidata namend
;
11750 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
11757 error
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
11760 vnode_put(namend
.ni_vp
);
11761 nameidone(&namend
);
11762 vnode_put(snapdvp
);
11770 * rename a Filesystem snapshot
11772 * get the vnode for the unnamed snapshot directory and the snapshot and
11773 * rename the snapshot. This is a very specialised (and simple) case of
11774 * rename(2) (which has to deal with a lot more complications). It differs
11775 * slightly from rename(2) in that EEXIST is returned if the new name exists.
/*
 * snapshot_rename: rename the snapshot called old to new inside the
 * snapshot directory of the filesystem that dirfd refers to.  flags is
 * marked __unused.
 *
 * Visible flow:
 *  - a heap block (__rename_data, M_TEMP) holding two struct nameidata is
 *    allocated; fromnd and tond point into it;
 *  - vnode_get_snapshot() is called on old with op DELETE and fills rvp,
 *    snapdvp and fromnd; fvp is the resolved snapshot vnode;
 *  - new is copied from user space into a MAXPATHLEN M_TEMP buffer and
 *    screened the same way as in vnode_get_snapshot (empty, dot, dot-dot,
 *    slash);
 *  - the MAC hook invoked for the new name is
 *    mac_mount_check_snapshot_create();
 *  - namei() runs on the new name with RENAME / OP_RENAME and
 *    USEDVP | NOCACHE | AUDITVNPATH2, ni_dvp preset to snapdvp; if it
 *    resolves to an existing vnode that vnode is released;
 *  - VNOP_RENAME() is issued with snapdvp as both source and target
 *    directory and NULLVP as the target vnode;
 *  - newname_buf, snapdvp and __rename_data are released.
 *
 * NOTE(review): the statements executed when the checks fail, the error
 * value used when the new name already exists, the releases of fvp and rvp,
 * the nameidone() calls and the return are on listing lines not present
 * here - confirm against the full source.
 */
11778 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
11779 __unused
uint32_t flags
, vfs_context_t ctx
)
11781 vnode_t rvp
, snapdvp
;
11783 caddr_t newname_buf
;
11786 struct nameidata
*fromnd
, *tond
;
11787 /* carving out a chunk for structs that are too big to be on stack. */
11789 struct nameidata from_node
;
11790 struct nameidata to_node
;
11793 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
11794 fromnd
= &__rename_data
->from_node
;
11795 tond
= &__rename_data
->to_node
;
11797 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
11801 fvp
= fromnd
->ni_vp
;
/* Kernel-side copy of the new name; released by the FREE() at 11857. */
11803 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11804 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
11809 * Some sanity checks- new name can't be empty, "." or ".." or have
11811 * (the length returned by copyinstr includes the terminating NUL)
11813 * The FS rename VNOP is suppossed to handle this but we'll pick it
11816 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
11817 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
11821 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++);
11822 if (i
< (int)name_len
) {
11828 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
11834 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
11835 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
11836 tond
->ni_dvp
= snapdvp
;
11838 error
= namei(tond
);
11841 } else if (tond
->ni_vp
) {
11843 * snapshot rename behaves differently than rename(2) - if the
11844 * new name exists, EEXIST is returned.
11846 vnode_put(tond
->ni_vp
);
11851 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
11852 &tond
->ni_cnd
, ctx
);
11857 FREE(newname_buf
, M_TEMP
);
11859 vnode_put(snapdvp
);
11863 FREE(__rename_data
, M_TEMP
);
11868 * Mount a Filesystem snapshot
11870 * get the vnode for the unnamed snapshot directory and the snapshot and
11871 * mount the snapshot.
/*
 * snapshot_mount: mount the snapshot called name, taken from the filesystem
 * that dirfd refers to, on top of the path given by directory.
 *
 * Visible flow:
 *  - a heap block (__snapshot_mount_data) holding two struct nameidata is
 *    allocated; snapndp and dirndp point into it;
 *  - vnode_get_snapshot() is called with op LOOKUP and fills rvp, snapdvp
 *    and snapndp; snapvp is the resolved snapshot vnode;
 *  - the mount of rvp is tested for being NULL or dead_mountp;
 *  - namei() resolves directory from user space (UIO_USERSPACE) with
 *    FOLLOW | AUDITVNPATH1 | WANTPARENT, giving vp (the vnode to cover) and
 *    pvp (its parent);
 *  - vp is tested for being the root vnode of the MNT_ROOTFS mount;
 *  - mount_common() is called with the fstypename of the mount of rvp, a
 *    kernel-space fs_snapshot_mount_args (sm_mp = that mount, sm_cnp = the
 *    componentname from the snapshot lookup), flags masked down to
 *    MNT_DONTBROWSE, and KERNEL_MOUNT_SNAPSHOT;
 *  - snapdvp, the snapshot nameidata and the heap block are released.
 *
 * NOTE(review): flags is declared __unused in the signature but is read in
 * the mount_common() call (flags & MNT_DONTBROWSE); the annotation looks
 * stale.  mnt_data is declared __unused and no use of it is visible.
 *
 * NOTE(review): the statements executed when the checks fail, the releases
 * of snapvp, vp, pvp and rvp, nameidone(dirndp) and the return are on
 * listing lines not present here - confirm against the full source.
 */
11874 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
11875 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
11877 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
11879 struct nameidata
*snapndp
, *dirndp
;
11880 /* carving out a chunk for structs that are too big to be on stack. */
11882 struct nameidata snapnd
;
11883 struct nameidata dirnd
;
11884 } * __snapshot_mount_data
;
11886 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
11888 snapndp
= &__snapshot_mount_data
->snapnd
;
11889 dirndp
= &__snapshot_mount_data
->dirnd
;
11891 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
11896 snapvp
= snapndp
->ni_vp
;
11897 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
11902 /* Get the vnode to be covered */
11903 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
11904 UIO_USERSPACE
, directory
, ctx
);
11905 error
= namei(dirndp
);
11909 vp
= dirndp
->ni_vp
;
11910 pvp
= dirndp
->ni_dvp
;
11912 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
11915 mount_t mp
= vnode_mount(rvp
);
11916 struct fs_snapshot_mount_args smnt_data
;
11918 smnt_data
.sm_mp
= mp
;
11919 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
11920 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
11921 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags
& MNT_DONTBROWSE
,
11922 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
11930 vnode_put(snapdvp
);
11932 nameidone(snapndp
);
11934 FREE(__snapshot_mount_data
, M_TEMP
);
11939 * Root from a snapshot of the filesystem
11941 * Marks the filesystem to root from the given snapshot on next boot.
/*
 * snapshot_root: ask the filesystem that dirfd refers to to root from the
 * snapshot called name.  flags is marked __unused.
 *
 * Visible flow:
 *  - vnode_getfromfd() turns dirfd into rvp and mp = vnode_mount(rvp);
 *  - name is copied from user space into a MAXPATHLEN M_TEMP buffer;
 *  - no MAC hook is called (see the XXX comment at 11969);
 *  - mount_iterref(mp, 0) is taken;
 *  - a componentname is built by hand over the buffer (LOOKUP,
 *    ISLASTCN | HASBUF, cn_pnlen MAXPATHLEN) and handed to
 *    VFS_IOCTL(VFSIOC_ROOT_SNAPSHOT) through root_data.sr_cnp;
 *  - mount_iterdrop() and FREE() follow.
 *
 * cn_namelen is set to the length reported by copyinstr().
 * NOTE(review): the comment in vnode_get_snapshot states that this length
 * includes the terminating NUL - confirm that is what the filesystem
 * expects in cn_namelen.
 *
 * NOTE(review): the conditions around the two early FREE() calls, the
 * releases of rvp and the return statements are on listing lines not
 * present here.
 */
11944 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11950 struct fs_snapshot_root_args root_data
;
11951 struct componentname cnp
;
11955 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11959 mp
= vnode_mount(rvp
);
11961 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11962 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11964 FREE(name_buf
, M_TEMP
);
11969 // XXX MAC checks ?
11972 * Grab mount_iterref so that we can release the vnode,
11973 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11975 error
= mount_iterref (mp
, 0);
11978 FREE(name_buf
, M_TEMP
);
/* Hand-built single-component name describing name_buf. */
11982 memset(&cnp
, 0, sizeof(cnp
));
11983 cnp
.cn_pnbuf
= (char *)name_buf
;
11984 cnp
.cn_nameiop
= LOOKUP
;
11985 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11986 cnp
.cn_pnlen
= MAXPATHLEN
;
11987 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11988 cnp
.cn_namelen
= (int)name_len
;
11989 root_data
.sr_cnp
= &cnp
;
11991 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
11993 mount_iterdrop(mp
);
11994 FREE(name_buf
, M_TEMP
);
12000 * FS snapshot operations dispatcher
12003 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
12004 __unused
int32_t *retval
)
12007 vfs_context_t ctx
= vfs_context_current();
12009 AUDIT_ARG(fd
, uap
->dirfd
);
12010 AUDIT_ARG(value32
, uap
->op
);
12012 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
12017 case SNAPSHOT_OP_CREATE
:
12018 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12020 case SNAPSHOT_OP_DELETE
:
12021 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12023 case SNAPSHOT_OP_RENAME
:
12024 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
12027 case SNAPSHOT_OP_MOUNT
:
12028 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
12029 uap
->data
, uap
->flags
, ctx
);
12031 case SNAPSHOT_OP_REVERT
:
12032 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12035 case SNAPSHOT_OP_ROOT
:
12036 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
12038 #endif /* !TARGET_OS_OSX */