2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
126 #include <miscfs/routefs/routefs.h>
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
135 #define GET_PATH(x) \
136 (x) = get_pathbuff();
137 #define RELEASE_PATH(x) \
140 #define GET_PATH(x) \
141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
142 #define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144 #endif /* CONFIG_FSE */
146 /* struct for checkdirs iteration */
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p
, void * arg
);
154 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
155 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
156 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
157 static int getfsstat_callback(mount_t mp
, void * arg
);
158 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
159 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
160 static int sync_callback(mount_t
, void *);
161 static void sync_thread(void *, __unused wait_result_t
);
162 static int sync_async(int);
163 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
164 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
165 boolean_t partial_copy
);
166 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
168 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
169 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
170 struct componentname
*cnp
, user_addr_t fsmountargs
,
171 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
173 void vfs_notify_mount(vnode_t pdvp
);
175 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
177 struct fd_vn_data
* fg_vn_data_alloc(void);
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
191 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
193 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
197 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
198 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
199 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
200 static void mount_end_update(mount_t mp
);
201 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
202 #endif /* CONFIG_IMGSRC_ACCESS */
204 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
207 int sync_internal(void);
210 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
212 extern lck_grp_t
*fd_vn_lck_grp
;
213 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
214 extern lck_attr_t
*fd_vn_lck_attr
;
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
221 uint32_t mount_generation
= 0;
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops
=0;
226 extern const struct fileops vnops
;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
232 * Virtual File System System Calls
235 #if NFSCLIENT || DEVFS || ROUTEFS
237 * Private in-kernel mounting spi (NFS only, not exported)
241 vfs_iskernelmount(mount_t mp
)
243 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
248 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
249 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
255 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
256 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
259 * Get the vnode to be covered if it's not supplied
269 char *pnbuf
= CAST_DOWN(char *, path
);
271 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
272 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
276 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
277 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
287 #endif /* NFSCLIENT || DEVFS */
290 * Mount a file system.
294 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
296 struct __mac_mount_args muap
;
298 muap
.type
= uap
->type
;
299 muap
.path
= uap
->path
;
300 muap
.flags
= uap
->flags
;
301 muap
.data
= uap
->data
;
302 muap
.mac_p
= USER_ADDR_NULL
;
303 return (__mac_mount(p
, &muap
, retval
));
307 vfs_notify_mount(vnode_t pdvp
)
309 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
310 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
332 boolean_t root_fs_upgrade_try
= FALSE
;
335 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
339 int need_nameidone
= 0;
340 vfs_context_t ctx
= vfs_context_current();
341 char fstypename
[MFSNAMELEN
];
344 char *labelstr
= NULL
;
345 int flags
= uap
->flags
;
347 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
348 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
353 * Get the fs type name from user space
355 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
360 * Get the vnode to be covered
362 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
363 UIO_USERSPACE
, uap
->path
, ctx
);
372 #ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags
== MNT_IMGSRC_BY_INDEX
) {
375 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
376 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
379 #endif /* CONFIG_IMGSRC_ACCESS */
383 * Get the label string (if any) from user space
385 if (uap
->mac_p
!= USER_ADDR_NULL
) {
390 struct user64_mac mac64
;
391 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
392 mac
.m_buflen
= mac64
.m_buflen
;
393 mac
.m_string
= mac64
.m_string
;
395 struct user32_mac mac32
;
396 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
397 mac
.m_buflen
= mac32
.m_buflen
;
398 mac
.m_string
= mac32
.m_string
;
402 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
403 (mac
.m_buflen
< 2)) {
407 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
408 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
412 AUDIT_ARG(mac_string
, labelstr
);
414 #endif /* CONFIG_MACF */
416 AUDIT_ARG(fflags
, flags
);
419 if (flags
& MNT_UNION
) {
420 /* No union mounts on release kernels */
426 if ((vp
->v_flag
& VROOT
) &&
427 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
428 if (!(flags
& MNT_UNION
)) {
433 * For a union mount on '/', treat it as fresh
434 * mount instead of update.
435 * Otherwise, union mounting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
438 * after it gets ENOENT on union mount.
440 flags
= (flags
& ~(MNT_UPDATE
));
444 if ((flags
& MNT_RDONLY
) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
454 * bitmap optimization.
456 #if CHECK_CS_VALIDATION_BITMAP
457 if ((flags
& MNT_RDONLY
) == 0 ) {
458 root_fs_upgrade_try
= TRUE
;
463 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
464 labelstr
, FALSE
, ctx
);
470 FREE(labelstr
, M_MACTEMP
);
471 #endif /* CONFIG_MACF */
479 if (need_nameidone
) {
487 * common mount implementation (final stage of mounting)
490 * fstypename file system type (i.e. its vfs name)
491 * pvp parent of covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
501 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
502 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
503 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
506 #pragma unused(labelstr)
508 struct vnode
*devvp
= NULLVP
;
509 struct vnode
*device_vnode
= NULLVP
;
514 struct vfstable
*vfsp
= (struct vfstable
*)0;
515 struct proc
*p
= vfs_context_proc(ctx
);
517 user_addr_t devpath
= USER_ADDR_NULL
;
520 boolean_t vfsp_ref
= FALSE
;
521 boolean_t is_rwlock_locked
= FALSE
;
522 boolean_t did_rele
= FALSE
;
523 boolean_t have_usecount
= FALSE
;
526 * Process an update for an existing mount
528 if (flags
& MNT_UPDATE
) {
529 if ((vp
->v_flag
& VROOT
) == 0) {
535 /* unmount in progress return error */
537 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
543 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
544 is_rwlock_locked
= TRUE
;
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
549 if ((flags
& MNT_RELOAD
) &&
550 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
559 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
560 ((flags
& MNT_CPROTECT
) == 0)) {
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
568 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
572 #endif /* CONFIG_IMGSRC_ACCESS */
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
578 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
579 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
583 error
= mac_mount_check_remount(ctx
, mp
);
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
592 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
593 flags
|= MNT_NOSUID
| MNT_NODEV
;
594 if (mp
->mnt_flag
& MNT_NOEXEC
)
601 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
603 vfsp
= mp
->mnt_vtable
;
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
610 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
611 flags
|= MNT_NOSUID
| MNT_NODEV
;
612 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text
, fstypename
);
619 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
620 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
621 vfsp
->vfc_refcount
++;
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
634 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
635 error
= EINVAL
; /* unsupported request */
639 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
645 * Allocate and initialize the filesystem (mount_t)
647 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
649 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
652 /* Initialize the default IO constraints */
653 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
654 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
655 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
656 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
657 mp
->mnt_devblocksize
= DEV_BSIZE
;
658 mp
->mnt_alignmentmask
= PAGE_MASK
;
659 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
662 mp
->mnt_realrootvp
= NULLVP
;
663 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
665 TAILQ_INIT(&mp
->mnt_vnodelist
);
666 TAILQ_INIT(&mp
->mnt_workerqueue
);
667 TAILQ_INIT(&mp
->mnt_newvnodes
);
669 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
670 is_rwlock_locked
= TRUE
;
671 mp
->mnt_op
= vfsp
->vfc_vfsops
;
672 mp
->mnt_vtable
= vfsp
;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
675 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
676 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
677 mp
->mnt_vnodecovered
= vp
;
678 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
679 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
680 mp
->mnt_devbsdunit
= 0;
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
685 #if NFSCLIENT || DEVFS || ROUTEFS
687 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
688 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
689 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
690 #endif /* NFSCLIENT || DEVFS */
694 * Set the mount level flags.
696 if (flags
& MNT_RDONLY
)
697 mp
->mnt_flag
|= MNT_RDONLY
;
698 else if (mp
->mnt_flag
& MNT_RDONLY
) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
705 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
707 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
708 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
709 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
710 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
711 MNT_QUARANTINE
| MNT_CPROTECT
);
716 * On release builds of iOS based platforms, always enforce NOSUID and NODEV on
717 * all mounts. We do this here because we can catch update mounts as well as
718 * non-update mounts in this case.
720 mp
->mnt_flag
|= (MNT_NOSUID
);
724 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
725 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
726 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
727 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
728 MNT_QUARANTINE
| MNT_CPROTECT
);
731 if (flags
& MNT_MULTILABEL
) {
732 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
736 mp
->mnt_flag
|= MNT_MULTILABEL
;
740 * Process device path for local file systems if requested
742 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
743 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
744 if (vfs_context_is64bit(ctx
)) {
745 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
747 fsmountargs
+= sizeof(devpath
);
750 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
752 /* munge into LP64 addr */
753 devpath
= CAST_USER_ADDR_T(tmp
);
754 fsmountargs
+= sizeof(tmp
);
757 /* Lookup device and authorize access to it */
761 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
762 if ( (error
= namei(&nd
)) )
765 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
770 if (devvp
->v_type
!= VBLK
) {
774 if (major(devvp
->v_rdev
) >= nblkdev
) {
779 * If mount by non-root, then verify that user has necessary
780 * permissions on the device.
782 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
783 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
785 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
786 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
787 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
791 /* On first mount, preflight and open device */
792 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
793 if ( (error
= vnode_ref(devvp
)) )
796 * Disallow multiple mounts of the same device.
797 * Disallow mounting of a device that is currently in use
798 * (except for root, which might share swap device for miniroot).
799 * Flush out any old buffers remaining from a previous use.
801 if ( (error
= vfs_mountedon(devvp
)) )
804 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
808 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
812 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
815 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
817 error
= mac_vnode_check_open(ctx
,
819 ronly
? FREAD
: FREAD
|FWRITE
);
823 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
826 mp
->mnt_devvp
= devvp
;
827 device_vnode
= devvp
;
829 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
830 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
831 (device_vnode
= mp
->mnt_devvp
)) {
835 * If upgrade to read-write by non-root, then verify
836 * that user has necessary permissions on the device.
838 vnode_getalways(device_vnode
);
840 if (suser(vfs_context_ucred(ctx
), NULL
) &&
841 (error
= vnode_authorize(device_vnode
, NULL
,
842 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
844 vnode_put(device_vnode
);
848 /* Tell the device that we're upgrading */
849 dev
= (dev_t
)device_vnode
->v_rdev
;
852 if ((u_int
)maj
>= (u_int
)nblkdev
)
853 panic("Volume mounted on a device with invalid major number.");
855 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
856 vnode_put(device_vnode
);
857 device_vnode
= NULLVP
;
864 if ((flags
& MNT_UPDATE
) == 0) {
865 mac_mount_label_init(mp
);
866 mac_mount_label_associate(ctx
, mp
);
869 if ((flags
& MNT_UPDATE
) != 0) {
870 error
= mac_mount_check_label_update(ctx
, mp
);
877 * Mount the filesystem.
879 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
880 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
881 (caddr_t
)fsmountargs
, 0, ctx
);
883 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
886 if (flags
& MNT_UPDATE
) {
887 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
888 mp
->mnt_flag
&= ~MNT_RDONLY
;
890 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
891 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
893 mp
->mnt_flag
= flag
; /* restore flag value */
894 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
895 lck_rw_done(&mp
->mnt_rwlock
);
896 is_rwlock_locked
= FALSE
;
898 enablequotas(mp
, ctx
);
903 * Put the new filesystem on the mount list after root.
906 struct vfs_attr vfsattr
;
908 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
909 error
= VFS_ROOT(mp
, &rvp
, ctx
);
911 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
914 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
916 * drop reference provided by VFS_ROOT
926 CLR(vp
->v_flag
, VMOUNT
);
927 vp
->v_mountedhere
= mp
;
931 * taking the name_cache_lock exclusively will
932 * ensure that everyone is out of the fast path who
933 * might be trying to use a now stale copy of
934 * vp->v_mountedhere->mnt_realrootvp
935 * bumping mount_generation causes the cached values
942 error
= vnode_ref(vp
);
947 have_usecount
= TRUE
;
949 error
= checkdirs(vp
, ctx
);
951 /* Unmount the filesystem as cdir/rdirs cannot be updated */
955 * there is no cleanup code here so I have made it void
956 * we need to revisit this
958 (void)VFS_START(mp
, 0, ctx
);
960 if (mount_list_add(mp
) != 0) {
962 * The system is shutting down trying to umount
963 * everything, so fail with a plausible errno.
968 lck_rw_done(&mp
->mnt_rwlock
);
969 is_rwlock_locked
= FALSE
;
971 /* Check if this mounted file system supports EAs or named streams. */
972 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
973 VFSATTR_INIT(&vfsattr
);
974 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
975 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
976 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
977 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
978 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
979 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
980 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
983 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
984 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
985 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
988 /* Check if this file system supports path from id lookups. */
989 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
990 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
991 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
992 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
993 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
994 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
997 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
998 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
999 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1002 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1003 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1005 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1006 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1008 /* increment the operations count */
1009 OSAddAtomic(1, &vfs_nummntops
);
1010 enablequotas(mp
, ctx
);
1013 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1016 * cache the IO attributes for the underlying physical media...
1017 * an error return indicates the underlying driver doesn't
1018 * support all the queries necessary... however, reasonable
1019 * defaults will have been set, so no reason to bail or care
1021 vfs_init_io_attributes(device_vnode
, mp
);
1024 /* Now that mount is setup, notify the listeners */
1025 vfs_notify_mount(pvp
);
1026 IOBSDMountChange(mp
, kIOMountChangeMount
);
1029 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1030 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1031 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1032 mp
->mnt_vtable
->vfc_name
, error
);
1035 vnode_lock_spin(vp
);
1036 CLR(vp
->v_flag
, VMOUNT
);
1039 mp
->mnt_vtable
->vfc_refcount
--;
1040 mount_list_unlock();
1042 if (device_vnode
) {
1043 vnode_rele(device_vnode
);
1044 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1046 lck_rw_done(&mp
->mnt_rwlock
);
1047 is_rwlock_locked
= FALSE
;
1050 * if we get here, we have a mount structure that needs to be freed,
1051 * but since the coveredvp hasn't yet been updated to point at it,
1052 * no need to worry about other threads holding a crossref on this mp
1053 * so it's ok to just free it
1055 mount_lock_destroy(mp
);
1057 mac_mount_label_destroy(mp
);
1059 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1063 * drop I/O count on the device vp if there was one
1065 if (devpath
&& devvp
)
1070 /* Error condition exits */
1072 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1075 * If the mount has been placed on the covered vp,
1076 * it may have been discovered by now, so we have
1077 * to treat this just like an unmount
1079 mount_lock_spin(mp
);
1080 mp
->mnt_lflag
|= MNT_LDEAD
;
1083 if (device_vnode
!= NULLVP
) {
1084 vnode_rele(device_vnode
);
1085 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1090 vnode_lock_spin(vp
);
1093 vp
->v_mountedhere
= (mount_t
) 0;
1097 if (have_usecount
) {
1101 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1104 if (devpath
&& devvp
)
1107 /* Release mnt_rwlock only when it was taken */
1108 if (is_rwlock_locked
== TRUE
) {
1109 lck_rw_done(&mp
->mnt_rwlock
);
1113 if (mp
->mnt_crossref
)
1114 mount_dropcrossref(mp
, vp
, 0);
1116 mount_lock_destroy(mp
);
1118 mac_mount_label_destroy(mp
);
1120 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1125 vfsp
->vfc_refcount
--;
1126 mount_list_unlock();
1133 * Flush in-core data, check for competing mount attempts,
1137 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1140 #pragma unused(cnp,fsname)
1142 struct vnode_attr va
;
1147 * If the user is not root, ensure that they own the directory
1148 * onto which we are attempting to mount.
1151 VATTR_WANTED(&va
, va_uid
);
1152 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1153 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1154 (!vfs_context_issuser(ctx
)))) {
1160 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1163 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1166 if (vp
->v_type
!= VDIR
) {
1171 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1177 error
= mac_mount_check_mount(ctx
, vp
,
1183 vnode_lock_spin(vp
);
1184 SET(vp
->v_flag
, VMOUNT
);
1191 #if CONFIG_IMGSRC_ACCESS
1194 #define IMGSRC_DEBUG(args...) printf(args)
1196 #define IMGSRC_DEBUG(args...) do { } while(0)
1200 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1202 struct nameidata nd
;
1203 vnode_t vp
, realdevvp
;
1207 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1208 if ( (error
= namei(&nd
)) ) {
1209 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1215 if (!vnode_isblk(vp
)) {
1216 IMGSRC_DEBUG("Not block device.\n");
1221 realdevvp
= mp
->mnt_devvp
;
1222 if (realdevvp
== NULLVP
) {
1223 IMGSRC_DEBUG("No device backs the mount.\n");
1228 error
= vnode_getwithref(realdevvp
);
1230 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1234 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1235 IMGSRC_DEBUG("Wrong dev_t.\n");
1240 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1243 * If mount by non-root, then verify that user has necessary
1244 * permissions on the device.
1246 if (!vfs_context_issuser(ctx
)) {
1247 accessmode
= KAUTH_VNODE_READ_DATA
;
1248 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1249 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1250 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1251 IMGSRC_DEBUG("Access denied.\n");
1259 vnode_put(realdevvp
);
1270 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1271 * and call checkdirs()
/*
 * place_mount_and_checkdirs
 *
 * Installs mount `mp` on top of the covered vnode `vp`:
 *   - records vp in mp->mnt_vnodecovered,
 *   - under the vnode spin lock clears VMOUNT and sets vp->v_mountedhere,
 *   - releases the name cache lock (the matching acquire is on a line not
 *     shown in this excerpt),
 *   - takes a usecount on vp with vnode_ref(),
 *   - calls checkdirs() for vp.
 * mp->mnt_vnodecovered is reset to NULLVP near the end.
 * NOTE(review): presumably that reset is the failure path -- the branch
 * structure and return statements are not visible; confirm.
 */
1274 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1278 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1280 vnode_lock_spin(vp
);
1281 CLR(vp
->v_flag
, VMOUNT
);
1282 vp
->v_mountedhere
= mp
;
1286 * taking the name_cache_lock exclusively will
1287 * insure that everyone is out of the fast path who
1288 * might be trying to use a now stale copy of
1289 * vp->v_mountedhere->mnt_realrootvp
1290 * bumping mount_generation causes the cached values
1295 name_cache_unlock();
1297 error
= vnode_ref(vp
);
1302 error
= checkdirs(vp
, ctx
);
1304 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1311 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp
 *
 * Detaches mount `mp` from covered vnode `vp`: under the vnode spin lock it
 * clears vp->v_mountedhere, and it resets mp->mnt_vnodecovered to NULLVP.
 * NOTE(review): the caller's comment says this also drops the vnode
 * reference and clears VMOUNT; those statements are not in this excerpt.
 */
1317 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1320 vnode_lock_spin(vp
);
1321 vp
->v_mountedhere
= (mount_t
)NULL
;
1324 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update
 *
 * Prepares `mp` for an in-place update and authorizes the caller.
 * Visible checks, in order:
 *   1. under the mount spin lock, test MNT_LUNMOUNT (unmount in progress);
 *   2. take mp->mnt_rwlock exclusively;
 *   3. a MNT_RELOAD request is tested against the mount not being
 *      MNT_RDONLY;
 *   4. the caller's uid is compared with f_owner, with a superuser override;
 *   5. mac_mount_check_remount() is consulted.
 * The lck_rw_done() at the end releases the rwlock.
 * NOTE(review): presumably that release happens only on failure, since
 * mount_end_update() is the success-side release; the error values and
 * branch structure are on lines not shown -- confirm.
 */
1328 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1332 /* unmount in progress return error */
1333 mount_lock_spin(mp
);
1334 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1339 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1342 * We only allow the filesystem to be reloaded if it
1343 * is currently mounted read-only.
1345 if ((flags
& MNT_RELOAD
) &&
1346 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1352 * Only root, or the user that did the original mount is
1353 * permitted to update it.
1355 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1356 (!vfs_context_issuser(ctx
))) {
1361 error
= mac_mount_check_remount(ctx
, mp
);
1369 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update
 *
 * Releases mp->mnt_rwlock; the counterpart of the exclusive acquisition
 * performed in mount_begin_update().
 */
1376 mount_end_update(mount_t mp
)
1378 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode
 *
 * Looks up the image-boot source root vnode recorded for nesting level
 * `height`.  `height` is range-checked against MAX_IMAGEBOOT_NESTING before
 * it is used to index imgsrc_rootvnodes[].  The entry is accepted only if it
 * is non-NULL and vnode_get() on it succeeds (i.e. an iocount was obtained).
 * NOTE(review): the store through rvpp and the return values are on lines
 * not shown in this excerpt.
 */
1382 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1386 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1390 vp
= imgsrc_rootvnodes
[height
];
1391 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
/*
 * relocate_imageboot_source
 *
 * Moves the filesystem that an image boot was sourced from onto a new
 * covered vnode `vp` (whose parent is `pvp`, and whose path is in
 * cnp->cn_pnbuf).
 *
 * Visible sequence:
 *   1. Bail out if no image boot happened (imgsrc_rootvnodes[0] is NULLVP)
 *      or the caller is not superuser.
 *   2. Copy in the mount arguments from `fsmountargs`: either a
 *      user64/user32 mnt_imgsrc_args structure (height, flags, devpath) or,
 *      for binary compatibility, a bare device-path pointer.
 *   3. Fetch the source root vnode with get_imgsrc_rootvnode() and its mount.
 *   4. Refuse if MNTK_HAS_MOVED is already set; the test is repeated after
 *      mount_begin_update() has taken the mount rwlock.
 *   5. prepare_coveredvp() on vp, compare `fsname` with the mount's
 *      vfc_name, and for VFC_VFSLOCALARGS filesystems with a non-null
 *      devpath run authorize_devpath_and_update_mntfromname().
 *   6. place_mount_and_checkdirs(), save and replace f_mntonname, set
 *      MNTK_HAS_MOVED, mount_list_add().
 *   7. On success: mount_end_update(), free the saved name,
 *      vfs_notify_mount(pvp).
 *   8. The trailing statements restore f_mntonname, clear MNTK_HAS_MOVED,
 *      undo the placement / clear VMOUNT, end the update and free the
 *      saved name.
 *
 * NOTE(review): presumably step 8 is the error-unwind path; which
 * parameter (is64bit / by_index) selects each copy-in variant, the error
 * codes, and the goto labels are on lines not shown in this excerpt.
 */
1400 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1401 const char *fsname
, vfs_context_t ctx
,
1402 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1406 boolean_t placed
= FALSE
;
1407 vnode_t devvp
= NULLVP
;
1408 struct vfstable
*vfsp
;
1409 user_addr_t devpath
;
1410 char *old_mntonname
;
1415 /* If we didn't imageboot, nothing to move */
1416 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1420 /* Only root can do this */
1421 if (!vfs_context_issuser(ctx
)) {
1425 IMGSRC_DEBUG("looking for root vnode.\n");
1428 * Get root vnode of filesystem we're moving.
1432 struct user64_mnt_imgsrc_args mia64
;
1433 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1435 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1439 height
= mia64
.mi_height
;
1440 flags
= mia64
.mi_flags
;
1441 devpath
= mia64
.mi_devpath
;
1443 struct user32_mnt_imgsrc_args mia32
;
1444 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1446 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1450 height
= mia32
.mi_height
;
1451 flags
= mia32
.mi_flags
;
1452 devpath
= mia32
.mi_devpath
;
1456 * For binary compatibility--assumes one level of nesting.
1459 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1463 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1466 /* munge into LP64 addr */
1467 devpath
= CAST_USER_ADDR_T(tmp
);
1475 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1479 error
= get_imgsrc_rootvnode(height
, &rvp
);
1481 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1485 IMGSRC_DEBUG("got root vnode.\n");
/* Scratch buffer used to save the previous f_mntonname before it is replaced. */
1487 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1489 /* Can only move once */
1490 mp
= vnode_mount(rvp
);
1491 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1492 IMGSRC_DEBUG("Already moved.\n");
1497 IMGSRC_DEBUG("Starting updated.\n");
1499 /* Get exclusive rwlock on mount, authorize update on mp */
1500 error
= mount_begin_update(mp
, ctx
, 0);
1502 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1507 * It can only be moved once. Flag is set under the rwlock,
1508 * so we're now safe to proceed.
1510 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1511 IMGSRC_DEBUG("Already moved [2]\n");
1516 IMGSRC_DEBUG("Preparing coveredvp.\n");
1518 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1519 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1521 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1525 IMGSRC_DEBUG("Covered vp OK.\n");
1527 /* Sanity check the name caller has provided */
1528 vfsp
= mp
->mnt_vtable
;
1529 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1530 IMGSRC_DEBUG("Wrong fs name.\n");
1535 /* Check the device vnode and update mount-from name, for local filesystems */
1536 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1537 IMGSRC_DEBUG("Local, doing device validation.\n");
1539 if (devpath
!= USER_ADDR_NULL
) {
1540 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1542 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1551 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1552 * and increment the name cache's mount generation
1555 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1556 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
/* Save the old mounted-on name so it can be restored, then install the new one. */
1563 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1564 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1566 /* Forbid future moves */
1568 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1571 /* Finally, add to mount list, completely ready to go */
1572 if (mount_list_add(mp
) != 0) {
1574 * The system is shutting down trying to umount
1575 * everything, so fail with a plausible errno.
1581 mount_end_update(mp
);
1583 FREE(old_mntonname
, M_TEMP
);
1585 vfs_notify_mount(pvp
);
1589 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1592 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1597 * Placing the mp on the vnode clears VMOUNT,
1598 * so cleanup is different after that point
1601 /* Rele the vp, clear VMOUNT and v_mountedhere */
1602 undo_place_on_covered_vp(mp
, vp
);
1604 vnode_lock_spin(vp
);
1605 CLR(vp
->v_flag
, VMOUNT
);
1609 mount_end_update(mp
);
1613 FREE(old_mntonname
, M_TEMP
);
1617 #endif /* CONFIG_IMGSRC_ACCESS */
/*
 * enablequotas
 *
 * Best-effort activation of disk quotas on a freshly mounted filesystem.
 * Only filesystems whose f_fstypename compares equal to "hfs" are considered
 * (what happens otherwise is on a line not shown).  For every quota type in
 * [0, MAXQUOTAS) the function:
 *   - builds "<mntonname>/<QUOTAOPSNAME>.<extension>" and looks it up with
 *     namei(); if the lookup fails the type is skipped,
 *   - otherwise drops the vnode returned by the lookup, builds
 *     "<mntonname>/<QUOTAFILENAME>.<extension>" and issues Q_QUOTAON for
 *     that type through VFS_QUOTACTL().
 * The VFS_QUOTACTL() result is deliberately discarded.
 */
1620 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1622 struct nameidata qnd
;
1624 char qfpath
[MAXPATHLEN
];
1625 const char *qfname
= QUOTAFILENAME
;
1626 const char *qfopsname
= QUOTAOPSNAME
;
1627 const char *qfextension
[] = INITQFNAMES
;
1629 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1630 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1634 * Enable filesystem disk quotas if necessary.
1635 * We ignore errors as this should not interfere with final mount
1637 for (type
=0; type
< MAXQUOTAS
; type
++) {
1638 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1639 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1640 CAST_USER_ADDR_T(qfpath
), ctx
);
1641 if (namei(&qnd
) != 0)
1642 continue; /* option file to trigger quotas is not present */
1643 vnode_put(qnd
.ni_vp
);
1645 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1647 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
/*
 * checkdirs_callback
 *
 * Per-process callback (used with proc_iterate() by checkdirs()).  `arg` is
 * a struct cdirargs carrying the covered vnode (olddp) and the replacement
 * vnode (newdp).  The process's file descriptor table is examined: if its
 * current directory (fd_cdir) or root directory (fd_rdir) equals olddp the
 * corresponding change flag is involved, and when either changed the
 * fd_cdir / fd_rdir fields are written back.  A process without a file
 * descriptor table is skipped.  Always returns PROC_RETURNED in the visible
 * paths.
 * NOTE(review): reference counting on olddp/newdp and the locking around
 * the fdp accesses are on lines not shown in this excerpt.
 */
1654 checkdirs_callback(proc_t p
, void * arg
)
1656 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1657 vnode_t olddp
= cdrp
->olddp
;
1658 vnode_t newdp
= cdrp
->newdp
;
1659 struct filedesc
*fdp
;
1663 int cdir_changed
= 0;
1664 int rdir_changed
= 0;
1667 * XXX Also needs to iterate each thread in the process to see if it
1668 * XXX is using a per-thread current working directory, and, if so,
1669 * XXX update that as well.
1674 if (fdp
== (struct filedesc
*)0) {
1676 return(PROC_RETURNED
);
1678 fdp_cvp
= fdp
->fd_cdir
;
1679 fdp_rvp
= fdp
->fd_rdir
;
1682 if (fdp_cvp
== olddp
) {
1689 if (fdp_rvp
== olddp
) {
1696 if (cdir_changed
|| rdir_changed
) {
1698 fdp
->fd_cdir
= fdp_cvp
;
1699 fdp
->fd_rdir
= fdp_rvp
;
1702 return(PROC_RETURNED
);
1708 * Scan all active processes to see if any of them have a current
1709 * or root directory onto which the new filesystem has just been
1710 * mounted. If so, replace them with the new mount point.
/*
 * checkdirs
 *
 * Called with the vnode that has just been covered by a mount.  A usecount
 * of exactly 1 on olddp is tested first.  Otherwise the root of the
 * filesystem mounted on olddp is fetched with VFS_ROOT(), and
 * checkdirs_callback() is run over every process (PROC_ALLPROCLIST,
 * without waiting for exec/fork transitions).  The global rootvnode is also
 * compared against olddp.
 * NOTE(review): presumably the usecount==1 test is a "nobody else
 * references it" early exit, and the panic() is the handling for a failing
 * VFS_ROOT() -- the statements that follow those tests, and the handling of
 * the rootvnode match, are on lines not shown; confirm.
 */
1713 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1718 struct cdirargs cdr
;
1720 if (olddp
->v_usecount
== 1)
1722 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1726 panic("mount: lost mount: error %d", err
);
1733 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1734 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1736 if (rootvnode
== olddp
) {
1748 * Unmount a file system.
1750 * Note: unmount takes a path to the vnode mounted on as argument,
1751 * not special file (as before).
/*
 * unmount -- system call entry point.
 *
 * Resolves uap->path (user space, following symlinks, audited), runs the
 * MAC unmount check on the mount, tests the looked-up vnode's VROOT flag
 * (it must be the root of its filesystem), and hands the mount to
 * safedounmount() together with uap->flags.  safedounmount() consumes the
 * mount reference.
 * NOTE(review): the namei() call, how `mp` is obtained and referenced, and
 * the error returns are on lines not shown in this excerpt.
 */
1755 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1760 struct nameidata nd
;
1761 vfs_context_t ctx
= vfs_context_current();
1763 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1764 UIO_USERSPACE
, uap
->path
, ctx
);
1773 error
= mac_mount_check_umount(ctx
, mp
);
1780 * Must be the root of the filesystem
1782 if ((vp
->v_flag
& VROOT
) == 0) {
1788 /* safedounmount consumes the mount ref */
1789 return (safedounmount(mp
, uap
->flags
, ctx
));
/*
 * vfs_unmountbyfsid
 *
 * Unmounts the filesystem identified by `fsid`.  The mount is looked up with
 * mount_list_lookupby_fsid(fsid, 0, 1); a NULL result is handled on a line
 * not shown.  Otherwise the mount is passed to safedounmount(), which
 * consumes the mount reference, and its result is returned.
 */
1793 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1797 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1798 if (mp
== (mount_t
)0) {
1803 /* safedounmount consumes the mount ref */
1804 return(safedounmount(mp
, flags
, ctx
));
1809 * The mount struct comes with a mount ref which will be consumed.
1810 * Do the actual file system unmount, prevent some common foot shooting.
/*
 * safedounmount
 *
 * Policy wrapper around dounmount().  Visible checks, in order:
 *   1. If the filesystem is flagged not-responding and the caller asked for
 *      MNT_NOBLOCK without MNT_FORCE, the request is refused (the comment
 *      states EBUSY).
 *   2. Unless the mount is tagged MNTK_PERMIT_UNMOUNT and this is not a
 *      forced unmount, the caller must be the mount's owner or pass suser().
 *   3. The root filesystem (MNT_ROOTFS) is never unmounted: EBUSY.
 *   4. With CONFIG_IMGSRC_ACCESS, a mount flagged MNTK_BACKS_ROOT is tested
 *      as well (the action taken is on a line not shown).
 * If all checks pass the mount is handed to dounmount() with withref = 1.
 *
 * The CONFIG_IMGSRC_ACCESS guard uses `#if`, matching the guard used where
 * the image-source support is defined earlier in this file, so the two
 * agree when the option is defined as 0.
 *
 * NOTE(review): the cleanup executed when a check fails (the mount
 * reference is documented as being consumed) is not visible here.
 */
1813 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1816 proc_t p
= vfs_context_proc(ctx
);
1819 * If the file system is not responding and MNT_NOBLOCK
1820 * is set and not a forced unmount then return EBUSY.
1822 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1823 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1829 * Skip authorization if the mount is tagged as permissive and
1830 * this is not a forced-unmount attempt.
1832 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1834 * Only root, or the user that did the original mount is
1835 * permitted to unmount this filesystem.
1837 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1838 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1842 * Don't allow unmounting the root file system.
1844 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1845 error
= EBUSY
; /* the root is always busy */
1849 #if CONFIG_IMGSRC_ACCESS
1850 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1854 #endif /* CONFIG_IMGSRC_ACCESS */
1856 return (dounmount(mp
, flags
, 1, ctx
));
1864 * Do the actual file system unmount.
/*
 * dounmount
 *
 * Performs the actual unmount of `mp`.
 *
 * Parameters:
 *   mp       mount to tear down
 *   flags    MNT_* request flags (MNT_FORCE, MNT_NOBLOCK, MNT_LNOSUB are
 *            examined here)
 *   withref  passed as 1 by safedounmount() and dounmount_submounts()
 *   ctx      caller's VFS context
 *
 * Visible phases:
 *   1. For non-forced unmounts, fsevent_unmount() is issued first.
 *   2. An unmount already in progress (MNT_LUNMOUNT) is detected; otherwise
 *      MNTK_UNMOUNT / MNT_LUNMOUNT are set (plus MNT_LFORCE when forced),
 *      MNT_ASYNC is cleared and the cached mnt_realrootvp is invalidated.
 *   3. A forced unmount without MNT_LNOSUB first unmounts submounts.
 *   4. The mount rwlock is taken exclusively.  A non-forced unmount releases
 *      cached vnodes (ubc_umount) and, if writable, syncs the filesystem.
 *   5. vflush() flushes vnodes, then VFS_UNMOUNT() is called between
 *      mount_iterdrain() and mount_iterreset().
 *   6. Each of the three places that clear MNTK_UNMOUNT / MNT_LUNMOUNT /
 *      MNT_LFORCE undoes step 2.
 *   7. The backing device (for VFC_VFSLOCALARGS filesystems) is closed, the
 *      mount is removed from the mount list, the covered vnode is detached
 *      (v_mountedhere and VMOUNT cleared), caches are purged, VQ_UNMOUNT is
 *      signalled and the mount is marked MNT_LDEAD; waiters are woken.
 *   8. The P_NOREMOTEHANG process flag set for MNT_NOBLOCK callers is
 *      restored, the trigger callback (if any) is invoked, and finally the
 *      covered vnode's references are dropped and its parent is notified.
 *
 * NOTE(review): presumably step 6 is the restore on the sync / vflush /
 * VFS_UNMOUNT failure paths.  The mount lock acquire/release calls, the
 * use of `withref`, the error codes, the gotos and the definitions of
 * `lflags` and `did_vflush` are on lines not shown in this excerpt.
 */
1867 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1869 vnode_t coveredvp
= (vnode_t
)0;
1872 int forcedunmount
= 0;
1874 struct vnode
*devvp
= NULLVP
;
1876 proc_t p
= vfs_context_proc(ctx
);
1878 int pflags_save
= 0;
1879 #endif /* CONFIG_TRIGGERS */
1882 if (!(flags
& MNT_FORCE
)) {
1883 fsevent_unmount(mp
, ctx
); /* has to come first! */
1890 * If already an unmount in progress just return EBUSY.
1891 * Even a forced unmount cannot override.
1893 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1900 if (flags
& MNT_FORCE
) {
1902 mp
->mnt_lflag
|= MNT_LFORCE
;
/* Remember the previous p_flag bits so P_NOREMOTEHANG can be restored later. */
1906 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1907 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1910 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1911 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1912 mp
->mnt_flag
&=~ MNT_ASYNC
;
1914 * anyone currently in the fast path that
1915 * trips over the cached rootvp will be
1916 * dumped out and forced into the slow path
1917 * to regenerate a new cached value
1919 mp
->mnt_realrootvp
= NULLVP
;
1922 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1924 * Force unmount any mounts in this filesystem.
1925 * If any unmounts fail - just leave them dangling.
1928 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1932 * taking the name_cache_lock exclusively will
1933 * insure that everyone is out of the fast path who
1934 * might be trying to use a now stale copy of
1935 * vp->v_mountedhere->mnt_realrootvp
1936 * bumping mount_generation causes the cached values
1941 name_cache_unlock();
1944 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1948 if (forcedunmount
== 0) {
1949 ubc_umount(mp
); /* release cached vnodes */
1950 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1951 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1954 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1955 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1956 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1962 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
1965 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1969 lflags
|= FORCECLOSE
;
1970 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1971 if ((forcedunmount
== 0) && error
) {
1973 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1974 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1975 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1979 /* make sure no one is in the mount iterations or lookup */
1980 mount_iterdrain(mp
);
1982 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1984 mount_iterreset(mp
);
1986 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1987 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1988 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1992 /* increment the operations count */
1994 OSAddAtomic(1, &vfs_nummntops
);
1996 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1997 /* hold an io reference and drop the usecount before close */
1998 devvp
= mp
->mnt_devvp
;
1999 vnode_getalways(devvp
);
2001 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
2003 vnode_clearmountedon(devvp
);
/* The rwlock is dropped around mount_list_remove() and re-taken afterwards. */
2006 lck_rw_done(&mp
->mnt_rwlock
);
2007 mount_list_remove(mp
);
2008 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2010 /* mark the mount point hook in the vp but not drop the ref yet */
2011 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2013 * The covered vnode needs special handling. Trying to get an
2014 * iocount must not block here as this may lead to deadlocks
2015 * if the Filesystem to which the covered vnode belongs is
2016 * undergoing forced unmounts. Since we hold a usecount, the
2017 * vnode cannot be reused (it can, however, still be terminated)
2019 vnode_getalways(coveredvp
);
2020 vnode_lock_spin(coveredvp
);
2023 coveredvp
->v_mountedhere
= (struct mount
*)0;
2024 CLR(coveredvp
->v_flag
, VMOUNT
);
2026 vnode_unlock(coveredvp
);
2027 vnode_put(coveredvp
);
2031 mp
->mnt_vtable
->vfc_refcount
--;
2032 mount_list_unlock();
2034 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2035 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2037 mp
->mnt_lflag
|= MNT_LDEAD
;
2039 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2041 * do the wakeup here
2042 * in case we block in mount_refdrain
2043 * which will drop the mount lock
2044 * and allow anyone blocked in vfs_busy
2045 * to wakeup and see the LDEAD state
2047 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2048 wakeup((caddr_t
)mp
);
2052 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2053 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2058 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2059 // Restore P_NOREMOTEHANG bit to its previous value
2060 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2061 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2065 * Callback and context are set together under the mount lock, and
2066 * never cleared, so we're safe to examine them here, drop the lock,
2069 if (mp
->mnt_triggercallback
!= NULL
) {
2072 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2073 } else if (did_vflush
) {
2074 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2081 #endif /* CONFIG_TRIGGERS */
2083 lck_rw_done(&mp
->mnt_rwlock
);
2086 wakeup((caddr_t
)mp
);
2089 if ((coveredvp
!= NULLVP
)) {
2090 vnode_t pvp
= NULLVP
;
2093 * The covered vnode needs special handling. Trying to
2094 * get an iocount must not block here as this may lead
2095 * to deadlocks if the Filesystem to which the covered
2096 * vnode belongs is undergoing forced unmounts. Since we
2097 * hold a usecount, the vnode cannot be reused
2098 * (it can, however, still be terminated).
2100 vnode_getalways(coveredvp
);
2102 mount_dropcrossref(mp
, coveredvp
, 0);
2104 * We'll _try_ to detect if this really needs to be
2105 * done. The coveredvp can only be in termination (or
2106 * terminated) if the coveredvp's mount point is in a
2107 * forced unmount (or has been) since we still hold the
2110 if (!vnode_isrecycled(coveredvp
)) {
2111 pvp
= vnode_getparent(coveredvp
);
2113 if (coveredvp
->v_resolve
) {
2114 vnode_trigger_rearm(coveredvp
, ctx
);
/* Drop the usecount held on the covered vnode, then the iocount taken above. */
2119 vnode_rele(coveredvp
);
2120 vnode_put(coveredvp
);
2124 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2127 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2128 mount_lock_destroy(mp
);
2130 mac_mount_label_destroy(mp
);
2132 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2134 panic("dounmount: no coveredvp");
2140 * Unmount any mounts in this filesystem.
/*
 * dounmount_submounts
 *
 * Unmounts every filesystem mounted (directly or transitively) inside `mp`.
 *
 * Algorithm as visible here:
 *   1. Walk the global mount list to size an fsid array, allocated with
 *      M_NOWAIT; on allocation failure the mount list lock is dropped.
 *   2. fsids[0] is seeded with mp's own fsid.
 *   3. For each mount that follows mp in the mount list, take the fsid of
 *      the filesystem owning its covered vnode; if that fsid is already in
 *      fsids[0..m], append the mount's own fsid.  This relies on the list
 *      being in mount order, as the comment below states.
 *   4. After dropping the mount list lock, walk fsids[m..1] in reverse,
 *      look each mount up by fsid and call dounmount() with withref = 1;
 *      errors are ignored.  Index 0 (mp itself) is not unmounted here.
 *   5. Free the fsid array.
 *
 * NOTE(review): the mount_list_lock() acquire, the counting statement, and
 * the NULL checks on `vp` / `smp` are on lines not shown in this excerpt.
 */
2143 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2146 fsid_t
*fsids
, fsid
;
2148 int count
= 0, i
, m
= 0;
2153 // Get an array to hold the submounts fsids.
2154 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2156 fsids_sz
= count
* sizeof(fsid_t
);
2157 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2158 if (fsids
== NULL
) {
2159 mount_list_unlock();
2162 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2165 * Fill the array with submount fsids.
2166 * Since mounts are always added to the tail of the mount list, the
2167 * list is always in mount order.
2168 * For each mount check if the mounted-on vnode belongs to a
2169 * mount that's already added to our array of mounts to be unmounted.
2171 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2172 vp
= smp
->mnt_vnodecovered
;
2175 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2176 for (i
= 0; i
<= m
; i
++) {
2177 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2178 fsids
[i
].val
[1] == fsid
.val
[1]) {
2179 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2184 mount_list_unlock();
2186 // Unmount the submounts in reverse order. Ignore errors.
2187 for (i
= m
; i
> 0; i
--) {
2188 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2191 mount_iterdrop(smp
);
2192 (void) dounmount(smp
, flags
, 1, ctx
);
2197 FREE(fsids
, M_TEMP
);
/*
 * mount_dropcrossref
 *
 * Handles the cross reference that vnode `dp` holds on mount `mp`.  A
 * negative mnt_crossref is a fatal inconsistency (panic).  When `mp` is no
 * longer the filesystem mounted on dp and the cross-reference count has
 * reached zero, the mount structure is destroyed: its lock and MAC label
 * are torn down and the memory is returned to the M_MOUNT zone.
 * vnode_put_locked(dp) appears on both the destroy path and the ordinary
 * path.
 * NOTE(review): presumably vnode_put_locked() is conditional on
 * `need_put`; that test, the decrement of mnt_crossref and the vnode
 * lock/unlock calls are on lines not shown in this excerpt.
 */
2201 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2206 if (mp
->mnt_crossref
< 0)
2207 panic("mount cross refs -ve");
2209 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2212 vnode_put_locked(dp
);
2215 mount_lock_destroy(mp
);
2217 mac_mount_label_destroy(mp
);
2219 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2223 vnode_put_locked(dp
);
2229 * Sync each mounted filesystem.
/*
 * Tunables for the sync paths below.
 *   print_vmpage_stat  when non-zero, sync()/sync_thread() call
 *                      vm_countdirtypages() after syncing
 *   sync_timeout       seconds sync_internal() allows sync_async() to run
 */
2235 int print_vmpage_stat
=0;
2236 int sync_timeout
= 60; // Sync time limit (sec)
/*
 * sync_callback
 *
 * vfs_iterate() callback that syncs one mounted filesystem.  Read-only
 * mounts are skipped.  For writable mounts the MNT_ASYNC bit is saved and
 * cleared around the VFS_SYNC() call, which is issued with the kernel
 * context; a non-NULL `arg` requests MNT_WAIT, NULL requests MNT_NOWAIT.
 * Always returns VFS_RETURNED.
 *
 * NOTE(review): `arg` is annotated __unused although it is read to choose
 * the wait mode; both callers in this file pass NULL.
 * NOTE(review): presumably MNT_ASYNC is only re-set when `asyncflag` was
 * set -- that conditional is on a line not shown; confirm.
 */
2239 sync_callback(mount_t mp
, __unused
void *arg
)
2241 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2242 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2244 mp
->mnt_flag
&= ~MNT_ASYNC
;
2245 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2247 mp
->mnt_flag
|= MNT_ASYNC
;
2250 return (VFS_RETURNED
);
/*
 * sync -- system call entry point.
 *
 * Runs sync_callback() over every mounted filesystem (LK_NOWAIT, NULL
 * argument, hence MNT_NOWAIT syncs).  When print_vmpage_stat is set,
 * vm_countdirtypages() is called afterwards.  The trailing #endif closes a
 * DIAGNOSTIC-only section whose opening is not part of this excerpt.
 */
2255 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2257 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2259 if (print_vmpage_stat
) {
2260 vm_countdirtypages();
2266 #endif /* DIAGNOSTIC */
/*
 * sync_thread
 *
 * Kernel-thread body started by sync_async().  Performs the same
 * vfs_iterate()/sync_callback() pass as sync(), then wakes any sleeper
 * waiting on the address passed in `arg` (sync_async() sleeps on that same
 * address).  `arg` is only used as a wakeup channel in the visible code.
 */
2271 sync_thread(void *arg
, __unused wait_result_t wr
)
2273 int *timeout
= (int *) arg
;
2275 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2278 wakeup((caddr_t
) timeout
);
2279 if (print_vmpage_stat
) {
2280 vm_countdirtypages();
2286 #endif /* DIAGNOSTIC */
2290 * Sync in a separate thread so we can time out if it blocks.
/*
 * sync_async
 *
 * Runs a sync pass in a separate kernel thread and waits for it for at most
 * `timeout` seconds.  With sync_mtx_lck held, sync_thread() is started with
 * the address of the local `timeout` as its argument; this function then
 * msleep()s on that same address (PDROP releases the mutex) with a timespec
 * of `timeout` seconds.  A message is printed if the thread cannot be
 * started, and "sync timed out" is printed after the sleep.  The thread
 * reference is released with thread_deallocate().
 * NOTE(review): presumably the "sync timed out" message is guarded by a
 * timeout test on the msleep() result; that test and the return value are
 * on lines not shown.
 * NOTE(review): the wakeup channel is the address of a stack variable of
 * this function; sync_thread() may still reference it after a timeout
 * return -- visible use there is wakeup() only.
 */
2293 sync_async(int timeout
)
2297 struct timespec ts
= {timeout
, 0};
2299 lck_mtx_lock(sync_mtx_lck
);
2300 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2301 printf("sync_thread failed\n");
2302 lck_mtx_unlock(sync_mtx_lck
);
2306 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2308 printf("sync timed out: %d sec\n", timeout
);
2310 thread_deallocate(thd
);
2316 * An in-kernel sync for power management to call.
/*
 * sync_internal (named by the closing comment below; the declarator line is
 * not part of this excerpt).  Kernel-private entry point that calls
 * sync_async() with the global sync_timeout and ignores its result.
 */
2318 __private_extern__
int
2321 (void) sync_async(sync_timeout
);
2324 } /* end of sync_internal call */
2327 * Change filesystem quotas.
/*
 * quotactl -- system call entry point (quota-enabled build).
 *
 * Resolves a path to find the target mount, stages the command's argument
 * in kernel memory, dispatches to VFS_QUOTACTL(), and copies results back
 * to user space.
 *
 * Argument staging, keyed on quota_cmd = uap->cmd >> SUBCMDSHIFT:
 *   - file-name commands: uap->arg is copied with copyinstr() into a
 *     MAXPATHLEN kalloc() buffer, freed with kfree() after the VFS call;
 *   - dqblk "get" commands: datap points at the local my_dqblk;
 *   - dqblk "set" commands: the structure is copied in, via a user_dqblk
 *     and munge_dqblk() for 64-bit processes;
 *   - status commands: datap points at the local quota_status integer.
 * After the VFS call, dqblk results are copied out (munged to user_dqblk
 * for 64-bit processes) and the status integer is copied out.
 *
 * The status case takes the address of quota_status (`&quota_status`);
 * quota_status is the int declared above and is the object whose size is
 * used by the matching copyout().
 *
 * NOTE(review): the `case` labels, the namei() call, and the error checks
 * between steps are on lines not shown in this excerpt.
 */
2331 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2334 int error
, quota_cmd
, quota_status
;
2337 struct nameidata nd
;
2338 vfs_context_t ctx
= vfs_context_current();
2339 struct dqblk my_dqblk
;
2341 AUDIT_ARG(uid
, uap
->uid
);
2342 AUDIT_ARG(cmd
, uap
->cmd
);
2343 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2348 mp
= nd
.ni_vp
->v_mount
;
2349 vnode_put(nd
.ni_vp
);
2352 /* copyin any data we will need for downstream code */
2353 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2355 switch (quota_cmd
) {
2357 /* uap->arg specifies a file from which to take the quotas */
2358 fnamelen
= MAXPATHLEN
;
2359 datap
= kalloc(MAXPATHLEN
);
2360 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2363 /* uap->arg is a pointer to a dqblk structure. */
2364 datap
= (caddr_t
) &my_dqblk
;
2368 /* uap->arg is a pointer to a dqblk structure. */
2369 datap
= (caddr_t
) &my_dqblk
;
2370 if (proc_is64bit(p
)) {
2371 struct user_dqblk my_dqblk64
;
2372 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2374 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2378 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2382 /* uap->arg is a pointer to an integer */
2383 datap
= (caddr_t
) &quota_status
;
2391 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2394 switch (quota_cmd
) {
2397 kfree(datap
, MAXPATHLEN
);
2400 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2402 if (proc_is64bit(p
)) {
2403 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2404 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2405 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2408 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2413 /* uap->arg is a pointer to an integer */
2415 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl -- stub variant: every argument is unused and the call always
 * fails with EOPNOTSUPP.  The preprocessor conditional that chooses between
 * this stub and the full implementation is not part of this excerpt.
 */
2426 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2428 return (EOPNOTSUPP
);
2433 * Get filesystem statistics.
2435 * Returns: 0 Success
2437 * vfs_update_vfsstat:???
2438 * munge_statfs:EFAULT
/*
 * statfs -- system call entry point.
 *
 * Resolves uap->path (user space, following symlinks, audited), takes the
 * mount's cached vfsstatfs, runs the MAC stat check, refreshes the
 * statistics with vfs_update_vfsstat(VFS_USER_EVENT) and copies them to
 * uap->buf through munge_statfs(), sized for the calling process's ABI.
 * NOTE(review): `p` is annotated __unused but is passed to
 * IS_64BIT_PROCESS(); the namei() call and error returns are on lines not
 * shown in this excerpt.
 */
2442 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2445 struct vfsstatfs
*sp
;
2447 struct nameidata nd
;
2448 vfs_context_t ctx
= vfs_context_current();
2451 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2452 UIO_USERSPACE
, uap
->path
, ctx
);
2458 sp
= &mp
->mnt_vfsstat
;
2462 error
= mac_mount_check_stat(ctx
, mp
);
2467 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2473 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2479 * Get filesystem statistics.
/*
 * fstatfs -- system call entry point; file-descriptor flavour of statfs().
 *
 * Maps uap->fd to a vnode with file_vnode(), takes an iocount with
 * vnode_getwithref(), audits the vnode path, runs the MAC stat check on the
 * vnode's mount, refreshes the cached statistics with
 * vfs_update_vfsstat(VFS_USER_EVENT) and copies them to uap->buf through
 * munge_statfs().
 * NOTE(review): how `mp` is derived from the vnode and the release of the
 * file/vnode references are on lines not shown in this excerpt.
 */
2483 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2487 struct vfsstatfs
*sp
;
2490 AUDIT_ARG(fd
, uap
->fd
);
2492 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2495 error
= vnode_getwithref(vp
);
2501 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2510 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2515 sp
= &mp
->mnt_vfsstat
;
2516 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2520 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2530 * Common routine to handle copying of statfs64 data to user space
/*
 * statfs64_common
 *
 * Builds a struct statfs64 from the mount `mp` and its cached statistics
 * `sfsp`, and copies it out to the user address `bufp`.
 *
 *   - The structure is zeroed first, so padding and unset fields do not
 *     leak kernel stack contents to user space.
 *   - Numeric fields are copied from sfsp; f_iosize is narrowed to int32_t.
 *   - f_type comes from the mount's vfstable entry, f_flags from
 *     mp->mnt_flag masked with MNT_VISFLAGMASK.
 *   - f_fstypename uses mp->fstypename_override when
 *     MNTK_TYPENAME_OVERRIDE is set, otherwise sfsp->f_fstypename.
 *   - The mounted-on and mounted-from names are copied with strlcpy()
 *     bounded by MAXPATHLEN.
 *
 * The copyout() result is stored in `error`; the return statement is on a
 * line not shown in this excerpt.
 */
2533 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2536 struct statfs64 sfs
;
2538 bzero(&sfs
, sizeof(sfs
));
2540 sfs
.f_bsize
= sfsp
->f_bsize
;
2541 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2542 sfs
.f_blocks
= sfsp
->f_blocks
;
2543 sfs
.f_bfree
= sfsp
->f_bfree
;
2544 sfs
.f_bavail
= sfsp
->f_bavail
;
2545 sfs
.f_files
= sfsp
->f_files
;
2546 sfs
.f_ffree
= sfsp
->f_ffree
;
2547 sfs
.f_fsid
= sfsp
->f_fsid
;
2548 sfs
.f_owner
= sfsp
->f_owner
;
2549 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2550 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2551 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2552 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2553 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2555 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2557 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2558 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2560 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2566 * Get file system statistics in 64-bit mode
/*
 * statfs64 -- system call entry point.
 *
 * Same flow as statfs(): resolve uap->path, run the MAC stat check, refresh
 * the mount's cached statistics with vfs_update_vfsstat(VFS_USER_EVENT) --
 * but the result is delivered through statfs64_common() as a
 * struct statfs64.
 * NOTE(review): the namei() call, how `mp` is obtained, and the error
 * returns are on lines not shown in this excerpt.
 */
2569 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2572 struct vfsstatfs
*sp
;
2574 struct nameidata nd
;
2575 vfs_context_t ctxp
= vfs_context_current();
2578 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2579 UIO_USERSPACE
, uap
->path
, ctxp
);
2585 sp
= &mp
->mnt_vfsstat
;
2589 error
= mac_mount_check_stat(ctxp
, mp
);
2594 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2600 error
= statfs64_common(mp
, sp
, uap
->buf
);
2607 * Get file system statistics in 64-bit mode
/*
 * fstatfs64 -- system call entry point; file-descriptor flavour of
 * statfs64().
 *
 * Maps uap->fd to a vnode with file_vnode(), takes an iocount with
 * vnode_getwithref(), audits the vnode path, runs the MAC stat check,
 * refreshes the cached statistics and delivers them through
 * statfs64_common().
 * NOTE(review): how `mp` is derived from the vnode and the release of the
 * file/vnode references are on lines not shown in this excerpt.
 */
2610 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2614 struct vfsstatfs
*sp
;
2617 AUDIT_ARG(fd
, uap
->fd
);
2619 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2622 error
= vnode_getwithref(vp
);
2628 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2637 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2642 sp
= &mp
->mnt_vfsstat
;
2643 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2647 error
= statfs64_common(mp
, sp
, uap
->buf
);
/*
 * Iteration state shared between the getfsstat-family system calls and
 * their vfs_iterate() callbacks.  The member declarations are not part of
 * this excerpt; the callbacks in this file access the members sfsp, mp,
 * count, maxcount, flags and error.
 */
2656 struct getfsstat_struct
{
/*
 * getfsstat_callback
 *
 * vfs_iterate() callback for getfsstat()/__mac_getfsstat().  `arg` is the
 * struct getfsstat_struct iteration state.
 *
 * While an output buffer is present (fstp->sfsp) and fewer than maxcount
 * entries have been produced, for each mount:
 *   - the MAC stat check result is recorded in fstp->error and a
 *     VFS_RETURNED_DONE return follows;
 *   - the cached statistics are refreshed unless MNT_NOWAIT was requested
 *     without MNT_WAIT/MNT_DWAIT; a refresh failure is logged and the mount
 *     is skipped (VFS_RETURNED);
 *   - the entry is copied out with munge_statfs() in the caller's ABI and
 *     the output cursor advances by the size it reports;
 *   - the mount's MAC label is fetched with mac_mount_label_get().
 * Failures of munge_statfs() / mac_mount_label_get() are stored in
 * fstp->error with a VFS_RETURNED_DONE return.
 * NOTE(review): the `if (error)` tests guarding those returns, the
 * increment of fstp->count and the advance of fstp->mp are on lines not
 * shown in this excerpt.
 */
2667 getfsstat_callback(mount_t mp
, void * arg
)
2670 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2671 struct vfsstatfs
*sp
;
2673 vfs_context_t ctx
= vfs_context_current();
2675 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2677 error
= mac_mount_check_stat(ctx
, mp
);
2679 fstp
->error
= error
;
2680 return(VFS_RETURNED_DONE
);
2683 sp
= &mp
->mnt_vfsstat
;
2685 * If MNT_NOWAIT is specified, do not refresh the
2686 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2688 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2689 (error
= vfs_update_vfsstat(mp
, ctx
,
2691 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2692 return(VFS_RETURNED
);
2696 * Need to handle LP64 version of struct statfs
2698 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2700 fstp
->error
= error
;
2701 return(VFS_RETURNED_DONE
);
2703 fstp
->sfsp
+= my_size
;
2707 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2709 fstp
->error
= error
;
2710 return(VFS_RETURNED_DONE
);
2717 return(VFS_RETURNED
);
2721 * Get statistics on all filesystems.
/*
 * getfsstat -- system call entry point.
 *
 * Thin wrapper: repackages the arguments into a __mac_getfsstat_args with a
 * null MAC label array and forwards to __mac_getfsstat(), returning its
 * result.
 * NOTE(review): `p` is annotated __unused but is passed on to
 * __mac_getfsstat().
 */
2724 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2726 struct __mac_getfsstat_args muap
;
2728 muap
.buf
= uap
->buf
;
2729 muap
.bufsize
= uap
->bufsize
;
2730 muap
.mac
= USER_ADDR_NULL
;
2732 muap
.flags
= uap
->flags
;
2734 return (__mac_getfsstat(p
, &muap
, retval
));
2738 * __mac_getfsstat: Get MAC-related file system statistics
2740 * Parameters: p (ignored)
2741 * uap User argument descriptor (see below)
2742 * retval Count of file system statistics (N stats)
2744 * Indirect: uap->bufsize Buffer size
2745 * uap->macsize MAC info size
2746 * uap->buf Buffer where information will be returned
2748 * uap->flags File system flags
2751 * Returns: 0 Success
/*
 * __mac_getfsstat
 *
 * Implementation shared by getfsstat() and the MAC-aware variant.
 *
 *   - maxcount is the number of statfs records that fit in uap->bufsize,
 *     using the user64 or user32 record size according to the caller's ABI.
 *   - When uap->mac is non-null, macsize is interpreted as an array of
 *     user pointers (8 or 4 bytes each); its element count is compared
 *     with maxcount.  The array is copied in to a temporary buffer (mp0)
 *     and widened into an array of user_addr_t (mp); mp0 is freed on each
 *     visible path.
 *   - getfsstat_callback() is run over all mounts with the prepared
 *     iteration state, after which the normalized array is freed.
 *   - *retval is fst.maxcount when a buffer was supplied and more mounts
 *     were counted than fit, otherwise fst.count.
 *
 * NOTE(review): `p` is annotated __unused but is used by
 * IS_64BIT_PROCESS() and KAUTH_DEBUG().  The error values returned on the
 * count mismatch / allocation / copyin failures and the initialisation of
 * the remaining `fst` members are on lines not shown in this excerpt.
 */
2756 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2760 size_t count
, maxcount
, bufsize
, macsize
;
2761 struct getfsstat_struct fst
;
2763 bufsize
= (size_t) uap
->bufsize
;
2764 macsize
= (size_t) uap
->macsize
;
2766 if (IS_64BIT_PROCESS(p
)) {
2767 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2770 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2778 if (uap
->mac
!= USER_ADDR_NULL
) {
2783 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2784 if (count
!= maxcount
)
2787 /* Copy in the array */
2788 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2793 error
= copyin(uap
->mac
, mp0
, macsize
);
2795 FREE(mp0
, M_MACTEMP
);
2799 /* Normalize to an array of user_addr_t */
2800 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2802 FREE(mp0
, M_MACTEMP
);
2806 for (i
= 0; i
< count
; i
++) {
2807 if (IS_64BIT_PROCESS(p
))
2808 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2810 mp
[i
] = (user_addr_t
)mp0
[i
];
2812 FREE(mp0
, M_MACTEMP
);
2819 fst
.flags
= uap
->flags
;
2822 fst
.maxcount
= maxcount
;
2825 vfs_iterate(0, getfsstat_callback
, &fst
);
2828 FREE(mp
, M_MACTEMP
);
2831 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2835 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2836 *retval
= fst
.maxcount
;
2838 *retval
= fst
.count
;
/*
 * getfsstat64_callback
 *
 * vfs_iterate() callback for getfsstat64().  Mirrors getfsstat_callback()
 * but emits struct statfs64 records through statfs64_common() and advances
 * the output cursor by sizeof(struct statfs64).  The MAC stat check result
 * and a statfs64_common() failure are stored in fstp->error with a
 * VFS_RETURNED_DONE return; a failed statistics refresh only skips the
 * mount.
 * NOTE(review): the `if (error)` tests guarding those returns and the
 * increment of fstp->count are on lines not shown in this excerpt.
 */
2843 getfsstat64_callback(mount_t mp
, void * arg
)
2845 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2846 struct vfsstatfs
*sp
;
2849 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2851 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2853 fstp
->error
= error
;
2854 return(VFS_RETURNED_DONE
);
2857 sp
= &mp
->mnt_vfsstat
;
2859 * If MNT_NOWAIT is specified, do not refresh the fsstat
2860 * cache. MNT_WAIT overrides MNT_NOWAIT.
2862 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2863 * getfsstat, since the constants are out of the same
2866 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2867 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2868 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2869 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2870 return(VFS_RETURNED
);
2873 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2875 fstp
->error
= error
;
2876 return(VFS_RETURNED_DONE
);
2878 fstp
->sfsp
+= sizeof(struct statfs64
);
2881 return(VFS_RETURNED
);
2885 * Get statistics on all file systems in 64 bit mode.
/*
 * getfsstat64 -- system call entry point.
 *
 * maxcount is the number of struct statfs64 records that fit in
 * uap->bufsize.  getfsstat64_callback() is run over all mounts, and
 * *retval is fst.maxcount when a buffer was supplied and more mounts were
 * counted than fit, otherwise fst.count.
 * NOTE(review): `p` is annotated __unused but is dereferenced in the
 * KAUTH_DEBUG() call; initialisation of the remaining `fst` members and
 * the error return are on lines not shown in this excerpt.
 */
2888 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2891 int count
, maxcount
;
2892 struct getfsstat_struct fst
;
2894 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2900 fst
.flags
= uap
->flags
;
2903 fst
.maxcount
= maxcount
;
2905 vfs_iterate(0, getfsstat64_callback
, &fst
);
2908 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2912 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2913 *retval
= fst
.maxcount
;
2915 *retval
= fst
.count
;
2921 * gets the associated vnode with the file descriptor passed.
2925 * ctx - vfs context of caller
2926 * fd - file descriptor for which vnode is required.
2927 * vpp - Pointer to pointer to vnode to be returned.
2929 * The vnode is returned with an iocount so any vnode obtained
2930 * by this call needs a vnode_put
/*
 * vnode_getfromfd
 *
 * Translates file descriptor `fd` of the process behind `ctx` into its
 * vnode.  fp_getfvp() yields the fileproc and vnode; vnode_getwithref()
 * then takes an iocount on the vnode.  The fileproc reference is released
 * with fp_drop() at two points.
 * NOTE(review): presumably the two fp_drop() calls are the
 * vnode_getwithref() failure path and the success path; the store through
 * vpp and the return values are on lines not shown -- confirm.
 */
2934 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2938 struct fileproc
*fp
;
2939 proc_t p
= vfs_context_proc(ctx
);
2943 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2947 error
= vnode_getwithref(vp
);
2949 (void)fp_drop(p
, fd
, fp
, 0);
2953 (void)fp_drop(p
, fd
, fp
, 0);
2959 * Wrapper function around namei to start lookup from a directory
2960 * specified by a file descriptor ni_dirfd.
2962 * In addition to all the errors returned by namei, this call can
2963 * return ENOTDIR if the file descriptor does not refer to a directory.
2964 * and EBADF if the file descriptor is not valid.
/*
 * nameiat
 *
 * namei() wrapper that starts the lookup at the directory referred to by
 * `dirfd`.
 *
 * The directory-fd handling is entered only when dirfd is not AT_FDCWD, the
 * lookup is not a continued lookup (NAMEI_CONTLOOKUP) and no starting
 * vnode has been supplied already (USEDVP).  In that case the first
 * character of the path is fetched (copyin() for user-space paths, a direct
 * read otherwise), the vnode behind dirfd is obtained with
 * vnode_getfromfd() and checked to be a directory, and the lookup is run
 * with ni_dvp set to it and USEDVP set; USEDVP is cleared again afterwards.
 * All other cases fall through to plain namei().
 * NOTE(review): the test on the first path character, the namei() call
 * made with USEDVP set, the vnode_put() of the directory vnode and the
 * error returns are on lines not shown in this excerpt.
 */
2967 nameiat(struct nameidata
*ndp
, int dirfd
)
2969 if ((dirfd
!= AT_FDCWD
) &&
2970 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2971 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2975 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2976 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2980 c
= *((char *)(ndp
->ni_dirp
));
2986 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2991 if (vnode_vtype(dvp_at
) != VDIR
) {
2996 ndp
->ni_dvp
= dvp_at
;
2997 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2999 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3005 return (namei(ndp
));
3009 * Change current working directory to a given file descriptor.
3013 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3015 struct filedesc
*fdp
= p
->p_fd
;
3021 vfs_context_t ctx
= vfs_context_current();
3023 AUDIT_ARG(fd
, uap
->fd
);
3024 if (per_thread
&& uap
->fd
== -1) {
3026 * Switching back from per-thread to per process CWD; verify we
3027 * in fact have one before proceeding. The only success case
3028 * for this code path is to return 0 preemptively after zapping
3029 * the thread structure contents.
3031 thread_t th
= vfs_context_thread(ctx
);
3033 uthread_t uth
= get_bsdthread_info(th
);
3035 uth
->uu_cdir
= NULLVP
;
3036 if (tvp
!= NULLVP
) {
3044 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
3046 if ( (error
= vnode_getwithref(vp
)) ) {
3051 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3053 if (vp
->v_type
!= VDIR
) {
3059 error
= mac_vnode_check_chdir(ctx
, vp
);
3063 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3067 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3068 if (vfs_busy(mp
, LK_NOWAIT
)) {
3072 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3081 if ( (error
= vnode_ref(vp
)) )
3086 thread_t th
= vfs_context_thread(ctx
);
3088 uthread_t uth
= get_bsdthread_info(th
);
3091 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3116 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3118 return common_fchdir(p
, uap
, 0);
3122 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3124 return common_fchdir(p
, (void *)uap
, 1);
3128 * Change current working directory (".").
3130 * Returns: 0 Success
3131 * change_dir:ENOTDIR
3133 * vnode_ref:ENOENT No such file or directory
3137 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3139 struct filedesc
*fdp
= p
->p_fd
;
3141 struct nameidata nd
;
3143 vfs_context_t ctx
= vfs_context_current();
3145 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3146 UIO_USERSPACE
, uap
->path
, ctx
);
3147 error
= change_dir(&nd
, ctx
);
3150 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3151 vnode_put(nd
.ni_vp
);
3155 * drop the iocount we picked up in change_dir
3157 vnode_put(nd
.ni_vp
);
3160 thread_t th
= vfs_context_thread(ctx
);
3162 uthread_t uth
= get_bsdthread_info(th
);
3164 uth
->uu_cdir
= nd
.ni_vp
;
3165 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3167 vnode_rele(nd
.ni_vp
);
3173 fdp
->fd_cdir
= nd
.ni_vp
;
3187 * Change current working directory (".") for the entire process
3189 * Parameters: p Process requesting the call
3190 * uap User argument descriptor (see below)
3193 * Indirect parameters: uap->path Directory path
3195 * Returns: 0 Success
3196 * common_chdir: ENOTDIR
3197 * common_chdir: ENOENT No such file or directory
3202 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3204 return common_chdir(p
, (void *)uap
, 0);
3210 * Change current working directory (".") for a single thread
3212 * Parameters: p Process requesting the call
3213 * uap User argument descriptor (see below)
3216 * Indirect parameters: uap->path Directory path
3218 * Returns: 0 Success
3219 * common_chdir: ENOTDIR
3220 * common_chdir: ENOENT No such file or directory
3225 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3227 return common_chdir(p
, (void *)uap
, 1);
3232 * Change notion of root (``/'') directory.
3236 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3238 struct filedesc
*fdp
= p
->p_fd
;
3240 struct nameidata nd
;
3242 vfs_context_t ctx
= vfs_context_current();
3244 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3247 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3248 UIO_USERSPACE
, uap
->path
, ctx
);
3249 error
= change_dir(&nd
, ctx
);
3254 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3257 vnode_put(nd
.ni_vp
);
3262 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3263 vnode_put(nd
.ni_vp
);
3266 vnode_put(nd
.ni_vp
);
3270 fdp
->fd_rdir
= nd
.ni_vp
;
3271 fdp
->fd_flags
|= FD_CHROOT
;
3281 * Common routine for chroot and chdir.
3283 * Returns: 0 Success
3284 * ENOTDIR Not a directory
3285 * namei:??? [anything namei can return]
3286 * vnode_authorize:??? [anything vnode_authorize can return]
3289 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3294 if ((error
= namei(ndp
)))
3299 if (vp
->v_type
!= VDIR
) {
3305 error
= mac_vnode_check_chdir(ctx
, vp
);
3312 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3322 * Allocate the vnode data (for directories) associated with the file glob.
3325 fg_vn_data_alloc(void)
3327 struct fd_vn_data
*fvdata
;
3329 /* Allocate per fd vnode data */
3330 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3331 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3332 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3337 * Free the vnode data (for directories) associated with the file glob.
3340 fg_vn_data_free(void *fgvndata
)
3342 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3345 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3346 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3347 FREE(fvdata
, M_FD_VN_DATA
);
3351 * Check permissions, allocate an open file structure,
3352 * and call the device open routine if any.
3354 * Returns: 0 Success
3365 * XXX Need to implement uid, gid
3368 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3369 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3372 proc_t p
= vfs_context_proc(ctx
);
3373 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3374 struct fileproc
*fp
;
3377 int type
, indx
, error
;
3379 struct vfs_context context
;
3383 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3386 flags
= FFLAGS(uflags
);
3387 CLR(flags
, FENCRYPTED
);
3388 CLR(flags
, FUNENCRYPTED
);
3390 AUDIT_ARG(fflags
, oflags
);
3391 AUDIT_ARG(mode
, vap
->va_mode
);
3393 if ((error
= falloc_withalloc(p
,
3394 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3397 uu
->uu_dupfd
= -indx
- 1;
3399 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3400 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3401 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3402 fp_drop(p
, indx
, NULL
, 0);
3407 if (error
== ERESTART
)
3409 fp_free(p
, indx
, fp
);
3415 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3416 fp
->f_fglob
->fg_ops
= &vnops
;
3417 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3419 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3420 lf
.l_whence
= SEEK_SET
;
3423 if (flags
& O_EXLOCK
)
3424 lf
.l_type
= F_WRLCK
;
3426 lf
.l_type
= F_RDLCK
;
3428 if ((flags
& FNONBLOCK
) == 0)
3431 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3436 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3438 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3441 #if DEVELOPMENT || DEBUG
3443 * XXX VSWAP: Check for entitlements or special flag here
3444 * so we can restrict access appropriately.
3446 #else /* DEVELOPMENT || DEBUG */
3448 if (vnode_isswap(vp
) && (flags
& (FWRITE
| O_TRUNC
)) && (ctx
!= vfs_context_kernel())) {
3449 /* block attempt to write/truncate swapfile */
3453 #endif /* DEVELOPMENT || DEBUG */
3455 /* try to truncate by setting the size attribute */
3456 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3460 * For directories we hold some additional information in the fd.
3462 if (vnode_vtype(vp
) == VDIR
) {
3463 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3465 fp
->f_fglob
->fg_vn_data
= NULL
;
3471 * The first terminal open (without a O_NOCTTY) by a session leader
3472 * results in it being set as the controlling terminal.
3474 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3475 !(flags
& O_NOCTTY
)) {
3478 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3479 (caddr_t
)&tmp
, ctx
);
3483 if (flags
& O_CLOEXEC
)
3484 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3485 if (flags
& O_CLOFORK
)
3486 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3487 procfdtbl_releasefd(p
, indx
, NULL
);
3489 #if CONFIG_SECLUDED_MEMORY
3490 if (secluded_for_filecache
&&
3491 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3492 vnode_vtype(vp
) == VREG
) {
3493 memory_object_control_t moc
;
3495 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3497 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3498 /* nothing to do... */
3499 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3500 /* writable -> no longer eligible for secluded pages */
3501 memory_object_mark_eligible_for_secluded(moc
,
3503 } else if (secluded_for_filecache
== 1) {
3504 char pathname
[32] = { 0, };
3506 /* XXX FBDP: better way to detect /Applications/ ? */
3507 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3508 copyinstr(ndp
->ni_dirp
,
3513 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3518 pathname
[sizeof (pathname
) - 1] = '\0';
3519 if (strncmp(pathname
,
3521 strlen("/Applications/")) == 0 &&
3523 "/Applications/Camera.app/",
3524 strlen("/Applications/Camera.app/")) != 0) {
3527 * AND from "/Applications/"
3528 * AND not from "/Applications/Camera.app/"
3529 * ==> eligible for secluded
3531 memory_object_mark_eligible_for_secluded(moc
,
3534 } else if (secluded_for_filecache
== 2) {
3535 /* not implemented... */
3536 if (!strncmp(vp
->v_name
,
3537 DYLD_SHARED_CACHE_NAME
,
3538 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3539 !strncmp(vp
->v_name
,
3541 strlen(vp
->v_name
)) ||
3542 !strncmp(vp
->v_name
,
3544 strlen(vp
->v_name
)) ||
3545 !strncmp(vp
->v_name
,
3547 strlen(vp
->v_name
)) ||
3548 !strncmp(vp
->v_name
,
3550 strlen(vp
->v_name
))) {
3552 * This file matters when launching Camera:
3553 * do not store its contents in the secluded
3554 * pool that will be drained on Camera launch.
3556 memory_object_mark_eligible_for_secluded(moc
,
3561 #endif /* CONFIG_SECLUDED_MEMORY */
3563 fp_drop(p
, indx
, fp
, 1);
3570 context
= *vfs_context_current();
3571 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3573 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3574 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3575 lf
.l_whence
= SEEK_SET
;
3578 lf
.l_type
= F_UNLCK
;
3581 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3584 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3586 fp_free(p
, indx
, fp
);
3592 * While most of the *at syscall handlers can call nameiat() which
3593 * is a wrapper around namei, the use of namei and initialisation
3594 * of nameidata are far removed and in different functions - namei
3595 * gets called in vn_open_auth for open1. So we'll just do here what
3599 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3600 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3603 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3607 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3608 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3612 c
= *((char *)(ndp
->ni_dirp
));
3618 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3623 if (vnode_vtype(dvp_at
) != VDIR
) {
3628 ndp
->ni_dvp
= dvp_at
;
3629 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3630 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3637 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3641 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3643 * Parameters: p Process requesting the open
3644 * uap User argument descriptor (see below)
3645 * retval Pointer to an area to receive the
3646 * return value from the system call
3648 * Indirect: uap->path Path to open (same as 'open')
3649 * uap->flags Flags to open (same as 'open')
3650 * uap->uid UID to set, if creating
3651 * uap->gid GID to set, if creating
3652 * uap->mode File mode, if creating (same as 'open')
3653 * uap->xsecurity ACL to set, if creating
3655 * Returns: 0 Success
3658 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3660 * XXX: We should enumerate the possible errno values here, and where
3661 * in the code they originated.
3664 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3666 struct filedesc
*fdp
= p
->p_fd
;
3668 kauth_filesec_t xsecdst
;
3669 struct vnode_attr va
;
3670 struct nameidata nd
;
3673 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3676 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3677 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3681 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3682 VATTR_SET(&va
, va_mode
, cmode
);
3683 if (uap
->uid
!= KAUTH_UID_NONE
)
3684 VATTR_SET(&va
, va_uid
, uap
->uid
);
3685 if (uap
->gid
!= KAUTH_GID_NONE
)
3686 VATTR_SET(&va
, va_gid
, uap
->gid
);
3687 if (xsecdst
!= NULL
)
3688 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3690 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3691 uap
->path
, vfs_context_current());
3693 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3694 fileproc_alloc_init
, NULL
, retval
);
3695 if (xsecdst
!= NULL
)
3696 kauth_filesec_free(xsecdst
);
3702 * Go through the data-protected atomically controlled open (2)
3704 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3706 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3707 int flags
= uap
->flags
;
3708 int class = uap
->class;
3709 int dpflags
= uap
->dpflags
;
3712 * Follow the same path as normal open(2)
3713 * Look up the item if it exists, and acquire the vnode.
3715 struct filedesc
*fdp
= p
->p_fd
;
3716 struct vnode_attr va
;
3717 struct nameidata nd
;
3722 /* Mask off all but regular access permissions */
3723 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3724 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3726 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3727 uap
->path
, vfs_context_current());
3730 * Initialize the extra fields in vnode_attr to pass down our
3732 * 1. target cprotect class.
3733 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3735 if (flags
& O_CREAT
) {
3736 /* lower level kernel code validates that the class is valid before applying it. */
3737 if (class != PROTECTION_CLASS_DEFAULT
) {
3739 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3740 * file behave the same as open (2)
3742 VATTR_SET(&va
, va_dataprotect_class
, class);
3746 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3747 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3748 /* Not allowed to write raw encrypted bytes */
3751 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3752 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3754 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3755 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3759 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3760 fileproc_alloc_init
, NULL
, retval
);
3766 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3767 int fd
, enum uio_seg segflg
, int *retval
)
3769 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3770 struct vnode_attr va
;
3771 struct nameidata nd
;
3775 /* Mask off all but regular access permissions */
3776 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3777 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3779 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3782 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3787 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3789 __pthread_testcancel(1);
3790 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3794 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3797 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3798 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3802 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3805 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3806 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3810 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3812 __pthread_testcancel(1);
3813 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3817 * openbyid_np: open a file given a file system id and a file system object id
3818 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3819 * file systems that don't support object ids it is a node id (uint64_t).
3821 * Parameters: p Process requesting the open
3822 * uap User argument descriptor (see below)
3823 * retval Pointer to an area to receive the
3824 * return value from the system call
3826 * Indirect: uap->path Path to open (same as 'open')
3828 * uap->fsid id of target file system
3829 * uap->objid id of target file system object
3830 * uap->oflags Flags to open (same as 'open')
3832 * Returns: 0 Success
3836 * XXX: We should enumerate the possible errno values here, and where
3837 * in the code they originated.
3840 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3846 int buflen
= MAXPATHLEN
;
3848 vfs_context_t ctx
= vfs_context_current();
3850 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
3854 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3858 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3859 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3863 AUDIT_ARG(value32
, fsid
.val
[0]);
3864 AUDIT_ARG(value64
, objid
);
3866 /* resolve path from fsid, objid */
3868 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3873 error
= fsgetpath_internal(
3874 ctx
, fsid
.val
[0], objid
,
3875 buflen
, buf
, &pathlen
);
3881 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3889 error
= openat_internal(
3890 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3899 * Create a special file.
3901 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
3904 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3906 struct vnode_attr va
;
3907 vfs_context_t ctx
= vfs_context_current();
3909 struct nameidata nd
;
3913 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3914 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3916 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3917 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3918 return(mkfifo1(ctx
, uap
->path
, &va
));
3920 AUDIT_ARG(mode
, uap
->mode
);
3921 AUDIT_ARG(value32
, uap
->dev
);
3923 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3925 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3926 UIO_USERSPACE
, uap
->path
, ctx
);
3938 switch (uap
->mode
& S_IFMT
) {
3940 VATTR_SET(&va
, va_type
, VCHR
);
3943 VATTR_SET(&va
, va_type
, VBLK
);
3951 error
= mac_vnode_check_create(ctx
,
3952 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3957 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3960 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3964 int update_flags
= 0;
3966 // Make sure the name & parent pointers are hooked up
3967 if (vp
->v_name
== NULL
)
3968 update_flags
|= VNODE_UPDATE_NAME
;
3969 if (vp
->v_parent
== NULLVP
)
3970 update_flags
|= VNODE_UPDATE_PARENT
;
3973 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3976 add_fsevent(FSE_CREATE_FILE
, ctx
,
3984 * nameidone has to happen before we vnode_put(dvp)
3985 * since it may need to release the fs_nodelock on the dvp
3997 * Create a named pipe.
3999 * Returns: 0 Success
4002 * vnode_authorize:???
4006 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
4010 struct nameidata nd
;
4012 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
4013 UIO_USERSPACE
, upath
, ctx
);
4020 /* check that this is a new file and authorize addition */
4025 VATTR_SET(vap
, va_type
, VFIFO
);
4027 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
4030 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4033 * nameidone has to happen before we vnode_put(dvp)
4034 * since it may need to release the fs_nodelock on the dvp
4047 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4049 * Parameters: p Process requesting the open
4050 * uap User argument descriptor (see below)
4053 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4054 * uap->uid UID to set
4055 * uap->gid GID to set
4056 * uap->mode File mode to set (same as 'mkfifo')
4057 * uap->xsecurity ACL to set, if creating
4059 * Returns: 0 Success
4062 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4064 * XXX: We should enumerate the possible errno values here, and where
4065 * in the code they originated.
4068 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4071 kauth_filesec_t xsecdst
;
4072 struct vnode_attr va
;
4074 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4076 xsecdst
= KAUTH_FILESEC_NONE
;
4077 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4078 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
4083 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4084 if (uap
->uid
!= KAUTH_UID_NONE
)
4085 VATTR_SET(&va
, va_uid
, uap
->uid
);
4086 if (uap
->gid
!= KAUTH_GID_NONE
)
4087 VATTR_SET(&va
, va_gid
, uap
->gid
);
4088 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4089 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4091 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4093 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4094 kauth_filesec_free(xsecdst
);
4100 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4102 struct vnode_attr va
;
4105 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4107 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
4112 my_strrchr(char *p
, int ch
)
4116 for (save
= NULL
;; ++p
) {
4125 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4128 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4130 int ret
, len
= _len
;
4132 *truncated_path
= 0;
4133 ret
= vn_getpath(dvp
, path
, &len
);
4134 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4137 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4138 if (len
> MAXPATHLEN
) {
4141 // the string got truncated!
4142 *truncated_path
= 1;
4143 ptr
= my_strrchr(path
, '/');
4145 *ptr
= '\0'; // chop off the string at the last directory component
4147 len
= strlen(path
) + 1;
4150 } else if (ret
== 0) {
4151 *truncated_path
= 1;
4152 } else if (ret
!= 0) {
4153 struct vnode
*mydvp
=dvp
;
4155 if (ret
!= ENOSPC
) {
4156 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4157 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4159 *truncated_path
= 1;
4162 if (mydvp
->v_parent
!= NULL
) {
4163 mydvp
= mydvp
->v_parent
;
4164 } else if (mydvp
->v_mount
) {
4165 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4168 // no parent and no mount point? only thing is to punt and say "/" changed
4169 strlcpy(path
, "/", _len
);
4174 if (mydvp
== NULL
) {
4179 ret
= vn_getpath(mydvp
, path
, &len
);
4180 } while (ret
== ENOSPC
);
4188 * Make a hard file link.
4190 * Returns: 0 Success
4195 * vnode_authorize:???
4200 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4201 user_addr_t link
, int flag
, enum uio_seg segflg
)
4203 vnode_t vp
, dvp
, lvp
;
4204 struct nameidata nd
;
4210 int need_event
, has_listeners
;
4211 char *target_path
= NULL
;
4214 vp
= dvp
= lvp
= NULLVP
;
4216 /* look up the object we are linking to */
4217 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4218 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4221 error
= nameiat(&nd
, fd1
);
4229 * Normally, linking to directories is not supported.
4230 * However, some file systems may have limited support.
4232 if (vp
->v_type
== VDIR
) {
4233 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4234 error
= EPERM
; /* POSIX */
4238 /* Linking to a directory requires ownership. */
4239 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4240 struct vnode_attr dva
;
4243 VATTR_WANTED(&dva
, va_uid
);
4244 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4245 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4246 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4253 /* lookup the target node */
4257 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4258 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4260 error
= nameiat(&nd
, fd2
);
4267 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4271 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4272 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4275 /* target node must not exist */
4276 if (lvp
!= NULLVP
) {
4280 /* cannot link across mountpoints */
4281 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4286 /* authorize creation of the target node */
4287 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4290 /* and finally make the link */
4291 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4296 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4300 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4304 has_listeners
= kauth_authorize_fileop_has_listeners();
4306 if (need_event
|| has_listeners
) {
4307 char *link_to_path
= NULL
;
4308 int len
, link_name_len
;
4310 /* build the path to the new link file */
4311 GET_PATH(target_path
);
4312 if (target_path
== NULL
) {
4317 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4319 if (has_listeners
) {
4320 /* build the path to file we are linking to */
4321 GET_PATH(link_to_path
);
4322 if (link_to_path
== NULL
) {
4327 link_name_len
= MAXPATHLEN
;
4328 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4330 * Call out to allow 3rd party notification of link.
4331 * Ignore result of kauth_authorize_fileop call.
4333 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4334 (uintptr_t)link_to_path
,
4335 (uintptr_t)target_path
);
4337 if (link_to_path
!= NULL
) {
4338 RELEASE_PATH(link_to_path
);
4343 /* construct fsevent */
4344 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4346 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4349 // build the path to the destination of the link
4350 add_fsevent(FSE_CREATE_FILE
, ctx
,
4351 FSE_ARG_STRING
, len
, target_path
,
4352 FSE_ARG_FINFO
, &finfo
,
4356 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4357 FSE_ARG_VNODE
, vp
->v_parent
,
4365 * nameidone has to happen before we vnode_put(dvp)
4366 * since it may need to release the fs_nodelock on the dvp
4369 if (target_path
!= NULL
) {
4370 RELEASE_PATH(target_path
);
4382 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4384 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4385 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4389 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4391 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4394 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4395 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4399 * Make a symbolic link.
4401 * We could add support for ACLs here too...
4405 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4406 user_addr_t link
, enum uio_seg segflg
)
4408 struct vnode_attr va
;
4411 struct nameidata nd
;
4417 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4418 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4419 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4421 path
= (char *)path_data
;
4425 AUDIT_ARG(text
, path
); /* This is the link string */
4427 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4430 error
= nameiat(&nd
, fd
);
4436 p
= vfs_context_proc(ctx
);
4438 VATTR_SET(&va
, va_type
, VLNK
);
4439 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4442 error
= mac_vnode_check_create(ctx
,
4443 dvp
, &nd
.ni_cnd
, &va
);
4456 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4457 /* get default ownership, etc. */
4459 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4461 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4464 if (error
== 0 && vp
)
4465 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4468 /* do fallback attribute handling */
4469 if (error
== 0 && vp
)
4470 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4473 int update_flags
= 0;
4475 /*check if a new vnode was created, else try to get one*/
4477 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4479 nd
.ni_op
= OP_LOOKUP
;
4481 nd
.ni_cnd
.cn_flags
= 0;
4482 error
= nameiat(&nd
, fd
);
4489 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4490 /* call out to allow 3rd party notification of rename.
4491 * Ignore result of kauth_authorize_fileop call.
4493 if (kauth_authorize_fileop_has_listeners() &&
4495 char *new_link_path
= NULL
;
4498 /* build the path to the new link file */
4499 new_link_path
= get_pathbuff();
4501 vn_getpath(dvp
, new_link_path
, &len
);
4502 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4503 new_link_path
[len
- 1] = '/';
4504 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4507 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4508 (uintptr_t)path
, (uintptr_t)new_link_path
);
4509 if (new_link_path
!= NULL
)
4510 release_pathbuff(new_link_path
);
4513 // Make sure the name & parent pointers are hooked up
4514 if (vp
->v_name
== NULL
)
4515 update_flags
|= VNODE_UPDATE_NAME
;
4516 if (vp
->v_parent
== NULLVP
)
4517 update_flags
|= VNODE_UPDATE_PARENT
;
4520 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4523 add_fsevent(FSE_CREATE_FILE
, ctx
,
4531 * nameidone has to happen before we vnode_put(dvp)
4532 * since it may need to release the fs_nodelock on the dvp
4540 if (path
&& (path
!= (char *)path_data
))
4541 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4547 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4549 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4550 uap
->link
, UIO_USERSPACE
));
4554 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4555 __unused
int32_t *retval
)
4557 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4558 uap
->path2
, UIO_USERSPACE
));
4562 * Delete a whiteout from the filesystem.
4563 * No longer supported.
4566 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4572 * Delete a name from the filesystem.
4576 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4577 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4579 struct nameidata nd
;
4582 struct componentname
*cnp
;
4587 struct vnode_attr va
;
4594 struct vnode_attr
*vap
;
4596 int retry_count
= 0;
4599 cn_flags
= LOCKPARENT
;
4600 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4601 cn_flags
|= AUDITVNPATH1
;
4602 /* If a starting dvp is passed, it trumps any fd passed. */
4607 /* unlink or delete is allowed on rsrc forks and named streams */
4608 cn_flags
|= CN_ALLOWRSRCFORK
;
4619 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4621 nd
.ni_dvp
= start_dvp
;
4622 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4626 error
= nameiat(&nd
, fd
);
4634 /* With Carbon delete semantics, busy files cannot be deleted */
4635 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4636 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4639 /* Skip any potential upcalls if told to. */
4640 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4641 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4645 batched
= vnode_compound_remove_available(vp
);
4647 * The root of a mounted filesystem cannot be deleted.
4649 if (vp
->v_flag
& VROOT
) {
4653 #if DEVELOPMENT || DEBUG
4655 * XXX VSWAP: Check for entitlements or special flag here
4656 * so we can restrict access appropriately.
4658 #else /* DEVELOPMENT || DEBUG */
4660 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
4664 #endif /* DEVELOPMENT || DEBUG */
4667 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4669 if (error
== ENOENT
) {
4670 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4671 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4682 if (!vnode_compound_remove_available(dvp
)) {
4683 panic("No vp, but no compound remove?");
4688 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4691 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4692 /* XXX need to get these data in batched VNOP */
4693 get_fse_info(vp
, &finfo
, ctx
);
4696 error
= vfs_get_notify_attributes(&va
);
4705 has_listeners
= kauth_authorize_fileop_has_listeners();
4706 if (need_event
|| has_listeners
) {
4714 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4718 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4719 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4723 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4725 if (error
== EKEEPLOOKING
) {
4727 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4730 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4731 panic("EKEEPLOOKING, but continue flag not set?");
4734 if (vnode_isdir(vp
)) {
4738 goto continue_lookup
;
4739 } else if (error
== ENOENT
&& batched
) {
4740 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4741 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4743 * For compound VNOPs, the authorization callback may
4744 * return ENOENT in case of racing hardlink lookups
4745 * hitting the name cache, redrive the lookup.
4755 * Call out to allow 3rd party notification of delete.
4756 * Ignore result of kauth_authorize_fileop call.
4759 if (has_listeners
) {
4760 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4761 KAUTH_FILEOP_DELETE
,
4766 if (vp
->v_flag
& VISHARDLINK
) {
4768 // if a hardlink gets deleted we want to blow away the
4769 // v_parent link because the path that got us to this
4770 // instance of the link is no longer valid. this will
4771 // force the next call to get the path to ask the file
4772 // system instead of just following the v_parent link.
4774 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4779 if (vp
->v_flag
& VISHARDLINK
) {
4780 get_fse_info(vp
, &finfo
, ctx
);
4782 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4784 if (truncated_path
) {
4785 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4787 add_fsevent(FSE_DELETE
, ctx
,
4788 FSE_ARG_STRING
, len
, path
,
4789 FSE_ARG_FINFO
, &finfo
,
4800 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4801 * will cause its shadow file to go away if necessary.
4803 if (vp
&& (vnode_isnamedstream(vp
)) &&
4804 (vp
->v_parent
!= NULLVP
) &&
4805 vnode_isshadow(vp
)) {
4810 * nameidone has to happen before we vnode_put(dvp)
4811 * since it may need to release the fs_nodelock on the dvp
4827 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4828 enum uio_seg segflg
, int unlink_flags
)
4830 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4835 * Delete a name from the filesystem using Carbon semantics.
4838 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4840 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4841 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4845 * Delete a name from the filesystem using POSIX semantics.
4848 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4850 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4851 uap
->path
, UIO_USERSPACE
, 0));
4855 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4857 if (uap
->flag
& ~AT_REMOVEDIR
)
4860 if (uap
->flag
& AT_REMOVEDIR
)
4861 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4862 uap
->path
, UIO_USERSPACE
));
4864 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4865 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4869 * Reposition read/write file offset.
4872 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4874 struct fileproc
*fp
;
4876 struct vfs_context
*ctx
;
4877 off_t offset
= uap
->offset
, file_size
;
4880 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4881 if (error
== ENOTSUP
)
4885 if (vnode_isfifo(vp
)) {
4891 ctx
= vfs_context_current();
4893 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4894 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4897 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4904 if ( (error
= vnode_getwithref(vp
)) ) {
4909 switch (uap
->whence
) {
4911 offset
+= fp
->f_fglob
->fg_offset
;
4914 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4916 offset
+= file_size
;
4921 error
= VNOP_IOCTL(vp
, FSCTL_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
4924 error
= VNOP_IOCTL(vp
, FSCTL_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
4930 if (uap
->offset
> 0 && offset
< 0) {
4931 /* Incremented/relative move past max size */
4935 * Allow negative offsets on character devices, per
4936 * POSIX 1003.1-2001. Most likely for writing disk
4939 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4940 /* Decremented/relative move before start */
4944 fp
->f_fglob
->fg_offset
= offset
;
4945 *retval
= fp
->f_fglob
->fg_offset
;
4951 * An lseek can affect whether data is "available to read." Use
4952 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4954 post_event_if_success(vp
, error
, NOTE_NONE
);
4955 (void)vnode_put(vp
);
4962 * Check access permissions.
4964 * Returns: 0 Success
4965 * vnode_authorize:???
4968 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4970 kauth_action_t action
;
4974 * If just the regular access bits, convert them to something
4975 * that vnode_authorize will understand.
4977 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4980 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4981 if (uflags
& W_OK
) {
4982 if (vnode_isdir(vp
)) {
4983 action
|= KAUTH_VNODE_ADD_FILE
|
4984 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4985 /* might want delete rights here too */
4987 action
|= KAUTH_VNODE_WRITE_DATA
;
4990 if (uflags
& X_OK
) {
4991 if (vnode_isdir(vp
)) {
4992 action
|= KAUTH_VNODE_SEARCH
;
4994 action
|= KAUTH_VNODE_EXECUTE
;
4998 /* take advantage of definition of uflags */
4999 action
= uflags
>> 8;
5003 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
5008 /* action == 0 means only check for existence */
5010 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
5021 * access_extended: Check access permissions in bulk.
5023 * Description: uap->entries Pointer to an array of accessx
5024 * descriptor structs, plus one or
5025 * more NULL terminated strings (see
5026 * "Notes" section below).
5027 * uap->size Size of the area pointed to by
5029 * uap->results Pointer to the results array.
5031 * Returns: 0 Success
5032 * ENOMEM Insufficient memory
5033 * EINVAL Invalid arguments
5034 * namei:EFAULT Bad address
5035 * namei:ENAMETOOLONG Filename too long
5036 * namei:ENOENT No such file or directory
5037 * namei:ELOOP Too many levels of symbolic links
5038 * namei:EBADF Bad file descriptor
5039 * namei:ENOTDIR Not a directory
5044 * uap->results Array contents modified
5046 * Notes: The uap->entries are structured as an arbitrary length array
5047 * of accessx descriptors, followed by one or more NULL terminated
5050 * struct accessx_descriptor[0]
5052 * struct accessx_descriptor[n]
5053 * char name_data[0];
5055 * We determine the entry count by walking the buffer containing
5056 * the uap->entries argument descriptor. For each descriptor we
5057 * see, the valid values for the offset ad_name_offset will be
5058 * in the byte range:
5060 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5062 * [ uap->entries + uap->size - 2 ]
5064 * since we must have at least one string, and the string must
5065 * be at least one character plus the NULL terminator in length.
5067 * XXX: Need to support the check-as uid argument
5070 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5072 struct accessx_descriptor
*input
= NULL
;
5073 errno_t
*result
= NULL
;
5076 unsigned int desc_max
, desc_actual
, i
, j
;
5077 struct vfs_context context
;
5078 struct nameidata nd
;
5082 #define ACCESSX_MAX_DESCR_ON_STACK 10
5083 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5085 context
.vc_ucred
= NULL
;
5088 * Validate parameters; if valid, copy the descriptor array and string
5089 * arguments into local memory. Before proceeding, the following
5090 * conditions must have been met:
5092 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5093 * o There must be sufficient room in the request for at least one
5094 * descriptor and a one byte NUL terminated string.
5095 * o The allocation of local storage must not fail.
5097 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
5099 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
5101 if (uap
->size
<= sizeof (stack_input
)) {
5102 input
= stack_input
;
5104 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5105 if (input
== NULL
) {
5110 error
= copyin(uap
->entries
, input
, uap
->size
);
5114 AUDIT_ARG(opaque
, input
, uap
->size
);
5117 * Force NUL termination of the copyin buffer to avoid namei() running
5118 * off the end. If the caller passes us bogus data, they may get a
5121 ((char *)input
)[uap
->size
- 1] = 0;
5124 * Access is defined as checking against the process' real identity,
5125 * even if operations are checking the effective identity. This
5126 * requires that we use a local vfs context.
5128 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5129 context
.vc_thread
= current_thread();
5132 * Find out how many entries we have, so we can allocate the result
5133 * array by walking the list and adjusting the count downward by the
5134 * earliest string offset we see.
5136 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5137 desc_actual
= desc_max
;
5138 for (i
= 0; i
< desc_actual
; i
++) {
5140 * Take the offset to the name string for this entry and
5141 * convert to an input array index, which would be one off
5142 * the end of the array if this entry was the lowest-addressed
5145 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5148 * An offset greater than the max allowable offset is an error.
5149 * It is also an error for any valid entry to point
5150 * to a location prior to the end of the current entry, if
5151 * it's not a reference to the string of the previous entry.
5153 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5158 /* Also do not let ad_name_offset point to something beyond the size of the input */
5159 if (input
[i
].ad_name_offset
>= uap
->size
) {
5165 * An offset of 0 means use the previous descriptor's offset;
5166 * this is used to chain multiple requests for the same file
5167 * to avoid multiple lookups.
5170 /* This is not valid for the first entry */
5179 * If the offset of the string for this descriptor is before
5180 * what we believe is the current actual last descriptor,
5181 * then we need to adjust our estimate downward; this permits
5182 * the string table following the last descriptor to be out
5183 * of order relative to the descriptor list.
5185 if (j
< desc_actual
)
5190 * We limit the actual number of descriptors we are willing to process
5191 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5192 * requested does not exceed this limit,
5194 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5198 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5199 if (result
== NULL
) {
5205 * Do the work by iterating over the descriptor entries we know to
5206 * at least appear to contain valid data.
5209 for (i
= 0; i
< desc_actual
; i
++) {
5211 * If the ad_name_offset is 0, then we use the previous
5212 * results to make the check; otherwise, we are looking up
5215 if (input
[i
].ad_name_offset
!= 0) {
5216 /* discard old vnodes */
5227 * Scan forward in the descriptor list to see if we
5228 * need the parent vnode. We will need it if we are
5229 * deleting, since we must have rights to remove
5230 * entries in the parent directory, as well as the
5231 * rights to delete the object itself.
5233 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5234 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5235 if (input
[j
].ad_flags
& _DELETE_OK
)
5238 niopts
= FOLLOW
| AUDITVNPATH1
;
5240 /* need parent for vnode_authorize for deletion test */
5242 niopts
|= WANTPARENT
;
5245 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5246 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5258 * Handle lookup errors.
5268 /* run this access check */
5269 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5272 /* fatal lookup error */
5278 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5280 /* copy out results */
5281 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5284 if (input
&& input
!= stack_input
)
5285 FREE(input
, M_TEMP
);
5287 FREE(result
, M_TEMP
);
5292 if (IS_VALID_CRED(context
.vc_ucred
))
5293 kauth_cred_unref(&context
.vc_ucred
);
5299 * Returns: 0 Success
5300 * namei:EFAULT Bad address
5301 * namei:ENAMETOOLONG Filename too long
5302 * namei:ENOENT No such file or directory
5303 * namei:ELOOP Too many levels of symbolic links
5304 * namei:EBADF Bad file descriptor
5305 * namei:ENOTDIR Not a directory
5310 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5311 int flag
, enum uio_seg segflg
)
5314 struct nameidata nd
;
5316 struct vfs_context context
;
5318 int is_namedstream
= 0;
5322 * Unless the AT_EACCESS option is used, Access is defined as checking
5323 * against the process' real identity, even if operations are checking
5324 * the effective identity. So we need to tweak the credential
5325 * in the context for that case.
5327 if (!(flag
& AT_EACCESS
))
5328 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5330 context
.vc_ucred
= ctx
->vc_ucred
;
5331 context
.vc_thread
= ctx
->vc_thread
;
5334 niopts
= FOLLOW
| AUDITVNPATH1
;
5335 /* need parent for vnode_authorize for deletion test */
5336 if (amode
& _DELETE_OK
)
5337 niopts
|= WANTPARENT
;
5338 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5342 /* access(F_OK) calls are allowed for resource forks. */
5344 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5346 error
= nameiat(&nd
, fd
);
5351 /* Grab reference on the shadow stream file vnode to
5352 * force an inactive on release which will mark it
5355 if (vnode_isnamedstream(nd
.ni_vp
) &&
5356 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5357 vnode_isshadow(nd
.ni_vp
)) {
5359 vnode_ref(nd
.ni_vp
);
5363 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5366 if (is_namedstream
) {
5367 vnode_rele(nd
.ni_vp
);
5371 vnode_put(nd
.ni_vp
);
5372 if (amode
& _DELETE_OK
)
5373 vnode_put(nd
.ni_dvp
);
5377 if (!(flag
& AT_EACCESS
))
5378 kauth_cred_unref(&context
.vc_ucred
);
5383 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5385 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5386 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5390 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5391 __unused
int32_t *retval
)
5393 if (uap
->flag
& ~AT_EACCESS
)
5396 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5397 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5401 * Returns: 0 Success
5408 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5409 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5410 enum uio_seg segflg
, int fd
, int flag
)
5412 struct nameidata nd
;
5419 struct user64_stat user64_sb
;
5420 struct user32_stat user32_sb
;
5421 struct user64_stat64 user64_sb64
;
5422 struct user32_stat64 user32_sb64
;
5426 kauth_filesec_t fsec
;
5427 size_t xsecurity_bufsize
;
5430 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5431 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5435 int is_namedstream
= 0;
5436 /* stat calls are allowed for resource forks. */
5437 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5439 error
= nameiat(&nd
, fd
);
5442 fsec
= KAUTH_FILESEC_NONE
;
5444 statptr
= (void *)&source
;
5447 /* Grab reference on the shadow stream file vnode to
5448 * force an inactive on release which will mark it
5451 if (vnode_isnamedstream(nd
.ni_vp
) &&
5452 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5453 vnode_isshadow(nd
.ni_vp
)) {
5455 vnode_ref(nd
.ni_vp
);
5459 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5462 if (is_namedstream
) {
5463 vnode_rele(nd
.ni_vp
);
5466 vnode_put(nd
.ni_vp
);
5471 /* Zap spare fields */
5472 if (isstat64
!= 0) {
5473 source
.sb64
.st_lspare
= 0;
5474 source
.sb64
.st_qspare
[0] = 0LL;
5475 source
.sb64
.st_qspare
[1] = 0LL;
5476 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5477 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5478 my_size
= sizeof(dest
.user64_sb64
);
5479 sbp
= (caddr_t
)&dest
.user64_sb64
;
5481 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5482 my_size
= sizeof(dest
.user32_sb64
);
5483 sbp
= (caddr_t
)&dest
.user32_sb64
;
5486 * Check if we raced (post lookup) against the last unlink of a file.
5488 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5489 source
.sb64
.st_nlink
= 1;
5492 source
.sb
.st_lspare
= 0;
5493 source
.sb
.st_qspare
[0] = 0LL;
5494 source
.sb
.st_qspare
[1] = 0LL;
5495 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5496 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5497 my_size
= sizeof(dest
.user64_sb
);
5498 sbp
= (caddr_t
)&dest
.user64_sb
;
5500 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5501 my_size
= sizeof(dest
.user32_sb
);
5502 sbp
= (caddr_t
)&dest
.user32_sb
;
5506 * Check if we raced (post lookup) against the last unlink of a file.
5508 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5509 source
.sb
.st_nlink
= 1;
5512 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5515 /* caller wants extended security information? */
5516 if (xsecurity
!= USER_ADDR_NULL
) {
5518 /* did we get any? */
5519 if (fsec
== KAUTH_FILESEC_NONE
) {
5520 if (susize(xsecurity_size
, 0) != 0) {
5525 /* find the user buffer size */
5526 xsecurity_bufsize
= fusize(xsecurity_size
);
5528 /* copy out the actual data size */
5529 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5534 /* if the caller supplied enough room, copy out to it */
5535 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5536 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5540 if (fsec
!= KAUTH_FILESEC_NONE
)
5541 kauth_filesec_free(fsec
);
5546 * stat_extended: Get file status; with extended security (ACL).
5548 * Parameters: p (ignored)
5549 * uap User argument descriptor (see below)
5552 * Indirect: uap->path Path of file to get status from
5553 * uap->ub User buffer (holds file status info)
5554 * uap->xsecurity ACL to get (extended security)
5555 * uap->xsecurity_size Size of ACL
5557 * Returns: 0 Success
5562 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5563 __unused
int32_t *retval
)
5565 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5566 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5571 * Returns: 0 Success
5572 * fstatat_internal:??? [see fstatat_internal() in this file]
5575 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5577 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5578 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5582 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5584 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5585 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5589 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5591 * Parameters: p (ignored)
5592 * uap User argument descriptor (see below)
5595 * Indirect: uap->path Path of file to get status from
5596 * uap->ub User buffer (holds file status info)
5597 * uap->xsecurity ACL to get (extended security)
5598 * uap->xsecurity_size Size of ACL
5600 * Returns: 0 Success
5605 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5607 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5608 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5613 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5615 * Parameters: p (ignored)
5616 * uap User argument descriptor (see below)
5619 * Indirect: uap->path Path of file to get status from
5620 * uap->ub User buffer (holds file status info)
5621 * uap->xsecurity ACL to get (extended security)
5622 * uap->xsecurity_size Size of ACL
5624 * Returns: 0 Success
5629 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5631 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5632 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5633 AT_SYMLINK_NOFOLLOW
));
5637 * Get file status; this version does not follow links.
5640 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5642 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5643 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5647 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5649 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5650 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5654 * lstat64_extended: Get file status; can handle large inode numbers; does not
5655 * follow links; with extended security (ACL).
5657 * Parameters: p (ignored)
5658 * uap User argument descriptor (see below)
5661 * Indirect: uap->path Path of file to get status from
5662 * uap->ub User buffer (holds file status info)
5663 * uap->xsecurity ACL to get (extended security)
5664 * uap->xsecurity_size Size of ACL
5666 * Returns: 0 Success
5671 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5673 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5674 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5675 AT_SYMLINK_NOFOLLOW
));
5679 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5681 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5684 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5685 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5689 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5690 __unused
int32_t *retval
)
5692 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5695 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5696 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5700 * Get configurable pathname variables.
5702 * Returns: 0 Success
5706 * Notes: Global implementation constants are intended to be
5707 * implemented in this function directly; all other constants
5708 * are per-FS implementation, and therefore must be handled in
5709 * each respective FS, instead.
5711 * XXX We implement some things globally right now that should actually be
5712 * XXX per-FS; we will need to deal with this at some point.
5716 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5719 struct nameidata nd
;
5720 vfs_context_t ctx
= vfs_context_current();
5722 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5723 UIO_USERSPACE
, uap
->path
, ctx
);
5728 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5730 vnode_put(nd
.ni_vp
);
5736 * Return target name of a symbolic link.
5740 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5741 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5747 struct nameidata nd
;
5748 char uio_buf
[ UIO_SIZEOF(1) ];
5750 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5753 error
= nameiat(&nd
, fd
);
5760 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5761 &uio_buf
[0], sizeof(uio_buf
));
5762 uio_addiov(auio
, buf
, bufsize
);
5763 if (vp
->v_type
!= VLNK
) {
5767 error
= mac_vnode_check_readlink(ctx
, vp
);
5770 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5773 error
= VNOP_READLINK(vp
, auio
, ctx
);
5777 *retval
= bufsize
- (int)uio_resid(auio
);
5782 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5784 enum uio_seg procseg
;
5786 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5787 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5788 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5789 uap
->count
, procseg
, retval
));
5793 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5795 enum uio_seg procseg
;
5797 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5798 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5799 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5803 * Change file flags.
5805 * NOTE: this will vnode_put() `vp'
5808 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5810 struct vnode_attr va
;
5811 kauth_action_t action
;
5815 VATTR_SET(&va
, va_flags
, flags
);
5818 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5823 /* request authorisation, disregard immutability */
5824 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5827 * Request that the auth layer disregard those file flags it's allowed to when
5828 * authorizing this operation; we need to do this in order to be able to
5829 * clear immutable flags.
5831 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5833 error
= vnode_setattr(vp
, &va
, ctx
);
5837 mac_vnode_notify_setflags(ctx
, vp
, flags
);
5840 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5849 * Change flags of a file given a path name.
5853 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5856 vfs_context_t ctx
= vfs_context_current();
5858 struct nameidata nd
;
5860 AUDIT_ARG(fflags
, uap
->flags
);
5861 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5862 UIO_USERSPACE
, uap
->path
, ctx
);
5869 /* we don't vnode_put() here because chflags1 does internally */
5870 error
= chflags1(vp
, uap
->flags
, ctx
);
5876 * Change flags of a file given a file descriptor.
5880 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5885 AUDIT_ARG(fd
, uap
->fd
);
5886 AUDIT_ARG(fflags
, uap
->flags
);
5887 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5890 if ((error
= vnode_getwithref(vp
))) {
5895 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5897 /* we don't vnode_put() here because chflags1 does internally */
5898 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5905 * Change security information on a filesystem object.
5907 * Returns: 0 Success
5908 * EPERM Operation not permitted
5909 * vnode_authattr:??? [anything vnode_authattr can return]
5910 * vnode_authorize:??? [anything vnode_authorize can return]
5911 * vnode_setattr:??? [anything vnode_setattr can return]
5913 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5914 * translated to EPERM before being returned.
5917 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5919 kauth_action_t action
;
5922 AUDIT_ARG(mode
, vap
->va_mode
);
5923 /* XXX audit new args */
5926 /* chmod calls are not allowed for resource forks. */
5927 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5933 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5934 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5937 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
5938 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
5939 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5940 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1)))
5944 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
5945 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
)))
5949 /* make sure that the caller is allowed to set this security information */
5950 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5951 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5952 if (error
== EACCES
)
5957 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0)
5961 if (VATTR_IS_ACTIVE(vap
, va_mode
))
5962 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
5964 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
))
5965 mac_vnode_notify_setowner(ctx
, vp
,
5966 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5967 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
5969 if (VATTR_IS_ACTIVE(vap
, va_acl
))
5970 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
5978 * Change mode of a file given a path name.
5980 * Returns: 0 Success
5981 * namei:??? [anything namei can return]
5982 * chmod_vnode:??? [anything chmod_vnode can return]
5985 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5986 int fd
, int flag
, enum uio_seg segflg
)
5988 struct nameidata nd
;
5991 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5992 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5994 if ((error
= nameiat(&nd
, fd
)))
5996 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5997 vnode_put(nd
.ni_vp
);
6003 * chmod_extended: Change the mode of a file given a path name; with extended
6004 * argument list (including extended security (ACL)).
6006 * Parameters: p Process requesting the open
6007 * uap User argument descriptor (see below)
6010 * Indirect: uap->path Path to object (same as 'chmod')
6011 * uap->uid UID to set
6012 * uap->gid GID to set
6013 * uap->mode File mode to set (same as 'chmod')
6014 * uap->xsecurity ACL to set (or delete)
6016 * Returns: 0 Success
6019 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6021 * XXX: We should enumerate the possible errno values here, and where
6022 * in the code they originated.
6025 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6028 struct vnode_attr va
;
6029 kauth_filesec_t xsecdst
;
6031 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6034 if (uap
->mode
!= -1)
6035 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6036 if (uap
->uid
!= KAUTH_UID_NONE
)
6037 VATTR_SET(&va
, va_uid
, uap
->uid
);
6038 if (uap
->gid
!= KAUTH_GID_NONE
)
6039 VATTR_SET(&va
, va_gid
, uap
->gid
);
6042 switch(uap
->xsecurity
) {
6043 /* explicit remove request */
6044 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6045 VATTR_SET(&va
, va_acl
, NULL
);
6048 case USER_ADDR_NULL
:
6051 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6053 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6054 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6057 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
6060 if (xsecdst
!= NULL
)
6061 kauth_filesec_free(xsecdst
);
6066 * Returns: 0 Success
6067 * chmodat:??? [anything chmodat can return]
6070 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6071 int flag
, enum uio_seg segflg
)
6073 struct vnode_attr va
;
6076 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6078 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
6082 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6084 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6085 AT_FDCWD
, 0, UIO_USERSPACE
));
6089 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
6091 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6094 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6095 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
6099 * Change mode of a file given a file descriptor.
6102 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6109 if ((error
= file_vnode(fd
, &vp
)) != 0)
6111 if ((error
= vnode_getwithref(vp
)) != 0) {
6115 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6117 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
6118 (void)vnode_put(vp
);
6125 * fchmod_extended: Change mode of a file given a file descriptor; with
6126 * extended argument list (including extended security (ACL)).
6128 * Parameters: p Process requesting to change file mode
6129 * uap User argument descriptor (see below)
6132 * Indirect: uap->mode File mode to set (same as 'chmod')
6133 * uap->uid UID to set
6134 * uap->gid GID to set
6135 * uap->xsecurity ACL to set (or delete)
6136 * uap->fd File descriptor of file to change mode
6138 * Returns: 0 Success
6143 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6146 struct vnode_attr va
;
6147 kauth_filesec_t xsecdst
;
6149 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6152 if (uap
->mode
!= -1)
6153 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6154 if (uap
->uid
!= KAUTH_UID_NONE
)
6155 VATTR_SET(&va
, va_uid
, uap
->uid
);
6156 if (uap
->gid
!= KAUTH_GID_NONE
)
6157 VATTR_SET(&va
, va_gid
, uap
->gid
);
6160 switch(uap
->xsecurity
) {
6161 case USER_ADDR_NULL
:
6162 VATTR_SET(&va
, va_acl
, NULL
);
6164 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6165 VATTR_SET(&va
, va_acl
, NULL
);
6168 case CAST_USER_ADDR_T(-1):
6171 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6173 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6176 error
= fchmod1(p
, uap
->fd
, &va
);
6179 switch(uap
->xsecurity
) {
6180 case USER_ADDR_NULL
:
6181 case CAST_USER_ADDR_T(-1):
6184 if (xsecdst
!= NULL
)
6185 kauth_filesec_free(xsecdst
);
6191 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6193 struct vnode_attr va
;
6196 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6198 return(fchmod1(p
, uap
->fd
, &va
));
6203 * Set ownership given a path name.
6207 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6208 gid_t gid
, int flag
, enum uio_seg segflg
)
6211 struct vnode_attr va
;
6213 struct nameidata nd
;
6215 kauth_action_t action
;
6217 AUDIT_ARG(owner
, uid
, gid
);
6219 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6220 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6222 error
= nameiat(&nd
, fd
);
6230 if (uid
!= (uid_t
)VNOVAL
)
6231 VATTR_SET(&va
, va_uid
, uid
);
6232 if (gid
!= (gid_t
)VNOVAL
)
6233 VATTR_SET(&va
, va_gid
, gid
);
6236 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6241 /* preflight and authorize attribute changes */
6242 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6244 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6246 error
= vnode_setattr(vp
, &va
, ctx
);
6250 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6255 * EACCES is only allowed from namei(); permissions failure should
6256 * return EPERM, so we need to translate the error code.
6258 if (error
== EACCES
)
6266 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6268 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6269 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
6273 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6275 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6276 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
6280 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6282 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6285 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6286 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6290 * Set ownership given a file descriptor.
6294 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6296 struct vnode_attr va
;
6297 vfs_context_t ctx
= vfs_context_current();
6300 kauth_action_t action
;
6302 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6303 AUDIT_ARG(fd
, uap
->fd
);
6305 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6308 if ( (error
= vnode_getwithref(vp
)) ) {
6312 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6315 if (uap
->uid
!= VNOVAL
)
6316 VATTR_SET(&va
, va_uid
, uap
->uid
);
6317 if (uap
->gid
!= VNOVAL
)
6318 VATTR_SET(&va
, va_gid
, uap
->gid
);
6321 /* chown calls are not allowed for resource forks. */
6322 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6329 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6334 /* preflight and authorize attribute changes */
6335 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6337 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6338 if (error
== EACCES
)
6342 error
= vnode_setattr(vp
, &va
, ctx
);
6346 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6350 (void)vnode_put(vp
);
6356 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6360 if (usrtvp
== USER_ADDR_NULL
) {
6361 struct timeval old_tv
;
6362 /* XXX Y2038 bug because of microtime argument */
6364 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6367 if (IS_64BIT_PROCESS(current_proc())) {
6368 struct user64_timeval tv
[2];
6369 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6372 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6373 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6375 struct user32_timeval tv
[2];
6376 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6379 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6380 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6387 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6391 struct vnode_attr va
;
6392 kauth_action_t action
;
6394 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6397 VATTR_SET(&va
, va_access_time
, ts
[0]);
6398 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6400 va
.va_vaflags
|= VA_UTIMES_NULL
;
6403 /* utimes calls are not allowed for resource forks. */
6404 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6411 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6415 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6416 if (!nullflag
&& error
== EACCES
)
6421 /* since we may not need to auth anything, check here */
6422 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6423 if (!nullflag
&& error
== EACCES
)
6427 error
= vnode_setattr(vp
, &va
, ctx
);
6431 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6439 * Set the access and modification times of a file.
6443 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6445 struct timespec ts
[2];
6448 struct nameidata nd
;
6449 vfs_context_t ctx
= vfs_context_current();
6452 * AUDIT: Needed to change the order of operations to do the
6453 * name lookup first because auditing wants the path.
6455 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6456 UIO_USERSPACE
, uap
->path
, ctx
);
6463 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6464 * the current time instead.
6467 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6470 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6473 vnode_put(nd
.ni_vp
);
6478 * Set the access and modification times of a file.
6482 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6484 struct timespec ts
[2];
6489 AUDIT_ARG(fd
, uap
->fd
);
6491 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6493 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6495 if((error
= vnode_getwithref(vp
))) {
6500 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6507 * Truncate a file given its path name.
6511 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6514 struct vnode_attr va
;
6515 vfs_context_t ctx
= vfs_context_current();
6517 struct nameidata nd
;
6518 kauth_action_t action
;
6520 if (uap
->length
< 0)
6522 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6523 UIO_USERSPACE
, uap
->path
, ctx
);
6524 if ((error
= namei(&nd
)))
6531 VATTR_SET(&va
, va_data_size
, uap
->length
);
6534 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6539 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6541 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6543 error
= vnode_setattr(vp
, &va
, ctx
);
6547 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6556 * Truncate a file given a file descriptor.
6560 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6562 vfs_context_t ctx
= vfs_context_current();
6563 struct vnode_attr va
;
6565 struct fileproc
*fp
;
6569 AUDIT_ARG(fd
, uap
->fd
);
6570 if (uap
->length
< 0)
6573 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6577 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6579 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6588 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
6590 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6591 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6596 if ((error
= vnode_getwithref(vp
)) != 0) {
6600 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6603 error
= mac_vnode_check_truncate(ctx
,
6604 fp
->f_fglob
->fg_cred
, vp
);
6606 (void)vnode_put(vp
);
6611 VATTR_SET(&va
, va_data_size
, uap
->length
);
6612 error
= vnode_setattr(vp
, &va
, ctx
);
6616 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
6619 (void)vnode_put(vp
);
6627 * Sync an open file with synchronized I/O _file_ integrity completion
6631 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6633 __pthread_testcancel(1);
6634 return(fsync_common(p
, uap
, MNT_WAIT
));
6639 * Sync an open file with synchronized I/O _file_ integrity completion
6641 * Notes: This is a legacy support function that does not test for
6642 * thread cancellation points.
6646 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6648 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6653 * Sync an open file with synchronized I/O _data_ integrity completion
6657 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6659 __pthread_testcancel(1);
6660 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6667 * Common fsync code to support both synchronized I/O file integrity completion
6668 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6670 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6671 * will only guarantee that the file data contents are retrievable. If
6672 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6673 * includes additional metadata unnecessary for retrieving the file data
6674 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6677 * Parameters: p The process
6678 * uap->fd The descriptor to synchronize
6679 * flags The data integrity flags
6681 * Returns: int Success
6682 * fp_getfvp:EBADF Bad file descriptor
6683 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6684 * VNOP_FSYNC:??? unspecified
6686 * Notes: We use struct fsync_args because it is a short name, and all
6687 * caller argument structures are otherwise identical.
6690 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6693 struct fileproc
*fp
;
6694 vfs_context_t ctx
= vfs_context_current();
6697 AUDIT_ARG(fd
, uap
->fd
);
6699 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6701 if ( (error
= vnode_getwithref(vp
)) ) {
6706 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6708 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6711 /* Sync resource fork shadow file if necessary. */
6713 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6714 (vp
->v_parent
!= NULLVP
) &&
6715 vnode_isshadow(vp
) &&
6716 (fp
->f_flags
& FP_WRITTEN
)) {
6717 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6721 (void)vnode_put(vp
);
6727 * Duplicate files. Source must be a file, target must be a file or
6730 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6731 * perform inheritance correctly.
6735 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6737 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6738 struct nameidata fromnd
, tond
;
6740 vfs_context_t ctx
= vfs_context_current();
6742 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
6743 struct vnode_attr va
;
6746 /* Check that the flags are valid. */
6748 if (uap
->flags
& ~CPF_MASK
) {
6752 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6753 UIO_USERSPACE
, uap
->from
, ctx
);
6754 if ((error
= namei(&fromnd
)))
6758 NDINIT(&tond
, CREATE
, OP_LINK
,
6759 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6760 UIO_USERSPACE
, uap
->to
, ctx
);
6761 if ((error
= namei(&tond
))) {
6768 if (!(uap
->flags
& CPF_OVERWRITE
)) {
6774 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6779 /* This calls existing MAC hooks for open */
6780 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
6787 * See unlinkat_internal for an explanation of the potential
6788 * ENOENT from the MAC hook but the gist is that the MAC hook
6789 * can fail because vn_getpath isn't able to return the full
6790 * path. We choose to ignore this failure.
6792 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
6793 if (error
&& error
!= ENOENT
)
6800 VATTR_SET(&va
, va_type
, fvp
->v_type
);
6801 /* Mask off all but regular access permissions */
6802 VATTR_SET(&va
, va_mode
,
6803 ((((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
6804 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
6807 #endif /* CONFIG_MACF */
6809 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6815 * If source is the same as the destination (that is the
6816 * same inode number) then there is nothing to do.
6817 * (fixed to have POSIX semantics - CSM 3/2/98)
6822 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6824 sdvp
= tond
.ni_startdir
;
6826 * nameidone has to happen before we vnode_put(tdvp)
6827 * since it may need to release the fs_nodelock on the tdvp
6845 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6848 * Helper function for doing clones. The caller is expected to provide an
6849 * iocounted source vnode and release it.
6852 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
6853 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
6856 struct nameidata tond
;
6859 boolean_t free_src_acl
;
6860 boolean_t attr_cleanup
;
6862 kauth_action_t action
;
6863 struct componentname
*cnp
;
6865 struct vnode_attr va
;
6866 struct vnode_attr nva
;
6868 v_type
= vnode_vtype(fvp
);
6873 action
= KAUTH_VNODE_ADD_FILE
;
6876 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
6877 fvp
->v_mountedhere
) {
6880 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
6886 AUDIT_ARG(fd2
, dst_dirfd
);
6887 AUDIT_ARG(value32
, flags
);
6889 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6890 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
6891 UIO_USERSPACE
, dst
, ctx
);
6892 if ((error
= nameiat(&tond
, dst_dirfd
)))
6898 free_src_acl
= FALSE
;
6899 attr_cleanup
= FALSE
;
6906 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
6912 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
)))
6915 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
)))
6918 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
6919 if (data_read_authorised
)
6920 action
&= ~KAUTH_VNODE_READ_DATA
;
6921 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
)))
6925 * certain attributes may need to be changed from the source, we ask for
6929 VATTR_WANTED(&va
, va_uid
);
6930 VATTR_WANTED(&va
, va_gid
);
6931 VATTR_WANTED(&va
, va_mode
);
6932 VATTR_WANTED(&va
, va_flags
);
6933 VATTR_WANTED(&va
, va_acl
);
6935 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0)
6939 VATTR_SET(&nva
, va_type
, v_type
);
6940 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
6941 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
6942 free_src_acl
= TRUE
;
6945 /* Handle ACL inheritance, initialize vap. */
6946 if (v_type
== VLNK
) {
6947 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
6949 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
6952 attr_cleanup
= TRUE
;
6956 * We've got initial values for all security parameters,
6957 * If we are superuser, then we can change owners to be the
6958 * same as the source. Both superuser and the owner have default
6959 * WRITE_SECURITY privileges so all other fields can be taken
6960 * from source as well.
6962 if (vfs_context_issuser(ctx
)) {
6963 if (VATTR_IS_SUPPORTED(&va
, va_uid
))
6964 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
6965 if (VATTR_IS_SUPPORTED(&va
, va_gid
))
6966 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
6968 if (VATTR_IS_SUPPORTED(&va
, va_mode
))
6969 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
6970 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6971 VATTR_SET(&nva
, va_flags
,
6972 ((va
.va_flags
& ~SF_RESTRICTED
) | /* Turn off from source */
6973 (nva
.va_flags
& SF_RESTRICTED
)));
6976 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
,
6977 VNODE_CLONEFILE_DEFAULT
, ctx
);
6979 if (!error
&& tvp
) {
6980 int update_flags
= 0;
6983 #endif /* CONFIG_FSE */
6986 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
6987 VNODE_LABEL_CREATE
, ctx
);
6990 * If some of the requested attributes weren't handled by the
6991 * VNOP, use our fallback code.
6993 if (!VATTR_ALL_SUPPORTED(&va
))
6994 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
6996 // Make sure the name & parent pointers are hooked up
6997 if (tvp
->v_name
== NULL
)
6998 update_flags
|= VNODE_UPDATE_NAME
;
6999 if (tvp
->v_parent
== NULLVP
)
7000 update_flags
|= VNODE_UPDATE_PARENT
;
7003 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
7004 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
7008 switch (vnode_vtype(tvp
)) {
7012 fsevent
= FSE_CREATE_FILE
;
7015 fsevent
= FSE_CREATE_DIR
;
7021 if (need_fsevent(fsevent
, tvp
)) {
7022 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
7025 #endif /* CONFIG_FSE */
7030 vn_attribute_cleanup(&nva
, defaulted
);
7031 if (free_src_acl
&& va
.va_acl
)
7032 kauth_acl_free(va
.va_acl
);
7041 * clone files or directories, target must not exist.
7045 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7046 __unused
int32_t *retval
)
7049 struct nameidata fromnd
;
7052 vfs_context_t ctx
= vfs_context_current();
7054 /* Check that the flags are valid. */
7055 if (uap
->flags
& ~CLONE_NOFOLLOW
)
7058 AUDIT_ARG(fd
, uap
->src_dirfd
);
7060 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7061 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7062 UIO_USERSPACE
, uap
->src
, ctx
);
7063 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
)))
7069 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
7077 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7078 __unused
int32_t *retval
)
7081 struct fileproc
*fp
;
7083 vfs_context_t ctx
= vfs_context_current();
7085 AUDIT_ARG(fd
, uap
->src_fd
);
7086 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
7090 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7091 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7096 if ((error
= vnode_getwithref(fvp
)))
7099 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
7101 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7106 file_drop(uap
->src_fd
);
7111 * Rename files. Source and destination must either both be directories,
7112 * or both not be directories. If target is a directory, it must be empty.
7116 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7117 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7119 if (flags
& ~VFS_RENAME_FLAGS_MASK
)
7122 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
))
7127 struct nameidata
*fromnd
, *tond
;
7133 const char *oname
= NULL
;
7134 char *from_name
= NULL
, *to_name
= NULL
;
7135 int from_len
=0, to_len
=0;
7136 int holding_mntlock
;
7137 mount_t locked_mp
= NULL
;
7138 vnode_t oparent
= NULLVP
;
7140 fse_info from_finfo
, to_finfo
;
7142 int from_truncated
=0, to_truncated
;
7144 struct vnode_attr
*fvap
, *tvap
;
7146 /* carving out a chunk for structs that are too big to be on stack. */
7148 struct nameidata from_node
, to_node
;
7149 struct vnode_attr fv_attr
, tv_attr
;
7151 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7152 fromnd
= &__rename_data
->from_node
;
7153 tond
= &__rename_data
->to_node
;
7155 holding_mntlock
= 0;
7164 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7166 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7168 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7170 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7173 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7174 if ( (error
= nameiat(fromnd
, fromfd
)) )
7176 fdvp
= fromnd
->ni_dvp
;
7177 fvp
= fromnd
->ni_vp
;
7179 if (fvp
&& fvp
->v_type
== VDIR
)
7180 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7183 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7184 if ( (error
= nameiat(tond
, tofd
)) ) {
7186 * Translate error code for rename("dir1", "dir2/.").
7188 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
7192 tdvp
= tond
->ni_dvp
;
7196 #if DEVELOPMENT || DEBUG
7198 * XXX VSWAP: Check for entitlements or special flag here
7199 * so we can restrict access appropriately.
7201 #else /* DEVELOPMENT || DEBUG */
7203 if (fromnd
->ni_vp
&& vnode_isswap(fromnd
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7208 if (tond
->ni_vp
&& vnode_isswap(tond
->ni_vp
) && (ctx
!= vfs_context_kernel())) {
7212 #endif /* DEVELOPMENT || DEBUG */
7214 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7219 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7224 batched
= vnode_compound_rename_available(fdvp
);
7227 * Claim: this check will never reject a valid rename.
7228 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7229 * Suppose fdvp and tdvp are not on the same mount.
7230 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7231 * then you can't move it to within another dir on the same mountpoint.
7232 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7234 * If this check passes, then we are safe to pass these vnodes to the same FS.
7236 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7240 goto skipped_lookup
;
7244 error
= vn_authorize_renamex(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, flags
, NULL
);
7246 if (error
== ENOENT
) {
7247 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7248 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7250 * We encountered a race where after doing the namei, tvp stops
7251 * being valid. If so, simply re-drive the rename call from the
7263 * If the source and destination are the same (i.e. they're
7264 * links to the same vnode) and the target file system is
7265 * case sensitive, then there is nothing to do.
7267 * XXX Come back to this.
7273 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7274 * then assume that this file system is case sensitive.
7276 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7277 pathconf_val
!= 0) {
7283 * Allow the renaming of mount points.
7284 * - target must not exist
7285 * - target must reside in the same directory as source
7286 * - union mounts cannot be renamed
7287 * - "/" cannot be renamed
7289 * XXX Handle this in VFS after a continued lookup (if we missed
7290 * in the cache to start off)
7292 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7293 * we'll skip past here. The file system is responsible for
7294 * checking that @tvp is not a descendent of @fvp and vice versa
7295 * so it should always return EINVAL if either @tvp or @fvp is the
7298 if ((fvp
->v_flag
& VROOT
) &&
7299 (fvp
->v_type
== VDIR
) &&
7301 (fvp
->v_mountedhere
== NULL
) &&
7303 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7304 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7307 /* switch fvp to the covered vnode */
7308 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7309 if ( (vnode_getwithref(coveredvp
)) ) {
7319 * Check for cross-device rename.
7321 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7322 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7328 * If source is the same as the destination (that is the
7329 * same inode number) then there is nothing to do...
7330 * EXCEPT if the underlying file system supports case
7331 * insensitivity and is case preserving. In this case
7332 * the file system needs to handle the special case of
7333 * getting the same vnode as target (fvp) and source (tvp).
7335 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7336 * and _PC_CASE_PRESERVING can have this exception, and they need to
7337 * handle the special case of getting the same vnode as target and
7338 * source. NOTE: Then the target is unlocked going into vnop_rename,
7339 * so not to cause locking problems. There is a single reference on tvp.
7341 * NOTE - that fvp == tvp also occurs if they are hard linked and
7342 * that correct behaviour then is just to return success without doing
7345 * XXX filesystem should take care of this itself, perhaps...
7347 if (fvp
== tvp
&& fdvp
== tdvp
) {
7348 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7349 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7350 fromnd
->ni_cnd
.cn_namelen
)) {
7355 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7357 * we're holding a reference and lock
7358 * on locked_mp, but it no longer matches
7359 * what we want to do... so drop our hold
7361 mount_unlock_renames(locked_mp
);
7362 mount_drop(locked_mp
, 0);
7363 holding_mntlock
= 0;
7365 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7367 * serialize renames that re-shape
7368 * the tree... if holding_mntlock is
7369 * set, then we're ready to go...
7371 * first need to drop the iocounts
7372 * we picked up, second take the
7373 * lock to serialize the access,
7374 * then finally start the lookup
7375 * process over with the lock held
7377 if (!holding_mntlock
) {
7379 * need to grab a reference on
7380 * the mount point before we
7381 * drop all the iocounts... once
7382 * the iocounts are gone, the mount
7385 locked_mp
= fvp
->v_mount
;
7386 mount_ref(locked_mp
, 0);
7389 * nameidone has to happen before we vnode_put(tvp)
7390 * since it may need to release the fs_nodelock on the tvp
7399 * nameidone has to happen before we vnode_put(fdvp)
7400 * since it may need to release the fs_nodelock on the fvp
7407 mount_lock_renames(locked_mp
);
7408 holding_mntlock
= 1;
7414 * when we dropped the iocounts to take
7415 * the lock, we allowed the identity of
7416 * the various vnodes to change... if they did,
7417 * we may no longer be dealing with a rename
7418 * that reshapes the tree... once we're holding
7419 * the iocounts, the vnodes can't change type
7420 * so we're free to drop the lock at this point
7423 if (holding_mntlock
) {
7424 mount_unlock_renames(locked_mp
);
7425 mount_drop(locked_mp
, 0);
7426 holding_mntlock
= 0;
7430 // save these off so we can later verify that fvp is the same
7431 oname
= fvp
->v_name
;
7432 oparent
= fvp
->v_parent
;
7436 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7439 get_fse_info(fvp
, &from_finfo
, ctx
);
7441 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7446 fvap
= &__rename_data
->fv_attr
;
7450 get_fse_info(tvp
, &to_finfo
, ctx
);
7451 } else if (batched
) {
7452 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7457 tvap
= &__rename_data
->tv_attr
;
7462 #endif /* CONFIG_FSE */
7464 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
7465 if (from_name
== NULL
) {
7466 GET_PATH(from_name
);
7467 if (from_name
== NULL
) {
7473 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7475 if (to_name
== NULL
) {
7477 if (to_name
== NULL
) {
7483 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7485 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7486 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7489 if (holding_mntlock
) {
7491 * we can drop our serialization
7494 mount_unlock_renames(locked_mp
);
7495 mount_drop(locked_mp
, 0);
7496 holding_mntlock
= 0;
7499 if (error
== EKEEPLOOKING
) {
7500 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7501 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7502 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7506 fromnd
->ni_vp
= fvp
;
7509 goto continue_lookup
;
7513 * We may encounter a race in the VNOP where the destination didn't
7514 * exist when we did the namei, but it does by the time we go and
7515 * try to create the entry. In this case, we should re-drive this rename
7516 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7517 * but other filesystems susceptible to this race could return it, too.
7519 if (error
== ERECYCLE
) {
7524 * For compound VNOPs, the authorization callback may return
7525 * ENOENT in case of racing hardlink lookups hitting the name
7526 * cache, redrive the lookup.
7528 if (batched
&& error
== ENOENT
) {
7529 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7530 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7539 /* call out to allow 3rd party notification of rename.
7540 * Ignore result of kauth_authorize_fileop call.
7542 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7543 KAUTH_FILEOP_RENAME
,
7544 (uintptr_t)from_name
, (uintptr_t)to_name
);
7545 if (flags
& VFS_RENAME_SWAP
) {
7546 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7547 KAUTH_FILEOP_RENAME
,
7548 (uintptr_t)to_name
, (uintptr_t)from_name
);
7552 if (from_name
!= NULL
&& to_name
!= NULL
) {
7553 if (from_truncated
|| to_truncated
) {
7554 // set it here since only the from_finfo gets reported up to user space
7555 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7559 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
7562 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
7566 add_fsevent(FSE_RENAME
, ctx
,
7567 FSE_ARG_STRING
, from_len
, from_name
,
7568 FSE_ARG_FINFO
, &from_finfo
,
7569 FSE_ARG_STRING
, to_len
, to_name
,
7570 FSE_ARG_FINFO
, &to_finfo
,
7572 if (flags
& VFS_RENAME_SWAP
) {
7574 * Strictly speaking, swap is the equivalent of
7575 * *three* renames. FSEvents clients should only take
7576 * the events as a hint, so we only bother reporting
7579 add_fsevent(FSE_RENAME
, ctx
,
7580 FSE_ARG_STRING
, to_len
, to_name
,
7581 FSE_ARG_FINFO
, &to_finfo
,
7582 FSE_ARG_STRING
, from_len
, from_name
,
7583 FSE_ARG_FINFO
, &from_finfo
,
7587 add_fsevent(FSE_RENAME
, ctx
,
7588 FSE_ARG_STRING
, from_len
, from_name
,
7589 FSE_ARG_FINFO
, &from_finfo
,
7590 FSE_ARG_STRING
, to_len
, to_name
,
7594 #endif /* CONFIG_FSE */
7597 * update filesystem's mount point data
7600 char *cp
, *pathend
, *mpname
;
7606 mp
= fvp
->v_mountedhere
;
7608 if (vfs_busy(mp
, LK_NOWAIT
)) {
7612 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7614 if (UIO_SEG_IS_USER_SPACE(segflg
))
7615 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7617 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7619 /* find current mount point prefix */
7620 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7621 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7625 /* find last component of target name */
7626 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7630 /* append name to prefix */
7631 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7632 bzero(pathend
, maxlen
);
7633 strlcpy(pathend
, mpname
, maxlen
);
7635 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7640 * fix up name & parent pointers. note that we first
7641 * check that fvp has the same name/parent pointers it
7642 * had before the rename call... this is a 'weak' check
7645 * XXX oparent and oname may not be set in the compound vnop case
7647 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7650 update_flags
= VNODE_UPDATE_NAME
;
7653 update_flags
|= VNODE_UPDATE_PARENT
;
7655 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7658 if (to_name
!= NULL
) {
7659 RELEASE_PATH(to_name
);
7662 if (from_name
!= NULL
) {
7663 RELEASE_PATH(from_name
);
7666 if (holding_mntlock
) {
7667 mount_unlock_renames(locked_mp
);
7668 mount_drop(locked_mp
, 0);
7669 holding_mntlock
= 0;
7673 * nameidone has to happen before we vnode_put(tdvp)
7674 * since it may need to release the fs_nodelock on the tdvp
7684 * nameidone has to happen before we vnode_put(fdvp)
7685 * since it may need to release the fs_nodelock on the fdvp
7695 * If things changed after we did the namei, then we will re-drive
7696 * this rename call from the top.
7703 FREE(__rename_data
, M_TEMP
);
7708 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7710 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7711 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
/*
 * renameatx_np: rename system call variant that forwards the
 * caller-supplied uap->flags to renameat_internal().
 *
 * The source is identified by uap->fromfd / uap->from and paths are
 * taken with UIO_USERSPACE.  p and retval are unused.
 * NOTE(review): the argument line carrying the destination fd/path is
 * not visible in this listing -- confirm against the full file.
 */
7714 int renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
7716 return renameat_internal(
7717 vfs_context_current(),
7718 uap
->fromfd
, uap
->from
,
7720 UIO_USERSPACE
, uap
->flags
);
7724 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7726 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7727 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7731 * Make a directory file.
7733 * Returns: 0 Success
7736 * vnode_authorize:???
7741 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7742 enum uio_seg segflg
)
7746 int update_flags
= 0;
7748 struct nameidata nd
;
7750 AUDIT_ARG(mode
, vap
->va_mode
);
7751 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7753 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7754 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7757 error
= nameiat(&nd
, fd
);
7768 batched
= vnode_compound_mkdir_available(dvp
);
7770 VATTR_SET(vap
, va_type
, VDIR
);
7774 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7775 * only get EXISTS or EISDIR for existing path components, and not that it could see
7776 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7777 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7779 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7780 if (error
== EACCES
|| error
== EPERM
) {
7788 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7789 * rather than EACCES if the target exists.
7791 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7793 error2
= nameiat(&nd
, fd
);
7807 * make the directory
7809 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7810 if (error
== EKEEPLOOKING
) {
7812 goto continue_lookup
;
7818 // Make sure the name & parent pointers are hooked up
7819 if (vp
->v_name
== NULL
)
7820 update_flags
|= VNODE_UPDATE_NAME
;
7821 if (vp
->v_parent
== NULLVP
)
7822 update_flags
|= VNODE_UPDATE_PARENT
;
7825 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7828 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7833 * nameidone has to happen before we vnode_put(dvp)
7834 * since it may need to release the fs_nodelock on the dvp
7847 * mkdir_extended: Create a directory; with extended security (ACL).
7849 * Parameters: p Process requesting to create the directory
7850 * uap User argument descriptor (see below)
7853 * Indirect: uap->path Path of directory to create
7854 * uap->mode Access permissions to set
7855 * uap->xsecurity ACL to set
7857 * Returns: 0 Success
7862 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7865 kauth_filesec_t xsecdst
;
7866 struct vnode_attr va
;
7868 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7871 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7872 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7876 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7877 if (xsecdst
!= NULL
)
7878 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7880 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7882 if (xsecdst
!= NULL
)
7883 kauth_filesec_free(xsecdst
);
/*
 * mkdir: create the directory named by uap->path.
 *
 * Builds a vnode_attr carrying the requested mode and hands it to
 * mkdir1at() with AT_FDCWD as the directory descriptor.  retval is
 * unused.
 * NOTE(review): the initialisation of va and the final argument of
 * the mkdir1at() call are not visible in this listing.
 */
7888 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7890 struct vnode_attr va
;
/* Keep only the ACCESSPERMS bits of the requested mode, then clear every bit set in the process's fd_cmask. */
7893 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7895 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: create the directory named by uap->path, passing uap->fd
 * to mkdir1at() as the directory descriptor.
 *
 * The mode handling is the same expression used by mkdir().  retval
 * is unused.
 * NOTE(review): the initialisation of va and the final argument of
 * the mkdir1at() call are not visible in this listing.
 */
7900 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7902 struct vnode_attr va
;
/* Keep only the ACCESSPERMS bits of the requested mode, then clear every bit set in the process's fd_cmask. */
7905 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7907 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
7912 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7913 enum uio_seg segflg
)
7917 struct nameidata nd
;
7920 int has_listeners
= 0;
7924 struct vnode_attr va
;
7925 #endif /* CONFIG_FSE */
7926 struct vnode_attr
*vap
= NULL
;
7927 int restart_count
= 0;
7933 * This loop exists to restart rmdir in the unlikely case that two
7934 * processes are simultaneously trying to remove the same directory
7935 * containing orphaned appleDouble files.
7938 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7939 segflg
, dirpath
, ctx
);
7940 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7945 error
= nameiat(&nd
, fd
);
7953 batched
= vnode_compound_rmdir_available(vp
);
7955 if (vp
->v_flag
& VROOT
) {
7957 * The root of a mounted filesystem cannot be deleted.
7963 #if DEVELOPMENT || DEBUG
7965 * XXX VSWAP: Check for entitlements or special flag here
7966 * so we can restrict access appropriately.
7968 #else /* DEVELOPMENT || DEBUG */
7970 if (vnode_isswap(vp
) && (ctx
!= vfs_context_kernel())) {
7974 #endif /* DEVELOPMENT || DEBUG */
7977 * Removed a check here; we used to abort if vp's vid
7978 * was not the same as what we'd seen the last time around.
7979 * I do not think that check was valid, because if we retry
7980 * and all dirents are gone, the directory could legitimately
7981 * be recycled but still be present in a situation where we would
7982 * have had permission to delete. Therefore, we won't make
7983 * an effort to preserve that check now that we may not have a
7988 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7990 if (error
== ENOENT
) {
7991 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7992 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8003 if (!vnode_compound_rmdir_available(dvp
)) {
8004 panic("No error, but no compound rmdir?");
8011 need_event
= need_fsevent(FSE_DELETE
, dvp
);
8014 get_fse_info(vp
, &finfo
, ctx
);
8016 error
= vfs_get_notify_attributes(&va
);
8025 has_listeners
= kauth_authorize_fileop_has_listeners();
8026 if (need_event
|| has_listeners
) {
8035 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
8038 finfo
.mode
|= FSE_TRUNCATED_PATH
;
8043 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8046 /* Couldn't find a vnode */
8050 if (error
== EKEEPLOOKING
) {
8051 goto continue_lookup
;
8052 } else if (batched
&& error
== ENOENT
) {
8053 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
8054 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
8056 * For compound VNOPs, the authorization callback
8057 * may return ENOENT in case of racing hard link lookups
8058 * redrive the lookup.
8065 #if CONFIG_APPLEDOUBLE
8067 * Special case to remove orphaned AppleDouble
8068 * files. I don't like putting this in the kernel,
8069 * but carbon does not like putting this in carbon either,
8072 if (error
== ENOTEMPTY
) {
8073 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8074 if (error
== EBUSY
) {
8080 * Assuming everything went well, we will try the RMDIR again
8083 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8085 #endif /* CONFIG_APPLEDOUBLE */
8087 * Call out to allow 3rd party notification of delete.
8088 * Ignore result of kauth_authorize_fileop call.
8091 if (has_listeners
) {
8092 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8093 KAUTH_FILEOP_DELETE
,
8098 if (vp
->v_flag
& VISHARDLINK
) {
8099 // see the comment in unlink1() about why we update
8100 // the parent of a hard link when it is removed
8101 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8107 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8109 add_fsevent(FSE_DELETE
, ctx
,
8110 FSE_ARG_STRING
, len
, path
,
8111 FSE_ARG_FINFO
, &finfo
,
8123 * nameidone has to happen before we vnode_put(dvp)
8124 * since it may need to release the fs_nodelock on the dvp
8132 if (restart_flag
== 0) {
8133 wakeup_one((caddr_t
)vp
);
8136 tsleep(vp
, PVFS
, "rm AD", 1);
8138 } while (restart_flag
!= 0);
8145 * Remove a directory file.
8149 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8151 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8152 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * DIRENT64_LEN(namlen): record length of a struct direntry holding a
 * name of namlen bytes, rounded up to a multiple of 8.
 *
 * Computed as sizeof(struct direntry) minus (MAXPATHLEN - 1) plus
 * namlen; presumably this swaps the full-size name array for the
 * actual name plus its terminator -- confirm against the struct
 * direntry definition.
 */
#define DIRENT64_LEN(namlen) \
	(((namlen) + sizeof(struct direntry) - (MAXPATHLEN - 1) + 7) & ~7)
/*
 * vnode_readdir64: read directory entries from vp into uio.
 *
 * If the mount's filesystem type sets VFC_VFSREADDIR_EXTENDED and the
 * mount does not set MNTK_DENY_READDIREXT, the request goes straight
 * to VNOP_READDIR with the caller's flags.
 *
 * Otherwise the entries are read with VNOP_READDIR (flags 0) into a
 * temporary kernel buffer starting at uio's current offset.  The buffer
 * is walked as struct dirent records; each one is converted into a
 * struct direntry and copied to uio with uiomove().  uio's offset is
 * then set from the offset left in the temporary uio, and both
 * temporary allocations are freed.
 *
 * NOTE(review): in the lines visible here the loop advances by
 * dep->d_reclen and copies dep->d_namlen + 1 name bytes without
 * checking either value against bytesread.  A filesystem returning a
 * zero or oversized d_reclen would not be caught -- confirm whether
 * validation exists in lines not shown in this listing.
 */
8160 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8161 int *numdirent
, vfs_context_t ctxp
)
8163 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8164 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8165 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8166 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8171 struct direntry
*entry64
;
8177 * Our kernel buffer needs to be smaller since re-packing
8178 * will expand each dirent. The worse case (when the name
8179 * length is 3) corresponds to a struct direntry size of 32
8180 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8181 * (4-byte aligned). So having a buffer that is 3/8 the size
8182 * will prevent us from reading more than we can pack.
8184 * Since this buffer is wired memory, we will limit the
8185 * buffer size to a maximum of 32K. We would really like to
8186 * use 32K in the MIN(), but we use magic number 87371 to
8187 * prevent uio_resid() * 3 / 8 from overflowing.
8189 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8190 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8191 if (bufptr
== NULL
) {
8195 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8196 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8197 auio
->uio_offset
= uio
->uio_offset
;
8199 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8201 dep
= (struct dirent
*)bufptr
;
8202 bytesread
= bufsize
- uio_resid(auio
);
8204 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8207 * Convert all the entries and copy them out to user's buffer.
8209 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8210 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
8212 bzero(entry64
, enbufsize
);
8213 /* Convert a dirent to a dirent64. */
8214 entry64
->d_ino
= dep
->d_ino
;
8215 entry64
->d_seekoff
= 0;
8216 entry64
->d_reclen
= enbufsize
;
8217 entry64
->d_namlen
= dep
->d_namlen
;
8218 entry64
->d_type
= dep
->d_type
;
8219 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8221 /* Move to next entry. */
8222 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8224 /* Copy entry64 to user's buffer. */
8225 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8228 /* Update the real offset using the offset we got from VNOP_READDIR. */
8230 uio
->uio_offset
= auio
->uio_offset
;
8233 FREE(bufptr
, M_TEMP
);
8234 FREE(entry64
, M_TEMP
);
8239 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8242 * Read a block of directory entries in a file system independent format.
8245 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8246 off_t
*offset
, int flags
)
8249 struct vfs_context context
= *vfs_context_current(); /* local copy */
8250 struct fileproc
*fp
;
8252 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8254 int error
, eofflag
, numdirent
;
8255 char uio_buf
[ UIO_SIZEOF(1) ];
8257 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
8261 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8262 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8267 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
8268 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8271 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8275 if ( (error
= vnode_getwithref(vp
)) ) {
8278 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8281 if (vp
->v_type
!= VDIR
) {
8282 (void)vnode_put(vp
);
8288 error
= mac_vnode_check_readdir(&context
, vp
);
8290 (void)vnode_put(vp
);
8295 loff
= fp
->f_fglob
->fg_offset
;
8296 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8297 uio_addiov(auio
, bufp
, bufsize
);
8299 if (flags
& VNODE_READDIR_EXTENDED
) {
8300 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8301 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8303 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8304 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8307 (void)vnode_put(vp
);
8311 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
8312 if (union_dircheckp
) {
8313 error
= union_dircheckp(&vp
, fp
, &context
);
8317 (void)vnode_put(vp
);
8322 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8323 struct vnode
*tvp
= vp
;
8324 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8326 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8327 fp
->f_fglob
->fg_offset
= 0;
8341 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: directory read system call using the non-extended
 * entry format.
 *
 * Calls getdirentries_common() on uap->fd / uap->buf / uap->count
 * with flags 0.  The resulting offset is copied out to uap->basep as
 * a user64_long_t when proc_is64bit(p); a user32_long_t copyout is
 * also present (presumably the 32-bit branch -- the else line is not
 * visible in this listing).  The byte count is stored in *retval.
 *
 * Note: p is declared __unused but is read by proc_is64bit(p).
 */
8349 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8355 AUDIT_ARG(fd
, uap
->fd
);
8356 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
8359 if (proc_is64bit(p
)) {
8360 user64_long_t base
= (user64_long_t
)offset
;
8361 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8363 user32_long_t base
= (user32_long_t
)offset
;
8364 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8366 *retval
= bytesread
;
/*
 * getdirentries64: directory read system call using the extended
 * entry format.
 *
 * Calls getdirentries_common() on uap->fd / uap->buf / uap->bufsize
 * with VNODE_READDIR_EXTENDED.  The byte count is stored in *retval
 * and the resulting offset is copied out to uap->position as an
 * off_t.  p is unused.
 */
8372 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8378 AUDIT_ARG(fd
, uap
->fd
);
8379 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8382 *retval
= bytesread
;
8383 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
8390 * Set the mode mask for creation of filesystem nodes.
8391 * XXX implement xsecurity
8393 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
/*
 * umask1: common implementation behind umask() and umask_extended().
 *
 * Stores the current fd_cmask in *retval, then replaces it with
 * newmask restricted to the ALLPERMS bits.  The fsec argument is
 * declared __unused.
 * NOTE(review): the assignment of fdp is not visible in this listing;
 * presumably it is the filedesc of p -- confirm against the full file.
 */
8395 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8397 struct filedesc
*fdp
;
8399 AUDIT_ARG(mask
, newmask
);
/* Report the old mask before overwriting it. */
8402 *retval
= fdp
->fd_cmask
;
8403 fdp
->fd_cmask
= newmask
& ALLPERMS
;
8409 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8411 * Parameters: p Process requesting to set the umask
8412 * uap User argument descriptor (see below)
8413 * retval umask of the process (parameter p)
8415 * Indirect: uap->newmask umask to set
8416 * uap->xsecurity ACL to set
8418 * Returns: 0 Success
/*
 * Implementation notes: when uap->xsecurity is non-null it is copied in
 * with kauth_copyinfilesec() into xsecdst.  umask1() is then called
 * with uap->newmask and xsecdst, and xsecdst is released with
 * kauth_filesec_free() if it is not KAUTH_FILESEC_NONE.
 */
8423 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8426 kauth_filesec_t xsecdst
;
8428 xsecdst
= KAUTH_FILESEC_NONE
;
8429 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8430 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
8433 xsecdst
= KAUTH_FILESEC_NONE
;
8436 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
/* Free the copied-in file security, if any was obtained. */
8438 if (xsecdst
!= KAUTH_FILESEC_NONE
)
8439 kauth_filesec_free(xsecdst
);
8444 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8446 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
8450 * Void all references to file by ripping underlying filesystem
8455 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8458 struct vnode_attr va
;
8459 vfs_context_t ctx
= vfs_context_current();
8461 struct nameidata nd
;
8463 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
8472 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
8477 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
8483 error
= mac_vnode_check_revoke(ctx
, vp
);
8489 VATTR_WANTED(&va
, va_uid
);
8490 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
8492 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
8493 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
8495 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
8496 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
8504 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8505 * The following system calls are designed to support features
8506 * which are specific to the HFS & HFS Plus volume formats
8511 * Obtain attribute information on objects in a directory while enumerating
8516 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
8519 struct fileproc
*fp
;
8521 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8522 uint32_t count
, savecount
;
8526 struct attrlist attributelist
;
8527 vfs_context_t ctx
= vfs_context_current();
8529 char uio_buf
[ UIO_SIZEOF(1) ];
8530 kauth_action_t action
;
8534 /* Get the attributes into kernel space */
8535 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
8538 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
8542 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
8545 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8546 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8553 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
8560 if ( (error
= vnode_getwithref(vp
)) )
8563 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8566 if (vp
->v_type
!= VDIR
) {
8567 (void)vnode_put(vp
);
8573 error
= mac_vnode_check_readdir(ctx
, vp
);
8575 (void)vnode_put(vp
);
8580 /* set up the uio structure which will contain the users return buffer */
8581 loff
= fp
->f_fglob
->fg_offset
;
8582 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8583 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
8586 * If the only item requested is file names, we can let that past with
8587 * just LIST_DIRECTORY. If they want any other attributes, that means
8588 * they need SEARCH as well.
8590 action
= KAUTH_VNODE_LIST_DIRECTORY
;
8591 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
8592 attributelist
.fileattr
|| attributelist
.dirattr
)
8593 action
|= KAUTH_VNODE_SEARCH
;
8595 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
8597 /* Believe it or not, uap->options only has 32-bits of valid
8598 * info, so truncate before extending again */
8600 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8601 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8605 (void) vnode_put(vp
);
8610 * If we've got the last entry of a directory in a union mount
8611 * then reset the eofflag and pretend there's still more to come.
8612 * The next call will again set eofflag and the buffer will be empty,
8613 * so traverse to the underlying directory and do the directory
8616 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8617 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8619 } else { // Empty buffer
8620 struct vnode
*tvp
= vp
;
8621 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8622 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8623 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8624 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8626 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8634 (void)vnode_put(vp
);
8638 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8640 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8642 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8644 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8647 *retval
= eofflag
; /* similar to getdirentries */
8651 return (error
); /* return error earlier, an retval of 0 or 1 now */
8653 } /* end of getdirentriesattr system call */
8656 * Exchange data between two files
8661 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8664 struct nameidata fnd
, snd
;
8665 vfs_context_t ctx
= vfs_context_current();
8669 u_int32_t nameiflags
;
8673 int from_truncated
=0, to_truncated
=0;
8675 fse_info f_finfo
, s_finfo
;
8679 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8681 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8682 UIO_USERSPACE
, uap
->path1
, ctx
);
8684 error
= namei(&fnd
);
8691 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8692 UIO_USERSPACE
, uap
->path2
, ctx
);
8694 error
= namei(&snd
);
8703 * if the files are the same, return an inval error
8711 * if the files are on different volumes, return an error
8713 if (svp
->v_mount
!= fvp
->v_mount
) {
8718 /* If they're not files, return an error */
8719 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8725 error
= mac_vnode_check_exchangedata(ctx
,
8730 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8731 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8736 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8738 kauth_authorize_fileop_has_listeners()) {
8741 if (fpath
== NULL
|| spath
== NULL
) {
8746 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8747 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8750 get_fse_info(fvp
, &f_finfo
, ctx
);
8751 get_fse_info(svp
, &s_finfo
, ctx
);
8752 if (from_truncated
|| to_truncated
) {
8753 // set it here since only the f_finfo gets reported up to user space
8754 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8758 /* Ok, make the call */
8759 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8762 const char *tmpname
;
8764 if (fpath
!= NULL
&& spath
!= NULL
) {
8765 /* call out to allow 3rd party notification of exchangedata.
8766 * Ignore result of kauth_authorize_fileop call.
8768 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8769 (uintptr_t)fpath
, (uintptr_t)spath
);
8773 tmpname
= fvp
->v_name
;
8774 fvp
->v_name
= svp
->v_name
;
8775 svp
->v_name
= tmpname
;
8777 if (fvp
->v_parent
!= svp
->v_parent
) {
8780 tmp
= fvp
->v_parent
;
8781 fvp
->v_parent
= svp
->v_parent
;
8782 svp
->v_parent
= tmp
;
8784 name_cache_unlock();
8787 if (fpath
!= NULL
&& spath
!= NULL
) {
8788 add_fsevent(FSE_EXCHANGE
, ctx
,
8789 FSE_ARG_STRING
, flen
, fpath
,
8790 FSE_ARG_FINFO
, &f_finfo
,
8791 FSE_ARG_STRING
, slen
, spath
,
8792 FSE_ARG_FINFO
, &s_finfo
,
8800 RELEASE_PATH(fpath
);
8802 RELEASE_PATH(spath
);
8810 * Return (in MB) the amount of freespace on the given vnode's volume.
8812 uint32_t freespace_mb(vnode_t vp
);
8815 freespace_mb(vnode_t vp
)
8817 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8818 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8819 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
8827 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8832 struct nameidata nd
;
8833 struct user64_fssearchblock searchblock
;
8834 struct searchstate
*state
;
8835 struct attrlist
*returnattrs
;
8836 struct timeval timelimit
;
8837 void *searchparams1
,*searchparams2
;
8839 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8840 uint32_t nummatches
;
8842 uint32_t nameiflags
;
8843 vfs_context_t ctx
= vfs_context_current();
8844 char uio_buf
[ UIO_SIZEOF(1) ];
8846 /* Start by copying in fsearchblock parameter list */
8847 if (IS_64BIT_PROCESS(p
)) {
8848 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8849 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8850 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8853 struct user32_fssearchblock tmp_searchblock
;
8855 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8856 // munge into 64-bit version
8857 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8858 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8859 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8860 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8862 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8863 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8865 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8866 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8867 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8868 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8869 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8870 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8871 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8876 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8878 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8879 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8882 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8883 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
8884 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8887 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8888 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8889 /* assumes the size is still 556 bytes it will continue to work */
8891 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8892 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8894 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8896 /* Now set up the various pointers to the correct place in our newly allocated memory */
8898 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8899 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8900 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8902 /* Now copy in the stuff given our local variables. */
8904 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8907 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8910 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8913 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8917 * When searching a union mount, need to set the
8918 * start flag at the first call on each layer to
8919 * reset state for the new volume.
8921 if (uap
->options
& SRCHFS_START
)
8922 state
->ss_union_layer
= 0;
8924 uap
->options
|= state
->ss_union_flags
;
8925 state
->ss_union_flags
= 0;
8928 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8929 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8930 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8931 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8932 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8935 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8936 attrreference_t
* string_ref
;
8937 u_int32_t
* start_length
;
8938 user64_size_t param_length
;
8940 /* validate searchparams1 */
8941 param_length
= searchblock
.sizeofsearchparams1
;
8942 /* skip the word that specifies length of the buffer */
8943 start_length
= (u_int32_t
*) searchparams1
;
8944 start_length
= start_length
+1;
8945 string_ref
= (attrreference_t
*) start_length
;
8947 /* ensure no negative offsets or too big offsets */
8948 if (string_ref
->attr_dataoffset
< 0 ) {
8952 if (string_ref
->attr_length
> MAXPATHLEN
) {
8957 /* Check for pointer overflow in the string ref */
8958 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8963 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8967 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8973 /* set up the uio structure which will contain the users return buffer */
8974 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8975 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8978 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8979 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8980 UIO_USERSPACE
, uap
->path
, ctx
);
8989 * Switch to the root vnode for the volume
8991 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8998 * If it's a union mount, the path lookup takes
8999 * us to the top layer. But we may need to descend
9000 * to a lower layer. For non-union mounts the layer
9003 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
9004 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
9007 vp
= vp
->v_mount
->mnt_vnodecovered
;
9013 error
= vnode_getwithref(vp
);
9020 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
9029 * If searchblock.maxmatches == 0, then skip the search. This has happened
9030 * before and sometimes the underlying code doesn't deal with it well.
9032 if (searchblock
.maxmatches
== 0) {
9038 * All right, we have everything we need, so let's make that call.
9040 * We keep special track of the return value from the file system:
9041 * EAGAIN is an acceptable error condition that shouldn't keep us
9042 * from copying out any results...
9045 fserror
= VNOP_SEARCHFS(vp
,
9048 &searchblock
.searchattrs
,
9049 (u_long
)searchblock
.maxmatches
,
9053 (u_long
)uap
->scriptcode
,
9054 (u_long
)uap
->options
,
9056 (struct searchstate
*) &state
->ss_fsstate
,
9060 * If it's a union mount we need to be called again
9061 * to search the mounted-on filesystem.
9063 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
9064 state
->ss_union_flags
= SRCHFS_START
;
9065 state
->ss_union_layer
++; // search next layer down
9073 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9074 search state. Everything was already put into the return buffer by the vop call. */
9076 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
9079 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
9086 FREE(searchparams1
,M_TEMP
);
9091 } /* end of searchfs system call */
9093 #else /* CONFIG_SEARCHFS */
/*
 * searchfs() stub.  This definition sits between "#else" and "#endif" of
 * CONFIG_SEARCHFS, i.e. it is the variant built when the search
 * implementation is compiled out.  Every parameter is marked __unused.
 * NOTE(review): the body is not visible in this extract; confirm which
 * error value it hands back to the caller.
 */
9096 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9101 #endif /* CONFIG_SEARCHFS */
/*
 * Global state of the namespace/snapshot handler machinery.
 *
 * The three lock-attribute objects below are allocated by
 * nspace_handler_init() and are used there to initialize the two mutexes.
 */
9104 lck_grp_attr_t
* nspace_group_attr
;
9105 lck_attr_t
* nspace_lock_attr
;
9106 lck_grp_t
* nspace_mutex_group
;
/*
 * nspace_handler_lock is held around accesses to the nspace_items table
 * and is the mutex handed to msleep() by waiters.
 * nspace_handler_exclusion_lock guards the per-handler handler_busy flag.
 */
9108 lck_mtx_t nspace_handler_lock
;
9109 lck_mtx_t nspace_handler_exclusion_lock
;
/*
 * snapshot_timestamp is written by the
 * FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME fsctl and zeroed again when the
 * snapshot handler process exits.  nspace_allow_virtual_devs is written by
 * FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS; while it is zero, snapshot
 * events on MNTK_VIRTUALDEV mounts are ignored.
 */
9111 time_t snapshot_timestamp
=0;
9112 int nspace_allow_virtual_devs
=0;
9114 void nspace_handler_init(void);
/* One pending event slot.  (The field list is not visible in this extract.) */
9116 typedef struct nspace_item_info
{
/* Fixed-size table of pending events; a slot whose flags are 0 is free. */
9126 #define MAX_NSPACE_ITEMS 128
9127 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9128 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
/*
 * nspace_token_id is pre-incremented to mint the token of each item handed
 * to a handler; its address is also the sleep channel for threads waiting
 * for a free slot.
 */
9129 uint32_t nspace_token_id
=0;
9130 uint32_t nspace_handler_timeout
= 15; // seconds
/* Item life-cycle bits stored in nspace_item_info.flags. */
9132 #define NSPACE_ITEM_NEW 0x0001
9133 #define NSPACE_ITEM_PROCESSING 0x0002
9134 #define NSPACE_ITEM_DEAD 0x0004
9135 #define NSPACE_ITEM_CANCELLED 0x0008
9136 #define NSPACE_ITEM_DONE 0x0010
9137 #define NSPACE_ITEM_RESET_TIMER 0x0020
/* Event-type bits (which kind of handler the item is meant for). */
9139 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9140 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9142 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9144 //#pragma optimization_level 0
/* Handler kinds; the values double as indices into nspace_handlers[]. */
9147 NSPACE_HANDLER_NSPACE
= 0,
9148 NSPACE_HANDLER_SNAPSHOT
= 1,
9150 NSPACE_HANDLER_COUNT
,
/* Per-handler registration record: thread id and owning process. */
9154 uint64_t handler_tid
;
9155 struct proc
*handler_proc
;
9159 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
/*
 * Forward declarations of the file-local helpers defined further down.
 * NOTE(review): validate_namespace_args() is declared with "int size" while
 * process_namespace_fsctl() passes it a u_int; confirm the implicit
 * conversion is intended.
 */
9161 /* namespace fsctl functions */
9162 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9163 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9164 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9165 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9166 static int nspace_is_special_process(struct proc
*proc
);
9167 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9168 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
9169 static int validate_namespace_args (int is64bit
, int size
);
9170 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
9173 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9175 switch(nspace_type
) {
9176 case NSPACE_HANDLER_NSPACE
:
9177 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9178 case NSPACE_HANDLER_SNAPSHOT
:
9179 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
9181 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
9186 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
9188 switch(nspace_type
) {
9189 case NSPACE_HANDLER_NSPACE
:
9190 return NSPACE_ITEM_NSPACE_EVENT
;
9191 case NSPACE_HANDLER_SNAPSHOT
:
9192 return NSPACE_ITEM_SNAPSHOT_EVENT
;
9194 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
9199 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
9201 switch(nspace_type
) {
9202 case NSPACE_HANDLER_NSPACE
:
9203 return FREAD
| FWRITE
| O_EVTONLY
;
9204 case NSPACE_HANDLER_SNAPSHOT
:
9205 return FREAD
| O_EVTONLY
;
9207 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
9212 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
9214 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9215 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9216 return NSPACE_HANDLER_NSPACE
;
9217 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9218 return NSPACE_HANDLER_SNAPSHOT
;
9220 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9221 return NSPACE_HANDLER_NSPACE
;
9225 static inline int nspace_is_special_process(struct proc
*proc
)
9228 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9229 if (proc
== nspace_handlers
[i
].handler_proc
)
9236 nspace_handler_init(void)
9238 nspace_lock_attr
= lck_attr_alloc_init();
9239 nspace_group_attr
= lck_grp_attr_alloc_init();
9240 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9241 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9242 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9243 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
9247 nspace_proc_exit(struct proc
*p
)
9249 int i
, event_mask
= 0;
9251 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9252 if (p
== nspace_handlers
[i
].handler_proc
) {
9253 event_mask
|= nspace_item_flags_for_type(i
);
9254 nspace_handlers
[i
].handler_tid
= 0;
9255 nspace_handlers
[i
].handler_proc
= NULL
;
9259 if (event_mask
== 0) {
9263 lck_mtx_lock(&nspace_handler_lock
);
9264 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9265 // if this process was the snapshot handler, zero snapshot_timeout
9266 snapshot_timestamp
= 0;
9270 // unblock anyone that's waiting for the handler that died
9272 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9273 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9275 if ( nspace_items
[i
].flags
& event_mask
) {
9277 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9278 vnode_lock_spin(nspace_items
[i
].vp
);
9279 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9280 vnode_unlock(nspace_items
[i
].vp
);
9282 nspace_items
[i
].vp
= NULL
;
9283 nspace_items
[i
].vid
= 0;
9284 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9285 nspace_items
[i
].token
= 0;
9287 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9292 wakeup((caddr_t
)&nspace_item_idx
);
9293 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Convenience wrapper: resolve a namespace item for vp/op without an extra
 * argument (forwards to resolve_nspace_item_ext() with arg == NULL).
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
/*
 * Queue an event for (vp, op) in the nspace_items table and sleep until the
 * responsible handler marks the slot done or cancelled, or the wait times
 * out.
 *
 * vp:  vnode the event is about; only VREG, VDIR and VLNK are accepted.
 * op:  operation word; its event-type bits select the handler kind.
 * arg: stored in the slot unless it equals NSPACE_REARM_NO_ARG (then NULL
 *      is stored).
 *
 * All table accesses and both msleep() calls happen with
 * nspace_handler_lock held.
 * NOTE(review): several early-exit statements and the final return are not
 * visible in this extract.
 */
9304 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9306 int i
, error
, keep_waiting
;
9308 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9310 // only allow namespace events on regular files, directories and symlinks.
9311 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9316 // if this is a snapshot event and the vnode is on a
9317 // disk image just pretend nothing happened since any
9318 // change to the disk image will cause the disk image
9319 // itself to get backed up and this avoids multi-way
9320 // deadlocks between the snapshot handler and the ever
9321 // popular diskimages-helper process. the variable
9322 // nspace_allow_virtual_devs allows this behavior to
9323 // be overridden (for use by the Mobile TimeMachine
9324 // testing infrastructure which uses disk images)
9326 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9327 && (vp
->v_mount
!= NULL
)
9328 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9329 && !nspace_allow_virtual_devs
) {
/* No handler of the required kind is registered. */
9334 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9335 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
/* The caller is itself one of the handler processes. */
9339 if (nspace_is_special_process(current_proc())) {
9343 lck_mtx_lock(&nspace_handler_lock
);
/* Pass 1: look for a slot that already tracks this vp/op pair. */
9346 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9347 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
/* Pass 2 (no existing slot): look for a free slot, i.e. flags == 0. */
9352 if (i
>= MAX_NSPACE_ITEMS
) {
9353 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9354 if (nspace_items
[i
].flags
== 0) {
/* An existing slot was found: this caller becomes one more waiter on it. */
9359 nspace_items
[i
].refcount
++;
/* Table full: sleep on &nspace_token_id, which is woken when a slot is released. */
9362 if (i
>= MAX_NSPACE_ITEMS
) {
9363 ts
.tv_sec
= nspace_handler_timeout
;
9366 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
9368 // an entry got free'd up, go see if we can get a slot
9371 lck_mtx_unlock(&nspace_handler_lock
);
9377 // if it didn't already exist, add it. if it did exist
9378 // we'll get woken up when someone does a wakeup() on
9379 // the slot in the nspace_items table.
9381 if (vp
!= nspace_items
[i
].vp
) {
9382 nspace_items
[i
].vp
= vp
;
9383 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9384 nspace_items
[i
].op
= op
;
9385 nspace_items
[i
].vid
= vnode_vid(vp
);
9386 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9387 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
9388 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9390 vnode_lock_spin(vp
);
9391 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9396 nspace_items
[i
].token
= 0;
9397 nspace_items
[i
].refcount
= 1;
/* Wake a handler sleeping in wait_for_namespace_event(). */
9399 wakeup((caddr_t
)&nspace_item_idx
);
9403 // Now go to sleep until the handler does a wakeup on this
9404 // slot in the nspace_items table (or we timeout).
9407 while(keep_waiting
) {
9408 ts
.tv_sec
= nspace_handler_timeout
;
9410 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
/* Decide the outcome from the slot state first, then from msleep's result. */
9412 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
/* For a cancelled item the token field carries the value supplied with the cancel request. */
9414 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9415 error
= nspace_items
[i
].token
;
9416 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
/* The handler asked for more time (FSCTL_NAMESPACE_HANDLER_UPDATE): consume the request. */
9417 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9418 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9423 } else if (error
== 0) {
9424 // hmmm, why did we get woken up?
9425 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9426 nspace_items
[i
].token
);
/* Last waiter out resets the slot and wakes threads waiting for a free slot. */
9429 if (--nspace_items
[i
].refcount
== 0) {
9430 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9431 nspace_items
[i
].arg
= NULL
;
9432 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
9433 nspace_items
[i
].flags
= 0; // this clears it for re-use
9435 wakeup(&nspace_token_id
);
9439 lck_mtx_unlock(&nspace_handler_lock
);
9444 int nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
9446 int snapshot_error
= 0;
9452 /* Swap files are special; skip them */
9453 if (vnode_isswap(vp
)) {
9457 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
9458 // the change time is within this epoch
9461 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
9462 if (error
== EDEADLK
) {
9465 if (error
== EAGAIN
) {
9466 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9467 } else if (error
== EINTR
) {
9468 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9469 snapshot_error
= EINTR
;
9474 return snapshot_error
;
9478 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
9482 lck_mtx_lock(&nspace_handler_lock
);
9483 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9484 if (nspace_items
[i
].vp
== vp
) {
9489 if (i
>= MAX_NSPACE_ITEMS
) {
9490 lck_mtx_unlock(&nspace_handler_lock
);
9494 *status
= nspace_items
[i
].flags
;
9495 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Format a "/.vol/<fsid>/<fileid>" style path for vp.
 *
 * path: output buffer.
 * len:  in: capacity handed to snprintf(); out: snprintf()'s result plus
 *       one.
 *
 * When vnode_getattr() fails, a fixed placeholder path is written instead.
 * NOTE(review): if snprintf() reports the untruncated length here, *len can
 * end up larger than the buffer on truncation; confirm callers cope.
 * NOTE(review): the return statements are not visible in this extract.
 */
9502 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
9504 struct vnode_attr va
;
/* Only the filesystem id and the file id are requested. */
9508 VATTR_WANTED(&va
, va_fsid
);
9509 VATTR_WANTED(&va
, va_fileid
);
9511 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
9512 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
9515 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
/*
 * Open an already looked-up vnode directly, without a path walk.
 *
 * vp:    vnode to open.
 * fmode: open mode (FREAD/FWRITE/O_* bits).
 * ctx:   VFS context used for the MAC check, authorization and the VNOPs.
 *
 * Visible sequence: superuser check, MAC open check, kauth authorization of
 * the action derived from fmode, VNOP_OPEN(), vnode_ref_ext() (with a
 * VNOP_CLOSE() if taking the reference fails), then open notifications.
 */
9524 // Note: this function does NOT check permissions on all of the
9525 // parent directories leading to this vnode. It should only be
9526 // called on behalf of a root process. Otherwise a process may
9527 // get access to a file because the file itself is readable even
9528 // though its parent directories would prevent access.
9531 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
9535 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9540 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
9545 /* compute action to be authorized */
9547 if (fmode
& FREAD
) {
9548 action
|= KAUTH_VNODE_READ_DATA
;
9550 if (fmode
& (FWRITE
| O_TRUNC
)) {
9552 * If we are writing, appending, and not truncating,
9553 * indicate that we are appending so that if the
9554 * UF_APPEND or SF_APPEND bits are set, we do not deny
9557 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
9558 action
|= KAUTH_VNODE_APPEND_DATA
;
9560 action
|= KAUTH_VNODE_WRITE_DATA
;
9564 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
9569 // if the vnode is tagged VOPENEVT and the current process
9570 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9571 // flag to the open mode so that this open won't count against
9572 // the vnode when carbon delete() does a vnode_isinuse() to see
9573 // if a file is currently in use. this allows spotlight
9574 // importers to not interfere with carbon apps that depend on
9575 // the no-delete-if-busy semantics of carbon delete().
9577 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
9581 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
9584 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
9585 VNOP_CLOSE(vp
, fmode
, ctx
);
9589 /* Call out to allow 3rd party notification of open.
9590 * Ignore result of kauth_authorize_fileop call.
9593 mac_vnode_notify_open(ctx
, vp
, fmode
);
9595 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
/*
 * Handler-side half of the namespace/snapshot protocol: block until an item
 * of kind nspace_type is queued, open its vnode, and hand token, operation
 * word and a file descriptor back to userland through the user addresses
 * stored in *nhd.
 *
 * nhd:         kernel copy of the user pointers (token, flags, fdptr and
 *              the optional infoptr / objid).
 * nspace_type: which handler the calling thread acts as.
 *
 * Only one thread per handler kind may be in here at a time; this is
 * enforced with the handler_busy flag under nspace_handler_exclusion_lock.
 * NOTE(review): several branch bodies, conditions and the return statements
 * are not visible in this extract.
 */
9603 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
/* Claim the handler slot, or bail out if another thread already holds it. */
9610 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9611 if (nspace_handlers
[nspace_type
].handler_busy
) {
9612 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9616 nspace_handlers
[nspace_type
].handler_busy
= 1;
9617 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9620 * Any process that gets here will be one of the namespace handlers.
9621 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9622 * as we can cause deadlocks to occur, because the namespace handler may prevent
9623 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9626 curtask
= current_task();
9627 bsd_set_dependency_capable (curtask
);
/* First caller for this kind registers itself as the handler thread/process. */
9629 lck_mtx_lock(&nspace_handler_lock
);
9630 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9631 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
9632 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
/* A snapshot handler is checked against an unset (0 or ~0) snapshot timestamp. */
9635 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9636 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9640 while (error
== 0) {
9642 /* Try to find matching namespace item */
9643 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9644 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9645 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9651 if (i
>= MAX_NSPACE_ITEMS
) {
9652 /* Nothing is there yet. Wait for wake up and retry */
9653 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9654 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9655 /* Prevent infinite loop if snapshot handler exited */
/* Claim the item: NEW -> PROCESSING, and mint a fresh token for it. */
9662 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9663 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9664 nspace_items
[i
].token
= ++nspace_token_id
;
9666 assert(nspace_items
[i
].vp
);
9667 struct fileproc
*fp
;
9670 struct proc
*p
= current_proc();
9671 vfs_context_t ctx
= vfs_context_current();
9672 struct vnode_attr va
;
/* Progress flags; the clean-up code below tests them to undo the corresponding steps. */
9673 bool vn_get_succsessful
= false;
9674 bool vn_open_successful
= false;
9675 bool fp_alloc_successful
= false;
9678 * Use vnode pointer to acquire a file descriptor for
9679 * hand-off to userland
9681 fmode
= nspace_open_flags_for_type(nspace_type
);
9682 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9683 if (error
) goto cleanup
;
9684 vn_get_succsessful
= true;
9686 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9687 if (error
) goto cleanup
;
9688 vn_open_successful
= true;
9690 error
= falloc(p
, &fp
, &indx
, ctx
);
9691 if (error
) goto cleanup
;
9692 fp_alloc_successful
= true;
/* Point the new file structure at the vnode. */
9694 fp
->f_fglob
->fg_flag
= fmode
;
9695 fp
->f_fglob
->fg_ops
= &vnops
;
9696 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9699 procfdtbl_releasefd(p
, indx
, NULL
);
9700 fp_drop(p
, indx
, fp
, 1);
9704 * All variants of the namespace handler struct support these three fields:
9705 * token, flags, and the FD pointer
9707 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9708 if (error
) goto cleanup
;
9709 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9710 if (error
) goto cleanup
;
9711 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9712 if (error
) goto cleanup
;
9715 * Handle optional fields:
9716 * extended version support an info ptr (offset, length), and the
9718 * namedata version supports a unique per-link object ID
/* The item's arg is interpreted as a uio; its offset and residual count are reported. */
9722 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9723 uint64_t u_offset
, u_length
;
9726 u_offset
= uio_offset(uio
);
9727 u_length
= uio_resid(uio
);
/* infoptr receives two consecutive 64-bit values: offset, then length. */
9732 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9733 if (error
) goto cleanup
;
9734 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
9735 if (error
) goto cleanup
;
9740 VATTR_WANTED(&va
, va_linkid
);
9741 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9742 if (error
) goto cleanup
;
/* linkid stays 0 when the filesystem does not supply va_linkid. */
9744 uint64_t linkid
= 0;
9745 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9746 linkid
= (uint64_t)va
.va_linkid
;
9748 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
/* Clean-up: undo, in reverse order, whichever acquisition steps succeeded. */
9752 if (fp_alloc_successful
) fp_free(p
, indx
, fp
);
9753 if (vn_open_successful
) vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9757 if (vn_get_succsessful
) vnode_put(nspace_items
[i
].vp
);
/* Retire the item and release its waiter. */
9763 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9764 vnode_lock_spin(nspace_items
[i
].vp
);
9765 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9766 vnode_unlock(nspace_items
[i
].vp
);
9768 nspace_items
[i
].vp
= NULL
;
9769 nspace_items
[i
].vid
= 0;
9770 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9771 nspace_items
[i
].token
= 0;
9773 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9776 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9777 // just go through every snapshot event and unblock it immediately.
9778 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9779 for(i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9780 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9781 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9782 nspace_items
[i
].vp
= NULL
;
9783 nspace_items
[i
].vid
= 0;
9784 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9785 nspace_items
[i
].token
= 0;
9787 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9794 lck_mtx_unlock(&nspace_handler_lock
);
/* Release the handler slot so the next call for this kind can enter. */
9796 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9797 nspace_handlers
[nspace_type
].handler_busy
= 0;
9798 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9803 static inline int validate_namespace_args (int is64bit
, int size
) {
9806 /* Must be one of these */
9807 if (size
== sizeof(user64_namespace_handler_info
)) {
9810 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9813 if (size
== sizeof(user64_namespace_handler_data
)) {
9819 /* 32 bit -- must be one of these */
9820 if (size
== sizeof(user32_namespace_handler_info
)) {
9823 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9826 if (size
== sizeof(user32_namespace_handler_data
)) {
9838 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9841 namespace_handler_data nhd
;
9843 bzero (&nhd
, sizeof(namespace_handler_data
));
9845 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9849 error
= validate_namespace_args (is64bit
, size
);
9854 /* Copy in the userland pointers into our kernel-only struct */
9857 /* 64 bit userland structures */
9858 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9859 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9860 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9862 /* If the size is greater than the standard info struct, add in extra fields */
9863 if (size
> (sizeof(user64_namespace_handler_info
))) {
9864 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9865 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9867 if (size
== (sizeof(user64_namespace_handler_data
))) {
9868 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9870 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9874 /* 32 bit userland structures */
9875 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9876 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9877 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9879 if (size
> (sizeof(user32_namespace_handler_info
))) {
9880 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9881 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
9883 if (size
== (sizeof(user32_namespace_handler_data
))) {
9884 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9886 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9890 return wait_for_namespace_event(&nhd
, nspace_type
);
/*
 * fsctl_internal: shared implementation behind fsctl() and ffsctl().
 *
 * p:       calling process (used for the 32/64-bit decision and the
 *          superuser checks).
 * arg_vp:  in/out vnode pointer.  NOTE(review): a comment in the
 *          FSCTL_SYNC_VOLUME case and one in ffsctl() indicate the vnode
 *          may be released and reset here; the statements doing so are not
 *          visible in this extract.
 * cmd:     ioctl-style command word; IOCPARM_LEN(cmd) gives the payload
 *          size and the IOC_* direction bits select copyin/copyout.
 * udata:   user address of the payload.
 * options: passed through to VNOP_IOCTL() for commands not handled here.
 * ctx:     VFS context.
 *
 * Payloads that fit use the STK_PARAMS-byte stack buffer; larger ones are
 * kalloc()ed.  Generic commands are handled in the switch below; anything
 * else is forwarded to the filesystem through VNOP_IOCTL().
 */
9894 * Make a filesystem-specific control call:
9898 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
9903 #define STK_PARAMS 128
9904 char stkbuf
[STK_PARAMS
] = {0};
9906 vnode_t vp
= *arg_vp
;
9908 size
= IOCPARM_LEN(cmd
);
9909 if (size
> IOCPARM_MAX
) return (EINVAL
);
9911 is64bit
= proc_is64bit(p
);
9917 * ensure the buffer is large enough for underlying calls
9919 #ifndef HFSIOC_GETPATH
9920 typedef char pn_t
[MAXPATHLEN
];
9921 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9925 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9927 if (IOCBASECMD(cmd
) == HFS_GETPATH
) {
9928 /* Round up to MAXPATHLEN regardless of user input */
9931 else if (vp
->v_tag
== VT_CIFS
) {
9933 * XXX Until fsctl's length encoding can be
9934 * XXX fixed properly.
9936 if (IOCBASECMD(cmd
) == _IOWR('z', 19, 0) && size
< 1432) {
9937 size
= 1432; /* sizeof(struct UniqueSMBShareID) */
9938 } else if (IOCBASECMD(cmd
) == _IOWR('z', 28, 0) && size
< 308) {
9939 size
= 308; /* sizeof(struct smbDebugTestPB) */
9943 if (size
> sizeof (stkbuf
)) {
9944 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9952 error
= copyin(udata
, data
, size
);
9961 *(user_addr_t
*)data
= udata
;
9964 *(uint32_t *)data
= (uint32_t)udata
;
9967 } else if ((cmd
& IOC_OUT
) && size
) {
9969 * Zero the buffer so the user always
9970 * gets back something deterministic.
9973 } else if (cmd
& IOC_VOID
) {
9975 *(user_addr_t
*)data
= udata
;
9978 *(uint32_t *)data
= (uint32_t)udata
;
9982 /* Check to see if it's a generic command */
9983 switch (IOCBASECMD(cmd
)) {
9985 case FSCTL_SYNC_VOLUME
: {
9986 mount_t mp
= vp
->v_mount
;
9987 int arg
= *(uint32_t*)data
;
9989 /* record vid of vp so we can drop it below. */
9990 uint32_t vvid
= vp
->v_id
;
9993 * Then grab mount_iterref so that we can release the vnode.
9994 * Without this, a thread may call vnode_iterate_prepare then
9995 * get into a deadlock because we've never released the root vp
9997 error
= mount_iterref (mp
, 0);
10003 /* issue the sync for this volume */
10004 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
10007 * Then release the mount_iterref once we're done syncing; it's not
10008 * needed for the VNOP_IOCTL below
10010 mount_iterdrop(mp
);
10012 if (arg
& FSCTL_SYNC_FULLSYNC
) {
10013 /* re-obtain vnode iocount on the root vp, if possible */
10014 error
= vnode_getwithvid (vp
, vvid
);
10016 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
10020 /* mark the argument VP as having been released */
10025 case FSCTL_ROUTEFS_SETROUTEID
: {
10027 char routepath
[MAXPATHLEN
];
10030 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10033 bzero(routepath
, MAXPATHLEN
);
10034 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
10038 error
= routefs_kernel_mount(routepath
);
10046 case FSCTL_SET_PACKAGE_EXTS
: {
10047 user_addr_t ext_strings
;
10048 uint32_t num_entries
;
10049 uint32_t max_width
;
10051 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0)))
10054 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
10055 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
10057 // either you're 64-bit and passed a 64-bit struct or
10058 // you're 32-bit and passed a 32-bit struct. otherwise
10065 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
10066 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
10067 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
10069 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
10070 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
10071 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
10073 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
10077 /* namespace handlers */
10078 case FSCTL_NAMESPACE_HANDLER_GET
: {
10079 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10083 /* Snapshot handlers */
10084 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
10085 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10089 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
10090 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10094 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
10095 uint32_t token
, val
;
10098 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10102 if (!nspace_is_special_process(p
)) {
10107 token
= ((uint32_t *)data
)[0];
10108 val
= ((uint32_t *)data
)[1];
10110 lck_mtx_lock(&nspace_handler_lock
);
10112 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10113 if (nspace_items
[i
].token
== token
) {
10114 break; /* exit for loop, not case stmt */
10118 if (i
>= MAX_NSPACE_ITEMS
) {
10122 // if this bit is set, when resolve_nspace_item() times out
10123 // it will loop and go back to sleep.
10125 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10128 lck_mtx_unlock(&nspace_handler_lock
);
10131 printf("nspace-handler-update: did not find token %u\n", token
);
10136 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
10137 uint32_t token
, val
;
10140 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10144 if (!nspace_is_special_process(p
)) {
10149 token
= ((uint32_t *)data
)[0];
10150 val
= ((uint32_t *)data
)[1];
10152 lck_mtx_lock(&nspace_handler_lock
);
10154 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10155 if (nspace_items
[i
].token
== token
) {
10156 break; /* exit for loop, not case statement */
10160 if (i
>= MAX_NSPACE_ITEMS
) {
10161 printf("nspace-handler-unblock: did not find token %u\n", token
);
10164 if (val
== 0 && nspace_items
[i
].vp
) {
10165 vnode_lock_spin(nspace_items
[i
].vp
);
10166 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10167 vnode_unlock(nspace_items
[i
].vp
);
10170 nspace_items
[i
].vp
= NULL
;
10171 nspace_items
[i
].arg
= NULL
;
10172 nspace_items
[i
].op
= 0;
10173 nspace_items
[i
].vid
= 0;
10174 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10175 nspace_items
[i
].token
= 0;
10177 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10180 lck_mtx_unlock(&nspace_handler_lock
);
10184 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
10185 uint32_t token
, val
;
10188 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10192 if (!nspace_is_special_process(p
)) {
10197 token
= ((uint32_t *)data
)[0];
10198 val
= ((uint32_t *)data
)[1];
10200 lck_mtx_lock(&nspace_handler_lock
);
10202 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10203 if (nspace_items
[i
].token
== token
) {
10204 break; /* exit for loop, not case stmt */
10208 if (i
>= MAX_NSPACE_ITEMS
) {
10209 printf("nspace-handler-cancel: did not find token %u\n", token
);
10212 if (nspace_items
[i
].vp
) {
10213 vnode_lock_spin(nspace_items
[i
].vp
);
10214 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10215 vnode_unlock(nspace_items
[i
].vp
);
10218 nspace_items
[i
].vp
= NULL
;
10219 nspace_items
[i
].arg
= NULL
;
10220 nspace_items
[i
].vid
= 0;
10221 nspace_items
[i
].token
= val
;
10222 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10223 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10225 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10228 lck_mtx_unlock(&nspace_handler_lock
);
10232 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10233 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10237 // we explicitly do not do the namespace_handler_proc check here
10239 lck_mtx_lock(&nspace_handler_lock
);
10240 snapshot_timestamp
= ((uint32_t *)data
)[0];
10241 wakeup(&nspace_item_idx
);
10242 lck_mtx_unlock(&nspace_handler_lock
);
10243 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10248 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10250 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10254 lck_mtx_lock(&nspace_handler_lock
);
10255 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10256 lck_mtx_unlock(&nspace_handler_lock
);
10257 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10258 nspace_allow_virtual_devs
? "" : " NOT");
10264 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
10266 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10270 mount_lock(vp
->v_mount
);
10271 if (data
[0] != 0) {
10272 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10273 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10274 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10275 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10276 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10279 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10280 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10282 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10283 vp
->v_mount
->fstypename_override
[0] = '\0';
10285 mount_unlock(vp
->v_mount
);
10291 /* Invoke the filesystem-specific code */
10292 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
10295 } /* end switch stmt */
10298 * if no errors, copy any data to user. Size was
10299 * already set and checked above.
10301 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
10302 error
= copyout(data
, udata
, size
);
10313 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10316 struct nameidata nd
;
10319 vfs_context_t ctx
= vfs_context_current();
10321 AUDIT_ARG(cmd
, uap
->cmd
);
10322 AUDIT_ARG(value32
, uap
->options
);
10323 /* Get the vnode for the file we are getting info on: */
10325 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
10326 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10327 UIO_USERSPACE
, uap
->path
, ctx
);
10328 if ((error
= namei(&nd
))) goto done
;
10333 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
10339 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10348 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
10352 vfs_context_t ctx
= vfs_context_current();
10355 AUDIT_ARG(fd
, uap
->fd
);
10356 AUDIT_ARG(cmd
, uap
->cmd
);
10357 AUDIT_ARG(value32
, uap
->options
);
10359 /* Get the vnode for the file we are getting info on: */
10360 if ((error
= file_vnode(uap
->fd
, &vp
)))
10363 if ((error
= vnode_getwithref(vp
))) {
10369 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
10376 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10380 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10387 /* end of fsctl system call */
10390 * Retrieve the data of an extended attribute.
/*
 * getxattr: path-based system call that reads an extended attribute.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOSECURITY / XATTR_NODEFAULT, looks up uap->path (FOLLOW unless
 * XATTR_NOFOLLOW is set), copies the attribute name in from user space,
 * applies the xattr_protected() check, builds a single-iovec read uio
 * over uap->value (clamped to XATTR_MAXSIZE) and calls vn_getxattr().
 * *retval is set either to the number of bytes transferred
 * (uap->size - uio_resid(auio)) or to the attribute size reported by
 * vn_getxattr(); the condition choosing between the two is not visible
 * in this listing.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, error-path statements and labels) --
 * confirm against the full source.
 */
10393 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
10396 struct nameidata nd
;
10397 char attrname
[XATTR_MAXNAMELEN
+1];
10398 vfs_context_t ctx
= vfs_context_current();
10400 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10401 size_t attrsize
= 0;
10403 u_int32_t nameiflags
;
10405 char uio_buf
[ UIO_SIZEOF(1) ];
10407 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10410 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10411 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10412 if ((error
= namei(&nd
))) {
/*
 * The assignment is parenthesized separately from the "!= 0" test so that
 * error holds copyinstr()'s actual return code rather than the 0/1 result
 * of the comparison.
 */
10418 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
)) != 0) {
10421 if (xattr_protected(attrname
)) {
10422 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
10428 * the specific check for 0xffffffff is a hack to preserve
10429 * binary compatibility in K64 with applications that discovered
10430 * that passing in a buf pointer and a size of -1 resulted in
10431 * just the size of the indicated extended attribute being returned.
10432 * this isn't part of the documented behavior, but because of the
10433 * original implementation's check for "uap->size > 0", this behavior
10434 * was allowed. In K32 that check turned into a signed comparison
10435 * even though uap->size is unsigned... in K64, we blow by that
10436 * check because uap->size is unsigned and doesn't get sign smeared
10437 * in the munger for a 32 bit user app. we also need to add a
10438 * check to limit the maximum size of the buffer being passed in...
10439 * unfortunately, the underlying filesystems seem to just malloc
10440 * the requested size even if the actual extended attribute is tiny.
10441 * because that malloc is for kernel wired memory, we have to put a
10442 * sane limit on it.
10444 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10445 * U64 running on K64 will yield -1 (64 bits wide)
10446 * U32/U64 running on K32 will yield -1 (32 bits wide)
10448 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
/* Clamp the request so the buffer size stays bounded (see above). */
10452 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
10453 uap
->size
= XATTR_MAXSIZE
;
/* One-iovec read uio over the caller's buffer, backed by uio_buf. */
10455 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10456 &uio_buf
[0], sizeof(uio_buf
));
10457 uio_addiov(auio
, uap
->value
, uap
->size
);
10460 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
10465 *retval
= uap
->size
- uio_resid(auio
);
10467 *retval
= (user_ssize_t
)attrsize
;
10474 * Retrieve the data of an extended attribute.
/*
 * fgetxattr: file-descriptor based variant of getxattr.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOFOLLOW / XATTR_NOSECURITY / XATTR_NODEFAULT, obtains the vnode
 * behind uap->fd (file_vnode() + vnode_getwithref()), copies the attribute
 * name in from user space, applies the xattr_protected() check, builds a
 * read uio over uap->value when a buffer and non-zero size were supplied,
 * and calls vn_getxattr().  The vnode reference and the file reference
 * are dropped afterwards.  *retval is set either to the number of bytes
 * transferred or to the attribute size; the selecting condition is not
 * visible in this listing.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, error-path statements) -- confirm against
 * the full source.
 */
10477 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
10480 char attrname
[XATTR_MAXNAMELEN
+1];
10482 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10483 size_t attrsize
= 0;
10486 char uio_buf
[ UIO_SIZEOF(1) ];
10488 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10491 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10494 if ( (error
= vnode_getwithref(vp
)) ) {
10495 file_drop(uap
->fd
);
/*
 * The assignment is parenthesized separately from the "!= 0" test so that
 * error holds copyinstr()'s actual return code rather than the 0/1 result
 * of the comparison.
 */
10498 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
)) != 0) {
10501 if (xattr_protected(attrname
)) {
/* Only build a uio when the caller supplied a buffer and a size. */
10505 if (uap
->value
&& uap
->size
> 0) {
10506 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10507 &uio_buf
[0], sizeof(uio_buf
));
10508 uio_addiov(auio
, uap
->value
, uap
->size
);
10511 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
/* Release the vnode reference and the descriptor reference taken above. */
10513 (void)vnode_put(vp
);
10514 file_drop(uap
->fd
);
10517 *retval
= uap
->size
- uio_resid(auio
);
10519 *retval
= (user_ssize_t
)attrsize
;
10525 * Set the data of an extended attribute.
/*
 * setxattr: path-based system call that writes an extended attribute.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOSECURITY / XATTR_NODEFAULT, copies the attribute name in from
 * user space, applies the xattr_protected() check, tests for a non-zero
 * size paired with a null value pointer, looks up uap->path (FOLLOW unless
 * XATTR_NOFOLLOW is set), builds a one-iovec write uio over uap->value and
 * calls vn_setxattr().  An FSE_XATTR_MODIFIED fsevent call follows.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, return statements, the remaining
 * add_fsevent() arguments) -- confirm against the full source.
 */
10528 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
10531 struct nameidata nd
;
10532 char attrname
[XATTR_MAXNAMELEN
+1];
10533 vfs_context_t ctx
= vfs_context_current();
10535 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10537 u_int32_t nameiflags
;
10539 char uio_buf
[ UIO_SIZEOF(1) ];
10541 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
/*
 * The assignment is parenthesized separately from the "!= 0" test so that
 * error holds copyinstr()'s actual return code.  With the comparison inside
 * the assignment, error could only ever be 0 or 1, so the "otherwise"
 * path described below was unreachable.
 * NOTE(review): BSD copyinstr(9) documents ENAMETOOLONG for a string that
 * does not fit; confirm whether the EPERM test below is still needed.
 */
10544 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
)) != 0) {
10545 if (error
== EPERM
) {
10546 /* if the string won't fit in attrname, copyinstr emits EPERM */
10547 return (ENAMETOOLONG
);
10549 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10552 if (xattr_protected(attrname
))
/* A non-zero size needs a value pointer to read from. */
10554 if (uap
->size
!= 0 && uap
->value
== 0) {
10558 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10559 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10560 if ((error
= namei(&nd
))) {
/* One-iovec write uio over the caller's data, backed by uio_buf. */
10566 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10567 &uio_buf
[0], sizeof(uio_buf
));
10568 uio_addiov(auio
, uap
->value
, uap
->size
);
10570 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
10573 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10584 * Set the data of an extended attribute.
/*
 * fsetxattr: file-descriptor based variant of setxattr.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOFOLLOW / XATTR_NOSECURITY / XATTR_NODEFAULT, copies the
 * attribute name in from user space, applies the xattr_protected() check,
 * tests for a non-zero size paired with a null value pointer, obtains the
 * vnode behind uap->fd (file_vnode() + vnode_getwithref()), builds a
 * one-iovec write uio over uap->value and calls vn_setxattr().  An
 * FSE_XATTR_MODIFIED fsevent call follows and the descriptor reference is
 * dropped with file_drop().
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, return statements, any preprocessor
 * conditionals, the remaining add_fsevent() arguments) -- confirm against
 * the full source.
 */
10587 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
10590 char attrname
[XATTR_MAXNAMELEN
+1];
10592 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10595 char uio_buf
[ UIO_SIZEOF(1) ];
10597 vfs_context_t ctx
= vfs_context_current();
10600 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
/*
 * The assignment is parenthesized separately from the "!= 0" test so that
 * error holds copyinstr()'s actual return code.  With the comparison inside
 * the assignment, error could only ever be 0 or 1, so the "otherwise"
 * path described below was unreachable.
 * NOTE(review): BSD copyinstr(9) documents ENAMETOOLONG for a string that
 * does not fit; confirm whether the EPERM test below is still needed.
 */
10603 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
)) != 0) {
10604 if (error
== EPERM
) {
10605 /* if the string won't fit in attrname, copyinstr emits EPERM */
10606 return (ENAMETOOLONG
);
10608 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10611 if (xattr_protected(attrname
))
/* A non-zero size needs a value pointer to read from. */
10613 if (uap
->size
!= 0 && uap
->value
== 0) {
10616 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10619 if ( (error
= vnode_getwithref(vp
)) ) {
10620 file_drop(uap
->fd
);
/* One-iovec write uio over the caller's data, backed by uio_buf. */
10623 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10624 &uio_buf
[0], sizeof(uio_buf
));
10625 uio_addiov(auio
, uap
->value
, uap
->size
);
10627 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
10630 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10636 file_drop(uap
->fd
);
10642 * Remove an extended attribute.
10643 * XXX Code duplication here.
/*
 * removexattr: path-based system call that removes an extended attribute.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOSECURITY / XATTR_NODEFAULT, copies the attribute name in from
 * user space (keeping copyinstr()'s return value in error), applies the
 * xattr_protected() check, looks up uap->path (FOLLOW unless
 * XATTR_NOFOLLOW is set) and calls vn_removexattr().  An
 * FSE_XATTR_REMOVED fsevent call follows.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, return statements, the remaining
 * add_fsevent() arguments) -- confirm against the full source.
 */
10646 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
10649 struct nameidata nd
;
10650 char attrname
[XATTR_MAXNAMELEN
+1];
10651 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10652 vfs_context_t ctx
= vfs_context_current();
10654 u_int32_t nameiflags
;
10657 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
/* Copy the attribute name in; the buffer holds XATTR_MAXNAMELEN+1 bytes. */
10660 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10664 if (xattr_protected(attrname
))
10666 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10667 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10668 if ((error
= namei(&nd
))) {
10674 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10677 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10688 * Remove an extended attribute.
10689 * XXX Code duplication here.
/*
 * fremovexattr: file-descriptor based variant of removexattr.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOFOLLOW / XATTR_NOSECURITY / XATTR_NODEFAULT, copies the
 * attribute name in from user space, applies the xattr_protected() check,
 * obtains the vnode behind uap->fd (file_vnode() + vnode_getwithref()) and
 * calls vn_removexattr().  An FSE_XATTR_REMOVED fsevent call follows and
 * the descriptor reference is dropped with file_drop().
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, return statements, any preprocessor
 * conditionals, the remaining add_fsevent() arguments) -- confirm against
 * the full source.
 */
10692 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10695 char attrname
[XATTR_MAXNAMELEN
+1];
10699 vfs_context_t ctx
= vfs_context_current();
10702 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
/* Copy the attribute name in; the buffer holds XATTR_MAXNAMELEN+1 bytes. */
10705 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10709 if (xattr_protected(attrname
))
10711 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10714 if ( (error
= vnode_getwithref(vp
)) ) {
10715 file_drop(uap
->fd
);
10719 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10722 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10728 file_drop(uap
->fd
);
10734 * Retrieve the list of extended attribute names.
10735 * XXX Code duplication here.
/*
 * listxattr: path-based system call that lists extended attribute names.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOSECURITY / XATTR_NODEFAULT, looks up uap->path (FOLLOW unless
 * XATTR_NOFOLLOW is set), builds a one-iovec read uio over uap->namebuf
 * when a buffer and a non-zero size were supplied, and calls
 * vn_listxattr().  *retval is set either to the number of bytes
 * transferred (uap->bufsize - uio_resid(auio)) or to the size reported by
 * vn_listxattr(); the selecting condition is not visible in this listing.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, error-path statements) -- confirm against
 * the full source.
 */
10738 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10741 struct nameidata nd
;
10742 vfs_context_t ctx
= vfs_context_current();
10744 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10745 size_t attrsize
= 0;
10746 u_int32_t nameiflags
;
10748 char uio_buf
[ UIO_SIZEOF(1) ];
10750 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10753 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10754 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10755 if ((error
= namei(&nd
))) {
/* Only build a uio when the caller supplied a buffer and a size. */
10760 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10761 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10762 &uio_buf
[0], sizeof(uio_buf
));
10763 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10766 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10770 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10772 *retval
= (user_ssize_t
)attrsize
;
10778 * Retrieve the list of extended attribute names.
10779 * XXX Code duplication here.
/*
 * flistxattr: file-descriptor based variant of listxattr.
 *
 * As far as this listing shows, the call tests uap->options for
 * XATTR_NOFOLLOW / XATTR_NOSECURITY / XATTR_NODEFAULT, obtains the vnode
 * behind uap->fd (file_vnode() + vnode_getwithref()), builds a one-iovec
 * read uio over uap->namebuf when a buffer and a non-zero size were
 * supplied, calls vn_listxattr() and drops the descriptor reference.
 * *retval is set either to the number of bytes transferred or to the size
 * reported by vn_listxattr(); the selecting condition is not visible in
 * this listing.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, error-path statements) -- confirm against
 * the full source.
 */
10782 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
/* NOTE(review): sibling calls use IS_64BIT_PROCESS(p) here; presumably equivalent -- confirm. */
10786 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10787 size_t attrsize
= 0;
10789 char uio_buf
[ UIO_SIZEOF(1) ];
10791 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10794 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10797 if ( (error
= vnode_getwithref(vp
)) ) {
10798 file_drop(uap
->fd
);
/* Only build a uio when the caller supplied a buffer and a size. */
10801 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10802 auio
= uio_createwithbuffer(1, 0, spacetype
,
10803 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10804 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10807 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10810 file_drop(uap
->fd
);
10812 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10814 *retval
= (user_ssize_t
)attrsize
;
/*
 * fsgetpath_internal: build the path of the object identified by
 * (volfs_id, objid) into buf.
 *
 * As far as this listing shows, the routine tests bufsize against
 * PAGE_SIZE, finds the mount with mount_lookupby_volfsid(), obtains a
 * vnode through VFS_ROOT() or VFS_VGET() (the condition choosing between
 * them is not visible here), falls back to the covered mount on ENOENT
 * for MNT_UNION mounts, runs mac_vnode_check_fsgetpath(), and calls
 * build_path().  EAGAIN from build_path() is treated as a race; "retries"
 * (initially 0x10) bounds the number of attempts.  When kdebug_enable is
 * set, the path (or its tail when it does not fit in dbg_parms) is
 * reported through kdebug_lookup_gen_events().  On the path shown,
 * *pathlen receives the length produced by build_path().
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, labels, goto/return statements) -- confirm
 * against the full source.
 */
10819 static int fsgetpath_internal(
10820 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10821 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10824 struct mount
*mp
= NULL
;
10828 /* maximum number of times to retry build_path */
10829 unsigned int retries
= 0x10;
10831 if (bufsize
> PAGE_SIZE
) {
10840 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10841 error
= ENOTSUP
; /* unexpected failure */
10847 error
= VFS_ROOT(mp
, &vp
, ctx
);
10849 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10852 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10854 * If the fileid isn't found and we're in a union
10855 * mount volume, then see if the fileid is in the
10856 * mounted-on volume.
10858 struct mount
*tmp
= mp
;
10859 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10861 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10872 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10879 /* Obtain the absolute path to this vnode. */
10880 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10881 bpflags
|= BUILDPATH_CHECK_MOVED
;
10882 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10886 /* there was a race building the path, try a few more times */
10887 if (error
== EAGAIN
) {
10897 AUDIT_ARG(text
, buf
);
10899 if (kdebug_enable
) {
10900 long dbg_parms
[NUMPARMS
];
10903 dbg_namelen
= (int)sizeof(dbg_parms
);
/* Short path: copy it whole and zero-fill the remainder of dbg_parms. */
10905 if (length
< dbg_namelen
) {
10906 memcpy((char *)dbg_parms
, buf
, length
);
10907 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10909 dbg_namelen
= length
;
/* Otherwise only the last dbg_namelen bytes of the path are copied. */
10911 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10914 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10917 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10924 * Obtain the full pathname of a file system object by id.
10926 * This is a private SPI used by the File Manager.
/*
 * fsgetpath: system call wrapper around fsgetpath_internal().
 *
 * As far as this listing shows, the call copies the fsid in from user
 * space, audits fsid.val[0] and uap->objid, tests uap->bufsize against
 * PAGE_SIZE, allocates a kernel buffer of uap->bufsize bytes, asks
 * fsgetpath_internal() to fill it using fsid.val[0] as the volume id,
 * copies "length" bytes out to uap->buf, stores the length in *retval and
 * frees the buffer.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, error-path statements, labels) -- confirm
 * against the full source.
 */
10930 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10932 vfs_context_t ctx
= vfs_context_current();
10938 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10941 AUDIT_ARG(value32
, fsid
.val
[0]);
10942 AUDIT_ARG(value64
, uap
->objid
);
10943 /* Restrict output buffer size for now. */
10945 if (uap
->bufsize
> PAGE_SIZE
) {
/* Temporary kernel buffer that receives the path before copyout. */
10948 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10949 if (realpath
== NULL
) {
10953 error
= fsgetpath_internal(
10954 ctx
, fsid
.val
[0], uap
->objid
,
10955 uap
->bufsize
, realpath
, &length
);
10961 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10963 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10966 FREE(realpath
, M_TEMP
);
10972 * Common routine to handle various flavors of statfs data heading out
10975 * Returns: 0 Success
/*
 * munge_statfs: convert a struct vfsstatfs into the user-visible statfs
 * layout and copy it out to bufp.
 *
 * Two layouts are filled in below: struct user64_statfs and struct
 * user32_statfs (presumably selected by is_64_bit -- the selecting
 * statement is not visible in this listing).  In both, the structure is
 * zeroed, f_flags is mp->mnt_flag masked with MNT_VISFLAGMASK, f_type is
 * the mount's vfc_typenum, the type name comes from
 * mp->fstypename_override when MNTK_TYPENAME_OVERRIDE is set, and
 * partial_copy shortens the copyout by the size of f_reserved3 and
 * f_reserved4.  The 32-bit layout additionally scales block counts that
 * exceed INT_MAX, as the comments inside describe.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations, branch keywords, the statements following
 * the final sizep test, the return) -- confirm against the full source.
 */
10979 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10980 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10981 boolean_t partial_copy
)
10984 int my_size
, copy_size
;
/* 64-bit user layout: numeric fields are cast to user64_long_t. */
10987 struct user64_statfs sfs
;
10988 my_size
= copy_size
= sizeof(sfs
);
10989 bzero(&sfs
, my_size
);
10990 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10991 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10992 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10993 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10994 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10995 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10996 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10997 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10998 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10999 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
11000 sfs
.f_fsid
= sfsp
->f_fsid
;
11001 sfs
.f_owner
= sfsp
->f_owner
;
11002 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11003 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11005 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11007 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11008 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
/* A partial copy leaves f_reserved3 and f_reserved4 out of the copyout size. */
11010 if (partial_copy
) {
11011 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11013 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
/* 32-bit user layout: numeric fields are cast to user32_long_t. */
11016 struct user32_statfs sfs
;
11018 my_size
= copy_size
= sizeof(sfs
);
11019 bzero(&sfs
, my_size
);
11021 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
11022 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
11023 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
11026 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11027 * have to fudge the numbers here in that case. We inflate the blocksize in order
11028 * to reflect the filesystem size as best we can.
11030 if ((sfsp
->f_blocks
> INT_MAX
)
11031 /* Hack for 4061702 . I think the real fix is for Carbon to
11032 * look for some volume capability and not depend on hidden
11033 * semantics agreed between a FS and carbon.
11034 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11035 * for Carbon to set bNoVolumeSizes volume attribute.
11036 * Without this the webdavfs files cannot be copied onto
11037 * disk as they look huge. This change should not affect
11038 * XSAN as they should not setting these to -1..
11040 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
11041 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
11042 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
11046 * Work out how far we have to shift the block count down to make it fit.
11047 * Note that it's possible to have to shift so far that the resulting
11048 * blocksize would be unreportably large. At that point, we will clip
11049 * any values that don't fit.
11051 * For safety's sake, we also ensure that f_iosize is never reported as
11052 * being smaller than f_bsize.
11054 for (shift
= 0; shift
< 32; shift
++) {
11055 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
11057 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
/* Shift x right by s bits, clipping the result to INT_MAX. */
11060 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11061 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
11062 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
11063 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
11064 #undef __SHIFT_OR_CLIP
/* The block size is scaled up by the same shift the counts were scaled down by. */
11065 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
11066 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
11068 /* filesystem is small enough to be reported honestly */
11069 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
11070 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
11071 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
11072 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
11073 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
11075 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
11076 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
11077 sfs
.f_fsid
= sfsp
->f_fsid
;
11078 sfs
.f_owner
= sfsp
->f_owner
;
11079 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11080 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11082 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11084 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11085 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
/* A partial copy leaves f_reserved3 and f_reserved4 out of the copyout size. */
11087 if (partial_copy
) {
11088 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11090 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11093 if (sizep
!= NULL
) {
11100 * copy stat structure into user_stat structure.
/*
 * munge_user64_stat: fill a struct user64_stat from a struct stat.
 *
 * *usbp is zeroed first and then every field is assigned from the field
 * of the same name in *sbp.  The timestamps appear in two forms
 * (st_*timespec.tv_sec/tv_nsec and st_*time/st_*timensec); the first
 * group follows "#ifndef _POSIX_C_SOURCE".
 *
 * NOTE(review): the preprocessor lines that separate and close the two
 * timestamp groups, and the function braces, are not part of this
 * listing -- confirm against the full source.
 */
11102 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11104 bzero(usbp
, sizeof(*usbp
));
11106 usbp
->st_dev
= sbp
->st_dev
;
11107 usbp
->st_ino
= sbp
->st_ino
;
11108 usbp
->st_mode
= sbp
->st_mode
;
11109 usbp
->st_nlink
= sbp
->st_nlink
;
11110 usbp
->st_uid
= sbp
->st_uid
;
11111 usbp
->st_gid
= sbp
->st_gid
;
11112 usbp
->st_rdev
= sbp
->st_rdev
;
11113 #ifndef _POSIX_C_SOURCE
11114 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11115 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11116 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11117 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11118 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11119 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11121 usbp
->st_atime
= sbp
->st_atime
;
11122 usbp
->st_atimensec
= sbp
->st_atimensec
;
11123 usbp
->st_mtime
= sbp
->st_mtime
;
11124 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11125 usbp
->st_ctime
= sbp
->st_ctime
;
11126 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11128 usbp
->st_size
= sbp
->st_size
;
11129 usbp
->st_blocks
= sbp
->st_blocks
;
11130 usbp
->st_blksize
= sbp
->st_blksize
;
11131 usbp
->st_flags
= sbp
->st_flags
;
11132 usbp
->st_gen
= sbp
->st_gen
;
11133 usbp
->st_lspare
= sbp
->st_lspare
;
11134 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11135 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * munge_user32_stat: fill a struct user32_stat from a struct stat.
 *
 * *usbp is zeroed first and then every field is assigned from the field
 * of the same name in *sbp.  The timestamps appear in two forms
 * (st_*timespec.tv_sec/tv_nsec and st_*time/st_*timensec); the first
 * group follows "#ifndef _POSIX_C_SOURCE".
 *
 * NOTE(review): the preprocessor lines that separate and close the two
 * timestamp groups, and the function braces, are not part of this
 * listing -- confirm against the full source.
 */
11138 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11140 bzero(usbp
, sizeof(*usbp
));
11142 usbp
->st_dev
= sbp
->st_dev
;
11143 usbp
->st_ino
= sbp
->st_ino
;
11144 usbp
->st_mode
= sbp
->st_mode
;
11145 usbp
->st_nlink
= sbp
->st_nlink
;
11146 usbp
->st_uid
= sbp
->st_uid
;
11147 usbp
->st_gid
= sbp
->st_gid
;
11148 usbp
->st_rdev
= sbp
->st_rdev
;
11149 #ifndef _POSIX_C_SOURCE
11150 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11151 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11152 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11153 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11154 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11155 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11157 usbp
->st_atime
= sbp
->st_atime
;
11158 usbp
->st_atimensec
= sbp
->st_atimensec
;
11159 usbp
->st_mtime
= sbp
->st_mtime
;
11160 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11161 usbp
->st_ctime
= sbp
->st_ctime
;
11162 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11164 usbp
->st_size
= sbp
->st_size
;
11165 usbp
->st_blocks
= sbp
->st_blocks
;
11166 usbp
->st_blksize
= sbp
->st_blksize
;
11167 usbp
->st_flags
= sbp
->st_flags
;
11168 usbp
->st_gen
= sbp
->st_gen
;
11169 usbp
->st_lspare
= sbp
->st_lspare
;
11170 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11171 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11175 * copy stat64 structure into user_stat64 structure.
/*
 * munge_user64_stat64: fill a struct user64_stat64 from a struct stat64.
 *
 * *usbp is zeroed first and then every field is assigned from the field
 * of the same name in *sbp, including the birth time.  The timestamps
 * appear in two forms (st_*timespec.tv_sec/tv_nsec and
 * st_*time/st_*timensec); the first group follows
 * "#ifndef _POSIX_C_SOURCE".
 *
 * NOTE(review): the preprocessor lines that separate and close the two
 * timestamp groups, and the function braces, are not part of this
 * listing -- confirm against the full source.
 */
11177 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11179 bzero(usbp
, sizeof(*usbp
));
11181 usbp
->st_dev
= sbp
->st_dev
;
11182 usbp
->st_ino
= sbp
->st_ino
;
11183 usbp
->st_mode
= sbp
->st_mode
;
11184 usbp
->st_nlink
= sbp
->st_nlink
;
11185 usbp
->st_uid
= sbp
->st_uid
;
11186 usbp
->st_gid
= sbp
->st_gid
;
11187 usbp
->st_rdev
= sbp
->st_rdev
;
11188 #ifndef _POSIX_C_SOURCE
11189 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11190 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11191 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11192 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11193 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11194 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11195 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11196 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11198 usbp
->st_atime
= sbp
->st_atime
;
11199 usbp
->st_atimensec
= sbp
->st_atimensec
;
11200 usbp
->st_mtime
= sbp
->st_mtime
;
11201 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11202 usbp
->st_ctime
= sbp
->st_ctime
;
11203 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11204 usbp
->st_birthtime
= sbp
->st_birthtime
;
11205 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11207 usbp
->st_size
= sbp
->st_size
;
11208 usbp
->st_blocks
= sbp
->st_blocks
;
11209 usbp
->st_blksize
= sbp
->st_blksize
;
11210 usbp
->st_flags
= sbp
->st_flags
;
11211 usbp
->st_gen
= sbp
->st_gen
;
11212 usbp
->st_lspare
= sbp
->st_lspare
;
11213 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11214 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
/*
 * munge_user32_stat64: fill a struct user32_stat64 from a struct stat64.
 *
 * *usbp is zeroed first and then every field is assigned from the field
 * of the same name in *sbp, including the birth time.  The timestamps
 * appear in two forms (st_*timespec.tv_sec/tv_nsec and
 * st_*time/st_*timensec); the first group follows
 * "#ifndef _POSIX_C_SOURCE".
 *
 * NOTE(review): the preprocessor lines that separate and close the two
 * timestamp groups, and the function braces, are not part of this
 * listing -- confirm against the full source.
 */
11217 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11219 bzero(usbp
, sizeof(*usbp
));
11221 usbp
->st_dev
= sbp
->st_dev
;
11222 usbp
->st_ino
= sbp
->st_ino
;
11223 usbp
->st_mode
= sbp
->st_mode
;
11224 usbp
->st_nlink
= sbp
->st_nlink
;
11225 usbp
->st_uid
= sbp
->st_uid
;
11226 usbp
->st_gid
= sbp
->st_gid
;
11227 usbp
->st_rdev
= sbp
->st_rdev
;
11228 #ifndef _POSIX_C_SOURCE
11229 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11230 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11231 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11232 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11233 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11234 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11235 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11236 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11238 usbp
->st_atime
= sbp
->st_atime
;
11239 usbp
->st_atimensec
= sbp
->st_atimensec
;
11240 usbp
->st_mtime
= sbp
->st_mtime
;
11241 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11242 usbp
->st_ctime
= sbp
->st_ctime
;
11243 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11244 usbp
->st_birthtime
= sbp
->st_birthtime
;
11245 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11247 usbp
->st_size
= sbp
->st_size
;
11248 usbp
->st_blocks
= sbp
->st_blocks
;
11249 usbp
->st_blksize
= sbp
->st_blksize
;
11250 usbp
->st_flags
= sbp
->st_flags
;
11251 usbp
->st_gen
= sbp
->st_gen
;
11252 usbp
->st_lspare
= sbp
->st_lspare
;
11253 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11254 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11258 * Purge buffer cache for simulating cold starts
/*
 * vnode_purge_callback: per-vnode callback passed to vnode_iterate() by
 * vfs_purge_callback().
 *
 * Calls ubc_msync() over the range [0, ubc_getsize(vp)) with
 * UBC_PUSHALL | UBC_INVALIDATE and returns VNODE_RETURNED.  The cargs
 * argument is unused.
 */
11260 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11262 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11264 return VNODE_RETURNED
;
/*
 * vfs_purge_callback: per-mount callback passed to vfs_iterate() by
 * vfs_purge().
 *
 * Runs vnode_purge_callback() on the mount's vnodes via vnode_iterate()
 * with VNODE_WAIT | VNODE_ITERATE_ALL and returns VFS_RETURNED.  The arg
 * argument is unused.
 */
11267 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11269 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11271 return VFS_RETURNED
;
/*
 * vfs_purge: system call that runs vfs_purge_callback() on mounts via
 * vfs_iterate().
 *
 * The caller's credential is tested with kauth_cred_issuser() first; the
 * statement executed when that test fails, and the return value, are not
 * part of this listing.  None of the arguments are used.
 */
11275 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11277 if (!kauth_cred_issuser(kauth_cred_get()))
11280 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);
11286 * gets the vnode associated with the (unnamed) snapshot directory
11287 * for a Filesystem. The snapshot directory vnode is returned with
11288 * an iocount on it.
/*
 * vnode_get_snapdir: return, through *sdvpp, the snapshot directory vnode
 * of the file system that rvp belongs to.
 *
 * Thin wrapper: forwards to VFS_VGET_SNAPDIR() on vnode_mount(rvp) and
 * returns its result.
 */
11291 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11293 return (VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
));
11297 * Get the snapshot vnode.
11299 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11300 * needs nameidone() on ndp.
11302 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11304 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
/*
 * vnode_get_snapshot: common lookup helper for the snapshot_* routines.
 *
 * As far as this listing shows, the routine:
 *   - resolves dirfd to a vnode (*rvpp) with vnode_getfromfd() and tests
 *     it with vnode_isvroot();
 *   - requires VOL_CAP_INT_SNAPSHOT to be set in both the "valid" and the
 *     "capabilities" interface words reported by vfs_getattr();
 *   - obtains the snapshot directory vnode (*sdvpp) with
 *     vnode_get_snapdir();
 *   - copies the snapshot name in from user space and sanity-checks it as
 *     the comment inside describes;
 *   - runs the MAC snapshot create/delete check when op is CREATE or
 *     DELETE;
 *   - looks the name up with namei() relative to *sdvpp (USEDVP), then
 *     frees the name buffer.
 *
 * NOTE(review): this listing omits parts of the original function (the
 * end of the parameter list, braces, some declarations, error-path
 * statements, cleanup) -- confirm against the full source.
 */
11308 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
11309 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
11310 #if !CONFIG_TRIGGERS
11313 enum path_operation pathop
,
11319 struct vfs_attr vfa
;
11324 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
11328 if (!vnode_isvroot(*rvpp
)) {
11333 /* Make sure the filesystem supports snapshots */
11334 VFSATTR_INIT(&vfa
);
11335 VFSATTR_WANTED(&vfa
, f_capabilities
);
11336 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
11337 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
11338 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
11339 VOL_CAP_INT_SNAPSHOT
)) ||
11340 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
11341 VOL_CAP_INT_SNAPSHOT
))) {
11346 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
/* Kernel copy of the snapshot name, at most MAXPATHLEN bytes. */
11350 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11351 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11356 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11357 * (the length returned by copyinstr includes the terminating NUL)
11359 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
11360 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
/* Scan for a '/'; stopping before name_len means one was found. */
11364 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++);
11365 if (i
< (int)name_len
) {
11371 if (op
== CREATE
) {
11372 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
11374 } else if (op
== DELETE
) {
11375 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
11382 /* Check if the snapshot already exists ... */
11383 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
11384 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
/* The lookup starts at the snapshot directory (USEDVP). */
11385 ndp
->ni_dvp
= *sdvpp
;
11387 error
= namei(ndp
);
11389 FREE(name_buf
, M_TEMP
);
11405 * create a filesystem snapshot (for supporting filesystems)
11407 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11408 * We get to the (unnamed) snapshot directory vnode and create the vnode
11409 * for the snapshot in it.
11413 * a) Passed in name for snapshot cannot have slashes.
11414 * b) name can't be "." or ".."
11416 * Since this requires superuser privileges, vnode_authorize calls are not
/*
 * snapshot_create: create a named snapshot in the snapshot directory of
 * the file system referenced by dirfd.
 *
 * As far as this listing shows, the routine calls vnode_get_snapshot()
 * with op CREATE, releases namend.ni_vp when the lookup found an existing
 * vnode, and otherwise creates a VREG node with mode 0 through
 * vn_create() using VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT.  nameidone()
 * and vnode_put(snapdvp) follow.
 *
 * NOTE(review): this listing omits parts of the original function (the
 * end of the parameter list, braces, some declarations, error values,
 * the remaining cleanup and the return) -- confirm against the full
 * source.
 */
11420 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11423 vnode_t rvp
, snapdvp
;
11425 struct nameidata namend
;
11427 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
/* The lookup found an existing vnode under that name. */
11432 if (namend
.ni_vp
) {
11433 vnode_put(namend
.ni_vp
);
11436 struct vnode_attr va
;
11437 vnode_t vp
= NULLVP
;
11440 VATTR_SET(&va
, va_type
, VREG
);
11441 VATTR_SET(&va
, va_mode
, 0);
11443 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
11444 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
11449 nameidone(&namend
);
11450 vnode_put(snapdvp
);
11456 * Delete a Filesystem snapshot
11458 * get the vnode for the unnamed snapshot directory and the snapshot and
11459 * delete the snapshot.
/*
 * snapshot_delete: remove a named snapshot from the snapshot directory of
 * the file system referenced by dirfd.
 *
 * As far as this listing shows, the routine calls vnode_get_snapshot()
 * with op DELETE, removes the snapshot vnode with VNOP_REMOVE() using
 * VNODE_REMOVE_SKIP_NAMESPACE_EVENT, and then releases namend.ni_vp, the
 * nameidata and snapdvp.
 *
 * NOTE(review): this listing omits parts of the original function (the
 * end of the parameter list, braces, some declarations, error handling,
 * the remaining cleanup and the return) -- confirm against the full
 * source.
 */
11462 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11465 vnode_t rvp
, snapdvp
;
11467 struct nameidata namend
;
11469 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
11474 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
11475 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
11477 vnode_put(namend
.ni_vp
);
11478 nameidone(&namend
);
11479 vnode_put(snapdvp
);
11486 * Revert a filesystem to a snapshot
11488 * Marks the filesystem to revert to the given snapshot on next mount.
/*
 * snapshot_revert: ask the file system referenced by dirfd to revert to
 * the named snapshot.
 *
 * As far as this listing shows, the routine resolves dirfd to a vnode,
 * copies the snapshot name into a MAXPATHLEN kernel buffer, runs
 * mac_mount_check_snapshot_revert(), takes a mount iteration reference
 * (mount_iterref), describes the name in a struct componentname and issues
 * VFSIOC_REVERT_SNAPSHOT through VFS_IOCTL().  A second path, introduced
 * by the "try again using VNOP_IOCTL" comment, looks the snapshot up with
 * vnode_get_snapshot() and issues APFS_REVERT_TO_SNAPSHOT through
 * VNOP_IOCTL() on the snapshot vnode.  The name buffer is freed on each of
 * the visible exit paths.
 *
 * NOTE(review): this listing omits parts of the original function (the
 * end of the parameter list, braces, some declarations, the conditions
 * guarding each FREE(), the #endif lines, returns) -- confirm against the
 * full source.
 */
11491 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11497 struct fs_snapshot_revert_args revert_data
;
11498 struct componentname cnp
;
11502 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11506 mp
= vnode_mount(rvp
);
11508 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11509 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11511 FREE(name_buf
, M_TEMP
);
11517 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
11519 FREE(name_buf
, M_TEMP
);
11526 * Grab mount_iterref so that we can release the vnode,
11527 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11529 error
= mount_iterref (mp
, 0);
11532 FREE(name_buf
, M_TEMP
);
/* Describe the snapshot name as a single, final path component. */
11536 memset(&cnp
, 0, sizeof(cnp
));
11537 cnp
.cn_pnbuf
= (char *)name_buf
;
11538 cnp
.cn_nameiop
= LOOKUP
;
11539 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11540 cnp
.cn_pnlen
= MAXPATHLEN
;
11541 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11542 cnp
.cn_namelen
= (int)name_len
;
11543 revert_data
.sr_cnp
= &cnp
;
11545 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
11546 mount_iterdrop(mp
);
11547 FREE(name_buf
, M_TEMP
);
11550 /* If there was any error, try again using VNOP_IOCTL */
11553 struct nameidata namend
;
11555 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
/* Local fallback definitions, used only when the macros are not already defined. */
11562 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
11563 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11566 #ifndef APFS_REVERT_TO_SNAPSHOT
11567 #define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11570 error
= VNOP_IOCTL(namend
.ni_vp
, APFS_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
11573 vnode_put(namend
.ni_vp
);
11574 nameidone(&namend
);
11575 vnode_put(snapdvp
);
11583 * rename a Filesystem snapshot
11585 * get the vnode for the unnamed snapshot directory and the snapshot and
11586 * rename the snapshot. This is a very specialised (and simple) case of
11587 * rename(2) (which has to deal with a lot more complications). It differs
11588 * slightly from rename(2) in that EEXIST is returned if the new name exists.
/*
 * snapshot_rename: rename a snapshot inside the snapshot directory of the
 * file system referenced by dirfd.
 *
 * As far as this listing shows, the routine heap-allocates a pair of
 * nameidata structures, looks up the old name with vnode_get_snapshot()
 * (op DELETE), copies the new name into a MAXPATHLEN kernel buffer and
 * sanity-checks it, runs mac_mount_check_snapshot_create(), looks up the
 * new name relative to snapdvp, releases tond->ni_vp when the new name
 * already exists, and otherwise calls VNOP_RENAME() within snapdvp.  The
 * name buffer, snapdvp and the nameidata pair are released afterwards.
 *
 * NOTE(review): this listing omits parts of the original function
 * (braces, some declarations including the struct that holds the two
 * nameidata members, error values, labels, part of the cleanup, the
 * return) -- confirm against the full source.
 */
11591 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
11592 __unused
uint32_t flags
, vfs_context_t ctx
)
11594 vnode_t rvp
, snapdvp
;
11596 caddr_t newname_buf
;
11599 struct nameidata
*fromnd
, *tond
;
11600 /* carving out a chunk for structs that are too big to be on stack. */
11602 struct nameidata from_node
;
11603 struct nameidata to_node
;
11606 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
11607 fromnd
= &__rename_data
->from_node
;
11608 tond
= &__rename_data
->to_node
;
11610 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
11614 fvp
= fromnd
->ni_vp
;
11616 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11617 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
11622 * Some sanity checks- new name can't be empty, "." or ".." or have
11624 * (the length returned by copyinstr includes the terminating NUL)
11626 * The FS rename VNOP is suppossed to handle this but we'll pick it
11629 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
11630 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
/* Scan for a '/'; stopping before name_len means one was found. */
11634 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++);
11635 if (i
< (int)name_len
) {
11641 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
11647 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
11648 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
/* The lookup of the new name starts at the snapshot directory (USEDVP). */
11649 tond
->ni_dvp
= snapdvp
;
11651 error
= namei(tond
);
11654 } else if (tond
->ni_vp
) {
11656 * snapshot rename behaves differently than rename(2) - if the
11657 * new name exists, EEXIST is returned.
11659 vnode_put(tond
->ni_vp
);
/* Source and target directory are both snapdvp; no target vnode is passed (NULLVP). */
11664 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
11665 &tond
->ni_cnd
, ctx
);
11670 FREE(newname_buf
, M_TEMP
);
11672 vnode_put(snapdvp
);
11676 FREE(__rename_data
, M_TEMP
);
11681 * Mount a Filesystem snapshot
11683 * get the vnode for the unnamed snapshot directory and the snapshot and
11684 * mount the snapshot.
/*
 * snapshot_mount: mount the snapshot named by `name` on top of the
 * directory named by `directory`.
 *
 * dirfd     - descriptor handed to vnode_get_snapshot() to locate the snapshot
 * name      - user address of the snapshot name
 * directory - user address of the path of the vnode to be covered
 * mnt_data  - unused (marked __unused)
 * flags     - unused (marked __unused)
 * ctx       - VFS context passed to the lookups and to mount_common()
 *
 * NOTE(review): this listing is a lossy extract. The return type, some
 * local declarations, the error checks and the cleanup labels are not
 * visible, so the comments below cover only the statements that are shown.
 */
11687 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
11688 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
11690 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
11692 struct nameidata
*snapndp
, *dirndp
;
11693 /* carving out a chunk for structs that are too big to be on stack. */
11695 struct nameidata snapnd
;
11696 struct nameidata dirnd
;
11697 } * __snapshot_mount_data
;
/*
 * Both nameidata structs come from one allocation; snapndp and dirndp
 * point into it.
 */
11699 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
11701 snapndp
= &__snapshot_mount_data
->snapnd
;
11702 dirndp
= &__snapshot_mount_data
->dirnd
;
/*
 * Look up the snapshot by name with the LOOKUP nameiop. The trailing
 * arguments of this call are not visible in this extract.
 */
11704 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
11709 snapvp
= snapndp
->ni_vp
;
/*
 * Guard against rvp having no mount or belonging to dead_mountp.
 * NOTE(review): the branch body is not visible here.
 */
11710 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
11715 /* Get the vnode to be covered */
/*
 * The path comes straight from user space (UIO_USERSPACE); WANTPARENT
 * is requested and ni_dvp is read into pvp after namei.
 */
11716 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
11717 UIO_USERSPACE
, directory
, ctx
);
11718 error
= namei(dirndp
);
11722 vp
= dirndp
->ni_vp
;
11723 pvp
= dirndp
->ni_dvp
;
/*
 * Special-cases a target that is the root vnode (VROOT) of the root
 * filesystem (MNT_ROOTFS). NOTE(review): what happens in that case is
 * not visible in this extract.
 */
11725 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
/*
 * Package the mount of rvp and the component name of the snapshot
 * lookup into smnt_data, then hand its address to mount_common() using
 * the filesystem type name of that mount and KERNEL_MOUNT_SNAPSHOT.
 */
11728 mount_t mp
= vnode_mount(rvp
);
11729 struct fs_snapshot_mount_args smnt_data
;
11731 smnt_data
.sm_mp
= mp
;
11732 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
11733 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
11734 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), 0,
11735 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
/*
 * Cleanup visible in this extract: drop snapdvp, finish the snapshot
 * lookup and free the nameidata chunk. NOTE(review): other releases
 * (for example of vp, pvp, snapvp, rvp and dirndp) are not shown here.
 */
11743 vnode_put(snapdvp
);
11745 nameidone(snapndp
);
11747 FREE(__snapshot_mount_data
, M_TEMP
);
11752 * Root from a snapshot of the filesystem
11754 * Marks the filesystem to root from the given snapshot on next boot.
/*
 * snapshot_root: pass a snapshot name to the filesystem through the
 * VFSIOC_ROOT_SNAPSHOT ioctl on the mount that dirfd belongs to.
 *
 * dirfd - descriptor resolved to a vnode with vnode_getfromfd()
 * name  - user address of the snapshot name
 * flags - unused (marked __unused)
 *
 * NOTE(review): this listing is a lossy extract. The return type, the
 * last parameter, some local declarations, the error checks and the
 * returns are not visible, so the comments below cover only the
 * statements that are shown.
 */
11757 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11763 struct fs_snapshot_root_args root_data
;
11764 struct componentname cnp
;
/* Resolve dirfd to a vnode and take the mount it lives on. */
11768 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11772 mp
= vnode_mount(rvp
);
/*
 * Copy the snapshot name into a MAXPATHLEN kernel buffer. The FREE right
 * after copyinstr is presumably the failure path - the surrounding
 * condition is not visible in this extract.
 */
11774 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11775 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11777 FREE(name_buf
, M_TEMP
);
11782 // XXX MAC checks ?
11785 * Grab mount_iterref so that we can release the vnode,
11786 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
/*
 * The FREE after mount_iterref is presumably again a failure path; the
 * condition guarding it is not visible here.
 */
11788 error
= mount_iterref (mp
, 0);
11791 FREE(name_buf
, M_TEMP
);
/*
 * Hand-build a componentname over name_buf instead of running namei:
 * zeroed, LOOKUP, flagged ISLASTCN | HASBUF, with cn_nameptr aliasing
 * cn_pnbuf.
 * NOTE(review): cn_namelen is taken directly from the copyinstr length;
 * confirm whether that length counts the terminating NUL and whether the
 * filesystem expects it to.
 */
11795 memset(&cnp
, 0, sizeof(cnp
));
11796 cnp
.cn_pnbuf
= (char *)name_buf
;
11797 cnp
.cn_nameiop
= LOOKUP
;
11798 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11799 cnp
.cn_pnlen
= MAXPATHLEN
;
11800 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11801 cnp
.cn_namelen
= (int)name_len
;
11802 root_data
.sr_cnp
= &cnp
;
/* Issue the ioctl on the mount, then drop the iteration reference and the buffer. */
11804 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
11806 mount_iterdrop(mp
);
11807 FREE(name_buf
, M_TEMP
);
11813 * FS snapshot operations dispatcher
11816 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
11817 __unused
int32_t *retval
)
11820 vfs_context_t ctx
= vfs_context_current();
11822 AUDIT_ARG(fd
, uap
->dirfd
);
11823 AUDIT_ARG(value32
, uap
->op
);
11825 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
11830 case SNAPSHOT_OP_CREATE
:
11831 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11833 case SNAPSHOT_OP_DELETE
:
11834 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11836 case SNAPSHOT_OP_RENAME
:
11837 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
11840 case SNAPSHOT_OP_MOUNT
:
11841 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
11842 uap
->data
, uap
->flags
, ctx
);
11844 case SNAPSHOT_OP_REVERT
:
11845 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11847 case SNAPSHOT_OP_ROOT
:
11848 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);