2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
126 #include <miscfs/routefs/routefs.h>
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
135 #define GET_PATH(x) \
136 (x) = get_pathbuff();
137 #define RELEASE_PATH(x) \
140 #define GET_PATH(x) \
141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
142 #define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144 #endif /* CONFIG_FSE */
146 /* struct for checkdirs iteration */
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p
, void * arg
);
154 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
155 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
156 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
157 static int getfsstat_callback(mount_t mp
, void * arg
);
158 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
159 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
160 static int sync_callback(mount_t
, void *);
161 static void sync_thread(void *, __unused wait_result_t
);
162 static int sync_async(int);
163 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
164 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
165 boolean_t partial_copy
);
166 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
168 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
169 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
170 struct componentname
*cnp
, user_addr_t fsmountargs
,
171 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
173 void vfs_notify_mount(vnode_t pdvp
);
175 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
177 struct fd_vn_data
* fg_vn_data_alloc(void);
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
191 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
193 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
197 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
198 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
199 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
200 static void mount_end_update(mount_t mp
);
201 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
202 #endif /* CONFIG_IMGSRC_ACCESS */
204 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
207 int sync_internal(void);
210 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
212 extern lck_grp_t
*fd_vn_lck_grp
;
213 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
214 extern lck_attr_t
*fd_vn_lck_attr
;
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
221 uint32_t mount_generation
= 0;
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops
=0;
226 extern const struct fileops vnops
;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
232 * Virtual File System System Calls
235 #if NFSCLIENT || DEVFS || ROUTEFS
237 * Private in-kernel mounting spi (NFS only, not exported)
241 vfs_iskernelmount(mount_t mp
)
243 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
248 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
249 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
255 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
256 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
259 * Get the vnode to be covered if it's not supplied
269 char *pnbuf
= CAST_DOWN(char *, path
);
271 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
272 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
276 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
277 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
287 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
290 * Mount a file system.
294 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
296 struct __mac_mount_args muap
;
298 muap
.type
= uap
->type
;
299 muap
.path
= uap
->path
;
300 muap
.flags
= uap
->flags
;
301 muap
.data
= uap
->data
;
302 muap
.mac_p
= USER_ADDR_NULL
;
303 return (__mac_mount(p
, &muap
, retval
));
307 vfs_notify_mount(vnode_t pdvp
)
309 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
310 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
332 boolean_t root_fs_upgrade_try
= FALSE
;
335 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
339 int need_nameidone
= 0;
340 vfs_context_t ctx
= vfs_context_current();
341 char fstypename
[MFSNAMELEN
];
344 char *labelstr
= NULL
;
345 int flags
= uap
->flags
;
347 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
348 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
353 * Get the fs type name from user space
355 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
360 * Get the vnode to be covered
362 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
363 UIO_USERSPACE
, uap
->path
, ctx
);
372 #ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags
== MNT_IMGSRC_BY_INDEX
) {
375 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
376 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
379 #endif /* CONFIG_IMGSRC_ACCESS */
383 * Get the label string (if any) from user space
385 if (uap
->mac_p
!= USER_ADDR_NULL
) {
390 struct user64_mac mac64
;
391 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
392 mac
.m_buflen
= mac64
.m_buflen
;
393 mac
.m_string
= mac64
.m_string
;
395 struct user32_mac mac32
;
396 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
397 mac
.m_buflen
= mac32
.m_buflen
;
398 mac
.m_string
= mac32
.m_string
;
402 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
403 (mac
.m_buflen
< 2)) {
407 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
408 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
412 AUDIT_ARG(mac_string
, labelstr
);
414 #endif /* CONFIG_MACF */
416 AUDIT_ARG(fflags
, flags
);
419 if (flags
& MNT_UNION
) {
420 /* No union mounts on release kernels */
426 if ((vp
->v_flag
& VROOT
) &&
427 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
428 if (!(flags
& MNT_UNION
)) {
433 * For a union mount on '/', treat it as fresh
434 * mount instead of update.
435 * Otherwise, union mounting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
438 * after it gets ENOENT on union mount.
440 flags
= (flags
& ~(MNT_UPDATE
));
444 if ((flags
& MNT_RDONLY
) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
454 * bitmap optimization.
456 #if CHECK_CS_VALIDATION_BITMAP
457 if ((flags
& MNT_RDONLY
) == 0 ) {
458 root_fs_upgrade_try
= TRUE
;
463 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
464 labelstr
, FALSE
, ctx
);
470 FREE(labelstr
, M_MACTEMP
);
471 #endif /* CONFIG_MACF */
479 if (need_nameidone
) {
487 * common mount implementation (final stage of mounting)
490 * fstypename file system type (i.e. its vfs name)
491 * pvp parent of covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
501 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
502 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
503 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
506 #pragma unused(labelstr)
508 struct vnode
*devvp
= NULLVP
;
509 struct vnode
*device_vnode
= NULLVP
;
514 struct vfstable
*vfsp
= (struct vfstable
*)0;
515 struct proc
*p
= vfs_context_proc(ctx
);
517 user_addr_t devpath
= USER_ADDR_NULL
;
520 boolean_t vfsp_ref
= FALSE
;
521 boolean_t is_rwlock_locked
= FALSE
;
522 boolean_t did_rele
= FALSE
;
523 boolean_t have_usecount
= FALSE
;
526 * Process an update for an existing mount
528 if (flags
& MNT_UPDATE
) {
529 if ((vp
->v_flag
& VROOT
) == 0) {
535 /* unmount in progress return error */
537 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
543 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
544 is_rwlock_locked
= TRUE
;
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
549 if ((flags
& MNT_RELOAD
) &&
550 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
559 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
560 ((flags
& MNT_CPROTECT
) == 0)) {
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
568 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
572 #endif /* CONFIG_IMGSRC_ACCESS */
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
578 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
579 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
583 error
= mac_mount_check_remount(ctx
, mp
);
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
592 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
593 flags
|= MNT_NOSUID
| MNT_NODEV
;
594 if (mp
->mnt_flag
& MNT_NOEXEC
)
601 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
603 vfsp
= mp
->mnt_vtable
;
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
610 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
611 flags
|= MNT_NOSUID
| MNT_NODEV
;
612 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text
, fstypename
);
619 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
620 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
621 vfsp
->vfc_refcount
++;
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
634 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
635 error
= EINVAL
; /* unsupported request */
639 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
645 * Allocate and initialize the filesystem (mount_t)
647 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
649 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
652 /* Initialize the default IO constraints */
653 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
654 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
655 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
656 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
657 mp
->mnt_devblocksize
= DEV_BSIZE
;
658 mp
->mnt_alignmentmask
= PAGE_MASK
;
659 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
662 mp
->mnt_realrootvp
= NULLVP
;
663 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
665 TAILQ_INIT(&mp
->mnt_vnodelist
);
666 TAILQ_INIT(&mp
->mnt_workerqueue
);
667 TAILQ_INIT(&mp
->mnt_newvnodes
);
669 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
670 is_rwlock_locked
= TRUE
;
671 mp
->mnt_op
= vfsp
->vfc_vfsops
;
672 mp
->mnt_vtable
= vfsp
;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
675 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
676 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
677 mp
->mnt_vnodecovered
= vp
;
678 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
679 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
680 mp
->mnt_devbsdunit
= 0;
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
685 #if NFSCLIENT || DEVFS || ROUTEFS
687 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
688 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
689 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
690 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
694 * Set the mount level flags.
696 if (flags
& MNT_RDONLY
)
697 mp
->mnt_flag
|= MNT_RDONLY
;
698 else if (mp
->mnt_flag
& MNT_RDONLY
) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
705 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
707 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
708 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
709 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
710 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
711 MNT_QUARANTINE
| MNT_CPROTECT
);
716 * On release builds of iOS based platforms, always enforce NOSUID and NODEV on
717 * all mounts. We do this here because we can catch update mounts as well as
718 * non-update mounts in this case.
720 mp
->mnt_flag
|= (MNT_NOSUID
);
724 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
725 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
726 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
727 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
728 MNT_QUARANTINE
| MNT_CPROTECT
);
731 if (flags
& MNT_MULTILABEL
) {
732 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
736 mp
->mnt_flag
|= MNT_MULTILABEL
;
740 * Process device path for local file systems if requested
742 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
743 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
744 if (vfs_context_is64bit(ctx
)) {
745 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
747 fsmountargs
+= sizeof(devpath
);
750 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
752 /* munge into LP64 addr */
753 devpath
= CAST_USER_ADDR_T(tmp
);
754 fsmountargs
+= sizeof(tmp
);
757 /* Lookup device and authorize access to it */
761 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
762 if ( (error
= namei(&nd
)) )
765 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
770 if (devvp
->v_type
!= VBLK
) {
774 if (major(devvp
->v_rdev
) >= nblkdev
) {
779 * If mount by non-root, then verify that user has necessary
780 * permissions on the device.
782 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
783 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
785 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
786 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
787 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
791 /* On first mount, preflight and open device */
792 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
793 if ( (error
= vnode_ref(devvp
)) )
796 * Disallow multiple mounts of the same device.
797 * Disallow mounting of a device that is currently in use
798 * (except for root, which might share swap device for miniroot).
799 * Flush out any old buffers remaining from a previous use.
801 if ( (error
= vfs_mountedon(devvp
)) )
804 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
808 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
812 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
815 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
817 error
= mac_vnode_check_open(ctx
,
819 ronly
? FREAD
: FREAD
|FWRITE
);
823 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
826 mp
->mnt_devvp
= devvp
;
827 device_vnode
= devvp
;
829 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
830 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
831 (device_vnode
= mp
->mnt_devvp
)) {
835 * If upgrade to read-write by non-root, then verify
836 * that user has necessary permissions on the device.
838 vnode_getalways(device_vnode
);
840 if (suser(vfs_context_ucred(ctx
), NULL
) &&
841 (error
= vnode_authorize(device_vnode
, NULL
,
842 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
844 vnode_put(device_vnode
);
848 /* Tell the device that we're upgrading */
849 dev
= (dev_t
)device_vnode
->v_rdev
;
852 if ((u_int
)maj
>= (u_int
)nblkdev
)
853 panic("Volume mounted on a device with invalid major number.");
855 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
856 vnode_put(device_vnode
);
857 device_vnode
= NULLVP
;
864 if ((flags
& MNT_UPDATE
) == 0) {
865 mac_mount_label_init(mp
);
866 mac_mount_label_associate(ctx
, mp
);
869 if ((flags
& MNT_UPDATE
) != 0) {
870 error
= mac_mount_check_label_update(ctx
, mp
);
877 * Mount the filesystem.
879 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
880 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
881 (caddr_t
)fsmountargs
, 0, ctx
);
883 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
886 if (flags
& MNT_UPDATE
) {
887 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
888 mp
->mnt_flag
&= ~MNT_RDONLY
;
890 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
891 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
893 mp
->mnt_flag
= flag
; /* restore flag value */
894 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
895 lck_rw_done(&mp
->mnt_rwlock
);
896 is_rwlock_locked
= FALSE
;
898 enablequotas(mp
, ctx
);
903 * Put the new filesystem on the mount list after root.
906 struct vfs_attr vfsattr
;
908 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
909 error
= VFS_ROOT(mp
, &rvp
, ctx
);
911 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
914 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
916 * drop reference provided by VFS_ROOT
926 CLR(vp
->v_flag
, VMOUNT
);
927 vp
->v_mountedhere
= mp
;
931 * taking the name_cache_lock exclusively will
932 * ensure that everyone is out of the fast path who
933 * might be trying to use a now stale copy of
934 * vp->v_mountedhere->mnt_realrootvp
935 * bumping mount_generation causes the cached values
942 error
= vnode_ref(vp
);
947 have_usecount
= TRUE
;
949 error
= checkdirs(vp
, ctx
);
951 /* Unmount the filesystem as cdir/rdirs cannot be updated */
955 * there is no cleanup code here so I have made it void
956 * we need to revisit this
958 (void)VFS_START(mp
, 0, ctx
);
960 if (mount_list_add(mp
) != 0) {
962 * The system is shutting down trying to umount
963 * everything, so fail with a plausible errno.
968 lck_rw_done(&mp
->mnt_rwlock
);
969 is_rwlock_locked
= FALSE
;
971 /* Check if this mounted file system supports EAs or named streams. */
972 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
973 VFSATTR_INIT(&vfsattr
);
974 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
975 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
976 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
977 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
978 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
979 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
980 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
983 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
984 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
985 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
988 /* Check if this file system supports path from id lookups. */
989 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
990 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
991 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
992 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
993 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
994 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
997 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
998 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
999 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
1002 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
1003 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
1005 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
1006 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
1008 /* increment the operations count */
1009 OSAddAtomic(1, &vfs_nummntops
);
1010 enablequotas(mp
, ctx
);
1013 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1016 * cache the IO attributes for the underlying physical media...
1017 * an error return indicates the underlying driver doesn't
1018 * support all the queries necessary... however, reasonable
1019 * defaults will have been set, so no reason to bail or care
1021 vfs_init_io_attributes(device_vnode
, mp
);
1024 /* Now that mount is setup, notify the listeners */
1025 vfs_notify_mount(pvp
);
1026 IOBSDMountChange(mp
, kIOMountChangeMount
);
1029 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1030 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1031 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1032 mp
->mnt_vtable
->vfc_name
, error
);
1035 vnode_lock_spin(vp
);
1036 CLR(vp
->v_flag
, VMOUNT
);
1039 mp
->mnt_vtable
->vfc_refcount
--;
1040 mount_list_unlock();
1042 if (device_vnode
) {
1043 vnode_rele(device_vnode
);
1044 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1046 lck_rw_done(&mp
->mnt_rwlock
);
1047 is_rwlock_locked
= FALSE
;
1050 * if we get here, we have a mount structure that needs to be freed,
1051 * but since the coveredvp hasn't yet been updated to point at it,
1052 * no need to worry about other threads holding a crossref on this mp
1053 * so it's ok to just free it
1055 mount_lock_destroy(mp
);
1057 mac_mount_label_destroy(mp
);
1059 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1063 * drop I/O count on the device vp if there was one
1065 if (devpath
&& devvp
)
1070 /* Error condition exits */
1072 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1075 * If the mount has been placed on the covered vp,
1076 * it may have been discovered by now, so we have
1077 * to treat this just like an unmount
1079 mount_lock_spin(mp
);
1080 mp
->mnt_lflag
|= MNT_LDEAD
;
1083 if (device_vnode
!= NULLVP
) {
1084 vnode_rele(device_vnode
);
1085 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1090 vnode_lock_spin(vp
);
1093 vp
->v_mountedhere
= (mount_t
) 0;
1097 if (have_usecount
) {
1101 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1104 if (devpath
&& devvp
)
1107 /* Release mnt_rwlock only when it was taken */
1108 if (is_rwlock_locked
== TRUE
) {
1109 lck_rw_done(&mp
->mnt_rwlock
);
1113 if (mp
->mnt_crossref
)
1114 mount_dropcrossref(mp
, vp
, 0);
1116 mount_lock_destroy(mp
);
1118 mac_mount_label_destroy(mp
);
1120 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1125 vfsp
->vfc_refcount
--;
1126 mount_list_unlock();
1133 * Flush in-core data, check for competing mount attempts,
1137 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1140 #pragma unused(cnp,fsname)
1142 struct vnode_attr va
;
1147 * If the user is not root, ensure that they own the directory
1148 * onto which we are attempting to mount.
1151 VATTR_WANTED(&va
, va_uid
);
1152 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1153 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1154 (!vfs_context_issuser(ctx
)))) {
1160 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1163 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1166 if (vp
->v_type
!= VDIR
) {
1171 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1177 error
= mac_mount_check_mount(ctx
, vp
,
1183 vnode_lock_spin(vp
);
1184 SET(vp
->v_flag
, VMOUNT
);
1191 #if CONFIG_IMGSRC_ACCESS
1194 #define IMGSRC_DEBUG(args...) printf(args)
1196 #define IMGSRC_DEBUG(args...) do { } while(0)
1200 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1202 struct nameidata nd
;
1203 vnode_t vp
, realdevvp
;
1207 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1208 if ( (error
= namei(&nd
)) ) {
1209 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1215 if (!vnode_isblk(vp
)) {
1216 IMGSRC_DEBUG("Not block device.\n");
1221 realdevvp
= mp
->mnt_devvp
;
1222 if (realdevvp
== NULLVP
) {
1223 IMGSRC_DEBUG("No device backs the mount.\n");
1228 error
= vnode_getwithref(realdevvp
);
1230 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1234 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1235 IMGSRC_DEBUG("Wrong dev_t.\n");
1240 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1243 * If mount by non-root, then verify that user has necessary
1244 * permissions on the device.
1246 if (!vfs_context_issuser(ctx
)) {
1247 accessmode
= KAUTH_VNODE_READ_DATA
;
1248 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1249 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1250 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1251 IMGSRC_DEBUG("Access denied.\n");
1259 vnode_put(realdevvp
);
/*
 * place_mount_and_checkdirs
 *
 * Attaches mp on top of the covered vnode vp. Visible steps:
 *   - record vp in mp->mnt_vnodecovered;
 *   - under the vnode spin lock, clear VMOUNT and point vp->v_mountedhere
 *     at mp;
 *   - release the name cache lock (NOTE(review): the matching acquire and
 *     the mount_generation bump mentioned in the residual comment are not
 *     visible in this listing);
 *   - take a usecount on vp with vnode_ref() and call checkdirs(vp, ctx);
 *   - on the visible failure path mp->mnt_vnodecovered is reset to NULLVP.
 *
 * Parameters:
 *   mp   mount being placed
 *   vp   vnode that becomes covered by mp
 *   ctx  caller context, forwarded to checkdirs()
 */
1270 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1271 * and call checkdirs()
1274 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1278 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1280 vnode_lock_spin(vp
);
1281 CLR(vp
->v_flag
, VMOUNT
);
1282 vp
->v_mountedhere
= mp
;
1286 * taking the name_cache_lock exclusively will
1287 * insure that everyone is out of the fast path who
1288 * might be trying to use a now stale copy of
1289 * vp->v_mountedhere->mnt_realrootvp
1290 * bumping mount_generation causes the cached values
1295 name_cache_unlock();
1297 error
= vnode_ref(vp
);
1302 error
= checkdirs(vp
, ctx
);
1304 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1311 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp
 *
 * Reverses the linkage between mp and the covered vnode vp: takes the vnode
 * spin lock, resets vp->v_mountedhere to NULL, and resets
 * mp->mnt_vnodecovered to NULLVP.
 * NOTE(review): the unlock and any reference release between these
 * statements are not visible in this listing.
 */
1317 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1320 vnode_lock_spin(vp
);
1321 vp
->v_mountedhere
= (mount_t
)NULL
;
1324 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update
 *
 * Starts an update of an existing mount. Visible checks, in order:
 *   - under the mount spin lock, test MNT_LUNMOUNT (unmount in progress);
 *   - take mp->mnt_rwlock exclusively;
 *   - MNT_RELOAD is only accepted while the mount is MNT_RDONLY;
 *   - the caller must be the recorded owner (f_owner) or superuser;
 *   - mac_mount_check_remount() is consulted.
 * The final lck_rw_done() is presumably the failure path that drops the
 * lock again; on success the lock stays held for mount_end_update().
 * NOTE(review): the error codes and branch structure are not visible in
 * this listing.
 *
 * Parameters:
 *   mp     mount being updated
 *   ctx    caller context (credential and MAC checks)
 *   flags  mount flags; only MNT_RELOAD is tested in the visible code
 */
1328 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1332 /* unmount in progress return error */
1333 mount_lock_spin(mp
);
1334 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1339 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1342 * We only allow the filesystem to be reloaded if it
1343 * is currently mounted read-only.
1345 if ((flags
& MNT_RELOAD
) &&
1346 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1352 * Only root, or the user that did the original mount is
1353 * permitted to update it.
1355 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1356 (!vfs_context_issuser(ctx
))) {
1361 error
= mac_mount_check_remount(ctx
, mp
);
1369 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update
 *
 * Ends a mount update by releasing mp->mnt_rwlock with lck_rw_done().
 * Counterpart of mount_begin_update(), which takes the same lock
 * exclusively.
 */
1376 mount_end_update(mount_t mp
)
1378 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode
 *
 * Fetches the imageboot source root vnode for nesting level height from the
 * imgsrc_rootvnodes table. The index is range-checked against
 * MAX_IMAGEBOOT_NESTING before the table is read, and the entry is only
 * used when it is not NULLVP and vnode_get() on it succeeds.
 * NOTE(review): the store through rvpp and the returned error codes are
 * not visible in this listing.
 *
 * Parameters:
 *   height  nesting level to look up
 *   rvpp    out parameter for the root vnode
 */
1382 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1386 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1390 vp
= imgsrc_rootvnodes
[height
];
1391 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
/*
 * relocate_imageboot_source
 *
 * Moves an imageboot source filesystem so that it is mounted on the covered
 * vnode vp. Steps visible in this listing, in order:
 *   1. give up when imgsrc_rootvnodes[0] is NULLVP or the caller is not
 *      superuser;
 *   2. copy in the mount arguments: a user64 or user32 mnt_imgsrc_args
 *      structure (height, flags, device path), or a bare device-path
 *      pointer for binary compatibility. NOTE(review): the conditions that
 *      choose between these forms (presumably by_index and is64bit) are
 *      not visible here;
 *   3. fetch the root vnode for the requested height with
 *      get_imgsrc_rootvnode() and allocate a MAXPATHLEN scratch buffer;
 *   4. reject a mount already flagged MNTK_HAS_MOVED, take the update lock
 *      with mount_begin_update(), then test the flag again under the lock;
 *   5. prepare_coveredvp(), compare fsname with mp->mnt_vtable->vfc_name,
 *      and for VFC_VFSLOCALARGS filesystems with a non-NULL devpath call
 *      authorize_devpath_and_update_mntfromname();
 *   6. place_mount_and_checkdirs(), save the old f_mntonname, install
 *      cnp->cn_pnbuf as the new one, set MNTK_HAS_MOVED, mount_list_add();
 *   7. mount_end_update(), free the scratch buffer, vfs_notify_mount(pvp).
 * The trailing statements restore f_mntonname, clear MNTK_HAS_MOVED, undo
 * the placement or clear VMOUNT, and release the lock and the buffer;
 * presumably these are the error-unwind labels, which are not visible.
 *
 * Parameters:
 *   pvp          vnode passed to vfs_notify_mount() after a successful move
 *   vp           vnode to be covered by the relocated mount
 *   cnp          component name; cn_pnbuf supplies the new mount-on path
 *   fsname       filesystem type name supplied by the caller
 *   ctx          caller context
 *   is64bit      NOTE(review): use not visible; presumably selects the
 *                user64 versus user32 argument layout
 *   fsmountargs  user-space address of the mount arguments
 *   by_index     NOTE(review): use not visible in this listing
 */
1400 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1401 const char *fsname
, vfs_context_t ctx
,
1402 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1406 boolean_t placed
= FALSE
;
1407 vnode_t devvp
= NULLVP
;
1408 struct vfstable
*vfsp
;
1409 user_addr_t devpath
;
1410 char *old_mntonname
;
1415 /* If we didn't imageboot, nothing to move */
1416 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1420 /* Only root can do this */
1421 if (!vfs_context_issuser(ctx
)) {
1425 IMGSRC_DEBUG("looking for root vnode.\n");
1428 * Get root vnode of filesystem we're moving.
1432 struct user64_mnt_imgsrc_args mia64
;
1433 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1435 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1439 height
= mia64
.mi_height
;
1440 flags
= mia64
.mi_flags
;
1441 devpath
= mia64
.mi_devpath
;
1443 struct user32_mnt_imgsrc_args mia32
;
1444 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1446 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1450 height
= mia32
.mi_height
;
1451 flags
= mia32
.mi_flags
;
1452 devpath
= mia32
.mi_devpath
;
1456 * For binary compatibility--assumes one level of nesting.
1459 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1463 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1466 /* munge into LP64 addr */
1467 devpath
= CAST_USER_ADDR_T(tmp
);
1475 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1479 error
= get_imgsrc_rootvnode(height
, &rvp
);
1481 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1485 IMGSRC_DEBUG("got root vnode.\n");
1487 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1489 /* Can only move once */
1490 mp
= vnode_mount(rvp
);
1491 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1492 IMGSRC_DEBUG("Already moved.\n");
1497 IMGSRC_DEBUG("Starting updated.\n");
1499 /* Get exclusive rwlock on mount, authorize update on mp */
1500 error
= mount_begin_update(mp
, ctx
, 0);
1502 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1507 * It can only be moved once. Flag is set under the rwlock,
1508 * so we're now safe to proceed.
1510 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1511 IMGSRC_DEBUG("Already moved [2]\n");
1516 IMGSRC_DEBUG("Preparing coveredvp.\n");
1518 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1519 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1521 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1525 IMGSRC_DEBUG("Covered vp OK.\n");
1527 /* Sanity check the name caller has provided */
1528 vfsp
= mp
->mnt_vtable
;
1529 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1530 IMGSRC_DEBUG("Wrong fs name.\n");
1535 /* Check the device vnode and update mount-from name, for local filesystems */
1536 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1537 IMGSRC_DEBUG("Local, doing device validation.\n");
1539 if (devpath
!= USER_ADDR_NULL
) {
1540 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1542 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1551 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1552 * and increment the name cache's mount generation
1555 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1556 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1563 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1564 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1566 /* Forbid future moves */
1568 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1571 /* Finally, add to mount list, completely ready to go */
1572 if (mount_list_add(mp
) != 0) {
1574 * The system is shutting down trying to umount
1575 * everything, so fail with a plausible errno.
1581 mount_end_update(mp
);
1583 FREE(old_mntonname
, M_TEMP
);
1585 vfs_notify_mount(pvp
);
1589 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1592 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1597 * Placing the mp on the vnode clears VMOUNT,
1598 * so cleanup is different after that point
1601 /* Rele the vp, clear VMOUNT and v_mountedhere */
1602 undo_place_on_covered_vp(mp
, vp
);
1604 vnode_lock_spin(vp
);
1605 CLR(vp
->v_flag
, VMOUNT
);
1609 mount_end_update(mp
);
1613 FREE(old_mntonname
, M_TEMP
);
1617 #endif /* CONFIG_IMGSRC_ACCESS */
/*
 * enablequotas
 *
 * Best-effort activation of disk quotas on a freshly mounted filesystem.
 * The filesystem type name is compared against the literal hfs first
 * (presumably returning early for any other type; the return is not
 * visible). For each quota type below MAXQUOTAS the code then:
 *   - builds <f_mntonname>/<QUOTAOPSNAME>.<extension> in qfpath and looks
 *     it up with namei(); a failed lookup skips this quota type;
 *   - drops the iocount on the found vnode with vnode_put();
 *   - builds <f_mntonname>/<QUOTAFILENAME>.<extension> and passes it to
 *     VFS_QUOTACTL with Q_QUOTAON. The result is deliberately ignored.
 *
 * Parameters:
 *   mp   mount to enable quotas on
 *   ctx  caller context used for lookup and the quota call
 */
1620 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1622 struct nameidata qnd
;
1624 char qfpath
[MAXPATHLEN
];
1625 const char *qfname
= QUOTAFILENAME
;
1626 const char *qfopsname
= QUOTAOPSNAME
;
1627 const char *qfextension
[] = INITQFNAMES
;
1629 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1630 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1634 * Enable filesystem disk quotas if necessary.
1635 * We ignore errors as this should not interfere with final mount
1637 for (type
=0; type
< MAXQUOTAS
; type
++) {
1638 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1639 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1640 CAST_USER_ADDR_T(qfpath
), ctx
);
1641 if (namei(&qnd
) != 0)
1642 continue; /* option file to trigger quotas is not present */
1643 vnode_put(qnd
.ni_vp
);
1645 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1647 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
/*
 * checkdirs_callback
 *
 * Per-process callback used by checkdirs() through proc_iterate(). arg is a
 * struct cdirargs carrying olddp (the newly covered vnode) and newdp (its
 * replacement). The process file-descriptor table is examined: a NULL
 * table returns PROC_RETURNED immediately; otherwise fd_cdir and fd_rdir
 * are each compared with olddp and, when either changed, both fields are
 * written back from the local copies.
 * NOTE(review): the locking, the reference handling on the old and new
 * vnodes, and how fdp is obtained from p are not visible in this listing.
 *
 * Returns PROC_RETURNED on every visible path.
 */
1654 checkdirs_callback(proc_t p
, void * arg
)
1656 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1657 vnode_t olddp
= cdrp
->olddp
;
1658 vnode_t newdp
= cdrp
->newdp
;
1659 struct filedesc
*fdp
;
1663 int cdir_changed
= 0;
1664 int rdir_changed
= 0;
1667 * XXX Also needs to iterate each thread in the process to see if it
1668 * XXX is using a per-thread current working directory, and, if so,
1669 * XXX update that as well.
1674 if (fdp
== (struct filedesc
*)0) {
1676 return(PROC_RETURNED
);
1678 fdp_cvp
= fdp
->fd_cdir
;
1679 fdp_rvp
= fdp
->fd_rdir
;
1682 if (fdp_cvp
== olddp
) {
1689 if (fdp_rvp
== olddp
) {
1696 if (cdir_changed
|| rdir_changed
) {
1698 fdp
->fd_cdir
= fdp_cvp
;
1699 fdp
->fd_rdir
= fdp_rvp
;
1702 return(PROC_RETURNED
);
/*
 * checkdirs
 *
 * Parameters:
 *   olddp  vnode that has just been covered by a mount
 *   ctx    caller context, forwarded to VFS_ROOT()
 *
 * Visible flow: a usecount of exactly 1 on olddp is tested first
 * (presumably nothing else references it, so there is nothing to update);
 * the root of olddp->v_mountedhere is fetched with VFS_ROOT() and a failure
 * there panics; every process is then visited with checkdirs_callback via
 * proc_iterate(); finally the global rootvnode is compared against olddp.
 * NOTE(review): the replacement of rootvnode and the reference drops are
 * not visible in this listing.
 */
1708 * Scan all active processes to see if any of them have a current
1709 * or root directory onto which the new filesystem has just been
1710 * mounted. If so, replace them with the new mount point.
1713 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1718 struct cdirargs cdr
;
1720 if (olddp
->v_usecount
== 1)
1722 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1726 panic("mount: lost mount: error %d", err
);
1733 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1734 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1736 if (rootvnode
== olddp
) {
/*
 * unmount (system call)
 *
 * Looks up uap->path (LOOKUP, FOLLOW, audited, OP_UNMOUNT), runs the MAC
 * check mac_mount_check_umount(), requires that the resulting vnode is the
 * root of its filesystem (VROOT), and hands the mount to safedounmount()
 * together with uap->flags. Per the comment at the call site,
 * safedounmount() consumes the mount reference.
 * NOTE(review): the namei() call itself, the acquisition of the mount
 * reference and the error returns are not visible in this listing.
 */
1748 * Unmount a file system.
1750 * Note: unmount takes a path to the vnode mounted on as argument,
1751 * not special file (as before).
1755 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1760 struct nameidata nd
;
1761 vfs_context_t ctx
= vfs_context_current();
1763 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1764 UIO_USERSPACE
, uap
->path
, ctx
);
1773 error
= mac_mount_check_umount(ctx
, mp
);
1780 * Must be the root of the filesystem
1782 if ((vp
->v_flag
& VROOT
) == 0) {
1788 /* safedounmount consumes the mount ref */
1789 return (safedounmount(mp
, uap
->flags
, ctx
));
/*
 * vfs_unmountbyfsid
 *
 * Unmounts the filesystem identified by fsid. The mount is looked up with
 * mount_list_lookupby_fsid(fsid, 0, 1); a NULL result is handled separately
 * (the returned error is not visible in this listing). Otherwise the mount
 * is passed to safedounmount(), which consumes the mount reference.
 *
 * Parameters:
 *   fsid   filesystem id to look up
 *   flags  unmount flags, forwarded unchanged
 *   ctx    caller context, forwarded unchanged
 */
1793 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1797 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1798 if (mp
== (mount_t
)0) {
1803 /* safedounmount consumes the mount ref */
1804 return(safedounmount(mp
, flags
, ctx
));
/*
 * safedounmount
 *
 * Policy wrapper in front of dounmount(). Visible checks, in order:
 *   - a mount flagged MNT_LNOTRESP combined with MNT_NOBLOCK and without
 *     MNT_FORCE is refused (EBUSY per the residual comment);
 *   - unless the mount is MNTK_PERMIT_UNMOUNT and the request is not
 *     forced, the caller must be the recorded owner (f_owner) or pass
 *     suser();
 *   - the root filesystem (MNT_ROOTFS) is never unmounted: EBUSY;
 *   - under CONFIG_IMGSRC_ACCESS, mounts flagged MNTK_BACKS_ROOT get a
 *     separate branch whose body is not visible in this listing.
 * When every check passes, dounmount(mp, flags, 1, ctx) performs the
 * unmount.
 * NOTE(review): how the mount reference is released on the refusal paths
 * is not visible here.
 */
1809 * The mount struct comes with a mount ref which will be consumed.
1810 * Do the actual file system unmount, prevent some common foot shooting.
1813 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1816 proc_t p
= vfs_context_proc(ctx
);
1819 * If the file system is not responding and MNT_NOBLOCK
1820 * is set and not a forced unmount then return EBUSY.
1822 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1823 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1829 * Skip authorization if the mount is tagged as permissive and
1830 * this is not a forced-unmount attempt.
1832 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1834 * Only root, or the user that did the original mount is
1835 * permitted to unmount this filesystem.
1837 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1838 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1842 * Don't allow unmounting the root file system.
1844 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1845 error
= EBUSY
; /* the root is always busy */
1849 #ifdef CONFIG_IMGSRC_ACCESS
1850 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1854 #endif /* CONFIG_IMGSRC_ACCESS */
1856 return (dounmount(mp
, flags
, 1, ctx
));
/*
 * dounmount
 *
 * Performs the unmount of mp. Sequence visible in this listing:
 *   - for non-forced unmounts fsevent_unmount() is called first;
 *   - an unmount already in progress (MNT_LUNMOUNT) is detected;
 *   - MNT_FORCE sets MNT_LFORCE; MNTK_UNMOUNT and MNT_LUNMOUNT are set,
 *     MNT_ASYNC is cleared and mnt_realrootvp is reset to NULLVP;
 *   - with MNT_NOBLOCK from a process other than kernproc, P_NOREMOTEHANG
 *     is set on the process and its previous state is restored near the
 *     end;
 *   - forced unmounts without MNT_LNOSUB first unmount submounts through
 *     dounmount_submounts();
 *   - mnt_rwlock is taken exclusively; a non-forced unmount releases cached
 *     vnodes (ubc_umount) and, unless read-only, runs VFS_SYNC(MNT_WAIT);
 *   - vflush(), mount_iterdrain(), VFS_UNMOUNT(); the in-progress flags
 *     are cleared again on the visible failure paths;
 *   - the backing device (VFC_VFSLOCALARGS) is closed, the mount is taken
 *     off the mount list, the covered vnode is detached, name cache
 *     entries are purged, VQ_UNMOUNT is signalled and MNT_LDEAD is set;
 *   - finally the covered vnode references are dropped, or for MNT_ROOTFS
 *     the mount structure is destroyed and freed; any other case panics.
 *
 * Parameters:
 *   mp       mount to unmount
 *   flags    MNT_* flags; MNT_FORCE, MNT_NOBLOCK and MNT_LNOSUB are tested
 *   withref  NOTE(review): its use is not visible in this listing; the
 *            callers visible in this chunk pass 1
 *   ctx      caller context
 *
 * NOTE(review): the mount-lock acquire and release calls and most error
 * returns are not visible here, so the exact locking order cannot be
 * stated from this listing.
 */
1864 * Do the actual file system unmount.
1867 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1869 vnode_t coveredvp
= (vnode_t
)0;
1872 int forcedunmount
= 0;
1874 struct vnode
*devvp
= NULLVP
;
1876 proc_t p
= vfs_context_proc(ctx
);
1878 int pflags_save
= 0;
1879 #endif /* CONFIG_TRIGGERS */
1882 if (!(flags
& MNT_FORCE
)) {
1883 fsevent_unmount(mp
, ctx
); /* has to come first! */
1890 * If already an unmount in progress just return EBUSY.
1891 * Even a forced unmount cannot override.
1893 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1900 if (flags
& MNT_FORCE
) {
1902 mp
->mnt_lflag
|= MNT_LFORCE
;
1906 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1907 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1910 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1911 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1912 mp
->mnt_flag
&=~ MNT_ASYNC
;
1914 * anyone currently in the fast path that
1915 * trips over the cached rootvp will be
1916 * dumped out and forced into the slow path
1917 * to regenerate a new cached value
1919 mp
->mnt_realrootvp
= NULLVP
;
1922 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1924 * Force unmount any mounts in this filesystem.
1925 * If any unmounts fail - just leave them dangling.
1928 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1932 * taking the name_cache_lock exclusively will
1933 * insure that everyone is out of the fast path who
1934 * might be trying to use a now stale copy of
1935 * vp->v_mountedhere->mnt_realrootvp
1936 * bumping mount_generation causes the cached values
1941 name_cache_unlock();
1944 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1948 if (forcedunmount
== 0) {
1949 ubc_umount(mp
); /* release cached vnodes */
1950 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1951 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1954 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1955 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1956 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1962 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
1965 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1969 lflags
|= FORCECLOSE
;
1970 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1971 if ((forcedunmount
== 0) && error
) {
1973 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1974 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1975 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1979 /* make sure no one is in the mount iterations or lookup */
1980 mount_iterdrain(mp
);
1982 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1984 mount_iterreset(mp
);
1986 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1987 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1988 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1992 /* increment the operations count */
1994 OSAddAtomic(1, &vfs_nummntops
);
1996 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1997 /* hold an io reference and drop the usecount before close */
1998 devvp
= mp
->mnt_devvp
;
1999 vnode_getalways(devvp
);
2001 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
2003 vnode_clearmountedon(devvp
);
2006 lck_rw_done(&mp
->mnt_rwlock
);
2007 mount_list_remove(mp
);
2008 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
2010 /* mark the mount point hook in the vp but do not drop the ref yet */
2011 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
2013 * The covered vnode needs special handling. Trying to get an
2014 * iocount must not block here as this may lead to deadlocks
2015 * if the Filesystem to which the covered vnode belongs is
2016 * undergoing forced unmounts. Since we hold a usecount, the
2017 * vnode cannot be reused (it can, however, still be terminated)
2019 vnode_getalways(coveredvp
);
2020 vnode_lock_spin(coveredvp
);
2023 coveredvp
->v_mountedhere
= (struct mount
*)0;
2024 CLR(coveredvp
->v_flag
, VMOUNT
);
2026 vnode_unlock(coveredvp
);
2027 vnode_put(coveredvp
);
2031 mp
->mnt_vtable
->vfc_refcount
--;
2032 mount_list_unlock();
2034 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2035 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2037 mp
->mnt_lflag
|= MNT_LDEAD
;
2039 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2041 * do the wakeup here
2042 * in case we block in mount_refdrain
2043 * which will drop the mount lock
2044 * and allow anyone blocked in vfs_busy
2045 * to wakeup and see the LDEAD state
2047 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2048 wakeup((caddr_t
)mp
);
2052 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2053 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2058 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2059 // Restore P_NOREMOTEHANG bit to its previous value
2060 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2061 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2065 * Callback and context are set together under the mount lock, and
2066 * never cleared, so we're safe to examine them here, drop the lock,
2069 if (mp
->mnt_triggercallback
!= NULL
) {
2072 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2073 } else if (did_vflush
) {
2074 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2081 #endif /* CONFIG_TRIGGERS */
2083 lck_rw_done(&mp
->mnt_rwlock
);
2086 wakeup((caddr_t
)mp
);
2089 if ((coveredvp
!= NULLVP
)) {
2090 vnode_t pvp
= NULLVP
;
2093 * The covered vnode needs special handling. Trying to
2094 * get an iocount must not block here as this may lead
2095 * to deadlocks if the Filesystem to which the covered
2096 * vnode belongs is undergoing forced unmounts. Since we
2097 * hold a usecount, the vnode cannot be reused
2098 * (it can, however, still be terminated).
2100 vnode_getalways(coveredvp
);
2102 mount_dropcrossref(mp
, coveredvp
, 0);
2104 * We'll _try_ to detect if this really needs to be
2105 * done. The coveredvp can only be in termination (or
2106 * terminated) if the coveredvp's mount point is in a
2107 * forced unmount (or has been) since we still hold the
2110 if (!vnode_isrecycled(coveredvp
)) {
2111 pvp
= vnode_getparent(coveredvp
);
2113 if (coveredvp
->v_resolve
) {
2114 vnode_trigger_rearm(coveredvp
, ctx
);
2119 vnode_rele(coveredvp
);
2120 vnode_put(coveredvp
);
2124 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2127 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2128 mount_lock_destroy(mp
);
2130 mac_mount_label_destroy(mp
);
2132 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2134 panic("dounmount: no coveredvp");
/*
 * dounmount_submounts
 *
 * Unmounts every filesystem mounted, directly or indirectly, inside mp.
 * Visible algorithm:
 *   - walk the global mountlist (presumably to count entries; the loop
 *     body is not visible) and allocate count fsid_t slots with M_NOWAIT;
 *     a failed allocation releases the mount list lock;
 *   - slot 0 is primed with the fsid of mp itself;
 *   - for every mount after mp on the list, take the fsid of the
 *     filesystem that owns its covered vnode and, when that fsid is
 *     already in the array, append the fsid of the mount itself. This
 *     relies on the list being in mount order, as the residual comment
 *     states;
 *   - after dropping the list lock, walk the array backwards down to
 *     index 1 (slot 0, mp itself, is skipped), look each mount up again by
 *     fsid, call mount_iterdrop() and dounmount(); results are ignored;
 *   - free the array.
 *
 * Parameters:
 *   mp     parent mount whose submounts are removed
 *   flags  unmount flags forwarded to dounmount()
 *   ctx    caller context
 */
2140 * Unmount any mounts in this filesystem.
2143 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2146 fsid_t
*fsids
, fsid
;
2148 int count
= 0, i
, m
= 0;
2153 // Get an array to hold the submounts fsids.
2154 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2156 fsids_sz
= count
* sizeof(fsid_t
);
2157 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2158 if (fsids
== NULL
) {
2159 mount_list_unlock();
2162 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2165 * Fill the array with submount fsids.
2166 * Since mounts are always added to the tail of the mount list, the
2167 * list is always in mount order.
2168 * For each mount check if the mounted-on vnode belongs to a
2169 * mount that's already added to our array of mounts to be unmounted.
2171 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2172 vp
= smp
->mnt_vnodecovered
;
2175 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2176 for (i
= 0; i
<= m
; i
++) {
2177 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2178 fsids
[i
].val
[1] == fsid
.val
[1]) {
2179 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2184 mount_list_unlock();
2186 // Unmount the submounts in reverse order. Ignore errors.
2187 for (i
= m
; i
> 0; i
--) {
2188 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2191 mount_iterdrop(smp
);
2192 (void) dounmount(smp
, flags
, 1, ctx
);
2197 FREE(fsids
, M_TEMP
);
/*
 * mount_dropcrossref
 *
 * Drops one cross reference that vnode dp holds on mount mp. A negative
 * mnt_crossref panics. When mp is no longer the mount recorded in
 * dp->v_mountedhere and the count has reached zero, the mount lock and MAC
 * label are destroyed and the mount structure is freed back to M_MOUNT.
 * vnode_put_locked(dp) appears on both paths, presumably guarded by
 * need_put.
 * NOTE(review): the decrement of mnt_crossref and the vnode lock and unlock
 * calls are not visible in this listing.
 *
 * Parameters:
 *   mp        mount whose cross-reference count is dropped
 *   dp        vnode that held the cross reference
 *   need_put  presumably nonzero when an iocount on dp must also be
 *             released; confirm against the full source
 */
2201 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2206 if (mp
->mnt_crossref
< 0)
2207 panic("mount cross refs -ve");
2209 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2212 vnode_put_locked(dp
);
2215 mount_lock_destroy(mp
);
2217 mac_mount_label_destroy(mp
);
2219 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2223 vnode_put_locked(dp
);
2229 * Sync each mounted filesystem.
/*
 * Tunables for the sync paths below.
 *   print_vmpage_stat  when nonzero, sync() and sync_thread() call
 *                      vm_countdirtypages() after iterating the mounts
 *   sync_timeout       time limit in seconds handed to sync_async() by the
 *                      in-kernel sync entry point
 */
2235 int print_vmpage_stat
=0;
2236 int sync_timeout
= 60; // Sync time limit (sec)
/*
 * sync_callback
 *
 * Per-mount callback for vfs_iterate(). Read-only mounts are skipped. For a
 * writable mount the current MNT_ASYNC state is saved, MNT_ASYNC is cleared
 * for the duration of the VFS_SYNC() call made with the kernel context, and
 * MNT_ASYNC is set again afterwards (presumably only when it was set
 * before; the guarding test is not visible in this listing).
 *
 * NOTE(review): arg is declared __unused but is read to choose between
 * MNT_WAIT (non-NULL) and MNT_NOWAIT (NULL). Both callers visible in this
 * chunk pass NULL.
 *
 * Returns VFS_RETURNED so that iteration continues.
 */
2239 sync_callback(mount_t mp
, __unused
void *arg
)
2241 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2242 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2244 mp
->mnt_flag
&= ~MNT_ASYNC
;
2245 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2247 mp
->mnt_flag
|= MNT_ASYNC
;
2250 return (VFS_RETURNED
);
/*
 * sync (system call)
 *
 * Runs sync_callback over every mount with vfs_iterate(LK_NOWAIT, ...),
 * passing NULL so each filesystem is synced with MNT_NOWAIT. When
 * print_vmpage_stat is set, vm_countdirtypages() is called afterwards.
 * All three parameters are unused.
 * NOTE(review): the opening of the DIAGNOSTIC conditional that the visible
 * #endif closes is not visible in this listing.
 */
2255 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2257 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2259 if (print_vmpage_stat
) {
2260 vm_countdirtypages();
2266 #endif /* DIAGNOSTIC */
/*
 * sync_thread
 *
 * Thread body started by sync_async(). Syncs all mounts through
 * vfs_iterate() with sync_callback (MNT_NOWAIT, since NULL is passed) and
 * then wakes up any sleeper on the address received in arg, which
 * sync_async() uses as its sleep channel. When print_vmpage_stat is set,
 * vm_countdirtypages() is called afterwards.
 * NOTE(review): any locking around the wakeup is not visible in this
 * listing.
 */
2271 sync_thread(void *arg
, __unused wait_result_t wr
)
2273 int *timeout
= (int *) arg
;
2275 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2278 wakeup((caddr_t
) timeout
);
2279 if (print_vmpage_stat
) {
2280 vm_countdirtypages();
2286 #endif /* DIAGNOSTIC */
/*
 * sync_async
 *
 * Starts sync_thread in a kernel thread and waits for it for at most
 * timeout seconds. The address of the by-value parameter timeout is handed
 * to the thread and is also the msleep() channel, so the thread wakeup
 * targets this sleeper. sync_mtx_lck is taken before the thread is started
 * and is released by msleep() through PDROP, or explicitly when the thread
 * cannot be started. A message is printed on timeout (the test guarding it
 * is not visible in this listing), and the thread reference is dropped
 * with thread_deallocate().
 *
 * Parameters:
 *   timeout  maximum time to wait, in seconds
 */
2290 * Sync in a separate thread so we can time out if it blocks.
2293 sync_async(int timeout
)
2297 struct timespec ts
= {timeout
, 0};
2299 lck_mtx_lock(sync_mtx_lck
);
2300 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2301 printf("sync_thread failed\n");
2302 lck_mtx_unlock(sync_mtx_lck
);
2306 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2308 printf("sync timed out: %d sec\n", timeout
);
2310 thread_deallocate(thd
);
/*
 * In-kernel sync entry point (named sync_internal according to the closing
 * comment; the line carrying the function name is not visible in this
 * listing). It calls sync_async() with the global sync_timeout and
 * discards the result.
 */
2316 * An in-kernel sync for power management to call.
2318 __private_extern__
int
2321 (void) sync_async(sync_timeout
);
2324 } /* end of sync_internal call */
/*
 * quotactl (system call, quota-enabled build)
 *
 * Audits uap->uid and uap->cmd, resolves a path to a vnode to find its
 * mount (nd.ni_vp->v_mount) and drops the vnode iocount. The sub-command is
 * uap->cmd >> SUBCMDSHIFT, and datap is prepared according to it before
 * VFS_QUOTACTL() is called:
 *   - a quota file name is copied in with copyinstr() into a MAXPATHLEN
 *     kalloc buffer, which is released with kfree() afterwards;
 *   - a dqblk is either only pointed at (my_dqblk) or copied in from user
 *     space; 64-bit processes go through struct user_dqblk and
 *     munge_dqblk();
 *   - an integer status uses a local int.
 * After the call, results are copied out: the dqblk (converted back through
 * munge_dqblk() for 64-bit processes) or the integer status.
 * NOTE(review): the case labels are not visible in this listing, so the
 * mapping from sub-command to branch cannot be stated from here.
 *
 * NOTE(review): the datap assignment in the integer-status branch (source
 * line 2383) reads as a double-quote character followed by a_status. That
 * is not valid C and looks like a mis-decoded address-of quota_status (an
 * HTML entity artifact); quota_status is the int declared at the top and
 * its size is used in the final copyout. Restore from the upstream source.
 */
2327 * Change filesystem quotas.
2331 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2334 int error
, quota_cmd
, quota_status
;
2337 struct nameidata nd
;
2338 vfs_context_t ctx
= vfs_context_current();
2339 struct dqblk my_dqblk
;
2341 AUDIT_ARG(uid
, uap
->uid
);
2342 AUDIT_ARG(cmd
, uap
->cmd
);
2343 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2348 mp
= nd
.ni_vp
->v_mount
;
2349 vnode_put(nd
.ni_vp
);
2352 /* copyin any data we will need for downstream code */
2353 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2355 switch (quota_cmd
) {
2357 /* uap->arg specifies a file from which to take the quotas */
2358 fnamelen
= MAXPATHLEN
;
2359 datap
= kalloc(MAXPATHLEN
);
2360 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2363 /* uap->arg is a pointer to a dqblk structure. */
2364 datap
= (caddr_t
) &my_dqblk
;
2368 /* uap->arg is a pointer to a dqblk structure. */
2369 datap
= (caddr_t
) &my_dqblk
;
2370 if (proc_is64bit(p
)) {
2371 struct user_dqblk my_dqblk64
;
2372 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2374 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2378 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2382 /* uap->arg is a pointer to an integer */
2383 datap
= (caddr_t
) "a_status
;
2391 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2394 switch (quota_cmd
) {
2397 kfree(datap
, MAXPATHLEN
);
2400 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2402 if (proc_is64bit(p
)) {
2403 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2404 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2405 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2408 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2413 /* uap->arg is a pointer to an integer */
2415 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
/*
 * quotactl (stub variant)
 *
 * Alternative definition that ignores all arguments and always returns
 * EOPNOTSUPP.
 * NOTE(review): the conditional that selects this variant over the full
 * implementation is not visible in this listing.
 */
2426 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2428 return (EOPNOTSUPP
);
/*
 * statfs (system call)
 *
 * Resolves uap->path (LOOKUP, FOLLOW, audited, OP_STATFS), takes the cached
 * statistics block of the mount (mp->mnt_vfsstat), runs the MAC check
 * mac_mount_check_stat(), refreshes the statistics with
 * vfs_update_vfsstat(VFS_USER_EVENT) and copies them to uap->buf through
 * munge_statfs(), which lays the data out for a 32-bit or 64-bit caller.
 *
 * NOTE(review): p is declared __unused but is passed to IS_64BIT_PROCESS().
 * The namei() call, the vnode release and the error returns are not
 * visible in this listing.
 */
2433 * Get filesystem statistics.
2435 * Returns: 0 Success
2437 * vfs_update_vfsstat:???
2438 * munge_statfs:EFAULT
2442 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2445 struct vfsstatfs
*sp
;
2447 struct nameidata nd
;
2448 vfs_context_t ctx
= vfs_context_current();
2451 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2452 UIO_USERSPACE
, uap
->path
, ctx
);
2458 sp
= &mp
->mnt_vfsstat
;
2462 error
= mac_mount_check_stat(ctx
, mp
);
2467 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2473 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
/*
 * fstatfs (system call)
 *
 * File-descriptor variant of statfs. The descriptor is audited and resolved
 * to a vnode with file_vnode(); an iocount is taken with
 * vnode_getwithref(); the vnode path is audited; the MAC check
 * mac_mount_check_stat() is run on the mount; the cached statistics are
 * refreshed with vfs_update_vfsstat(VFS_USER_EVENT) and copied to uap->buf
 * through munge_statfs().
 *
 * NOTE(review): p is declared __unused but is passed to IS_64BIT_PROCESS().
 * How mp is derived from the vnode, and the release of the vnode and file
 * references, are not visible in this listing.
 */
2479 * Get filesystem statistics.
2483 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2487 struct vfsstatfs
*sp
;
2490 AUDIT_ARG(fd
, uap
->fd
);
2492 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2495 error
= vnode_getwithref(vp
);
2501 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2510 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2515 sp
= &mp
->mnt_vfsstat
;
2516 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2520 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
/*
 * statfs64_common
 *
 * Builds a struct statfs64 on the stack and copies it out to bufp. The
 * structure is zeroed with bzero() before it is filled, so fields that are
 * not assigned and any padding reach user space as zero. Most fields are
 * copied from sfsp; f_type comes from mp->mnt_vtable->vfc_typenum and
 * f_flags is mp->mnt_flag masked with MNT_VISFLAGMASK. f_iosize is narrowed
 * to int32_t. The type name comes from mp->fstypename_override when
 * MNTK_TYPENAME_OVERRIDE is set, otherwise from sfsp->f_fstypename. All
 * strings are copied with strlcpy() and explicit bounds.
 *
 * Parameters:
 *   mp    mount supplying type number, flags and optional type-name override
 *   sfsp  cached filesystem statistics to convert
 *   bufp  user-space destination address
 *
 * The visible code stores the copyout() result in error; the return
 * statement itself is not visible in this listing.
 */
2530 * Common routine to handle copying of statfs64 data to user space
2533 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2536 struct statfs64 sfs
;
2538 bzero(&sfs
, sizeof(sfs
));
2540 sfs
.f_bsize
= sfsp
->f_bsize
;
2541 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2542 sfs
.f_blocks
= sfsp
->f_blocks
;
2543 sfs
.f_bfree
= sfsp
->f_bfree
;
2544 sfs
.f_bavail
= sfsp
->f_bavail
;
2545 sfs
.f_files
= sfsp
->f_files
;
2546 sfs
.f_ffree
= sfsp
->f_ffree
;
2547 sfs
.f_fsid
= sfsp
->f_fsid
;
2548 sfs
.f_owner
= sfsp
->f_owner
;
2549 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2550 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2551 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2552 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2553 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2555 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2557 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2558 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2560 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
/*
 * statfs64 (system call)
 *
 * Path-based statistics in the statfs64 layout. Resolves uap->path (LOOKUP,
 * FOLLOW, audited, OP_STATFS), runs mac_mount_check_stat(), refreshes the
 * cached statistics with vfs_update_vfsstat(VFS_USER_EVENT) and copies them
 * to uap->buf through statfs64_common().
 * NOTE(review): the namei() call, the vnode release and the error returns
 * are not visible in this listing.
 */
2566 * Get file system statistics in 64-bit mode
2569 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2572 struct vfsstatfs
*sp
;
2574 struct nameidata nd
;
2575 vfs_context_t ctxp
= vfs_context_current();
2578 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2579 UIO_USERSPACE
, uap
->path
, ctxp
);
2585 sp
= &mp
->mnt_vfsstat
;
2589 error
= mac_mount_check_stat(ctxp
, mp
);
2594 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2600 error
= statfs64_common(mp
, sp
, uap
->buf
);
/*
 * fstatfs64 (system call)
 *
 * File-descriptor variant of statfs64. The descriptor is audited and
 * resolved with file_vnode(); an iocount is taken with vnode_getwithref();
 * the vnode path is audited; mac_mount_check_stat() is run; the cached
 * statistics are refreshed with vfs_update_vfsstat(VFS_USER_EVENT) and
 * copied to uap->buf through statfs64_common().
 * NOTE(review): how mp is derived from the vnode, and the release of the
 * vnode and file references, are not visible in this listing.
 */
2607 * Get file system statistics in 64-bit mode
2610 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2614 struct vfsstatfs
*sp
;
2617 AUDIT_ARG(fd
, uap
->fd
);
2619 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2622 error
= vnode_getwithref(vp
);
2628 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2637 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2642 sp
= &mp
->mnt_vfsstat
;
2643 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2647 error
= statfs64_common(mp
, sp
, uap
->buf
);
/*
 * Iteration state shared between the getfsstat family of system calls and
 * their vfs_iterate() callbacks.
 * NOTE(review): the member list is not visible in this listing. The code in
 * this file accesses sfsp, mp, flags, count, maxcount and error through it.
 */
2656 struct getfsstat_struct
{
/*
 * getfsstat_callback
 *
 * Per-mount callback for __mac_getfsstat(). arg is the shared
 * struct getfsstat_struct. While a user buffer is present (fstp->sfsp) and
 * fewer than maxcount entries have been produced, the callback:
 *   - runs mac_mount_check_stat(); the error is recorded in fstp->error
 *     and iteration stops with VFS_RETURNED_DONE;
 *   - refreshes the cached statistics unless MNT_NOWAIT is set without
 *     MNT_WAIT or MNT_DWAIT; a refresh failure only skips this mount
 *     (VFS_RETURNED);
 *   - writes one entry with munge_statfs() and advances fstp->sfsp by the
 *     size it reports; a failure records the error and stops iteration;
 *   - fetches the MAC label with mac_mount_label_get(); a failure records
 *     the error and stops iteration.
 * NOTE(review): the increment of fstp->count and the condition guarding the
 * MAC label step are not visible in this listing.
 */
2667 getfsstat_callback(mount_t mp
, void * arg
)
2670 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2671 struct vfsstatfs
*sp
;
2673 vfs_context_t ctx
= vfs_context_current();
2675 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2677 error
= mac_mount_check_stat(ctx
, mp
);
2679 fstp
->error
= error
;
2680 return(VFS_RETURNED_DONE
);
2683 sp
= &mp
->mnt_vfsstat
;
2685 * If MNT_NOWAIT is specified, do not refresh the
2686 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2688 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2689 (error
= vfs_update_vfsstat(mp
, ctx
,
2691 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2692 return(VFS_RETURNED
);
2696 * Need to handle LP64 version of struct statfs
2698 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2700 fstp
->error
= error
;
2701 return(VFS_RETURNED_DONE
);
2703 fstp
->sfsp
+= my_size
;
2707 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2709 fstp
->error
= error
;
2710 return(VFS_RETURNED_DONE
);
2717 return(VFS_RETURNED
);
/*
 * getfsstat (system call)
 *
 * Thin wrapper around __mac_getfsstat(): copies buf, bufsize and flags from
 * uap into a struct __mac_getfsstat_args, sets mac to USER_ADDR_NULL so no
 * MAC labels are requested, and forwards p and retval unchanged.
 * NOTE(review): p is declared __unused but is passed on. The initialization
 * of muap.macsize is not visible in this listing.
 */
2721 * Get statistics on all filesystems.
2724 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2726 struct __mac_getfsstat_args muap
;
2728 muap
.buf
= uap
->buf
;
2729 muap
.bufsize
= uap
->bufsize
;
2730 muap
.mac
= USER_ADDR_NULL
;
2732 muap
.flags
= uap
->flags
;
2734 return (__mac_getfsstat(p
, &muap
, retval
));
/*
 * __mac_getfsstat (system call)
 *
 * Visible flow:
 *   - maxcount is bufsize divided by the size of the user64 or user32
 *     statfs layout, depending on the calling process;
 *   - when uap->mac is non-NULL, count is macsize divided by the user
 *     pointer width (8 or 4) and must equal maxcount; the user array of
 *     label pointers is copied in (mp0) and widened into an array of
 *     user_addr_t (mp); mp0 is freed on the visible exit paths;
 *   - the shared getfsstat_struct is filled in and getfsstat_callback is
 *     run over every mount with vfs_iterate();
 *   - the normalized array is freed, and *retval is set to maxcount when a
 *     buffer was supplied and more filesystems exist than fit, otherwise to
 *     the number counted.
 *
 * NOTE(review): p is declared __unused but is read by IS_64BIT_PROCESS()
 * and in the debug message. The error codes and the NULL checks after the
 * allocations are not visible in this listing.
 */
2738 * __mac_getfsstat: Get MAC-related file system statistics
2740 * Parameters: p (ignored)
2741 * uap User argument descriptor (see below)
2742 * retval Count of file system statistics (N stats)
2744 * Indirect: uap->bufsize Buffer size
2745 * uap->macsize MAC info size
2746 * uap->buf Buffer where information will be returned
2748 * uap->flags File system flags
2751 * Returns: 0 Success
2756 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2760 size_t count
, maxcount
, bufsize
, macsize
;
2761 struct getfsstat_struct fst
;
2763 bufsize
= (size_t) uap
->bufsize
;
2764 macsize
= (size_t) uap
->macsize
;
2766 if (IS_64BIT_PROCESS(p
)) {
2767 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2770 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2778 if (uap
->mac
!= USER_ADDR_NULL
) {
2783 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2784 if (count
!= maxcount
)
2787 /* Copy in the array */
2788 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2793 error
= copyin(uap
->mac
, mp0
, macsize
);
2795 FREE(mp0
, M_MACTEMP
);
2799 /* Normalize to an array of user_addr_t */
2800 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2802 FREE(mp0
, M_MACTEMP
);
2806 for (i
= 0; i
< count
; i
++) {
2807 if (IS_64BIT_PROCESS(p
))
2808 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2810 mp
[i
] = (user_addr_t
)mp0
[i
];
2812 FREE(mp0
, M_MACTEMP
);
2819 fst
.flags
= uap
->flags
;
2822 fst
.maxcount
= maxcount
;
2825 vfs_iterate(0, getfsstat_callback
, &fst
);
2828 FREE(mp
, M_MACTEMP
);
2831 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2835 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2836 *retval
= fst
.maxcount
;
2838 *retval
= fst
.count
;
/*
 * getfsstat64_callback
 *
 * Per-mount callback for getfsstat64(). While a user buffer is present and
 * fewer than maxcount entries have been produced, the callback:
 *   - runs mac_mount_check_stat(); the error is recorded in fstp->error
 *     and iteration stops with VFS_RETURNED_DONE;
 *   - refreshes the cached statistics unless MNT_NOWAIT is set without
 *     MNT_WAIT or MNT_DWAIT; a refresh failure only skips this mount
 *     (VFS_RETURNED);
 *   - writes one struct statfs64 with statfs64_common() and advances
 *     fstp->sfsp by sizeof(struct statfs64); a failure records the error
 *     and stops iteration.
 * NOTE(review): the increment of fstp->count is not visible in this
 * listing.
 */
2843 getfsstat64_callback(mount_t mp
, void * arg
)
2845 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2846 struct vfsstatfs
*sp
;
2849 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2851 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2853 fstp
->error
= error
;
2854 return(VFS_RETURNED_DONE
);
2857 sp
= &mp
->mnt_vfsstat
;
2859 * If MNT_NOWAIT is specified, do not refresh the fsstat
2860 * cache. MNT_WAIT overrides MNT_NOWAIT.
2862 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2863 * getfsstat, since the constants are out of the same
2866 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2867 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2868 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2869 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2870 return(VFS_RETURNED
);
2873 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2875 fstp
->error
= error
;
2876 return(VFS_RETURNED_DONE
);
2878 fstp
->sfsp
+= sizeof(struct statfs64
);
2881 return(VFS_RETURNED
);
/*
 * getfsstat64 (system call)
 *
 * maxcount is uap->bufsize divided by sizeof(struct statfs64). The shared
 * getfsstat_struct is filled in and getfsstat64_callback is run over every
 * mount with vfs_iterate(). *retval is set to maxcount when a buffer was
 * supplied and more filesystems exist than fit, otherwise to the number
 * counted.
 * NOTE(review): p is declared __unused but is read in the debug message.
 * The remaining initializations of fst and the error return are not
 * visible in this listing.
 */
2885 * Get statistics on all file systems in 64 bit mode.
2888 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2891 int count
, maxcount
;
2892 struct getfsstat_struct fst
;
2894 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2900 fst
.flags
= uap
->flags
;
2903 fst
.maxcount
= maxcount
;
2905 vfs_iterate(0, getfsstat64_callback
, &fst
);
2908 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2912 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2913 *retval
= fst
.maxcount
;
2915 *retval
= fst
.count
;
/*
 * vnode_getfromfd
 *
 * Resolves fd in the process of ctx to its fileproc and vnode with
 * fp_getfvp(), takes an iocount on the vnode with vnode_getwithref(), and
 * drops the fileproc reference with fp_drop() on both visible paths. Per
 * the residual comment, the vnode is returned with an iocount that the
 * caller must release with vnode_put().
 * NOTE(review): the store through vpp and the error returns are not visible
 * in this listing.
 */
2921 * gets the associated vnode with the file descriptor passed.
2925 * ctx - vfs context of caller
2926 * fd - file descriptor for which vnode is required.
2927 * vpp - Pointer to pointer to vnode to be returned.
2929 * The vnode is returned with an iocount so any vnode obtained
2930 * by this call needs a vnode_put
2934 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2938 struct fileproc
*fp
;
2939 proc_t p
= vfs_context_proc(ctx
);
2943 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2947 error
= vnode_getwithref(vp
);
2949 (void)fp_drop(p
, fd
, fp
, 0);
2953 (void)fp_drop(p
, fd
, fp
, 0);
2959 * Wrapper function around namei to start lookup from a directory
2960 * specified by a file descriptor ni_dirfd.
2962 * In addition to all the errors returned by namei, this call can
2963 * return ENOTDIR if the file descriptor does not refer to a directory,
2964 * and EBADF if the file descriptor is not valid.
/*
 * nameiat
 *
 * namei() wrapper that can start the lookup at the directory referred to by
 * dirfd. The special handling only applies when dirfd is not AT_FDCWD, the
 * lookup is not a continued one (NAMEI_CONTLOOKUP) and the caller has not
 * already supplied a starting directory (USEDVP). In that case the first
 * byte of the path is fetched, with copyin() for user-space paths or
 * directly otherwise (presumably to tell relative paths from absolute
 * ones; the test on it is not visible in this listing), the directory
 * vnode is obtained with vnode_getfromfd(), it must be of type VDIR, and it
 * is installed as ndp->ni_dvp with USEDVP set. USEDVP is cleared again
 * afterwards. Every other case goes straight to namei(ndp).
 *
 * Parameters:
 *   ndp    initialized nameidata describing the lookup
 *   dirfd  directory file descriptor, or AT_FDCWD
 */
2967 nameiat(struct nameidata
*ndp
, int dirfd
)
2969 if ((dirfd
!= AT_FDCWD
) &&
2970 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2971 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2975 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2976 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2980 c
= *((char *)(ndp
->ni_dirp
));
2986 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2991 if (vnode_vtype(dvp_at
) != VDIR
) {
2996 ndp
->ni_dvp
= dvp_at
;
2997 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2999 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
3005 return (namei(ndp
));
3009 * Change current working directory to a given file descriptor.
/*
 * common_fchdir: shared implementation behind fchdir() (per_thread == 0)
 * and __pthread_fchdir() (per_thread == 1).
 *
 * Steps visible in this listing:
 *  - uap->fd is recorded with AUDIT_ARG.
 *  - per_thread with uap->fd == -1 is a special case: the calling
 *    thread's uu_cdir is reset to NULLVP and a saved vnode (tvp) is
 *    tested against NULLVP.
 *  - Otherwise file_vnode() and vnode_getwithref() obtain the vnode,
 *    which must be a VDIR.
 *  - mac_vnode_check_chdir() and vnode_authorize(KAUTH_VNODE_SEARCH)
 *    are called on it.
 *  - While no error is set and the vnode has a v_mountedhere mount, the
 *    mount is busied with vfs_busy(LK_NOWAIT) and VFS_ROOT() fetches its
 *    root into tdp.
 *  - vnode_ref() is taken on vp.
 *  - A later visible section obtains the uthread again and sets P_THCWD
 *    in p->p_flag with OSBitOrAtomic.
 *
 * NOTE(review): many lines (declarations, error exits, the cwd swap
 * itself, locking) are missing from this extraction -- confirm against
 * the complete source.
 */
3013 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3015 struct filedesc
*fdp
= p
->p_fd
;
3021 vfs_context_t ctx
= vfs_context_current();
3023 AUDIT_ARG(fd
, uap
->fd
);
3024 if (per_thread
&& uap
->fd
== -1) {
3026 * Switching back from per-thread to per process CWD; verify we
3027 * in fact have one before proceeding. The only success case
3028 * for this code path is to return 0 preemptively after zapping
3029 * the thread structure contents.
3031 thread_t th
= vfs_context_thread(ctx
);
3033 uthread_t uth
= get_bsdthread_info(th
);
3035 uth
->uu_cdir
= NULLVP
;
3036 if (tvp
!= NULLVP
) {
3044 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
3046 if ( (error
= vnode_getwithref(vp
)) ) {
3051 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3053 if (vp
->v_type
!= VDIR
) {
3059 error
= mac_vnode_check_chdir(ctx
, vp
);
3063 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3067 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3068 if (vfs_busy(mp
, LK_NOWAIT
)) {
3072 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3081 if ( (error
= vnode_ref(vp
)) )
3086 thread_t th
= vfs_context_thread(ctx
);
3088 uthread_t uth
= get_bsdthread_info(th
);
3091 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3116 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3118 return common_fchdir(p
, uap
, 0);
3122 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3124 return common_fchdir(p
, (void *)uap
, 1);
3128 * Change current working directory (".").
3130 * Returns: 0 Success
3131 * change_dir:ENOTDIR
3133 * vnode_ref:ENOENT No such file or directory
/*
 * common_chdir: shared implementation behind chdir() (per_thread == 0)
 * and __pthread_chdir() (per_thread == 1).
 *
 * Steps visible in this listing:
 *  - NDINIT prepares a LOOKUP / OP_CHDIR of the user-space path
 *    uap->path with FOLLOW | AUDITVNPATH1.
 *  - change_dir() performs the lookup and checks.
 *  - vnode_ref() takes a reference on nd.ni_vp; if that fails the vnode
 *    is released with vnode_put().
 *  - On success the iocount picked up in change_dir() is dropped with
 *    vnode_put().
 *  - One visible path stores nd.ni_vp in the calling thread's uu_cdir
 *    and sets P_THCWD in p->p_flag; vnode_rele(nd.ni_vp) is also
 *    visible nearby.
 *  - Another visible path stores nd.ni_vp in fdp->fd_cdir.
 *
 * NOTE(review): branch conditions, locking, release of the previous
 * cwd and the return statements are missing from this extraction --
 * confirm against the complete source.
 */
3137 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3139 struct filedesc
*fdp
= p
->p_fd
;
3141 struct nameidata nd
;
3143 vfs_context_t ctx
= vfs_context_current();
3145 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3146 UIO_USERSPACE
, uap
->path
, ctx
);
3147 error
= change_dir(&nd
, ctx
);
3150 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3151 vnode_put(nd
.ni_vp
);
3155 * drop the iocount we picked up in change_dir
3157 vnode_put(nd
.ni_vp
);
3160 thread_t th
= vfs_context_thread(ctx
);
3162 uthread_t uth
= get_bsdthread_info(th
);
3164 uth
->uu_cdir
= nd
.ni_vp
;
3165 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3167 vnode_rele(nd
.ni_vp
);
3173 fdp
->fd_cdir
= nd
.ni_vp
;
3187 * Change current working directory (".") for the entire process
3189 * Parameters: p Process requesting the call
3190 * uap User argument descriptor (see below)
3193 * Indirect parameters: uap->path Directory path
3195 * Returns: 0 Success
3196 * common_chdir: ENOTDIR
3197 * common_chdir: ENOENT No such file or directory
3202 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3204 return common_chdir(p
, (void *)uap
, 0);
3210 * Change current working directory (".") for a single thread
3212 * Parameters: p Process requesting the call
3213 * uap User argument descriptor (see below)
3216 * Indirect parameters: uap->path Directory path
3218 * Returns: 0 Success
3219 * common_chdir: ENOTDIR
3220 * common_chdir: ENOENT No such file or directory
3225 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3227 return common_chdir(p
, (void *)uap
, 1);
3232 * Change notion of root (``/'') directory.
/*
 * chroot: change the process's notion of the root directory.
 *
 * Steps visible in this listing:
 *  - suser() is checked against the current credential first.
 *  - NDINIT prepares a LOOKUP / OP_CHROOT of the user-space path
 *    uap->path with FOLLOW | AUDITVNPATH1; change_dir() runs it.
 *  - mac_vnode_check_chroot() is consulted; a vnode_put(nd.ni_vp)
 *    follows it in the listing.
 *  - vnode_ref() takes a reference on nd.ni_vp (vnode_put on failure),
 *    then the iocount is dropped with vnode_put().
 *  - fdp->fd_rdir is set to nd.ni_vp and FD_CHROOT is set in fd_flags.
 *
 * NOTE(review): error exits, locking and release of any previous root
 * vnode are missing from this extraction -- confirm against the
 * complete source.
 */
3236 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3238 struct filedesc
*fdp
= p
->p_fd
;
3240 struct nameidata nd
;
3242 vfs_context_t ctx
= vfs_context_current();
3244 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3247 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3248 UIO_USERSPACE
, uap
->path
, ctx
);
3249 error
= change_dir(&nd
, ctx
);
3254 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3257 vnode_put(nd
.ni_vp
);
3262 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3263 vnode_put(nd
.ni_vp
);
3266 vnode_put(nd
.ni_vp
);
3270 fdp
->fd_rdir
= nd
.ni_vp
;
3271 fdp
->fd_flags
|= FD_CHROOT
;
3281 * Common routine for chroot and chdir.
3283 * Returns: 0 Success
3284 * ENOTDIR Not a directory
3285 * namei:??? [anything namei can return]
3286 * vnode_authorize:??? [anything vnode_authorize can return]
/*
 * change_dir: lookup-and-check helper called by common_chdir() and
 * chroot().
 *
 * Steps visible in this listing:
 *  - namei(ndp) performs the lookup.
 *  - The resulting vnode must have v_type == VDIR.
 *  - mac_vnode_check_chdir() and then
 *    vnode_authorize(KAUTH_VNODE_SEARCH) are called on it.
 *
 * NOTE(review): the declaration and assignment of vp, the error exits
 * and the return statements are missing from this extraction --
 * confirm against the complete source.
 */
3289 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3294 if ((error
= namei(ndp
)))
3299 if (vp
->v_type
!= VDIR
) {
3305 error
= mac_vnode_check_chdir(ctx
, vp
);
3312 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3322 * Allocate the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_alloc: allocate the per-fd vnode data record.
 *
 * Steps visible in this listing:
 *  - MALLOC obtains a struct fd_vn_data of type M_FD_VN_DATA with
 *    M_WAITOK | M_ZERO.
 *  - lck_mtx_init() initialises its fv_lock with fd_vn_lck_grp and
 *    fd_vn_lck_attr.
 *
 * NOTE(review): the return type and the return statement are missing
 * from this extraction; open1() stores the result in fg_vn_data.
 */
3325 fg_vn_data_alloc(void)
3327 struct fd_vn_data
*fvdata
;
3329 /* Allocate per fd vnode data */
3330 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3331 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3332 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3337 * Free the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_free: release a record created by fg_vn_data_alloc().
 *
 * Parameters:	fgvndata	Untyped pointer, cast to
 *				struct fd_vn_data *
 *
 * Steps visible in this listing:
 *  - FREE releases fvdata->fv_buf (type M_FD_DIRBUF).
 *  - lck_mtx_destroy() tears down fv_lock.
 *  - FREE releases the record itself (type M_FD_VN_DATA).
 *
 * NOTE(review): any guard around the fv_buf release is missing from
 * this extraction -- confirm against the complete source.
 */
3340 fg_vn_data_free(void *fgvndata
)
3342 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3345 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3346 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3347 FREE(fvdata
, M_FD_VN_DATA
);
3351 * Check permissions, allocate an open file structure,
3352 * and call the device open routine if any.
3354 * Returns: 0 Success
3365 * XXX Need to implement uid, gid
/*
 * open1: common open path -- allocates a file descriptor, opens the vnode
 * named by ndp and wires the two together.
 *
 * Steps visible in this listing, in order:
 *  - oflags with both access-mode bits set ((oflags & O_ACCMODE) ==
 *    O_ACCMODE) are singled out by an early check.
 *  - flags = FFLAGS(uflags), with FENCRYPTED and FUNENCRYPTED cleared.
 *  - falloc_withalloc() allocates the fileproc (fp) and index (indx).
 *  - uu->uu_dupfd is set to -indx - 1 before vn_open_auth() runs.
 *  - If vn_open_auth() fails with ENODEV or ENXIO and uu_dupfd >= 0,
 *    dupfdopen() is tried; ERESTART is tested separately; fp_free()
 *    releases the descriptor on this path.
 *  - On success fg_flag, fg_ops (&vnops) and fg_data (vp) are filled in.
 *  - With O_EXLOCK or O_SHLOCK an advisory lock is requested through
 *    VNOP_ADVLOCK (F_WRLCK for O_EXLOCK, F_RDLCK otherwise, after
 *    mac_file_check_lock) and FHASLOCK is recorded.
 *  - O_TRUNC truncates via vnode_setsize(vp, 0, ...).
 *  - For a VDIR vnode fg_vn_data comes from fg_vn_data_alloc(); the
 *    other visible assignment sets it to NULL.
 *  - A tty opened without O_NOCTTY by a process lacking P_CONTROLT
 *    gets a TIOCSCTTY ioctl.
 *  - O_CLOEXEC / O_CLOFORK set UF_EXCLOSE / UF_FORKCLOSE, then
 *    procfdtbl_releasefd() is called.
 *  - Under CONFIG_SECLUDED_MEMORY, regular files are marked (or
 *    unmarked) as eligible for secluded pages depending on FWRITE, the
 *    path prefix and the vnode name.
 *  - fp_drop(p, indx, fp, 1) finishes the success path.
 *  - The trailing section undoes the work: F_UNLCK through an advisory
 *    lock call when FHASLOCK is set, vn_close(), fp_free().
 *
 * NOTE(review): this extraction is missing many lines (the tail of the
 * parameter list, most declarations, labels, gotos, return statements
 * and several call arguments) -- confirm every step against the
 * complete source before relying on it.
 */
3368 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3369 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3372 proc_t p
= vfs_context_proc(ctx
);
3373 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3374 struct fileproc
*fp
;
3377 int type
, indx
, error
;
3379 struct vfs_context context
;
3383 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3386 flags
= FFLAGS(uflags
);
3387 CLR(flags
, FENCRYPTED
);
3388 CLR(flags
, FUNENCRYPTED
);
3390 AUDIT_ARG(fflags
, oflags
);
3391 AUDIT_ARG(mode
, vap
->va_mode
);
3393 if ((error
= falloc_withalloc(p
,
3394 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3397 uu
->uu_dupfd
= -indx
- 1;
3399 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3400 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3401 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3402 fp_drop(p
, indx
, NULL
, 0);
3407 if (error
== ERESTART
)
3409 fp_free(p
, indx
, fp
);
3415 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3416 fp
->f_fglob
->fg_ops
= &vnops
;
3417 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3419 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3420 lf
.l_whence
= SEEK_SET
;
3423 if (flags
& O_EXLOCK
)
3424 lf
.l_type
= F_WRLCK
;
3426 lf
.l_type
= F_RDLCK
;
3428 if ((flags
& FNONBLOCK
) == 0)
3431 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3436 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3438 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3441 /* try to truncate by setting the size attribute */
3442 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3446 * For directories we hold some additional information in the fd.
3448 if (vnode_vtype(vp
) == VDIR
) {
3449 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3451 fp
->f_fglob
->fg_vn_data
= NULL
;
3457 * The first terminal open (without a O_NOCTTY) by a session leader
3458 * results in it being set as the controlling terminal.
3460 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3461 !(flags
& O_NOCTTY
)) {
3464 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3465 (caddr_t
)&tmp
, ctx
);
3469 if (flags
& O_CLOEXEC
)
3470 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3471 if (flags
& O_CLOFORK
)
3472 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3473 procfdtbl_releasefd(p
, indx
, NULL
);
3475 #if CONFIG_SECLUDED_MEMORY
3476 if (secluded_for_filecache
&&
3477 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3478 vnode_vtype(vp
) == VREG
) {
3479 memory_object_control_t moc
;
3481 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3483 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3484 /* nothing to do... */
3485 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3486 /* writable -> no longer eligible for secluded pages */
3487 memory_object_mark_eligible_for_secluded(moc
,
3489 } else if (secluded_for_filecache
== 1) {
3490 char pathname
[32] = { 0, };
3492 /* XXX FBDP: better way to detect /Applications/ ? */
3493 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3494 copyinstr(ndp
->ni_dirp
,
3499 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3504 pathname
[sizeof (pathname
) - 1] = '\0';
3505 if (strncmp(pathname
,
3507 strlen("/Applications/")) == 0 &&
3509 "/Applications/Camera.app/",
3510 strlen("/Applications/Camera.app/")) != 0) {
3513 * AND from "/Applications/"
3514 * AND not from "/Applications/Camera.app/"
3515 * ==> eligible for secluded
3517 memory_object_mark_eligible_for_secluded(moc
,
3520 } else if (secluded_for_filecache
== 2) {
3521 /* not implemented... */
3522 if (!strncmp(vp
->v_name
,
3523 DYLD_SHARED_CACHE_NAME
,
3524 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3525 !strncmp(vp
->v_name
,
3527 strlen(vp
->v_name
)) ||
3528 !strncmp(vp
->v_name
,
3530 strlen(vp
->v_name
)) ||
3531 !strncmp(vp
->v_name
,
3533 strlen(vp
->v_name
)) ||
3534 !strncmp(vp
->v_name
,
3536 strlen(vp
->v_name
))) {
3538 * This file matters when launching Camera:
3539 * do not store its contents in the secluded
3540 * pool that will be drained on Camera launch.
3542 memory_object_mark_eligible_for_secluded(moc
,
3547 #endif /* CONFIG_SECLUDED_MEMORY */
3549 fp_drop(p
, indx
, fp
, 1);
3556 context
= *vfs_context_current();
3557 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3559 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3560 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3561 lf
.l_whence
= SEEK_SET
;
3564 lf
.l_type
= F_UNLCK
;
3567 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3570 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3572 fp_free(p
, indx
, fp
);
3578 * While most of the *at syscall handlers can call nameiat() which
3579 * is a wrapper around namei, the use of namei and initialisation
3580 * of nameidata are far removed and in different functions - namei
3581 * gets called in vn_open_auth for open1. So we'll just do here what
/*
 * open1at: open1() variant that can start the lookup at the directory
 * referred to by dirfd.
 *
 * Steps visible in this listing:
 *  - The dirfd handling only runs when dirfd != AT_FDCWD and USEDVP is
 *    not already set in ndp->ni_cnd.cn_flags.
 *  - The first byte of the path is fetched into c: copyin() for a
 *    user-space segment, a direct dereference of ni_dirp otherwise.
 *  - vnode_getfromfd() resolves dirfd; the vnode must be a VDIR.
 *  - ni_dvp is pointed at that vnode, USEDVP is set, and open1() is
 *    called with the result kept in error.
 *  - The final visible statement is the plain fall-through:
 *    return open1(...).
 *
 * NOTE(review): the last parameter line, the test applied to c, the
 * error exits and the cleanup after the first open1() call are missing
 * from this extraction -- confirm against the complete source.
 */
3585 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3586 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3589 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3593 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3594 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3598 c
= *((char *)(ndp
->ni_dirp
));
3604 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3609 if (vnode_vtype(dvp_at
) != VDIR
) {
3614 ndp
->ni_dvp
= dvp_at
;
3615 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3616 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3623 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3627 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3629 * Parameters: p Process requesting the open
3630 * uap User argument descriptor (see below)
3631 * retval Pointer to an area to receive the
3632 * return value from the system call
3634 * Indirect: uap->path Path to open (same as 'open')
3635 * uap->flags Flags to open (same as 'open')
3636 * uap->uid UID to set, if creating
3637 * uap->gid GID to set, if creating
3638 * uap->mode File mode, if creating (same as 'open')
3639 * uap->xsecurity ACL to set, if creating
3641 * Returns: 0 Success
3644 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3646 * XXX: We should enumerate the possible errno values here, and where
3647 * in the code they originated.
/*
 * open_extended: open(2) with explicit owner, group and ACL for a newly
 * created file.
 *
 * Steps visible in this listing:
 *  - uap->uid / uap->gid are recorded with AUDIT_ARG.
 *  - When uap->xsecurity is not USER_ADDR_NULL the filesec is copied in
 *    with kauth_copyinfilesec() into xsecdst.
 *  - cmode is uap->mode with fd_cmask bits removed, limited to ALLPERMS
 *    and with S_ISTXT cleared.
 *  - va receives va_mode always, va_uid / va_gid when they are not
 *    KAUTH_UID_NONE / KAUTH_GID_NONE, and va_acl when xsecdst is set.
 *  - NDINIT prepares a LOOKUP / OP_OPEN of uap->path; open1() does the
 *    work.
 *  - xsecdst, if any, is released with kauth_filesec_free().
 *
 * NOTE(review): the initialisation of va and xsecdst, the early error
 * return and the final return are missing from this extraction --
 * confirm against the complete source.
 */
3650 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3652 struct filedesc
*fdp
= p
->p_fd
;
3654 kauth_filesec_t xsecdst
;
3655 struct vnode_attr va
;
3656 struct nameidata nd
;
3659 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3662 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3663 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3667 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3668 VATTR_SET(&va
, va_mode
, cmode
);
3669 if (uap
->uid
!= KAUTH_UID_NONE
)
3670 VATTR_SET(&va
, va_uid
, uap
->uid
);
3671 if (uap
->gid
!= KAUTH_GID_NONE
)
3672 VATTR_SET(&va
, va_gid
, uap
->gid
);
3673 if (xsecdst
!= NULL
)
3674 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3676 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3677 uap
->path
, vfs_context_current());
3679 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3680 fileproc_alloc_init
, NULL
, retval
);
3681 if (xsecdst
!= NULL
)
3682 kauth_filesec_free(xsecdst
);
3688 * Go through the data-protected atomically controlled open (2)
3690 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
/*
 * open_dprotected_np: open(2) variant that carries a data-protection
 * class and data-protection flags down to the file system.
 *
 * Steps visible in this listing:
 *  - flags, class and dpflags are copied out of uap.
 *  - cmode is uap->mode with fd_cmask bits removed, limited to ALLPERMS
 *    with S_ISTXT cleared; va_mode is set to cmode & ACCESSPERMS.
 *  - NDINIT prepares a LOOKUP / OP_OPEN of uap->path.
 *  - With O_CREAT and a class other than PROTECTION_CLASS_DEFAULT,
 *    va_dataprotect_class is set to class.
 *  - With O_DP_GETRAWENCRYPTED or O_DP_GETRAWUNENCRYPTED in dpflags,
 *    a request that also has O_RDWR or O_WRONLY is singled out (the
 *    original comment says raw encrypted bytes may not be written);
 *    otherwise va_dataprotect_flags is set to VA_DP_RAWENCRYPTED
 *    and/or VA_DP_RAWUNENCRYPTED.
 *  - open1() performs the open.
 *
 * NOTE(review): the initialisation of va, the statement taken for the
 * write-mode case and the return are missing from this extraction --
 * confirm against the complete source.
 */
3692 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3693 int flags
= uap
->flags
;
3694 int class = uap
->class;
3695 int dpflags
= uap
->dpflags
;
3698 * Follow the same path as normal open(2)
3699 * Look up the item if it exists, and acquire the vnode.
3701 struct filedesc
*fdp
= p
->p_fd
;
3702 struct vnode_attr va
;
3703 struct nameidata nd
;
3708 /* Mask off all but regular access permissions */
3709 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3710 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3712 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3713 uap
->path
, vfs_context_current());
3716 * Initialize the extra fields in vnode_attr to pass down our
3718 * 1. target cprotect class.
3719 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3721 if (flags
& O_CREAT
) {
3722 /* lower level kernel code validates that the class is valid before applying it. */
3723 if (class != PROTECTION_CLASS_DEFAULT
) {
3725 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3726 * file behave the same as open (2)
3728 VATTR_SET(&va
, va_dataprotect_class
, class);
3732 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3733 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3734 /* Not allowed to write raw encrypted bytes */
3737 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3738 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3740 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3741 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3745 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3746 fileproc_alloc_init
, NULL
, retval
);
/*
 * openat_internal: common body of open_nocancel(), openat_nocancel() and
 * openbyid_np().
 *
 * Parameters:	ctx	vfs context of the caller
 *		path	Path to open (address space given by segflg)
 *		flags	open(2) flags
 *		mode	Creation mode before the cmask is applied
 *		fd	Directory descriptor, or AT_FDCWD
 *		segflg	Address space of path
 *		retval	Passed through to open1at()
 *
 * Steps visible in this listing:
 *  - cmode is mode with fd_cmask bits removed, limited to ALLPERMS with
 *    S_ISTXT cleared; va_mode is set to cmode & ACCESSPERMS.
 *  - NDINIT prepares a LOOKUP / OP_OPEN with FOLLOW | AUDITVNPATH1.
 *  - The result of open1at() is returned.
 *
 * NOTE(review): the initialisation of va and the remaining NDINIT and
 * open1at() arguments are missing from this extraction -- confirm
 * against the complete source.
 */
3752 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3753 int fd
, enum uio_seg segflg
, int *retval
)
3755 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3756 struct vnode_attr va
;
3757 struct nameidata nd
;
3761 /* Mask off all but regular access permissions */
3762 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3763 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3765 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3768 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3773 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3775 __pthread_testcancel(1);
3776 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3780 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3783 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3784 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3788 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3791 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3792 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3796 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3798 __pthread_testcancel(1);
3799 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3803 * openbyid_np: open a file given a file system id and a file system object id
3804 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3805 * file systems that don't support object ids it is a node id (uint64_t).
3807 * Parameters: p Process requesting the open
3808 * uap User argument descriptor (see below)
3809 * retval Pointer to an area to receive the
3810 * return value from the system call
3812 * Indirect: uap->path Path to open (same as 'open')
3814 * uap->fsid id of target file system
3815 * uap->objid id of target file system object
3816 * uap->flags Flags to open (same as 'open')
3818 * Returns: 0 Success
3822 * XXX: We should enumerate the possible errno values here, and where
3823 * in the code they originated.
/*
 * openbyid_np: open a file identified by (file system id, object id)
 * instead of by path.
 *
 * Steps visible in this listing:
 *  - priv_check_cred() requires PRIV_VFS_OPEN_BY_ID.
 *  - fsid and objid are copied in from user space; objid is read as
 *    sizeof(uint64_t) bytes.
 *  - fsid.val[0] and objid are recorded with AUDIT_ARG.
 *  - A buffer of buflen + 1 bytes (buflen starts at MAXPATHLEN) is
 *    allocated and fsgetpath_internal() resolves the path into it; the
 *    enclosing loop repeats while the error is ENOSPC, growing buflen by
 *    MAXPATHLEN each time.
 *  - openat_internal() opens the resolved kernel-space path
 *    (UIO_SYSSPACE) with uap->oflags, mode 0 and AT_FDCWD.
 *
 * NOTE(review): declarations, the do-loop opening, buffer release,
 * error exits and the return are missing from this extraction --
 * confirm against the complete source.
 */
3826 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3832 int buflen
= MAXPATHLEN
;
3834 vfs_context_t ctx
= vfs_context_current();
3836 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
3840 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3844 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3845 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3849 AUDIT_ARG(value32
, fsid
.val
[0]);
3850 AUDIT_ARG(value64
, objid
);
3852 /* resolve path from fsid, objid */
3854 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3859 error
= fsgetpath_internal(
3860 ctx
, fsid
.val
[0], objid
,
3861 buflen
, buf
, &pathlen
);
3867 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3875 error
= openat_internal(
3876 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3885 * Create a special file.
3887 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
/*
 * mknod: create a special file.
 *
 * Steps visible in this listing:
 *  - va_mode is uap->mode limited to ALLPERMS with the process fd_cmask
 *    removed; va_rdev is uap->dev.
 *  - A request whose type bits (uap->mode & S_IFMT) equal S_IFIFO is
 *    handed straight to mkfifo1().
 *  - Otherwise mode and dev are audited and suser() must succeed.
 *  - NDINIT prepares a CREATE / OP_MKNOD lookup of uap->path with
 *    LOCKPARENT | AUDITVNPATH1.
 *  - A switch on the type bits sets va_type to VCHR or VBLK.
 *  - mac_vnode_check_create(), vnode_authorize(KAUTH_VNODE_ADD_FILE)
 *    and vn_create() follow.
 *  - After creation, missing v_name / v_parent are repaired through
 *    vnode_update_identity(), and an FSE_CREATE_FILE event is added.
 *
 * NOTE(review): the initialisation of va, the namei call, case labels,
 * the default case, error exits and cleanup (nameidone / vnode_put) are
 * missing from this extraction -- confirm against the complete source.
 */
3890 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3892 struct vnode_attr va
;
3893 vfs_context_t ctx
= vfs_context_current();
3895 struct nameidata nd
;
3899 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3900 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3902 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3903 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3904 return(mkfifo1(ctx
, uap
->path
, &va
));
3906 AUDIT_ARG(mode
, uap
->mode
);
3907 AUDIT_ARG(value32
, uap
->dev
);
3909 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3911 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3912 UIO_USERSPACE
, uap
->path
, ctx
);
3924 switch (uap
->mode
& S_IFMT
) {
3926 VATTR_SET(&va
, va_type
, VCHR
);
3929 VATTR_SET(&va
, va_type
, VBLK
);
3937 error
= mac_vnode_check_create(ctx
,
3938 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3943 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3946 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3950 int update_flags
= 0;
3952 // Make sure the name & parent pointers are hooked up
3953 if (vp
->v_name
== NULL
)
3954 update_flags
|= VNODE_UPDATE_NAME
;
3955 if (vp
->v_parent
== NULLVP
)
3956 update_flags
|= VNODE_UPDATE_PARENT
;
3959 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3962 add_fsevent(FSE_CREATE_FILE
, ctx
,
3970 * nameidone has to happen before we vnode_put(dvp)
3971 * since it may need to release the fs_nodelock on the dvp
3983 * Create a named pipe.
3985 * Returns: 0 Success
3988 * vnode_authorize:???
/*
 * mkfifo1: create a named pipe at upath using the attributes in vap.
 *
 * Steps visible in this listing:
 *  - NDINIT prepares a CREATE / OP_MKFIFO lookup of the user-space path
 *    with LOCKPARENT | AUDITVNPATH1.
 *  - va_type is set to VFIFO on the caller's vap.
 *  - vn_authorize_create() authorises the addition, then vn_create()
 *    creates the node.
 *
 * NOTE(review): the namei call, the "already exists" check mentioned
 * in the inline comment, error exits and cleanup (nameidone /
 * vnode_put) are missing from this extraction -- confirm against the
 * complete source.
 */
3992 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3996 struct nameidata nd
;
3998 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3999 UIO_USERSPACE
, upath
, ctx
);
4006 /* check that this is a new file and authorize addition */
4011 VATTR_SET(vap
, va_type
, VFIFO
);
4013 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
4016 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4019 * nameidone has to happen before we vnode_put(dvp)
4020 * since it may need to release the fs_nodelock on the dvp
4033 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4035 * Parameters: p Process requesting the open
4036 * uap User argument descriptor (see below)
4039 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4040 * uap->uid UID to set
4041 * uap->gid GID to set
4042 * uap->mode File mode to set (same as 'mkfifo')
4043 * uap->xsecurity ACL to set, if creating
4045 * Returns: 0 Success
4048 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4050 * XXX: We should enumerate the possible errno values here, and where
4051 * in the code they originated.
/*
 * mkfifo_extended: mkfifo(2) with explicit owner, group and ACL.
 *
 * Steps visible in this listing:
 *  - uap->uid / uap->gid are recorded with AUDIT_ARG.
 *  - xsecdst starts as KAUTH_FILESEC_NONE; when uap->xsecurity is not
 *    USER_ADDR_NULL it is filled by kauth_copyinfilesec().
 *  - va receives va_mode (uap->mode limited to ALLPERMS with fd_cmask
 *    removed), va_uid / va_gid when they are not KAUTH_UID_NONE /
 *    KAUTH_GID_NONE, and va_acl when xsecdst is set.
 *  - mkfifo1() creates the node; xsecdst, if any, is then released with
 *    kauth_filesec_free().
 *
 * NOTE(review): the initialisation of va, the early error return and
 * the final return are missing from this extraction -- confirm against
 * the complete source.
 */
4054 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4057 kauth_filesec_t xsecdst
;
4058 struct vnode_attr va
;
4060 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4062 xsecdst
= KAUTH_FILESEC_NONE
;
4063 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4064 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
4069 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4070 if (uap
->uid
!= KAUTH_UID_NONE
)
4071 VATTR_SET(&va
, va_uid
, uap
->uid
);
4072 if (uap
->gid
!= KAUTH_GID_NONE
)
4073 VATTR_SET(&va
, va_gid
, uap
->gid
);
4074 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4075 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4077 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4079 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4080 kauth_filesec_free(xsecdst
);
/*
 * mkfifo: create a named pipe at uap->path.
 *
 * Steps visible in this listing:
 *  - va_mode is uap->mode limited to ALLPERMS with the process fd_cmask
 *    removed.
 *  - The result of mkfifo1() on the current vfs context is returned.
 *
 * NOTE(review): the return type, braces and any initialisation of va
 * before VATTR_SET are missing from this extraction -- confirm against
 * the complete source.
 */
4086 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4088 struct vnode_attr va
;
4091 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4093 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
/*
 * my_strrchr: helper taking a string p and a character ch; safe_getpath()
 * calls it with '/' and writes through the returned pointer.
 *
 * Only the signature and the loop header are visible here: the loop
 * starts with save = NULL, has no termination condition in the for
 * header and advances p by one each iteration.
 *
 * NOTE(review): presumably a local strrchr (last occurrence of ch) --
 * the loop body and return are missing from this extraction, confirm
 * against the complete source.
 */
4098 my_strrchr(char *p
, int ch
)
4102 for (save
= NULL
;; ++p
) {
/* Prototype for safe_getpath(), defined immediately below. */
extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
/*
 * safe_getpath: build the path of leafname inside directory dvp into
 * path, flagging truncation instead of failing.
 *
 * Parameters:	dvp		Directory vnode
 *		leafname	Last path component to append
 *		path		Output buffer
 *		_len		Size handed to vn_getpath() / strlcpy()
 *		truncated_path	Set to 1 when the result is not the full
 *				path, 0 otherwise
 *
 * Steps visible in this listing:
 *  - *truncated_path starts at 0 and vn_getpath(dvp, path, &len) runs.
 *  - On success with len < MAXPATHLEN - 1, leafname is appended with
 *    strlcpy(); if the combined length exceeds MAXPATHLEN the path is
 *    marked truncated and cut at the last '/' found by my_strrchr(),
 *    and len is recomputed as strlen(path) + 1.
 *  - On success without room for a leaf, the path is only marked
 *    truncated.
 *  - On failure the path is marked truncated (with a printf when the
 *    error is not ENOSPC) and the code walks up through v_parent,
 *    retrying vn_getpath() while it returns ENOSPC; with no parent it
 *    falls back to the mount's f_mntonname, or to "/" when there is no
 *    mount either.
 *
 * NOTE(review): several lines (the '/' separator handling, loop
 * opening, breaks and the return of len) are missing from this
 * extraction -- confirm against the complete source.
 */
4114 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4116 int ret
, len
= _len
;
4118 *truncated_path
= 0;
4119 ret
= vn_getpath(dvp
, path
, &len
);
4120 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4123 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4124 if (len
> MAXPATHLEN
) {
4127 // the string got truncated!
4128 *truncated_path
= 1;
4129 ptr
= my_strrchr(path
, '/');
4131 *ptr
= '\0'; // chop off the string at the last directory component
4133 len
= strlen(path
) + 1;
4136 } else if (ret
== 0) {
4137 *truncated_path
= 1;
4138 } else if (ret
!= 0) {
4139 struct vnode
*mydvp
=dvp
;
4141 if (ret
!= ENOSPC
) {
4142 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4143 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4145 *truncated_path
= 1;
4148 if (mydvp
->v_parent
!= NULL
) {
4149 mydvp
= mydvp
->v_parent
;
4150 } else if (mydvp
->v_mount
) {
4151 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4154 // no parent and no mount point? only thing is to punt and say "/" changed
4155 strlcpy(path
, "/", _len
);
4160 if (mydvp
== NULL
) {
4165 ret
= vn_getpath(mydvp
, path
, &len
);
4166 } while (ret
== ENOSPC
);
4174 * Make a hard file link.
4176 * Returns: 0 Success
4181 * vnode_authorize:???
/*
 * linkat_internal: common body of link() and linkat() -- create a hard
 * link named `link` (relative to fd2) to the object at `path` (relative
 * to fd1).
 *
 * Steps visible in this listing:
 *  - The source is looked up with nameiat(&nd, fd1); AT_SYMLINK_FOLLOW
 *    in flag selects FOLLOW, otherwise NOFOLLOW.
 *  - A directory source is rejected with EPERM unless the mount has
 *    MNTK_DIR_HARDLINKS; a caller that is not the superuser must also
 *    own the directory (va_uid compared with the caller's uid).
 *  - The nameidata is re-armed for CREATE with LOCKPARENT |
 *    AUDITVNPATH2 | CN_NBMOUNTLOOK and the target is looked up with
 *    nameiat(&nd, fd2).
 *  - Checks in order: mac_vnode_check_link,
 *    vnode_authorize(KAUTH_VNODE_LINKTARGET) on the source, the target
 *    must not already exist (lvp), source and target directory must
 *    share a mount, vnode_authorize(KAUTH_VNODE_ADD_FILE) on the
 *    directory.
 *  - VNOP_LINK() creates the link; mac_vnode_notify_link() follows.
 *  - When an fsevent is needed or kauth fileop listeners exist, the
 *    target path is built with safe_getpath(); listeners get a
 *    KAUTH_FILEOP_LINK call with both paths, and FSE_CREATE_FILE /
 *    FSE_STAT_CHANGED events are added.
 *  - target_path is released with RELEASE_PATH when set.
 *
 * NOTE(review): error exits, gotos/labels, several call arguments and
 * the final cleanup (nameidone / vnode_put) are missing from this
 * extraction -- confirm against the complete source.
 */
4186 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4187 user_addr_t link
, int flag
, enum uio_seg segflg
)
4189 vnode_t vp
, dvp
, lvp
;
4190 struct nameidata nd
;
4196 int need_event
, has_listeners
;
4197 char *target_path
= NULL
;
4200 vp
= dvp
= lvp
= NULLVP
;
4202 /* look up the object we are linking to */
4203 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4204 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4207 error
= nameiat(&nd
, fd1
);
4215 * Normally, linking to directories is not supported.
4216 * However, some file systems may have limited support.
4218 if (vp
->v_type
== VDIR
) {
4219 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4220 error
= EPERM
; /* POSIX */
4224 /* Linking to a directory requires ownership. */
4225 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4226 struct vnode_attr dva
;
4229 VATTR_WANTED(&dva
, va_uid
);
4230 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4231 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4232 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4239 /* lookup the target node */
4243 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4244 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4246 error
= nameiat(&nd
, fd2
);
4253 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4257 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4258 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4261 /* target node must not exist */
4262 if (lvp
!= NULLVP
) {
4266 /* cannot link across mountpoints */
4267 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4272 /* authorize creation of the target node */
4273 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4276 /* and finally make the link */
4277 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4282 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4286 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4290 has_listeners
= kauth_authorize_fileop_has_listeners();
4292 if (need_event
|| has_listeners
) {
4293 char *link_to_path
= NULL
;
4294 int len
, link_name_len
;
4296 /* build the path to the new link file */
4297 GET_PATH(target_path
);
4298 if (target_path
== NULL
) {
4303 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4305 if (has_listeners
) {
4306 /* build the path to file we are linking to */
4307 GET_PATH(link_to_path
);
4308 if (link_to_path
== NULL
) {
4313 link_name_len
= MAXPATHLEN
;
4314 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4316 * Call out to allow 3rd party notification of rename.
4317 * Ignore result of kauth_authorize_fileop call.
4319 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4320 (uintptr_t)link_to_path
,
4321 (uintptr_t)target_path
);
4323 if (link_to_path
!= NULL
) {
4324 RELEASE_PATH(link_to_path
);
4329 /* construct fsevent */
4330 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4332 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4335 // build the path to the destination of the link
4336 add_fsevent(FSE_CREATE_FILE
, ctx
,
4337 FSE_ARG_STRING
, len
, target_path
,
4338 FSE_ARG_FINFO
, &finfo
,
4342 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4343 FSE_ARG_VNODE
, vp
->v_parent
,
4351 * nameidone has to happen before we vnode_put(dvp)
4352 * since it may need to release the fs_nodelock on the dvp
4355 if (target_path
!= NULL
) {
4356 RELEASE_PATH(target_path
);
4368 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4370 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4371 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
/*
 * linkat: create a hard link with each path resolved relative to its own
 * directory descriptor (uap->fd1 for uap->path, uap->fd2 for uap->link).
 *
 * Steps visible in this listing:
 *  - Any bit in uap->flag other than AT_SYMLINK_FOLLOW is singled out
 *    by the first check.
 *  - Otherwise the arguments are forwarded to linkat_internal() with
 *    user-space paths and its result is returned.
 *
 * NOTE(review): the statement executed when unknown flag bits are set
 * is missing from this extraction (presumably an error return) --
 * confirm against the complete source.
 */
4375 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4377 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4380 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4381 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4385 * Make a symbolic link.
4387 * We could add support for ACLs here too...
/*
 * symlinkat_internal: common body of symlink() and symlinkat() -- create
 * a symbolic link named `link` (relative to fd) whose contents are the
 * string at path_data.
 *
 * Steps visible in this listing:
 *  - For a user-space segment the link contents are copied into a
 *    MAXPATHLEN buffer from the M_NAMEI zone with copyinstr();
 *    otherwise path_data is used in place.
 *  - The link string is audited, then NDINIT prepares a CREATE /
 *    OP_SYMLINK lookup with LOCKPARENT | AUDITVNPATH1 and nameiat() runs
 *    it relative to fd.
 *  - va_type is VLNK and va_mode is ACCESSPERMS with the process
 *    fd_cmask removed.
 *  - mac_vnode_check_create(), vnode_authorize(KAUTH_VNODE_ADD_FILE)
 *    and vnode_authattr_new() precede VNOP_SYMLINK().
 *  - When a vnode came back without error, vnode_label() and
 *    vnode_setattr_fallback() are applied.
 *  - A second lookup (LOOKUP / OP_LOOKUP, cn_flags 0) through nameiat()
 *    is visible after the "check if a new vnode was created" comment.
 *  - The "#if 0" section holding a KAUTH_FILEOP_SYMLINK notification is
 *    disabled code.
 *  - Missing v_name / v_parent are repaired through
 *    vnode_update_identity(), and an FSE_CREATE_FILE event is added.
 *  - The copied-in buffer is released with FREE_ZONE only when path is
 *    set and differs from path_data.
 *
 * NOTE(review): branch conditions, error exits, several call arguments
 * and the final cleanup (nameidone / vnode_put) are missing from this
 * extraction -- confirm against the complete source.
 */
4391 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4392 user_addr_t link
, enum uio_seg segflg
)
4394 struct vnode_attr va
;
4397 struct nameidata nd
;
4403 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4404 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4405 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4407 path
= (char *)path_data
;
4411 AUDIT_ARG(text
, path
); /* This is the link string */
4413 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4416 error
= nameiat(&nd
, fd
);
4422 p
= vfs_context_proc(ctx
);
4424 VATTR_SET(&va
, va_type
, VLNK
);
4425 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4428 error
= mac_vnode_check_create(ctx
,
4429 dvp
, &nd
.ni_cnd
, &va
);
4442 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4443 /* get default ownership, etc. */
4445 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4447 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4450 if (error
== 0 && vp
)
4451 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4454 /* do fallback attribute handling */
4455 if (error
== 0 && vp
)
4456 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4459 int update_flags
= 0;
4461 /*check if a new vnode was created, else try to get one*/
4463 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4465 nd
.ni_op
= OP_LOOKUP
;
4467 nd
.ni_cnd
.cn_flags
= 0;
4468 error
= nameiat(&nd
, fd
);
4475 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4476 /* call out to allow 3rd party notification of rename.
4477 * Ignore result of kauth_authorize_fileop call.
4479 if (kauth_authorize_fileop_has_listeners() &&
4481 char *new_link_path
= NULL
;
4484 /* build the path to the new link file */
4485 new_link_path
= get_pathbuff();
4487 vn_getpath(dvp
, new_link_path
, &len
);
4488 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4489 new_link_path
[len
- 1] = '/';
4490 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4493 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4494 (uintptr_t)path
, (uintptr_t)new_link_path
);
4495 if (new_link_path
!= NULL
)
4496 release_pathbuff(new_link_path
);
4499 // Make sure the name & parent pointers are hooked up
4500 if (vp
->v_name
== NULL
)
4501 update_flags
|= VNODE_UPDATE_NAME
;
4502 if (vp
->v_parent
== NULLVP
)
4503 update_flags
|= VNODE_UPDATE_PARENT
;
4506 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4509 add_fsevent(FSE_CREATE_FILE
, ctx
,
4517 * nameidone has to happen before we vnode_put(dvp)
4518 * since it may need to release the fs_nodelock on the dvp
4526 if (path
&& (path
!= (char *)path_data
))
4527 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4533 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4535 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4536 uap
->link
, UIO_USERSPACE
));
4540 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4541 __unused
int32_t *retval
)
4543 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4544 uap
->path2
, UIO_USERSPACE
));
4548 * Delete a whiteout from the filesystem.
4549 * No longer supported.
/*
 * undelete: system call entry point for removing a whiteout.
 *
 * All three parameters are marked __unused, so nothing from the caller
 * is consulted.
 *
 * NOTE(review): the function body is missing from this extraction;
 * presumably it only returns a "not supported" error -- confirm against
 * the complete source.
 */
4552 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4558 * Delete a name from the filesystem.
4562 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4563 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4565 struct nameidata nd
;
4568 struct componentname
*cnp
;
4573 struct vnode_attr va
;
4580 struct vnode_attr
*vap
;
4582 int retry_count
= 0;
4585 cn_flags
= LOCKPARENT
;
4586 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4587 cn_flags
|= AUDITVNPATH1
;
4588 /* If a starting dvp is passed, it trumps any fd passed. */
4593 /* unlink or delete is allowed on rsrc forks and named streams */
4594 cn_flags
|= CN_ALLOWRSRCFORK
;
4605 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4607 nd
.ni_dvp
= start_dvp
;
4608 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4612 error
= nameiat(&nd
, fd
);
4620 /* With Carbon delete semantics, busy files cannot be deleted */
4621 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4622 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4625 /* Skip any potential upcalls if told to. */
4626 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4627 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4631 batched
= vnode_compound_remove_available(vp
);
4633 * The root of a mounted filesystem cannot be deleted.
4635 if (vp
->v_flag
& VROOT
) {
4640 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4642 if (error
== ENOENT
) {
4643 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4644 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4655 if (!vnode_compound_remove_available(dvp
)) {
4656 panic("No vp, but no compound remove?");
4661 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4664 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4665 /* XXX need to get these data in batched VNOP */
4666 get_fse_info(vp
, &finfo
, ctx
);
4669 error
= vfs_get_notify_attributes(&va
);
4678 has_listeners
= kauth_authorize_fileop_has_listeners();
4679 if (need_event
|| has_listeners
) {
4687 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4691 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4692 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4696 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4698 if (error
== EKEEPLOOKING
) {
4700 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4703 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4704 panic("EKEEPLOOKING, but continue flag not set?");
4707 if (vnode_isdir(vp
)) {
4711 goto continue_lookup
;
4712 } else if (error
== ENOENT
&& batched
) {
4713 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4714 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4716 * For compound VNOPs, the authorization callback may
4717 * return ENOENT in case of racing hardlink lookups
4718 * hitting the name cache, redrive the lookup.
4728 * Call out to allow 3rd party notification of delete.
4729 * Ignore result of kauth_authorize_fileop call.
4732 if (has_listeners
) {
4733 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4734 KAUTH_FILEOP_DELETE
,
4739 if (vp
->v_flag
& VISHARDLINK
) {
4741 // if a hardlink gets deleted we want to blow away the
4742 // v_parent link because the path that got us to this
4743 // instance of the link is no longer valid. this will
4744 // force the next call to get the path to ask the file
4745 // system instead of just following the v_parent link.
4747 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4752 if (vp
->v_flag
& VISHARDLINK
) {
4753 get_fse_info(vp
, &finfo
, ctx
);
4755 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4757 if (truncated_path
) {
4758 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4760 add_fsevent(FSE_DELETE
, ctx
,
4761 FSE_ARG_STRING
, len
, path
,
4762 FSE_ARG_FINFO
, &finfo
,
4773 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4774 * will cause its shadow file to go away if necessary.
4776 if (vp
&& (vnode_isnamedstream(vp
)) &&
4777 (vp
->v_parent
!= NULLVP
) &&
4778 vnode_isshadow(vp
)) {
4783 * nameidone has to happen before we vnode_put(dvp)
4784 * since it may need to release the fs_nodelock on the dvp
4800 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4801 enum uio_seg segflg
, int unlink_flags
)
4803 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4808 * Delete a name from the filesystem using Carbon semantics.
4811 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4813 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4814 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4818 * Delete a name from the filesystem using POSIX semantics.
4821 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4823 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4824 uap
->path
, UIO_USERSPACE
, 0));
4828 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4830 if (uap
->flag
& ~AT_REMOVEDIR
)
4833 if (uap
->flag
& AT_REMOVEDIR
)
4834 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4835 uap
->path
, UIO_USERSPACE
));
4837 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4838 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4842 * Reposition read/write file offset.
4845 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4847 struct fileproc
*fp
;
4849 struct vfs_context
*ctx
;
4850 off_t offset
= uap
->offset
, file_size
;
4853 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4854 if (error
== ENOTSUP
)
4858 if (vnode_isfifo(vp
)) {
4864 ctx
= vfs_context_current();
4866 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4867 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4870 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4877 if ( (error
= vnode_getwithref(vp
)) ) {
4882 switch (uap
->whence
) {
4884 offset
+= fp
->f_fglob
->fg_offset
;
4887 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4889 offset
+= file_size
;
4894 error
= VNOP_IOCTL(vp
, FSCTL_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
);
4897 error
= VNOP_IOCTL(vp
, FSCTL_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
);
4903 if (uap
->offset
> 0 && offset
< 0) {
4904 /* Incremented/relative move past max size */
4908 * Allow negative offsets on character devices, per
4909 * POSIX 1003.1-2001. Most likely for writing disk
4912 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4913 /* Decremented/relative move before start */
4917 fp
->f_fglob
->fg_offset
= offset
;
4918 *retval
= fp
->f_fglob
->fg_offset
;
4924 * An lseek can affect whether data is "available to read." Use
4925 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4927 post_event_if_success(vp
, error
, NOTE_NONE
);
4928 (void)vnode_put(vp
);
4935 * Check access permissions.
4937 * Returns: 0 Success
4938 * vnode_authorize:???
4941 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4943 kauth_action_t action
;
4947 * If just the regular access bits, convert them to something
4948 * that vnode_authorize will understand.
4950 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4953 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4954 if (uflags
& W_OK
) {
4955 if (vnode_isdir(vp
)) {
4956 action
|= KAUTH_VNODE_ADD_FILE
|
4957 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4958 /* might want delete rights here too */
4960 action
|= KAUTH_VNODE_WRITE_DATA
;
4963 if (uflags
& X_OK
) {
4964 if (vnode_isdir(vp
)) {
4965 action
|= KAUTH_VNODE_SEARCH
;
4967 action
|= KAUTH_VNODE_EXECUTE
;
4971 /* take advantage of definition of uflags */
4972 action
= uflags
>> 8;
4976 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4981 /* action == 0 means only check for existence */
4983 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4994 * access_extended: Check access permissions in bulk.
4996 * Description: uap->entries Pointer to an array of accessx
4997 * descriptor structs, plus one or
4998 * more NULL terminated strings (see
4999 * "Notes" section below).
5000 * uap->size Size of the area pointed to by
5002 * uap->results Pointer to the results array.
5004 * Returns: 0 Success
5005 * ENOMEM Insufficient memory
5006 * EINVAL Invalid arguments
5007 * namei:EFAULT Bad address
5008 * namei:ENAMETOOLONG Filename too long
5009 * namei:ENOENT No such file or directory
5010 * namei:ELOOP Too many levels of symbolic links
5011 * namei:EBADF Bad file descriptor
5012 * namei:ENOTDIR Not a directory
5017 * uap->results Array contents modified
5019 * Notes: The uap->entries are structured as an arbitrary length array
5020 * of accessx descriptors, followed by one or more NULL terminated
5023 * struct accessx_descriptor[0]
5025 * struct accessx_descriptor[n]
5026 * char name_data[0];
5028 * We determine the entry count by walking the buffer containing
5029 * the uap->entries argument descriptor. For each descriptor we
5030 * see, the valid values for the offset ad_name_offset will be
5031 * in the byte range:
5033 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5035 * [ uap->entries + uap->size - 2 ]
5037 * since we must have at least one string, and the string must
5038 * be at least one character plus the NULL terminator in length.
5040 * XXX: Need to support the check-as uid argument
5043 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5045 struct accessx_descriptor
*input
= NULL
;
5046 errno_t
*result
= NULL
;
5049 unsigned int desc_max
, desc_actual
, i
, j
;
5050 struct vfs_context context
;
5051 struct nameidata nd
;
5055 #define ACCESSX_MAX_DESCR_ON_STACK 10
5056 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5058 context
.vc_ucred
= NULL
;
5061 * Validate parameters; if valid, copy the descriptor array and string
5062 * arguments into local memory. Before proceeding, the following
5063 * conditions must have been met:
5065 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5066 * o There must be sufficient room in the request for at least one
5067 * descriptor and a one byte NUL terminated string.
5068 * o The allocation of local storage must not fail.
5070 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
5072 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
5074 if (uap
->size
<= sizeof (stack_input
)) {
5075 input
= stack_input
;
5077 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5078 if (input
== NULL
) {
5083 error
= copyin(uap
->entries
, input
, uap
->size
);
5087 AUDIT_ARG(opaque
, input
, uap
->size
);
5090 * Force NUL termination of the copyin buffer to avoid namei() running
5091 * off the end. If the caller passes us bogus data, they may get a
5094 ((char *)input
)[uap
->size
- 1] = 0;
5097 * Access is defined as checking against the process' real identity,
5098 * even if operations are checking the effective identity. This
5099 * requires that we use a local vfs context.
5101 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5102 context
.vc_thread
= current_thread();
5105 * Find out how many entries we have, so we can allocate the result
5106 * array by walking the list and adjusting the count downward by the
5107 * earliest string offset we see.
5109 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5110 desc_actual
= desc_max
;
5111 for (i
= 0; i
< desc_actual
; i
++) {
5113 * Take the offset to the name string for this entry and
5114 * convert to an input array index, which would be one off
5115 * the end of the array if this entry was the lowest-addressed
5118 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5121 * An offset greater than the max allowable offset is an error.
5122 * It is also an error for any valid entry to point
5123 * to a location prior to the end of the current entry, if
5124 * it's not a reference to the string of the previous entry.
5126 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5131 /* Also do not let ad_name_offset point to something beyond the size of the input */
5132 if (input
[i
].ad_name_offset
>= uap
->size
) {
5138 * An offset of 0 means use the previous descriptor's offset;
5139 * this is used to chain multiple requests for the same file
5140 * to avoid multiple lookups.
5143 /* This is not valid for the first entry */
5152 * If the offset of the string for this descriptor is before
5153 * what we believe is the current actual last descriptor,
5154 * then we need to adjust our estimate downward; this permits
5155 * the string table following the last descriptor to be out
5156 * of order relative to the descriptor list.
5158 if (j
< desc_actual
)
5163 * We limit the actual number of descriptors we are willing to process
5164 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5165 * requested does not exceed this limit,
5167 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5171 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5172 if (result
== NULL
) {
5178 * Do the work by iterating over the descriptor entries we know to
5179 * at least appear to contain valid data.
5182 for (i
= 0; i
< desc_actual
; i
++) {
5184 * If the ad_name_offset is 0, then we use the previous
5185 * results to make the check; otherwise, we are looking up
5188 if (input
[i
].ad_name_offset
!= 0) {
5189 /* discard old vnodes */
5200 * Scan forward in the descriptor list to see if we
5201 * need the parent vnode. We will need it if we are
5202 * deleting, since we must have rights to remove
5203 * entries in the parent directory, as well as the
5204 * rights to delete the object itself.
5206 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5207 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5208 if (input
[j
].ad_flags
& _DELETE_OK
)
5211 niopts
= FOLLOW
| AUDITVNPATH1
;
5213 /* need parent for vnode_authorize for deletion test */
5215 niopts
|= WANTPARENT
;
5218 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5219 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5231 * Handle lookup errors.
5241 /* run this access check */
5242 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5245 /* fatal lookup error */
5251 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5253 /* copy out results */
5254 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5257 if (input
&& input
!= stack_input
)
5258 FREE(input
, M_TEMP
);
5260 FREE(result
, M_TEMP
);
5265 if (IS_VALID_CRED(context
.vc_ucred
))
5266 kauth_cred_unref(&context
.vc_ucred
);
5272 * Returns: 0 Success
5273 * namei:EFAULT Bad address
5274 * namei:ENAMETOOLONG Filename too long
5275 * namei:ENOENT No such file or directory
5276 * namei:ELOOP Too many levels of symbolic links
5277 * namei:EBADF Bad file descriptor
5278 * namei:ENOTDIR Not a directory
5283 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5284 int flag
, enum uio_seg segflg
)
5287 struct nameidata nd
;
5289 struct vfs_context context
;
5291 int is_namedstream
= 0;
5295 * Unless the AT_EACCESS option is used, Access is defined as checking
5296 * against the process' real identity, even if operations are checking
5297 * the effective identity. So we need to tweak the credential
5298 * in the context for that case.
5300 if (!(flag
& AT_EACCESS
))
5301 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5303 context
.vc_ucred
= ctx
->vc_ucred
;
5304 context
.vc_thread
= ctx
->vc_thread
;
5307 niopts
= FOLLOW
| AUDITVNPATH1
;
5308 /* need parent for vnode_authorize for deletion test */
5309 if (amode
& _DELETE_OK
)
5310 niopts
|= WANTPARENT
;
5311 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5315 /* access(F_OK) calls are allowed for resource forks. */
5317 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5319 error
= nameiat(&nd
, fd
);
5324 /* Grab reference on the shadow stream file vnode to
5325 * force an inactive on release which will mark it
5328 if (vnode_isnamedstream(nd
.ni_vp
) &&
5329 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5330 vnode_isshadow(nd
.ni_vp
)) {
5332 vnode_ref(nd
.ni_vp
);
5336 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5339 if (is_namedstream
) {
5340 vnode_rele(nd
.ni_vp
);
5344 vnode_put(nd
.ni_vp
);
5345 if (amode
& _DELETE_OK
)
5346 vnode_put(nd
.ni_dvp
);
5350 if (!(flag
& AT_EACCESS
))
5351 kauth_cred_unref(&context
.vc_ucred
);
5356 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5358 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5359 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5363 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5364 __unused
int32_t *retval
)
5366 if (uap
->flag
& ~AT_EACCESS
)
5369 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5370 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5374 * Returns: 0 Success
5381 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5382 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5383 enum uio_seg segflg
, int fd
, int flag
)
5385 struct nameidata nd
;
5392 struct user64_stat user64_sb
;
5393 struct user32_stat user32_sb
;
5394 struct user64_stat64 user64_sb64
;
5395 struct user32_stat64 user32_sb64
;
5399 kauth_filesec_t fsec
;
5400 size_t xsecurity_bufsize
;
5403 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5404 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5408 int is_namedstream
= 0;
5409 /* stat calls are allowed for resource forks. */
5410 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5412 error
= nameiat(&nd
, fd
);
5415 fsec
= KAUTH_FILESEC_NONE
;
5417 statptr
= (void *)&source
;
5420 /* Grab reference on the shadow stream file vnode to
5421 * force an inactive on release which will mark it
5424 if (vnode_isnamedstream(nd
.ni_vp
) &&
5425 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5426 vnode_isshadow(nd
.ni_vp
)) {
5428 vnode_ref(nd
.ni_vp
);
5432 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5435 if (is_namedstream
) {
5436 vnode_rele(nd
.ni_vp
);
5439 vnode_put(nd
.ni_vp
);
5444 /* Zap spare fields */
5445 if (isstat64
!= 0) {
5446 source
.sb64
.st_lspare
= 0;
5447 source
.sb64
.st_qspare
[0] = 0LL;
5448 source
.sb64
.st_qspare
[1] = 0LL;
5449 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5450 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5451 my_size
= sizeof(dest
.user64_sb64
);
5452 sbp
= (caddr_t
)&dest
.user64_sb64
;
5454 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5455 my_size
= sizeof(dest
.user32_sb64
);
5456 sbp
= (caddr_t
)&dest
.user32_sb64
;
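5459 * Check if we raced (post lookup) against the last unlink of a file.
 * NOTE(review): source.sb64 has already been munged into dest above and
 * sbp points at dest, so this st_nlink fixup does not appear to reach the
 * copyout() below; it likely needs to run before the munge - confirm.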
5461 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5462 source
.sb64
.st_nlink
= 1;
5465 source
.sb
.st_lspare
= 0;
5466 source
.sb
.st_qspare
[0] = 0LL;
5467 source
.sb
.st_qspare
[1] = 0LL;
5468 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5469 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5470 my_size
= sizeof(dest
.user64_sb
);
5471 sbp
= (caddr_t
)&dest
.user64_sb
;
5473 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5474 my_size
= sizeof(dest
.user32_sb
);
5475 sbp
= (caddr_t
)&dest
.user32_sb
;
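5479 * Check if we raced (post lookup) against the last unlink of a file.
 * NOTE(review): source.sb has already been munged into dest above and
 * sbp points at dest, so this st_nlink fixup does not appear to reach the
 * copyout() below; it likely needs to run before the munge - confirm.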
5481 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5482 source
.sb
.st_nlink
= 1;
5485 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5488 /* caller wants extended security information? */
5489 if (xsecurity
!= USER_ADDR_NULL
) {
5491 /* did we get any? */
5492 if (fsec
== KAUTH_FILESEC_NONE
) {
5493 if (susize(xsecurity_size
, 0) != 0) {
5498 /* find the user buffer size */
5499 xsecurity_bufsize
= fusize(xsecurity_size
);
5501 /* copy out the actual data size */
5502 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5507 /* if the caller supplied enough room, copy out to it */
5508 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5509 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5513 if (fsec
!= KAUTH_FILESEC_NONE
)
5514 kauth_filesec_free(fsec
);
5519 * stat_extended: Get file status; with extended security (ACL).
5521 * Parameters: p (ignored)
5522 * uap User argument descriptor (see below)
5525 * Indirect: uap->path Path of file to get status from
5526 * uap->ub User buffer (holds file status info)
5527 * uap->xsecurity ACL to get (extended security)
5528 * uap->xsecurity_size Size of ACL
5530 * Returns: 0 Success
5535 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5536 __unused
int32_t *retval
)
5538 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5539 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5544 * Returns: 0 Success
5545 * fstatat_internal:??? [see fstatat_internal() in this file]
5548 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5550 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5551 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5555 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5557 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5558 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5562 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5564 * Parameters: p (ignored)
5565 * uap User argument descriptor (see below)
5568 * Indirect: uap->path Path of file to get status from
5569 * uap->ub User buffer (holds file status info)
5570 * uap->xsecurity ACL to get (extended security)
5571 * uap->xsecurity_size Size of ACL
5573 * Returns: 0 Success
5578 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5580 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5581 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5586 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5588 * Parameters: p (ignored)
5589 * uap User argument descriptor (see below)
5592 * Indirect: uap->path Path of file to get status from
5593 * uap->ub User buffer (holds file status info)
5594 * uap->xsecurity ACL to get (extended security)
5595 * uap->xsecurity_size Size of ACL
5597 * Returns: 0 Success
5602 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5604 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5605 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5606 AT_SYMLINK_NOFOLLOW
));
5610 * Get file status; this version does not follow links.
5613 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5615 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5616 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5620 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5622 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5623 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5627 * lstat64_extended: Get file status; can handle large inode numbers; does not
5628 * follow links; with extended security (ACL).
5630 * Parameters: p (ignored)
5631 * uap User argument descriptor (see below)
5634 * Indirect: uap->path Path of file to get status from
5635 * uap->ub User buffer (holds file status info)
5636 * uap->xsecurity ACL to get (extended security)
5637 * uap->xsecurity_size Size of ACL
5639 * Returns: 0 Success
5644 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5646 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5647 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5648 AT_SYMLINK_NOFOLLOW
));
5652 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5654 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5657 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5658 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5662 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5663 __unused
int32_t *retval
)
5665 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5668 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5669 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5673 * Get configurable pathname variables.
5675 * Returns: 0 Success
5679 * Notes: Global implementation constants are intended to be
5680 * implemented in this function directly; all other constants
5681 * are per-FS implementation, and therefore must be handled in
5682 * each respective FS, instead.
5684 * XXX We implement some things globally right now that should actually be
5685 * XXX per-FS; we will need to deal with this at some point.
5689 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5692 struct nameidata nd
;
5693 vfs_context_t ctx
= vfs_context_current();
5695 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5696 UIO_USERSPACE
, uap
->path
, ctx
);
5701 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5703 vnode_put(nd
.ni_vp
);
5709 * Return target name of a symbolic link.
5713 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5714 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5720 struct nameidata nd
;
5721 char uio_buf
[ UIO_SIZEOF(1) ];
5723 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5726 error
= nameiat(&nd
, fd
);
5733 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5734 &uio_buf
[0], sizeof(uio_buf
));
5735 uio_addiov(auio
, buf
, bufsize
);
5736 if (vp
->v_type
!= VLNK
) {
5740 error
= mac_vnode_check_readlink(ctx
, vp
);
5743 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5746 error
= VNOP_READLINK(vp
, auio
, ctx
);
5750 *retval
= bufsize
- (int)uio_resid(auio
);
5755 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5757 enum uio_seg procseg
;
5759 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5760 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5761 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5762 uap
->count
, procseg
, retval
));
5766 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5768 enum uio_seg procseg
;
5770 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5771 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5772 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5776 * Change file flags.
5778 * NOTE: this will vnode_put() `vp'
5781 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5783 struct vnode_attr va
;
5784 kauth_action_t action
;
5788 VATTR_SET(&va
, va_flags
, flags
);
5791 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5796 /* request authorisation, disregard immutability */
5797 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5800 * Request that the auth layer disregard those file flags it's allowed to when
5801 * authorizing this operation; we need to do this in order to be able to
5802 * clear immutable flags.
5804 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5806 error
= vnode_setattr(vp
, &va
, ctx
);
5810 mac_vnode_notify_setflags(ctx
, vp
, flags
);
5813 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5822 * Change flags of a file given a path name.
5826 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5829 vfs_context_t ctx
= vfs_context_current();
5831 struct nameidata nd
;
5833 AUDIT_ARG(fflags
, uap
->flags
);
5834 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5835 UIO_USERSPACE
, uap
->path
, ctx
);
5842 /* we don't vnode_put() here because chflags1 does internally */
5843 error
= chflags1(vp
, uap
->flags
, ctx
);
5849 * Change flags of a file given a file descriptor.
5853 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5858 AUDIT_ARG(fd
, uap
->fd
);
5859 AUDIT_ARG(fflags
, uap
->flags
);
5860 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5863 if ((error
= vnode_getwithref(vp
))) {
5868 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5870 /* we don't vnode_put() here because chflags1 does internally */
5871 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5878 * Change security information on a filesystem object.
5880 * Returns: 0 Success
5881 * EPERM Operation not permitted
5882 * vnode_authattr:??? [anything vnode_authattr can return]
5883 * vnode_authorize:??? [anything vnode_authorize can return]
5884 * vnode_setattr:??? [anything vnode_setattr can return]
5886 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5887 * translated to EPERM before being returned.
5890 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5892 kauth_action_t action
;
5895 AUDIT_ARG(mode
, vap
->va_mode
);
5896 /* XXX audit new args */
5899 /* chmod calls are not allowed for resource forks. */
5900 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5906 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5907 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5910 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
5911 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
5912 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5913 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1)))
5917 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
5918 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
)))
5922 /* make sure that the caller is allowed to set this security information */
5923 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5924 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5925 if (error
== EACCES
)
5930 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0)
5934 if (VATTR_IS_ACTIVE(vap
, va_mode
))
5935 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
5937 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
))
5938 mac_vnode_notify_setowner(ctx
, vp
,
5939 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5940 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
5942 if (VATTR_IS_ACTIVE(vap
, va_acl
))
5943 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
5951 * Change mode of a file given a path name.
5953 * Returns: 0 Success
5954 * namei:??? [anything namei can return]
5955 * chmod_vnode:??? [anything chmod_vnode can return]
5958 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5959 int fd
, int flag
, enum uio_seg segflg
)
5961 struct nameidata nd
;
5964 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5965 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5967 if ((error
= nameiat(&nd
, fd
)))
5969 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5970 vnode_put(nd
.ni_vp
);
5976 * chmod_extended: Change the mode of a file given a path name; with extended
5977 * argument list (including extended security (ACL)).
5979 * Parameters: p Process requesting the open
5980 * uap User argument descriptor (see below)
5983 * Indirect: uap->path Path to object (same as 'chmod')
5984 * uap->uid UID to set
5985 * uap->gid GID to set
5986 * uap->mode File mode to set (same as 'chmod')
5987 * uap->xsecurity ACL to set (or delete)
5989 * Returns: 0 Success
5992 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5994 * XXX: We should enumerate the possible errno values here, and where
5995 * in the code they originated.
/*
 * chmod_extended: build a vnode_attr from the user arguments (mode, uid,
 * gid, optional ACL) and apply it to uap->path through chmodat() with
 * AT_FDCWD and flag 0.
 * Each of mode/uid/gid is only set when it differs from its "not supplied"
 * sentinel (-1, KAUTH_UID_NONE, KAUTH_GID_NONE).
 * NOTE(review): lines are missing from this listing (initialisation of `va`
 * and `xsecdst`, break/default labels, return); comments below describe
 * only what is visible.
 */
5998 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
6001 struct vnode_attr va
;
6002 kauth_filesec_t xsecdst
;
6004 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* mode is masked with ALLPERMS before being recorded */
6007 if (uap
->mode
!= -1)
6008 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6009 if (uap
->uid
!= KAUTH_UID_NONE
)
6010 VATTR_SET(&va
, va_uid
, uap
->uid
);
6011 if (uap
->gid
!= KAUTH_GID_NONE
)
6012 VATTR_SET(&va
, va_gid
, uap
->gid
);
/* uap->xsecurity is either a sentinel value or a user pointer to a filesec */
6015 switch(uap
->xsecurity
) {
6016 /* explicit remove request */
6017 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6018 VATTR_SET(&va
, va_acl
, NULL
);
/* NOTE(review): body of the USER_ADDR_NULL case is not visible; presumably "no ACL change" */
6021 case USER_ADDR_NULL
:
/* remaining path copies the filesec in from user space and points va_acl at its ACL */
6024 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6026 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6027 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6030 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
/* free the copied-in filesec, if one was obtained */
6033 if (xsecdst
!= NULL
)
6034 kauth_filesec_free(xsecdst
);
6039 * Returns: 0 Success
6040 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: common back end for chmod() and fchmodat().  Records
 * `mode & ALLPERMS` as va_mode in a local vnode_attr and forwards everything
 * else unchanged to chmodat().
 * NOTE(review): the initialisation of `va` is not visible in this listing.
 */
6043 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6044 int flag
, enum uio_seg segflg
)
6046 struct vnode_attr va
;
/* only the permission bits covered by ALLPERMS are passed on */
6049 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6051 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
/*
 * chmod: path-based entry point.  Forwards uap->path and uap->mode to
 * fchmodat_internal() with AT_FDCWD as the directory fd, no flags, and a
 * user-space path segment.
 */
6055 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6057 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6058 AT_FDCWD
, 0, UIO_USERSPACE
));
/*
 * fchmodat: like chmod() but with a caller-supplied directory fd and flag
 * word, both forwarded to fchmodat_internal().
 */
6062 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
/*
 * Any flag bit other than AT_SYMLINK_NOFOLLOW is rejected here.
 * NOTE(review): the rejecting statement is not visible; presumably it
 * returns EINVAL -- confirm against the full source.
 */
6064 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6067 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6068 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
6072 * Change mode of a file given a file descriptor.
/*
 * fchmod1: descriptor-based helper.  Maps `fd` to a vnode with file_vnode(),
 * takes a reference with vnode_getwithref(), applies `vap` through
 * chmod_vnode() using the current VFS context, then drops the vnode.
 * NOTE(review): declarations, the error-path bodies and the final return are
 * not visible in this listing (presumably the fd is also released there).
 */
6075 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6082 if ((error
= file_vnode(fd
, &vp
)) != 0)
6084 if ((error
= vnode_getwithref(vp
)) != 0) {
6088 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6090 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
/* return value of vnode_put() is deliberately ignored */
6091 (void)vnode_put(vp
);
6098 * fchmod_extended: Change mode of a file given a file descriptor; with
6099 * extended argument list (including extended security (ACL)).
6101 * Parameters: p Process requesting to change file mode
6102 * uap User argument descriptor (see below)
6105 * Indirect: uap->mode File mode to set (same as 'chmod')
6106 * uap->uid UID to set
6107 * uap->gid GID to set
6108 * uap->xsecurity ACL to set (or delete)
6109 * uap->fd File descriptor of file to change mode
6111 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based counterpart of chmod_extended().  Builds
 * a vnode_attr from mode/uid/gid/xsecurity and applies it to uap->fd through
 * fchmod1().
 * NOTE(review): lines are missing from this listing (initialisation of `va`
 * and `xsecdst`, break/default labels, return), so only the visible
 * statements are described.
 */
6116 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6119 struct vnode_attr va
;
6120 kauth_filesec_t xsecdst
;
6122 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* each attribute is set only when it is not its "not supplied" sentinel */
6125 if (uap
->mode
!= -1)
6126 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6127 if (uap
->uid
!= KAUTH_UID_NONE
)
6128 VATTR_SET(&va
, va_uid
, uap
->uid
);
6129 if (uap
->gid
!= KAUTH_GID_NONE
)
6130 VATTR_SET(&va
, va_gid
, uap
->gid
);
/*
 * Unlike chmod_extended(), the visible code here sets va_acl to NULL for
 * USER_ADDR_NULL as well as for the explicit remove value.
 */
6133 switch(uap
->xsecurity
) {
6134 case USER_ADDR_NULL
:
6135 VATTR_SET(&va
, va_acl
, NULL
);
6137 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6138 VATTR_SET(&va
, va_acl
, NULL
);
/* NOTE(review): body of the -1 case is not visible; presumably "ACL not being set" */
6141 case CAST_USER_ADDR_T(-1):
/* remaining path copies the filesec in from user space and uses its ACL */
6144 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6146 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6149 error
= fchmod1(p
, uap
->fd
, &va
);
/*
 * Second pass over the same selector: the sentinel cases are listed
 * separately from the path that frees a copied-in filesec.
 */
6152 switch(uap
->xsecurity
) {
6153 case USER_ADDR_NULL
:
6154 case CAST_USER_ADDR_T(-1):
6157 if (xsecdst
!= NULL
)
6158 kauth_filesec_free(xsecdst
);
/*
 * fchmod: descriptor-based entry point.  Records `uap->mode & ALLPERMS` as
 * va_mode and applies it to uap->fd via fchmod1().
 * NOTE(review): the initialisation of `va` is not visible in this listing.
 */
6164 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6166 struct vnode_attr va
;
6169 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6171 return(fchmod1(p
, uap
->fd
, &va
));
6176 * Set ownership given a path name.
/*
 * fchownat_internal: common back end for chown(), lchown() and fchownat().
 * Looks up `path` through nameiat() with `fd`, builds a vnode_attr holding
 * the requested uid/gid, runs the MAC and kauth authorisation steps, and
 * applies the change with vnode_setattr().
 * A uid or gid equal to VNOVAL (after casting) means "leave unchanged".
 * NOTE(review): this listing has dropped lines (declarations, `vp`
 * assignment, error gotos, conditional-compilation guards, cleanup and
 * return); only visible statements are described.
 */
6180 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6181 gid_t gid
, int flag
, enum uio_seg segflg
)
6184 struct vnode_attr va
;
6186 struct nameidata nd
;
6188 kauth_action_t action
;
6190 AUDIT_ARG(owner
, uid
, gid
);
/* AT_SYMLINK_NOFOLLOW selects NOFOLLOW for the lookup */
6192 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6193 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6195 error
= nameiat(&nd
, fd
);
/* only the ids the caller actually supplied are marked for setting */
6203 if (uid
!= (uid_t
)VNOVAL
)
6204 VATTR_SET(&va
, va_uid
, uid
);
6205 if (gid
!= (gid_t
)VNOVAL
)
6206 VATTR_SET(&va
, va_gid
, gid
);
/* MAC policy check receives the raw uid/gid arguments */
6209 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6214 /* preflight and authorize attribute changes */
6215 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
/* vnode_authorize() is only consulted when vnode_authattr() produced a non-zero action */
6217 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6219 error
= vnode_setattr(vp
, &va
, ctx
);
6223 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6228 * EACCES is only allowed from namei(); permissions failure should
6229 * return EPERM, so we need to translate the error code.
6231 if (error
== EACCES
)
/*
 * chown: path-based entry point.  Forwards uap->path, uap->uid and uap->gid
 * to fchownat_internal() with AT_FDCWD, no flags (so FOLLOW is selected
 * there), and a user-space path segment.
 */
6239 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6241 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6242 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
/*
 * lchown: same as chown() but passes AT_SYMLINK_NOFOLLOW, so the lookup in
 * fchownat_internal() uses NOFOLLOW.  Note that this argument structure
 * names its fields `owner`/`group` rather than `uid`/`gid`.
 */
6246 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6248 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6249 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
/*
 * fchownat: like chown() but with a caller-supplied directory fd and flag
 * word, both forwarded to fchownat_internal().
 */
6253 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
/*
 * Any flag bit other than AT_SYMLINK_NOFOLLOW is rejected here.
 * NOTE(review): the rejecting statement is not visible; presumably it
 * returns EINVAL -- confirm against the full source.
 */
6255 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6258 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6259 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6263 * Set ownership given a file descriptor.
/*
 * fchown: descriptor-based ownership change.  Maps uap->fd to a vnode,
 * builds a vnode_attr with the requested uid/gid (VNOVAL means "leave
 * unchanged"), runs the MAC and kauth authorisation steps and applies the
 * change with vnode_setattr().
 * NOTE(review): this listing has dropped lines (declarations, error-path
 * bodies, conditional-compilation guards, labels and return); only visible
 * statements are described.
 */
6267 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6269 struct vnode_attr va
;
6270 vfs_context_t ctx
= vfs_context_current();
6273 kauth_action_t action
;
6275 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6276 AUDIT_ARG(fd
, uap
->fd
);
6278 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6281 if ( (error
= vnode_getwithref(vp
)) ) {
6285 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/* only the ids the caller actually supplied are marked for setting */
6288 if (uap
->uid
!= VNOVAL
)
6289 VATTR_SET(&va
, va_uid
, uap
->uid
);
6290 if (uap
->gid
!= VNOVAL
)
6291 VATTR_SET(&va
, va_gid
, uap
->gid
);
6294 /* chown calls are not allowed for resource forks. */
6295 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6302 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6307 /* preflight and authorize attribute changes */
6308 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
/*
 * EACCES from vnode_authorize() gets special handling.
 * NOTE(review): the translated value is not visible; presumably EPERM, as
 * in the path-based variant -- confirm.
 */
6310 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6311 if (error
== EACCES
)
6315 error
= vnode_setattr(vp
, &va
, ctx
);
6319 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6323 (void)vnode_put(vp
);
/*
 * getutimes: fill tsp[0] and tsp[1] with the access/modify times to apply.
 * When `usrtvp` is USER_ADDR_NULL a locally obtained timeval is converted
 * (the call that fills `old_tv` and the assignment of tsp[1] are not visible
 * in this listing).  Otherwise two timevals are copied in from user space,
 * using the 64-bit or 32-bit user layout depending on the calling process,
 * and each is converted with TIMEVAL_TO_TIMESPEC.
 * NOTE(review): declarations, copyin error handling and the return are
 * among the dropped lines.
 */
6329 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6333 if (usrtvp
== USER_ADDR_NULL
) {
6334 struct timeval old_tv
;
6335 /* XXX Y2038 bug because of microtime argument */
6337 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
/* user-supplied times: the timeval layout depends on the process ABI */
6340 if (IS_64BIT_PROCESS(current_proc())) {
6341 struct user64_timeval tv
[2];
6342 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6345 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6346 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6348 struct user32_timeval tv
[2];
6349 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6352 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6353 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: apply ts[0] as the access time and ts[1] as the modify time of
 * `vp`, after MAC and kauth authorisation.
 * NOTE(review): the last parameter (referred to below as `nullflag`) and
 * several statements (declarations, the condition guarding VA_UTIMES_NULL,
 * error-path bodies, labels, return) are on lines dropped from this
 * listing.
 */
6360 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6364 struct vnode_attr va
;
6365 kauth_action_t action
;
6367 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6370 VATTR_SET(&va
, va_access_time
, ts
[0]);
6371 VATTR_SET(&va
, va_modify_time
, ts
[1]);
/* NOTE(review): presumably only done when nullflag is set -- guard not visible */
6373 va
.va_vaflags
|= VA_UTIMES_NULL
;
6376 /* utimes calls are not allowed for resource forks. */
6377 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6384 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
/*
 * In both authorisation steps below, EACCES is special-cased when the
 * caller supplied explicit times (!nullflag); the replacement value is not
 * visible here.
 */
6388 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6389 if (!nullflag
&& error
== EACCES
)
6394 /* since we may not need to auth anything, check here */
6395 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6396 if (!nullflag
&& error
== EACCES
)
6400 error
= vnode_setattr(vp
, &va
, ctx
);
6404 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6412 * Set the access and modification times of a file.
/*
 * utimes: path-based entry point.  Prepares a FOLLOW lookup of uap->path,
 * fetches the times with getutimes(), and applies them to the looked-up
 * vnode with setutimes().  The last setutimes() argument is true when no
 * user time buffer was supplied.
 * NOTE(review): the namei call, the assignment of `usrtvp`, error gotos and
 * the return are on lines dropped from this listing.
 */
6416 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6418 struct timespec ts
[2];
6421 struct nameidata nd
;
6422 vfs_context_t ctx
= vfs_context_current();
6425 * AUDIT: Needed to change the order of operations to do the
6426 * name lookup first because auditing wants the path.
6428 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6429 UIO_USERSPACE
, uap
->path
, ctx
);
6436 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6437 * the current time instead.
6440 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6443 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
/* done with the vnode produced by the lookup */
6446 vnode_put(nd
.ni_vp
);
6451 * Set the access and modification times of a file.
/*
 * futimes: descriptor-based counterpart of utimes().  Fetches the times
 * with getutimes() first, then maps uap->fd to a vnode, takes a reference,
 * and applies the times with setutimes().
 * NOTE(review): declarations, the assignment of `usrtvp`, error-path
 * bodies, the release of the vnode/fd and the return are on lines dropped
 * from this listing.
 */
6455 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6457 struct timespec ts
[2];
6462 AUDIT_ARG(fd
, uap
->fd
);
6464 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6466 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6468 if((error
= vnode_getwithref(vp
))) {
/* `usrtvp == 0` plays the same role as the USER_ADDR_NULL comparison in utimes() */
6473 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6480 * Truncate a file given its path name.
/*
 * truncate: path-based truncation.  Looks up uap->path (FOLLOW), records
 * uap->length as va_data_size, runs the MAC and kauth authorisation steps
 * and applies the new size with vnode_setattr().
 * NOTE(review): this listing has dropped lines (declarations, the statement
 * taken for a negative length, `vp` assignment, error gotos,
 * conditional-compilation guards, cleanup and return).
 */
6484 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6487 struct vnode_attr va
;
6488 vfs_context_t ctx
= vfs_context_current();
6490 struct nameidata nd
;
6491 kauth_action_t action
;
/* negative lengths are rejected before any lookup (rejecting statement not visible) */
6493 if (uap
->length
< 0)
6495 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6496 UIO_USERSPACE
, uap
->path
, ctx
);
6497 if ((error
= namei(&nd
)))
6504 VATTR_SET(&va
, va_data_size
, uap
->length
);
/* path-based call has no file credential, so NOCRED is passed to the MAC hooks */
6507 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6512 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6514 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6516 error
= vnode_setattr(vp
, &va
, ctx
);
6520 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6529 * Truncate a file given a file descriptor.
/*
 * ftruncate: descriptor-based truncation.  Looks up the fileproc for the
 * descriptor, dispatches on its file type (one visible case hands off to
 * pshm_truncate()), and for the vnode path requires the descriptor to be
 * open for writing before setting va_data_size to uap->length with
 * vnode_setattr().
 * NOTE(review): this listing has dropped lines (declarations, assignment of
 * the local `fd`, case labels, error values, conditional-compilation
 * guards, labels and return); only visible statements are described.
 */
6533 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6535 vfs_context_t ctx
= vfs_context_current();
6536 struct vnode_attr va
;
6538 struct fileproc
*fp
;
6542 AUDIT_ARG(fd
, uap
->fd
);
/* negative lengths are rejected up front (rejecting statement not visible) */
6543 if (uap
->length
< 0)
6546 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6550 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6552 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6561 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
/* descriptor not opened with FWRITE: audit the vnode and fail (error value not visible) */
6563 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6564 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6569 if ((error
= vnode_getwithref(vp
)) != 0) {
6573 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/* MAC hooks receive the credential stored with the open file */
6576 error
= mac_vnode_check_truncate(ctx
,
6577 fp
->f_fglob
->fg_cred
, vp
);
/* NOTE(review): presumably inside the MAC-check failure branch -- guard not visible */
6579 (void)vnode_put(vp
);
6584 VATTR_SET(&va
, va_data_size
, uap
->length
);
6585 error
= vnode_setattr(vp
, &va
, ctx
);
6589 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
6592 (void)vnode_put(vp
);
6600 * Sync an open file with synchronized I/O _file_ integrity completion
/*
 * fsync: calls __pthread_testcancel(1) and then delegates to fsync_common()
 * with MNT_WAIT.
 */
6604 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6606 __pthread_testcancel(1);
6607 return(fsync_common(p
, uap
, MNT_WAIT
));
6612 * Sync an open file with synchronized I/O _file_ integrity completion
6614 * Notes: This is a legacy support function that does not test for
6615 * thread cancellation points.
/*
 * fsync_nocancel: same as fsync() but without the __pthread_testcancel()
 * call.  The argument structure is cast to struct fsync_args before being
 * passed to fsync_common() with MNT_WAIT.
 */
6619 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6621 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6626 * Sync an open file with synchronized I/O _data_ integrity completion
/*
 * fdatasync: calls __pthread_testcancel(1) and then delegates to
 * fsync_common() with MNT_DWAIT.  The argument structure is cast to
 * struct fsync_args for the call.
 */
6630 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6632 __pthread_testcancel(1);
6633 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6640 * Common fsync code to support both synchronized I/O file integrity completion
6641 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6643 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6644 * will only guarantee that the file data contents are retrievable. If
6645 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6646 * includes additional metadata unnecessary for retrieving the file data
6647 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6650 * Parameters: p The process
6651 * uap->fd The descriptor to synchronize
6652 * flags The data integrity flags
6654 * Returns: int Success
6655 * fp_getfvp:EBADF Bad file descriptor
6656 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6657 * VNOP_FSYNC:??? unspecified
6659 * Notes: We use struct fsync_args because it is a short name, and all
6660 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared implementation for fsync(), fsync_nocancel() and
 * fdatasync().  Resolves uap->fd to a fileproc and vnode, takes a vnode
 * reference, and issues VNOP_FSYNC with the caller's `flags`.  A named
 * stream shadow vnode that has been written through this descriptor is
 * additionally flushed to its parent.
 * NOTE(review): declarations, error-path bodies, conditional-compilation
 * guards, the start of the shadow-file condition, descriptor release and
 * the return are on lines dropped from this listing.
 */
6663 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6666 struct fileproc
*fp
;
6667 vfs_context_t ctx
= vfs_context_current();
6670 AUDIT_ARG(fd
, uap
->fd
);
6672 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6674 if ( (error
= vnode_getwithref(vp
)) ) {
6679 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6681 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6684 /* Sync resource fork shadow file if necessary. */
6686 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6687 (vp
->v_parent
!= NULLVP
) &&
6688 vnode_isshadow(vp
) &&
6689 (fp
->f_flags
& FP_WRITTEN
)) {
/* result of the flush is deliberately ignored */
6690 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6694 (void)vnode_put(vp
);
6700 * Duplicate files. Source must be a file, target must be a file or
6703 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6704 * perform inheritance correctly.
/*
 * copyfile: look up the source (uap->from) and the target (uap->to, as a
 * CREATE lookup with the parent locked), authorise reading the source and
 * adding/replacing the target, then hand the work to VNOP_COPYFILE().
 * NOTE(review): a large share of this function's lines are dropped from the
 * listing (declarations, assignments of fvp/tdvp/tvp, error values, gotos
 * and labels, conditional-compilation guards, the same-file check body, all
 * cleanup).  Comments below describe only the visible statements.
 */
6708 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6710 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6711 struct nameidata fromnd
, tond
;
6713 vfs_context_t ctx
= vfs_context_current();
6715 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
6716 struct vnode_attr va
;
6719 /* Check that the flags are valid. */
6721 if (uap
->flags
& ~CPF_MASK
) {
6725 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6726 UIO_USERSPACE
, uap
->from
, ctx
);
6727 if ((error
= namei(&fromnd
)))
6731 NDINIT(&tond
, CREATE
, OP_LINK
,
6732 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6733 UIO_USERSPACE
, uap
->to
, ctx
);
6734 if ((error
= namei(&tond
))) {
/* NOTE(review): body not visible; presumably an existing target is an error without CPF_OVERWRITE */
6741 if (!(uap
->flags
& CPF_OVERWRITE
)) {
/* neither the source nor an existing target may be a directory */
6747 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6752 /* This calls existing MAC hooks for open */
6753 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
6760 * See unlinkat_internal for an explanation of the potential
6761 * ENOENT from the MAC hook but the gist is that the MAC hook
6762 * can fail because vn_getpath isn't able to return the full
6763 * path. We choose to ignore this failure.
6765 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
6766 if (error
&& error
!= ENOENT
)
/* attributes describing the file to be created, used for the MAC create check */
6773 VATTR_SET(&va
, va_type
, fvp
->v_type
);
6774 /* Mask off all but regular access permissions */
6775 VATTR_SET(&va
, va_mode
,
6776 ((((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
6777 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
6780 #endif /* CONFIG_MACF */
6782 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6788 * If source is the same as the destination (that is the
6789 * same inode number) then there is nothing to do.
6790 * (fixed to have POSIX semantics - CSM 3/2/98)
6795 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6797 sdvp
= tond
.ni_startdir
;
6799 * nameidone has to happen before we vnode_put(tdvp)
6800 * since it may need to release the fs_nodelock on the tdvp
/*
 * Compile-time switch defined as 1.
 * NOTE(review): no use of this macro is visible in this part of the file;
 * check the rest of the file before changing or removing it.
 */
6818 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6821 * Helper function for doing clones. The caller is expected to provide an
6822 * iocounted source vnode and release it.
/*
 * clonefile_internal: clone the source vnode `fvp` to the name `dst`
 * (looked up through nameiat() with `dst_dirfd`).
 *
 * Visible steps: choose the kauth action from the source type, look up the
 * destination parent, require source and destination to be on the same
 * mount, authorise (MAC clone check, add-entry on the parent, read on the
 * source), fetch uid/gid/mode/flags/ACL from the source, prepare the new
 * attribute set `nva`, call VNOP_CLONEFILE(), then label the new vnode,
 * apply fallback attributes, fix up its name/parent identity and post an
 * fsevent.
 *
 * `data_read_authorised` drops KAUTH_VNODE_READ_DATA from the source
 * authorisation request.
 *
 * NOTE(review): many lines are dropped from this listing (declarations,
 * the switch on v_type, error values, gotos/labels, conditional-compilation
 * guards, assignments of tdvp/tvp/cnp, cleanup and return); comments below
 * describe only the visible statements.
 */
6825 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
6826 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
6829 struct nameidata tond
;
6832 boolean_t free_src_acl
;
6833 boolean_t attr_cleanup
;
6835 kauth_action_t action
;
6836 struct componentname
*cnp
;
6838 struct vnode_attr va
;
6839 struct vnode_attr nva
;
6841 v_type
= vnode_vtype(fvp
);
/* action requested on the destination parent depends on the source type */
6846 action
= KAUTH_VNODE_ADD_FILE
;
/* filesystem roots and mount points get special handling (body not visible) */
6849 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
6850 fvp
->v_mountedhere
) {
6853 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
6859 AUDIT_ARG(fd2
, dst_dirfd
);
6860 AUDIT_ARG(value32
, flags
);
6862 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6863 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
6864 UIO_USERSPACE
, dst
, ctx
);
6865 if ((error
= nameiat(&tond
, dst_dirfd
)))
/* cleanup flags start out false; they are set once the matching resource exists */
6871 free_src_acl
= FALSE
;
6872 attr_cleanup
= FALSE
;
/* source and destination parent must share a mount (failure value not visible) */
6879 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
6885 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
)))
6888 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
)))
/* `action` is reused for the read authorisation on the source */
6891 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
6892 if (data_read_authorised
)
6893 action
&= ~KAUTH_VNODE_READ_DATA
;
6894 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
)))
6898 * certain attributes may need to be changed from the source, we ask for
6902 VATTR_WANTED(&va
, va_uid
);
6903 VATTR_WANTED(&va
, va_gid
);
6904 VATTR_WANTED(&va
, va_mode
);
6905 VATTR_WANTED(&va
, va_flags
);
6906 VATTR_WANTED(&va
, va_acl
);
6908 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0)
6912 VATTR_SET(&nva
, va_type
, v_type
);
/* a source ACL is passed on to the new attributes and remembered for freeing */
6913 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl
!= NULL
) {
6914 VATTR_SET(&nva
, va_acl
, va
.va_acl
);
6915 free_src_acl
= TRUE
;
6918 /* Handle ACL inheritance, initialize vap. */
6919 if (v_type
== VLNK
) {
6920 error
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
);
6922 error
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
);
6925 attr_cleanup
= TRUE
;
6929 * We've got initial values for all security parameters,
6930 * If we are superuser, then we can change owners to be the
6931 * same as the source. Both superuser and the owner have default
6932 * WRITE_SECURITY privileges so all other fields can be taken
6933 * from source as well.
6935 if (vfs_context_issuser(ctx
)) {
6936 if (VATTR_IS_SUPPORTED(&va
, va_uid
))
6937 VATTR_SET(&nva
, va_uid
, va
.va_uid
);
6938 if (VATTR_IS_SUPPORTED(&va
, va_gid
))
6939 VATTR_SET(&nva
, va_gid
, va
.va_gid
);
6941 if (VATTR_IS_SUPPORTED(&va
, va_mode
))
6942 VATTR_SET(&nva
, va_mode
, va
.va_mode
);
/* SF_RESTRICTED is taken from the prepared attributes, never from the source */
6943 if (VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6944 VATTR_SET(&nva
, va_flags
,
6945 ((va
.va_flags
& ~SF_RESTRICTED
) | /* Turn off from source */
6946 (nva
.va_flags
& SF_RESTRICTED
)));
6949 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
,
6950 VNODE_CLONEFILE_DEFAULT
, ctx
);
/* post-processing only when the VNOP succeeded and returned a new vnode */
6952 if (!error
&& tvp
) {
6953 int update_flags
= 0;
6956 #endif /* CONFIG_FSE */
6959 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
6960 VNODE_LABEL_CREATE
, ctx
);
6963 * If some of the requested attributes weren't handled by the
6964 * VNOP, use our fallback code.
6966 if (!VATTR_ALL_SUPPORTED(&va
))
6967 (void)vnode_setattr_fallback(tvp
, &nva
, ctx
);
6969 // Make sure the name & parent pointers are hooked up
6970 if (tvp
->v_name
== NULL
)
6971 update_flags
|= VNODE_UPDATE_NAME
;
6972 if (tvp
->v_parent
== NULLVP
)
6973 update_flags
|= VNODE_UPDATE_PARENT
;
6976 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
6977 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
/* fsevent type follows the type of the newly created vnode (case labels not visible) */
6981 switch (vnode_vtype(tvp
)) {
6985 fsevent
= FSE_CREATE_FILE
;
6988 fsevent
= FSE_CREATE_DIR
;
6994 if (need_fsevent(fsevent
, tvp
)) {
6995 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
6998 #endif /* CONFIG_FSE */
/* NOTE(review): presumably guarded by attr_cleanup -- guard not visible */
7003 vn_attribute_cleanup(&nva
, defaulted
);
7004 if (free_src_acl
&& va
.va_acl
)
7005 kauth_acl_free(va
.va_acl
);
7014 * clone files or directories, target must not exist.
/*
 * clonefileat: path-based clone entry point.  Validates the flags, looks up
 * the source path uap->src through nameiat() with uap->src_dirfd, and calls
 * clonefile_internal() with data_read_authorised == FALSE.
 * NOTE(review): declarations, the flag-rejection statement, the assignment
 * of `fvp`, the tail of the clonefile_internal() call, cleanup and return
 * are on lines dropped from this listing.
 */
7018 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7019 __unused
int32_t *retval
)
7022 struct nameidata fromnd
;
7025 vfs_context_t ctx
= vfs_context_current();
7027 /* Check that the flags are valid. */
7028 if (uap
->flags
& ~CLONE_NOFOLLOW
)
7031 AUDIT_ARG(fd
, uap
->src_dirfd
);
/* CLONE_NOFOLLOW selects NOFOLLOW for the source lookup */
7033 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7034 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7035 UIO_USERSPACE
, uap
->src
, ctx
);
7036 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
)))
/* FALSE: read access to the source data has not been established yet */
7042 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: descriptor-based clone entry point.  Resolves uap->src_fd
 * to a fileproc and vnode, requires the descriptor to be open for reading,
 * takes a vnode reference, and calls clonefile_internal() with
 * data_read_authorised == TRUE.  The descriptor is released with
 * file_drop() at the end.
 * NOTE(review): the parameter `p` is marked __unused in the signature but
 * is passed to fp_getfvp() below.  Declarations, error values, gotos and
 * labels, the tail of the clonefile_internal() call, the vnode release and
 * the return are on lines dropped from this listing.
 */
7050 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7051 __unused
int32_t *retval
)
7054 struct fileproc
*fp
;
7056 vfs_context_t ctx
= vfs_context_current();
7058 AUDIT_ARG(fd
, uap
->src_fd
);
7059 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
/* descriptor not opened with FREAD: audit the vnode and fail (error value not visible) */
7063 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7064 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7069 if ((error
= vnode_getwithref(fvp
)))
7072 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
/* TRUE: the FREAD check above already covers reading the source data */
7074 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7079 file_drop(uap
->src_fd
);
7084 * Rename files. Source and destination must either both be directories,
7085 * or both not be directories. If target is a directory, it must be empty.
7089 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7090 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7092 if (flags
& ~VFS_RENAME_FLAGS_MASK
)
7095 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
))
7100 struct nameidata
*fromnd
, *tond
;
7106 const char *oname
= NULL
;
7107 char *from_name
= NULL
, *to_name
= NULL
;
7108 int from_len
=0, to_len
=0;
7109 int holding_mntlock
;
7110 mount_t locked_mp
= NULL
;
7111 vnode_t oparent
= NULLVP
;
7113 fse_info from_finfo
, to_finfo
;
7115 int from_truncated
=0, to_truncated
;
7117 struct vnode_attr
*fvap
, *tvap
;
7119 /* carving out a chunk for structs that are too big to be on stack. */
7121 struct nameidata from_node
, to_node
;
7122 struct vnode_attr fv_attr
, tv_attr
;
7124 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7125 fromnd
= &__rename_data
->from_node
;
7126 tond
= &__rename_data
->to_node
;
7128 holding_mntlock
= 0;
7137 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7139 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7141 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7143 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7146 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7147 if ( (error
= nameiat(fromnd
, fromfd
)) )
7149 fdvp
= fromnd
->ni_dvp
;
7150 fvp
= fromnd
->ni_vp
;
7152 if (fvp
&& fvp
->v_type
== VDIR
)
7153 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7156 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7157 if ( (error
= nameiat(tond
, tofd
)) ) {
7159 * Translate error code for rename("dir1", "dir2/.").
7161 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
7165 tdvp
= tond
->ni_dvp
;
7169 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7174 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7179 batched
= vnode_compound_rename_available(fdvp
);
7182 * Claim: this check will never reject a valid rename.
7183 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7184 * Suppose fdvp and tdvp are not on the same mount.
7185 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7186 * then you can't move it to within another dir on the same mountpoint.
7187 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7189 * If this check passes, then we are safe to pass these vnodes to the same FS.
7191 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7195 goto skipped_lookup
;
7199 error
= vn_authorize_renamex(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, flags
, NULL
);
7201 if (error
== ENOENT
) {
7202 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7203 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7205 * We encountered a race where after doing the namei, tvp stops
7206 * being valid. If so, simply re-drive the rename call from the
7218 * If the source and destination are the same (i.e. they're
7219 * links to the same vnode) and the target file system is
7220 * case sensitive, then there is nothing to do.
7222 * XXX Come back to this.
7228 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7229 * then assume that this file system is case sensitive.
7231 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7232 pathconf_val
!= 0) {
7238 * Allow the renaming of mount points.
7239 * - target must not exist
7240 * - target must reside in the same directory as source
7241 * - union mounts cannot be renamed
7242 * - "/" cannot be renamed
7244 * XXX Handle this in VFS after a continued lookup (if we missed
7245 * in the cache to start off)
7247 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7248 * we'll skip past here. The file system is responsible for
7249 * checking that @tvp is not a descendent of @fvp and vice versa
7250 * so it should always return EINVAL if either @tvp or @fvp is the
7253 if ((fvp
->v_flag
& VROOT
) &&
7254 (fvp
->v_type
== VDIR
) &&
7256 (fvp
->v_mountedhere
== NULL
) &&
7258 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7259 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7262 /* switch fvp to the covered vnode */
7263 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7264 if ( (vnode_getwithref(coveredvp
)) ) {
7274 * Check for cross-device rename.
7276 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7277 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7283 * If source is the same as the destination (that is the
7284 * same inode number) then there is nothing to do...
7285 * EXCEPT if the underlying file system supports case
7286 * insensitivity and is case preserving. In this case
7287 * the file system needs to handle the special case of
7288 * getting the same vnode as target (fvp) and source (tvp).
7290 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7291 * and _PC_CASE_PRESERVING can have this exception, and they need to
7292 * handle the special case of getting the same vnode as target and
7293 * source. NOTE: Then the target is unlocked going into vnop_rename,
7294 * so not to cause locking problems. There is a single reference on tvp.
7296 * NOTE - that fvp == tvp also occurs if they are hard linked and
7297 * that correct behaviour then is just to return success without doing
7300 * XXX filesystem should take care of this itself, perhaps...
7302 if (fvp
== tvp
&& fdvp
== tdvp
) {
7303 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7304 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7305 fromnd
->ni_cnd
.cn_namelen
)) {
7310 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7312 * we're holding a reference and lock
7313 * on locked_mp, but it no longer matches
7314 * what we want to do... so drop our hold
7316 mount_unlock_renames(locked_mp
);
7317 mount_drop(locked_mp
, 0);
7318 holding_mntlock
= 0;
7320 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7322 * serialize renames that re-shape
7323 * the tree... if holding_mntlock is
7324 * set, then we're ready to go...
7326 * first need to drop the iocounts
7327 * we picked up, second take the
7328 * lock to serialize the access,
7329 * then finally start the lookup
7330 * process over with the lock held
7332 if (!holding_mntlock
) {
7334 * need to grab a reference on
7335 * the mount point before we
7336 * drop all the iocounts... once
7337 * the iocounts are gone, the mount
7340 locked_mp
= fvp
->v_mount
;
7341 mount_ref(locked_mp
, 0);
7344 * nameidone has to happen before we vnode_put(tvp)
7345 * since it may need to release the fs_nodelock on the tvp
7354 * nameidone has to happen before we vnode_put(fdvp)
7355 * since it may need to release the fs_nodelock on the fvp
7362 mount_lock_renames(locked_mp
);
7363 holding_mntlock
= 1;
7369 * when we dropped the iocounts to take
7370 * the lock, we allowed the identity of
7371 * the various vnodes to change... if they did,
7372 * we may no longer be dealing with a rename
7373 * that reshapes the tree... once we're holding
7374 * the iocounts, the vnodes can't change type
7375 * so we're free to drop the lock at this point
7378 if (holding_mntlock
) {
7379 mount_unlock_renames(locked_mp
);
7380 mount_drop(locked_mp
, 0);
7381 holding_mntlock
= 0;
7385 // save these off so we can later verify that fvp is the same
7386 oname
= fvp
->v_name
;
7387 oparent
= fvp
->v_parent
;
7391 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7394 get_fse_info(fvp
, &from_finfo
, ctx
);
7396 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7401 fvap
= &__rename_data
->fv_attr
;
7405 get_fse_info(tvp
, &to_finfo
, ctx
);
7406 } else if (batched
) {
7407 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7412 tvap
= &__rename_data
->tv_attr
;
7417 #endif /* CONFIG_FSE */
7419 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
7420 if (from_name
== NULL
) {
7421 GET_PATH(from_name
);
7422 if (from_name
== NULL
) {
7428 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7430 if (to_name
== NULL
) {
7432 if (to_name
== NULL
) {
7438 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7440 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7441 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7444 if (holding_mntlock
) {
7446 * we can drop our serialization
7449 mount_unlock_renames(locked_mp
);
7450 mount_drop(locked_mp
, 0);
7451 holding_mntlock
= 0;
7454 if (error
== EKEEPLOOKING
) {
7455 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7456 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7457 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7461 fromnd
->ni_vp
= fvp
;
7464 goto continue_lookup
;
7468 * We may encounter a race in the VNOP where the destination didn't
7469 * exist when we did the namei, but it does by the time we go and
7470 * try to create the entry. In this case, we should re-drive this rename
7471 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7472 * but other filesystems susceptible to this race could return it, too.
7474 if (error
== ERECYCLE
) {
7479 * For compound VNOPs, the authorization callback may return
7480 * ENOENT in case of racing hardlink lookups hitting the name
7481 * cache, redrive the lookup.
7483 if (batched
&& error
== ENOENT
) {
7484 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7485 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7494 /* call out to allow 3rd party notification of rename.
7495 * Ignore result of kauth_authorize_fileop call.
7497 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7498 KAUTH_FILEOP_RENAME
,
7499 (uintptr_t)from_name
, (uintptr_t)to_name
);
7500 if (flags
& VFS_RENAME_SWAP
) {
7501 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7502 KAUTH_FILEOP_RENAME
,
7503 (uintptr_t)to_name
, (uintptr_t)from_name
);
7507 if (from_name
!= NULL
&& to_name
!= NULL
) {
7508 if (from_truncated
|| to_truncated
) {
7509 // set it here since only the from_finfo gets reported up to user space
7510 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7514 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
7517 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
7521 add_fsevent(FSE_RENAME
, ctx
,
7522 FSE_ARG_STRING
, from_len
, from_name
,
7523 FSE_ARG_FINFO
, &from_finfo
,
7524 FSE_ARG_STRING
, to_len
, to_name
,
7525 FSE_ARG_FINFO
, &to_finfo
,
7527 if (flags
& VFS_RENAME_SWAP
) {
7529 * Strictly speaking, swap is the equivalent of
7530 * *three* renames. FSEvents clients should only take
7531 * the events as a hint, so we only bother reporting
7534 add_fsevent(FSE_RENAME
, ctx
,
7535 FSE_ARG_STRING
, to_len
, to_name
,
7536 FSE_ARG_FINFO
, &to_finfo
,
7537 FSE_ARG_STRING
, from_len
, from_name
,
7538 FSE_ARG_FINFO
, &from_finfo
,
7542 add_fsevent(FSE_RENAME
, ctx
,
7543 FSE_ARG_STRING
, from_len
, from_name
,
7544 FSE_ARG_FINFO
, &from_finfo
,
7545 FSE_ARG_STRING
, to_len
, to_name
,
7549 #endif /* CONFIG_FSE */
7552 * update filesystem's mount point data
7555 char *cp
, *pathend
, *mpname
;
7561 mp
= fvp
->v_mountedhere
;
7563 if (vfs_busy(mp
, LK_NOWAIT
)) {
7567 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7569 if (UIO_SEG_IS_USER_SPACE(segflg
))
7570 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7572 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7574 /* find current mount point prefix */
7575 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7576 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7580 /* find last component of target name */
7581 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7585 /* append name to prefix */
7586 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7587 bzero(pathend
, maxlen
);
7588 strlcpy(pathend
, mpname
, maxlen
);
7590 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7595 * fix up name & parent pointers. note that we first
7596 * check that fvp has the same name/parent pointers it
7597 * had before the rename call... this is a 'weak' check
7600 * XXX oparent and oname may not be set in the compound vnop case
7602 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7605 update_flags
= VNODE_UPDATE_NAME
;
7608 update_flags
|= VNODE_UPDATE_PARENT
;
7610 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7613 if (to_name
!= NULL
) {
7614 RELEASE_PATH(to_name
);
7617 if (from_name
!= NULL
) {
7618 RELEASE_PATH(from_name
);
7621 if (holding_mntlock
) {
7622 mount_unlock_renames(locked_mp
);
7623 mount_drop(locked_mp
, 0);
7624 holding_mntlock
= 0;
7628 * nameidone has to happen before we vnode_put(tdvp)
7629 * since it may need to release the fs_nodelock on the tdvp
7639 * nameidone has to happen before we vnode_put(fdvp)
7640 * since it may need to release the fs_nodelock on the fdvp
7650 * If things changed after we did the namei, then we will re-drive
7651 * this rename call from the top.
7658 FREE(__rename_data
, M_TEMP
);
/*
 * rename: thin wrapper around renameat_internal().
 *
 * Passes the current VFS context, AT_FDCWD as the directory descriptor for
 * both uap->from and uap->to, UIO_USERSPACE as the address space of the two
 * paths, and 0 as the flags argument.  The result of renameat_internal()
 * is returned unchanged; p and retval are unused.
 */
7663 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7665 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7666 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
/*
 * renameatx_np: wrapper around renameat_internal() that, unlike rename()
 * and renameat(), forwards the caller-supplied uap->flags instead of 0.
 *
 * The source path is uap->from relative to uap->fromfd and the paths are
 * user-space addresses (UIO_USERSPACE).
 *
 * NOTE(review): original line 7674 of the argument list is not present in
 * this listing; presumably it carries the destination fd and path
 * (compare renameat()) -- confirm against the complete file.
 */
7669 int renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
7671 return renameat_internal(
7672 vfs_context_current(),
7673 uap
->fromfd
, uap
->from
,
7675 UIO_USERSPACE
, uap
->flags
);
/*
 * renameat: thin wrapper around renameat_internal().
 *
 * Renames uap->from (resolved relative to uap->fromfd) to uap->to
 * (resolved relative to uap->tofd).  Both paths are user-space addresses
 * (UIO_USERSPACE) and the flags argument is fixed at 0.  The result of
 * renameat_internal() is returned unchanged; p and retval are unused.
 */
7679 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7681 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7682 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7686 * Make a directory file.
7688 * Returns: 0 Success
7691 * vnode_authorize:???
/*
 * mkdir1at: common directory-creation path used by mkdir(), mkdirat() and
 * mkdir_extended().
 *
 * Parameters:	ctx	VFS context used for lookup, authorization and create
 *		path	path of the directory to create
 *		vap	attributes for the new directory (va_type is set to
 *			VDIR here)
 *		fd	directory descriptor that nameiat() resolves path
 *			against
 *		segflg	address space of path
 *
 * Statements visible in this listing, in order:
 *  - audit the requested mode;
 *  - set up a CREATE / OP_MKDIR lookup with LOCKPARENT | AUDITVNPATH1,
 *    add WILLBEDIR to cn_flags and request NAMEI_COMPOUNDMKDIR;
 *  - run nameiat() and record whether dvp supports compound mkdir;
 *  - call vn_authorize_mkdir(); on EACCES or EPERM a second, plain LOOKUP
 *    is issued so that an existing target can be detected;
 *  - call vn_create(); EKEEPLOOKING jumps back to continue_lookup;
 *  - hook up any missing name or parent pointer on the new vnode and post
 *    an FSE_CREATE_DIR event.
 *
 * NOTE(review): braces, several declarations and the cleanup path are not
 * present in this listing, so the exact branch structure (for example
 * which steps are skipped when batched is set) cannot be confirmed here.
 */
7696 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7697 enum uio_seg segflg
)
7701 int update_flags
= 0;
7703 struct nameidata nd
;
7705 AUDIT_ARG(mode
, vap
->va_mode
);
7706 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7708 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7709 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7712 error
= nameiat(&nd
, fd
);
7723 batched
= vnode_compound_mkdir_available(dvp
);
7725 VATTR_SET(vap
, va_type
, VDIR
);
7729 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7730 * only get EXISTS or EISDIR for existing path components, and not that it could see
7731 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7732 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7734 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7735 if (error
== EACCES
|| error
== EPERM
) {
7743 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7744 * rather than EACCESS if the target exists.
7746 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7748 error2
= nameiat(&nd
, fd
);
7762 * make the directory
7764 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7765 if (error
== EKEEPLOOKING
) {
7767 goto continue_lookup
;
7773 // Make sure the name & parent pointers are hooked up
7774 if (vp
->v_name
== NULL
)
7775 update_flags
|= VNODE_UPDATE_NAME
;
7776 if (vp
->v_parent
== NULLVP
)
7777 update_flags
|= VNODE_UPDATE_PARENT
;
7780 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7783 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7788 * nameidone has to happen before we vnode_put(dvp)
7789 * since it may need to release the fs_nodelock on the dvp
7802 * mkdir_extended: Create a directory; with extended security (ACL).
7804 * Parameters: p Process requesting to create the directory
7805 * uap User argument descriptor (see below)
7808 * Indirect: uap->path Path of directory to create
7809 * uap->mode Access permissions to set
7810 * uap->xsecurity ACL to set
7812 * Returns: 0 Success
/*
 * mkdir_extended: mkdir variant that can attach an ACL to the new
 * directory.
 *
 * Visible steps: audit uap->uid / uap->gid; if uap->xsecurity is not
 * USER_ADDR_NULL, copy the file security descriptor in with
 * kauth_copyinfilesec(); set va_mode to the requested mode restricted to
 * ACCESSPERMS and with the bits in the process file-creation mask
 * (p->p_fd->fd_cmask) cleared; if a descriptor was copied in, set va_acl
 * to its fsec_acl; create the directory through mkdir1at() relative to
 * AT_FDCWD; finally free the copied-in descriptor.
 *
 * NOTE(review): the initialisation of xsecdst and va, and the handling of
 * a kauth_copyinfilesec() failure, are on lines not present in this
 * listing.
 */
7817 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7820 kauth_filesec_t xsecdst
;
7821 struct vnode_attr va
;
7823 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7826 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7827 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7831 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7832 if (xsecdst
!= NULL
)
7833 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7835 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7837 if (xsecdst
!= NULL
)
7838 kauth_filesec_free(xsecdst
);
/*
 * mkdir: create the directory named by uap->path.
 *
 * The mode handed to mkdir1at() is uap->mode restricted to ACCESSPERMS
 * with the bits of the process file-creation mask (p->p_fd->fd_cmask)
 * cleared.  The path is resolved relative to AT_FDCWD and the result of
 * mkdir1at() is returned.
 */
7843 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7845 struct vnode_attr va
;
7848 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7850 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: same as mkdir(), except that uap->path is resolved relative to
 * the caller-supplied descriptor uap->fd instead of AT_FDCWD.
 *
 * The mode handed to mkdir1at() is uap->mode restricted to ACCESSPERMS
 * with the bits of p->p_fd->fd_cmask cleared.
 */
7855 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7857 struct vnode_attr va
;
7860 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7862 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
/*
 * rmdirat_internal: remove the directory dirpath, resolved relative to fd.
 *
 * Parameters:	ctx	VFS context for lookup, authorization and removal
 *		fd	directory descriptor that nameiat() resolves against
 *		dirpath	path of the directory to remove
 *		segflg	address space of dirpath
 *
 * The body is a do / while loop that repeats while restart_flag is
 * non-zero.  Statements visible in this listing, in order:
 *  - DELETE / OP_RMDIR lookup with LOCKPARENT | AUDITVNPATH1 and
 *    NAMEI_COMPOUNDRMDIR, then nameiat();
 *  - a test for VROOT on the target (a mounted filesystem root cannot be
 *    removed);
 *  - vn_authorize_rmdir(), with a bounded retry (restart_count against
 *    MAX_AUTHORIZE_ENOENT_RETRIES) when it reports ENOENT;
 *  - collection of fsevent / kauth listener data (need_fsevent,
 *    get_fse_info, vfs_get_notify_attributes, safe_getpath);
 *  - vn_rmdir(); EKEEPLOOKING continues the lookup and, for compound
 *    VNOPs, ENOENT re-drives it with the same bounded retry;
 *  - under CONFIG_APPLEDOUBLE, ENOTEMPTY triggers
 *    rmdir_remove_orphaned_appleDouble() followed by a second vn_rmdir();
 *  - listener notification (KAUTH_FILEOP_DELETE), parent-pointer reset for
 *    hard links, and an FSE_DELETE event;
 *  - wakeup_one() / tsleep() on vp around the restart decision.
 *
 * NOTE(review): braces, several declarations and the cleanup statements
 * are not present in this listing, so nesting and error exits cannot be
 * confirmed from here.
 */
7867 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7868 enum uio_seg segflg
)
7872 struct nameidata nd
;
7875 int has_listeners
= 0;
7879 struct vnode_attr va
;
7880 #endif /* CONFIG_FSE */
7881 struct vnode_attr
*vap
= NULL
;
7882 int restart_count
= 0;
7888 * This loop exists to restart rmdir in the unlikely case that two
7889 * processes are simultaneously trying to remove the same directory
7890 * containing orphaned appleDouble files.
7893 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7894 segflg
, dirpath
, ctx
);
7895 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7900 error
= nameiat(&nd
, fd
);
7908 batched
= vnode_compound_rmdir_available(vp
);
7910 if (vp
->v_flag
& VROOT
) {
7912 * The root of a mounted filesystem cannot be deleted.
7919 * Removed a check here; we used to abort if vp's vid
7920 * was not the same as what we'd seen the last time around.
7921 * I do not think that check was valid, because if we retry
7922 * and all dirents are gone, the directory could legitimately
7923 * be recycled but still be present in a situation where we would
7924 * have had permission to delete. Therefore, we won't make
7925 * an effort to preserve that check now that we may not have a
7930 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7932 if (error
== ENOENT
) {
7933 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7934 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7945 if (!vnode_compound_rmdir_available(dvp
)) {
7946 panic("No error, but no compound rmdir?");
7953 need_event
= need_fsevent(FSE_DELETE
, dvp
);
7956 get_fse_info(vp
, &finfo
, ctx
);
7958 error
= vfs_get_notify_attributes(&va
);
7967 has_listeners
= kauth_authorize_fileop_has_listeners();
7968 if (need_event
|| has_listeners
) {
7977 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
7980 finfo
.mode
|= FSE_TRUNCATED_PATH
;
7985 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7988 /* Couldn't find a vnode */
7992 if (error
== EKEEPLOOKING
) {
7993 goto continue_lookup
;
7994 } else if (batched
&& error
== ENOENT
) {
7995 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7996 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7998 * For compound VNOPs, the authorization callback
7999 * may return ENOENT in case of racing hard link lookups
8000 * redrive the lookup.
8007 #if CONFIG_APPLEDOUBLE
8009 * Special case to remove orphaned AppleDouble
8010 * files. I don't like putting this in the kernel,
8011 * but carbon does not like putting this in carbon either,
8014 if (error
== ENOTEMPTY
) {
8015 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8016 if (error
== EBUSY
) {
8022 * Assuming everything went well, we will try the RMDIR again
8025 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8027 #endif /* CONFIG_APPLEDOUBLE */
8029 * Call out to allow 3rd party notification of delete.
8030 * Ignore result of kauth_authorize_fileop call.
8033 if (has_listeners
) {
8034 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8035 KAUTH_FILEOP_DELETE
,
8040 if (vp
->v_flag
& VISHARDLINK
) {
8041 // see the comment in unlink1() about why we update
8042 // the parent of a hard link when it is removed
8043 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8049 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8051 add_fsevent(FSE_DELETE
, ctx
,
8052 FSE_ARG_STRING
, len
, path
,
8053 FSE_ARG_FINFO
, &finfo
,
8065 * nameidone has to happen before we vnode_put(dvp)
8066 * since it may need to release the fs_nodelock on the dvp
8074 if (restart_flag
== 0) {
8075 wakeup_one((caddr_t
)vp
);
8078 tsleep(vp
, PVFS
, "rm AD", 1);
8080 } while (restart_flag
!= 0);
8087 * Remove a directory file.
/*
 * rmdir: thin wrapper around rmdirat_internal().
 *
 * Removes uap->path resolved relative to AT_FDCWD.  The path is converted
 * with CAST_USER_ADDR_T and treated as a user-space address
 * (UIO_USERSPACE).  The result of rmdirat_internal() is returned
 * unchanged; p and retval are unused.
 */
8091 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8093 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8094 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
8097 /*
 * Get direntry length padded to 8 byte alignment: sizeof(struct direntry)
 * less (MAXPATHLEN-1), plus namlen, rounded up to the next multiple of 8.
 * NOTE(review): the (MAXPATHLEN-1) term presumably removes the unused part
 * of the d_name buffer -- confirm against the definition of struct
 * direntry.
 */
8098 #define DIRENT64_LEN(namlen) \
8099 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
/*
 * vnode_readdir64: read directory entries from vp into uio in the
 * struct direntry layout.
 *
 * If the filesystem advertises VFC_VFSREADDIR_EXTENDED and the mount does
 * not have MNTK_DENY_READDIREXT set, the request is passed straight to
 * VNOP_READDIR() with the caller's flags.
 *
 * Otherwise the entries are read as struct dirent into a temporary kernel
 * buffer (VNOP_READDIR() is called with flags 0), converted one at a time
 * into a single scratch struct direntry, and copied to the caller with
 * uiomove().  d_seekoff is always written as 0 on this path.
 *
 * The temporary buffer is sized at 3/8 of the caller's residual count,
 * with that count first limited to 87371 bytes, which gives at most
 * 3 * 87371 / 8 = 32764 bytes.
 *
 * NOTE(review): the conversion loop advances by dep->d_reclen and copies
 * dep->d_namlen + 1 name bytes taken from the buffer the filesystem
 * filled in; no validation of those fields is visible in this listing
 * (a zero d_reclen would not advance dep) -- confirm against the complete
 * file.
 */
8102 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8103 int *numdirent
, vfs_context_t ctxp
)
8105 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8106 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8107 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8108 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8113 struct direntry
*entry64
;
8119 * Our kernel buffer needs to be smaller since re-packing
8120 * will expand each dirent. The worse case (when the name
8121 * length is 3) corresponds to a struct direntry size of 32
8122 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8123 * (4-byte aligned). So having a buffer that is 3/8 the size
8124 * will prevent us from reading more than we can pack.
8126 * Since this buffer is wired memory, we will limit the
8127 * buffer size to a maximum of 32K. We would really like to
8128 * use 32K in the MIN(), but we use magic number 87371 to
8129 * prevent uio_resid() * 3 / 8 from overflowing.
8131 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8132 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8133 if (bufptr
== NULL
) {
8137 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8138 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8139 auio
->uio_offset
= uio
->uio_offset
;
8141 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8143 dep
= (struct dirent
*)bufptr
;
8144 bytesread
= bufsize
- uio_resid(auio
);
8146 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8149 * Convert all the entries and copy them out to user's buffer.
8151 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8152 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
8154 bzero(entry64
, enbufsize
);
8155 /* Convert a dirent to a dirent64. */
8156 entry64
->d_ino
= dep
->d_ino
;
8157 entry64
->d_seekoff
= 0;
8158 entry64
->d_reclen
= enbufsize
;
8159 entry64
->d_namlen
= dep
->d_namlen
;
8160 entry64
->d_type
= dep
->d_type
;
8161 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8163 /* Move to next entry. */
8164 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8166 /* Copy entry64 to user's buffer. */
8167 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8170 /* Update the real offset using the offset we got from VNOP_READDIR. */
8172 uio
->uio_offset
= auio
->uio_offset
;
8175 FREE(bufptr
, M_TEMP
);
8176 FREE(entry64
, M_TEMP
);
/* Upper bound (128 MiB) that getdirentries_common() applies to the caller-supplied bufsize. */
8181 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8184 * Read a block of directory entries in a file system independent format.
/*
 * getdirentries_common: shared implementation behind getdirentries() and
 * getdirentries64().
 *
 * Parameters:	fd		descriptor of the directory to read
 *		bufp		user buffer that receives the entries
 *		bufsize		size of bufp; values above
 *				GETDIRENTRIES_MAXBUFSIZE are reduced to it
 *		bytesread	out: bufsize minus the residual count left in
 *				the uio after the read
 *		offset		out parameter (its assignment is not visible
 *				in this listing)
 *		flags		VNODE_READDIR_EXTENDED selects
 *				vnode_readdir64(); otherwise VNOP_READDIR()
 *				is called with flags 0
 *
 * Statements visible in this listing, in order: resolve fd with
 * fp_getfvp(); test the descriptor for FREAD; clamp bufsize; MAC
 * change-offset check; take an iocount with vnode_getwithref(); test for
 * VDIR; MAC readdir check; build a one-iovec uio at the descriptor's
 * current fg_offset; read; store the new uio offset back into fg_offset.
 * When nothing was transferred (bufsize equals the residual count) the
 * union_dircheckp hook and, for MNT_UNION mounts,
 * lookup_traverse_union() are used to switch the descriptor to another
 * vnode with fg_offset reset to 0.
 *
 * A local copy of the current vfs_context is used throughout.
 */
8187 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8188 off_t
*offset
, int flags
)
8191 struct vfs_context context
= *vfs_context_current(); /* local copy */
8192 struct fileproc
*fp
;
8194 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8196 int error
, eofflag
, numdirent
;
8197 char uio_buf
[ UIO_SIZEOF(1) ];
8199 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
8203 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8204 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8209 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
8210 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8213 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8217 if ( (error
= vnode_getwithref(vp
)) ) {
8220 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8223 if (vp
->v_type
!= VDIR
) {
8224 (void)vnode_put(vp
);
8230 error
= mac_vnode_check_readdir(&context
, vp
);
8232 (void)vnode_put(vp
);
8237 loff
= fp
->f_fglob
->fg_offset
;
8238 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8239 uio_addiov(auio
, bufp
, bufsize
);
8241 if (flags
& VNODE_READDIR_EXTENDED
) {
8242 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8243 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8245 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8246 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8249 (void)vnode_put(vp
);
8253 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
8254 if (union_dircheckp
) {
8255 error
= union_dircheckp(&vp
, fp
, &context
);
8259 (void)vnode_put(vp
);
8264 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8265 struct vnode
*tvp
= vp
;
8266 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8268 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8269 fp
->f_fglob
->fg_offset
= 0;
8283 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: read directory entries from uap->fd into uap->buf
 * (uap->count bytes) through getdirentries_common() with flags 0.
 *
 * The offset reported by getdirentries_common() is copied out to
 * uap->basep as a user64_long_t for 64-bit processes and as a
 * user32_long_t otherwise.  *retval receives the number of bytes read.
 *
 * NOTE(review): the offset is narrowed by a plain cast on the 32-bit
 * path; whether larger offsets can occur there is not visible in this
 * listing.
 */
8291 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8297 AUDIT_ARG(fd
, uap
->fd
);
8298 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
8301 if (proc_is64bit(p
)) {
8302 user64_long_t base
= (user64_long_t
)offset
;
8303 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8305 user32_long_t base
= (user32_long_t
)offset
;
8306 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8308 *retval
= bytesread
;
/*
 * getdirentries64: read directory entries from uap->fd into uap->buf
 * (uap->bufsize bytes) through getdirentries_common() with
 * VNODE_READDIR_EXTENDED set.
 *
 * *retval receives the number of bytes read and the offset reported by
 * getdirentries_common() is copied out to uap->position as an off_t.
 */
8314 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8320 AUDIT_ARG(fd
, uap
->fd
);
8321 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8324 *retval
= bytesread
;
8325 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
8332 * Set the mode mask for creation of filesystem nodes.
8333 * XXX implement xsecurity
8335 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
/*
 * umask1: common implementation behind umask() and umask_extended().
 *
 * Audits newmask, stores the previous file-creation mask
 * (fdp->fd_cmask) in *retval, then replaces it with newmask restricted to
 * ALLPERMS.  The fsec argument is marked __unused.
 *
 * NOTE(review): the assignment of fdp is on a line not present in this
 * listing.
 */
8337 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8339 struct filedesc
*fdp
;
8341 AUDIT_ARG(mask
, newmask
);
8344 *retval
= fdp
->fd_cmask
;
8345 fdp
->fd_cmask
= newmask
& ALLPERMS
;
8351 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8353 * Parameters: p Process requesting to set the umask
8354 * uap User argument descriptor (see below)
8355 * retval umask of the process (parameter p)
8357 * Indirect: uap->newmask umask to set
8358 * uap->xsecurity ACL to set
8360 * Returns: 0 Success
/*
 * umask_extended: umask variant that accepts a file security descriptor.
 *
 * xsecdst starts as KAUTH_FILESEC_NONE.  When uap->xsecurity is not
 * USER_ADDR_NULL the descriptor is copied in with kauth_copyinfilesec().
 * The mask itself is changed by umask1(), and a descriptor other than
 * KAUTH_FILESEC_NONE is released with kauth_filesec_free() afterwards.
 *
 * NOTE(review): the second assignment of KAUTH_FILESEC_NONE presumably
 * sits in an else branch whose line is not present in this listing; the
 * handling of a copy-in failure is not visible either.
 */
8365 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8368 kauth_filesec_t xsecdst
;
8370 xsecdst
= KAUTH_FILESEC_NONE
;
8371 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8372 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
8375 xsecdst
= KAUTH_FILESEC_NONE
;
8378 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
8380 if (xsecdst
!= KAUTH_FILESEC_NONE
)
8381 kauth_filesec_free(xsecdst
);
/*
 * umask: set the file-creation mask to uap->newmask through umask1(),
 * passing UMASK_NOXSECURITY as the security-descriptor argument.  The
 * previous mask is delivered through retval by umask1().
 */
8386 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8388 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
8392 * Void all references to file by ripping underlying filesystem
/*
 * revoke: revoke access to the device node named by a user-supplied path.
 *
 * Statements visible in this listing, in order:
 *  - LOOKUP / OP_REVOKE name lookup with FOLLOW | AUDITVNPATH1 on a
 *    user-space path;
 *  - a test that the vnode is a character or block device;
 *  - a test for a block device that is currently mounted on;
 *  - the MAC revoke check;
 *  - fetch of va_uid, then a test that the caller either owns the node or
 *    passes suser();
 *  - VNOP_REVOKE(vp, REVOKEALL, ctx) when the vnode has a use count above
 *    zero or is aliased.
 *
 * NOTE(review): the error values and exits taken when a test fails are on
 * lines not present in this listing.
 */
8397 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8400 struct vnode_attr va
;
8401 vfs_context_t ctx
= vfs_context_current();
8403 struct nameidata nd
;
8405 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
8414 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
8419 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
8425 error
= mac_vnode_check_revoke(ctx
, vp
);
8431 VATTR_WANTED(&va
, va_uid
);
8432 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
8434 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
8435 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
8437 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
8438 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
8446 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8447 * The following system calls are designed to support features
8448 * which are specific to the HFS & HFS Plus volume formats
8453 * Obtain attribute information on objects in a directory while enumerating
/*
 * getdirentriesattr: enumerate a directory and return the requested
 * attributes for each entry.
 *
 * Statements visible in this listing, in order:
 *  - copy in the attrlist (uap->alist) and the entry count (uap->count);
 *  - resolve the descriptor with fp_getfvp() and test it for FREAD;
 *  - MAC change-offset check, vnode_getwithref(), VDIR test, MAC readdir
 *    check;
 *  - build a one-iovec uio over uap->buffer / uap->buffersize starting at
 *    the descriptor's current fg_offset;
 *  - authorize: KAUTH_VNODE_LIST_DIRECTORY, plus KAUTH_VNODE_SEARCH when
 *    anything other than ATTR_CMN_NAME is requested;
 *  - call VNOP_READDIRATTR() with uap->options truncated to 32 bits;
 *  - on a union mount at end of directory with an empty buffer, switch
 *    the descriptor to the vnode found by lookup_traverse_union() and
 *    reset fg_offset to 0;
 *  - store the uio offset in fg_offset, copy out count, newstate and loff
 *    (to uap->basep), and set *retval to eofflag.
 *
 * NOTE(review): loff is read from fg_offset before the read and no later
 * assignment is visible, so the value copied to uap->basep appears to be
 * the starting offset -- confirm against the complete file.
 */
8458 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
8461 struct fileproc
*fp
;
8463 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8464 uint32_t count
, savecount
;
8468 struct attrlist attributelist
;
8469 vfs_context_t ctx
= vfs_context_current();
8471 char uio_buf
[ UIO_SIZEOF(1) ];
8472 kauth_action_t action
;
8476 /* Get the attributes into kernel space */
8477 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
8480 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
8484 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
8487 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8488 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8495 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
8502 if ( (error
= vnode_getwithref(vp
)) )
8505 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8508 if (vp
->v_type
!= VDIR
) {
8509 (void)vnode_put(vp
);
8515 error
= mac_vnode_check_readdir(ctx
, vp
);
8517 (void)vnode_put(vp
);
8522 /* set up the uio structure which will contain the users return buffer */
8523 loff
= fp
->f_fglob
->fg_offset
;
8524 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8525 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
8528 * If the only item requested is file names, we can let that past with
8529 * just LIST_DIRECTORY. If they want any other attributes, that means
8530 * they need SEARCH as well.
8532 action
= KAUTH_VNODE_LIST_DIRECTORY
;
8533 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
8534 attributelist
.fileattr
|| attributelist
.dirattr
)
8535 action
|= KAUTH_VNODE_SEARCH
;
8537 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
8539 /* Believe it or not, uap->options only has 32-bits of valid
8540 * info, so truncate before extending again */
8542 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8543 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8547 (void) vnode_put(vp
);
8552 * If we've got the last entry of a directory in a union mount
8553 * then reset the eofflag and pretend there's still more to come.
8554 * The next call will again set eofflag and the buffer will be empty,
8555 * so traverse to the underlying directory and do the directory
8558 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8559 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8561 } else { // Empty buffer
8562 struct vnode
*tvp
= vp
;
8563 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8564 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8565 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8566 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8568 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8576 (void)vnode_put(vp
);
8580 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8582 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8584 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8586 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8589 *retval
= eofflag
; /* similar to getdirentries */
8593 return (error
); /* return error earlier, a retval of 0 or 1 now */
8595 } /* end of getdirentriesattr system call */
8598 * Exchange data between two files
/*
 * exchangedata: exchange the data of the two files named by uap->path1
 * and uap->path2.
 *
 * Statements visible in this listing, in order:
 *  - FOLLOW is added to the lookup flags unless FSOPT_NOFOLLOW is set in
 *    uap->options;
 *  - both paths are looked up with namei() (LOOKUP / OP_EXCHANGEDATA);
 *    the second lookup also carries CN_NBMOUNTLOOK;
 *  - tests that the two vnodes are on the same mount and are both regular
 *    files (a same-file test is described by the comment; its code line
 *    is not present in this listing);
 *  - the MAC exchangedata check, then vnode_authorize() for
 *    KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA on each file;
 *  - when an fsevent is needed or kauth fileop listeners exist, both
 *    paths are fetched with safe_getpath() and fse_info is collected; a
 *    truncated path is flagged in f_finfo.mode only;
 *  - VNOP_EXCHANGE(fvp, svp, 0, ctx);
 *  - afterwards: listener notification (KAUTH_FILEOP_EXCHANGE), a swap of
 *    the v_name pointers and, when they differ, the v_parent pointers of
 *    the two vnodes, an FSE_EXCHANGE event, and release of both path
 *    buffers.
 *
 * NOTE(review): braces, allocation of the path buffers, the lock taken
 * before name_cache_unlock() and the cleanup path are on lines not
 * present in this listing.
 */
8603 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8606 struct nameidata fnd
, snd
;
8607 vfs_context_t ctx
= vfs_context_current();
8611 u_int32_t nameiflags
;
8615 int from_truncated
=0, to_truncated
=0;
8617 fse_info f_finfo
, s_finfo
;
8621 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8623 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8624 UIO_USERSPACE
, uap
->path1
, ctx
);
8626 error
= namei(&fnd
);
8633 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8634 UIO_USERSPACE
, uap
->path2
, ctx
);
8636 error
= namei(&snd
);
8645 * if the files are the same, return an inval error
8653 * if the files are on different volumes, return an error
8655 if (svp
->v_mount
!= fvp
->v_mount
) {
8660 /* If they're not files, return an error */
8661 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8667 error
= mac_vnode_check_exchangedata(ctx
,
8672 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8673 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8678 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8680 kauth_authorize_fileop_has_listeners()) {
8683 if (fpath
== NULL
|| spath
== NULL
) {
8688 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8689 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8692 get_fse_info(fvp
, &f_finfo
, ctx
);
8693 get_fse_info(svp
, &s_finfo
, ctx
);
8694 if (from_truncated
|| to_truncated
) {
8695 // set it here since only the f_finfo gets reported up to user space
8696 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8700 /* Ok, make the call */
8701 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8704 const char *tmpname
;
8706 if (fpath
!= NULL
&& spath
!= NULL
) {
8707 /* call out to allow 3rd party notification of exchangedata.
8708 * Ignore result of kauth_authorize_fileop call.
8710 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8711 (uintptr_t)fpath
, (uintptr_t)spath
);
8715 tmpname
= fvp
->v_name
;
8716 fvp
->v_name
= svp
->v_name
;
8717 svp
->v_name
= tmpname
;
8719 if (fvp
->v_parent
!= svp
->v_parent
) {
8722 tmp
= fvp
->v_parent
;
8723 fvp
->v_parent
= svp
->v_parent
;
8724 svp
->v_parent
= tmp
;
8726 name_cache_unlock();
8729 if (fpath
!= NULL
&& spath
!= NULL
) {
8730 add_fsevent(FSE_EXCHANGE
, ctx
,
8731 FSE_ARG_STRING
, flen
, fpath
,
8732 FSE_ARG_FINFO
, &f_finfo
,
8733 FSE_ARG_STRING
, slen
, spath
,
8734 FSE_ARG_FINFO
, &s_finfo
,
8742 RELEASE_PATH(fpath
);
8744 RELEASE_PATH(spath
);
8752 * Return (in MB) the amount of freespace on the given vnode's volume.
// freespace_mb: free space available on the volume that holds vp, in
// units of 2^20 bytes.
//
// Refreshes the mount's vfsstat with vfs_update_vfsstat() (event type
// VFS_USER_EVENT), then returns f_bavail * f_bsize shifted right by 20.
// f_bavail is widened to uint64_t before the multiplication.
//
// NOTE(review): the prototype below declares a uint32_t result, so a
// quotient above UINT32_MAX would be truncated -- confirm the return type
// on the definition, whose line is not present in this listing.
8754 uint32_t freespace_mb(vnode_t vp
);
8757 freespace_mb(vnode_t vp
)
8759 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8760 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8761 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
// searchfs: search a volume for objects that match caller-supplied
// attribute criteria.
//
// Statements visible in this listing, in order:
//  1. copy in the fssearchblock: 64-bit processes use the user64 layout
//     directly; otherwise a user32_fssearchblock is copied in and widened
//     field by field into the 64-bit form;
//  2. compare sizeofsearchparams1 and sizeofsearchparams2 against
//     SEARCHFS_MAX_SEARCHPARMS;
//  3. make a single allocation that holds searchparams1, searchparams2,
//     the return attrlist and the searchstate (plus 2 * sizeof(uint32_t)
//     of slack) and copy each piece in from user space;
//  4. union-mount state: SRCHFS_START resets ss_union_layer, and
//     ss_union_flags is merged into uap->options and then cleared;
//  5. when ATTR_CMN_NAME is searched, inspect the attrreference_t that
//     follows the length word of searchparams1: negative offset, length
//     above MAXPATHLEN, pointer wrap-around, and data extending past the
//     end of searchparams1;
//  6. build the uio over the return buffer, look up uap->path, switch to
//     the volume root with VFS_ROOT() and step down ss_union_layer union
//     layers through mnt_vnodecovered;
//  7. MAC check, test for maxmatches == 0, then VNOP_SEARCHFS();
//  8. on a union mount with no filesystem error, arm the next call with
//     SRCHFS_START and increment ss_union_layer;
//  9. copy out the search state and the match count, free the allocation.
//
// NOTE(review): only searchparams1 is inspected in step 5 within this
// listing, and the error values and exits for failed tests are on lines
// not present here.
8769 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8774 struct nameidata nd
;
8775 struct user64_fssearchblock searchblock
;
8776 struct searchstate
*state
;
8777 struct attrlist
*returnattrs
;
8778 struct timeval timelimit
;
8779 void *searchparams1
,*searchparams2
;
8781 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8782 uint32_t nummatches
;
8784 uint32_t nameiflags
;
8785 vfs_context_t ctx
= vfs_context_current();
8786 char uio_buf
[ UIO_SIZEOF(1) ];
8788 /* Start by copying in fsearchblock parameter list */
8789 if (IS_64BIT_PROCESS(p
)) {
8790 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8791 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8792 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8795 struct user32_fssearchblock tmp_searchblock
;
8797 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8798 // munge into 64-bit version
8799 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8800 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8801 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8802 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8804 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8805 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8807 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8808 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8809 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8810 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8811 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8812 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8813 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8818 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8820 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8821 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8824 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8825 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8826 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8829 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8830 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8831 /* assumes the size is still 556 bytes it will continue to work */
8833 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8834 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8836 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8838 /* Now set up the various pointers to the correct place in our newly allocated memory */
8840 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8841 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8842 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8844 /* Now copy in the stuff given our local variables. */
8846 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8849 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8852 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8855 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8859 * When searching a union mount, need to set the
8860 * start flag at the first call on each layer to
8861 * reset state for the new volume.
8863 if (uap
->options
& SRCHFS_START
)
8864 state
->ss_union_layer
= 0;
8866 uap
->options
|= state
->ss_union_flags
;
8867 state
->ss_union_flags
= 0;
8870 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8871 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8872 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8873 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8874 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8877 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8878 attrreference_t
* string_ref
;
8879 u_int32_t
* start_length
;
8880 user64_size_t param_length
;
8882 /* validate searchparams1 */
8883 param_length
= searchblock
.sizeofsearchparams1
;
8884 /* skip the word that specifies length of the buffer */
8885 start_length
= (u_int32_t
*) searchparams1
;
8886 start_length
= start_length
+1;
8887 string_ref
= (attrreference_t
*) start_length
;
8889 /* ensure no negative offsets or too big offsets */
8890 if (string_ref
->attr_dataoffset
< 0 ) {
8894 if (string_ref
->attr_length
> MAXPATHLEN
) {
8899 /* Check for pointer overflow in the string ref */
8900 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8905 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8909 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8915 /* set up the uio structure which will contain the users return buffer */
8916 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8917 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8920 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8921 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8922 UIO_USERSPACE
, uap
->path
, ctx
);
8931 * Switch to the root vnode for the volume
8933 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8940 * If it's a union mount, the path lookup takes
8941 * us to the top layer. But we may need to descend
8942 * to a lower layer. For non-union mounts the layer
8945 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
8946 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
8949 vp
= vp
->v_mount
->mnt_vnodecovered
;
8955 error
= vnode_getwithref(vp
);
8962 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
8971 * If searchblock.maxmatches == 0, then skip the search. This has happened
8972 * before and sometimes the underlying code doesnt deal with it well.
8974 if (searchblock
.maxmatches
== 0) {
8980 * Allright, we have everything we need, so lets make that call.
8982 * We keep special track of the return value from the file system:
8983 * EAGAIN is an acceptable error condition that shouldn't keep us
8984 * from copying out any results...
8987 fserror
= VNOP_SEARCHFS(vp
,
8990 &searchblock
.searchattrs
,
8991 (u_long
)searchblock
.maxmatches
,
8995 (u_long
)uap
->scriptcode
,
8996 (u_long
)uap
->options
,
8998 (struct searchstate
*) &state
->ss_fsstate
,
9002 * If it's a union mount we need to be called again
9003 * to search the mounted-on filesystem.
9005 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
9006 state
->ss_union_flags
= SRCHFS_START
;
9007 state
->ss_union_layer
++; // search next layer down
9015 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9016 search state. Everything was already put into the return buffer by the vop call. */
9018 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
9021 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
9028 FREE(searchparams1
,M_TEMP
);
9033 } /* end of searchfs system call */
9035 #else /* CONFIG_SEARCHFS */
/*
 * searchfs: variant compiled when CONFIG_SEARCHFS is not set (this is the
 * #else arm of that conditional).  All three parameters are marked
 * __unused.
 *
 * NOTE(review): the body, and therefore the value returned, is on lines
 * not present in this listing.
 */
9038 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9043 #endif /* CONFIG_SEARCHFS */
/* Lock group, lock attributes and the two mutexes of the namespace handler code; nspace_handler_init() allocates and initializes them. */
9046 lck_grp_attr_t
* nspace_group_attr
;
9047 lck_attr_t
* nspace_lock_attr
;
9048 lck_grp_t
* nspace_mutex_group
;
9050 lck_mtx_t nspace_handler_lock
;
9051 lck_mtx_t nspace_handler_exclusion_lock
;
/* Snapshot epoch timestamp: reset to 0 when the snapshot handler process exits, set through FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME. */
9053 time_t snapshot_timestamp
=0;
/* Set through FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS; consulted by resolve_nspace_item_ext() for mounts flagged MNTK_VIRTUALDEV. */
9054 int nspace_allow_virtual_devs
=0;
9056 void nspace_handler_init(void);
/* One entry of the pending-event table. */
9058 typedef struct nspace_item_info
{
/* Capacity of the pending-event table below. */
9068 #define MAX_NSPACE_ITEMS 128
9069 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9070 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
/* Source of item tokens (pre-incremented when a handler claims an item); its address is also the sleep channel used while the table is full. */
9071 uint32_t nspace_token_id
=0;
9072 uint32_t nspace_handler_timeout
= 15; // seconds
/* Item state bits stored in the flags field of a table entry. */
9074 #define NSPACE_ITEM_NEW 0x0001
9075 #define NSPACE_ITEM_PROCESSING 0x0002
9076 #define NSPACE_ITEM_DEAD 0x0004
9077 #define NSPACE_ITEM_CANCELLED 0x0008
9078 #define NSPACE_ITEM_DONE 0x0010
9079 #define NSPACE_ITEM_RESET_TIMER 0x0020
/* Event-type bits stored in the same flags field; they select which handler an item is meant for. */
9081 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9082 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9084 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9086 //#pragma optimization_level 0
/* Handler types; NSPACE_HANDLER_COUNT is the dimension of the nspace_handlers array. */
9089 NSPACE_HANDLER_NSPACE
= 0,
9090 NSPACE_HANDLER_SNAPSHOT
= 1,
9092 NSPACE_HANDLER_COUNT
,
/* Per-handler registration record: thread id and process of the registered handler. */
9096 uint64_t handler_tid
;
9097 struct proc
*handler_proc
;
/* Registration table, indexed by handler type. */
9101 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
9103 /* namespace fsctl functions */
9104 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9105 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9106 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9107 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9108 static int nspace_is_special_process(struct proc
*proc
);
9109 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9110 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
9111 static int validate_namespace_args (int is64bit
, int size
);
9112 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
9115 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9117 switch(nspace_type
) {
9118 case NSPACE_HANDLER_NSPACE
:
9119 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9120 case NSPACE_HANDLER_SNAPSHOT
:
9121 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
9123 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
9128 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
9130 switch(nspace_type
) {
9131 case NSPACE_HANDLER_NSPACE
:
9132 return NSPACE_ITEM_NSPACE_EVENT
;
9133 case NSPACE_HANDLER_SNAPSHOT
:
9134 return NSPACE_ITEM_SNAPSHOT_EVENT
;
9136 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
9141 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
9143 switch(nspace_type
) {
9144 case NSPACE_HANDLER_NSPACE
:
9145 return FREAD
| FWRITE
| O_EVTONLY
;
9146 case NSPACE_HANDLER_SNAPSHOT
:
9147 return FREAD
| O_EVTONLY
;
9149 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
9154 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
9156 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9157 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9158 return NSPACE_HANDLER_NSPACE
;
9159 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9160 return NSPACE_HANDLER_SNAPSHOT
;
9162 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9163 return NSPACE_HANDLER_NSPACE
;
9167 static inline int nspace_is_special_process(struct proc
*proc
)
9170 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9171 if (proc
== nspace_handlers
[i
].handler_proc
)
9178 nspace_handler_init(void)
9180 nspace_lock_attr
= lck_attr_alloc_init();
9181 nspace_group_attr
= lck_grp_attr_alloc_init();
9182 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9183 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9184 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9185 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
9189 nspace_proc_exit(struct proc
*p
)
9191 int i
, event_mask
= 0;
9193 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9194 if (p
== nspace_handlers
[i
].handler_proc
) {
9195 event_mask
|= nspace_item_flags_for_type(i
);
9196 nspace_handlers
[i
].handler_tid
= 0;
9197 nspace_handlers
[i
].handler_proc
= NULL
;
9201 if (event_mask
== 0) {
9205 lck_mtx_lock(&nspace_handler_lock
);
9206 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9207 // if this process was the snapshot handler, zero snapshot_timeout
9208 snapshot_timestamp
= 0;
9212 // unblock anyone that's waiting for the handler that died
9214 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9215 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9217 if ( nspace_items
[i
].flags
& event_mask
) {
9219 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9220 vnode_lock_spin(nspace_items
[i
].vp
);
9221 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9222 vnode_unlock(nspace_items
[i
].vp
);
9224 nspace_items
[i
].vp
= NULL
;
9225 nspace_items
[i
].vid
= 0;
9226 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9227 nspace_items
[i
].token
= 0;
9229 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9234 wakeup((caddr_t
)&nspace_item_idx
);
9235 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Convenience wrapper: resolve a namespace item for vp/op without an
 * extra argument.  The result of resolve_nspace_item_ext() is passed
 * through unchanged.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	int result;

	result = resolve_nspace_item_ext(vp, op, NULL);
	return result;
}
/*
 * Post an event for vp/op into the nspace_items table and sleep until the
 * handler marks the slot done or cancelled, or until the wait ends with
 * an error.  arg is stored in the slot for the handler; the value
 * NSPACE_REARM_NO_ARG is stored as NULL.
 */
9246 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9248 int i
, error
, keep_waiting
;
/* The handler type is derived from the event-type bits of op. */
9250 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9252 // only allow namespace events on regular files, directories and symlinks.
9253 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9258 // if this is a snapshot event and the vnode is on a
9259 // disk image just pretend nothing happened since any
9260 // change to the disk image will cause the disk image
9261 // itself to get backed up and this avoids multi-way
9262 // deadlocks between the snapshot handler and the ever
9263 // popular diskimages-helper process. the variable
9264 // nspace_allow_virtual_devs allows this behavior to
9265 // be overridden (for use by the Mobile TimeMachine
9266 // testing infrastructure which uses disk images)
9268 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9269 && (vp
->v_mount
!= NULL
)
9270 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9271 && !nspace_allow_virtual_devs
) {
9276 // if (thread_tid(current_thread()) == namespace_handler_tid) {
/* Special case: no handler process is registered for this type. */
9277 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
/* Special case: the caller is itself one of the registered handlers. */
9281 if (nspace_is_special_process(current_proc())) {
9285 lck_mtx_lock(&nspace_handler_lock
);
/* Pass 1: look for a slot that already tracks the same vnode and op. */
9288 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9289 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
/* Pass 2 (no existing slot): look for a free slot, i.e. one whose flags are 0. */
9294 if (i
>= MAX_NSPACE_ITEMS
) {
9295 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9296 if (nspace_items
[i
].flags
== 0) {
/* One more waiter on this slot. */
9301 nspace_items
[i
].refcount
++;
/* Table full: sleep on nspace_token_id for up to nspace_handler_timeout seconds; the channel is woken when a slot is released at the end of this function. */
9304 if (i
>= MAX_NSPACE_ITEMS
) {
9305 ts
.tv_sec
= nspace_handler_timeout
;
9308 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
9310 // an entry got free'd up, go see if we can get a slot
9313 lck_mtx_unlock(&nspace_handler_lock
);
9319 // if it didn't already exist, add it. if it did exist
9320 // we'll get woken up when someone does a wakeup() on
9321 // the slot in the nspace_items table.
9323 if (vp
!= nspace_items
[i
].vp
) {
9324 nspace_items
[i
].vp
= vp
;
9325 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9326 nspace_items
[i
].op
= op
;
9327 nspace_items
[i
].vid
= vnode_vid(vp
);
9328 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9329 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
/* Snapshot items also tag the vnode itself with VNEEDSSNAPSHOT. */
9330 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9332 vnode_lock_spin(vp
);
9333 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9338 nspace_items
[i
].token
= 0;
9339 nspace_items
[i
].refcount
= 1;
/* Wake a handler sleeping in wait_for_namespace_event() on nspace_item_idx. */
9341 wakeup((caddr_t
)&nspace_item_idx
);
9345 // Now go to sleep until the handler does a wakeup on this
9346 // slot in the nspace_items table (or we timeout).
9349 while(keep_waiting
) {
9350 ts
.tv_sec
= nspace_handler_timeout
;
/* The sleep channel is the address of the vp field of the slot. */
9352 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
9354 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
/* On cancel the handler leaves its result value in the token field. */
9356 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9357 error
= nspace_items
[i
].token
;
/* Timed out: NSPACE_ITEM_RESET_TIMER (set by the handler update fsctl) is consumed here. */
9358 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
9359 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9360 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9365 } else if (error
== 0) {
9366 // hmmm, why did we get woken up?
9367 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9368 nspace_items
[i
].token
);
/* Last waiter out releases the slot and wakes threads waiting for a free slot. */
9371 if (--nspace_items
[i
].refcount
== 0) {
9372 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9373 nspace_items
[i
].arg
= NULL
;
9374 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
9375 nspace_items
[i
].flags
= 0; // this clears it for re-use
9377 wakeup(&nspace_token_id
);
9381 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Raise a snapshot event for vp through resolve_nspace_item_ext() when a
 * snapshot epoch is active and either ctime is not later than
 * snapshot_timestamp or the vnode reports that it needs snapshots.
 * Returns snapshot_error.
 */
9386 int nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
9388 int snapshot_error
= 0;
9394 /* Swap files are special; skip them */
9395 if (vnode_isswap(vp
)) {
/* Both timestamps must be non-zero for the comparison to apply. */
9399 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
9400 // the change time is within this epoch
/* The snapshot event-type bit is added to the caller-supplied op. */
9403 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
9404 if (error
== EDEADLK
) {
9407 if (error
== EAGAIN
) {
9408 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
/* An interrupted wait is reported to the caller as EINTR. */
9409 } else if (error
== EINTR
) {
9410 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9411 snapshot_error
= EINTR
;
9416 return snapshot_error
;
/*
 * Look up the table slot that tracks vp and store its flags word in
 * *status.  The table is scanned and read under nspace_handler_lock.
 */
9420 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
9424 lck_mtx_lock(&nspace_handler_lock
);
9425 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9426 if (nspace_items
[i
].vp
== vp
) {
/* No slot tracks vp: drop the lock; *status is not written on this path. */
9431 if (i
>= MAX_NSPACE_ITEMS
) {
9432 lck_mtx_unlock(&nspace_handler_lock
);
9436 *status
= nspace_items
[i
].flags
;
9437 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.  On entry *len
 * is the buffer size handed to snprintf; on exit it is the snprintf
 * return value plus one.  If vnode_getattr() fails a fixed placeholder
 * path is written instead.
 * NOTE(review): snprintf returns the untruncated length, so on truncation
 * *len can exceed the buffer size that was passed in - confirm callers
 * cope with that.
 */
9444 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
9446 struct vnode_attr va
;
/* Only the filesystem id and the file id are requested. */
9450 VATTR_WANTED(&va
, va_fsid
);
9451 VATTR_WANTED(&va
, va_fileid
);
9453 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
9454 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
9457 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
/*
 * Open the vnode vp with mode fmode on behalf of ctx.  Steps visible
 * here, in order: superuser check, MAC open check, vnode_authorize() for
 * the access implied by fmode, VNOP_OPEN(), vnode_ref_ext() (with
 * VNOP_CLOSE() if that fails), then the open notifications.
 */
9466 // Note: this function does NOT check permissions on all of the
9467 // parent directories leading to this vnode. It should only be
9468 // called on behalf of a root process. Otherwise a process may
9469 // get access to a file because the file itself is readable even
9470 // though its parent directories would prevent access.
9473 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
/* Callers must be superuser. */
9477 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9482 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
9487 /* compute action to be authorized */
9489 if (fmode
& FREAD
) {
9490 action
|= KAUTH_VNODE_READ_DATA
;
9492 if (fmode
& (FWRITE
| O_TRUNC
)) {
9494 * If we are writing, appending, and not truncating,
9495 * indicate that we are appending so that if the
9496 * UF_APPEND or SF_APPEND bits are set, we do not deny
9499 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
9500 action
|= KAUTH_VNODE_APPEND_DATA
;
9502 action
|= KAUTH_VNODE_WRITE_DATA
;
9506 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
9511 // if the vnode is tagged VOPENEVT and the current process
9512 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9513 // flag to the open mode so that this open won't count against
9514 // the vnode when carbon delete() does a vnode_isinuse() to see
9515 // if a file is currently in use. this allows spotlight
9516 // importers to not interfere with carbon apps that depend on
9517 // the no-delete-if-busy semantics of carbon delete().
9519 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
9523 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
/* Taking the open reference failed: undo the VNOP_OPEN. */
9526 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
9527 VNOP_CLOSE(vp
, fmode
, ctx
);
9531 /* Call out to allow 3rd party notification of open.
9532 * Ignore result of kauth_authorize_fileop call.
9535 mac_vnode_notify_open(ctx
, vp
, fmode
);
9537 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
/*
 * Handler side of the namespace event mechanism.  The calling thread
 * claims the handler role for nspace_type, waits for a NSPACE_ITEM_NEW
 * item meant for that handler, opens the vnode of the item and copies
 * the token, the op word, the new file descriptor and the optional
 * fields out to the user addresses held in nhd.
 */
9545 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
/* handler_busy is tested and then set under nspace_handler_exclusion_lock; it is cleared again at the end of this function. */
9552 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9553 if (nspace_handlers
[nspace_type
].handler_busy
) {
9554 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9558 nspace_handlers
[nspace_type
].handler_busy
= 1;
9559 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9562 * Any process that gets here will be one of the namespace handlers.
9563 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9564 * as we can cause deadlocks to occur, because the namespace handler may prevent
9565 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9568 curtask
= current_task();
9569 bsd_set_dependency_capable (curtask
);
9571 lck_mtx_lock(&nspace_handler_lock
);
/* If no handler process is registered for this type yet, register the calling thread and process. */
9572 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9573 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
9574 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
/* NOTE(review): snapshot_timestamp is a time_t that the SET_SNAPSHOT_TIME fsctl stores from a uint32_t; if time_t is wider than 32 bits a stored 0xffffffff would not compare equal to ~0 - confirm intent. */
9577 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9578 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9582 while (error
== 0) {
9584 /* Try to find matching namespace item */
9585 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9586 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9587 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9593 if (i
>= MAX_NSPACE_ITEMS
) {
9594 /* Nothing is there yet. Wait for wake up and retry */
9595 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9596 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9597 /* Prevent infinite loop if snapshot handler exited */
/* Claim the item: NEW becomes PROCESSING and the item receives a fresh token. */
9604 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9605 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9606 nspace_items
[i
].token
= ++nspace_token_id
;
9608 assert(nspace_items
[i
].vp
);
9609 struct fileproc
*fp
;
9612 struct proc
*p
= current_proc();
9613 vfs_context_t ctx
= vfs_context_current();
9614 struct vnode_attr va
;
/* Progress flags; the unwinding code further down tests them to undo only the steps that completed. */
9615 bool vn_get_succsessful
= false;
9616 bool vn_open_successful
= false;
9617 bool fp_alloc_successful
= false;
9620 * Use vnode pointer to acquire a file descriptor for
9621 * hand-off to userland
9623 fmode
= nspace_open_flags_for_type(nspace_type
);
/* The vid recorded when the item was posted guards against the vnode having been recycled. */
9624 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9625 if (error
) goto cleanup
;
9626 vn_get_succsessful
= true;
9628 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9629 if (error
) goto cleanup
;
9630 vn_open_successful
= true;
9632 error
= falloc(p
, &fp
, &indx
, ctx
);
9633 if (error
) goto cleanup
;
9634 fp_alloc_successful
= true;
/* Bind the new fileproc to the vnode. */
9636 fp
->f_fglob
->fg_flag
= fmode
;
9637 fp
->f_fglob
->fg_ops
= &vnops
;
9638 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9641 procfdtbl_releasefd(p
, indx
, NULL
);
9642 fp_drop(p
, indx
, fp
, 1);
9646 * All variants of the namespace handler struct support these three fields:
9647 * token, flags, and the FD pointer
9649 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9650 if (error
) goto cleanup
;
9651 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9652 if (error
) goto cleanup
;
9653 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9654 if (error
) goto cleanup
;
9657 * Handle optional fields:
9658 * extended version support an info ptr (offset, length), and the
9660 * namedata version supports a unique per-link object ID
/* The item argument is treated as a uio; its offset and residual count are written as two consecutive 64-bit values at infoptr. */
9664 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9665 uint64_t u_offset
, u_length
;
9668 u_offset
= uio_offset(uio
);
9669 u_length
= uio_resid(uio
);
9674 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9675 if (error
) goto cleanup
;
9676 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
9677 if (error
) goto cleanup
;
9682 VATTR_WANTED(&va
, va_linkid
);
9683 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9684 if (error
) goto cleanup
;
/* A link id of 0 is reported when the filesystem does not supply va_linkid. */
9686 uint64_t linkid
= 0;
9687 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9688 linkid
= (uint64_t)va
.va_linkid
;
9690 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
/* Unwinding: each step is undone only if its progress flag was set. */
9694 if (fp_alloc_successful
) fp_free(p
, indx
, fp
);
9695 if (vn_open_successful
) vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9699 if (vn_get_succsessful
) vnode_put(nspace_items
[i
].vp
);
/* Release of the item: clear VNEEDSSNAPSHOT if set, mark the slot done and wake the thread sleeping on it. */
9705 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9706 vnode_lock_spin(nspace_items
[i
].vp
);
9707 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9708 vnode_unlock(nspace_items
[i
].vp
);
9710 nspace_items
[i
].vp
= NULL
;
9711 nspace_items
[i
].vid
= 0;
9712 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9713 nspace_items
[i
].token
= 0;
9715 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9718 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9719 // just go through every snapshot event and unblock it immediately.
9720 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9721 for(i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9722 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9723 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9724 nspace_items
[i
].vp
= NULL
;
9725 nspace_items
[i
].vid
= 0;
9726 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9727 nspace_items
[i
].token
= 0;
9729 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9736 lck_mtx_unlock(&nspace_handler_lock
);
/* Give up the handler role so that another call for this type can proceed. */
9738 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9739 nspace_handlers
[nspace_type
].handler_busy
= 0;
9740 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
/*
 * Size check for the argument of the namespace handler fsctls: size is
 * compared against the three known handler structures (info, info_ext,
 * data) in their user64 and user32 layouts.
 */
9745 static inline int validate_namespace_args (int is64bit
, int size
) {
9748 /* Must be one of these */
9749 if (size
== sizeof(user64_namespace_handler_info
)) {
9752 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9755 if (size
== sizeof(user64_namespace_handler_data
)) {
9761 /* 32 bit -- must be one of these */
9762 if (size
== sizeof(user32_namespace_handler_info
)) {
9765 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9768 if (size
== sizeof(user32_namespace_handler_data
)) {
/*
 * Common entry point of the handler "get" fsctls.  Requires superuser,
 * validates the argument size, translates the user pointers found in
 * data (64-bit or 32-bit layout) into a kernel namespace_handler_data
 * and then waits for an event in wait_for_namespace_event().
 */
9780 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9783 namespace_handler_data nhd
;
/* Optional fields that are not filled in below stay zero. */
9785 bzero (&nhd
, sizeof(namespace_handler_data
));
9787 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9791 error
= validate_namespace_args (is64bit
, size
);
9796 /* Copy in the userland pointers into our kernel-only struct */
9799 /* 64 bit userland structures */
9800 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9801 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9802 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9804 /* If the size is greater than the standard info struct, add in extra fields */
9805 if (size
> (sizeof(user64_namespace_handler_info
))) {
9806 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9807 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9809 if (size
== (sizeof(user64_namespace_handler_data
))) {
9810 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9812 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9816 /* 32 bit userland structures */
9817 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9818 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9819 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9821 if (size
> (sizeof(user32_namespace_handler_info
))) {
9822 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9823 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
/* NOTE(review): the other 32-bit fields are converted with CAST_USER_ADDR_T while objid uses a plain user_addr_t cast - confirm the two are equivalent for this field. */
9825 if (size
== (sizeof(user32_namespace_handler_data
))) {
9826 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9828 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9832 return wait_for_namespace_event(&nhd
, nspace_type
);
9836 * Make a filesystem-specific control call:
/*
 * Shared implementation of fsctl() and ffsctl().  The parameter size is
 * taken from cmd (IOCPARM_LEN), the argument is staged in a kernel
 * buffer, generic commands are handled by the switch on IOCBASECMD(cmd)
 * and everything else is forwarded to the filesystem through
 * VNOP_IOCTL().  On success, IOC_OUT data is copied back to udata.
 */
9840 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
/* Arguments of at most STK_PARAMS bytes are staged in this zeroed stack buffer; larger ones are allocated with kalloc below. */
9845 #define STK_PARAMS 128
9846 char stkbuf
[STK_PARAMS
] = {0};
9848 vnode_t vp
= *arg_vp
;
9850 size
= IOCPARM_LEN(cmd
);
9851 if (size
> IOCPARM_MAX
) return (EINVAL
);
9853 is64bit
= proc_is64bit(p
);
9859 * ensure the buffer is large enough for underlying calls
9861 #ifndef HFSIOC_GETPATH
9862 typedef char pn_t
[MAXPATHLEN
];
9863 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9867 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9869 if (IOCBASECMD(cmd
) == HFS_GETPATH
) {
9870 /* Round up to MAXPATHLEN regardless of user input */
9874 if (size
> sizeof (stkbuf
)) {
9875 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9883 error
= copyin(udata
, data
, size
);
/* The user address itself is stored in the buffer, at the pointer width of the calling process. */
9892 *(user_addr_t
*)data
= udata
;
9895 *(uint32_t *)data
= (uint32_t)udata
;
9898 } else if ((cmd
& IOC_OUT
) && size
) {
9900 * Zero the buffer so the user always
9901 * gets back something deterministic.
9904 } else if (cmd
& IOC_VOID
) {
9906 *(user_addr_t
*)data
= udata
;
9909 *(uint32_t *)data
= (uint32_t)udata
;
9913 /* Check to see if it's a generic command */
9914 switch (IOCBASECMD(cmd
)) {
9916 case FSCTL_SYNC_VOLUME
: {
9917 mount_t mp
= vp
->v_mount
;
9918 int arg
= *(uint32_t*)data
;
9920 /* record vid of vp so we can drop it below. */
9921 uint32_t vvid
= vp
->v_id
;
9924 * Then grab mount_iterref so that we can release the vnode.
9925 * Without this, a thread may call vnode_iterate_prepare then
9926 * get into a deadlock because we've never released the root vp
9928 error
= mount_iterref (mp
, 0);
9934 /* issue the sync for this volume */
9935 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
9938 * Then release the mount_iterref once we're done syncing; it's not
9939 * needed for the VNOP_IOCTL below
9943 if (arg
& FSCTL_SYNC_FULLSYNC
) {
9944 /* re-obtain vnode iocount on the root vp, if possible */
9945 error
= vnode_getwithvid (vp
, vvid
);
9947 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
9951 /* mark the argument VP as having been released */
9956 case FSCTL_ROUTEFS_SETROUTEID
: {
9958 char routepath
[MAXPATHLEN
];
9961 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
/* The route path is read as a string directly from the user address, not from the staged buffer. */
9964 bzero(routepath
, MAXPATHLEN
);
9965 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
9969 error
= routefs_kernel_mount(routepath
);
9977 case FSCTL_SET_PACKAGE_EXTS
: {
9978 user_addr_t ext_strings
;
9979 uint32_t num_entries
;
9982 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0)))
9985 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
9986 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
9988 // either you're 64-bit and passed a 64-bit struct or
9989 // you're 32-bit and passed a 32-bit struct. otherwise
9996 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
9997 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
9998 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
10000 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
10001 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
10002 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
10004 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
10008 /* namespace handlers */
10009 case FSCTL_NAMESPACE_HANDLER_GET
: {
10010 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10014 /* Snapshot handlers */
10015 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
10016 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10020 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
10021 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
/* UPDATE: look up the item by token and set NSPACE_ITEM_RESET_TIMER on it. Only a registered handler process may do this. */
10025 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
10026 uint32_t token
, val
;
10029 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10033 if (!nspace_is_special_process(p
)) {
/* The argument is read as two consecutive 32-bit words: token, then value. */
10038 token
= ((uint32_t *)data
)[0];
10039 val
= ((uint32_t *)data
)[1];
10041 lck_mtx_lock(&nspace_handler_lock
);
10043 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10044 if (nspace_items
[i
].token
== token
) {
10045 break; /* exit for loop, not case stmt */
10049 if (i
>= MAX_NSPACE_ITEMS
) {
10053 // if this bit is set, when resolve_nspace_item() times out
10054 // it will loop and go back to sleep.
10056 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10059 lck_mtx_unlock(&nspace_handler_lock
);
10062 printf("nspace-handler-update: did not find token %u\n", token
);
/* UNBLOCK: look up the item by token, mark it NSPACE_ITEM_DONE and wake the thread sleeping on the slot. */
10067 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
10068 uint32_t token
, val
;
10071 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10075 if (!nspace_is_special_process(p
)) {
10080 token
= ((uint32_t *)data
)[0];
10081 val
= ((uint32_t *)data
)[1];
10083 lck_mtx_lock(&nspace_handler_lock
);
10085 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10086 if (nspace_items
[i
].token
== token
) {
10087 break; /* exit for loop, not case statement */
10091 if (i
>= MAX_NSPACE_ITEMS
) {
10092 printf("nspace-handler-unblock: did not find token %u\n", token
);
/* VNEEDSSNAPSHOT is cleared on the vnode only when the caller passed a value of 0. */
10095 if (val
== 0 && nspace_items
[i
].vp
) {
10096 vnode_lock_spin(nspace_items
[i
].vp
);
10097 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10098 vnode_unlock(nspace_items
[i
].vp
);
10101 nspace_items
[i
].vp
= NULL
;
10102 nspace_items
[i
].arg
= NULL
;
10103 nspace_items
[i
].op
= 0;
10104 nspace_items
[i
].vid
= 0;
10105 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10106 nspace_items
[i
].token
= 0;
10108 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10111 lck_mtx_unlock(&nspace_handler_lock
);
/* CANCEL: look up the item by token, flag it NSPACE_ITEM_CANCELLED and leave the caller-supplied value in its token field for the waiter. */
10115 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
10116 uint32_t token
, val
;
10119 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10123 if (!nspace_is_special_process(p
)) {
10128 token
= ((uint32_t *)data
)[0];
10129 val
= ((uint32_t *)data
)[1];
10131 lck_mtx_lock(&nspace_handler_lock
);
10133 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10134 if (nspace_items
[i
].token
== token
) {
10135 break; /* exit for loop, not case stmt */
10139 if (i
>= MAX_NSPACE_ITEMS
) {
10140 printf("nspace-handler-cancel: did not find token %u\n", token
);
10143 if (nspace_items
[i
].vp
) {
10144 vnode_lock_spin(nspace_items
[i
].vp
);
10145 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10146 vnode_unlock(nspace_items
[i
].vp
);
10149 nspace_items
[i
].vp
= NULL
;
10150 nspace_items
[i
].arg
= NULL
;
10151 nspace_items
[i
].vid
= 0;
10152 nspace_items
[i
].token
= val
;
10153 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10154 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10156 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10159 lck_mtx_unlock(&nspace_handler_lock
);
10163 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10164 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10168 // we explicitly do not do the namespace_handler_proc check here
10170 lck_mtx_lock(&nspace_handler_lock
);
/* The new timestamp is the first 32-bit word of the argument; handlers sleeping on nspace_item_idx are woken so they re-read it. */
10171 snapshot_timestamp
= ((uint32_t *)data
)[0];
10172 wakeup(&nspace_item_idx
);
10173 lck_mtx_unlock(&nspace_handler_lock
);
10174 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10179 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10181 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10185 lck_mtx_lock(&nspace_handler_lock
);
10186 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10187 lck_mtx_unlock(&nspace_handler_lock
);
10188 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10189 nspace_allow_virtual_devs
? "" : " NOT");
10195 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
10197 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10201 mount_lock(vp
->v_mount
);
/* A non-empty name installs the override; an empty one removes it. NOTE(review): strlcpy scans its source up to a NUL byte - confirm data is always terminated within the staged buffer. */
10202 if (data
[0] != 0) {
10203 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10204 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10205 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10206 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10207 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10210 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10211 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10213 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10214 vp
->v_mount
->fstypename_override
[0] = '\0';
10216 mount_unlock(vp
->v_mount
);
10222 /* Invoke the filesystem-specific code */
10223 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
10226 } /* end switch stmt */
10229 * if no errors, copy any data to user. Size was
10230 * already set and checked above.
10232 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
10233 error
= copyout(data
, udata
, size
);
/*
 * fsctl system call, path variant: look up uap->path (following a
 * trailing symlink unless FSOPT_NOFOLLOW is set), run the MAC fsctl
 * check on the mount of the vnode and hand the request to
 * fsctl_internal().
 */
10244 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10247 struct nameidata nd
;
10250 vfs_context_t ctx
= vfs_context_current();
10252 AUDIT_ARG(cmd
, uap
->cmd
);
10253 AUDIT_ARG(value32
, uap
->options
);
10254 /* Get the vnode for the file we are getting info on: */
10256 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
10257 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10258 UIO_USERSPACE
, uap
->path
, ctx
);
10259 if ((error
= namei(&nd
))) goto done
;
10264 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
/* vp is passed by address to fsctl_internal(). */
10270 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
/*
 * fsctl system call, file descriptor variant: resolve uap->fd to a vnode,
 * take a reference on it, run the MAC fsctl check on its mount and hand
 * the request to fsctl_internal().
 */
10279 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
10283 vfs_context_t ctx
= vfs_context_current();
10286 AUDIT_ARG(fd
, uap
->fd
);
10287 AUDIT_ARG(cmd
, uap
->cmd
);
10288 AUDIT_ARG(value32
, uap
->options
);
10290 /* Get the vnode for the file we are getting info on: */
10291 if ((error
= file_vnode(uap
->fd
, &vp
)))
10294 if ((error
= vnode_getwithref(vp
))) {
10300 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
/* vp is passed by address; the existing note below says fsctl_internal() may drop the iocount and reset vp to NULL. */
10307 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10311 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10318 /* end of fsctl system call */
10321 * Retrieve the data of an extended attribute.
10324 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
10327 struct nameidata nd
;
10328 char attrname
[XATTR_MAXNAMELEN
+1];
10329 vfs_context_t ctx
= vfs_context_current();
10331 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10332 size_t attrsize
= 0;
10334 u_int32_t nameiflags
;
10336 char uio_buf
[ UIO_SIZEOF(1) ];
10338 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10341 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10342 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10343 if ((error
= namei(&nd
))) {
10349 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10352 if (xattr_protected(attrname
)) {
10353 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
10359 * the specific check for 0xffffffff is a hack to preserve
10360 * binary compatibility in K64 with applications that discovered
10361 * that passing in a buf pointer and a size of -1 resulted in
10362 * just the size of the indicated extended attribute being returned.
10363 * this isn't part of the documented behavior, but because of the
10364 * original implementation's check for "uap->size > 0", this behavior
10365 * was allowed. In K32 that check turned into a signed comparison
10366 * even though uap->size is unsigned... in K64, we blow by that
10367 * check because uap->size is unsigned and doesn't get sign smeared
10368 * in the munger for a 32 bit user app. we also need to add a
10369 * check to limit the maximum size of the buffer being passed in...
10370 * unfortunately, the underlying filesystems seem to just malloc
10371 * the requested size even if the actual extended attribute is tiny.
10372 * because that malloc is for kernel wired memory, we have to put a
10373 * sane limit on it.
10375 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10376 * U64 running on K64 will yield -1 (64 bits wide)
10377 * U32/U64 running on K32 will yield -1 (32 bits wide)
10379 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
10383 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
10384 uap
->size
= XATTR_MAXSIZE
;
10386 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10387 &uio_buf
[0], sizeof(uio_buf
));
10388 uio_addiov(auio
, uap
->value
, uap
->size
);
10391 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
10396 *retval
= uap
->size
- uio_resid(auio
);
10398 *retval
= (user_ssize_t
)attrsize
;
10405 * Retrieve the data of an extended attribute.
10408 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
10411 char attrname
[XATTR_MAXNAMELEN
+1];
10413 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10414 size_t attrsize
= 0;
10417 char uio_buf
[ UIO_SIZEOF(1) ];
10419 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10422 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10425 if ( (error
= vnode_getwithref(vp
)) ) {
10426 file_drop(uap
->fd
);
10429 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10432 if (xattr_protected(attrname
)) {
10436 if (uap
->value
&& uap
->size
> 0) {
10437 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10438 &uio_buf
[0], sizeof(uio_buf
));
10439 uio_addiov(auio
, uap
->value
, uap
->size
);
10442 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10444 (void)vnode_put(vp
);
10445 file_drop(uap
->fd
);
10448 *retval
= uap
->size
- uio_resid(auio
);
10450 *retval
= (user_ssize_t
)attrsize
;
10456 * Set the data of an extended attribute.
10459 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
10462 struct nameidata nd
;
10463 char attrname
[XATTR_MAXNAMELEN
+1];
10464 vfs_context_t ctx
= vfs_context_current();
10466 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10468 u_int32_t nameiflags
;
10470 char uio_buf
[ UIO_SIZEOF(1) ];
10472 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10475 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10476 if (error
== EPERM
) {
10477 /* if the string won't fit in attrname, copyinstr emits EPERM */
10478 return (ENAMETOOLONG
);
10480 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10483 if (xattr_protected(attrname
))
10485 if (uap
->size
!= 0 && uap
->value
== 0) {
10489 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10490 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10491 if ((error
= namei(&nd
))) {
10497 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10498 &uio_buf
[0], sizeof(uio_buf
));
10499 uio_addiov(auio
, uap
->value
, uap
->size
);
10501 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
10504 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10515 * Set the data of an extended attribute.
10518 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
10521 char attrname
[XATTR_MAXNAMELEN
+1];
10523 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10526 char uio_buf
[ UIO_SIZEOF(1) ];
10528 vfs_context_t ctx
= vfs_context_current();
10531 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10534 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10535 if (error
== EPERM
) {
10536 /* if the string won't fit in attrname, copyinstr emits EPERM */
10537 return (ENAMETOOLONG
);
10539 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10542 if (xattr_protected(attrname
))
10544 if (uap
->size
!= 0 && uap
->value
== 0) {
10547 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10550 if ( (error
= vnode_getwithref(vp
)) ) {
10551 file_drop(uap
->fd
);
10554 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10555 &uio_buf
[0], sizeof(uio_buf
));
10556 uio_addiov(auio
, uap
->value
, uap
->size
);
10558 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
10561 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10567 file_drop(uap
->fd
);
10573 * Remove an extended attribute.
10574 * XXX Code duplication here.
10577 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
10580 struct nameidata nd
;
10581 char attrname
[XATTR_MAXNAMELEN
+1];
10582 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10583 vfs_context_t ctx
= vfs_context_current();
10585 u_int32_t nameiflags
;
10588 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10591 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10595 if (xattr_protected(attrname
))
10597 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10598 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10599 if ((error
= namei(&nd
))) {
10605 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10608 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10619 * Remove an extended attribute.
10620 * XXX Code duplication here.
10623 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10626 char attrname
[XATTR_MAXNAMELEN
+1];
10630 vfs_context_t ctx
= vfs_context_current();
10633 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10636 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10640 if (xattr_protected(attrname
))
10642 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10645 if ( (error
= vnode_getwithref(vp
)) ) {
10646 file_drop(uap
->fd
);
10650 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10653 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10659 file_drop(uap
->fd
);
10665 * Retrieve the list of extended attribute names.
10666 * XXX Code duplication here.
10669 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10672 struct nameidata nd
;
10673 vfs_context_t ctx
= vfs_context_current();
10675 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10676 size_t attrsize
= 0;
10677 u_int32_t nameiflags
;
10679 char uio_buf
[ UIO_SIZEOF(1) ];
10681 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10684 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10685 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10686 if ((error
= namei(&nd
))) {
10691 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10692 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10693 &uio_buf
[0], sizeof(uio_buf
));
10694 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10697 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10701 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10703 *retval
= (user_ssize_t
)attrsize
;
10709 * Retrieve the list of extended attribute names.
10710 * XXX Code duplication here.
10713 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10717 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10718 size_t attrsize
= 0;
10720 char uio_buf
[ UIO_SIZEOF(1) ];
10722 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10725 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10728 if ( (error
= vnode_getwithref(vp
)) ) {
10729 file_drop(uap
->fd
);
10732 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10733 auio
= uio_createwithbuffer(1, 0, spacetype
,
10734 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10735 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10738 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10741 file_drop(uap
->fd
);
10743 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10745 *retval
= (user_ssize_t
)attrsize
;
10750 static int fsgetpath_internal(
10751 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10752 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10755 struct mount
*mp
= NULL
;
10759 /* maximum number of times to retry build_path */
10760 unsigned int retries
= 0x10;
10762 if (bufsize
> PAGE_SIZE
) {
10771 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10772 error
= ENOTSUP
; /* unexpected failure */
10778 error
= VFS_ROOT(mp
, &vp
, ctx
);
10780 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10783 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10785 * If the fileid isn't found and we're in a union
10786 * mount volume, then see if the fileid is in the
10787 * mounted-on volume.
10789 struct mount
*tmp
= mp
;
10790 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10792 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10803 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10810 /* Obtain the absolute path to this vnode. */
10811 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10812 bpflags
|= BUILDPATH_CHECK_MOVED
;
10813 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10817 /* there was a race building the path, try a few more times */
10818 if (error
== EAGAIN
) {
10828 AUDIT_ARG(text
, buf
);
10830 if (kdebug_enable
) {
10831 long dbg_parms
[NUMPARMS
];
10834 dbg_namelen
= (int)sizeof(dbg_parms
);
10836 if (length
< dbg_namelen
) {
10837 memcpy((char *)dbg_parms
, buf
, length
);
10838 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10840 dbg_namelen
= length
;
10842 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10845 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10848 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10855 * Obtain the full pathname of a file system object by id.
10857 * This is a private SPI used by the File Manager.
10861 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10863 vfs_context_t ctx
= vfs_context_current();
10869 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10872 AUDIT_ARG(value32
, fsid
.val
[0]);
10873 AUDIT_ARG(value64
, uap
->objid
);
10874 /* Restrict output buffer size for now. */
10876 if (uap
->bufsize
> PAGE_SIZE
) {
10879 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10880 if (realpath
== NULL
) {
10884 error
= fsgetpath_internal(
10885 ctx
, fsid
.val
[0], uap
->objid
,
10886 uap
->bufsize
, realpath
, &length
);
10892 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10894 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10897 FREE(realpath
, M_TEMP
);
10903 * Common routine to handle various flavors of statfs data heading out
10906 * Returns: 0 Success
10910 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10911 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10912 boolean_t partial_copy
)
10915 int my_size
, copy_size
;
10918 struct user64_statfs sfs
;
10919 my_size
= copy_size
= sizeof(sfs
);
10920 bzero(&sfs
, my_size
);
10921 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10922 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10923 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10924 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10925 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10926 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10927 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10928 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10929 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10930 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
10931 sfs
.f_fsid
= sfsp
->f_fsid
;
10932 sfs
.f_owner
= sfsp
->f_owner
;
10933 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10934 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10936 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10938 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10939 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10941 if (partial_copy
) {
10942 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10944 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10947 struct user32_statfs sfs
;
10949 my_size
= copy_size
= sizeof(sfs
);
10950 bzero(&sfs
, my_size
);
10952 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10953 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10954 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10957 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10958 * have to fudge the numbers here in that case. We inflate the blocksize in order
10959 * to reflect the filesystem size as best we can.
10961 if ((sfsp
->f_blocks
> INT_MAX
)
10962 /* Hack for 4061702 . I think the real fix is for Carbon to
10963 * look for some volume capability and not depend on hidden
10964 * semantics agreed between a FS and carbon.
10965 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10966 * for Carbon to set bNoVolumeSizes volume attribute.
10967 * Without this the webdavfs files cannot be copied onto
10968 * disk as they look huge. This change should not affect
10969 * XSAN as they should not be setting these to -1.
10971 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
10972 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
10973 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
10977 * Work out how far we have to shift the block count down to make it fit.
10978 * Note that it's possible to have to shift so far that the resulting
10979 * blocksize would be unreportably large. At that point, we will clip
10980 * any values that don't fit.
10982 * For safety's sake, we also ensure that f_iosize is never reported as
10983 * being smaller than f_bsize.
10985 for (shift
= 0; shift
< 32; shift
++) {
10986 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
10988 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
10991 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10992 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
10993 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
10994 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
10995 #undef __SHIFT_OR_CLIP
10996 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
10997 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
10999 /* filesystem is small enough to be reported honestly */
11000 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
11001 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
11002 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
11003 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
11004 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
11006 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
11007 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
11008 sfs
.f_fsid
= sfsp
->f_fsid
;
11009 sfs
.f_owner
= sfsp
->f_owner
;
11010 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
11011 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
11013 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
11015 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
11016 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
11018 if (partial_copy
) {
11019 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
11021 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11024 if (sizep
!= NULL
) {
11031 * copy stat structure into user_stat structure.
11033 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11035 bzero(usbp
, sizeof(*usbp
));
11037 usbp
->st_dev
= sbp
->st_dev
;
11038 usbp
->st_ino
= sbp
->st_ino
;
11039 usbp
->st_mode
= sbp
->st_mode
;
11040 usbp
->st_nlink
= sbp
->st_nlink
;
11041 usbp
->st_uid
= sbp
->st_uid
;
11042 usbp
->st_gid
= sbp
->st_gid
;
11043 usbp
->st_rdev
= sbp
->st_rdev
;
11044 #ifndef _POSIX_C_SOURCE
11045 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11046 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11047 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11048 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11049 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11050 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11052 usbp
->st_atime
= sbp
->st_atime
;
11053 usbp
->st_atimensec
= sbp
->st_atimensec
;
11054 usbp
->st_mtime
= sbp
->st_mtime
;
11055 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11056 usbp
->st_ctime
= sbp
->st_ctime
;
11057 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11059 usbp
->st_size
= sbp
->st_size
;
11060 usbp
->st_blocks
= sbp
->st_blocks
;
11061 usbp
->st_blksize
= sbp
->st_blksize
;
11062 usbp
->st_flags
= sbp
->st_flags
;
11063 usbp
->st_gen
= sbp
->st_gen
;
11064 usbp
->st_lspare
= sbp
->st_lspare
;
11065 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11066 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11069 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11071 bzero(usbp
, sizeof(*usbp
));
11073 usbp
->st_dev
= sbp
->st_dev
;
11074 usbp
->st_ino
= sbp
->st_ino
;
11075 usbp
->st_mode
= sbp
->st_mode
;
11076 usbp
->st_nlink
= sbp
->st_nlink
;
11077 usbp
->st_uid
= sbp
->st_uid
;
11078 usbp
->st_gid
= sbp
->st_gid
;
11079 usbp
->st_rdev
= sbp
->st_rdev
;
11080 #ifndef _POSIX_C_SOURCE
11081 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11082 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11083 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11084 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11085 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11086 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11088 usbp
->st_atime
= sbp
->st_atime
;
11089 usbp
->st_atimensec
= sbp
->st_atimensec
;
11090 usbp
->st_mtime
= sbp
->st_mtime
;
11091 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11092 usbp
->st_ctime
= sbp
->st_ctime
;
11093 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11095 usbp
->st_size
= sbp
->st_size
;
11096 usbp
->st_blocks
= sbp
->st_blocks
;
11097 usbp
->st_blksize
= sbp
->st_blksize
;
11098 usbp
->st_flags
= sbp
->st_flags
;
11099 usbp
->st_gen
= sbp
->st_gen
;
11100 usbp
->st_lspare
= sbp
->st_lspare
;
11101 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11102 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11106 * copy stat64 structure into user_stat64 structure.
11108 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11110 bzero(usbp
, sizeof(*usbp
));
11112 usbp
->st_dev
= sbp
->st_dev
;
11113 usbp
->st_ino
= sbp
->st_ino
;
11114 usbp
->st_mode
= sbp
->st_mode
;
11115 usbp
->st_nlink
= sbp
->st_nlink
;
11116 usbp
->st_uid
= sbp
->st_uid
;
11117 usbp
->st_gid
= sbp
->st_gid
;
11118 usbp
->st_rdev
= sbp
->st_rdev
;
11119 #ifndef _POSIX_C_SOURCE
11120 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11121 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11122 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11123 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11124 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11125 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11126 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11127 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11129 usbp
->st_atime
= sbp
->st_atime
;
11130 usbp
->st_atimensec
= sbp
->st_atimensec
;
11131 usbp
->st_mtime
= sbp
->st_mtime
;
11132 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11133 usbp
->st_ctime
= sbp
->st_ctime
;
11134 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11135 usbp
->st_birthtime
= sbp
->st_birthtime
;
11136 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11138 usbp
->st_size
= sbp
->st_size
;
11139 usbp
->st_blocks
= sbp
->st_blocks
;
11140 usbp
->st_blksize
= sbp
->st_blksize
;
11141 usbp
->st_flags
= sbp
->st_flags
;
11142 usbp
->st_gen
= sbp
->st_gen
;
11143 usbp
->st_lspare
= sbp
->st_lspare
;
11144 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11145 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11148 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11150 bzero(usbp
, sizeof(*usbp
));
11152 usbp
->st_dev
= sbp
->st_dev
;
11153 usbp
->st_ino
= sbp
->st_ino
;
11154 usbp
->st_mode
= sbp
->st_mode
;
11155 usbp
->st_nlink
= sbp
->st_nlink
;
11156 usbp
->st_uid
= sbp
->st_uid
;
11157 usbp
->st_gid
= sbp
->st_gid
;
11158 usbp
->st_rdev
= sbp
->st_rdev
;
11159 #ifndef _POSIX_C_SOURCE
11160 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11161 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11162 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11163 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11164 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11165 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11166 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11167 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11169 usbp
->st_atime
= sbp
->st_atime
;
11170 usbp
->st_atimensec
= sbp
->st_atimensec
;
11171 usbp
->st_mtime
= sbp
->st_mtime
;
11172 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11173 usbp
->st_ctime
= sbp
->st_ctime
;
11174 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11175 usbp
->st_birthtime
= sbp
->st_birthtime
;
11176 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11178 usbp
->st_size
= sbp
->st_size
;
11179 usbp
->st_blocks
= sbp
->st_blocks
;
11180 usbp
->st_blksize
= sbp
->st_blksize
;
11181 usbp
->st_flags
= sbp
->st_flags
;
11182 usbp
->st_gen
= sbp
->st_gen
;
11183 usbp
->st_lspare
= sbp
->st_lspare
;
11184 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11185 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11189 * Purge buffer cache for simulating cold starts
11191 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11193 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11195 return VNODE_RETURNED
;
11198 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11200 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11202 return VFS_RETURNED
;
11206 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11208 if (!kauth_cred_issuser(kauth_cred_get()))
11211 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);
11217 * gets the vnode associated with the (unnamed) snapshot directory
11218 * for a Filesystem. The snapshot directory vnode is returned with
11219 * an iocount on it.
11222 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11224 return (VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
));
11228 * Get the snapshot vnode.
11230 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11231 * needs nameidone() on ndp.
11233 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11235 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11239 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
11240 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
11241 #if !CONFIG_TRIGGERS
11244 enum path_operation pathop
,
11250 struct vfs_attr vfa
;
11255 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
11259 if (!vnode_isvroot(*rvpp
)) {
11264 /* Make sure the filesystem supports snapshots */
11265 VFSATTR_INIT(&vfa
);
11266 VFSATTR_WANTED(&vfa
, f_capabilities
);
11267 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
11268 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
11269 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
11270 VOL_CAP_INT_SNAPSHOT
)) ||
11271 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
11272 VOL_CAP_INT_SNAPSHOT
))) {
11277 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
11281 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11282 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11287 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11288 * (the length returned by copyinstr includes the terminating NUL)
11290 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
11291 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
11295 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++);
11296 if (i
< (int)name_len
) {
11302 if (op
== CREATE
) {
11303 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
11305 } else if (op
== DELETE
) {
11306 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
11313 /* Check if the snapshot already exists ... */
11314 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
11315 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
11316 ndp
->ni_dvp
= *sdvpp
;
11318 error
= namei(ndp
);
11320 FREE(name_buf
, M_TEMP
);
11336 * create a filesystem snapshot (for supporting filesystems)
11338 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11339 * We get to the (unnamed) snapshot directory vnode and create the vnode
11340 * for the snapshot in it.
11344 * a) Passed in name for snapshot cannot have slashes.
11345 * b) name can't be "." or ".."
11347 * Since this requires superuser privileges, vnode_authorize calls are not
11351 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11354 vnode_t rvp
, snapdvp
;
11356 struct nameidata namend
;
11358 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
11363 if (namend
.ni_vp
) {
11364 vnode_put(namend
.ni_vp
);
11367 struct vnode_attr va
;
11368 vnode_t vp
= NULLVP
;
11371 VATTR_SET(&va
, va_type
, VREG
);
11372 VATTR_SET(&va
, va_mode
, 0);
11374 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
11375 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
11380 nameidone(&namend
);
11381 vnode_put(snapdvp
);
11387 * Delete a Filesystem snapshot
11389 * get the vnode for the unnamed snapshot directory and the snapshot and
11390 * delete the snapshot.
11393 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11396 vnode_t rvp
, snapdvp
;
11398 struct nameidata namend
;
11400 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
11405 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
11406 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
11408 vnode_put(namend
.ni_vp
);
11409 nameidone(&namend
);
11410 vnode_put(snapdvp
);
11417 * Revert a filesystem to a snapshot
11419 * Marks the filesystem to revert to the given snapshot on next mount.
11422 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11428 struct fs_snapshot_revert_args revert_data
;
11429 struct componentname cnp
;
11433 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11437 mp
= vnode_mount(rvp
);
11439 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11440 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11442 FREE(name_buf
, M_TEMP
);
11448 error
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
);
11450 FREE(name_buf
, M_TEMP
);
11457 * Grab mount_iterref so that we can release the vnode,
11458 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11460 error
= mount_iterref (mp
, 0);
11463 FREE(name_buf
, M_TEMP
);
11467 memset(&cnp
, 0, sizeof(cnp
));
11468 cnp
.cn_pnbuf
= (char *)name_buf
;
11469 cnp
.cn_nameiop
= LOOKUP
;
11470 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11471 cnp
.cn_pnlen
= MAXPATHLEN
;
11472 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11473 cnp
.cn_namelen
= (int)name_len
;
11474 revert_data
.sr_cnp
= &cnp
;
11476 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
11477 mount_iterdrop(mp
);
11478 FREE(name_buf
, M_TEMP
);
11481 /* If there was any error, try again using VNOP_IOCTL */
11484 struct nameidata namend
;
11486 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
11493 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
11494 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11497 #ifndef APFS_REVERT_TO_SNAPSHOT
11498 #define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11501 error
= VNOP_IOCTL(namend
.ni_vp
, APFS_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
11504 vnode_put(namend
.ni_vp
);
11505 nameidone(&namend
);
11506 vnode_put(snapdvp
);
11514 * rename a Filesystem snapshot
11516 * get the vnode for the unnamed snapshot directory and the snapshot and
11517 * rename the snapshot. This is a very specialised (and simple) case of
11518 * rename(2) (which has to deal with a lot more complications). It differs
11519 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11522 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
11523 __unused
uint32_t flags
, vfs_context_t ctx
)
11525 vnode_t rvp
, snapdvp
;
11527 caddr_t newname_buf
;
11530 struct nameidata
*fromnd
, *tond
;
11531 /* carving out a chunk for structs that are too big to be on stack. */
11533 struct nameidata from_node
;
11534 struct nameidata to_node
;
11537 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
11538 fromnd
= &__rename_data
->from_node
;
11539 tond
= &__rename_data
->to_node
;
11541 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
11545 fvp
= fromnd
->ni_vp
;
11547 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11548 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
11553 * Some sanity checks- new name can't be empty, "." or ".." or have
11555 * (the length returned by copyinstr includes the terminating NUL)
11557 * The FS rename VNOP is supposed to handle this but we'll pick it
11560 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
11561 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
11565 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++);
11566 if (i
< (int)name_len
) {
11572 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
11578 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
11579 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
11580 tond
->ni_dvp
= snapdvp
;
11582 error
= namei(tond
);
11585 } else if (tond
->ni_vp
) {
11587 * snapshot rename behaves differently than rename(2) - if the
11588 * new name exists, EEXIST is returned.
11590 vnode_put(tond
->ni_vp
);
11595 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
11596 &tond
->ni_cnd
, ctx
);
11601 FREE(newname_buf
, M_TEMP
);
11603 vnode_put(snapdvp
);
11607 FREE(__rename_data
, M_TEMP
);
11612 * Mount a Filesystem snapshot
11614 * get the vnode for the unnamed snapshot directory and the snapshot and
11615 * mount the snapshot.
11618 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
11619 __unused user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
11621 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
11623 struct nameidata
*snapndp
, *dirndp
;
11624 /* carving out a chunk for structs that are too big to be on stack. */
11626 struct nameidata snapnd
;
11627 struct nameidata dirnd
;
11628 } * __snapshot_mount_data
;
11630 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
11632 snapndp
= &__snapshot_mount_data
->snapnd
;
11633 dirndp
= &__snapshot_mount_data
->dirnd
;
11635 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
11640 snapvp
= snapndp
->ni_vp
;
11641 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
11646 /* Get the vnode to be covered */
11647 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
11648 UIO_USERSPACE
, directory
, ctx
);
11649 error
= namei(dirndp
);
11653 vp
= dirndp
->ni_vp
;
11654 pvp
= dirndp
->ni_dvp
;
11656 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
11659 mount_t mp
= vnode_mount(rvp
);
11660 struct fs_snapshot_mount_args smnt_data
;
11662 smnt_data
.sm_mp
= mp
;
11663 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
11664 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
11665 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), 0,
11666 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
11674 vnode_put(snapdvp
);
11676 nameidone(snapndp
);
11678 FREE(__snapshot_mount_data
, M_TEMP
);
11683 * Root from a snapshot of the filesystem
11685 * Marks the filesystem to root from the given snapshot on next boot.
11688 snapshot_root(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11694 struct fs_snapshot_root_args root_data
;
11695 struct componentname cnp
;
11699 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11703 mp
= vnode_mount(rvp
);
11705 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11706 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11708 FREE(name_buf
, M_TEMP
);
11713 // XXX MAC checks ?
11716 * Grab mount_iterref so that we can release the vnode,
11717 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11719 error
= mount_iterref (mp
, 0);
11722 FREE(name_buf
, M_TEMP
);
11726 memset(&cnp
, 0, sizeof(cnp
));
11727 cnp
.cn_pnbuf
= (char *)name_buf
;
11728 cnp
.cn_nameiop
= LOOKUP
;
11729 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11730 cnp
.cn_pnlen
= MAXPATHLEN
;
11731 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11732 cnp
.cn_namelen
= (int)name_len
;
11733 root_data
.sr_cnp
= &cnp
;
11735 error
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
);
11737 mount_iterdrop(mp
);
11738 FREE(name_buf
, M_TEMP
);
/*
 * FS snapshot operations dispatcher
 */
11747 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
11748 __unused
int32_t *retval
)
11751 vfs_context_t ctx
= vfs_context_current();
11753 AUDIT_ARG(fd
, uap
->dirfd
);
11754 AUDIT_ARG(value32
, uap
->op
);
11756 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
11761 case SNAPSHOT_OP_CREATE
:
11762 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11764 case SNAPSHOT_OP_DELETE
:
11765 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11767 case SNAPSHOT_OP_RENAME
:
11768 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
11771 case SNAPSHOT_OP_MOUNT
:
11772 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
11773 uap
->data
, uap
->flags
, ctx
);
11775 case SNAPSHOT_OP_REVERT
:
11776 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11778 case SNAPSHOT_OP_ROOT
:
11779 error
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);