2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
126 #include <miscfs/routefs/routefs.h>
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
/*
 * GET_PATH / RELEASE_PATH
 *
 * Acquire and release a path buffer through the lvalue x.  Two alternative
 * definitions are visible: one obtains the buffer from get_pathbuff(), the
 * other allocates MAXPATHLEN bytes with MALLOC_ZONE(..., M_NAMEI, M_WAITOK)
 * and releases them with FREE_ZONE(..., MAXPATHLEN, M_NAMEI).  Each visible
 * expansion already ends in a semicolon.
 *
 * NOTE(review): the closing #endif names CONFIG_FSE, but the opening
 * conditional, the alternative-branch directive and the body of the first
 * RELEASE_PATH are not visible in this extract -- confirm against the
 * complete file.
 */
135 #define GET_PATH(x) \
136 (x) = get_pathbuff();
137 #define RELEASE_PATH(x) \
140 #define GET_PATH(x) \
141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
142 #define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144 #endif /* CONFIG_FSE */
/*
 * Forward declarations and file-scope state.
 *
 * In order, the visible text declares:
 *   - static helpers for directory checking, quota enabling, getfsstat,
 *     utimes handling, sync, statfs munging, fsync and the common mount
 *     path (checkdirs_callback ... mount_common);
 *   - the non-static vfs_notify_mount(), prepare_coveredvp() and
 *     fg_vn_data_alloc();
 *   - MAX_AUTHORIZE_ENOENT_RETRIES (1024), the retry bound described in the
 *     comment that precedes it;
 *   - rmdirat_internal() and fsgetpath_internal();
 *   - the image-source relocation helpers, guarded by
 *     #ifdef CONFIG_IMGSRC_ACCESS;
 *   - the union_dircheckp function pointer, sync_internal(), unlink1();
 *   - extern lock group/attribute pointers for fd_vn;
 *   - mount_generation and vfs_nummntops, both initialised to 0;
 *   - extern vnops and, under CONFIG_APPLEDOUBLE,
 *     rmdir_remove_orphaned_appleDouble().
 *
 * NOTE(review): this extract is missing lines.  The struct that the first
 * comment announces is absent, and the statfs64_common() and mount_common()
 * prototypes are cut off before their final parameters.
 */
146 /* struct for checkdirs iteration */
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p
, void * arg
);
154 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
155 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
156 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
157 static int getfsstat_callback(mount_t mp
, void * arg
);
158 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
159 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
160 static int sync_callback(mount_t
, void *);
161 static void sync_thread(void *, __unused wait_result_t
);
162 static int sync_async(int);
163 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
164 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
165 boolean_t partial_copy
);
166 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
168 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
169 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
170 struct componentname
*cnp
, user_addr_t fsmountargs
,
171 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
173 void vfs_notify_mount(vnode_t pdvp
);
175 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
177 struct fd_vn_data
* fg_vn_data_alloc(void);
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
191 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
193 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
197 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
198 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
199 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
200 static void mount_end_update(mount_t mp
);
201 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
202 #endif /* CONFIG_IMGSRC_ACCESS */
204 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
207 int sync_internal(void);
210 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
212 extern lck_grp_t
*fd_vn_lck_grp
;
213 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
214 extern lck_attr_t
*fd_vn_lck_attr
;
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
221 uint32_t mount_generation
= 0;
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops
=0;
226 extern const struct fileops vnops
;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
232 * Virtual File System System Calls
235 #if NFSCLIENT || DEVFS || ROUTEFS
237 * Private in-kernel mounting spi (NFS only, not exported)
241 vfs_iskernelmount(mount_t mp
)
243 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount
 *
 * In-kernel mount entry point.  The visible statements:
 *   - initialise a nameidata for an OP_MOUNT lookup of path in
 *     UIO_SYSSPACE with FOLLOW | AUDITVNPATH1 | WANTPARENT;
 *   - in one branch, point the component name buffer straight at path
 *     (cn_pnbuf = path, cn_pnlen = strlen(path) + 1) instead of using a
 *     lookup result;
 *   - call mount_common() with the fs type, pvp, vp, the component name,
 *     data cast to a user address, syscall_flags as the mount flags,
 *     kern_flags as the internal flags, a NULL label string and
 *     kernelmount == TRUE.
 *
 * NOTE(review): this extract is missing lines (return type, local
 * declarations, the lookup call, the condition selecting the pnbuf branch,
 * cleanup and the return), so when each branch runs cannot be stated from
 * here.  Presumably the pnbuf branch is the one taken when the caller
 * supplies vp -- confirm.
 *
 * NOTE(review): kern_flags is annotated __unused yet is forwarded to
 * mount_common(); datalen is annotated __unused and is not referenced in
 * the visible text.
 */
248 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
249 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
255 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
256 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
259 * Get the vnode to be covered if it's not supplied
269 char *pnbuf
= CAST_DOWN(char *, path
);
271 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
272 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
276 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
277 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
287 #endif /* NFSCLIENT || DEVFS */
290 * Mount a file system.
294 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
296 struct __mac_mount_args muap
;
298 muap
.type
= uap
->type
;
299 muap
.path
= uap
->path
;
300 muap
.flags
= uap
->flags
;
301 muap
.data
= uap
->data
;
302 muap
.mac_p
= USER_ADDR_NULL
;
303 return (__mac_mount(p
, &muap
, retval
));
307 vfs_notify_mount(vnode_t pdvp
)
309 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
310 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
/*
 * Starts out FALSE.  The only visible write is in __mac_mount(), under
 * #if CHECK_CS_VALIDATION_BITMAP, which sets it to TRUE when the request
 * does not include MNT_RDONLY; the comment next to that write ties it to
 * attempts to mount the root file system read-write.
 */
332 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount
 *
 * Front end for mount requests that may carry a MAC label.  In order, the
 * visible statements:
 *   - copy the file system type name from uap->type into a local
 *     MFSNAMELEN-byte buffer with copyinstr();
 *   - initialise a nameidata for an OP_MOUNT lookup of uap->path in
 *     UIO_USERSPACE with FOLLOW | AUDITVNPATH1 | WANTPARENT;
 *   - under CONFIG_IMGSRC_ACCESS, hand requests whose flags are exactly
 *     MNT_IMGSRC_BY_INDEX to relocate_imageboot_source();
 *   - when uap->mac_p is not USER_ADDR_NULL, copy in a user64_mac or a
 *     user32_mac, range-check m_buflen against 2 and
 *     MAC_MAX_LABEL_BUF_LEN, allocate labelstr from M_MACTEMP and fill it
 *     with copyinstr(), then record it with AUDIT_ARG(mac_string, ...);
 *   - record the flags with AUDIT_ARG(fflags, ...);
 *   - test MNT_UNION (the accompanying comment says union mounts are not
 *     allowed on release kernels);
 *   - special-case a vnode that is VROOT on a MNT_ROOTFS mount: for a
 *     union mount on that vnode MNT_UPDATE is cleared from flags so the
 *     request is treated as a fresh mount; MNT_RDONLY is tested, and under
 *     CHECK_CS_VALIDATION_BITMAP root_fs_upgrade_try is set to TRUE when
 *     MNT_RDONLY is absent;
 *   - call mount_common() with internal flags 0 and kernelmount == FALSE;
 *   - free labelstr (M_MACTEMP) and test need_nameidone.
 *
 * NOTE(review): this extract is missing lines (return type, braces, the
 * lookup call, error checks, labels, the return), so which error each test
 * produces cannot be stated from here.
 *
 * NOTE(review): inside the branch guarded by flags == MNT_IMGSRC_BY_INDEX
 * the by_index argument is computed from the same comparison, so it looks
 * like it is always true there -- confirm.
 *
 * NOTE(review): retval is annotated __unused and is not referenced in the
 * visible text.
 */
335 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
339 int need_nameidone
= 0;
340 vfs_context_t ctx
= vfs_context_current();
341 char fstypename
[MFSNAMELEN
];
344 char *labelstr
= NULL
;
345 int flags
= uap
->flags
;
347 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
348 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
353 * Get the fs type name from user space
355 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
360 * Get the vnode to be covered
362 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
363 UIO_USERSPACE
, uap
->path
, ctx
);
372 #ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags
== MNT_IMGSRC_BY_INDEX
) {
375 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
376 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
379 #endif /* CONFIG_IMGSRC_ACCESS */
383 * Get the label string (if any) from user space
385 if (uap
->mac_p
!= USER_ADDR_NULL
) {
390 struct user64_mac mac64
;
391 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
392 mac
.m_buflen
= mac64
.m_buflen
;
393 mac
.m_string
= mac64
.m_string
;
395 struct user32_mac mac32
;
396 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
397 mac
.m_buflen
= mac32
.m_buflen
;
398 mac
.m_string
= mac32
.m_string
;
402 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
403 (mac
.m_buflen
< 2)) {
407 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
408 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
412 AUDIT_ARG(mac_string
, labelstr
);
414 #endif /* CONFIG_MACF */
416 AUDIT_ARG(fflags
, flags
);
419 if (flags
& MNT_UNION
) {
420 /* No union mounts on release kernels */
426 if ((vp
->v_flag
& VROOT
) &&
427 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
428 if (!(flags
& MNT_UNION
)) {
433 * For a union mount on '/', treat it as fresh
434 * mount instead of update.
435 * Otherwise, union mouting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
438 * after it gets ENOENT on union mount.
440 flags
= (flags
& ~(MNT_UPDATE
));
444 if ((flags
& MNT_RDONLY
) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
454 * bitmap optimization.
456 #if CHECK_CS_VALIDATION_BITMAP
457 if ((flags
& MNT_RDONLY
) == 0 ) {
458 root_fs_upgrade_try
= TRUE
;
463 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
464 labelstr
, FALSE
, ctx
);
470 FREE(labelstr
, M_MACTEMP
);
471 #endif /* CONFIG_MACF */
479 if (need_nameidone
) {
487 * common mount implementation (final stage of mounting)
490 * fstypename file system type (i.e. its vfs name)
491 * pvp parent of covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
/*
 * mount_common
 *
 * Shared back end for mount requests; called in this file from
 * kernel_mount() (kernelmount == TRUE) and __mac_mount()
 * (kernelmount == FALSE).
 *
 * NOTE(review): this copy of the function is an incomplete extract -- the
 * return type, braces, labels, most error branches and several statements
 * are missing.  The summary below lists only what the visible statements
 * show, in order; which errno each test produces cannot be stated from
 * here.
 *
 * Update of an existing mount (flags & MNT_UPDATE):
 *   - tests that vp carries VROOT and that MNT_LUNMOUNT is not set in
 *     mnt_lflag, then takes mnt_rwlock exclusively and records that in
 *     is_rwlock_locked;
 *   - tests MNT_RELOAD against a mount that is not MNT_RDONLY, a request
 *     that would drop MNT_CPROTECT, and (CONFIG_IMGSRC_ACCESS) a read-only
 *     downgrade of a mount flagged MNTK_BACKS_ROOT;
 *   - compares the caller uid with mnt_vfsstat.f_owner, falling back to
 *     suser(), and calls mac_mount_check_remount();
 *   - for non-kernel mounts by a non-superuser, ORs MNT_NOSUID | MNT_NODEV
 *     into flags;
 *   - ORs the MNT_RELOAD | MNT_FORCE | MNT_UPDATE request bits into
 *     mnt_flag and reuses mp->mnt_vtable as vfsp.
 *
 * New mount:
 *   - applies the same MNT_NOSUID | MNT_NODEV enforcement;
 *   - walks the vfsconf list comparing vfc_name with fstypename
 *     (strncmp, MFSNAMELEN) and bumps vfc_refcount on a match;
 *   - sets error to EINVAL for a kernel mount of a VFC_VFSLOCALARGS
 *     file system;
 *   - calls prepare_coveredvp(), asking it to skip authorization when
 *     KERNEL_MOUNT_NOAUTH is set in internal_flags;
 *   - allocates and zeroes a struct mount, fills in default I/O limits,
 *     list heads, ops/vtable pointers, type and mount-on names, covered
 *     vnode and owner uid, and takes mnt_rwlock exclusively.
 *
 * Both paths then:
 *   - fold the request flags into mnt_flag (MNT_RDONLY handling, with
 *     MNTK_WANTRDWR set when a read-only mount is asked to go read-write);
 *   - for VFC_VFSLOCALARGS file systems (unless KERNEL_MOUNT_SNAPSHOT)
 *     copy in the device path pointer (64-bit or 32-bit layout), look the
 *     device up with namei(), record it in f_mntfromname, test that it is
 *     a VBLK vnode with a major number below nblkdev, and authorize
 *     non-superuser callers for read (plus write unless MNT_RDONLY);
 *   - on a first mount take a usecount on the device and call
 *     vfs_mountedon(), vcount(), VNOP_FSYNC(), buf_invalidateblks(),
 *     mac_vnode_check_open() and VNOP_OPEN(); on a read-write upgrade
 *     re-authorize and re-open the device through bdevsw[maj].d_open;
 *   - call VFS_IOCTL(VFSIOC_MOUNT_SNAPSHOT) or VFS_MOUNT();
 *   - after an update: adjust MNT_RDONLY / MNTK_WANTRDWR, signal
 *     VQ_UPDATE, drop mnt_rwlock and call enablequotas();
 *   - after a new mount: for MNT_MULTILABEL mounts label the root vnode,
 *     publish mp in vp->v_mountedhere, vnode_ref() the covered vnode, run
 *     checkdirs(), VFS_START() and mount_list_add(), derive MNTK_*
 *     capability bits from vfs_getattr(), bump vfs_nummntops, and notify
 *     listeners via vfs_notify_mount() and IOBSDMountChange().
 *
 * The tail of the function holds the failure paths: clearing VMOUNT,
 * dropping vfc_refcount, releasing and closing the device vnode, releasing
 * mnt_rwlock when is_rwlock_locked is TRUE, destroying the mount lock and
 * MAC label, and freeing the struct mount.
 */
501 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
502 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
503 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
506 #pragma unused(labelstr)
508 struct vnode
*devvp
= NULLVP
;
509 struct vnode
*device_vnode
= NULLVP
;
514 struct vfstable
*vfsp
= (struct vfstable
*)0;
515 struct proc
*p
= vfs_context_proc(ctx
);
517 user_addr_t devpath
= USER_ADDR_NULL
;
520 boolean_t vfsp_ref
= FALSE
;
521 boolean_t is_rwlock_locked
= FALSE
;
522 boolean_t did_rele
= FALSE
;
523 boolean_t have_usecount
= FALSE
;
526 * Process an update for an existing mount
528 if (flags
& MNT_UPDATE
) {
529 if ((vp
->v_flag
& VROOT
) == 0) {
535 /* unmount in progress return error */
537 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
543 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
544 is_rwlock_locked
= TRUE
;
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
549 if ((flags
& MNT_RELOAD
) &&
550 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
559 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
560 ((flags
& MNT_CPROTECT
) == 0)) {
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
568 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
572 #endif /* CONFIG_IMGSRC_ACCESS */
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
578 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
579 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
583 error
= mac_mount_check_remount(ctx
, mp
);
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
592 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
593 flags
|= MNT_NOSUID
| MNT_NODEV
;
594 if (mp
->mnt_flag
& MNT_NOEXEC
)
601 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
603 vfsp
= mp
->mnt_vtable
;
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
610 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
611 flags
|= MNT_NOSUID
| MNT_NODEV
;
612 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text
, fstypename
);
619 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
620 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
621 vfsp
->vfc_refcount
++;
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
634 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
635 error
= EINVAL
; /* unsupported request */
639 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
645 * Allocate and initialize the filesystem (mount_t)
647 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
649 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
652 /* Initialize the default IO constraints */
653 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
654 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
655 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
656 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
657 mp
->mnt_devblocksize
= DEV_BSIZE
;
658 mp
->mnt_alignmentmask
= PAGE_MASK
;
659 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
662 mp
->mnt_realrootvp
= NULLVP
;
663 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
665 TAILQ_INIT(&mp
->mnt_vnodelist
);
666 TAILQ_INIT(&mp
->mnt_workerqueue
);
667 TAILQ_INIT(&mp
->mnt_newvnodes
);
669 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
670 is_rwlock_locked
= TRUE
;
671 mp
->mnt_op
= vfsp
->vfc_vfsops
;
672 mp
->mnt_vtable
= vfsp
;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
675 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
676 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
677 mp
->mnt_vnodecovered
= vp
;
678 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
679 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
680 mp
->mnt_devbsdunit
= 0;
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
685 #if NFSCLIENT || DEVFS || ROUTEFS
687 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
688 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
689 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
690 #endif /* NFSCLIENT || DEVFS */
694 * Set the mount level flags.
696 if (flags
& MNT_RDONLY
)
697 mp
->mnt_flag
|= MNT_RDONLY
;
698 else if (mp
->mnt_flag
& MNT_RDONLY
) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
705 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
707 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
708 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
709 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
710 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
711 MNT_QUARANTINE
| MNT_CPROTECT
);
712 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
713 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
714 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
715 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
716 MNT_QUARANTINE
| MNT_CPROTECT
);
719 if (flags
& MNT_MULTILABEL
) {
720 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
724 mp
->mnt_flag
|= MNT_MULTILABEL
;
728 * Process device path for local file systems if requested
730 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
&&
731 !(internal_flags
& KERNEL_MOUNT_SNAPSHOT
)) {
732 if (vfs_context_is64bit(ctx
)) {
733 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
735 fsmountargs
+= sizeof(devpath
);
738 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
740 /* munge into LP64 addr */
741 devpath
= CAST_USER_ADDR_T(tmp
);
742 fsmountargs
+= sizeof(tmp
);
745 /* Lookup device and authorize access to it */
749 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
750 if ( (error
= namei(&nd
)) )
753 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
758 if (devvp
->v_type
!= VBLK
) {
762 if (major(devvp
->v_rdev
) >= nblkdev
) {
767 * If mount by non-root, then verify that user has necessary
768 * permissions on the device.
770 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
771 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
773 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
774 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
775 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
779 /* On first mount, preflight and open device */
780 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
781 if ( (error
= vnode_ref(devvp
)) )
784 * Disallow multiple mounts of the same device.
785 * Disallow mounting of a device that is currently in use
786 * (except for root, which might share swap device for miniroot).
787 * Flush out any old buffers remaining from a previous use.
789 if ( (error
= vfs_mountedon(devvp
)) )
792 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
796 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
800 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
803 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
805 error
= mac_vnode_check_open(ctx
,
807 ronly
? FREAD
: FREAD
|FWRITE
);
811 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
814 mp
->mnt_devvp
= devvp
;
815 device_vnode
= devvp
;
817 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
818 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
819 (device_vnode
= mp
->mnt_devvp
)) {
823 * If upgrade to read-write by non-root, then verify
824 * that user has necessary permissions on the device.
826 vnode_getalways(device_vnode
);
828 if (suser(vfs_context_ucred(ctx
), NULL
) &&
829 (error
= vnode_authorize(device_vnode
, NULL
,
830 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
832 vnode_put(device_vnode
);
836 /* Tell the device that we're upgrading */
837 dev
= (dev_t
)device_vnode
->v_rdev
;
840 if ((u_int
)maj
>= (u_int
)nblkdev
)
841 panic("Volume mounted on a device with invalid major number.");
843 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
844 vnode_put(device_vnode
);
845 device_vnode
= NULLVP
;
852 if ((flags
& MNT_UPDATE
) == 0) {
853 mac_mount_label_init(mp
);
854 mac_mount_label_associate(ctx
, mp
);
857 if ((flags
& MNT_UPDATE
) != 0) {
858 error
= mac_mount_check_label_update(ctx
, mp
);
865 * Mount the filesystem.
867 if (internal_flags
& KERNEL_MOUNT_SNAPSHOT
) {
868 error
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
,
869 (caddr_t
)fsmountargs
, 0, ctx
);
871 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
874 if (flags
& MNT_UPDATE
) {
875 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
876 mp
->mnt_flag
&= ~MNT_RDONLY
;
878 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
879 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
881 mp
->mnt_flag
= flag
; /* restore flag value */
882 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
883 lck_rw_done(&mp
->mnt_rwlock
);
884 is_rwlock_locked
= FALSE
;
886 enablequotas(mp
, ctx
);
891 * Put the new filesystem on the mount list after root.
894 struct vfs_attr vfsattr
;
896 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
897 error
= VFS_ROOT(mp
, &rvp
, ctx
);
899 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
902 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
904 * drop reference provided by VFS_ROOT
914 CLR(vp
->v_flag
, VMOUNT
);
915 vp
->v_mountedhere
= mp
;
919 * taking the name_cache_lock exclusively will
920 * insure that everyone is out of the fast path who
921 * might be trying to use a now stale copy of
922 * vp->v_mountedhere->mnt_realrootvp
923 * bumping mount_generation causes the cached values
930 error
= vnode_ref(vp
);
935 have_usecount
= TRUE
;
937 error
= checkdirs(vp
, ctx
);
939 /* Unmount the filesystem as cdir/rdirs cannot be updated */
943 * there is no cleanup code here so I have made it void
944 * we need to revisit this
946 (void)VFS_START(mp
, 0, ctx
);
948 if (mount_list_add(mp
) != 0) {
950 * The system is shutting down trying to umount
951 * everything, so fail with a plausible errno.
956 lck_rw_done(&mp
->mnt_rwlock
);
957 is_rwlock_locked
= FALSE
;
959 /* Check if this mounted file system supports EAs or named streams. */
960 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
961 VFSATTR_INIT(&vfsattr
);
962 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
963 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
964 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
965 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
966 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
967 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
968 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
971 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
972 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
973 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
976 /* Check if this file system supports path from id lookups. */
977 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
978 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
979 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
980 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
981 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
982 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
985 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) &&
986 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) {
987 mp
->mnt_kern_flag
|= MNTK_DIR_HARDLINKS
;
990 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
991 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
993 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
994 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
996 /* increment the operations count */
997 OSAddAtomic(1, &vfs_nummntops
);
998 enablequotas(mp
, ctx
);
1001 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
1004 * cache the IO attributes for the underlying physical media...
1005 * an error return indicates the underlying driver doesn't
1006 * support all the queries necessary... however, reasonable
1007 * defaults will have been set, so no reason to bail or care
1009 vfs_init_io_attributes(device_vnode
, mp
);
1012 /* Now that mount is setup, notify the listeners */
1013 vfs_notify_mount(pvp
);
1014 IOBSDMountChange(mp
, kIOMountChangeMount
);
1017 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1018 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1019 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1020 mp
->mnt_vtable
->vfc_name
, error
);
1023 vnode_lock_spin(vp
);
1024 CLR(vp
->v_flag
, VMOUNT
);
1027 mp
->mnt_vtable
->vfc_refcount
--;
1028 mount_list_unlock();
1030 if (device_vnode
) {
1031 vnode_rele(device_vnode
);
1032 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1034 lck_rw_done(&mp
->mnt_rwlock
);
1035 is_rwlock_locked
= FALSE
;
1038 * if we get here, we have a mount structure that needs to be freed,
1039 * but since the coveredvp hasn't yet been updated to point at it,
1040 * no need to worry about other threads holding a crossref on this mp
1041 * so it's ok to just free it
1043 mount_lock_destroy(mp
);
1045 mac_mount_label_destroy(mp
);
1047 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1051 * drop I/O count on the device vp if there was one
1053 if (devpath
&& devvp
)
1058 /* Error condition exits */
1060 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1063 * If the mount has been placed on the covered vp,
1064 * it may have been discovered by now, so we have
1065 * to treat this just like an unmount
1067 mount_lock_spin(mp
);
1068 mp
->mnt_lflag
|= MNT_LDEAD
;
1071 if (device_vnode
!= NULLVP
) {
1072 vnode_rele(device_vnode
);
1073 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1078 vnode_lock_spin(vp
);
1081 vp
->v_mountedhere
= (mount_t
) 0;
1085 if (have_usecount
) {
1089 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1092 if (devpath
&& devvp
)
1095 /* Release mnt_rwlock only when it was taken */
1096 if (is_rwlock_locked
== TRUE
) {
1097 lck_rw_done(&mp
->mnt_rwlock
);
1101 if (mp
->mnt_crossref
)
1102 mount_dropcrossref(mp
, vp
, 0);
1104 mount_lock_destroy(mp
);
1106 mac_mount_label_destroy(mp
);
1108 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1113 vfsp
->vfc_refcount
--;
1114 mount_list_unlock();
1121 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp
 *
 * Vet and mark the vnode that a new mount is about to cover.  In order,
 * the visible statements:
 *   - fetch va_uid with vnode_getattr() and compare it with the caller
 *     uid, with vfs_context_issuser() as the override (the accompanying
 *     comment: a non-root caller must own the directory);
 *   - flush the vnode with VNOP_FSYNC(MNT_WAIT) and
 *     buf_invalidateblks(BUF_WRITE_DATA);
 *   - test that the vnode is a VDIR;
 *   - test for a competing mount: VMOUNT already set together with a
 *     non-NULL v_mountedhere;
 *   - call mac_mount_check_mount();
 *   - under vnode_lock_spin(), set VMOUNT in vp->v_flag.
 *
 * cnp and fsname are named in a #pragma unused; presumably that applies to
 * builds where the MAC check is compiled out -- the enclosing conditional
 * is not visible.
 *
 * NOTE(review): this extract is missing lines (return type, braces, error
 * values, labels, the unlock and the return), and no use of skip_auth is
 * visible, so how it gates the ownership check cannot be stated from here.
 */
1125 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1128 #pragma unused(cnp,fsname)
1130 struct vnode_attr va
;
1135 * If the user is not root, ensure that they own the directory
1136 * onto which we are attempting to mount.
1139 VATTR_WANTED(&va
, va_uid
);
1140 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1141 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1142 (!vfs_context_issuser(ctx
)))) {
1148 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1151 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1154 if (vp
->v_type
!= VDIR
) {
1159 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1165 error
= mac_mount_check_mount(ctx
, vp
,
1171 vnode_lock_spin(vp
);
1172 SET(vp
->v_flag
, VMOUNT
);
1179 #if CONFIG_IMGSRC_ACCESS
/*
 * IMGSRC_DEBUG: diagnostic output for the image-source relocation code.
 * Two alternative definitions are visible: one forwards its arguments to
 * printf(), the other expands to an empty do/while(0) statement.
 *
 * NOTE(review): the conditional that selects between the two definitions
 * is not visible in this extract.
 */
1182 #define IMGSRC_DEBUG(args...) printf(args)
1184 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname
 *
 * Check that a user-supplied device path names the device that backs mp
 * and, if so, record that path as the mount's f_mntfromname.  In order,
 * the visible statements:
 *   - look devpath up with namei() (OP_LOOKUP, FOLLOW, UIO_USERSPACE);
 *   - test that the result is a block device (vnode_isblk);
 *   - test that mp->mnt_devvp is not NULLVP and take an iocount on it
 *     with vnode_getwithref();
 *   - compare vnode_specrdev() of the looked-up vnode and of mnt_devvp;
 *   - copy the looked-up path (cn_pnbuf) into mnt_vfsstat.f_mntfromname,
 *     bounded by MAXPATHLEN;
 *   - for callers that are not superuser, vnode_authorize() the looked-up
 *     vnode for KAUTH_VNODE_READ_DATA, plus KAUTH_VNODE_WRITE_DATA when
 *     the mount is not MNT_RDONLY;
 *   - drop the iocount on mnt_devvp with vnode_put().
 * Each failed test logs through IMGSRC_DEBUG.
 *
 * NOTE(review): this extract is missing lines (return type, braces, error
 * values, labels, cleanup of the lookup, the return), and no store through
 * devvpp is visible, so what is handed back to the caller cannot be stated
 * from here.
 */
1188 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1190 struct nameidata nd
;
1191 vnode_t vp
, realdevvp
;
1195 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1196 if ( (error
= namei(&nd
)) ) {
1197 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1203 if (!vnode_isblk(vp
)) {
1204 IMGSRC_DEBUG("Not block device.\n");
1209 realdevvp
= mp
->mnt_devvp
;
1210 if (realdevvp
== NULLVP
) {
1211 IMGSRC_DEBUG("No device backs the mount.\n");
1216 error
= vnode_getwithref(realdevvp
);
1218 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1222 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1223 IMGSRC_DEBUG("Wrong dev_t.\n");
1228 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1231 * If mount by non-root, then verify that user has necessary
1232 * permissions on the device.
1234 if (!vfs_context_issuser(ctx
)) {
1235 accessmode
= KAUTH_VNODE_READ_DATA
;
1236 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1237 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1238 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1239 IMGSRC_DEBUG("Access denied.\n");
1247 vnode_put(realdevvp
);
1258 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1259 * and call checkdirs()
/*
 * place_mount_and_checkdirs
 *
 * Attach an existing mount to a new covered vnode.  In order, the visible
 * statements:
 *   - set mp->mnt_vnodecovered to vp;
 *   - under vnode_lock_spin(), clear VMOUNT in vp->v_flag and set
 *     vp->v_mountedhere to mp;
 *   - release the name cache lock (name_cache_unlock());
 *   - take a usecount on vp with vnode_ref();
 *   - run checkdirs(vp, ctx);
 *   - on a failure path, reset mp->mnt_vnodecovered to NULLVP.
 *
 * NOTE(review): this extract is missing lines (return type, braces, the
 * vnode unlock, the matching name-cache lock and mount_generation update
 * that the comment describes, error checks, the return), so the exact
 * unwind on failure cannot be stated from here.
 */
1262 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1266 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1268 vnode_lock_spin(vp
);
1269 CLR(vp
->v_flag
, VMOUNT
);
1270 vp
->v_mountedhere
= mp
;
1274 * taking the name_cache_lock exclusively will
1275 * insure that everyone is out of the fast path who
1276 * might be trying to use a now stale copy of
1277 * vp->v_mountedhere->mnt_realrootvp
1278 * bumping mount_generation causes the cached values
1283 name_cache_unlock();
1285 error
= vnode_ref(vp
);
1290 error
= checkdirs(vp
, ctx
);
1292 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1299 mp
->mnt_vnodecovered
= NULLVP
;
1305 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1308 vnode_lock_spin(vp
);
1309 vp
->v_mountedhere
= (mount_t
)NULL
;
1312 mp
->mnt_vnodecovered
= NULLVP
;
1316 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1320 /* unmount in progress return error */
1321 mount_lock_spin(mp
);
1322 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1327 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1330 * We only allow the filesystem to be reloaded if it
1331 * is currently mounted read-only.
1333 if ((flags
& MNT_RELOAD
) &&
1334 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1340 * Only root, or the user that did the original mount is
1341 * permitted to update it.
1343 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1344 (!vfs_context_issuser(ctx
))) {
1349 error
= mac_mount_check_remount(ctx
, mp
);
1357 lck_rw_done(&mp
->mnt_rwlock
);
1364 mount_end_update(mount_t mp
)
1366 lck_rw_done(&mp
->mnt_rwlock
);
1370 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1374 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1378 vp
= imgsrc_rootvnodes
[height
];
1379 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1388 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1389 const char *fsname
, vfs_context_t ctx
,
1390 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1394 boolean_t placed
= FALSE
;
1395 vnode_t devvp
= NULLVP
;
1396 struct vfstable
*vfsp
;
1397 user_addr_t devpath
;
1398 char *old_mntonname
;
1403 /* If we didn't imageboot, nothing to move */
1404 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1408 /* Only root can do this */
1409 if (!vfs_context_issuser(ctx
)) {
1413 IMGSRC_DEBUG("looking for root vnode.\n");
1416 * Get root vnode of filesystem we're moving.
1420 struct user64_mnt_imgsrc_args mia64
;
1421 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1423 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1427 height
= mia64
.mi_height
;
1428 flags
= mia64
.mi_flags
;
1429 devpath
= mia64
.mi_devpath
;
1431 struct user32_mnt_imgsrc_args mia32
;
1432 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1434 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1438 height
= mia32
.mi_height
;
1439 flags
= mia32
.mi_flags
;
1440 devpath
= mia32
.mi_devpath
;
1444 * For binary compatibility--assumes one level of nesting.
1447 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1451 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1454 /* munge into LP64 addr */
1455 devpath
= CAST_USER_ADDR_T(tmp
);
1463 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1467 error
= get_imgsrc_rootvnode(height
, &rvp
);
1469 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1473 IMGSRC_DEBUG("got root vnode.\n");
1475 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1477 /* Can only move once */
1478 mp
= vnode_mount(rvp
);
1479 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1480 IMGSRC_DEBUG("Already moved.\n");
1485 IMGSRC_DEBUG("Starting updated.\n");
1487 /* Get exclusive rwlock on mount, authorize update on mp */
1488 error
= mount_begin_update(mp
, ctx
, 0);
1490 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1495 * It can only be moved once. Flag is set under the rwlock,
1496 * so we're now safe to proceed.
1498 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1499 IMGSRC_DEBUG("Already moved [2]\n");
1504 IMGSRC_DEBUG("Preparing coveredvp.\n");
1506 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1507 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1509 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1513 IMGSRC_DEBUG("Covered vp OK.\n");
1515 /* Sanity check the name caller has provided */
1516 vfsp
= mp
->mnt_vtable
;
1517 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1518 IMGSRC_DEBUG("Wrong fs name.\n");
1523 /* Check the device vnode and update mount-from name, for local filesystems */
1524 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1525 IMGSRC_DEBUG("Local, doing device validation.\n");
1527 if (devpath
!= USER_ADDR_NULL
) {
1528 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1530 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1539 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1540 * and increment the name cache's mount generation
1543 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1544 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1551 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1552 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1554 /* Forbid future moves */
1556 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1559 /* Finally, add to mount list, completely ready to go */
1560 if (mount_list_add(mp
) != 0) {
1562 * The system is shutting down trying to umount
1563 * everything, so fail with a plausible errno.
1569 mount_end_update(mp
);
1571 FREE(old_mntonname
, M_TEMP
);
1573 vfs_notify_mount(pvp
);
1577 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1580 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1585 * Placing the mp on the vnode clears VMOUNT,
1586 * so cleanup is different after that point
1589 /* Rele the vp, clear VMOUNT and v_mountedhere */
1590 undo_place_on_covered_vp(mp
, vp
);
1592 vnode_lock_spin(vp
);
1593 CLR(vp
->v_flag
, VMOUNT
);
1597 mount_end_update(mp
);
1601 FREE(old_mntonname
, M_TEMP
);
1605 #endif /* CONFIG_IMGSRC_ACCESS */
1608 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1610 struct nameidata qnd
;
1612 char qfpath
[MAXPATHLEN
];
1613 const char *qfname
= QUOTAFILENAME
;
1614 const char *qfopsname
= QUOTAOPSNAME
;
1615 const char *qfextension
[] = INITQFNAMES
;
1617 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1618 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1622 * Enable filesystem disk quotas if necessary.
1623 * We ignore errors as this should not interfere with final mount
1625 for (type
=0; type
< MAXQUOTAS
; type
++) {
1626 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1627 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1628 CAST_USER_ADDR_T(qfpath
), ctx
);
1629 if (namei(&qnd
) != 0)
1630 continue; /* option file to trigger quotas is not present */
1631 vnode_put(qnd
.ni_vp
);
1633 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1635 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1642 checkdirs_callback(proc_t p
, void * arg
)
1644 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1645 vnode_t olddp
= cdrp
->olddp
;
1646 vnode_t newdp
= cdrp
->newdp
;
1647 struct filedesc
*fdp
;
1651 int cdir_changed
= 0;
1652 int rdir_changed
= 0;
1655 * XXX Also needs to iterate each thread in the process to see if it
1656 * XXX is using a per-thread current working directory, and, if so,
1657 * XXX update that as well.
1662 if (fdp
== (struct filedesc
*)0) {
1664 return(PROC_RETURNED
);
1666 fdp_cvp
= fdp
->fd_cdir
;
1667 fdp_rvp
= fdp
->fd_rdir
;
1670 if (fdp_cvp
== olddp
) {
1677 if (fdp_rvp
== olddp
) {
1684 if (cdir_changed
|| rdir_changed
) {
1686 fdp
->fd_cdir
= fdp_cvp
;
1687 fdp
->fd_rdir
= fdp_rvp
;
1690 return(PROC_RETURNED
);
1696 * Scan all active processes to see if any of them have a current
1697 * or root directory onto which the new filesystem has just been
1698 * mounted. If so, replace them with the new mount point.
1701 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1706 struct cdirargs cdr
;
1708 if (olddp
->v_usecount
== 1)
1710 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1714 panic("mount: lost mount: error %d", err
);
1721 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1722 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1724 if (rootvnode
== olddp
) {
1736 * Unmount a file system.
1738 * Note: unmount takes a path to the vnode mounted on as argument,
1739 * not special file (as before).
1743 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1748 struct nameidata nd
;
1749 vfs_context_t ctx
= vfs_context_current();
1751 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1752 UIO_USERSPACE
, uap
->path
, ctx
);
1761 error
= mac_mount_check_umount(ctx
, mp
);
1768 * Must be the root of the filesystem
1770 if ((vp
->v_flag
& VROOT
) == 0) {
1776 /* safedounmount consumes the mount ref */
1777 return (safedounmount(mp
, uap
->flags
, ctx
));
1781 vfs_unmountbyfsid(fsid_t
*fsid
, int flags
, vfs_context_t ctx
)
1785 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1786 if (mp
== (mount_t
)0) {
1791 /* safedounmount consumes the mount ref */
1792 return(safedounmount(mp
, flags
, ctx
));
1797 * The mount struct comes with a mount ref which will be consumed.
1798 * Do the actual file system unmount, prevent some common foot shooting.
1801 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1804 proc_t p
= vfs_context_proc(ctx
);
1807 * If the file system is not responding and MNT_NOBLOCK
1808 * is set and not a forced unmount then return EBUSY.
1810 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1811 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1817 * Skip authorization if the mount is tagged as permissive and
1818 * this is not a forced-unmount attempt.
1820 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1822 * Only root, or the user that did the original mount is
1823 * permitted to unmount this filesystem.
1825 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1826 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1830 * Don't allow unmounting the root file system.
1832 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1833 error
= EBUSY
; /* the root is always busy */
1837 #ifdef CONFIG_IMGSRC_ACCESS
1838 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1842 #endif /* CONFIG_IMGSRC_ACCESS */
1844 return (dounmount(mp
, flags
, 1, ctx
));
1852 * Do the actual file system unmount.
1855 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1857 vnode_t coveredvp
= (vnode_t
)0;
1860 int forcedunmount
= 0;
1862 struct vnode
*devvp
= NULLVP
;
1864 proc_t p
= vfs_context_proc(ctx
);
1866 int pflags_save
= 0;
1867 #endif /* CONFIG_TRIGGERS */
1872 * If already an unmount in progress just return EBUSY.
1873 * Even a forced unmount cannot override.
1875 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1882 if (flags
& MNT_FORCE
) {
1884 mp
->mnt_lflag
|= MNT_LFORCE
;
1888 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1889 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1892 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1893 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1894 mp
->mnt_flag
&=~ MNT_ASYNC
;
1896 * anyone currently in the fast path that
1897 * trips over the cached rootvp will be
1898 * dumped out and forced into the slow path
1899 * to regenerate a new cached value
1901 mp
->mnt_realrootvp
= NULLVP
;
1904 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1906 * Force unmount any mounts in this filesystem.
1907 * If any unmounts fail - just leave them dangling.
1910 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1914 * taking the name_cache_lock exclusively will
1915 * ensure that everyone is out of the fast path who
1916 * might be trying to use a now stale copy of
1917 * vp->v_mountedhere->mnt_realrootvp
1918 * bumping mount_generation causes the cached values
1923 name_cache_unlock();
1926 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1930 fsevent_unmount(mp
); /* has to come first! */
1933 if (forcedunmount
== 0) {
1934 ubc_umount(mp
); /* release cached vnodes */
1935 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1936 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1939 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1940 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1941 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1947 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
1950 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1954 lflags
|= FORCECLOSE
;
1955 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1956 if ((forcedunmount
== 0) && error
) {
1958 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1959 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1960 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1964 /* make sure there is no one in the mount iterations or lookup */
1965 mount_iterdrain(mp
);
1967 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1969 mount_iterreset(mp
);
1971 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1972 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1973 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1977 /* increment the operations count */
1979 OSAddAtomic(1, &vfs_nummntops
);
1981 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1982 /* hold an io reference and drop the usecount before close */
1983 devvp
= mp
->mnt_devvp
;
1984 vnode_getalways(devvp
);
1986 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1988 vnode_clearmountedon(devvp
);
1991 lck_rw_done(&mp
->mnt_rwlock
);
1992 mount_list_remove(mp
);
1993 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1995 /* mark the mount point hook in the vp but do not drop the ref yet */
1996 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
1998 * The covered vnode needs special handling. Trying to get an
1999 * iocount must not block here as this may lead to deadlocks
2000 * if the Filesystem to which the covered vnode belongs is
2001 * undergoing forced unmounts. Since we hold a usecount, the
2002 * vnode cannot be reused (it can, however, still be terminated)
2004 vnode_getalways(coveredvp
);
2005 vnode_lock_spin(coveredvp
);
2008 coveredvp
->v_mountedhere
= (struct mount
*)0;
2009 CLR(coveredvp
->v_flag
, VMOUNT
);
2011 vnode_unlock(coveredvp
);
2012 vnode_put(coveredvp
);
2016 mp
->mnt_vtable
->vfc_refcount
--;
2017 mount_list_unlock();
2019 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2020 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2022 mp
->mnt_lflag
|= MNT_LDEAD
;
2024 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2026 * do the wakeup here
2027 * in case we block in mount_refdrain
2028 * which will drop the mount lock
2029 * and allow anyone blocked in vfs_busy
2030 * to wakeup and see the LDEAD state
2032 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2033 wakeup((caddr_t
)mp
);
2037 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2038 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2043 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2044 // Restore P_NOREMOTEHANG bit to its previous value
2045 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2046 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2050 * Callback and context are set together under the mount lock, and
2051 * never cleared, so we're safe to examine them here, drop the lock,
2054 if (mp
->mnt_triggercallback
!= NULL
) {
2057 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2058 } else if (did_vflush
) {
2059 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2066 #endif /* CONFIG_TRIGGERS */
2068 lck_rw_done(&mp
->mnt_rwlock
);
2071 wakeup((caddr_t
)mp
);
2074 if ((coveredvp
!= NULLVP
)) {
2075 vnode_t pvp
= NULLVP
;
2078 * The covered vnode needs special handling. Trying to
2079 * get an iocount must not block here as this may lead
2080 * to deadlocks if the Filesystem to which the covered
2081 * vnode belongs is undergoing forced unmounts. Since we
2082 * hold a usecount, the vnode cannot be reused
2083 * (it can, however, still be terminated).
2085 vnode_getalways(coveredvp
);
2087 mount_dropcrossref(mp
, coveredvp
, 0);
2089 * We'll _try_ to detect if this really needs to be
2090 * done. The coveredvp can only be in termination (or
2091 * terminated) if the coveredvp's mount point is in a
2092 * forced unmount (or has been) since we still hold the
2095 if (!vnode_isrecycled(coveredvp
)) {
2096 pvp
= vnode_getparent(coveredvp
);
2098 if (coveredvp
->v_resolve
) {
2099 vnode_trigger_rearm(coveredvp
, ctx
);
2104 vnode_rele(coveredvp
);
2105 vnode_put(coveredvp
);
2109 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2112 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2113 mount_lock_destroy(mp
);
2115 mac_mount_label_destroy(mp
);
2117 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2119 panic("dounmount: no coveredvp");
2125 * Unmount any mounts in this filesystem.
2128 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2131 fsid_t
*fsids
, fsid
;
2133 int count
= 0, i
, m
= 0;
2138 // Get an array to hold the submounts fsids.
2139 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2141 fsids_sz
= count
* sizeof(fsid_t
);
2142 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2143 if (fsids
== NULL
) {
2144 mount_list_unlock();
2147 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2150 * Fill the array with submount fsids.
2151 * Since mounts are always added to the tail of the mount list, the
2152 * list is always in mount order.
2153 * For each mount check if the mounted-on vnode belongs to a
2154 * mount that's already added to our array of mounts to be unmounted.
2156 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2157 vp
= smp
->mnt_vnodecovered
;
2160 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2161 for (i
= 0; i
<= m
; i
++) {
2162 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2163 fsids
[i
].val
[1] == fsid
.val
[1]) {
2164 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2169 mount_list_unlock();
2171 // Unmount the submounts in reverse order. Ignore errors.
2172 for (i
= m
; i
> 0; i
--) {
2173 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2176 mount_iterdrop(smp
);
2177 (void) dounmount(smp
, flags
, 1, ctx
);
2182 FREE(fsids
, M_TEMP
);
2186 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2191 if (mp
->mnt_crossref
< 0)
2192 panic("mount cross refs -ve");
2194 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2197 vnode_put_locked(dp
);
2200 mount_lock_destroy(mp
);
2202 mac_mount_label_destroy(mp
);
2204 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2208 vnode_put_locked(dp
);
2214 * Sync each mounted filesystem.
2220 int print_vmpage_stat
=0;
2221 int sync_timeout
= 60; // Sync time limit (sec)
2224 sync_callback(mount_t mp
, __unused
void *arg
)
2226 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2227 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2229 mp
->mnt_flag
&= ~MNT_ASYNC
;
2230 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2232 mp
->mnt_flag
|= MNT_ASYNC
;
2235 return (VFS_RETURNED
);
2240 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2242 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2244 if (print_vmpage_stat
) {
2245 vm_countdirtypages();
2251 #endif /* DIAGNOSTIC */
2256 sync_thread(void *arg
, __unused wait_result_t wr
)
2258 int *timeout
= (int *) arg
;
2260 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2263 wakeup((caddr_t
) timeout
);
2264 if (print_vmpage_stat
) {
2265 vm_countdirtypages();
2271 #endif /* DIAGNOSTIC */
2275 * Sync in a separate thread so we can time out if it blocks.
2278 sync_async(int timeout
)
2282 struct timespec ts
= {timeout
, 0};
2284 lck_mtx_lock(sync_mtx_lck
);
2285 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2286 printf("sync_thread failed\n");
2287 lck_mtx_unlock(sync_mtx_lck
);
2291 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2293 printf("sync timed out: %d sec\n", timeout
);
2295 thread_deallocate(thd
);
2301 * An in-kernel sync for power management to call.
2303 __private_extern__
int
2306 (void) sync_async(sync_timeout
);
2309 } /* end of sync_internal call */
2312 * Change filesystem quotas.
2316 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2319 int error
, quota_cmd
, quota_status
;
2322 struct nameidata nd
;
2323 vfs_context_t ctx
= vfs_context_current();
2324 struct dqblk my_dqblk
;
2326 AUDIT_ARG(uid
, uap
->uid
);
2327 AUDIT_ARG(cmd
, uap
->cmd
);
2328 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2333 mp
= nd
.ni_vp
->v_mount
;
2334 vnode_put(nd
.ni_vp
);
2337 /* copyin any data we will need for downstream code */
2338 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2340 switch (quota_cmd
) {
2342 /* uap->arg specifies a file from which to take the quotas */
2343 fnamelen
= MAXPATHLEN
;
2344 datap
= kalloc(MAXPATHLEN
);
2345 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2348 /* uap->arg is a pointer to a dqblk structure. */
2349 datap
= (caddr_t
) &my_dqblk
;
2353 /* uap->arg is a pointer to a dqblk structure. */
2354 datap
= (caddr_t
) &my_dqblk
;
2355 if (proc_is64bit(p
)) {
2356 struct user_dqblk my_dqblk64
;
2357 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2359 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2363 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2367 /* uap->arg is a pointer to an integer */
2368 datap
= (caddr_t
) &quota_status
;
2376 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2379 switch (quota_cmd
) {
2382 kfree(datap
, MAXPATHLEN
);
2385 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2387 if (proc_is64bit(p
)) {
2388 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2389 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2390 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2393 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2398 /* uap->arg is a pointer to an integer */
2400 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2411 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2413 return (EOPNOTSUPP
);
2418 * Get filesystem statistics.
2420 * Returns: 0 Success
2422 * vfs_update_vfsstat:???
2423 * munge_statfs:EFAULT
2427 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2430 struct vfsstatfs
*sp
;
2432 struct nameidata nd
;
2433 vfs_context_t ctx
= vfs_context_current();
2436 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2437 UIO_USERSPACE
, uap
->path
, ctx
);
2443 sp
= &mp
->mnt_vfsstat
;
2447 error
= mac_mount_check_stat(ctx
, mp
);
2452 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2458 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2464 * Get filesystem statistics.
2468 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2472 struct vfsstatfs
*sp
;
2475 AUDIT_ARG(fd
, uap
->fd
);
2477 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2480 error
= vnode_getwithref(vp
);
2486 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2495 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2500 sp
= &mp
->mnt_vfsstat
;
2501 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2505 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2515 * Common routine to handle copying of statfs64 data to user space
2518 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2521 struct statfs64 sfs
;
2523 bzero(&sfs
, sizeof(sfs
));
2525 sfs
.f_bsize
= sfsp
->f_bsize
;
2526 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2527 sfs
.f_blocks
= sfsp
->f_blocks
;
2528 sfs
.f_bfree
= sfsp
->f_bfree
;
2529 sfs
.f_bavail
= sfsp
->f_bavail
;
2530 sfs
.f_files
= sfsp
->f_files
;
2531 sfs
.f_ffree
= sfsp
->f_ffree
;
2532 sfs
.f_fsid
= sfsp
->f_fsid
;
2533 sfs
.f_owner
= sfsp
->f_owner
;
2534 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2535 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2536 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2537 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2538 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2540 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2542 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2543 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2545 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2551 * Get file system statistics in 64-bit mode
2554 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2557 struct vfsstatfs
*sp
;
2559 struct nameidata nd
;
2560 vfs_context_t ctxp
= vfs_context_current();
2563 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2564 UIO_USERSPACE
, uap
->path
, ctxp
);
2570 sp
= &mp
->mnt_vfsstat
;
2574 error
= mac_mount_check_stat(ctxp
, mp
);
2579 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2585 error
= statfs64_common(mp
, sp
, uap
->buf
);
2592 * Get file system statistics in 64-bit mode
2595 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2599 struct vfsstatfs
*sp
;
2602 AUDIT_ARG(fd
, uap
->fd
);
2604 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2607 error
= vnode_getwithref(vp
);
2613 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2622 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2627 sp
= &mp
->mnt_vfsstat
;
2628 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2632 error
= statfs64_common(mp
, sp
, uap
->buf
);
2641 struct getfsstat_struct
{
2652 getfsstat_callback(mount_t mp
, void * arg
)
2655 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2656 struct vfsstatfs
*sp
;
2658 vfs_context_t ctx
= vfs_context_current();
2660 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2662 error
= mac_mount_check_stat(ctx
, mp
);
2664 fstp
->error
= error
;
2665 return(VFS_RETURNED_DONE
);
2668 sp
= &mp
->mnt_vfsstat
;
2670 * If MNT_NOWAIT is specified, do not refresh the
2671 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2673 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2674 (error
= vfs_update_vfsstat(mp
, ctx
,
2676 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2677 return(VFS_RETURNED
);
2681 * Need to handle LP64 version of struct statfs
2683 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2685 fstp
->error
= error
;
2686 return(VFS_RETURNED_DONE
);
2688 fstp
->sfsp
+= my_size
;
2692 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2694 fstp
->error
= error
;
2695 return(VFS_RETURNED_DONE
);
2702 return(VFS_RETURNED
);
2706 * Get statistics on all filesystems.
2709 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2711 struct __mac_getfsstat_args muap
;
2713 muap
.buf
= uap
->buf
;
2714 muap
.bufsize
= uap
->bufsize
;
2715 muap
.mac
= USER_ADDR_NULL
;
2717 muap
.flags
= uap
->flags
;
2719 return (__mac_getfsstat(p
, &muap
, retval
));
2723 * __mac_getfsstat: Get MAC-related file system statistics
2725 * Parameters: p (ignored)
2726 * uap User argument descriptor (see below)
2727 * retval Count of file system statistics (N stats)
2729 * Indirect: uap->bufsize Buffer size
2730 * uap->macsize MAC info size
2731 * uap->buf Buffer where information will be returned
2733 * uap->flags File system flags
2736 * Returns: 0 Success
2741 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2745 size_t count
, maxcount
, bufsize
, macsize
;
2746 struct getfsstat_struct fst
;
2748 bufsize
= (size_t) uap
->bufsize
;
2749 macsize
= (size_t) uap
->macsize
;
2751 if (IS_64BIT_PROCESS(p
)) {
2752 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2755 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2763 if (uap
->mac
!= USER_ADDR_NULL
) {
2768 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2769 if (count
!= maxcount
)
2772 /* Copy in the array */
2773 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2778 error
= copyin(uap
->mac
, mp0
, macsize
);
2780 FREE(mp0
, M_MACTEMP
);
2784 /* Normalize to an array of user_addr_t */
2785 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2787 FREE(mp0
, M_MACTEMP
);
2791 for (i
= 0; i
< count
; i
++) {
2792 if (IS_64BIT_PROCESS(p
))
2793 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2795 mp
[i
] = (user_addr_t
)mp0
[i
];
2797 FREE(mp0
, M_MACTEMP
);
2804 fst
.flags
= uap
->flags
;
2807 fst
.maxcount
= maxcount
;
2810 vfs_iterate(0, getfsstat_callback
, &fst
);
2813 FREE(mp
, M_MACTEMP
);
2816 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2820 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2821 *retval
= fst
.maxcount
;
2823 *retval
= fst
.count
;
2828 getfsstat64_callback(mount_t mp
, void * arg
)
2830 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2831 struct vfsstatfs
*sp
;
2834 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2836 error
= mac_mount_check_stat(vfs_context_current(), mp
);
2838 fstp
->error
= error
;
2839 return(VFS_RETURNED_DONE
);
2842 sp
= &mp
->mnt_vfsstat
;
2844 * If MNT_NOWAIT is specified, do not refresh the fsstat
2845 * cache. MNT_WAIT overrides MNT_NOWAIT.
2847 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2848 * getfsstat, since the constants are out of the same
2851 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2852 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2853 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2854 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2855 return(VFS_RETURNED
);
2858 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2860 fstp
->error
= error
;
2861 return(VFS_RETURNED_DONE
);
2863 fstp
->sfsp
+= sizeof(struct statfs64
);
2866 return(VFS_RETURNED
);
2870 * Get statistics on all file systems in 64 bit mode.
2873 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2876 int count
, maxcount
;
2877 struct getfsstat_struct fst
;
2879 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2885 fst
.flags
= uap
->flags
;
2888 fst
.maxcount
= maxcount
;
2890 vfs_iterate(0, getfsstat64_callback
, &fst
);
2893 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2897 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2898 *retval
= fst
.maxcount
;
2900 *retval
= fst
.count
;
2906 * Gets the vnode associated with the file descriptor passed.
2910 * ctx - vfs context of caller
2911 * fd - file descriptor for which vnode is required.
2912 * vpp - Pointer to pointer to vnode to be returned.
2914 * The vnode is returned with an iocount so any vnode obtained
2915 * by this call needs a vnode_put
2919 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2923 struct fileproc
*fp
;
2924 proc_t p
= vfs_context_proc(ctx
);
2928 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2932 error
= vnode_getwithref(vp
);
2934 (void)fp_drop(p
, fd
, fp
, 0);
2938 (void)fp_drop(p
, fd
, fp
, 0);
2944 * Wrapper function around namei to start lookup from a directory
2945 * specified by a file descriptor ni_dirfd.
2947 * In addition to all the errors returned by namei, this call can
2948 * return ENOTDIR if the file descriptor does not refer to a directory,
2949 * and EBADF if the file descriptor is not valid.
2952 nameiat(struct nameidata
*ndp
, int dirfd
)
2954 if ((dirfd
!= AT_FDCWD
) &&
2955 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2956 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2960 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2961 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2965 c
= *((char *)(ndp
->ni_dirp
));
2971 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2976 if (vnode_vtype(dvp_at
) != VDIR
) {
2981 ndp
->ni_dvp
= dvp_at
;
2982 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2984 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
2990 return (namei(ndp
));
2994 * Change current working directory to a given file descriptor.
2998 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
3000 struct filedesc
*fdp
= p
->p_fd
;
3006 vfs_context_t ctx
= vfs_context_current();
3008 AUDIT_ARG(fd
, uap
->fd
);
3009 if (per_thread
&& uap
->fd
== -1) {
3011 * Switching back from per-thread to per process CWD; verify we
3012 * in fact have one before proceeding. The only success case
3013 * for this code path is to return 0 preemptively after zapping
3014 * the thread structure contents.
3016 thread_t th
= vfs_context_thread(ctx
);
3018 uthread_t uth
= get_bsdthread_info(th
);
3020 uth
->uu_cdir
= NULLVP
;
3021 if (tvp
!= NULLVP
) {
3029 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
3031 if ( (error
= vnode_getwithref(vp
)) ) {
3036 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
3038 if (vp
->v_type
!= VDIR
) {
3044 error
= mac_vnode_check_chdir(ctx
, vp
);
3048 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3052 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3053 if (vfs_busy(mp
, LK_NOWAIT
)) {
3057 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3066 if ( (error
= vnode_ref(vp
)) )
3071 thread_t th
= vfs_context_thread(ctx
);
3073 uthread_t uth
= get_bsdthread_info(th
);
3076 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3101 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3103 return common_fchdir(p
, uap
, 0);
3107 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3109 return common_fchdir(p
, (void *)uap
, 1);
3113 * Change current working directory (".").
3115 * Returns: 0 Success
3116 * change_dir:ENOTDIR
3118 * vnode_ref:ENOENT No such file or directory
3122 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3124 struct filedesc
*fdp
= p
->p_fd
;
3126 struct nameidata nd
;
3128 vfs_context_t ctx
= vfs_context_current();
3130 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3131 UIO_USERSPACE
, uap
->path
, ctx
);
3132 error
= change_dir(&nd
, ctx
);
3135 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3136 vnode_put(nd
.ni_vp
);
3140 * drop the iocount we picked up in change_dir
3142 vnode_put(nd
.ni_vp
);
3145 thread_t th
= vfs_context_thread(ctx
);
3147 uthread_t uth
= get_bsdthread_info(th
);
3149 uth
->uu_cdir
= nd
.ni_vp
;
3150 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3152 vnode_rele(nd
.ni_vp
);
3158 fdp
->fd_cdir
= nd
.ni_vp
;
3172 * Change current working directory (".") for the entire process
3174 * Parameters: p Process requesting the call
3175 * uap User argument descriptor (see below)
3178 * Indirect parameters: uap->path Directory path
3180 * Returns: 0 Success
3181 * common_chdir: ENOTDIR
3182 * common_chdir: ENOENT No such file or directory
3187 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3189 return common_chdir(p
, (void *)uap
, 0);
3195 * Change current working directory (".") for a single thread
3197 * Parameters: p Process requesting the call
3198 * uap User argument descriptor (see below)
3201 * Indirect parameters: uap->path Directory path
3203 * Returns: 0 Success
3204 * common_chdir: ENOTDIR
3205 * common_chdir: ENOENT No such file or directory
3210 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3212 return common_chdir(p
, (void *)uap
, 1);
3217 * Change notion of root (``/'') directory.
3221 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3223 struct filedesc
*fdp
= p
->p_fd
;
3225 struct nameidata nd
;
3227 vfs_context_t ctx
= vfs_context_current();
3229 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3232 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3233 UIO_USERSPACE
, uap
->path
, ctx
);
3234 error
= change_dir(&nd
, ctx
);
3239 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3242 vnode_put(nd
.ni_vp
);
3247 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3248 vnode_put(nd
.ni_vp
);
3251 vnode_put(nd
.ni_vp
);
3255 fdp
->fd_rdir
= nd
.ni_vp
;
3256 fdp
->fd_flags
|= FD_CHROOT
;
3266 * Common routine for chroot and chdir.
3268 * Returns: 0 Success
3269 * ENOTDIR Not a directory
3270 * namei:??? [anything namei can return]
3271 * vnode_authorize:??? [anything vnode_authorize can return]
3274 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3279 if ((error
= namei(ndp
)))
3284 if (vp
->v_type
!= VDIR
) {
3290 error
= mac_vnode_check_chdir(ctx
, vp
);
3297 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3307 * Allocate the vnode data (for directories) associated with the file glob.
3310 fg_vn_data_alloc(void)
3312 struct fd_vn_data
*fvdata
;
3314 /* Allocate per fd vnode data */
3315 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3316 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3317 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3322 * Free the vnode data (for directories) associated with the file glob.
3325 fg_vn_data_free(void *fgvndata
)
3327 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3330 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3331 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3332 FREE(fvdata
, M_FD_VN_DATA
);
3336 * Check permissions, allocate an open file structure,
3337 * and call the device open routine if any.
3339 * Returns: 0 Success
3350 * XXX Need to implement uid, gid
3353 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3354 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3357 proc_t p
= vfs_context_proc(ctx
);
3358 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3359 struct fileproc
*fp
;
3362 int type
, indx
, error
;
3364 struct vfs_context context
;
3368 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3371 flags
= FFLAGS(uflags
);
3372 CLR(flags
, FENCRYPTED
);
3373 CLR(flags
, FUNENCRYPTED
);
3375 AUDIT_ARG(fflags
, oflags
);
3376 AUDIT_ARG(mode
, vap
->va_mode
);
3378 if ((error
= falloc_withalloc(p
,
3379 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3382 uu
->uu_dupfd
= -indx
- 1;
3384 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3385 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3386 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3387 fp_drop(p
, indx
, NULL
, 0);
3392 if (error
== ERESTART
)
3394 fp_free(p
, indx
, fp
);
3400 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3401 fp
->f_fglob
->fg_ops
= &vnops
;
3402 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3404 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3405 lf
.l_whence
= SEEK_SET
;
3408 if (flags
& O_EXLOCK
)
3409 lf
.l_type
= F_WRLCK
;
3411 lf
.l_type
= F_RDLCK
;
3413 if ((flags
& FNONBLOCK
) == 0)
3416 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3421 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3423 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3426 /* try to truncate by setting the size attribute */
3427 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3431 * For directories we hold some additional information in the fd.
3433 if (vnode_vtype(vp
) == VDIR
) {
3434 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3436 fp
->f_fglob
->fg_vn_data
= NULL
;
3442 * The first terminal open (without a O_NOCTTY) by a session leader
3443 * results in it being set as the controlling terminal.
3445 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3446 !(flags
& O_NOCTTY
)) {
3449 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3450 (caddr_t
)&tmp
, ctx
);
3454 if (flags
& O_CLOEXEC
)
3455 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3456 if (flags
& O_CLOFORK
)
3457 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3458 procfdtbl_releasefd(p
, indx
, NULL
);
3460 #if CONFIG_SECLUDED_MEMORY
3461 if (secluded_for_filecache
&&
3462 FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
&&
3463 vnode_vtype(vp
) == VREG
) {
3464 memory_object_control_t moc
;
3466 moc
= ubc_getobject(vp
, UBC_FLAGS_NONE
);
3468 if (moc
== MEMORY_OBJECT_CONTROL_NULL
) {
3469 /* nothing to do... */
3470 } else if (fp
->f_fglob
->fg_flag
& FWRITE
) {
3471 /* writable -> no longer eligible for secluded pages */
3472 memory_object_mark_eligible_for_secluded(moc
,
3474 } else if (secluded_for_filecache
== 1) {
3475 char pathname
[32] = { 0, };
3477 /* XXX FBDP: better way to detect /Applications/ ? */
3478 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3479 copyinstr(ndp
->ni_dirp
,
3484 copystr(CAST_DOWN(void *, ndp
->ni_dirp
),
3489 pathname
[sizeof (pathname
) - 1] = '\0';
3490 if (strncmp(pathname
,
3492 strlen("/Applications/")) == 0 &&
3494 "/Applications/Camera.app/",
3495 strlen("/Applications/Camera.app/")) != 0) {
3498 * AND from "/Applications/"
3499 * AND not from "/Applications/Camera.app/"
3500 * ==> eligible for secluded
3502 memory_object_mark_eligible_for_secluded(moc
,
3505 } else if (secluded_for_filecache
== 2) {
3506 /* not implemented... */
3507 if (!strncmp(vp
->v_name
,
3508 DYLD_SHARED_CACHE_NAME
,
3509 strlen(DYLD_SHARED_CACHE_NAME
)) ||
3510 !strncmp(vp
->v_name
,
3512 strlen(vp
->v_name
)) ||
3513 !strncmp(vp
->v_name
,
3515 strlen(vp
->v_name
)) ||
3516 !strncmp(vp
->v_name
,
3518 strlen(vp
->v_name
)) ||
3519 !strncmp(vp
->v_name
,
3521 strlen(vp
->v_name
))) {
3523 * This file matters when launching Camera:
3524 * do not store its contents in the secluded
3525 * pool that will be drained on Camera launch.
3527 memory_object_mark_eligible_for_secluded(moc
,
3532 #endif /* CONFIG_SECLUDED_MEMORY */
3534 fp_drop(p
, indx
, fp
, 1);
3541 context
= *vfs_context_current();
3542 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3544 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3545 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3546 lf
.l_whence
= SEEK_SET
;
3549 lf
.l_type
= F_UNLCK
;
3552 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3555 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3557 fp_free(p
, indx
, fp
);
3563 * While most of the *at syscall handlers can call nameiat() which
3564 * is a wrapper around namei, the use of namei and initialisation
3565 * of nameidata are far removed and in different functions - namei
3566 * gets called in vn_open_auth for open1. So we'll just do here what
3570 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3571 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3574 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3578 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3579 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3583 c
= *((char *)(ndp
->ni_dirp
));
3589 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3594 if (vnode_vtype(dvp_at
) != VDIR
) {
3599 ndp
->ni_dvp
= dvp_at
;
3600 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3601 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3608 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3612 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3614 * Parameters: p Process requesting the open
3615 * uap User argument descriptor (see below)
3616 * retval Pointer to an area to receive the
3617 * return value from the system call
3619 * Indirect: uap->path Path to open (same as 'open')
3620 * uap->flags Flags to open (same as 'open')
3621 * uap->uid UID to set, if creating
3622 * uap->gid GID to set, if creating
3623 * uap->mode File mode, if creating (same as 'open')
3624 * uap->xsecurity ACL to set, if creating
3626 * Returns: 0 Success
3629 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3631 * XXX: We should enumerate the possible errno values here, and where
3632 * in the code they originated.
3635 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3637 struct filedesc
*fdp
= p
->p_fd
;
3639 kauth_filesec_t xsecdst
;
3640 struct vnode_attr va
;
3641 struct nameidata nd
;
3644 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3647 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3648 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3652 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3653 VATTR_SET(&va
, va_mode
, cmode
);
3654 if (uap
->uid
!= KAUTH_UID_NONE
)
3655 VATTR_SET(&va
, va_uid
, uap
->uid
);
3656 if (uap
->gid
!= KAUTH_GID_NONE
)
3657 VATTR_SET(&va
, va_gid
, uap
->gid
);
3658 if (xsecdst
!= NULL
)
3659 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3661 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3662 uap
->path
, vfs_context_current());
3664 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3665 fileproc_alloc_init
, NULL
, retval
);
3666 if (xsecdst
!= NULL
)
3667 kauth_filesec_free(xsecdst
);
3673 * Go through the data-protected atomically controlled open (2)
3675 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3677 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3678 int flags
= uap
->flags
;
3679 int class = uap
->class;
3680 int dpflags
= uap
->dpflags
;
3683 * Follow the same path as normal open(2)
3684 * Look up the item if it exists, and acquire the vnode.
3686 struct filedesc
*fdp
= p
->p_fd
;
3687 struct vnode_attr va
;
3688 struct nameidata nd
;
3693 /* Mask off all but regular access permissions */
3694 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3695 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3697 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3698 uap
->path
, vfs_context_current());
3701 * Initialize the extra fields in vnode_attr to pass down our
3703 * 1. target cprotect class.
3704 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3706 if (flags
& O_CREAT
) {
3707 /* lower level kernel code validates that the class is valid before applying it. */
3708 if (class != PROTECTION_CLASS_DEFAULT
) {
3710 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3711 * file behave the same as open (2)
3713 VATTR_SET(&va
, va_dataprotect_class
, class);
3717 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3718 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3719 /* Not allowed to write raw encrypted bytes */
3722 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3723 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3725 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3726 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3730 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3731 fileproc_alloc_init
, NULL
, retval
);
3737 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3738 int fd
, enum uio_seg segflg
, int *retval
)
3740 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3741 struct vnode_attr va
;
3742 struct nameidata nd
;
3746 /* Mask off all but regular access permissions */
3747 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3748 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3750 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3753 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3758 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3760 __pthread_testcancel(1);
3761 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3765 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3768 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3769 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3773 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3776 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3777 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3781 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3783 __pthread_testcancel(1);
3784 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3788 * openbyid_np: open a file given a file system id and a file system object id
3789 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3790 * for file systems that don't support object ids it is a node id (uint64_t).
3792 * Parameters: p Process requesting the open
3793 * uap User argument descriptor (see below)
3794 * retval Pointer to an area to receive the
3795 * return value from the system call
3797 * Indirect: uap->path Path to open (same as 'open')
3799 * uap->fsid id of target file system
3800 * uap->objid id of target file system object
3801 * uap->oflags Flags to open (same as 'open')
3803 * Returns: 0 Success
3807 * XXX: We should enumerate the possible errno values here, and where
3808 * in the code they originated.
3811 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3817 int buflen
= MAXPATHLEN
;
3819 vfs_context_t ctx
= vfs_context_current();
3821 if ((error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) {
3825 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3829 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3830 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3834 AUDIT_ARG(value32
, fsid
.val
[0]);
3835 AUDIT_ARG(value64
, objid
);
3837 /* resolve path from fsid, objid */
3839 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3844 error
= fsgetpath_internal(
3845 ctx
, fsid
.val
[0], objid
,
3846 buflen
, buf
, &pathlen
);
3852 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3860 error
= openat_internal(
3861 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3870 * Create a special file.
3872 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
3875 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3877 struct vnode_attr va
;
3878 vfs_context_t ctx
= vfs_context_current();
3880 struct nameidata nd
;
3884 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3885 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3887 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3888 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3889 return(mkfifo1(ctx
, uap
->path
, &va
));
3891 AUDIT_ARG(mode
, uap
->mode
);
3892 AUDIT_ARG(value32
, uap
->dev
);
3894 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3896 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3897 UIO_USERSPACE
, uap
->path
, ctx
);
3909 switch (uap
->mode
& S_IFMT
) {
3911 VATTR_SET(&va
, va_type
, VCHR
);
3914 VATTR_SET(&va
, va_type
, VBLK
);
3922 error
= mac_vnode_check_create(ctx
,
3923 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3928 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3931 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3935 int update_flags
= 0;
3937 // Make sure the name & parent pointers are hooked up
3938 if (vp
->v_name
== NULL
)
3939 update_flags
|= VNODE_UPDATE_NAME
;
3940 if (vp
->v_parent
== NULLVP
)
3941 update_flags
|= VNODE_UPDATE_PARENT
;
3944 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3947 add_fsevent(FSE_CREATE_FILE
, ctx
,
3955 * nameidone has to happen before we vnode_put(dvp)
3956 * since it may need to release the fs_nodelock on the dvp
3968 * Create a named pipe.
3970 * Returns: 0 Success
3973 * vnode_authorize:???
3977 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3981 struct nameidata nd
;
3983 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3984 UIO_USERSPACE
, upath
, ctx
);
3991 /* check that this is a new file and authorize addition */
3996 VATTR_SET(vap
, va_type
, VFIFO
);
3998 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
4001 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
4004 * nameidone has to happen before we vnode_put(dvp)
4005 * since it may need to release the fs_nodelock on the dvp
4018 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4020 * Parameters: p Process requesting the open
4021 * uap User argument descriptor (see below)
4024 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4025 * uap->uid UID to set
4026 * uap->gid GID to set
4027 * uap->mode File mode to set (same as 'mkfifo')
4028 * uap->xsecurity ACL to set, if creating
4030 * Returns: 0 Success
4033 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4035 * XXX: We should enumerate the possible errno values here, and where
4036 * in the code they originated.
4039 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
4042 kauth_filesec_t xsecdst
;
4043 struct vnode_attr va
;
4045 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
4047 xsecdst
= KAUTH_FILESEC_NONE
;
4048 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
4049 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
4054 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4055 if (uap
->uid
!= KAUTH_UID_NONE
)
4056 VATTR_SET(&va
, va_uid
, uap
->uid
);
4057 if (uap
->gid
!= KAUTH_GID_NONE
)
4058 VATTR_SET(&va
, va_gid
, uap
->gid
);
4059 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4060 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
4062 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
4064 if (xsecdst
!= KAUTH_FILESEC_NONE
)
4065 kauth_filesec_free(xsecdst
);
4071 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
4073 struct vnode_attr va
;
4076 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
4078 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
4083 my_strrchr(char *p
, int ch
)
4087 for (save
= NULL
;; ++p
) {
4096 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
4099 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4101 int ret
, len
= _len
;
4103 *truncated_path
= 0;
4104 ret
= vn_getpath(dvp
, path
, &len
);
4105 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4108 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4109 if (len
> MAXPATHLEN
) {
4112 // the string got truncated!
4113 *truncated_path
= 1;
4114 ptr
= my_strrchr(path
, '/');
4116 *ptr
= '\0'; // chop off the string at the last directory component
4118 len
= strlen(path
) + 1;
4121 } else if (ret
== 0) {
4122 *truncated_path
= 1;
4123 } else if (ret
!= 0) {
4124 struct vnode
*mydvp
=dvp
;
4126 if (ret
!= ENOSPC
) {
4127 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4128 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4130 *truncated_path
= 1;
4133 if (mydvp
->v_parent
!= NULL
) {
4134 mydvp
= mydvp
->v_parent
;
4135 } else if (mydvp
->v_mount
) {
4136 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4139 // no parent and no mount point? only thing is to punt and say "/" changed
4140 strlcpy(path
, "/", _len
);
4145 if (mydvp
== NULL
) {
4150 ret
= vn_getpath(mydvp
, path
, &len
);
4151 } while (ret
== ENOSPC
);
4159 * Make a hard file link.
4161 * Returns: 0 Success
4166 * vnode_authorize:???
4171 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4172 user_addr_t link
, int flag
, enum uio_seg segflg
)
4174 vnode_t vp
, dvp
, lvp
;
4175 struct nameidata nd
;
4181 int need_event
, has_listeners
;
4182 char *target_path
= NULL
;
4185 vp
= dvp
= lvp
= NULLVP
;
4187 /* look up the object we are linking to */
4188 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4189 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4192 error
= nameiat(&nd
, fd1
);
4200 * Normally, linking to directories is not supported.
4201 * However, some file systems may have limited support.
4203 if (vp
->v_type
== VDIR
) {
4204 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) {
4205 error
= EPERM
; /* POSIX */
4209 /* Linking to a directory requires ownership. */
4210 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4211 struct vnode_attr dva
;
4214 VATTR_WANTED(&dva
, va_uid
);
4215 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4216 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4217 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4224 /* lookup the target node */
4228 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4229 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4231 error
= nameiat(&nd
, fd2
);
4238 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4242 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4243 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4246 /* target node must not exist */
4247 if (lvp
!= NULLVP
) {
4251 /* cannot link across mountpoints */
4252 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4257 /* authorize creation of the target node */
4258 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4261 /* and finally make the link */
4262 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4267 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4271 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4275 has_listeners
= kauth_authorize_fileop_has_listeners();
4277 if (need_event
|| has_listeners
) {
4278 char *link_to_path
= NULL
;
4279 int len
, link_name_len
;
4281 /* build the path to the new link file */
4282 GET_PATH(target_path
);
4283 if (target_path
== NULL
) {
4288 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4290 if (has_listeners
) {
4291 /* build the path to file we are linking to */
4292 GET_PATH(link_to_path
);
4293 if (link_to_path
== NULL
) {
4298 link_name_len
= MAXPATHLEN
;
4299 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4301 * Call out to allow 3rd party notification of link.
4302 * Ignore result of kauth_authorize_fileop call.
4304 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4305 (uintptr_t)link_to_path
,
4306 (uintptr_t)target_path
);
4308 if (link_to_path
!= NULL
) {
4309 RELEASE_PATH(link_to_path
);
4314 /* construct fsevent */
4315 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4317 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4320 // build the path to the destination of the link
4321 add_fsevent(FSE_CREATE_FILE
, ctx
,
4322 FSE_ARG_STRING
, len
, target_path
,
4323 FSE_ARG_FINFO
, &finfo
,
4327 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4328 FSE_ARG_VNODE
, vp
->v_parent
,
4336 * nameidone has to happen before we vnode_put(dvp)
4337 * since it may need to release the fs_nodelock on the dvp
4340 if (target_path
!= NULL
) {
4341 RELEASE_PATH(target_path
);
4353 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4355 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4356 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4360 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4362 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4365 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4366 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4370 * Make a symbolic link.
4372 * We could add support for ACLs here too...
4376 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4377 user_addr_t link
, enum uio_seg segflg
)
4379 struct vnode_attr va
;
4382 struct nameidata nd
;
4388 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4389 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4390 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4392 path
= (char *)path_data
;
4396 AUDIT_ARG(text
, path
); /* This is the link string */
4398 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4401 error
= nameiat(&nd
, fd
);
4407 p
= vfs_context_proc(ctx
);
4409 VATTR_SET(&va
, va_type
, VLNK
);
4410 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4413 error
= mac_vnode_check_create(ctx
,
4414 dvp
, &nd
.ni_cnd
, &va
);
4427 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4428 /* get default ownership, etc. */
4430 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4432 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4435 if (error
== 0 && vp
)
4436 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4439 /* do fallback attribute handling */
4440 if (error
== 0 && vp
)
4441 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4444 int update_flags
= 0;
4446 /*check if a new vnode was created, else try to get one*/
4448 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4450 nd
.ni_op
= OP_LOOKUP
;
4452 nd
.ni_cnd
.cn_flags
= 0;
4453 error
= nameiat(&nd
, fd
);
4460 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4461 /* call out to allow 3rd party notification of rename.
4462 * Ignore result of kauth_authorize_fileop call.
4464 if (kauth_authorize_fileop_has_listeners() &&
4466 char *new_link_path
= NULL
;
4469 /* build the path to the new link file */
4470 new_link_path
= get_pathbuff();
4472 vn_getpath(dvp
, new_link_path
, &len
);
4473 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4474 new_link_path
[len
- 1] = '/';
4475 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4478 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4479 (uintptr_t)path
, (uintptr_t)new_link_path
);
4480 if (new_link_path
!= NULL
)
4481 release_pathbuff(new_link_path
);
4484 // Make sure the name & parent pointers are hooked up
4485 if (vp
->v_name
== NULL
)
4486 update_flags
|= VNODE_UPDATE_NAME
;
4487 if (vp
->v_parent
== NULLVP
)
4488 update_flags
|= VNODE_UPDATE_PARENT
;
4491 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4494 add_fsevent(FSE_CREATE_FILE
, ctx
,
4502 * nameidone has to happen before we vnode_put(dvp)
4503 * since it may need to release the fs_nodelock on the dvp
4511 if (path
&& (path
!= (char *)path_data
))
4512 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4518 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4520 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4521 uap
->link
, UIO_USERSPACE
));
4525 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4526 __unused
int32_t *retval
)
4528 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4529 uap
->path2
, UIO_USERSPACE
));
4533 * Delete a whiteout from the filesystem.
4534 * No longer supported.
4537 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4543 * Delete a name from the filesystem.
4547 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4548 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4550 struct nameidata nd
;
4553 struct componentname
*cnp
;
4558 struct vnode_attr va
;
4565 struct vnode_attr
*vap
;
4567 int retry_count
= 0;
4570 cn_flags
= LOCKPARENT
;
4571 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4572 cn_flags
|= AUDITVNPATH1
;
4573 /* If a starting dvp is passed, it trumps any fd passed. */
4578 /* unlink or delete is allowed on rsrc forks and named streams */
4579 cn_flags
|= CN_ALLOWRSRCFORK
;
4590 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4592 nd
.ni_dvp
= start_dvp
;
4593 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4597 error
= nameiat(&nd
, fd
);
4605 /* With Carbon delete semantics, busy files cannot be deleted */
4606 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4607 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4610 /* Skip any potential upcalls if told to. */
4611 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4612 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4616 batched
= vnode_compound_remove_available(vp
);
4618 * The root of a mounted filesystem cannot be deleted.
4620 if (vp
->v_flag
& VROOT
) {
4625 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4627 if (error
== ENOENT
) {
4628 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4629 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4640 if (!vnode_compound_remove_available(dvp
)) {
4641 panic("No vp, but no compound remove?");
4646 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4649 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4650 /* XXX need to get these data in batched VNOP */
4651 get_fse_info(vp
, &finfo
, ctx
);
4654 error
= vfs_get_notify_attributes(&va
);
4663 has_listeners
= kauth_authorize_fileop_has_listeners();
4664 if (need_event
|| has_listeners
) {
4672 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4676 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4677 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4681 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4683 if (error
== EKEEPLOOKING
) {
4685 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4688 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4689 panic("EKEEPLOOKING, but continue flag not set?");
4692 if (vnode_isdir(vp
)) {
4696 goto lookup_continue
;
4697 } else if (error
== ENOENT
&& batched
) {
4698 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4699 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4701 * For compound VNOPs, the authorization callback may
4702 * return ENOENT in case of racing hardlink lookups
4703 * hitting the name cache, redrive the lookup.
4713 * Call out to allow 3rd party notification of delete.
4714 * Ignore result of kauth_authorize_fileop call.
4717 if (has_listeners
) {
4718 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4719 KAUTH_FILEOP_DELETE
,
4724 if (vp
->v_flag
& VISHARDLINK
) {
4726 // if a hardlink gets deleted we want to blow away the
4727 // v_parent link because the path that got us to this
4728 // instance of the link is no longer valid. this will
4729 // force the next call to get the path to ask the file
4730 // system instead of just following the v_parent link.
4732 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4737 if (vp
->v_flag
& VISHARDLINK
) {
4738 get_fse_info(vp
, &finfo
, ctx
);
4740 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4742 if (truncated_path
) {
4743 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4745 add_fsevent(FSE_DELETE
, ctx
,
4746 FSE_ARG_STRING
, len
, path
,
4747 FSE_ARG_FINFO
, &finfo
,
4758 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4759 * will cause its shadow file to go away if necessary.
4761 if (vp
&& (vnode_isnamedstream(vp
)) &&
4762 (vp
->v_parent
!= NULLVP
) &&
4763 vnode_isshadow(vp
)) {
4768 * nameidone has to happen before we vnode_put(dvp)
4769 * since it may need to release the fs_nodelock on the dvp
4785 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4786 enum uio_seg segflg
, int unlink_flags
)
4788 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4793 * Delete a name from the filesystem using Carbon semantics.
4796 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4798 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4799 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4803 * Delete a name from the filesystem using POSIX semantics.
4806 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4808 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4809 uap
->path
, UIO_USERSPACE
, 0));
4813 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4815 if (uap
->flag
& ~AT_REMOVEDIR
)
4818 if (uap
->flag
& AT_REMOVEDIR
)
4819 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4820 uap
->path
, UIO_USERSPACE
));
4822 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4823 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4827 * Reposition read/write file offset.
4830 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4832 struct fileproc
*fp
;
4834 struct vfs_context
*ctx
;
4835 off_t offset
= uap
->offset
, file_size
;
4838 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4839 if (error
== ENOTSUP
)
4843 if (vnode_isfifo(vp
)) {
4849 ctx
= vfs_context_current();
4851 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4852 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4855 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4862 if ( (error
= vnode_getwithref(vp
)) ) {
4867 switch (uap
->whence
) {
4869 offset
+= fp
->f_fglob
->fg_offset
;
4872 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4874 offset
+= file_size
;
4882 if (uap
->offset
> 0 && offset
< 0) {
4883 /* Incremented/relative move past max size */
4887 * Allow negative offsets on character devices, per
4888 * POSIX 1003.1-2001. Most likely for writing disk
4891 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4892 /* Decremented/relative move before start */
4896 fp
->f_fglob
->fg_offset
= offset
;
4897 *retval
= fp
->f_fglob
->fg_offset
;
4903 * An lseek can affect whether data is "available to read." Use
4904 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4906 post_event_if_success(vp
, error
, NOTE_NONE
);
4907 (void)vnode_put(vp
);
4914 * Check access permissions.
4916 * Returns: 0 Success
4917 * vnode_authorize:???
4920 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4922 kauth_action_t action
;
4926 * If just the regular access bits, convert them to something
4927 * that vnode_authorize will understand.
4929 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4932 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4933 if (uflags
& W_OK
) {
4934 if (vnode_isdir(vp
)) {
4935 action
|= KAUTH_VNODE_ADD_FILE
|
4936 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4937 /* might want delete rights here too */
4939 action
|= KAUTH_VNODE_WRITE_DATA
;
4942 if (uflags
& X_OK
) {
4943 if (vnode_isdir(vp
)) {
4944 action
|= KAUTH_VNODE_SEARCH
;
4946 action
|= KAUTH_VNODE_EXECUTE
;
4950 /* take advantage of definition of uflags */
4951 action
= uflags
>> 8;
4955 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4960 /* action == 0 means only check for existence */
4962 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4973 * access_extended: Check access permissions in bulk.
4975 * Description: uap->entries Pointer to an array of accessx
4976 * descriptor structs, plus one or
4977 * more NULL terminated strings (see
4978 * "Notes" section below).
4979 * uap->size Size of the area pointed to by
4981 * uap->results Pointer to the results array.
4983 * Returns: 0 Success
4984 * ENOMEM Insufficient memory
4985 * EINVAL Invalid arguments
4986 * namei:EFAULT Bad address
4987 * namei:ENAMETOOLONG Filename too long
4988 * namei:ENOENT No such file or directory
4989 * namei:ELOOP Too many levels of symbolic links
4990 * namei:EBADF Bad file descriptor
4991 * namei:ENOTDIR Not a directory
4996 * uap->results Array contents modified
4998 * Notes: The uap->entries are structured as an arbitrary length array
4999 * of accessx descriptors, followed by one or more NULL terminated
5002 * struct accessx_descriptor[0]
5004 * struct accessx_descriptor[n]
5005 * char name_data[0];
5007 * We determine the entry count by walking the buffer containing
5008 * the uap->entries argument descriptor. For each descriptor we
5009 * see, the valid values for the offset ad_name_offset will be
5010 * in the byte range:
5012 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5014 * [ uap->entries + uap->size - 2 ]
5016 * since we must have at least one string, and the string must
5017 * be at least one character plus the NULL terminator in length.
5019 * XXX: Need to support the check-as uid argument
5022 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
5024 struct accessx_descriptor
*input
= NULL
;
5025 errno_t
*result
= NULL
;
5028 unsigned int desc_max
, desc_actual
, i
, j
;
5029 struct vfs_context context
;
5030 struct nameidata nd
;
5034 #define ACCESSX_MAX_DESCR_ON_STACK 10
5035 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
5037 context
.vc_ucred
= NULL
;
5040 * Validate parameters; if valid, copy the descriptor array and string
5041 * arguments into local memory. Before proceeding, the following
5042 * conditions must have been met:
5044 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5045 * o There must be sufficient room in the request for at least one
5046 * descriptor and a one byte NUL terminated string.
5047 * o The allocation of local storage must not fail.
5049 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
5051 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
5053 if (uap
->size
<= sizeof (stack_input
)) {
5054 input
= stack_input
;
5056 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
5057 if (input
== NULL
) {
5062 error
= copyin(uap
->entries
, input
, uap
->size
);
5066 AUDIT_ARG(opaque
, input
, uap
->size
);
5069 * Force NUL termination of the copyin buffer to avoid namei() running
5070 * off the end. If the caller passes us bogus data, they may get a
5073 ((char *)input
)[uap
->size
- 1] = 0;
5076 * Access is defined as checking against the process' real identity,
5077 * even if operations are checking the effective identity. This
5078 * requires that we use a local vfs context.
5080 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5081 context
.vc_thread
= current_thread();
5084 * Find out how many entries we have, so we can allocate the result
5085 * array by walking the list and adjusting the count downward by the
5086 * earliest string offset we see.
5088 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
5089 desc_actual
= desc_max
;
5090 for (i
= 0; i
< desc_actual
; i
++) {
5092 * Take the offset to the name string for this entry and
5093 * convert to an input array index, which would be one off
5094 * the end of the array if this entry was the lowest-addressed
5097 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
5100 * An offset greater than the max allowable offset is an error.
5101 * It is also an error for any valid entry to point
5102 * to a location prior to the end of the current entry, if
5103 * it's not a reference to the string of the previous entry.
5105 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
5110 /* Also do not let ad_name_offset point to something beyond the size of the input */
5111 if (input
[i
].ad_name_offset
>= uap
->size
) {
5117 * An offset of 0 means use the previous descriptor's offset;
5118 * this is used to chain multiple requests for the same file
5119 * to avoid multiple lookups.
5122 /* This is not valid for the first entry */
5131 * If the offset of the string for this descriptor is before
5132 * what we believe is the current actual last descriptor,
5133 * then we need to adjust our estimate downward; this permits
5134 * the string table following the last descriptor to be out
5135 * of order relative to the descriptor list.
5137 if (j
< desc_actual
)
5142 * We limit the actual number of descriptors we are willing to process
5143 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5144 * requested does not exceed this limit,
5146 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5150 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5151 if (result
== NULL
) {
5157 * Do the work by iterating over the descriptor entries we know to
5158 * at least appear to contain valid data.
5161 for (i
= 0; i
< desc_actual
; i
++) {
5163 * If the ad_name_offset is 0, then we use the previous
5164 * results to make the check; otherwise, we are looking up
5167 if (input
[i
].ad_name_offset
!= 0) {
5168 /* discard old vnodes */
5179 * Scan forward in the descriptor list to see if we
5180 * need the parent vnode. We will need it if we are
5181 * deleting, since we must have rights to remove
5182 * entries in the parent directory, as well as the
5183 * rights to delete the object itself.
5185 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5186 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5187 if (input
[j
].ad_flags
& _DELETE_OK
)
5190 niopts
= FOLLOW
| AUDITVNPATH1
;
5192 /* need parent for vnode_authorize for deletion test */
5194 niopts
|= WANTPARENT
;
5197 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5198 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5210 * Handle lookup errors.
5220 /* run this access check */
5221 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5224 /* fatal lookup error */
5230 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5232 /* copy out results */
5233 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5236 if (input
&& input
!= stack_input
)
5237 FREE(input
, M_TEMP
);
5239 FREE(result
, M_TEMP
);
5244 if (IS_VALID_CRED(context
.vc_ucred
))
5245 kauth_cred_unref(&context
.vc_ucred
);
5251 * Returns: 0 Success
5252 * namei:EFAULT Bad address
5253 * namei:ENAMETOOLONG Filename too long
5254 * namei:ENOENT No such file or directory
5255 * namei:ELOOP Too many levels of symbolic links
5256 * namei:EBADF Bad file descriptor
5257 * namei:ENOTDIR Not a directory
5262 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5263 int flag
, enum uio_seg segflg
)
5266 struct nameidata nd
;
5268 struct vfs_context context
;
5270 int is_namedstream
= 0;
5274 * Unless the AT_EACCESS option is used, Access is defined as checking
5275 * against the process' real identity, even if operations are checking
5276 * the effective identity. So we need to tweak the credential
5277 * in the context for that case.
5279 if (!(flag
& AT_EACCESS
))
5280 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5282 context
.vc_ucred
= ctx
->vc_ucred
;
5283 context
.vc_thread
= ctx
->vc_thread
;
5286 niopts
= FOLLOW
| AUDITVNPATH1
;
5287 /* need parent for vnode_authorize for deletion test */
5288 if (amode
& _DELETE_OK
)
5289 niopts
|= WANTPARENT
;
5290 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5294 /* access(F_OK) calls are allowed for resource forks. */
5296 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5298 error
= nameiat(&nd
, fd
);
5303 /* Grab reference on the shadow stream file vnode to
5304 * force an inactive on release which will mark it
5307 if (vnode_isnamedstream(nd
.ni_vp
) &&
5308 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5309 vnode_isshadow(nd
.ni_vp
)) {
5311 vnode_ref(nd
.ni_vp
);
5315 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5318 if (is_namedstream
) {
5319 vnode_rele(nd
.ni_vp
);
5323 vnode_put(nd
.ni_vp
);
5324 if (amode
& _DELETE_OK
)
5325 vnode_put(nd
.ni_dvp
);
5329 if (!(flag
& AT_EACCESS
))
5330 kauth_cred_unref(&context
.vc_ucred
);
5335 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5337 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5338 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5342 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5343 __unused
int32_t *retval
)
5345 if (uap
->flag
& ~AT_EACCESS
)
5348 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5349 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5353 * Returns: 0 Success
5360 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5361 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5362 enum uio_seg segflg
, int fd
, int flag
)
5364 struct nameidata nd
;
5371 struct user64_stat user64_sb
;
5372 struct user32_stat user32_sb
;
5373 struct user64_stat64 user64_sb64
;
5374 struct user32_stat64 user32_sb64
;
5378 kauth_filesec_t fsec
;
5379 size_t xsecurity_bufsize
;
5382 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5383 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5387 int is_namedstream
= 0;
5388 /* stat calls are allowed for resource forks. */
5389 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5391 error
= nameiat(&nd
, fd
);
5394 fsec
= KAUTH_FILESEC_NONE
;
5396 statptr
= (void *)&source
;
5399 /* Grab reference on the shadow stream file vnode to
5400 * force an inactive on release which will mark it
5403 if (vnode_isnamedstream(nd
.ni_vp
) &&
5404 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5405 vnode_isshadow(nd
.ni_vp
)) {
5407 vnode_ref(nd
.ni_vp
);
5411 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5414 if (is_namedstream
) {
5415 vnode_rele(nd
.ni_vp
);
5418 vnode_put(nd
.ni_vp
);
5423 /* Zap spare fields */
5424 if (isstat64
!= 0) {
5425 source
.sb64
.st_lspare
= 0;
5426 source
.sb64
.st_qspare
[0] = 0LL;
5427 source
.sb64
.st_qspare
[1] = 0LL;
5428 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5429 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5430 my_size
= sizeof(dest
.user64_sb64
);
5431 sbp
= (caddr_t
)&dest
.user64_sb64
;
5433 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5434 my_size
= sizeof(dest
.user32_sb64
);
5435 sbp
= (caddr_t
)&dest
.user32_sb64
;
5438 * Check if we raced (post lookup) against the last unlink of a file.
5440 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5441 source
.sb64
.st_nlink
= 1;
5444 source
.sb
.st_lspare
= 0;
5445 source
.sb
.st_qspare
[0] = 0LL;
5446 source
.sb
.st_qspare
[1] = 0LL;
5447 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5448 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5449 my_size
= sizeof(dest
.user64_sb
);
5450 sbp
= (caddr_t
)&dest
.user64_sb
;
5452 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5453 my_size
= sizeof(dest
.user32_sb
);
5454 sbp
= (caddr_t
)&dest
.user32_sb
;
5458 * Check if we raced (post lookup) against the last unlink of a file.
5460 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5461 source
.sb
.st_nlink
= 1;
5464 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5467 /* caller wants extended security information? */
5468 if (xsecurity
!= USER_ADDR_NULL
) {
5470 /* did we get any? */
5471 if (fsec
== KAUTH_FILESEC_NONE
) {
5472 if (susize(xsecurity_size
, 0) != 0) {
5477 /* find the user buffer size */
5478 xsecurity_bufsize
= fusize(xsecurity_size
);
5480 /* copy out the actual data size */
5481 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5486 /* if the caller supplied enough room, copy out to it */
5487 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5488 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5492 if (fsec
!= KAUTH_FILESEC_NONE
)
5493 kauth_filesec_free(fsec
);
5498 * stat_extended: Get file status; with extended security (ACL).
5500 * Parameters: p (ignored)
5501 * uap User argument descriptor (see below)
5504 * Indirect: uap->path Path of file to get status from
5505 * uap->ub User buffer (holds file status info)
5506 * uap->xsecurity ACL to get (extended security)
5507 * uap->xsecurity_size Size of ACL
5509 * Returns: 0 Success
5514 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5515 __unused
int32_t *retval
)
5517 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5518 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5523 * Returns: 0 Success
5524 * fstatat_internal:??? [see fstatat_internal() in this file]
5527 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5529 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5530 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5534 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5536 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5537 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5541 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5543 * Parameters: p (ignored)
5544 * uap User argument descriptor (see below)
5547 * Indirect: uap->path Path of file to get status from
5548 * uap->ub User buffer (holds file status info)
5549 * uap->xsecurity ACL to get (extended security)
5550 * uap->xsecurity_size Size of ACL
5552 * Returns: 0 Success
5557 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5559 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5560 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5565 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5567 * Parameters: p (ignored)
5568 * uap User argument descriptor (see below)
5571 * Indirect: uap->path Path of file to get status from
5572 * uap->ub User buffer (holds file status info)
5573 * uap->xsecurity ACL to get (extended security)
5574 * uap->xsecurity_size Size of ACL
5576 * Returns: 0 Success
5581 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5583 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5584 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5585 AT_SYMLINK_NOFOLLOW
));
5589 * Get file status; this version does not follow links.
5592 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5594 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5595 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5599 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5601 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5602 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5606 * lstat64_extended: Get file status; can handle large inode numbers; does not
5607 * follow links; with extended security (ACL).
5609 * Parameters: p (ignored)
5610 * uap User argument descriptor (see below)
5613 * Indirect: uap->path Path of file to get status from
5614 * uap->ub User buffer (holds file status info)
5615 * uap->xsecurity ACL to get (extended security)
5616 * uap->xsecurity_size Size of ACL
5618 * Returns: 0 Success
5623 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5625 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5626 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5627 AT_SYMLINK_NOFOLLOW
));
5631 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5633 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5636 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5637 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5641 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5642 __unused
int32_t *retval
)
5644 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5647 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5648 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5652 * Get configurable pathname variables.
5654 * Returns: 0 Success
5658 * Notes: Global implementation constants are intended to be
5659 * implemented in this function directly; all other constants
5660 * are per-FS implementation, and therefore must be handled in
5661 * each respective FS, instead.
5663 * XXX We implement some things globally right now that should actually be
5664 * XXX per-FS; we will need to deal with this at some point.
5668 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5671 struct nameidata nd
;
5672 vfs_context_t ctx
= vfs_context_current();
5674 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5675 UIO_USERSPACE
, uap
->path
, ctx
);
5680 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5682 vnode_put(nd
.ni_vp
);
5688 * Return target name of a symbolic link.
5692 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5693 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5699 struct nameidata nd
;
5700 char uio_buf
[ UIO_SIZEOF(1) ];
5702 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5705 error
= nameiat(&nd
, fd
);
5712 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5713 &uio_buf
[0], sizeof(uio_buf
));
5714 uio_addiov(auio
, buf
, bufsize
);
5715 if (vp
->v_type
!= VLNK
) {
5719 error
= mac_vnode_check_readlink(ctx
, vp
);
5722 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5725 error
= VNOP_READLINK(vp
, auio
, ctx
);
5729 *retval
= bufsize
- (int)uio_resid(auio
);
5734 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5736 enum uio_seg procseg
;
5738 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5739 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5740 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5741 uap
->count
, procseg
, retval
));
5745 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5747 enum uio_seg procseg
;
5749 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5750 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5751 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5755 * Change file flags.
5758 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5760 struct vnode_attr va
;
5761 kauth_action_t action
;
5765 VATTR_SET(&va
, va_flags
, flags
);
5768 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5773 /* request authorisation, disregard immutability */
5774 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5777 * Request that the auth layer disregard those file flags it's allowed to when
5778 * authorizing this operation; we need to do this in order to be able to
5779 * clear immutable flags.
5781 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5783 error
= vnode_setattr(vp
, &va
, ctx
);
5787 mac_vnode_notify_setflags(ctx
, vp
, flags
);
5790 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5799 * Change flags of a file given a path name.
5803 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5806 vfs_context_t ctx
= vfs_context_current();
5808 struct nameidata nd
;
5810 AUDIT_ARG(fflags
, uap
->flags
);
5811 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5812 UIO_USERSPACE
, uap
->path
, ctx
);
5819 error
= chflags1(vp
, uap
->flags
, ctx
);
5825 * Change flags of a file given a file descriptor.
5829 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5834 AUDIT_ARG(fd
, uap
->fd
);
5835 AUDIT_ARG(fflags
, uap
->flags
);
5836 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5839 if ((error
= vnode_getwithref(vp
))) {
5844 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5846 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5853 * Change security information on a filesystem object.
5855 * Returns: 0 Success
5856 * EPERM Operation not permitted
5857 * vnode_authattr:??? [anything vnode_authattr can return]
5858 * vnode_authorize:??? [anything vnode_authorize can return]
5859 * vnode_setattr:??? [anything vnode_setattr can return]
5861 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5862 * translated to EPERM before being returned.
5865 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5867 kauth_action_t action
;
5870 AUDIT_ARG(mode
, vap
->va_mode
);
5871 /* XXX audit new args */
5874 /* chmod calls are not allowed for resource forks. */
5875 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5881 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5882 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5885 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) {
5886 if ((error
= mac_vnode_check_setowner(ctx
, vp
,
5887 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5888 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1)))
5892 if (VATTR_IS_ACTIVE(vap
, va_acl
) &&
5893 (error
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
)))
5897 /* make sure that the caller is allowed to set this security information */
5898 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5899 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5900 if (error
== EACCES
)
5905 if ((error
= vnode_setattr(vp
, vap
, ctx
)) != 0)
5909 if (VATTR_IS_ACTIVE(vap
, va_mode
))
5910 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
);
5912 if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
))
5913 mac_vnode_notify_setowner(ctx
, vp
,
5914 VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid
: -1,
5915 VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid
: -1);
5917 if (VATTR_IS_ACTIVE(vap
, va_acl
))
5918 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
);
5926 * Change mode of a file given a path name.
5928 * Returns: 0 Success
5929 * namei:??? [anything namei can return]
5930 * chmod_vnode:??? [anything chmod_vnode can return]
5933 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5934 int fd
, int flag
, enum uio_seg segflg
)
5936 struct nameidata nd
;
5939 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5940 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5942 if ((error
= nameiat(&nd
, fd
)))
5944 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5945 vnode_put(nd
.ni_vp
);
5951 * chmod_extended: Change the mode of a file given a path name; with extended
5952 * argument list (including extended security (ACL)).
5954 * Parameters: p Process requesting the open
5955 * uap User argument descriptor (see below)
5958 * Indirect: uap->path Path to object (same as 'chmod')
5959 * uap->uid UID to set
5960 * uap->gid GID to set
5961 * uap->mode File mode to set (same as 'chmod')
5962 * uap->xsecurity ACL to set (or delete)
5964 * Returns: 0 Success
5967 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5969 * XXX: We should enumerate the possible errno values here, and where
5970 * in the code they originated.
/*
 * chmod_extended: path-based chmod with optional owner, group and ACL.
 *
 * Builds a vnode_attr from the user arguments (each of mode/uid/gid is set
 * only when it differs from its "leave alone" sentinel), decodes
 * uap->xsecurity into an ACL setting, and applies everything through
 * chmodat() relative to AT_FDCWD with a flag argument of 0.
 *
 * NOTE(review): several lines are absent from this listing (declarations,
 * break/default labels in the switch, error returns, the tail of the
 * chmodat() call); confirm against the complete source.
 */
5973 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
5976 struct vnode_attr va
;
5977 kauth_filesec_t xsecdst
;
5979 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
/* a mode of -1 means the mode is not part of this request */
5982 if (uap
->mode
!= -1)
5983 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
/* KAUTH_UID_NONE / KAUTH_GID_NONE likewise leave owner / group out */
5984 if (uap
->uid
!= KAUTH_UID_NONE
)
5985 VATTR_SET(&va
, va_uid
, uap
->uid
);
5986 if (uap
->gid
!= KAUTH_GID_NONE
)
5987 VATTR_SET(&va
, va_gid
, uap
->gid
);
/*
 * xsecurity is either a sentinel or a user pointer: the value 1 asks for
 * ACL removal (va_acl set to NULL); a filesec is copied in with
 * kauth_copyinfilesec() and its ACL installed in va. The labels that
 * separate these paths are only partly visible in this listing.
 */
5990 switch(uap
->xsecurity
) {
5991 /* explicit remove request */
5992 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5993 VATTR_SET(&va
, va_acl
, NULL
);
5996 case USER_ADDR_NULL
:
5999 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6001 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6002 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
6005 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
/* free the copied-in filesec, if one was allocated */
6008 if (xsecdst
!= NULL
)
6009 kauth_filesec_free(xsecdst
);
6014 * Returns: 0 Success
6015 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: common worker for chmod() and fchmodat().
 *
 * Puts `mode`, masked with ALLPERMS, into a vnode_attr as va_mode and
 * forwards ctx/path/fd/flag/segflg unchanged to chmodat(), returning its
 * result.
 *
 * NOTE(review): the initialisation of `va` is not visible in this listing.
 */
6018 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
6019 int flag
, enum uio_seg segflg
)
6021 struct vnode_attr va
;
/* only the ALLPERMS bits of the requested mode are passed on */
6024 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
6026 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
/*
 * chmod: change the mode of the file named by uap->path.
 *
 * Thin wrapper: calls fchmodat_internal() with the current VFS context,
 * AT_FDCWD as the directory descriptor, a flag argument of 0 and a
 * user-space path (UIO_USERSPACE), and returns its result.
 */
6030 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
6032 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6033 AT_FDCWD
, 0, UIO_USERSPACE
));
/*
 * fchmodat: change the mode of uap->path, resolved relative to uap->fd.
 *
 * Forwards path, mode, fd and flag to fchmodat_internal() with the current
 * VFS context and UIO_USERSPACE.
 */
6037 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
/*
 * AT_SYMLINK_NOFOLLOW is the only flag bit accepted; any other bit takes
 * this branch. The branch body is not visible in this listing
 * (presumably an error return -- TODO confirm).
 */
6039 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6042 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
6043 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
6047 * Change mode of a file given a file descriptor.
/*
 * fchmod1: descriptor-based worker shared by fchmod() and
 * fchmod_extended().
 *
 * Maps `fd` to a vnode with file_vnode(), takes a reference on it with
 * vnode_getwithref(), records it for audit, applies `vap` through
 * chmod_vnode() using the current VFS context, and then drops the vnode
 * with vnode_put().
 *
 * NOTE(review): declarations, the failure-path statements and the final
 * return are absent from this listing.
 */
6050 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
6057 if ((error
= file_vnode(fd
, &vp
)) != 0)
6059 if ((error
= vnode_getwithref(vp
)) != 0) {
6063 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6065 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
/* pairs with the vnode_getwithref() above */
6066 (void)vnode_put(vp
);
6073 * fchmod_extended: Change mode of a file given a file descriptor; with
6074 * extended argument list (including extended security (ACL)).
6076 * Parameters: p Process requesting to change file mode
6077 * uap User argument descriptor (see below)
6080 * Indirect: uap->mode File mode to set (same as 'chmod')
6081 * uap->uid UID to set
6082 * uap->gid GID to set
6083 * uap->xsecurity ACL to set (or delete)
6084 * uap->fd File descriptor of file to change mode
6086 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based chmod with optional owner, group and
 * ACL.
 *
 * Mirrors chmod_extended(): mode/uid/gid are added to the vnode_attr only
 * when they differ from -1 / KAUTH_UID_NONE / KAUTH_GID_NONE, the
 * xsecurity argument is decoded into an ACL setting, and the result is
 * applied with fchmod1(p, uap->fd, &va).
 *
 * NOTE(review): declarations, break/default labels and error returns are
 * absent from this listing; confirm against the complete source.
 */
6091 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
6094 struct vnode_attr va
;
6095 kauth_filesec_t xsecdst
;
6097 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6100 if (uap
->mode
!= -1)
6101 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6102 if (uap
->uid
!= KAUTH_UID_NONE
)
6103 VATTR_SET(&va
, va_uid
, uap
->uid
);
6104 if (uap
->gid
!= KAUTH_GID_NONE
)
6105 VATTR_SET(&va
, va_gid
, uap
->gid
);
/*
 * Decode xsecurity. Both USER_ADDR_NULL and the value 1 set va_acl to
 * NULL here; a -1 label is also present (its body is not visible); a
 * filesec is copied in with kauth_copyinfilesec() and its ACL installed.
 */
6108 switch(uap
->xsecurity
) {
6109 case USER_ADDR_NULL
:
6110 VATTR_SET(&va
, va_acl
, NULL
);
6112 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6113 VATTR_SET(&va
, va_acl
, NULL
);
6116 case CAST_USER_ADDR_T(-1):
6119 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
6121 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6124 error
= fchmod1(p
, uap
->fd
, &va
);
/*
 * Cleanup: xsecurity is switched on again after the attribute change,
 * and a non-NULL xsecdst is released with kauth_filesec_free(). Which
 * case labels reach the free is not fully visible here.
 */
6127 switch(uap
->xsecurity
) {
6128 case USER_ADDR_NULL
:
6129 case CAST_USER_ADDR_T(-1):
6132 if (xsecdst
!= NULL
)
6133 kauth_filesec_free(xsecdst
);
/*
 * fchmod: change the mode of the file open on uap->fd.
 *
 * Sets va_mode to uap->mode masked with ALLPERMS and passes the attribute
 * set to fchmod1(), returning its result.
 *
 * NOTE(review): the initialisation of `va` is not visible in this listing.
 */
6139 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
6141 struct vnode_attr va
;
6144 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
6146 return(fchmod1(p
, uap
->fd
, &va
));
6151 * Set ownership given a path name.
/*
 * fchownat_internal: common worker for chown(), lchown() and fchownat().
 *
 * Looks up `path` relative to `fd` (NOFOLLOW when AT_SYMLINK_NOFOLLOW is
 * set in `flag`, FOLLOW otherwise), builds a vnode_attr holding the new
 * uid and/or gid, runs the MAC check and the vnode_authattr() /
 * vnode_authorize() preflight, applies the change with vnode_setattr(),
 * and sends the MAC notification.
 *
 * A uid or gid equal to VNOVAL (after casting) is left out of the request.
 *
 * NOTE(review): declarations, goto/return statements, preprocessor
 * conditionals and the cleanup tail are absent from this listing.
 */
6155 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
6156 gid_t gid
, int flag
, enum uio_seg segflg
)
6159 struct vnode_attr va
;
6161 struct nameidata nd
;
6163 kauth_action_t action
;
6165 AUDIT_ARG(owner
, uid
, gid
);
6167 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6168 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6170 error
= nameiat(&nd
, fd
);
/* only the ids the caller actually supplied become active attributes */
6178 if (uid
!= (uid_t
)VNOVAL
)
6179 VATTR_SET(&va
, va_uid
, uid
);
6180 if (gid
!= (gid_t
)VNOVAL
)
6181 VATTR_SET(&va
, va_gid
, gid
);
6184 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6189 /* preflight and authorize attribute changes */
6190 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
/* vnode_authorize() is consulted only when authattr produced an action */
6192 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6194 error
= vnode_setattr(vp
, &va
, ctx
);
6198 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
);
6203 * EACCES is only allowed from namei(); permissions failure should
6204 * return EPERM, so we need to translate the error code.
6206 if (error
== EACCES
)
/*
 * chown: set owner and group of the file named by uap->path.
 *
 * Thin wrapper: calls fchownat_internal() with the current VFS context,
 * AT_FDCWD, a flag argument of 0 and UIO_USERSPACE, and returns its
 * result.
 */
6214 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6216 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6217 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
/*
 * lchown: like chown(), but passes AT_SYMLINK_NOFOLLOW so that
 * fchownat_internal() performs the lookup with NOFOLLOW.
 *
 * Note that the argument structure names its fields owner/group rather
 * than uid/gid.
 */
6221 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6223 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6224 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
/*
 * fchownat: set owner and group of uap->path, resolved relative to
 * uap->fd.
 *
 * Forwards fd, path, uid, gid and flag to fchownat_internal() with the
 * current VFS context and UIO_USERSPACE.
 */
6228 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
/*
 * AT_SYMLINK_NOFOLLOW is the only flag bit accepted; any other bit takes
 * this branch. The branch body is not visible in this listing
 * (presumably an error return -- TODO confirm).
 */
6230 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6233 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6234 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6238 * Set ownership given a file descriptor.
/*
 * fchown: set owner and group of the file open on uap->fd.
 *
 * Maps the descriptor to a vnode (file_vnode + vnode_getwithref), builds a
 * vnode_attr with the uid/gid that are not VNOVAL, and then follows the
 * same sequence as the path-based variant: MAC check, vnode_authattr(),
 * vnode_authorize() when an action was produced, vnode_setattr(), MAC
 * notification. The vnode is released with vnode_put() at the end.
 *
 * NOTE(review): declarations, goto/return statements, preprocessor
 * conditionals and the bodies of the error branches are absent from this
 * listing.
 */
6242 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6244 struct vnode_attr va
;
6245 vfs_context_t ctx
= vfs_context_current();
6248 kauth_action_t action
;
6250 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6251 AUDIT_ARG(fd
, uap
->fd
);
6253 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6256 if ( (error
= vnode_getwithref(vp
)) ) {
6260 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
/* VNOVAL marks an id the caller does not want changed */
6263 if (uap
->uid
!= VNOVAL
)
6264 VATTR_SET(&va
, va_uid
, uap
->uid
);
6265 if (uap
->gid
!= VNOVAL
)
6266 VATTR_SET(&va
, va_gid
, uap
->gid
);
6269 /* chown calls are not allowed for resource forks. */
6270 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6277 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6282 /* preflight and authorize attribute changes */
6283 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
/*
 * An EACCES from vnode_authorize() gets special handling; the statement
 * itself is not visible here (the path-based variant documents an
 * EACCES to EPERM translation -- TODO confirm it is the same).
 */
6285 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6286 if (error
== EACCES
)
6290 error
= vnode_setattr(vp
, &va
, ctx
);
6294 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
/* pairs with the vnode_getwithref() above */
6298 (void)vnode_put(vp
);
/*
 * getutimes: fill tsp[] with the timestamps for a utimes-style call.
 *
 * usrtvp  user address of an array of two timevals, or USER_ADDR_NULL
 * tsp     output array of timespecs
 *
 * With USER_ADDR_NULL a local timeval is converted into tsp[0] (the
 * statement that fills it, and any assignment to tsp[1], are not visible
 * in this listing). Otherwise two timevals are copied in from user space
 * using the 64-bit or 32-bit user layout according to IS_64BIT_PROCESS(),
 * and converted into tsp[0] and tsp[1].
 *
 * NOTE(review): error checks after copyin() and the return statement are
 * absent from this listing.
 */
6304 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6308 if (usrtvp
== USER_ADDR_NULL
) {
6309 struct timeval old_tv
;
6310 /* XXX Y2038 bug because of microtime argument */
6312 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
/* the user timeval layout depends on the bitness of the calling process */
6315 if (IS_64BIT_PROCESS(current_proc())) {
6316 struct user64_timeval tv
[2];
6317 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6320 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6321 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6323 struct user32_timeval tv
[2];
6324 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6327 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6328 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: apply access and modification times to a vnode.
 *
 * ts[0] becomes va_access_time and ts[1] becomes va_modify_time. The
 * request goes through the MAC check, vnode_authattr(), vnode_authorize()
 * when an action was produced, vnode_setattr(), and finally the MAC
 * notification.
 *
 * The body also reads `nullflag`, whose declaration (presumably the last
 * parameter -- TODO confirm) is not visible in this listing. The callers
 * in this file pass whether the user time pointer was null.
 *
 * NOTE(review): declarations, goto/return statements, preprocessor
 * conditionals and branch bodies are absent from this listing.
 */
6335 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6339 struct vnode_attr va
;
6340 kauth_action_t action
;
6342 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6345 VATTR_SET(&va
, va_access_time
, ts
[0]);
6346 VATTR_SET(&va
, va_modify_time
, ts
[1]);
/* NOTE(review): the condition guarding this flag is not visible here */
6348 va
.va_vaflags
|= VA_UTIMES_NULL
;
6351 /* utimes calls are not allowed for resource forks. */
6352 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6359 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
/*
 * EACCES is singled out when explicit times were supplied (!nullflag);
 * the statement taken in that case is not visible in this listing.
 */
6363 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6364 if (!nullflag
&& error
== EACCES
)
6369 /* since we may not need to auth anything, check here */
6370 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6371 if (!nullflag
&& error
== EACCES
)
6375 error
= vnode_setattr(vp
, &va
, ctx
);
6379 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6387 * Set the access and modification times of a file.
/*
 * utimes: set access and modification times of the file named by
 * uap->path.
 *
 * Initialises a FOLLOW lookup of the user-space path, obtains the
 * timestamps with getutimes(), applies them to nd.ni_vp with setutimes()
 * (its last argument is true when the user time pointer was
 * USER_ADDR_NULL), and releases the vnode with vnode_put().
 *
 * NOTE(review): the lookup call itself, the declaration and assignment of
 * `usrtvp`, and the return paths are absent from this listing.
 */
6391 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6393 struct timespec ts
[2];
6396 struct nameidata nd
;
6397 vfs_context_t ctx
= vfs_context_current();
6400 * AUDIT: Needed to change the order of operations to do the
6401 * name lookup first because auditing wants the path.
6403 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6404 UIO_USERSPACE
, uap
->path
, ctx
);
6411 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6412 * the current time instead.
6415 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6418 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6421 vnode_put(nd
.ni_vp
);
6426 * Set the access and modification times of a file.
/*
 * futimes: set access and modification times of the file open on uap->fd.
 *
 * Obtains the timestamps with getutimes() first, then maps the descriptor
 * to a vnode (file_vnode + vnode_getwithref) and applies the times with
 * setutimes() under the current VFS context. The last setutimes()
 * argument is true when the user time pointer was 0.
 *
 * NOTE(review): declarations, the assignment of `usrtvp`, the
 * failure-path statements and the release of the vnode/descriptor are
 * absent from this listing.
 */
6430 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6432 struct timespec ts
[2];
6437 AUDIT_ARG(fd
, uap
->fd
);
6439 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6441 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6443 if((error
= vnode_getwithref(vp
))) {
6448 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6455 * Truncate a file given its path name.
/*
 * truncate: set the data size of the file named by uap->path to
 * uap->length.
 *
 * Performs a FOLLOW lookup of the user-space path with OP_TRUNCATE,
 * requests va_data_size = uap->length, and pushes the request through the
 * MAC truncate check (with NOCRED), vnode_authattr(), vnode_authorize()
 * when an action was produced, vnode_setattr(), and the MAC truncate
 * notification.
 *
 * NOTE(review): declarations, goto/return statements, preprocessor
 * conditionals and the cleanup tail are absent from this listing.
 */
6459 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6462 struct vnode_attr va
;
6463 vfs_context_t ctx
= vfs_context_current();
6465 struct nameidata nd
;
6466 kauth_action_t action
;
/* negative lengths take this branch; its body is not visible here */
6468 if (uap
->length
< 0)
6470 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6471 UIO_USERSPACE
, uap
->path
, ctx
);
6472 if ((error
= namei(&nd
)))
6479 VATTR_SET(&va
, va_data_size
, uap
->length
);
6482 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6487 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6489 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6491 error
= vnode_setattr(vp
, &va
, ctx
);
6495 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
);
6504 * Truncate a file given a file descriptor.
/*
 * ftruncate: set the data size of the object open on uap->fd to
 * uap->length.
 *
 * Looks the descriptor up with fp_lookup() and dispatches on its type
 * (FILEGLOB_DTYPE): one case is handed to pshm_truncate(); for the vnode
 * case the vnode is taken from fg_data, the descriptor must have FWRITE
 * set, and after vnode_getwithref() and the MAC truncate check (using the
 * credential stored in the fileglob) va_data_size is applied with
 * vnode_setattr().
 *
 * NOTE(review): declarations (including `fd`), case labels, goto/return
 * statements, preprocessor conditionals and the cleanup tail are absent
 * from this listing.
 */
6508 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6510 vfs_context_t ctx
= vfs_context_current();
6511 struct vnode_attr va
;
6513 struct fileproc
*fp
;
6517 AUDIT_ARG(fd
, uap
->fd
);
/* negative lengths take this branch; its body is not visible here */
6518 if (uap
->length
< 0)
6521 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6525 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6527 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6536 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
/* descriptor was not opened for writing */
6538 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6539 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6544 if ((error
= vnode_getwithref(vp
)) != 0) {
6548 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6551 error
= mac_vnode_check_truncate(ctx
,
6552 fp
->f_fglob
->fg_cred
, vp
);
/* presumably the release on the MAC-check failure path -- TODO confirm */
6554 (void)vnode_put(vp
);
6559 VATTR_SET(&va
, va_data_size
, uap
->length
);
6560 error
= vnode_setattr(vp
, &va
, ctx
);
6564 mac_vnode_notify_truncate(ctx
, fp
->f_fglob
->fg_cred
, vp
);
6567 (void)vnode_put(vp
);
6575 * Sync an open file with synchronized I/O _file_ integrity completion
/*
 * fsync: file-integrity sync of the descriptor in uap.
 *
 * Calls __pthread_testcancel(1) first, then delegates to fsync_common()
 * with MNT_WAIT and returns its result.
 */
6579 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6581 __pthread_testcancel(1);
6582 return(fsync_common(p
, uap
, MNT_WAIT
));
6587 * Sync an open file with synchronized I/O _file_ integrity completion
6589 * Notes: This is a legacy support function that does not test for
6590 * thread cancellation points.
/*
 * fsync_nocancel: same as fsync() but without the __pthread_testcancel()
 * call.
 *
 * The argument structure is cast to struct fsync_args and passed to
 * fsync_common() with MNT_WAIT.
 */
6594 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6596 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6601 * Sync an open file with synchronized I/O _data_ integrity completion
/*
 * fdatasync: data-integrity sync of the descriptor in uap.
 *
 * Calls __pthread_testcancel(1), then delegates to fsync_common() with
 * MNT_DWAIT, after casting the argument structure to struct fsync_args.
 */
6605 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6607 __pthread_testcancel(1);
6608 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6615 * Common fsync code to support both synchronized I/O file integrity completion
6616 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6618 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6619 * will only guarantee that the file data contents are retrievable. If
6620 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6621 * includes additional metadata unnecessary for retrieving the file data
6622 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6625 * Parameters: p The process
6626 * uap->fd The descriptor to synchronize
6627 * flags The data integrity flags
6629 * Returns: int Success
6630 * fp_getfvp:EBADF Bad file descriptor
6631 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6632 * VNOP_FSYNC:??? unspecified
6634 * Notes: We use struct fsync_args because it is a short name, and all
6635 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared implementation behind fsync(), fsync_nocancel()
 * and fdatasync().
 *
 * Resolves uap->fd to a fileproc and vnode with fp_getfvp(), takes a
 * reference with vnode_getwithref(), and issues VNOP_FSYNC(vp, flags,
 * ctx). For a named-stream vnode that has a parent, is a shadow vnode and
 * whose descriptor is marked FP_WRITTEN, the stream is additionally
 * flushed to the parent with vnode_flushnamedstream(). The vnode is
 * released with vnode_put().
 *
 * NOTE(review): declarations, failure-path statements, the first line of
 * the named-stream condition, preprocessor conditionals and the tail of
 * the function are absent from this listing.
 */
6638 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6641 struct fileproc
*fp
;
6642 vfs_context_t ctx
= vfs_context_current();
6645 AUDIT_ARG(fd
, uap
->fd
);
6647 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6649 if ( (error
= vnode_getwithref(vp
)) ) {
6654 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6656 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6659 /* Sync resource fork shadow file if necessary. */
6661 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6662 (vp
->v_parent
!= NULLVP
) &&
6663 vnode_isshadow(vp
) &&
6664 (fp
->f_flags
& FP_WRITTEN
)) {
/* result deliberately ignored via the (void) cast */
6665 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6669 (void)vnode_put(vp
);
6675 * Duplicate files. Source must be a file, target must be a file or
6678 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6679 * perform inheritance correctly.
/*
 * copyfile: duplicate the file named by uap->from to the name uap->to.
 *
 * Visible sequence: reject flag bits outside CPF_MASK; look up the source
 * (LOOKUP) and the destination (CREATE with LOCKPARENT | LOCKLEAF |
 * NOCACHE | SAVESTART); test CPF_OVERWRITE and test whether either side
 * is a directory; authorise opening the source for FREAD; authorise
 * unlink of the existing target (an ENOENT from that check is tolerated);
 * run the MAC create check with the requested mode masked by the process
 * cmask, ALLPERMS, ~S_ISTXT and ACCESSPERMS; authorise
 * KAUTH_VNODE_ADD_FILE on the target directory; call VNOP_COPYFILE().
 *
 * NOTE(review): declarations, the assignments of fvp/tvp/tdvp, branch
 * bodies, goto/return statements, preprocessor conditionals and the
 * cleanup tail are absent from this listing; confirm against the complete
 * source.
 */
6683 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6685 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6686 struct nameidata fromnd
, tond
;
6688 vfs_context_t ctx
= vfs_context_current();
6690 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
6691 struct vnode_attr va
;
6694 /* Check that the flags are valid. */
6696 if (uap
->flags
& ~CPF_MASK
) {
6700 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6701 UIO_USERSPACE
, uap
->from
, ctx
);
6702 if ((error
= namei(&fromnd
)))
6706 NDINIT(&tond
, CREATE
, OP_LINK
,
6707 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6708 UIO_USERSPACE
, uap
->to
, ctx
);
6709 if ((error
= namei(&tond
))) {
/* overwrite was not requested; the body of this branch is not visible */
6716 if (!(uap
->flags
& CPF_OVERWRITE
)) {
/* directory source, or existing directory target; body not visible */
6722 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6727 /* This calls existing MAC hooks for open */
6728 if ((error
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
,
6735 * See unlinkat_internal for an explanation of the potential
6736 * ENOENT from the MAC hook but the gist is that the MAC hook
6737 * can fail because vn_getpath isn't able to return the full
6738 * path. We choose to ignore this failure.
6740 error
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
);
6741 if (error
&& error
!= ENOENT
)
6748 VATTR_SET(&va
, va_type
, fvp
->v_type
);
6749 /* Mask off all but regular access permissions */
6750 VATTR_SET(&va
, va_mode
,
6751 ((((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
));
6752 error
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
);
6755 #endif /* CONFIG_MACF */
6757 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6763 * If source is the same as the destination (that is the
6764 * same inode number) then there is nothing to do.
6765 * (fixed to have POSIX semantics - CSM 3/2/98)
6770 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6772 sdvp
= tond
.ni_startdir
;
6774 * nameidone has to happen before we vnode_put(tdvp)
6775 * since it may need to release the fs_nodelock on the tdvp
6793 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6796 * Helper function for doing clones. The caller is expected to provide an
6797 * iocounted source vnode and release it.
/*
 * clonefile_internal: clone the vnode `fvp` to the name `dst`, resolved
 * relative to `dst_dirfd`.
 *
 * fvp                   source vnode supplied by the caller
 * data_read_authorised  when true, KAUTH_VNODE_READ_DATA is removed from
 *                       the rights checked on fvp
 * dst_dirfd, dst        directory descriptor and user path of the target
 * flags                 CLONE_NOFOLLOW selects NOFOLLOW for the target
 *                       lookup
 *
 * Visible sequence: choose the add-file or add-subdirectory action; look
 * up the target (CREATE, WANTPARENT); require source and target directory
 * to share a mount; run the MAC clone check and authorise the action on
 * tdvp and the read rights on fvp; fetch type/mode/flags/acl from the
 * source and adjust them (mode masked with ACCESSPERMS and the process
 * cmask, SF_RESTRICTED cleared); prepare the attributes; call
 * VNOP_CLONEFILE(). On success the new vnode is labelled, unsupported
 * attributes are applied through vnode_setattr_fallback(), the identity
 * is updated and an fsevent is posted. On ENOTSUP, when
 * CLONE_SNAPSHOT_FALLBACKS_ENABLED is set and the volume reports
 * VOL_CAP_INT_CLONE, VNOP_COPYFILE() is tried instead.
 *
 * NOTE(review): declarations, case labels, branch bodies, goto/return
 * statements, preprocessor conditionals and the cleanup tail are absent
 * from this listing; confirm against the complete source.
 */
6800 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
,
6801 user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
)
6804 struct nameidata tond
;
6808 boolean_t attr_cleanup
;
6810 kauth_action_t action
;
6811 struct componentname
*cnp
;
6813 struct vnode_attr va
;
6815 v_type
= vnode_vtype(fvp
);
6820 action
= KAUTH_VNODE_ADD_FILE
;
/* volume roots and mount points take this branch; body not visible */
6823 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) ||
6824 fvp
->v_mountedhere
) {
6827 action
= KAUTH_VNODE_ADD_SUBDIRECTORY
;
6833 AUDIT_ARG(fd2
, dst_dirfd
);
6834 AUDIT_ARG(value32
, flags
);
6836 follow
= (flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6837 NDINIT(&tond
, CREATE
, OP_LINK
, follow
| WANTPARENT
| AUDITVNPATH2
,
6838 UIO_USERSPACE
, dst
, ctx
);
6839 if ((error
= nameiat(&tond
, dst_dirfd
)))
6846 attr_cleanup
= FALSE
;
/* source and target directory are on different mounts; body not visible */
6853 if (vnode_mount(tdvp
) != vnode_mount(fvp
)) {
6859 if ((error
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
)))
6862 if ((error
= vnode_authorize(tdvp
, NULL
, action
, ctx
)))
/* `action` is reused here for the rights required on the source vnode */
6865 action
= KAUTH_VNODE_GENERIC_READ_BITS
;
6866 if (data_read_authorised
)
6867 action
&= ~KAUTH_VNODE_READ_DATA
;
6868 if ((error
= vnode_authorize(fvp
, NULL
, action
, ctx
)))
6872 * certain attributes may need to be changed from the source, we ask for
6876 VATTR_WANTED(&va
, va_type
);
6877 VATTR_WANTED(&va
, va_mode
);
6878 VATTR_WANTED(&va
, va_flags
);
6879 VATTR_WANTED(&va
, va_acl
);
6881 if ((error
= vnode_getattr(fvp
, &va
, ctx
)) != 0)
/* attributes the source file system did not return are dropped */
6884 if (!VATTR_IS_SUPPORTED(&va
, va_acl
))
6885 VATTR_CLEAR_ACTIVE(&va
, va_acl
);
6886 else if (va
.va_acl
!= NULL
)
6889 if (!VATTR_IS_SUPPORTED(&va
, va_mode
)) {
6890 VATTR_CLEAR_ACTIVE(&va
, va_mode
);
6892 proc_t p
= vfs_context_proc(ctx
);
6894 VATTR_SET(&va
, va_mode
,
6895 (va
.va_mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
6898 if (!VATTR_IS_SUPPORTED(&va
, va_flags
)) {
6899 VATTR_CLEAR_ACTIVE(&va
, va_flags
);
6900 } else if (va
.va_flags
& SF_RESTRICTED
) {
6902 * Turn off SF_RESTRICTED from source, if the destination needs
6903 * it, it will be handled in vnode_authattr_new.
6905 VATTR_SET(&va
, va_flags
, (va
.va_flags
& ~SF_RESTRICTED
));
6908 /* Handle ACL inheritance, initialize vap. */
6909 if (v_type
== VLNK
) {
6910 error
= vnode_authattr_new(tdvp
, &va
, 0, ctx
);
6912 error
= vn_attribute_prepare(tdvp
, &va
, &defaulted
, ctx
);
6913 attr_cleanup
= TRUE
;
6917 attr_cleanup
= FALSE
;
6921 error
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &va
, flags
, ctx
);
6923 if (!error
&& tvp
) {
6924 int update_flags
= 0;
6927 #endif /* CONFIG_FSE */
6930 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
,
6931 VNODE_LABEL_CREATE
, ctx
);
6934 * If some of the requested attributes weren't handled by the
6935 * VNOP, use our fallback code.
6937 if (!VATTR_ALL_SUPPORTED(&va
))
6938 (void)vnode_setattr_fallback(tvp
, &va
, ctx
);
6940 // Make sure the name & parent pointers are hooked up
6941 if (tvp
->v_name
== NULL
)
6942 update_flags
|= VNODE_UPDATE_NAME
;
6943 if (tvp
->v_parent
== NULLVP
)
6944 update_flags
|= VNODE_UPDATE_PARENT
;
6947 (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
,
6948 cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
);
6952 switch (vnode_vtype(tvp
)) {
6956 fsevent
= FSE_CREATE_FILE
;
6959 fsevent
= FSE_CREATE_DIR
;
6965 if (need_fsevent(fsevent
, tvp
)) {
6966 add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
,
6969 #endif /* CONFIG_FSE */
6971 #if CLONE_SNAPSHOT_FALLBACKS_ENABLED
6972 else if (error
== ENOTSUP
) {
6973 struct vfs_attr vfa
;
6976 * Fallback to VNOP_COPYFILE but check first that the
6977 * filesystem supports cloning.
6980 VFSATTR_WANTED(&vfa
, f_capabilities
);
6981 if ((vfs_getattr(vnode_mount(tdvp
), &vfa
, ctx
) == 0) &&
6982 VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) &&
6983 (vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_CLONE
) &&
6984 (vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_CLONE
)) {
6986 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, cnp
, 0,
6990 #endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
6994 vn_attribute_cleanup(&va
, defaulted
);
6995 if (free_acl
&& va
.va_acl
)
6996 kauth_acl_free(va
.va_acl
);
7005 * clone files or directories, target must not exist.
/*
 * clonefileat: clone the object named by uap->src (relative to
 * uap->src_dirfd) to uap->dst (relative to uap->dst_dirfd).
 *
 * Looks the source up with nameiat() -- NOFOLLOW when CLONE_NOFOLLOW is
 * set, FOLLOW otherwise -- and passes the resulting vnode to
 * clonefile_internal() with data_read_authorised == FALSE.
 *
 * NOTE(review): declarations, the assignment of fvp, the tail of the
 * clonefile_internal() call, cleanup and return statements are absent
 * from this listing.
 */
7009 clonefileat(__unused proc_t p
, struct clonefileat_args
*uap
,
7010 __unused
int32_t *retval
)
7013 struct nameidata fromnd
;
7016 vfs_context_t ctx
= vfs_context_current();
7018 /* Check that the flags are valid. */
/* CLONE_NOFOLLOW is the only accepted bit; the branch body is not visible */
7019 if (uap
->flags
& ~CLONE_NOFOLLOW
)
7022 AUDIT_ARG(fd
, uap
->src_dirfd
);
7024 follow
= (uap
->flags
& CLONE_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
7025 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow
| AUDITVNPATH1
,
7026 UIO_USERSPACE
, uap
->src
, ctx
);
7027 if ((error
= nameiat(&fromnd
, uap
->src_dirfd
)))
7033 error
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
,
/*
 * fclonefileat: clone the file open on uap->src_fd to uap->dst (relative
 * to uap->dst_dirfd).
 *
 * Resolves the descriptor with fp_getfvp(), requires FREAD on it, takes a
 * reference on the vnode with vnode_getwithref(), and calls
 * clonefile_internal() with data_read_authorised == TRUE. The descriptor
 * is released with file_drop().
 *
 * NOTE(review): declarations, failure-path statements, the tail of the
 * clonefile_internal() call, the vnode release and the return statements
 * are absent from this listing.
 */
7041 fclonefileat(__unused proc_t p
, struct fclonefileat_args
*uap
,
7042 __unused
int32_t *retval
)
7045 struct fileproc
*fp
;
7047 vfs_context_t ctx
= vfs_context_current();
7049 AUDIT_ARG(fd
, uap
->src_fd
);
7050 error
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
);
/* descriptor was not opened for reading */
7054 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7055 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
);
7060 if ((error
= vnode_getwithref(fvp
)))
7063 AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
);
/* TRUE: read access is taken as already established by the FREAD check */
7065 error
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
,
7070 file_drop(uap
->src_fd
);
7075 * Rename files. Source and destination must either both be directories,
7076 * or both not be directories. If target is a directory, it must be empty.
7080 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
7081 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
7083 if (flags
& ~VFS_RENAME_FLAGS_MASK
)
7086 if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
))
7091 struct nameidata
*fromnd
, *tond
;
7097 const char *oname
= NULL
;
7098 char *from_name
= NULL
, *to_name
= NULL
;
7099 int from_len
=0, to_len
=0;
7100 int holding_mntlock
;
7101 mount_t locked_mp
= NULL
;
7102 vnode_t oparent
= NULLVP
;
7104 fse_info from_finfo
, to_finfo
;
7106 int from_truncated
=0, to_truncated
;
7108 struct vnode_attr
*fvap
, *tvap
;
7110 /* carving out a chunk for structs that are too big to be on stack. */
7112 struct nameidata from_node
, to_node
;
7113 struct vnode_attr fv_attr
, tv_attr
;
7115 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
7116 fromnd
= &__rename_data
->from_node
;
7117 tond
= &__rename_data
->to_node
;
7119 holding_mntlock
= 0;
7128 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
7130 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7132 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
7134 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
7137 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7138 if ( (error
= nameiat(fromnd
, fromfd
)) )
7140 fdvp
= fromnd
->ni_dvp
;
7141 fvp
= fromnd
->ni_vp
;
7143 if (fvp
&& fvp
->v_type
== VDIR
)
7144 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
7147 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
7148 if ( (error
= nameiat(tond
, tofd
)) ) {
7150 * Translate error code for rename("dir1", "dir2/.").
7152 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
7156 tdvp
= tond
->ni_dvp
;
7160 if (!tvp
&& ISSET(flags
, VFS_RENAME_SWAP
)) {
7165 if (tvp
&& ISSET(flags
, VFS_RENAME_EXCL
)) {
7170 batched
= vnode_compound_rename_available(fdvp
);
7173 * Claim: this check will never reject a valid rename.
7174 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7175 * Suppose fdvp and tdvp are not on the same mount.
7176 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7177 * then you can't move it to within another dir on the same mountpoint.
7178 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7180 * If this check passes, then we are safe to pass these vnodes to the same FS.
7182 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
7186 goto skipped_lookup
;
7190 error
= vn_authorize_renamex(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, flags
, NULL
);
7192 if (error
== ENOENT
) {
7193 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7194 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7196 * We encountered a race where after doing the namei, tvp stops
7197 * being valid. If so, simply re-drive the rename call from the
7209 * If the source and destination are the same (i.e. they're
7210 * links to the same vnode) and the target file system is
7211 * case sensitive, then there is nothing to do.
7213 * XXX Come back to this.
7219 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7220 * then assume that this file system is case sensitive.
7222 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
7223 pathconf_val
!= 0) {
7229 * Allow the renaming of mount points.
7230 * - target must not exist
7231 * - target must reside in the same directory as source
7232 * - union mounts cannot be renamed
7233 * - "/" cannot be renamed
7235 * XXX Handle this in VFS after a continued lookup (if we missed
7236 * in the cache to start off)
7238 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7239 * we'll skip past here. The file system is responsible for
7240 * checking that @tvp is not a descendent of @fvp and vice versa
7241 * so it should always return EINVAL if either @tvp or @fvp is the
7244 if ((fvp
->v_flag
& VROOT
) &&
7245 (fvp
->v_type
== VDIR
) &&
7247 (fvp
->v_mountedhere
== NULL
) &&
7249 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
7250 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
7253 /* switch fvp to the covered vnode */
7254 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
7255 if ( (vnode_getwithref(coveredvp
)) ) {
7265 * Check for cross-device rename.
7267 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
7268 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
7274 * If source is the same as the destination (that is the
7275 * same inode number) then there is nothing to do...
7276 * EXCEPT if the underlying file system supports case
7277 * insensitivity and is case preserving. In this case
7278 * the file system needs to handle the special case of
7279 * getting the same vnode as target (fvp) and source (tvp).
7281 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7282 * and _PC_CASE_PRESERVING can have this exception, and they need to
7283 * handle the special case of getting the same vnode as target and
7284 * source. NOTE: Then the target is unlocked going into vnop_rename,
7285 * so not to cause locking problems. There is a single reference on tvp.
7287 * NOTE - that fvp == tvp also occurs if they are hard linked and
7288 * that correct behaviour then is just to return success without doing
7291 * XXX filesystem should take care of this itself, perhaps...
7293 if (fvp
== tvp
&& fdvp
== tdvp
) {
7294 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
7295 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
7296 fromnd
->ni_cnd
.cn_namelen
)) {
7301 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
7303 * we're holding a reference and lock
7304 * on locked_mp, but it no longer matches
7305 * what we want to do... so drop our hold
7307 mount_unlock_renames(locked_mp
);
7308 mount_drop(locked_mp
, 0);
7309 holding_mntlock
= 0;
7311 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
7313 * serialize renames that re-shape
7314 * the tree... if holding_mntlock is
7315 * set, then we're ready to go...
7317 * first need to drop the iocounts
7318 * we picked up, second take the
7319 * lock to serialize the access,
7320 * then finally start the lookup
7321 * process over with the lock held
7323 if (!holding_mntlock
) {
7325 * need to grab a reference on
7326 * the mount point before we
7327 * drop all the iocounts... once
7328 * the iocounts are gone, the mount
7331 locked_mp
= fvp
->v_mount
;
7332 mount_ref(locked_mp
, 0);
7335 * nameidone has to happen before we vnode_put(tvp)
7336 * since it may need to release the fs_nodelock on the tvp
7345 * nameidone has to happen before we vnode_put(fdvp)
7346 * since it may need to release the fs_nodelock on the fvp
7353 mount_lock_renames(locked_mp
);
7354 holding_mntlock
= 1;
7360 * when we dropped the iocounts to take
7361 * the lock, we allowed the identity of
7362 * the various vnodes to change... if they did,
7363 * we may no longer be dealing with a rename
7364 * that reshapes the tree... once we're holding
7365 * the iocounts, the vnodes can't change type
7366 * so we're free to drop the lock at this point
7369 if (holding_mntlock
) {
7370 mount_unlock_renames(locked_mp
);
7371 mount_drop(locked_mp
, 0);
7372 holding_mntlock
= 0;
7376 // save these off so we can later verify that fvp is the same
7377 oname
= fvp
->v_name
;
7378 oparent
= fvp
->v_parent
;
7382 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
7385 get_fse_info(fvp
, &from_finfo
, ctx
);
7387 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
7392 fvap
= &__rename_data
->fv_attr
;
7396 get_fse_info(tvp
, &to_finfo
, ctx
);
7397 } else if (batched
) {
7398 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
7403 tvap
= &__rename_data
->tv_attr
;
7408 #endif /* CONFIG_FSE */
7410 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
7411 if (from_name
== NULL
) {
7412 GET_PATH(from_name
);
7413 if (from_name
== NULL
) {
7419 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
7421 if (to_name
== NULL
) {
7423 if (to_name
== NULL
) {
7429 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
7431 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
7432 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
7435 if (holding_mntlock
) {
7437 * we can drop our serialization
7440 mount_unlock_renames(locked_mp
);
7441 mount_drop(locked_mp
, 0);
7442 holding_mntlock
= 0;
7445 if (error
== EKEEPLOOKING
) {
7446 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7447 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
7448 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7452 fromnd
->ni_vp
= fvp
;
7455 goto continue_lookup
;
7459 * We may encounter a race in the VNOP where the destination didn't
7460 * exist when we did the namei, but it does by the time we go and
7461 * try to create the entry. In this case, we should re-drive this rename
7462 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7463 * but other filesystems susceptible to this race could return it, too.
7465 if (error
== ERECYCLE
) {
7470 * For compound VNOPs, the authorization callback may return
7471 * ENOENT in case of racing hardlink lookups hitting the name
7472 * cache, redrive the lookup.
7474 if (batched
&& error
== ENOENT
) {
7475 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7476 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7485 /* call out to allow 3rd party notification of rename.
7486 * Ignore result of kauth_authorize_fileop call.
7488 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7489 KAUTH_FILEOP_RENAME
,
7490 (uintptr_t)from_name
, (uintptr_t)to_name
);
7491 if (flags
& VFS_RENAME_SWAP
) {
7492 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7493 KAUTH_FILEOP_RENAME
,
7494 (uintptr_t)to_name
, (uintptr_t)from_name
);
7498 if (from_name
!= NULL
&& to_name
!= NULL
) {
7499 if (from_truncated
|| to_truncated
) {
7500 // set it here since only the from_finfo gets reported up to user space
7501 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7505 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
7508 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
7512 add_fsevent(FSE_RENAME
, ctx
,
7513 FSE_ARG_STRING
, from_len
, from_name
,
7514 FSE_ARG_FINFO
, &from_finfo
,
7515 FSE_ARG_STRING
, to_len
, to_name
,
7516 FSE_ARG_FINFO
, &to_finfo
,
7518 if (flags
& VFS_RENAME_SWAP
) {
7520 * Strictly speaking, swap is the equivalent of
7521 * *three* renames. FSEvents clients should only take
7522 * the events as a hint, so we only bother reporting
7525 add_fsevent(FSE_RENAME
, ctx
,
7526 FSE_ARG_STRING
, to_len
, to_name
,
7527 FSE_ARG_FINFO
, &to_finfo
,
7528 FSE_ARG_STRING
, from_len
, from_name
,
7529 FSE_ARG_FINFO
, &from_finfo
,
7533 add_fsevent(FSE_RENAME
, ctx
,
7534 FSE_ARG_STRING
, from_len
, from_name
,
7535 FSE_ARG_FINFO
, &from_finfo
,
7536 FSE_ARG_STRING
, to_len
, to_name
,
7540 #endif /* CONFIG_FSE */
7543 * update filesystem's mount point data
7546 char *cp
, *pathend
, *mpname
;
7552 mp
= fvp
->v_mountedhere
;
7554 if (vfs_busy(mp
, LK_NOWAIT
)) {
7558 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7560 if (UIO_SEG_IS_USER_SPACE(segflg
))
7561 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7563 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7565 /* find current mount point prefix */
7566 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7567 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7571 /* find last component of target name */
7572 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7576 /* append name to prefix */
7577 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7578 bzero(pathend
, maxlen
);
7579 strlcpy(pathend
, mpname
, maxlen
);
7581 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7586 * fix up name & parent pointers. note that we first
7587 * check that fvp has the same name/parent pointers it
7588 * had before the rename call... this is a 'weak' check
7591 * XXX oparent and oname may not be set in the compound vnop case
7593 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7596 update_flags
= VNODE_UPDATE_NAME
;
7599 update_flags
|= VNODE_UPDATE_PARENT
;
7601 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7604 if (to_name
!= NULL
) {
7605 RELEASE_PATH(to_name
);
7608 if (from_name
!= NULL
) {
7609 RELEASE_PATH(from_name
);
7612 if (holding_mntlock
) {
7613 mount_unlock_renames(locked_mp
);
7614 mount_drop(locked_mp
, 0);
7615 holding_mntlock
= 0;
7619 * nameidone has to happen before we vnode_put(tdvp)
7620 * since it may need to release the fs_nodelock on the tdvp
7630 * nameidone has to happen before we vnode_put(fdvp)
7631 * since it may need to release the fs_nodelock on the fdvp
7641 * If things changed after we did the namei, then we will re-drive
7642 * this rename call from the top.
7649 FREE(__rename_data
, M_TEMP
);
7654 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7656 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7657 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
7660 int renameatx_np(__unused proc_t p
, struct renameatx_np_args
*uap
, __unused
int32_t *retval
)
7662 return renameat_internal(
7663 vfs_context_current(),
7664 uap
->fromfd
, uap
->from
,
7666 UIO_USERSPACE
, uap
->flags
);
7670 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7672 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7673 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7677 * Make a directory file.
7679 * Returns: 0 Success
7682 * vnode_authorize:???
7687 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7688 enum uio_seg segflg
)
7692 int update_flags
= 0;
7694 struct nameidata nd
;
7696 AUDIT_ARG(mode
, vap
->va_mode
);
7697 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7699 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7700 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7703 error
= nameiat(&nd
, fd
);
7714 batched
= vnode_compound_mkdir_available(dvp
);
7716 VATTR_SET(vap
, va_type
, VDIR
);
7720 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7721 * only get EEXIST or EISDIR for existing path components, and not that it could see
7722 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7723 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7725 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7726 if (error
== EACCES
|| error
== EPERM
) {
7734 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7735 * rather than EACCES if the target exists.
7737 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7739 error2
= nameiat(&nd
, fd
);
7753 * make the directory
7755 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7756 if (error
== EKEEPLOOKING
) {
7758 goto continue_lookup
;
7764 // Make sure the name & parent pointers are hooked up
7765 if (vp
->v_name
== NULL
)
7766 update_flags
|= VNODE_UPDATE_NAME
;
7767 if (vp
->v_parent
== NULLVP
)
7768 update_flags
|= VNODE_UPDATE_PARENT
;
7771 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7774 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7779 * nameidone has to happen before we vnode_put(dvp)
7780 * since it may need to release the fs_nodelock on the dvp
7793 * mkdir_extended: Create a directory; with extended security (ACL).
7795 * Parameters: p Process requesting to create the directory
7796 * uap User argument descriptor (see below)
7799 * Indirect: uap->path Path of directory to create
7800 * uap->mode Access permissions to set
7801 * uap->xsecurity ACL to set
7803 * Returns: 0 Success
7808 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7811 kauth_filesec_t xsecdst
;
7812 struct vnode_attr va
;
7814 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7817 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7818 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7822 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7823 if (xsecdst
!= NULL
)
7824 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7826 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7828 if (xsecdst
!= NULL
)
7829 kauth_filesec_free(xsecdst
);
7834 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7836 struct vnode_attr va
;
7839 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7841 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7846 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7848 struct vnode_attr va
;
7851 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7853 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
7858 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7859 enum uio_seg segflg
)
7863 struct nameidata nd
;
7866 int has_listeners
= 0;
7870 struct vnode_attr va
;
7871 #endif /* CONFIG_FSE */
7872 struct vnode_attr
*vap
= NULL
;
7873 int restart_count
= 0;
7879 * This loop exists to restart rmdir in the unlikely case that two
7880 * processes are simultaneously trying to remove the same directory
7881 * containing orphaned appleDouble files.
7884 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7885 segflg
, dirpath
, ctx
);
7886 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7891 error
= nameiat(&nd
, fd
);
7899 batched
= vnode_compound_rmdir_available(vp
);
7901 if (vp
->v_flag
& VROOT
) {
7903 * The root of a mounted filesystem cannot be deleted.
7910 * Removed a check here; we used to abort if vp's vid
7911 * was not the same as what we'd seen the last time around.
7912 * I do not think that check was valid, because if we retry
7913 * and all dirents are gone, the directory could legitimately
7914 * be recycled but still be present in a situation where we would
7915 * have had permission to delete. Therefore, we won't make
7916 * an effort to preserve that check now that we may not have a
7921 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7923 if (error
== ENOENT
) {
7924 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7925 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7936 if (!vnode_compound_rmdir_available(dvp
)) {
7937 panic("No error, but no compound rmdir?");
7944 need_event
= need_fsevent(FSE_DELETE
, dvp
);
7947 get_fse_info(vp
, &finfo
, ctx
);
7949 error
= vfs_get_notify_attributes(&va
);
7958 has_listeners
= kauth_authorize_fileop_has_listeners();
7959 if (need_event
|| has_listeners
) {
7968 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
7971 finfo
.mode
|= FSE_TRUNCATED_PATH
;
7976 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7979 /* Couldn't find a vnode */
7983 if (error
== EKEEPLOOKING
) {
7984 goto continue_lookup
;
7985 } else if (batched
&& error
== ENOENT
) {
7986 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7987 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7989 * For compound VNOPs, the authorization callback
7990 * may return ENOENT in case of racing hard link lookups
7991 * redrive the lookup.
7998 #if CONFIG_APPLEDOUBLE
8000 * Special case to remove orphaned AppleDouble
8001 * files. I don't like putting this in the kernel,
8002 * but carbon does not like putting this in carbon either,
8005 if (error
== ENOTEMPTY
) {
8006 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
8007 if (error
== EBUSY
) {
8013 * Assuming everything went well, we will try the RMDIR again
8016 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
8018 #endif /* CONFIG_APPLEDOUBLE */
8020 * Call out to allow 3rd party notification of delete.
8021 * Ignore result of kauth_authorize_fileop call.
8024 if (has_listeners
) {
8025 kauth_authorize_fileop(vfs_context_ucred(ctx
),
8026 KAUTH_FILEOP_DELETE
,
8031 if (vp
->v_flag
& VISHARDLINK
) {
8032 // see the comment in unlink1() about why we update
8033 // the parent of a hard link when it is removed
8034 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
8040 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
8042 add_fsevent(FSE_DELETE
, ctx
,
8043 FSE_ARG_STRING
, len
, path
,
8044 FSE_ARG_FINFO
, &finfo
,
8056 * nameidone has to happen before we vnode_put(dvp)
8057 * since it may need to release the fs_nodelock on the dvp
8065 if (restart_flag
== 0) {
8066 wakeup_one((caddr_t
)vp
);
8069 tsleep(vp
, PVFS
, "rm AD", 1);
8071 } while (restart_flag
!= 0);
8078 * Remove a directory file.
8082 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
8084 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
8085 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * Length of a struct direntry record holding a name of `namlen` bytes:
 * sizeof(struct direntry) less (MAXPATHLEN-1), plus the name length,
 * rounded up to the next multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	(((sizeof(struct direntry) - (MAXPATHLEN-1)) + (namlen) + 7) & ~7)
8093 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
8094 int *numdirent
, vfs_context_t ctxp
)
8096 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8097 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
8098 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
8099 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
8104 struct direntry
*entry64
;
8110 * Our kernel buffer needs to be smaller since re-packing
8111 * will expand each dirent. The worst case (when the name
8112 * length is 3) corresponds to a struct direntry size of 32
8113 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8114 * (4-byte aligned). So having a buffer that is 3/8 the size
8115 * will prevent us from reading more than we can pack.
8117 * Since this buffer is wired memory, we will limit the
8118 * buffer size to a maximum of 32K. We would really like to
8119 * use 32K in the MIN(), but we use magic number 87371 to
8120 * prevent uio_resid() * 3 / 8 from overflowing.
8122 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
8123 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
8124 if (bufptr
== NULL
) {
8128 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
8129 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
8130 auio
->uio_offset
= uio
->uio_offset
;
8132 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
8134 dep
= (struct dirent
*)bufptr
;
8135 bytesread
= bufsize
- uio_resid(auio
);
8137 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
8140 * Convert all the entries and copy them out to user's buffer.
8142 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
8143 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
8145 bzero(entry64
, enbufsize
);
8146 /* Convert a dirent to a dirent64. */
8147 entry64
->d_ino
= dep
->d_ino
;
8148 entry64
->d_seekoff
= 0;
8149 entry64
->d_reclen
= enbufsize
;
8150 entry64
->d_namlen
= dep
->d_namlen
;
8151 entry64
->d_type
= dep
->d_type
;
8152 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
8154 /* Move to next entry. */
8155 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
8157 /* Copy entry64 to user's buffer. */
8158 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
8161 /* Update the real offset using the offset we got from VNOP_READDIR. */
8163 uio
->uio_offset
= auio
->uio_offset
;
8166 FREE(bufptr
, M_TEMP
);
8167 FREE(entry64
, M_TEMP
);
8172 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8175 * Read a block of directory entries in a file system independent format.
8178 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
8179 off_t
*offset
, int flags
)
8182 struct vfs_context context
= *vfs_context_current(); /* local copy */
8183 struct fileproc
*fp
;
8185 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8187 int error
, eofflag
, numdirent
;
8188 char uio_buf
[ UIO_SIZEOF(1) ];
8190 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
8194 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8195 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8200 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
8201 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
8204 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
8208 if ( (error
= vnode_getwithref(vp
)) ) {
8211 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8214 if (vp
->v_type
!= VDIR
) {
8215 (void)vnode_put(vp
);
8221 error
= mac_vnode_check_readdir(&context
, vp
);
8223 (void)vnode_put(vp
);
8228 loff
= fp
->f_fglob
->fg_offset
;
8229 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8230 uio_addiov(auio
, bufp
, bufsize
);
8232 if (flags
& VNODE_READDIR_EXTENDED
) {
8233 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
8234 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8236 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
8237 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
8240 (void)vnode_put(vp
);
8244 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
8245 if (union_dircheckp
) {
8246 error
= union_dircheckp(&vp
, fp
, &context
);
8253 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
8254 struct vnode
*tvp
= vp
;
8255 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
8257 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8258 fp
->f_fglob
->fg_offset
= 0;
8272 *bytesread
= bufsize
- uio_resid(auio
);
8280 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
8286 AUDIT_ARG(fd
, uap
->fd
);
8287 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
8290 if (proc_is64bit(p
)) {
8291 user64_long_t base
= (user64_long_t
)offset
;
8292 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
8294 user32_long_t base
= (user32_long_t
)offset
;
8295 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
8297 *retval
= bytesread
;
8303 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
8309 AUDIT_ARG(fd
, uap
->fd
);
8310 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
8313 *retval
= bytesread
;
8314 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
8321 * Set the mode mask for creation of filesystem nodes.
8322 * XXX implement xsecurity
8324 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8326 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
8328 struct filedesc
*fdp
;
8330 AUDIT_ARG(mask
, newmask
);
8333 *retval
= fdp
->fd_cmask
;
8334 fdp
->fd_cmask
= newmask
& ALLPERMS
;
8340 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8342 * Parameters: p Process requesting to set the umask
8343 * uap User argument descriptor (see below)
8344 * retval umask of the process (parameter p)
8346 * Indirect: uap->newmask umask to set
8347 * uap->xsecurity ACL to set
8349 * Returns: 0 Success
8354 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
8357 kauth_filesec_t xsecdst
;
8359 xsecdst
= KAUTH_FILESEC_NONE
;
8360 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
8361 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
8364 xsecdst
= KAUTH_FILESEC_NONE
;
8367 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
8369 if (xsecdst
!= KAUTH_FILESEC_NONE
)
8370 kauth_filesec_free(xsecdst
);
8375 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
8377 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
8381 * Void all references to file by ripping underlying filesystem
8386 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
8389 struct vnode_attr va
;
8390 vfs_context_t ctx
= vfs_context_current();
8392 struct nameidata nd
;
8394 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
8403 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
8408 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
8414 error
= mac_vnode_check_revoke(ctx
, vp
);
8420 VATTR_WANTED(&va
, va_uid
);
8421 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
8423 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
8424 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
8426 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
8427 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
8435 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8436 * The following system calls are designed to support features
8437 * which are specific to the HFS & HFS Plus volume formats
8442 * Obtain attribute information on objects in a directory while enumerating
8447 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
8450 struct fileproc
*fp
;
8452 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8453 uint32_t count
, savecount
;
8457 struct attrlist attributelist
;
8458 vfs_context_t ctx
= vfs_context_current();
8460 char uio_buf
[ UIO_SIZEOF(1) ];
8461 kauth_action_t action
;
8465 /* Get the attributes into kernel space */
8466 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
8469 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
8473 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
8476 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
8477 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
8484 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
8491 if ( (error
= vnode_getwithref(vp
)) )
8494 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
8497 if (vp
->v_type
!= VDIR
) {
8498 (void)vnode_put(vp
);
8504 error
= mac_vnode_check_readdir(ctx
, vp
);
8506 (void)vnode_put(vp
);
8511 /* set up the uio structure which will contain the users return buffer */
8512 loff
= fp
->f_fglob
->fg_offset
;
8513 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8514 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
8517 * If the only item requested is file names, we can let that past with
8518 * just LIST_DIRECTORY. If they want any other attributes, that means
8519 * they need SEARCH as well.
8521 action
= KAUTH_VNODE_LIST_DIRECTORY
;
8522 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
8523 attributelist
.fileattr
|| attributelist
.dirattr
)
8524 action
|= KAUTH_VNODE_SEARCH
;
8526 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
8528 /* Believe it or not, uap->options only has 32-bits of valid
8529 * info, so truncate before extending again */
8531 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8532 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8536 (void) vnode_put(vp
);
8541 * If we've got the last entry of a directory in a union mount
8542 * then reset the eofflag and pretend there's still more to come.
8543 * The next call will again set eofflag and the buffer will be empty,
8544 * so traverse to the underlying directory and do the directory
8547 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8548 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8550 } else { // Empty buffer
8551 struct vnode
*tvp
= vp
;
8552 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8553 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8554 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8555 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8557 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8565 (void)vnode_put(vp
);
8569 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8571 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8573 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8575 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8578 *retval
= eofflag
; /* similar to getdirentries */
8582 return (error
); /* return error earlier, an retval of 0 or 1 now */
8584 } /* end of getdirentriesattr system call */
8587 * Exchange data between two files
8592 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8595 struct nameidata fnd
, snd
;
8596 vfs_context_t ctx
= vfs_context_current();
8600 u_int32_t nameiflags
;
8604 int from_truncated
=0, to_truncated
=0;
8606 fse_info f_finfo
, s_finfo
;
8610 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8612 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8613 UIO_USERSPACE
, uap
->path1
, ctx
);
8615 error
= namei(&fnd
);
8622 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8623 UIO_USERSPACE
, uap
->path2
, ctx
);
8625 error
= namei(&snd
);
8634 * if the files are the same, return an inval error
8642 * if the files are on different volumes, return an error
8644 if (svp
->v_mount
!= fvp
->v_mount
) {
8649 /* If they're not files, return an error */
8650 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8656 error
= mac_vnode_check_exchangedata(ctx
,
8661 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8662 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8667 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8669 kauth_authorize_fileop_has_listeners()) {
8672 if (fpath
== NULL
|| spath
== NULL
) {
8677 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8678 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8681 get_fse_info(fvp
, &f_finfo
, ctx
);
8682 get_fse_info(svp
, &s_finfo
, ctx
);
8683 if (from_truncated
|| to_truncated
) {
8684 // set it here since only the f_finfo gets reported up to user space
8685 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8689 /* Ok, make the call */
8690 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8693 const char *tmpname
;
8695 if (fpath
!= NULL
&& spath
!= NULL
) {
8696 /* call out to allow 3rd party notification of exchangedata.
8697 * Ignore result of kauth_authorize_fileop call.
8699 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8700 (uintptr_t)fpath
, (uintptr_t)spath
);
8704 tmpname
= fvp
->v_name
;
8705 fvp
->v_name
= svp
->v_name
;
8706 svp
->v_name
= tmpname
;
8708 if (fvp
->v_parent
!= svp
->v_parent
) {
8711 tmp
= fvp
->v_parent
;
8712 fvp
->v_parent
= svp
->v_parent
;
8713 svp
->v_parent
= tmp
;
8715 name_cache_unlock();
8718 if (fpath
!= NULL
&& spath
!= NULL
) {
8719 add_fsevent(FSE_EXCHANGE
, ctx
,
8720 FSE_ARG_STRING
, flen
, fpath
,
8721 FSE_ARG_FINFO
, &f_finfo
,
8722 FSE_ARG_STRING
, slen
, spath
,
8723 FSE_ARG_FINFO
, &s_finfo
,
8731 RELEASE_PATH(fpath
);
8733 RELEASE_PATH(spath
);
8741 * Return (in MB) the amount of freespace on the given vnode's volume.
8743 uint32_t freespace_mb(vnode_t vp
);
8746 freespace_mb(vnode_t vp
)
8748 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8749 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8750 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
8758 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8763 struct nameidata nd
;
8764 struct user64_fssearchblock searchblock
;
8765 struct searchstate
*state
;
8766 struct attrlist
*returnattrs
;
8767 struct timeval timelimit
;
8768 void *searchparams1
,*searchparams2
;
8770 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8771 uint32_t nummatches
;
8773 uint32_t nameiflags
;
8774 vfs_context_t ctx
= vfs_context_current();
8775 char uio_buf
[ UIO_SIZEOF(1) ];
8777 /* Start by copying in fsearchblock parameter list */
8778 if (IS_64BIT_PROCESS(p
)) {
8779 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8780 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8781 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8784 struct user32_fssearchblock tmp_searchblock
;
8786 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8787 // munge into 64-bit version
8788 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8789 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8790 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8791 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8793 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8794 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8796 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8797 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8798 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8799 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8800 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8801 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8802 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8807 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8809 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8810 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8813 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8814 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
8815 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8818 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8819 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8820 /* assumes the size is still 556 bytes it will continue to work */
8822 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8823 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8825 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8827 /* Now set up the various pointers to the correct place in our newly allocated memory */
8829 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8830 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8831 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8833 /* Now copy in the stuff given our local variables. */
8835 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8838 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8841 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8844 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8848 * When searching a union mount, need to set the
8849 * start flag at the first call on each layer to
8850 * reset state for the new volume.
8852 if (uap
->options
& SRCHFS_START
)
8853 state
->ss_union_layer
= 0;
8855 uap
->options
|= state
->ss_union_flags
;
8856 state
->ss_union_flags
= 0;
8859 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8860 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8861 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8862 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8863 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8866 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8867 attrreference_t
* string_ref
;
8868 u_int32_t
* start_length
;
8869 user64_size_t param_length
;
8871 /* validate searchparams1 */
8872 param_length
= searchblock
.sizeofsearchparams1
;
8873 /* skip the word that specifies length of the buffer */
8874 start_length
= (u_int32_t
*) searchparams1
;
8875 start_length
= start_length
+1;
8876 string_ref
= (attrreference_t
*) start_length
;
8878 /* ensure no negative offsets or too big offsets */
8879 if (string_ref
->attr_dataoffset
< 0 ) {
8883 if (string_ref
->attr_length
> MAXPATHLEN
) {
8888 /* Check for pointer overflow in the string ref */
8889 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8894 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8898 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8904 /* set up the uio structure which will contain the users return buffer */
8905 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8906 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8909 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8910 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8911 UIO_USERSPACE
, uap
->path
, ctx
);
8920 * Switch to the root vnode for the volume
8922 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8929 * If it's a union mount, the path lookup takes
8930 * us to the top layer. But we may need to descend
8931 * to a lower layer. For non-union mounts the layer
8934 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
8935 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
8938 vp
= vp
->v_mount
->mnt_vnodecovered
;
8944 vnode_getwithref(vp
);
8949 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
8958 * If searchblock.maxmatches == 0, then skip the search. This has happened
8959 * before and sometimes the underlying code doesn't deal with it well.
8961 if (searchblock
.maxmatches
== 0) {
8967 * All right, we have everything we need, so let's make that call.
8969 * We keep special track of the return value from the file system:
8970 * EAGAIN is an acceptable error condition that shouldn't keep us
8971 * from copying out any results...
8974 fserror
= VNOP_SEARCHFS(vp
,
8977 &searchblock
.searchattrs
,
8978 (u_long
)searchblock
.maxmatches
,
8982 (u_long
)uap
->scriptcode
,
8983 (u_long
)uap
->options
,
8985 (struct searchstate
*) &state
->ss_fsstate
,
8989 * If it's a union mount we need to be called again
8990 * to search the mounted-on filesystem.
8992 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
8993 state
->ss_union_flags
= SRCHFS_START
;
8994 state
->ss_union_layer
++; // search next layer down
9002 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9003 search state. Everything was already put into the return buffer by the vop call. */
9005 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
9008 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
9015 FREE(searchparams1
,M_TEMP
);
9020 } /* end of searchfs system call */
9022 #else /* CONFIG_SEARCHFS */
/*
 * searchfs: variant compiled on the #else side of CONFIG_SEARCHFS, i.e.
 * when search support is not configured.  All three arguments are marked
 * __unused; the body is not visible in this excerpt.
 */
9025 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
9030 #endif /* CONFIG_SEARCHFS */
/*
 * Global state for the namespace / snapshot handler machinery.
 *
 * A fixed table of MAX_NSPACE_ITEMS request slots (nspace_items) carries
 * events from threads that call resolve_nspace_item_ext() to the handler
 * process that calls wait_for_namespace_event().  The lock objects below
 * are created in nspace_handler_init().
 */
9033 lck_grp_attr_t
* nspace_group_attr
;
9034 lck_attr_t
* nspace_lock_attr
;
9035 lck_grp_t
* nspace_mutex_group
;
/*
 * nspace_handler_lock is the mutex taken around the nspace_items accesses
 * in this section; nspace_handler_exclusion_lock is taken around the
 * handler_busy flag in wait_for_namespace_event().
 */
9037 lck_mtx_t nspace_handler_lock
;
9038 lck_mtx_t nspace_handler_exclusion_lock
;
/*
 * snapshot_timestamp is stored by FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
 * and zeroed when the snapshot handler process exits.
 * nspace_allow_virtual_devs is stored by
 * FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS.
 */
9040 time_t snapshot_timestamp
=0;
9041 int nspace_allow_virtual_devs
=0;
9043 void nspace_handler_init(void);
/* The fields of nspace_item_info are not visible in this excerpt. */
9045 typedef struct nspace_item_info
{
9055 #define MAX_NSPACE_ITEMS 128
9056 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
9057 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
/*
 * nspace_token_id is pre-incremented to mint the token of each item handed
 * to a handler; its address is also the sleep channel for threads waiting
 * for a free slot.
 */
9058 uint32_t nspace_token_id
=0;
9059 uint32_t nspace_handler_timeout
= 15; // seconds
/* Per-item state bits kept in nspace_items[].flags. */
9061 #define NSPACE_ITEM_NEW 0x0001
9062 #define NSPACE_ITEM_PROCESSING 0x0002
9063 #define NSPACE_ITEM_DEAD 0x0004
9064 #define NSPACE_ITEM_CANCELLED 0x0008
9065 #define NSPACE_ITEM_DONE 0x0010
9066 #define NSPACE_ITEM_RESET_TIMER 0x0020
/* Event-type bits; they share the flags word with the state bits above. */
9068 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9069 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9071 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9073 //#pragma optimization_level 0
/* Handler types; the values index the nspace_handlers array below. */
9076 NSPACE_HANDLER_NSPACE
= 0,
9077 NSPACE_HANDLER_SNAPSHOT
= 1,
9079 NSPACE_HANDLER_COUNT
,
/* Per-handler registration record (only some fields are visible here). */
9083 uint64_t handler_tid
;
9084 struct proc
*handler_proc
;
9088 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
9090 /* namespace fsctl functions */
9091 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
9092 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
9093 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
9094 static nspace_type_t
nspace_type_for_op(uint64_t op
);
9095 static int nspace_is_special_process(struct proc
*proc
);
9096 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
9097 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
/*
 * NOTE(review): validate_namespace_args takes "int size" while its caller
 * process_namespace_fsctl receives "u_int size" - confirm the intended type.
 */
9098 static int validate_namespace_args (int is64bit
, int size
);
9099 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
/*
 * nspace_flags_matches_handler: report whether an item belongs to a handler.
 *
 * event_flags - the flags word of an nspace item
 * nspace_type - the handler type being served
 *
 * Only the event-type bits (NSPACE_ITEM_ALL_EVENT_TYPES) of event_flags are
 * examined, and they must equal exactly the single event bit associated
 * with the handler type.  An unrecognised handler type is logged with
 * printf; the value returned on that path is not visible in this excerpt.
 *
 * NOTE(review): the log format uses %u while the argument is cast to int -
 * confirm the intended conversion.
 */
9102 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
9104 switch(nspace_type
) {
9105 case NSPACE_HANDLER_NSPACE
:
9106 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
9107 case NSPACE_HANDLER_SNAPSHOT
:
9108 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
9110 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
/*
 * nspace_item_flags_for_type: map a handler type to its item event bit.
 *
 * Returns NSPACE_ITEM_NSPACE_EVENT for NSPACE_HANDLER_NSPACE and
 * NSPACE_ITEM_SNAPSHOT_EVENT for NSPACE_HANDLER_SNAPSHOT.  Any other type
 * is logged with printf; the value returned on that path is not visible in
 * this excerpt.
 */
9115 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
9117 switch(nspace_type
) {
9118 case NSPACE_HANDLER_NSPACE
:
9119 return NSPACE_ITEM_NSPACE_EVENT
;
9120 case NSPACE_HANDLER_SNAPSHOT
:
9121 return NSPACE_ITEM_SNAPSHOT_EVENT
;
9123 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
/*
 * nspace_open_flags_for_type: choose the open mode used when a vnode is
 * handed to a handler of the given type.
 *
 * The namespace handler gets FREAD | FWRITE | O_EVTONLY; the snapshot
 * handler gets FREAD | O_EVTONLY (no write access).  Any other type is
 * logged with printf; the value returned on that path is not visible in
 * this excerpt.
 */
9128 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
9130 switch(nspace_type
) {
9131 case NSPACE_HANDLER_NSPACE
:
9132 return FREAD
| FWRITE
| O_EVTONLY
;
9133 case NSPACE_HANDLER_SNAPSHOT
:
9134 return FREAD
| O_EVTONLY
;
9136 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
/*
 * nspace_type_for_op: derive the handler type from an operation word.
 *
 * op is masked with NAMESPACE_HANDLER_EVENT_TYPE_MASK and the result is
 * mapped to NSPACE_HANDLER_NSPACE or NSPACE_HANDLER_SNAPSHOT.  When the
 * masked value is neither event type, the value is logged and the function
 * falls back to NSPACE_HANDLER_NSPACE.
 */
9141 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
9143 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
9144 case NAMESPACE_HANDLER_NSPACE_EVENT
:
9145 return NSPACE_HANDLER_NSPACE
;
9146 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
9147 return NSPACE_HANDLER_SNAPSHOT
;
9149 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
9150 return NSPACE_HANDLER_NSPACE
;
/*
 * nspace_is_special_process: test whether proc is currently registered as
 * the handler process of any handler type.
 *
 * Every slot of nspace_handlers is compared against proc.  The return
 * statements are not visible in this excerpt; callers treat a nonzero
 * result as "is a handler".
 * NOTE(review): no lock call is visible in this function although
 * handler_proc is written under nspace_handler_lock elsewhere - confirm.
 */
9154 static inline int nspace_is_special_process(struct proc
*proc
)
9157 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9158 if (proc
== nspace_handlers
[i
].handler_proc
)
/*
 * nspace_handler_init: one-time setup of the nspace locking objects.
 *
 * Allocates the lock attribute, the lock-group attribute and the
 * "nspace-mutex" lock group, initialises nspace_handler_lock and
 * nspace_handler_exclusion_lock in that group, and zeroes the whole
 * nspace_items table.
 */
9165 nspace_handler_init(void)
9167 nspace_lock_attr
= lck_attr_alloc_init();
9168 nspace_group_attr
= lck_grp_attr_alloc_init();
9169 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
9170 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
9171 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
9172 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
/*
 * nspace_proc_exit: give up every handler role held by process p and
 * release the threads that were waiting on that handler.
 *
 * p - the process to deregister
 *
 * Items that are NEW or PROCESSING and whose flags intersect the event
 * bits of the vacated handler types are reset to NSPACE_ITEM_DONE and
 * their waiters are woken.  If p held the snapshot role,
 * snapshot_timestamp is also zeroed.
 *
 * NOTE(review): the handler slots are cleared before nspace_handler_lock
 * is taken, while wait_for_namespace_event() writes them under that lock -
 * confirm this ordering is intentional.
 */
9176 nspace_proc_exit(struct proc
*p
)
9178 int i
, event_mask
= 0;
/* Collect the event bits of every slot owned by p and vacate the slot. */
9180 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
9181 if (p
== nspace_handlers
[i
].handler_proc
) {
9182 event_mask
|= nspace_item_flags_for_type(i
);
9183 nspace_handlers
[i
].handler_tid
= 0;
9184 nspace_handlers
[i
].handler_proc
= NULL
;
/* event_mask == 0 means p was not registered for any handler type. */
9188 if (event_mask
== 0) {
9192 lck_mtx_lock(&nspace_handler_lock
);
9193 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9194 // if this process was the snapshot handler, zero snapshot_timestamp
9195 snapshot_timestamp
= 0;
9199 // unblock anyone that's waiting for the handler that died
9201 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9202 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
9204 if ( nspace_items
[i
].flags
& event_mask
) {
/* Clear the pending-snapshot mark on the vnode under its spin lock. */
9206 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9207 vnode_lock_spin(nspace_items
[i
].vp
);
9208 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9209 vnode_unlock(nspace_items
[i
].vp
);
9211 nspace_items
[i
].vp
= NULL
;
9212 nspace_items
[i
].vid
= 0;
9213 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9214 nspace_items
[i
].token
= 0;
/* Waiters in resolve_nspace_item_ext() sleep on the address of the vp field. */
9216 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
/* Also wake any thread sleeping on nspace_item_idx. */
9221 wakeup((caddr_t
)&nspace_item_idx
);
9222 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * resolve_nspace_item: convenience wrapper around
 * resolve_nspace_item_ext() that passes NULL as the extra argument.
 */
9227 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
9229 return resolve_nspace_item_ext(vp
, op
, NULL
);
/*
 * resolve_nspace_item_ext: post an event for vp to the matching handler
 * and sleep until the handler finishes, cancels, or the wait times out.
 *
 * vp  - vnode the event is about
 * op  - operation word; its event-type bits select the handler type
 * arg - extra argument stored in the item; NSPACE_REARM_NO_ARG is stored
 *       as NULL
 *
 * Visible flow: several early checks (vnode type, virtual-device snapshot
 * events, no registered handler, caller is itself a handler) whose return
 * values are not visible in this excerpt; then, under
 * nspace_handler_lock, an existing item with the same vp and op is looked
 * up or a free slot is claimed, the handler is woken through
 * nspace_item_idx, and the caller sleeps on the address of the vp field of
 * the slot.  When the last reference to the slot is dropped the slot is
 * cleared for reuse and threads waiting for a free slot are woken.
 */
9233 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
9235 int i
, error
, keep_waiting
;
9237 nspace_type_t nspace_type
= nspace_type_for_op(op
);
9239 // only allow namespace events on regular files, directories and symlinks.
9240 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
9245 // if this is a snapshot event and the vnode is on a
9246 // disk image just pretend nothing happened since any
9247 // change to the disk image will cause the disk image
9248 // itself to get backed up and this avoids multi-way
9249 // deadlocks between the snapshot handler and the ever
9250 // popular diskimages-helper process. the variable
9251 // nspace_allow_virtual_devs allows this behavior to
9252 // be overridden (for use by the Mobile TimeMachine
9253 // testing infrastructure which uses disk images)
9255 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
9256 && (vp
->v_mount
!= NULL
)
9257 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
9258 && !nspace_allow_virtual_devs
) {
9263 // if (thread_tid(current_thread()) == namespace_handler_tid) {
/* No handler process is registered for this event type. */
9264 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
/* The caller is itself one of the handler processes. */
9268 if (nspace_is_special_process(current_proc())) {
9272 lck_mtx_lock(&nspace_handler_lock
);
/* Look for an item already queued for the same vnode and operation. */
9275 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9276 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
/* None found: look for a slot whose flags word is zero (unused). */
9281 if (i
>= MAX_NSPACE_ITEMS
) {
9282 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9283 if (nspace_items
[i
].flags
== 0) {
9288 nspace_items
[i
].refcount
++;
/* Table full: sleep (interruptibly, with timeout) until a slot is released. */
9291 if (i
>= MAX_NSPACE_ITEMS
) {
9292 ts
.tv_sec
= nspace_handler_timeout
;
9295 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
9297 // an entry got free'd up, go see if we can get a slot
9300 lck_mtx_unlock(&nspace_handler_lock
);
9306 // if it didn't already exist, add it. if it did exist
9307 // we'll get woken up when someone does a wakeup() on
9308 // the slot in the nspace_items table.
9310 if (vp
!= nspace_items
[i
].vp
) {
9311 nspace_items
[i
].vp
= vp
;
9312 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
9313 nspace_items
[i
].op
= op
;
9314 nspace_items
[i
].vid
= vnode_vid(vp
);
9315 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
9316 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
/* Snapshot events also mark the vnode itself as needing a snapshot. */
9317 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
9319 vnode_lock_spin(vp
);
9320 vp
->v_flag
|= VNEEDSSNAPSHOT
;
9325 nspace_items
[i
].token
= 0;
9326 nspace_items
[i
].refcount
= 1;
/* Tell a handler sleeping in wait_for_namespace_event() that work exists. */
9328 wakeup((caddr_t
)&nspace_item_idx
);
9332 // Now go to sleep until the handler does a wakeup on this
9333 // slot in the nspace_items table (or we timeout).
9336 while(keep_waiting
) {
9337 ts
.tv_sec
= nspace_handler_timeout
;
9339 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
9341 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
/* On cancellation the token field carries the value supplied by the handler. */
9343 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
9344 error
= nspace_items
[i
].token
;
9345 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
/* RESET_TIMER is set by FSCTL_NAMESPACE_HANDLER_UPDATE; it is consumed here. */
9346 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
9347 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
9352 } else if (error
== 0) {
9353 // hmmm, why did we get woken up?
9354 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9355 nspace_items
[i
].token
);
/* Last waiter out releases the slot. */
9358 if (--nspace_items
[i
].refcount
== 0) {
9359 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
9360 nspace_items
[i
].arg
= NULL
;
9361 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
9362 nspace_items
[i
].flags
= 0; // this clears it for re-use
9364 wakeup(&nspace_token_id
);
9368 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * nspace_snapshot_event: raise a snapshot event for vp when its change
 * time falls within the current snapshot epoch.
 *
 * vp      - vnode about to be modified
 * ctime   - change time of the vnode
 * op_type - operation bits; NAMESPACE_HANDLER_SNAPSHOT_EVENT is OR-ed in
 * arg     - passed through to resolve_nspace_item_ext()
 *
 * The event is posted only when both ctime and snapshot_timestamp are
 * nonzero and either ctime <= snapshot_timestamp or the vnode is already
 * marked as needing snapshots.  Swap files are skipped.  Returns
 * snapshot_error, which is set to EINTR when the wait was interrupted and
 * otherwise keeps its initial 0 on the paths visible here.
 *
 * NOTE(review): snapshot_timestamp is read here with no lock call visible
 * in this function - confirm that is acceptable.
 */
9373 int nspace_snapshot_event(vnode_t vp
, time_t ctime
, uint64_t op_type
, void *arg
)
9375 int snapshot_error
= 0;
9381 /* Swap files are special; skip them */
9382 if (vnode_isswap(vp
)) {
9386 if (ctime
!= 0 && snapshot_timestamp
!= 0 && (ctime
<= snapshot_timestamp
|| vnode_needssnapshots(vp
))) {
9387 // the change time is within this epoch
9390 error
= resolve_nspace_item_ext(vp
, op_type
| NAMESPACE_HANDLER_SNAPSHOT_EVENT
, arg
);
/* The handling of EDEADLK is not visible in this excerpt. */
9391 if (error
== EDEADLK
) {
9394 if (error
== EAGAIN
) {
9395 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9396 } else if (error
== EINTR
) {
9397 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9398 snapshot_error
= EINTR
;
9403 return snapshot_error
;
/*
 * get_nspace_item_status: look up the item queued for vp and report its
 * flags word.
 *
 * vp     - vnode to look for in nspace_items
 * status - out: receives nspace_items[i].flags of the first matching slot
 *
 * The table is scanned under nspace_handler_lock.  When no slot refers to
 * vp the lock is dropped and *status is left untouched; the return values
 * of both paths are not visible in this excerpt.
 */
9407 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
9411 lck_mtx_lock(&nspace_handler_lock
);
9412 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9413 if (nspace_items
[i
].vp
== vp
) {
9418 if (i
>= MAX_NSPACE_ITEMS
) {
9419 lck_mtx_unlock(&nspace_handler_lock
);
9423 *status
= nspace_items
[i
].flags
;
9424 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * build_volfs_path: format a "/.vol/<fsid>/<fileid>" path for vp.
 *
 * vp   - vnode to describe
 * path - output buffer
 * len  - in: passed to snprintf as the buffer size;
 *        out: snprintf result plus one
 *
 * The fsid and fileid are fetched with vnode_getattr() in the kernel
 * context.  If that fails, a fixed placeholder path that cannot exist is
 * written instead.  The return statements are not visible in this excerpt.
 *
 * NOTE(review): snprintf conventionally returns the length the full string
 * would have had, so on truncation the value stored in *len can be larger
 * than the buffer size that was passed in - confirm callers tolerate that.
 * NOTE(review): va_fileid is printed with %lld and va_fsid with %d after a
 * dev_t cast - confirm these match the field types.
 */
9431 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
9433 struct vnode_attr va
;
9437 VATTR_WANTED(&va
, va_fsid
);
9438 VATTR_WANTED(&va
, va_fileid
);
9440 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
9441 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
9444 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
9453 // Note: this function does NOT check permissions on all of the
9454 // parent directories leading to this vnode. It should only be
9455 // called on behalf of a root process. Otherwise a process may
9456 // get access to a file because the file itself is readable even
9457 // though its parent directories would prevent access.
/*
 * vn_open_with_vp: open a vnode the caller already holds, without a path
 * lookup.
 *
 * vp    - vnode to open
 * fmode - open mode bits (FREAD, FWRITE, O_TRUNC, O_APPEND, ...)
 * ctx   - VFS context used for the MAC check, authorization and VNOP calls
 *
 * Visible steps, in order: superuser check on the current credential; MAC
 * open check; translation of fmode into a kauth action followed by
 * vnode_authorize(); VNOP_OPEN(); vnode_ref_ext() (undone with VNOP_CLOSE()
 * if it fails); MAC open notification; kauth file-op notification.  The
 * return statements are not visible in this excerpt.
 */
9460 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
9464 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9469 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
9474 /* compute action to be authorized */
9476 if (fmode
& FREAD
) {
9477 action
|= KAUTH_VNODE_READ_DATA
;
9479 if (fmode
& (FWRITE
| O_TRUNC
)) {
9481 * If we are writing, appending, and not truncating,
9482 * indicate that we are appending so that if the
9483 * UF_APPEND or SF_APPEND bits are set, we do not deny
9486 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
9487 action
|= KAUTH_VNODE_APPEND_DATA
;
9489 action
|= KAUTH_VNODE_WRITE_DATA
;
9493 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
9498 // if the vnode is tagged VOPENEVT and the current process
9499 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9500 // flag to the open mode so that this open won't count against
9501 // the vnode when carbon delete() does a vnode_isinuse() to see
9502 // if a file is currently in use. this allows spotlight
9503 // importers to not interfere with carbon apps that depend on
9504 // the no-delete-if-busy semantics of carbon delete().
9506 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
9510 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
/* Taking the open reference failed: undo the VNOP_OPEN above. */
9513 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
9514 VNOP_CLOSE(vp
, fmode
, ctx
);
9518 /* Call out to allow 3rd party notification of open.
9519 * Ignore result of kauth_authorize_fileop call.
9522 mac_vnode_notify_open(ctx
, vp
, fmode
);
9524 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
/*
 * wait_for_namespace_event: called on behalf of a handler process to
 * receive the next pending item of the given type.
 *
 * nhd         - user addresses the results are copied out to: token, flags
 *               and fdptr always; infoptr and objid for the larger
 *               argument layouts
 * nspace_type - which handler role the caller is serving
 *
 * Visible flow: the per-type handler_busy flag is claimed under
 * nspace_handler_exclusion_lock (a second concurrent caller takes the
 * early unlock path); the task is marked dependency capable; under
 * nspace_handler_lock the caller registers as the handler if none is
 * registered; it then waits on nspace_item_idx for a NEW item of its type,
 * marks it PROCESSING and assigns a fresh token; the vnode is re-acquired
 * by vid, opened, attached to a newly allocated file descriptor, and
 * token / op / descriptor index (plus optional offset, length and link id)
 * are copied out.  Failures jump to a cleanup path driven by the three
 * *_successful booleans.  Finally handler_busy is cleared.
 *
 * NOTE(review): no unlock of nspace_handler_lock is visible between the
 * lock call and the copyout() calls, so user memory appears to be touched
 * with the mutex held - confirm.
 * NOTE(review): snapshot_timestamp is a time_t compared against ~0; if
 * time_t is wider than 32 bits, a value stored from a uint32_t (see
 * FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) can never compare equal -
 * confirm.
 */
9532 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
/* Allow only one thread at a time to serve a given handler type. */
9539 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9540 if (nspace_handlers
[nspace_type
].handler_busy
) {
9541 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9545 nspace_handlers
[nspace_type
].handler_busy
= 1;
9546 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9549 * Any process that gets here will be one of the namespace handlers.
9550 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9551 * as we can cause deadlocks to occur, because the namespace handler may prevent
9552 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9555 curtask
= current_task();
9556 bsd_set_dependency_capable (curtask
);
9558 lck_mtx_lock(&nspace_handler_lock
);
/* First caller for this type becomes the registered handler. */
9559 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
9560 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
9561 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
9564 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9565 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9569 while (error
== 0) {
9571 /* Try to find matching namespace item */
9572 for (i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9573 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9574 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9580 if (i
>= MAX_NSPACE_ITEMS
) {
9581 /* Nothing is there yet. Wait for wake up and retry */
9582 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9583 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9584 /* Prevent infinite loop if snapshot handler exited */
/* Claim the item: NEW -> PROCESSING, and mint the token the handler will quote back. */
9591 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9592 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9593 nspace_items
[i
].token
= ++nspace_token_id
;
9595 assert(nspace_items
[i
].vp
);
9596 struct fileproc
*fp
;
9599 struct proc
*p
= current_proc();
9600 vfs_context_t ctx
= vfs_context_current();
9601 struct vnode_attr va
;
/* These three flags record how far setup got, for the cleanup path. */
9602 bool vn_get_succsessful
= false;
9603 bool vn_open_successful
= false;
9604 bool fp_alloc_successful
= false;
9607 * Use vnode pointer to acquire a file descriptor for
9608 * hand-off to userland
9610 fmode
= nspace_open_flags_for_type(nspace_type
);
9611 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9612 if (error
) goto cleanup
;
9613 vn_get_succsessful
= true;
9615 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9616 if (error
) goto cleanup
;
9617 vn_open_successful
= true;
9619 error
= falloc(p
, &fp
, &indx
, ctx
);
9620 if (error
) goto cleanup
;
9621 fp_alloc_successful
= true;
/* Bind the new file structure to the vnode with the chosen open mode. */
9623 fp
->f_fglob
->fg_flag
= fmode
;
9624 fp
->f_fglob
->fg_ops
= &vnops
;
9625 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9628 procfdtbl_releasefd(p
, indx
, NULL
);
9629 fp_drop(p
, indx
, fp
, 1);
9633 * All variants of the namespace handler struct support these three fields:
9634 * token, flags, and the FD pointer
9636 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9637 if (error
) goto cleanup
;
9638 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9639 if (error
) goto cleanup
;
9640 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9641 if (error
) goto cleanup
;
9644 * Handle optional fields:
9645 * extended version support an info ptr (offset, length), and the
9647 * namedata version supports a unique per-link object ID
/* The item arg, when present, is a uio describing the affected byte range. */
9651 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9652 uint64_t u_offset
, u_length
;
9655 u_offset
= uio_offset(uio
);
9656 u_length
= uio_resid(uio
);
/* Offset and length are written as two consecutive 64-bit values at infoptr. */
9661 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9662 if (error
) goto cleanup
;
9663 error
= copyout(&u_length
, nhd
->infoptr
+ sizeof(uint64_t), sizeof(uint64_t));
9664 if (error
) goto cleanup
;
9669 VATTR_WANTED(&va
, va_linkid
);
9670 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9671 if (error
) goto cleanup
;
/* linkid stays 0 when the filesystem does not report va_linkid. */
9673 uint64_t linkid
= 0;
9674 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9675 linkid
= (uint64_t)va
.va_linkid
;
9677 error
= copyout(&linkid
, nhd
->objid
, sizeof(uint64_t));
/* Release whatever was acquired; the guarding condition is not visible in this excerpt. */
9681 if (fp_alloc_successful
) fp_free(p
, indx
, fp
);
9682 if (vn_open_successful
) vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9686 if (vn_get_succsessful
) vnode_put(nspace_items
[i
].vp
);
9692 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9693 vnode_lock_spin(nspace_items
[i
].vp
);
9694 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9695 vnode_unlock(nspace_items
[i
].vp
);
/* Complete the item and release the thread sleeping on it. */
9697 nspace_items
[i
].vp
= NULL
;
9698 nspace_items
[i
].vid
= 0;
9699 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9700 nspace_items
[i
].token
= 0;
9702 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9705 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9706 // just go through every snapshot event and unblock it immediately.
9707 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9708 for(i
= 0; i
< MAX_NSPACE_ITEMS
; i
++) {
9709 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9710 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9711 nspace_items
[i
].vp
= NULL
;
9712 nspace_items
[i
].vid
= 0;
9713 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9714 nspace_items
[i
].token
= 0;
9716 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9723 lck_mtx_unlock(&nspace_handler_lock
);
/* Let the next caller serve this handler type. */
9725 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9726 nspace_handlers
[nspace_type
].handler_busy
= 0;
9727 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
/*
 * validate_namespace_args: check that the fsctl argument size is exactly
 * one of the known handler structure sizes.
 *
 * is64bit - nonzero when the calling process is 64-bit
 * size    - size of the user argument
 *
 * The size is compared against the info, info_ext and data layouts of the
 * user64 family and of the user32 family.  The return statements are not
 * visible in this excerpt.
 *
 * NOTE(review): size is a signed int compared against sizeof() values,
 * while the caller holds it as u_int - confirm the intended type.
 */
9732 static inline int validate_namespace_args (int is64bit
, int size
) {
9735 /* Must be one of these */
9736 if (size
== sizeof(user64_namespace_handler_info
)) {
9739 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9742 if (size
== sizeof(user64_namespace_handler_data
)) {
9748 /* 32 bit -- must be one of these */
9749 if (size
== sizeof(user32_namespace_handler_info
)) {
9752 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9755 if (size
== sizeof(user32_namespace_handler_data
)) {
/*
 * process_namespace_fsctl: common body of the "get next event" fsctls.
 *
 * nspace_type - handler role the caller is serving
 * is64bit     - nonzero when the calling process is 64-bit
 * size        - size of the user argument structure
 * data        - kernel copy of the user argument structure
 *
 * After a superuser check and a size validation, the user pointers held in
 * the 64-bit or 32-bit layout are widened into a kernel-only
 * namespace_handler_data.  infoptr and objid are filled in only when the
 * supplied structure is large enough; otherwise they keep the zero written
 * by bzero().  The result is passed to wait_for_namespace_event(), whose
 * return value is returned.
 *
 * NOTE(review): in the 32-bit branch objid is converted with a plain
 * (user_addr_t) cast while the other 32-bit fields use CAST_USER_ADDR_T -
 * confirm this difference is intentional.
 */
9767 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9770 namespace_handler_data nhd
;
9772 bzero (&nhd
, sizeof(namespace_handler_data
));
9774 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9778 error
= validate_namespace_args (is64bit
, size
);
9783 /* Copy in the userland pointers into our kernel-only struct */
9786 /* 64 bit userland structures */
9787 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9788 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9789 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9791 /* If the size is greater than the standard info struct, add in extra fields */
9792 if (size
> (sizeof(user64_namespace_handler_info
))) {
9793 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9794 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9796 if (size
== (sizeof(user64_namespace_handler_data
))) {
9797 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9799 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9803 /* 32 bit userland structures */
9804 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9805 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9806 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9808 if (size
> (sizeof(user32_namespace_handler_info
))) {
9809 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9810 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
9812 if (size
== (sizeof(user32_namespace_handler_data
))) {
9813 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9815 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9819 return wait_for_namespace_event(&nhd
, nspace_type
);
9823 * Make a filesystem-specific control call:
/*
 * fsctl_internal: shared implementation behind fsctl() and ffsctl().
 *
 * p       - calling process (decides 32- versus 64-bit argument layouts)
 * arg_vp  - in/out vnode pointer; the FSCTL_SYNC_VOLUME case notes that it
 *           marks the argument vnode as released
 * cmd     - ioctl-style command word; IOCPARM_LEN(cmd) is the argument
 *           size and the IOC_OUT / IOC_VOID bits select the data direction
 * udata   - user address of the argument
 * options - passed through to VNOP_IOCTL() for filesystem-specific commands
 * ctx     - VFS context
 *
 * The argument is staged in a 128-byte stack buffer, or in a kalloc()ed
 * buffer when it is larger (HFS_GETPATH is always rounded up to
 * MAXPATHLEN).  Generic commands are handled in the switch below;
 * anything else is forwarded to the filesystem with VNOP_IOCTL().  On
 * success with IOC_OUT set, the buffer is copied back to udata.
 *
 * NOTE(review): FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME stores a
 * uint32_t into the time_t snapshot_timestamp, while other code compares
 * snapshot_timestamp with ~0; if time_t is wider than 32 bits those
 * comparisons cannot match a value stored here - confirm.
 * NOTE(review): FSCTL_SET_FSTYPENAME_OVERRIDE hands the staged buffer to
 * strlcpy() as a C string; whether the buffer is guaranteed to be
 * NUL-terminated is not visible here - confirm.
 */
9827 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
9832 #define STK_PARAMS 128
9833 char stkbuf
[STK_PARAMS
] = {0};
9835 vnode_t vp
= *arg_vp
;
9837 size
= IOCPARM_LEN(cmd
);
9838 if (size
> IOCPARM_MAX
) return (EINVAL
);
9840 is64bit
= proc_is64bit(p
);
9846 * ensure the buffer is large enough for underlying calls
9848 #ifndef HFSIOC_GETPATH
9849 typedef char pn_t
[MAXPATHLEN
];
9850 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9854 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9856 if (IOCBASECMD(cmd
) == HFS_GETPATH
) {
9857 /* Round up to MAXPATHLEN regardless of user input */
9861 if (size
> sizeof (stkbuf
)) {
9862 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9870 error
= copyin(udata
, data
, size
);
9879 *(user_addr_t
*)data
= udata
;
9882 *(uint32_t *)data
= (uint32_t)udata
;
9885 } else if ((cmd
& IOC_OUT
) && size
) {
9887 * Zero the buffer so the user always
9888 * gets back something deterministic.
9891 } else if (cmd
& IOC_VOID
) {
9893 *(user_addr_t
*)data
= udata
;
9896 *(uint32_t *)data
= (uint32_t)udata
;
9900 /* Check to see if it's a generic command */
9901 switch (IOCBASECMD(cmd
)) {
9903 case FSCTL_SYNC_VOLUME
: {
9904 mount_t mp
= vp
->v_mount
;
9905 int arg
= *(uint32_t*)data
;
9907 /* record vid of vp so we can drop it below. */
9908 uint32_t vvid
= vp
->v_id
;
9911 * Then grab mount_iterref so that we can release the vnode.
9912 * Without this, a thread may call vnode_iterate_prepare then
9913 * get into a deadlock because we've never released the root vp
9915 error
= mount_iterref (mp
, 0);
9921 /* issue the sync for this volume */
9922 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
9925 * Then release the mount_iterref once we're done syncing; it's not
9926 * needed for the VNOP_IOCTL below
9930 if (arg
& FSCTL_SYNC_FULLSYNC
) {
9931 /* re-obtain vnode iocount on the root vp, if possible */
9932 error
= vnode_getwithvid (vp
, vvid
);
9934 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
9938 /* mark the argument VP as having been released */
9943 case FSCTL_ROUTEFS_SETROUTEID
: {
9945 char routepath
[MAXPATHLEN
];
9948 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9951 bzero(routepath
, MAXPATHLEN
);
9952 error
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
);
9956 error
= routefs_kernel_mount(routepath
);
9964 case FSCTL_SET_PACKAGE_EXTS
: {
9965 user_addr_t ext_strings
;
9966 uint32_t num_entries
;
9969 if ((error
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0)))
9972 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
9973 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
9975 // either you're 64-bit and passed a 64-bit struct or
9976 // you're 32-bit and passed a 32-bit struct. otherwise
9983 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
9984 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
9985 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
9987 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
9988 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
9989 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
9991 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
9995 /* namespace handlers */
9996 case FSCTL_NAMESPACE_HANDLER_GET
: {
9997 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
10001 /* Snapshot handlers */
10002 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
10003 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10007 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
10008 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
10012 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
10013 uint32_t token
, val
;
10016 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10020 if (!nspace_is_special_process(p
)) {
10025 token
= ((uint32_t *)data
)[0];
10026 val
= ((uint32_t *)data
)[1];
10028 lck_mtx_lock(&nspace_handler_lock
);
10030 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10031 if (nspace_items
[i
].token
== token
) {
10032 break; /* exit for loop, not case stmt */
10036 if (i
>= MAX_NSPACE_ITEMS
) {
10040 // if this bit is set, when resolve_nspace_item() times out
10041 // it will loop and go back to sleep.
10043 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
10046 lck_mtx_unlock(&nspace_handler_lock
);
10049 printf("nspace-handler-update: did not find token %u\n", token
);
10054 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
10055 uint32_t token
, val
;
10058 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10062 if (!nspace_is_special_process(p
)) {
10067 token
= ((uint32_t *)data
)[0];
10068 val
= ((uint32_t *)data
)[1];
10070 lck_mtx_lock(&nspace_handler_lock
);
10072 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10073 if (nspace_items
[i
].token
== token
) {
10074 break; /* exit for loop, not case statement */
10078 if (i
>= MAX_NSPACE_ITEMS
) {
10079 printf("nspace-handler-unblock: did not find token %u\n", token
);
10082 if (val
== 0 && nspace_items
[i
].vp
) {
10083 vnode_lock_spin(nspace_items
[i
].vp
);
10084 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10085 vnode_unlock(nspace_items
[i
].vp
);
10088 nspace_items
[i
].vp
= NULL
;
10089 nspace_items
[i
].arg
= NULL
;
10090 nspace_items
[i
].op
= 0;
10091 nspace_items
[i
].vid
= 0;
10092 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
10093 nspace_items
[i
].token
= 0;
10095 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10098 lck_mtx_unlock(&nspace_handler_lock
);
10102 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
10103 uint32_t token
, val
;
10106 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
10110 if (!nspace_is_special_process(p
)) {
10115 token
= ((uint32_t *)data
)[0];
10116 val
= ((uint32_t *)data
)[1];
10118 lck_mtx_lock(&nspace_handler_lock
);
10120 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
10121 if (nspace_items
[i
].token
== token
) {
10122 break; /* exit for loop, not case stmt */
10126 if (i
>= MAX_NSPACE_ITEMS
) {
10127 printf("nspace-handler-cancel: did not find token %u\n", token
);
10130 if (nspace_items
[i
].vp
) {
10131 vnode_lock_spin(nspace_items
[i
].vp
);
10132 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
10133 vnode_unlock(nspace_items
[i
].vp
);
10136 nspace_items
[i
].vp
= NULL
;
10137 nspace_items
[i
].arg
= NULL
;
10138 nspace_items
[i
].vid
= 0;
10139 nspace_items
[i
].token
= val
;
10140 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
10141 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
10143 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
10146 lck_mtx_unlock(&nspace_handler_lock
);
10150 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
10151 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10155 // we explicitly do not do the namespace_handler_proc check here
10157 lck_mtx_lock(&nspace_handler_lock
);
10158 snapshot_timestamp
= ((uint32_t *)data
)[0];
10159 wakeup(&nspace_item_idx
);
10160 lck_mtx_unlock(&nspace_handler_lock
);
10161 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
10166 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
10168 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10172 lck_mtx_lock(&nspace_handler_lock
);
10173 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
10174 lck_mtx_unlock(&nspace_handler_lock
);
10175 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10176 nspace_allow_virtual_devs
? "" : " NOT");
10182 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
10184 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
10188 mount_lock(vp
->v_mount
);
10189 if (data
[0] != 0) {
10190 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
10191 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
10192 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10193 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
10194 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
10197 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
10198 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
10200 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
10201 vp
->v_mount
->fstypename_override
[0] = '\0';
10203 mount_unlock(vp
->v_mount
);
10209 /* Invoke the filesystem-specific code */
10210 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
10213 } /* end switch stmt */
10216 * if no errors, copy any data to user. Size was
10217 * already set and checked above.
10219 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
10220 error
= copyout(data
, udata
, size
);
/*
 * fsctl: path-based filesystem control system call.
 *
 * p      - calling process
 * uap    - user arguments: path, cmd, data, options
 * retval - unused
 *
 * Looks up uap->path (following symlinks unless FSOPT_NOFOLLOW is set in
 * uap->options), runs the MAC fsctl check on the mount of the resulting
 * vnode, and hands the request to fsctl_internal().  The cleanup and
 * return statements are not visible in this excerpt.
 */
10231 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
10234 struct nameidata nd
;
10237 vfs_context_t ctx
= vfs_context_current();
10239 AUDIT_ARG(cmd
, uap
->cmd
);
10240 AUDIT_ARG(value32
, uap
->options
);
10241 /* Get the vnode for the file we are getting info on: */
10243 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
10244 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
10245 UIO_USERSPACE
, uap
->path
, ctx
);
10246 if ((error
= namei(&nd
))) goto done
;
10251 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
10257 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
/*
 * ffsctl: file-descriptor-based filesystem control system call.
 *
 * p      - calling process
 * uap    - user arguments: fd, cmd, data, options
 * retval - unused
 *
 * Resolves uap->fd to a vnode with file_vnode(), takes a reference with
 * vnode_getwithref(), runs the MAC fsctl check on the mount of the vnode,
 * and hands the request to fsctl_internal().  As the comment near the end
 * notes, fsctl_internal() can drop the iocount and reset vp to NULL, so vp
 * is validated afterwards.  The cleanup and return statements are not
 * visible in this excerpt.
 */
10266 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
10270 vfs_context_t ctx
= vfs_context_current();
10273 AUDIT_ARG(fd
, uap
->fd
);
10274 AUDIT_ARG(cmd
, uap
->cmd
);
10275 AUDIT_ARG(value32
, uap
->options
);
10277 /* Get the vnode for the file we are getting info on: */
10278 if ((error
= file_vnode(uap
->fd
, &vp
)))
10281 if ((error
= vnode_getwithref(vp
))) {
10287 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
10294 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
10298 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10305 /* end of fsctl system call */
10308 * Retrieve the data of an extended attribute.
10311 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
10314 struct nameidata nd
;
10315 char attrname
[XATTR_MAXNAMELEN
+1];
10316 vfs_context_t ctx
= vfs_context_current();
10318 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10319 size_t attrsize
= 0;
10321 u_int32_t nameiflags
;
10323 char uio_buf
[ UIO_SIZEOF(1) ];
10325 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10328 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10329 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10330 if ((error
= namei(&nd
))) {
10336 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10339 if (xattr_protected(attrname
)) {
10340 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
10346 * the specific check for 0xffffffff is a hack to preserve
10347 * binary compatibility in K64 with applications that discovered
10348 * that passing in a buf pointer and a size of -1 resulted in
10349 * just the size of the indicated extended attribute being returned.
10350 * this isn't part of the documented behavior, but because of the
10351 * original implementation's check for "uap->size > 0", this behavior
10352 * was allowed. In K32 that check turned into a signed comparison
10353 * even though uap->size is unsigned... in K64, we blow by that
10354 * check because uap->size is unsigned and doesn't get sign smeared
10355 * in the munger for a 32 bit user app. we also need to add a
10356 * check to limit the maximum size of the buffer being passed in...
10357 * unfortunately, the underlying filesystems seem to just malloc
10358 * the requested size even if the actual extended attribute is tiny.
10359 * because that malloc is for kernel wired memory, we have to put a
10360 * sane limit on it.
10362 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10363 * U64 running on K64 will yield -1 (64 bits wide)
10364 * U32/U64 running on K32 will yield -1 (32 bits wide)
10366 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
10370 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
10371 uap
->size
= XATTR_MAXSIZE
;
10373 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10374 &uio_buf
[0], sizeof(uio_buf
));
10375 uio_addiov(auio
, uap
->value
, uap
->size
);
10378 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
10383 *retval
= uap
->size
- uio_resid(auio
);
10385 *retval
= (user_ssize_t
)attrsize
;
10392 * Retrieve the data of an extended attribute.
10395 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
10398 char attrname
[XATTR_MAXNAMELEN
+1];
10400 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10401 size_t attrsize
= 0;
10404 char uio_buf
[ UIO_SIZEOF(1) ];
10406 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10409 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10412 if ( (error
= vnode_getwithref(vp
)) ) {
10413 file_drop(uap
->fd
);
10416 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10419 if (xattr_protected(attrname
)) {
10423 if (uap
->value
&& uap
->size
> 0) {
10424 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
10425 &uio_buf
[0], sizeof(uio_buf
));
10426 uio_addiov(auio
, uap
->value
, uap
->size
);
10429 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10431 (void)vnode_put(vp
);
10432 file_drop(uap
->fd
);
10435 *retval
= uap
->size
- uio_resid(auio
);
10437 *retval
= (user_ssize_t
)attrsize
;
10443 * Set the data of an extended attribute.
10446 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
10449 struct nameidata nd
;
10450 char attrname
[XATTR_MAXNAMELEN
+1];
10451 vfs_context_t ctx
= vfs_context_current();
10453 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10455 u_int32_t nameiflags
;
10457 char uio_buf
[ UIO_SIZEOF(1) ];
10459 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10462 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10463 if (error
== EPERM
) {
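10464 /* if the string won't fit in attrname, copyinstr emits EPERM -- NOTE(review): as parenthesized above, error receives the 0/1 result of the "!= 0" comparison rather than copyinstr's return value, so it compares equal to EPERM (1) after any copyinstr failure; confirm whether that is intended */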
10465 return (ENAMETOOLONG
);
10467 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10470 if (xattr_protected(attrname
))
10472 if (uap
->size
!= 0 && uap
->value
== 0) {
10476 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10477 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10478 if ((error
= namei(&nd
))) {
10484 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10485 &uio_buf
[0], sizeof(uio_buf
));
10486 uio_addiov(auio
, uap
->value
, uap
->size
);
10488 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
10491 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10502 * Set the data of an extended attribute.
10505 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
10508 char attrname
[XATTR_MAXNAMELEN
+1];
10510 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10513 char uio_buf
[ UIO_SIZEOF(1) ];
10515 vfs_context_t ctx
= vfs_context_current();
10518 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10521 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
10522 if (error
== EPERM
) {
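10523 /* if the string won't fit in attrname, copyinstr emits EPERM -- NOTE(review): as parenthesized above, error receives the 0/1 result of the "!= 0" comparison rather than copyinstr's return value, so it compares equal to EPERM (1) after any copyinstr failure; confirm whether that is intended */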
10524 return (ENAMETOOLONG
);
10526 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10529 if (xattr_protected(attrname
))
10531 if (uap
->size
!= 0 && uap
->value
== 0) {
10534 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10537 if ( (error
= vnode_getwithref(vp
)) ) {
10538 file_drop(uap
->fd
);
10541 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
10542 &uio_buf
[0], sizeof(uio_buf
));
10543 uio_addiov(auio
, uap
->value
, uap
->size
);
10545 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
10548 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
10554 file_drop(uap
->fd
);
10560 * Remove an extended attribute.
10561 * XXX Code duplication here.
10564 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
10567 struct nameidata nd
;
10568 char attrname
[XATTR_MAXNAMELEN
+1];
10569 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10570 vfs_context_t ctx
= vfs_context_current();
10572 u_int32_t nameiflags
;
10575 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10578 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10582 if (xattr_protected(attrname
))
10584 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10585 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10586 if ((error
= namei(&nd
))) {
10592 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10595 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10606 * Remove an extended attribute.
10607 * XXX Code duplication here.
10610 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10613 char attrname
[XATTR_MAXNAMELEN
+1];
10617 vfs_context_t ctx
= vfs_context_current();
10620 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10623 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10627 if (xattr_protected(attrname
))
10629 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10632 if ( (error
= vnode_getwithref(vp
)) ) {
10633 file_drop(uap
->fd
);
10637 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10640 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10646 file_drop(uap
->fd
);
10652 * Retrieve the list of extended attribute names.
10653 * XXX Code duplication here.
10656 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10659 struct nameidata nd
;
10660 vfs_context_t ctx
= vfs_context_current();
10662 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10663 size_t attrsize
= 0;
10664 u_int32_t nameiflags
;
10666 char uio_buf
[ UIO_SIZEOF(1) ];
10668 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10671 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10672 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10673 if ((error
= namei(&nd
))) {
10678 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10679 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10680 &uio_buf
[0], sizeof(uio_buf
));
10681 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10684 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10688 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10690 *retval
= (user_ssize_t
)attrsize
;
10696 * Retrieve the list of extended attribute names.
10697 * XXX Code duplication here.
10700 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10704 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10705 size_t attrsize
= 0;
10707 char uio_buf
[ UIO_SIZEOF(1) ];
10709 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10712 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10715 if ( (error
= vnode_getwithref(vp
)) ) {
10716 file_drop(uap
->fd
);
10719 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10720 auio
= uio_createwithbuffer(1, 0, spacetype
,
10721 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10722 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10725 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10728 file_drop(uap
->fd
);
10730 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10732 *retval
= (user_ssize_t
)attrsize
;
10737 static int fsgetpath_internal(
10738 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10739 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10742 struct mount
*mp
= NULL
;
10747 if (bufsize
> PAGE_SIZE
) {
10755 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10756 error
= ENOTSUP
; /* unexpected failure */
10762 error
= VFS_ROOT(mp
, &vp
, ctx
);
10764 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10767 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10769 * If the fileid isn't found and we're in a union
10770 * mount volume, then see if the fileid is in the
10771 * mounted-on volume.
10773 struct mount
*tmp
= mp
;
10774 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10776 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10787 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10794 /* Obtain the absolute path to this vnode. */
10795 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10796 bpflags
|= BUILDPATH_CHECK_MOVED
;
10797 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10804 AUDIT_ARG(text
, buf
);
10806 if (kdebug_enable
) {
10807 long dbg_parms
[NUMPARMS
];
10810 dbg_namelen
= (int)sizeof(dbg_parms
);
10812 if (length
< dbg_namelen
) {
10813 memcpy((char *)dbg_parms
, buf
, length
);
10814 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10816 dbg_namelen
= length
;
10818 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10821 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10824 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10831 * Obtain the full pathname of a file system object by id.
10833 * This is a private SPI used by the File Manager.
10837 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10839 vfs_context_t ctx
= vfs_context_current();
10845 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10848 AUDIT_ARG(value32
, fsid
.val
[0]);
10849 AUDIT_ARG(value64
, uap
->objid
);
10850 /* Restrict output buffer size for now. */
10852 if (uap
->bufsize
> PAGE_SIZE
) {
10855 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10856 if (realpath
== NULL
) {
10860 error
= fsgetpath_internal(
10861 ctx
, fsid
.val
[0], uap
->objid
,
10862 uap
->bufsize
, realpath
, &length
);
10868 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10870 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10873 FREE(realpath
, M_TEMP
);
10879 * Common routine to handle various flavors of statfs data heading out
10882 * Returns: 0 Success
10886 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10887 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10888 boolean_t partial_copy
)
10891 int my_size
, copy_size
;
10894 struct user64_statfs sfs
;
10895 my_size
= copy_size
= sizeof(sfs
);
10896 bzero(&sfs
, my_size
);
10897 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10898 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10899 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10900 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10901 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10902 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10903 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10904 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10905 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10906 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
10907 sfs
.f_fsid
= sfsp
->f_fsid
;
10908 sfs
.f_owner
= sfsp
->f_owner
;
10909 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10910 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10912 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10914 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10915 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10917 if (partial_copy
) {
10918 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10920 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10923 struct user32_statfs sfs
;
10925 my_size
= copy_size
= sizeof(sfs
);
10926 bzero(&sfs
, my_size
);
10928 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10929 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10930 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10933 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
10934 * have to fudge the numbers here in that case. We inflate the blocksize in order
10935 * to reflect the filesystem size as best we can.
10937 if ((sfsp
->f_blocks
> INT_MAX
)
10938 /* Hack for 4061702. I think the real fix is for Carbon to
10939 * look for some volume capability and not depend on hidden
10940 * semantics agreed between a FS and carbon.
10941 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10942 * for Carbon to set bNoVolumeSizes volume attribute.
10943 * Without this the webdavfs files cannot be copied onto
10944 * disk as they look huge. This change should not affect
10945 * XSAN as they should not be setting these to -1.
10947 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
10948 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
10949 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
10953 * Work out how far we have to shift the block count down to make it fit.
10954 * Note that it's possible to have to shift so far that the resulting
10955 * blocksize would be unreportably large. At that point, we will clip
10956 * any values that don't fit.
10958 * For safety's sake, we also ensure that f_iosize is never reported as
10959 * being smaller than f_bsize.
10961 for (shift
= 0; shift
< 32; shift
++) {
10962 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
10964 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
10967 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10968 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
10969 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
10970 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
10971 #undef __SHIFT_OR_CLIP
10972 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
10973 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
10975 /* filesystem is small enough to be reported honestly */
10976 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
10977 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
10978 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
10979 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
10980 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
10982 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
10983 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
10984 sfs
.f_fsid
= sfsp
->f_fsid
;
10985 sfs
.f_owner
= sfsp
->f_owner
;
10986 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10987 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10989 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10991 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10992 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10994 if (partial_copy
) {
10995 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10997 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
11000 if (sizep
!= NULL
) {
11007 * copy stat structure into user_stat structure.
11009 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
11011 bzero(usbp
, sizeof(*usbp
));
11013 usbp
->st_dev
= sbp
->st_dev
;
11014 usbp
->st_ino
= sbp
->st_ino
;
11015 usbp
->st_mode
= sbp
->st_mode
;
11016 usbp
->st_nlink
= sbp
->st_nlink
;
11017 usbp
->st_uid
= sbp
->st_uid
;
11018 usbp
->st_gid
= sbp
->st_gid
;
11019 usbp
->st_rdev
= sbp
->st_rdev
;
11020 #ifndef _POSIX_C_SOURCE
11021 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11022 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11023 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11024 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11025 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11026 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11028 usbp
->st_atime
= sbp
->st_atime
;
11029 usbp
->st_atimensec
= sbp
->st_atimensec
;
11030 usbp
->st_mtime
= sbp
->st_mtime
;
11031 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11032 usbp
->st_ctime
= sbp
->st_ctime
;
11033 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11035 usbp
->st_size
= sbp
->st_size
;
11036 usbp
->st_blocks
= sbp
->st_blocks
;
11037 usbp
->st_blksize
= sbp
->st_blksize
;
11038 usbp
->st_flags
= sbp
->st_flags
;
11039 usbp
->st_gen
= sbp
->st_gen
;
11040 usbp
->st_lspare
= sbp
->st_lspare
;
11041 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11042 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11045 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
11047 bzero(usbp
, sizeof(*usbp
));
11049 usbp
->st_dev
= sbp
->st_dev
;
11050 usbp
->st_ino
= sbp
->st_ino
;
11051 usbp
->st_mode
= sbp
->st_mode
;
11052 usbp
->st_nlink
= sbp
->st_nlink
;
11053 usbp
->st_uid
= sbp
->st_uid
;
11054 usbp
->st_gid
= sbp
->st_gid
;
11055 usbp
->st_rdev
= sbp
->st_rdev
;
11056 #ifndef _POSIX_C_SOURCE
11057 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11058 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11059 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11060 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11061 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11062 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11064 usbp
->st_atime
= sbp
->st_atime
;
11065 usbp
->st_atimensec
= sbp
->st_atimensec
;
11066 usbp
->st_mtime
= sbp
->st_mtime
;
11067 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11068 usbp
->st_ctime
= sbp
->st_ctime
;
11069 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11071 usbp
->st_size
= sbp
->st_size
;
11072 usbp
->st_blocks
= sbp
->st_blocks
;
11073 usbp
->st_blksize
= sbp
->st_blksize
;
11074 usbp
->st_flags
= sbp
->st_flags
;
11075 usbp
->st_gen
= sbp
->st_gen
;
11076 usbp
->st_lspare
= sbp
->st_lspare
;
11077 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11078 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11082 * copy stat64 structure into user_stat64 structure.
11084 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
11086 bzero(usbp
, sizeof(*usbp
));
11088 usbp
->st_dev
= sbp
->st_dev
;
11089 usbp
->st_ino
= sbp
->st_ino
;
11090 usbp
->st_mode
= sbp
->st_mode
;
11091 usbp
->st_nlink
= sbp
->st_nlink
;
11092 usbp
->st_uid
= sbp
->st_uid
;
11093 usbp
->st_gid
= sbp
->st_gid
;
11094 usbp
->st_rdev
= sbp
->st_rdev
;
11095 #ifndef _POSIX_C_SOURCE
11096 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11097 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11098 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11099 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11100 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11101 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11102 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11103 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11105 usbp
->st_atime
= sbp
->st_atime
;
11106 usbp
->st_atimensec
= sbp
->st_atimensec
;
11107 usbp
->st_mtime
= sbp
->st_mtime
;
11108 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11109 usbp
->st_ctime
= sbp
->st_ctime
;
11110 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11111 usbp
->st_birthtime
= sbp
->st_birthtime
;
11112 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11114 usbp
->st_size
= sbp
->st_size
;
11115 usbp
->st_blocks
= sbp
->st_blocks
;
11116 usbp
->st_blksize
= sbp
->st_blksize
;
11117 usbp
->st_flags
= sbp
->st_flags
;
11118 usbp
->st_gen
= sbp
->st_gen
;
11119 usbp
->st_lspare
= sbp
->st_lspare
;
11120 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11121 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11124 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
11126 bzero(usbp
, sizeof(*usbp
));
11128 usbp
->st_dev
= sbp
->st_dev
;
11129 usbp
->st_ino
= sbp
->st_ino
;
11130 usbp
->st_mode
= sbp
->st_mode
;
11131 usbp
->st_nlink
= sbp
->st_nlink
;
11132 usbp
->st_uid
= sbp
->st_uid
;
11133 usbp
->st_gid
= sbp
->st_gid
;
11134 usbp
->st_rdev
= sbp
->st_rdev
;
11135 #ifndef _POSIX_C_SOURCE
11136 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
11137 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
11138 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
11139 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
11140 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
11141 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
11142 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
11143 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
11145 usbp
->st_atime
= sbp
->st_atime
;
11146 usbp
->st_atimensec
= sbp
->st_atimensec
;
11147 usbp
->st_mtime
= sbp
->st_mtime
;
11148 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
11149 usbp
->st_ctime
= sbp
->st_ctime
;
11150 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
11151 usbp
->st_birthtime
= sbp
->st_birthtime
;
11152 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
11154 usbp
->st_size
= sbp
->st_size
;
11155 usbp
->st_blocks
= sbp
->st_blocks
;
11156 usbp
->st_blksize
= sbp
->st_blksize
;
11157 usbp
->st_flags
= sbp
->st_flags
;
11158 usbp
->st_gen
= sbp
->st_gen
;
11159 usbp
->st_lspare
= sbp
->st_lspare
;
11160 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
11161 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
11165 * Purge buffer cache for simulating cold starts
11167 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
11169 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
11171 return VNODE_RETURNED
;
11174 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
11176 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
11178 return VFS_RETURNED
;
11182 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
11184 if (!kauth_cred_issuser(kauth_cred_get()))
11187 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);
11193 * gets the vnode associated with the (unnamed) snapshot directory
11194 * for a Filesystem. The snapshot directory vnode is returned with
11195 * an iocount on it.
11198 vnode_get_snapdir(vnode_t rvp
, vnode_t
*sdvpp
, vfs_context_t ctx
)
11202 error
= VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
);
11204 #if CLONE_SNAPSHOT_FALLBACKS_ENABLED
11205 if (error
== ENOTSUP
) {
11206 struct nameidata snapnd
;
11209 * Temporary fallback to <mountpoint>/.snaps lookup
11210 * XXX: To be removed.
11212 NDINIT(&snapnd
, LOOKUP
, OP_LOOKUP
, USEDVP
,
11213 UIO_SYSSPACE
, CAST_USER_ADDR_T(".snaps"), ctx
);
11214 snapnd
.ni_dvp
= rvp
;
11216 if ((error
= namei(&snapnd
))) {
11220 *sdvpp
= snapnd
.ni_vp
;
11221 nameidone(&snapnd
);
11224 #endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
11229 * Get the snapshot vnode.
11231 * If successful, the call returns with an iocount on *rvpp, *sdvpp and
11232 * needs nameidone() on ndp.
11234 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11236 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11240 vnode_get_snapshot(int dirfd
, vnode_t
*rvpp
, vnode_t
*sdvpp
,
11241 user_addr_t name
, struct nameidata
*ndp
, int32_t op
,
11242 #if !CONFIG_TRIGGERS
11245 enum path_operation pathop
,
11251 struct vfs_attr vfa
;
11256 error
= vnode_getfromfd(ctx
, dirfd
, rvpp
);
11260 if (!vnode_isvroot(*rvpp
)) {
11265 /* Make sure the filesystem supports snapshots */
11266 VFSATTR_INIT(&vfa
);
11267 VFSATTR_WANTED(&vfa
, f_capabilities
);
11268 if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) ||
11269 !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) ||
11270 !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] &
11271 VOL_CAP_INT_SNAPSHOT
)) ||
11272 !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] &
11273 VOL_CAP_INT_SNAPSHOT
))) {
11278 error
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
);
11282 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11283 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11288 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11289 * (the length returned by copyinstr includes the terminating NUL)
11291 if ((name_len
== 1) || (name_len
== 2 && name_buf
[0] == '.') ||
11292 (name_len
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) {
11296 for (i
= 0; i
< (int)name_len
&& name_buf
[i
] != '/'; i
++);
11297 if (i
< (int)name_len
) {
11303 if (op
== CREATE
) {
11304 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
),
11306 } else if (op
== DELETE
) {
11307 error
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
),
11314 /* Check if the snapshot already exists ... */
11315 NDINIT(ndp
, op
, pathop
, USEDVP
| NOCACHE
| AUDITVNPATH1
,
11316 UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
);
11317 ndp
->ni_dvp
= *sdvpp
;
11319 error
= namei(ndp
);
11321 FREE(name_buf
, M_TEMP
);
11337 * create a filesystem snapshot (for supporting filesystems)
11339 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11340 * We get to the (unnamed) snapshot directory vnode and create the vnode
11341 * for the snapshot in it.
11345 * a) Passed in name for snapshot cannot have slashes.
11346 * b) name can't be "." or ".."
11348 * Since this requires superuser privileges, vnode_authorize calls are not
11352 snapshot_create(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11355 vnode_t rvp
, snapdvp
;
11357 struct nameidata namend
;
11359 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, CREATE
,
11364 if (namend
.ni_vp
) {
11365 vnode_put(namend
.ni_vp
);
11368 struct vnode_attr va
;
11369 vnode_t vp
= NULLVP
;
11372 VATTR_SET(&va
, va_type
, VREG
);
11373 VATTR_SET(&va
, va_mode
, 0);
11375 error
= vn_create(snapdvp
, &vp
, &namend
, &va
,
11376 VN_CREATE_NOAUTH
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
);
11379 #if CLONE_SNAPSHOT_FALLBACKS_ENABLED
11381 error
= VNOP_COPYFILE(rvp
, rvp
, NULLVP
, &namend
.ni_cnd
,
11384 #endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
11387 nameidone(&namend
);
11388 vnode_put(snapdvp
);
11394 * Delete a Filesystem snapshot
11396 * get the vnode for the unnamed snapshot directory and the snapshot and
11397 * delete the snapshot.
11400 snapshot_delete(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11403 vnode_t rvp
, snapdvp
;
11405 struct nameidata namend
;
11407 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, DELETE
,
11412 error
= VNOP_REMOVE(snapdvp
, namend
.ni_vp
, &namend
.ni_cnd
,
11413 VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
);
11415 vnode_put(namend
.ni_vp
);
11416 nameidone(&namend
);
11417 vnode_put(snapdvp
);
11424 * Revert a filesystem to a snapshot
11426 * Marks the filesystem to revert to the given snapshot on next mount.
11429 snapshot_revert(int dirfd
, user_addr_t name
, __unused
uint32_t flags
,
11435 struct fs_snapshot_revert_args revert_data
;
11436 struct componentname cnp
;
11440 error
= vnode_getfromfd(ctx
, dirfd
, &rvp
);
11444 mp
= vnode_mount(rvp
);
11447 * Grab mount_iterref so that we can release the vnode,
11448 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11450 error
= mount_iterref (mp
, 0);
11456 MALLOC(name_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11457 error
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
);
11459 mount_iterdrop(mp
);
11460 FREE(name_buf
, M_TEMP
);
11464 memset(&cnp
, 0, sizeof(cnp
));
11465 cnp
.cn_pnbuf
= (char *)name_buf
;
11466 cnp
.cn_nameiop
= LOOKUP
;
11467 cnp
.cn_flags
= ISLASTCN
| HASBUF
;
11468 cnp
.cn_pnlen
= MAXPATHLEN
;
11469 cnp
.cn_nameptr
= cnp
.cn_pnbuf
;
11470 cnp
.cn_namelen
= (int)name_len
;
11471 revert_data
.sr_cnp
= &cnp
;
11473 error
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
);
11474 mount_iterdrop(mp
);
11475 FREE(name_buf
, M_TEMP
);
11478 /* If there was any error, try again using VNOP_IOCTL */
11481 struct nameidata namend
;
11483 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
,
11490 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
11491 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11494 #ifndef APFS_REVERT_TO_SNAPSHOT
11495 #define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11498 error
= VNOP_IOCTL(namend
.ni_vp
, APFS_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
,
11501 vnode_put(namend
.ni_vp
);
11502 nameidone(&namend
);
11503 vnode_put(snapdvp
);
11511 * rename a Filesystem snapshot
11513 * get the vnode for the unnamed snapshot directory and the snapshot and
11514 * rename the snapshot. This is a very specialised (and simple) case of
11515 * rename(2) (which has to deal with a lot more complications). It differs
11516 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11519 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t
new,
11520 __unused
uint32_t flags
, vfs_context_t ctx
)
11522 vnode_t rvp
, snapdvp
;
11524 caddr_t newname_buf
;
11527 struct nameidata
*fromnd
, *tond
;
11528 /* carving out a chunk for structs that are too big to be on stack. */
11530 struct nameidata from_node
;
11531 struct nameidata to_node
;
11534 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
11535 fromnd
= &__rename_data
->from_node
;
11536 tond
= &__rename_data
->to_node
;
11538 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
,
11542 fvp
= fromnd
->ni_vp
;
11544 MALLOC(newname_buf
, caddr_t
, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
11545 error
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
);
11550 * Some sanity checks- new name can't be empty, "." or ".." or have
11552 * (the length returned by copyinstr includes the terminating NUL)
11554 * The FS rename VNOP is supposed to handle this but we'll pick it
11557 if ((name_len
== 1) || (name_len
== 2 && newname_buf
[0] == '.') ||
11558 (name_len
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) {
11562 for (i
= 0; i
< (int)name_len
&& newname_buf
[i
] != '/'; i
++);
11563 if (i
< (int)name_len
) {
11569 error
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
),
11575 NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP
| NOCACHE
| AUDITVNPATH2
,
11576 UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
);
11577 tond
->ni_dvp
= snapdvp
;
11579 error
= namei(tond
);
11582 } else if (tond
->ni_vp
) {
11584 * snapshot rename behaves differently than rename(2) - if the
11585 * new name exists, EEXIST is returned.
11587 vnode_put(tond
->ni_vp
);
11592 error
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
,
11593 &tond
->ni_cnd
, ctx
);
11598 FREE(newname_buf
, M_TEMP
);
11600 vnode_put(snapdvp
);
11604 FREE(__rename_data
, M_TEMP
);
11609 * Mount a Filesystem snapshot
11611 * get the vnode for the unnamed snapshot directory and the snapshot and
11612 * mount the snapshot.
/*
 * snapshot_mount: mount a filesystem snapshot on a directory.
 *
 * Parameters, as used in the visible code:
 *   dirfd     - handed to vnode_get_snapshot() to locate the snapshot
 *   name      - user address of the snapshot name, looked up with LOOKUP
 *   directory - user address of the path to be covered, resolved by namei()
 *   mnt_data  - user address of caller-supplied mount arguments; only used
 *               by the second mount_common() call
 *   flags     - marked __unused
 *   ctx       - VFS context passed to the lookups and to mount_common()
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip), so
 * the error checks, branch bodies, labels, return statement and part of the
 * cleanup are not visible here - confirm them against the complete file.
 */
11615 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
,
11616 user_addr_t mnt_data
, __unused
uint32_t flags
, vfs_context_t ctx
)
11618 vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
;
11620 struct nameidata
*snapndp
, *dirndp
;
11621 /* carving out a chunk for structs that are too big to be on stack. */
11623 struct nameidata snapnd
;
11624 struct nameidata dirnd
;
11625 } * __snapshot_mount_data
;
/*
 * One allocation holds both nameidata structs; snapndp and dirndp are just
 * pointers into that chunk.
 */
11627 MALLOC(__snapshot_mount_data
, void *, sizeof(*__snapshot_mount_data
),
11629 snapndp
= &__snapshot_mount_data
->snapnd
;
11630 dirndp
= &__snapshot_mount_data
->dirnd
;
/*
 * Look up the snapshot named by name. rvp and snapdvp are passed by address,
 * presumably as outputs of the callee - TODO confirm.
 * NOTE(review): the trailing arguments of this call are not visible here.
 */
11632 error
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
,
11637 snapvp
= snapndp
->ni_vp
;
/*
 * Matches when rvp has no mount or its mount is dead_mountp.
 * NOTE(review): the body of this branch is not visible in this listing.
 */
11638 if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) {
11643 /* Get the vnode to be covered */
11644 NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
11645 UIO_USERSPACE
, directory
, ctx
);
11646 error
= namei(dirndp
);
/* vp is the lookup result and pvp its parent (ni_dvp; WANTPARENT was set). */
11650 vp
= dirndp
->ni_vp
;
11651 pvp
= dirndp
->ni_dvp
;
/*
 * Matches when vp has VROOT set and its mount has MNT_ROOTFS set.
 * NOTE(review): the body of this branch is not visible in this listing.
 */
11653 if ((vp
->v_flag
& VROOT
) && (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
/*
 * First attempt: kernel-built fs_snapshot_mount_args carrying the mount of
 * rvp and the component name from the snapshot lookup, passed with
 * KERNEL_MOUNT_SNAPSHOT. The filesystem type name comes from that mount.
 */
11656 mount_t mp
= vnode_mount(rvp
);
11657 struct fs_snapshot_mount_args smnt_data
;
11659 smnt_data
.sm_mp
= mp
;
11660 smnt_data
.sm_cnp
= &snapndp
->ni_cnd
;
11661 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
,
11662 &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), 0,
11663 KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
);
/*
 * Second call differs only in passing the caller-supplied mnt_data and 0 in
 * place of KERNEL_MOUNT_SNAPSHOT.
 * NOTE(review): the condition guarding this retry is not visible here.
 */
11665 /* Retry with user passed args */
11666 error
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
,
11667 vp
, &dirndp
->ni_cnd
, CAST_USER_ADDR_T(mnt_data
), 0,
11668 0, NULL
, FALSE
, ctx
);
/*
 * Cleanup visible in this listing: vnode_put snapdvp, nameidone on the
 * snapshot lookup, free the nameidata chunk.
 * NOTE(review): other release calls may sit in the gaps - confirm.
 */
11677 vnode_put(snapdvp
);
11679 nameidone(snapndp
);
11681 FREE(__snapshot_mount_data
, M_TEMP
);
11686 * FS snapshot operations dispatcher
11689 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args
*uap
,
11690 __unused
int32_t *retval
)
11693 vfs_context_t ctx
= vfs_context_current();
11695 error
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0);
11700 case SNAPSHOT_OP_CREATE
:
11701 error
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11703 case SNAPSHOT_OP_DELETE
:
11704 error
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);
11706 case SNAPSHOT_OP_RENAME
:
11707 error
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
,
11710 case SNAPSHOT_OP_MOUNT
:
11711 error
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
,
11712 uap
->data
, uap
->flags
, ctx
);
11714 case SNAPSHOT_OP_REVERT
:
11715 error
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
);