2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
114 #include <vm/vm_pageout.h>
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
/*
 * Pathname buffer helpers.  Two alternative definitions are visible here:
 * one obtains the buffer with get_pathbuff(); the other allocates
 * MAXPATHLEN bytes of type M_NAMEI with MALLOC_ZONE (M_WAITOK) and
 * releases them with FREE_ZONE.  The closing #endif names CONFIG_FSE.
 * NOTE(review): the opening conditional and the first RELEASE_PATH body
 * are not present in this text -- confirm against the original file.
 */
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
136 /* struct for checkdirs iteration */
141 /* callback for checkdirs iteration */
/*
 * Forward declarations of routines referenced in this file.  The group
 * between "#ifdef CONFIG_IMGSRC_ACCESS" and its #endif is only declared
 * when that option is defined.
 * NOTE(review): the statfs64_common and mount_common prototypes are cut
 * short in this text (their final parameter lines are not visible).
 */
142 static int checkdirs_callback(proc_t p
, void * arg
);
144 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
145 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
146 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
147 static int getfsstat_callback(mount_t mp
, void * arg
);
148 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
149 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
150 static int sync_callback(mount_t
, void *);
151 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
152 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
153 boolean_t partial_copy
);
154 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
156 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
157 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
158 struct componentname
*cnp
, user_addr_t fsmountargs
,
159 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
161 void vfs_notify_mount(vnode_t pdvp
);
163 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
165 #ifdef CONFIG_IMGSRC_ACCESS
166 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
167 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
168 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
169 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
170 static void mount_end_update(mount_t mp
);
171 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
172 #endif /* CONFIG_IMGSRC_ACCESS */
/* Function pointer hook taking a vnode pointer, a fileproc and a context. */
174 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
177 int sync_internal(void);
180 int unlink1(vfs_context_t
, struct nameidata
*, int);
183 * incremented each time a mount or unmount operation occurs
184 * used to invalidate the cached value of the rootvp in the
185 * mount structure utilized by cache_lookup_path
/* File-scope mount bookkeeping; both counters start at zero. */
187 uint32_t mount_generation
= 0;
189 /* counts number of mount and unmount operations */
190 unsigned int vfs_nummntops
=0;
/* vnops is only declared here (extern); its definition is not in view. */
192 extern const struct fileops vnops
;
193 #if CONFIG_APPLEDOUBLE
/* Declared only when CONFIG_APPLEDOUBLE is enabled. */
194 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
195 #endif /* CONFIG_APPLEDOUBLE */
198 * Virtual File System System Calls
203 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount -- report whether a mount was created from inside the
 * kernel.
 *
 * mp: mount to test.
 * Returns TRUE when MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag,
 * FALSE otherwise.
 */
207 vfs_iskernelmount(mount_t mp
)
209 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount -- mount a file system on behalf of kernel code.
 *
 * fstype:        file system type name, passed to mount_common().
 * pvp, vp:       parent of the covered vnode and the covered vnode.
 * path:          kernel-space path of the mount point (UIO_SYSSPACE).
 * data:          file-system specific mount arguments, passed on as a
 *                user_addr_t via CAST_USER_ADDR_T.
 * syscall_flags: generic mount flags, forwarded as mount_common's flags.
 * kern_flags:    forwarded as mount_common's internal_flags.
 * ctx:           caller's VFS context.
 *
 * mount_common() is invoked with no MAC label string (NULL) and with
 * kernelmount == TRUE; its result is stored in error.
 * NOTE(review): the lookup, cleanup and return statements that sit
 * between the visible lines are not present in this text.
 */
214 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
215 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
/* Describe an OP_MOUNT lookup of `path` that also wants the parent vnode. */
221 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
222 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
225 * Get the vnode to be covered if it's not supplied
/* Use the caller's path string directly as the component-name buffer. */
235 char *pnbuf
= CAST_DOWN(char *, path
);
237 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
/* Length includes the terminating NUL. */
238 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
242 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
243 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
253 #endif /* NFSCLIENT */
256 * Mount a file system.
/*
 * mount -- mount(2) system call entry point.
 *
 * Repackages the caller's arguments (type, path, flags, data) into a
 * struct __mac_mount_args with no MAC label (mac_p = USER_ADDR_NULL) and
 * returns whatever __mac_mount() returns.
 *
 * p:      calling process.
 * uap:    user argument descriptor.
 * retval: passed through to __mac_mount().
 */
260 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
262 struct __mac_mount_args muap
;
264 muap
.type
= uap
->type
;
265 muap
.path
= uap
->path
;
266 muap
.flags
= uap
->flags
;
267 muap
.data
= uap
->data
;
/* Plain mount(2) never supplies a MAC label. */
268 muap
.mac_p
= USER_ADDR_NULL
;
269 return (__mac_mount(p
, &muap
, retval
));
/*
 * vfs_notify_mount -- announce that a mount has been set up.
 *
 * Signals a VQ_MOUNT VFS event and posts NOTE_WRITE on pdvp through
 * lock_vnode_and_post().
 *
 * pdvp: vnode to post the NOTE_WRITE on (mount_common passes the parent
 *       of the covered vnode).
 */
273 vfs_notify_mount(vnode_t pdvp
)
275 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
276 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
281 * Mount a file system taking into account MAC label behavior.
282 * See mount(2) man page for more information
284 * Parameters: p Process requesting the mount
285 * uap User argument descriptor (see below)
288 * Indirect: uap->type Filesystem type
289 * uap->path Path to mount
290 * uap->data Mount arguments
291 * uap->mac_p MAC info
292 * uap->flags Mount flags
/*
 * Starts out FALSE; __mac_mount sets it to TRUE in its
 * CHECK_CS_VALIDATION_BITMAP section when the requested mount flags do
 * not include MNT_RDONLY.
 */
298 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount -- mount a file system, optionally with a MAC label.
 *
 * Visible steps, in order:
 *  - copy the file system type name in from uap->type (at most
 *    MFSNAMELEN bytes);
 *  - initialise a nameidata for an OP_MOUNT lookup of the user-space
 *    path uap->path;
 *  - under CONFIG_IMGSRC_ACCESS, hand flags == MNT_IMGSRC_BY_INDEX to
 *    relocate_imageboot_source();
 *  - when uap->mac_p is set, copy in the user MAC descriptor and then the
 *    label string itself;
 *  - adjust/inspect flags when vp is the root (VROOT) of a MNT_ROOTFS
 *    mount;
 *  - call mount_common() with kernelmount == FALSE and internal_flags 0.
 *
 * NOTE(review): several brace, goto and return lines are absent from this
 * text, so the exact branch structure must be confirmed against the
 * original file.
 */
301 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
305 int need_nameidone
= 0;
306 vfs_context_t ctx
= vfs_context_current();
307 char fstypename
[MFSNAMELEN
];
310 char *labelstr
= NULL
;
311 int flags
= uap
->flags
;
313 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
314 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
319 * Get the fs type name from user space
321 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
326 * Get the vnode to be covered
328 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, NOTRIGGER
| FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
329 UIO_USERSPACE
, uap
->path
, ctx
);
338 #ifdef CONFIG_IMGSRC_ACCESS
339 /* Mounting image source cannot be batched with other operations */
340 if (flags
== MNT_IMGSRC_BY_INDEX
) {
341 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
342 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
345 #endif /* CONFIG_IMGSRC_ACCESS */
349 * Get the label string (if any) from user space
351 if (uap
->mac_p
!= USER_ADDR_NULL
) {
/*
 * The descriptor is read either as a user64_mac or as a user32_mac and
 * normalised into `mac` (presumably selected by is_64bit -- the
 * selecting condition is not visible here).
 */
356 struct user64_mac mac64
;
357 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
358 mac
.m_buflen
= mac64
.m_buflen
;
359 mac
.m_string
= mac64
.m_string
;
361 struct user32_mac mac32
;
362 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
363 mac
.m_buflen
= mac32
.m_buflen
;
364 mac
.m_string
= mac32
.m_string
;
/* Range-check the user-supplied label length before allocating. */
368 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
369 (mac
.m_buflen
< 2)) {
373 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
374 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
378 AUDIT_ARG(mac_string
, labelstr
);
380 #endif /* CONFIG_MACF */
382 AUDIT_ARG(fflags
, flags
);
/* Special handling when the covered vnode is the root of the root fs. */
384 if ((vp
->v_flag
& VROOT
) &&
385 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
386 if (!(flags
& MNT_UNION
)) {
391 * For a union mount on '/', treat it as fresh
392 * mount instead of update.
393 * Otherwise, union mouting on '/' used to panic the
394 * system before, since mnt_vnodecovered was found to
395 * be NULL for '/' which is required for unionlookup
396 * after it gets ENOENT on union mount.
398 flags
= (flags
& ~(MNT_UPDATE
));
402 //#ifdef SECURE_KERNEL
403 if ((flags
& MNT_RDONLY
) == 0) {
404 /* Release kernels are not allowed to mount "/" as rw */
411 * See 7392553 for more details on why this check exists.
412 * Suffice to say: If this check is ON and something tries
413 * to mount the rootFS RW, we'll turn off the codesign
414 * bitmap optimization.
416 #if CHECK_CS_VALIDATION_BITMAP
417 if ((flags
& MNT_RDONLY
) == 0 ) {
418 root_fs_upgrade_try
= TRUE
;
/* User-initiated mount: internal_flags is 0 and kernelmount is FALSE. */
423 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
424 labelstr
, FALSE
, ctx
);
430 FREE(labelstr
, M_MACTEMP
);
431 #endif /* CONFIG_MACF */
439 if (need_nameidone
) {
447 * common mount implementation (final stage of mounting)
450 * fstypename file system type (i.e. its vfs name)
451 * pvp parent of covered vnode
453 * cnp component name (ie path) of covered vnode
454 * flags generic mount flags
455 * fsmountargs file system specific data
456 * labelstr optional MAC label
457 * kernelmount TRUE for mounts initiated from inside the kernel
458 * ctx caller's context
/*
 * mount_common -- shared back end called by kernel_mount() and
 * __mac_mount().
 *
 * Reading the visible statements in order:
 *  - With MNT_UPDATE in flags, the existing mount is examined: VROOT on
 *    vp, MNT_LUNMOUNT, MNT_RELOAD versus MNT_RDONLY, MNT_CPROTECT,
 *    ownership (f_owner / suser) and mac_mount_check_remount(); the
 *    mount's mnt_rwlock is taken exclusively.
 *  - Otherwise fstypename is searched for in the vfsconf list,
 *    prepare_coveredvp() is called, and a struct mount is allocated,
 *    zeroed and given default I/O limits, lists and owner.
 *  - Mount-level flags are copied from flags into mp->mnt_flag.
 *  - For file systems with VFC_VFSLOCALARGS a device path is copied in
 *    from fsmountargs, looked up with namei(), checked (VBLK, major
 *    number, caller permissions) and, on a first mount, opened.
 *  - MAC label hooks run, then VFS_MOUNT() is called.
 *  - An update then adjusts/restores mnt_flag and signals VQ_UPDATE; a
 *    new mount is attached to vp->v_mountedhere, passed to checkdirs(),
 *    VFS_START() and mount_list_add(), and has its capabilities probed.
 *  - The remaining statements are the cleanup/error exits.
 *
 * NOTE(review): many brace, goto, label and return lines are absent from
 * this text; confirm the precise branch structure against the original
 * file before relying on this summary.
 */
461 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
462 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
463 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
466 #pragma unused(labelstr)
468 struct vnode
*devvp
= NULLVP
;
469 struct vnode
*device_vnode
= NULLVP
;
474 struct vfstable
*vfsp
= (struct vfstable
*)0;
475 struct proc
*p
= vfs_context_proc(ctx
);
477 user_addr_t devpath
= USER_ADDR_NULL
;
/* State flags consulted by the cleanup code further down. */
480 boolean_t vfsp_ref
= FALSE
;
481 boolean_t is_rwlock_locked
= FALSE
;
482 boolean_t did_rele
= FALSE
;
483 boolean_t have_usecount
= FALSE
;
486 * Process an update for an existing mount
488 if (flags
& MNT_UPDATE
) {
489 if ((vp
->v_flag
& VROOT
) == 0) {
495 /* unmount in progress return error */
497 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
503 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
504 is_rwlock_locked
= TRUE
;
506 * We only allow the filesystem to be reloaded if it
507 * is currently mounted read-only.
509 if ((flags
& MNT_RELOAD
) &&
510 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
516 * If content protection is enabled, update mounts are not
517 * allowed to turn it off.
519 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
520 ((flags
& MNT_CPROTECT
) == 0)) {
525 #ifdef CONFIG_IMGSRC_ACCESS
526 /* Can't downgrade the backer of the root FS */
527 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
528 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
532 #endif /* CONFIG_IMGSRC_ACCESS */
535 * Only root, or the user that did the original mount is
536 * permitted to update it.
538 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
539 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
543 error
= mac_mount_check_remount(ctx
, mp
);
549 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
550 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
552 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
553 flags
|= MNT_NOSUID
| MNT_NODEV
;
554 if (mp
->mnt_flag
& MNT_NOEXEC
)
561 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
563 vfsp
= mp
->mnt_vtable
;
567 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
568 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
570 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
571 flags
|= MNT_NOSUID
| MNT_NODEV
;
572 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
576 /* XXXAUDIT: Should we capture the type on the error path as well? */
577 AUDIT_ARG(text
, fstypename
);
/*
 * Walk the vfsconf list for an entry whose vfc_name matches fstypename
 * (compared over MFSNAMELEN bytes); a match bumps its vfc_refcount.
 */
579 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
580 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
581 vfsp
->vfc_refcount
++;
592 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
594 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
595 error
= EINVAL
; /* unsupported request */
/* The last argument is skip_auth: TRUE when KERNEL_MOUNT_NOAUTH is set. */
599 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
605 * Allocate and initialize the filesystem (mount_t)
607 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
609 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
612 /* Initialize the default IO constraints */
613 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
614 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
615 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
616 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
617 mp
->mnt_devblocksize
= DEV_BSIZE
;
618 mp
->mnt_alignmentmask
= PAGE_MASK
;
619 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
622 mp
->mnt_realrootvp
= NULLVP
;
623 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
625 TAILQ_INIT(&mp
->mnt_vnodelist
);
626 TAILQ_INIT(&mp
->mnt_workerqueue
);
627 TAILQ_INIT(&mp
->mnt_newvnodes
);
629 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
630 is_rwlock_locked
= TRUE
;
631 mp
->mnt_op
= vfsp
->vfc_vfsops
;
632 mp
->mnt_vtable
= vfsp
;
633 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
634 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
635 strncpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
636 strncpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
637 mp
->mnt_vnodecovered
= vp
;
638 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
639 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
640 mp
->mnt_devbsdunit
= 0;
642 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
643 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
647 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
648 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
649 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
650 #endif /* NFSCLIENT */
654 * Set the mount level flags.
656 if (flags
& MNT_RDONLY
)
657 mp
->mnt_flag
|= MNT_RDONLY
;
658 else if (mp
->mnt_flag
& MNT_RDONLY
) {
659 // disallow read/write upgrades of file systems that
660 // had the TYPENAME_OVERRIDE feature set.
661 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
665 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
/* Clear the caller-settable option bits, then copy them in from flags. */
667 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
668 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
669 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
670 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
671 MNT_QUARANTINE
| MNT_CPROTECT
);
672 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
673 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
674 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
675 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
676 MNT_QUARANTINE
| MNT_CPROTECT
);
679 if (flags
& MNT_MULTILABEL
) {
680 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
684 mp
->mnt_flag
|= MNT_MULTILABEL
;
688 * Process device path for local file systems if requested
690 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
/*
 * The device path pointer is the first item in fsmountargs: read at
 * full user_addr_t width for a 64-bit context, otherwise through `tmp`
 * and widened.  fsmountargs is advanced past it in either case.
 */
691 if (vfs_context_is64bit(ctx
)) {
692 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
694 fsmountargs
+= sizeof(devpath
);
697 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
699 /* munge into LP64 addr */
700 devpath
= CAST_USER_ADDR_T(tmp
);
701 fsmountargs
+= sizeof(tmp
);
704 /* Lookup device and authorize access to it */
708 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
709 if ( (error
= namei(&nd
)) )
712 strncpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
717 if (devvp
->v_type
!= VBLK
) {
721 if (major(devvp
->v_rdev
) >= nblkdev
) {
726 * If mount by non-root, then verify that user has necessary
727 * permissions on the device.
729 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
730 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
732 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
733 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
734 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
738 /* On first mount, preflight and open device */
739 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
740 if ( (error
= vnode_ref(devvp
)) )
743 * Disallow multiple mounts of the same device.
744 * Disallow mounting of a device that is currently in use
745 * (except for root, which might share swap device for miniroot).
746 * Flush out any old buffers remaining from a previous use.
748 if ( (error
= vfs_mountedon(devvp
)) )
751 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
755 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
759 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
/* Open mode below follows the mount: FREAD only when read-only. */
762 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
764 error
= mac_vnode_check_open(ctx
,
766 ronly
? FREAD
: FREAD
|FWRITE
);
770 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
773 mp
->mnt_devvp
= devvp
;
774 device_vnode
= devvp
;
776 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
777 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
778 (device_vnode
= mp
->mnt_devvp
)) {
782 * If upgrade to read-write by non-root, then verify
783 * that user has necessary permissions on the device.
785 vnode_getalways(device_vnode
);
787 if (suser(vfs_context_ucred(ctx
), NULL
) &&
788 (error
= vnode_authorize(device_vnode
, NULL
,
789 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
791 vnode_put(device_vnode
);
795 /* Tell the device that we're upgrading */
796 dev
= (dev_t
)device_vnode
->v_rdev
;
799 if ((u_int
)maj
>= (u_int
)nblkdev
)
800 panic("Volume mounted on a device with invalid major number.");
802 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
803 vnode_put(device_vnode
);
804 device_vnode
= NULLVP
;
/* New mounts get a MAC mount label initialised and associated. */
811 if ((flags
& MNT_UPDATE
) == 0) {
812 mac_mount_label_init(mp
);
813 mac_mount_label_associate(ctx
, mp
);
816 if ((flags
& MNT_UPDATE
) != 0) {
817 error
= mac_mount_check_label_update(ctx
, mp
);
824 * Mount the filesystem.
826 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
828 if (flags
& MNT_UPDATE
) {
829 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
830 mp
->mnt_flag
&= ~MNT_RDONLY
;
832 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
833 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
835 mp
->mnt_flag
= flag
; /* restore flag value */
836 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
837 lck_rw_done(&mp
->mnt_rwlock
);
838 is_rwlock_locked
= FALSE
;
840 enablequotas(mp
, ctx
);
845 * Put the new filesystem on the mount list after root.
848 struct vfs_attr vfsattr
;
850 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
851 error
= VFS_ROOT(mp
, &rvp
, ctx
);
853 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
856 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
858 * drop reference provided by VFS_ROOT
/* Mount-in-progress marker comes off; vp now points at the new mount. */
868 CLR(vp
->v_flag
, VMOUNT
);
869 vp
->v_mountedhere
= mp
;
873 * taking the name_cache_lock exclusively will
874 * insure that everyone is out of the fast path who
875 * might be trying to use a now stale copy of
876 * vp->v_mountedhere->mnt_realrootvp
877 * bumping mount_generation causes the cached values
884 error
= vnode_ref(vp
);
889 have_usecount
= TRUE
;
891 error
= checkdirs(vp
, ctx
);
893 /* Unmount the filesystem as cdir/rdirs cannot be updated */
897 * there is no cleanup code here so I have made it void
898 * we need to revisit this
900 (void)VFS_START(mp
, 0, ctx
);
902 if (mount_list_add(mp
) != 0) {
904 * The system is shutting down trying to umount
905 * everything, so fail with a plausible errno.
910 lck_rw_done(&mp
->mnt_rwlock
);
911 is_rwlock_locked
= FALSE
;
913 /* Check if this mounted file system supports EAs or named streams. */
914 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
915 VFSATTR_INIT(&vfsattr
);
916 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
917 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
918 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
919 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
/* A capability counts only when set in both `capabilities` and `valid`. */
920 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
921 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
922 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
925 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
926 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
927 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
930 /* Check if this file system supports path from id lookups. */
931 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
932 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
933 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
934 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
935 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
936 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
939 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
940 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
942 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
943 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
945 /* increment the operations count */
946 OSAddAtomic(1, &vfs_nummntops
);
947 enablequotas(mp
, ctx
);
950 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
953 * cache the IO attributes for the underlying physical media...
954 * an error return indicates the underlying driver doesn't
955 * support all the queries necessary... however, reasonable
956 * defaults will have been set, so no reason to bail or care
958 vfs_init_io_attributes(device_vnode
, mp
);
961 /* Now that mount is setup, notify the listeners */
962 vfs_notify_mount(pvp
);
964 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
965 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
966 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
967 mp
->mnt_vtable
->vfc_name
, error
);
971 CLR(vp
->v_flag
, VMOUNT
);
974 mp
->mnt_vtable
->vfc_refcount
--;
978 vnode_rele(device_vnode
);
979 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
981 lck_rw_done(&mp
->mnt_rwlock
);
982 is_rwlock_locked
= FALSE
;
985 * if we get here, we have a mount structure that needs to be freed,
986 * but since the coveredvp hasn't yet been updated to point at it,
987 * no need to worry about other threads holding a crossref on this mp
988 * so it's ok to just free it
990 mount_lock_destroy(mp
);
992 mac_mount_label_destroy(mp
);
994 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
998 * drop I/O count on the device vp if there was one
1000 if (devpath
&& devvp
)
1005 /* Error condition exits */
1007 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1010 * If the mount has been placed on the covered vp,
1011 * it may have been discovered by now, so we have
1012 * to treat this just like an unmount
1014 mount_lock_spin(mp
);
1015 mp
->mnt_lflag
|= MNT_LDEAD
;
1018 if (device_vnode
!= NULLVP
) {
1019 vnode_rele(device_vnode
);
1020 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1025 vnode_lock_spin(vp
);
1028 vp
->v_mountedhere
= (mount_t
) 0;
1032 if (have_usecount
) {
1036 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1039 if (devpath
&& devvp
)
1042 /* Release mnt_rwlock only when it was taken */
1043 if (is_rwlock_locked
== TRUE
) {
1044 lck_rw_done(&mp
->mnt_rwlock
);
1048 if (mp
->mnt_crossref
)
1049 mount_dropcrossref(mp
, vp
, 0);
1051 mount_lock_destroy(mp
);
1053 mac_mount_label_destroy(mp
);
1055 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1060 vfsp
->vfc_refcount
--;
1061 mount_list_unlock();
1068 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp -- check a vnode that is about to be covered by a
 * mount and mark it as having a mount in progress.
 *
 * Visible steps: fetch va_uid and compare it with the caller's uid
 * (superusers pass), VNOP_FSYNC(MNT_WAIT), buf_invalidateblks with
 * BUF_WRITE_DATA, a v_type == VDIR test, a test for an existing
 * VMOUNT/v_mountedhere, the mac_mount_check_mount() hook, and finally
 * SET(VMOUNT) after vnode_lock_spin().
 *
 * vp:         vnode to be covered.
 * ctx:        caller's VFS context.
 * cnp/fsname: marked unused by the #pragma below; fsname is presumably
 *             consumed by the MAC hook whose argument list is cut off
 *             here -- TODO confirm.
 * skip_auth:  not referenced in the visible lines; presumably bypasses
 *             the ownership check -- TODO confirm.
 */
1072 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1075 #pragma unused(cnp,fsname)
1077 struct vnode_attr va
;
1082 * If the user is not root, ensure that they own the directory
1083 * onto which we are attempting to mount.
1086 VATTR_WANTED(&va
, va_uid
);
1087 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1088 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1089 (!vfs_context_issuser(ctx
)))) {
/* Flush and invalidate buffered data for vp before it is covered. */
1095 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1098 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1101 if (vp
->v_type
!= VDIR
) {
/* Detect a mount that is already in progress on, or attached to, vp. */
1106 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1112 error
= mac_mount_check_mount(ctx
, vp
,
1118 vnode_lock_spin(vp
);
1119 SET(vp
->v_flag
, VMOUNT
);
1126 #if CONFIG_IMGSRC_ACCESS
/*
 * IMGSRC_DEBUG: either forwards its arguments to printf() or expands to
 * an empty do/while statement.  NOTE(review): the conditional choosing
 * between the two definitions is not present in this text.
 */
1129 #define IMGSRC_DEBUG(args...) printf(args)
1131 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname -- check that a user-supplied
 * device path names the device backing `mp`, and record that path as the
 * mount's f_mntfromname.
 *
 * Visible steps: namei() on devpath (user space), vnode_isblk() on the
 * result, a check that mp->mnt_devvp is not NULLVP, vnode_getwithref()
 * on it, a comparison of vnode_specrdev() for both vnodes, strlcpy() of
 * the looked-up pathname into mp->mnt_vfsstat.f_mntfromname, and -- for
 * callers that are not superuser -- vnode_authorize() for read access
 * (plus write unless the mount is MNT_RDONLY).  The reference taken on
 * the real device vnode is dropped with vnode_put().
 *
 * mp:      mount whose backing device is being verified.
 * devpath: user-space address of the device path.
 * devvpp:  out parameter; its assignment is not visible in this text.
 * ctx:     caller's VFS context.
 */
1135 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1137 struct nameidata nd
;
1138 vnode_t vp
, realdevvp
;
1142 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1143 if ( (error
= namei(&nd
)) ) {
1144 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1150 if (!vnode_isblk(vp
)) {
1151 IMGSRC_DEBUG("Not block device.\n");
1156 realdevvp
= mp
->mnt_devvp
;
1157 if (realdevvp
== NULLVP
) {
1158 IMGSRC_DEBUG("No device backs the mount.\n");
1163 error
= vnode_getwithref(realdevvp
);
1165 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
/* The path must resolve to the same dev_t as the mount's own device. */
1169 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1170 IMGSRC_DEBUG("Wrong dev_t.\n");
1175 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1178 * If mount by non-root, then verify that user has necessary
1179 * permissions on the device.
1181 if (!vfs_context_issuser(ctx
)) {
1182 accessmode
= KAUTH_VNODE_READ_DATA
;
1183 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1184 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1185 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1186 IMGSRC_DEBUG("Access denied.\n");
1194 vnode_put(realdevvp
);
1205 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1206 * and call checkdirs()
/*
 * place_mount_and_checkdirs -- attach an existing mount to a covered
 * vnode.
 *
 * Records vp as mp->mnt_vnodecovered, clears VMOUNT on vp and points
 * vp->v_mountedhere at mp after vnode_lock_spin(), releases the name
 * cache lock, takes a reference with vnode_ref(), and calls checkdirs().
 * The trailing assignment resets mp->mnt_vnodecovered to NULLVP
 * (presumably on the failure path -- the branch lines are not visible).
 *
 * mp:  mount being placed.
 * vp:  vnode to be covered.
 * ctx: caller's VFS context.
 */
1209 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1213 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1215 vnode_lock_spin(vp
);
1216 CLR(vp
->v_flag
, VMOUNT
);
1217 vp
->v_mountedhere
= mp
;
1221 * taking the name_cache_lock exclusively will
1222 * insure that everyone is out of the fast path who
1223 * might be trying to use a now stale copy of
1224 * vp->v_mountedhere->mnt_realrootvp
1225 * bumping mount_generation causes the cached values
1230 name_cache_unlock();
1232 error
= vnode_ref(vp
);
1237 error
= checkdirs(vp
, ctx
);
1239 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1246 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp -- detach a mount from the vnode it covers.
 *
 * After vnode_lock_spin(vp), clears vp->v_mountedhere, then resets
 * mp->mnt_vnodecovered to NULLVP.
 *
 * mp: mount to detach.
 * vp: vnode that was covered.
 */
1252 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1255 vnode_lock_spin(vp
);
1256 vp
->v_mountedhere
= (mount_t
)NULL
;
1259 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update -- take the mount's rwlock exclusively and authorise
 * an update of the mount.
 *
 * Visible checks: MNT_LUNMOUNT in mnt_lflag (after mount_lock_spin()),
 * MNT_RELOAD requested on a mount that is not MNT_RDONLY, caller is
 * neither the recorded f_owner nor superuser, and the
 * mac_mount_check_remount() hook.  The final lck_rw_done() presumably
 * belongs to the failure path -- TODO confirm; the pairing
 * mount_end_update() also releases mnt_rwlock.
 *
 * mp:    mount to update.
 * ctx:   caller's VFS context.
 * flags: mount flags; only MNT_RELOAD is examined in the visible lines.
 */
1263 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1267 /* unmount in progress return error */
1268 mount_lock_spin(mp
);
1269 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1274 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1277 * We only allow the filesystem to be reloaded if it
1278 * is currently mounted read-only.
1280 if ((flags
& MNT_RELOAD
) &&
1281 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1287 * Only root, or the user that did the original mount is
1288 * permitted to update it.
1290 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1291 (!vfs_context_issuser(ctx
))) {
1296 error
= mac_mount_check_remount(ctx
, mp
);
1304 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * mount_end_update -- release mp->mnt_rwlock with lck_rw_done(); the
 * counterpart of the exclusive lock taken in mount_begin_update().
 */
1311 mount_end_update(mount_t mp
)
1313 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode -- look up the imageboot source root vnode at a
 * given nesting level.
 *
 * height: index into imgsrc_rootvnodes[]; values at or above
 *         MAX_IMAGEBOOT_NESTING are caught by the first test.
 * rvpp:   out parameter; presumably receives the vnode once it is
 *         non-NULL and vnode_get() succeeds -- the assignment is not
 *         visible in this text.
 */
1317 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1321 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1325 vp
= imgsrc_rootvnodes
[height
];
1326 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1335 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1336 const char *fsname
, vfs_context_t ctx
,
1337 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1341 boolean_t placed
= FALSE
;
1342 vnode_t devvp
= NULLVP
;
1343 struct vfstable
*vfsp
;
1344 user_addr_t devpath
;
1345 char *old_mntonname
;
1350 /* If we didn't imageboot, nothing to move */
1351 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1355 /* Only root can do this */
1356 if (!vfs_context_issuser(ctx
)) {
1360 IMGSRC_DEBUG("looking for root vnode.\n");
1363 * Get root vnode of filesystem we're moving.
1367 struct user64_mnt_imgsrc_args mia64
;
1368 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1370 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1374 height
= mia64
.mi_height
;
1375 flags
= mia64
.mi_flags
;
1376 devpath
= mia64
.mi_devpath
;
1378 struct user32_mnt_imgsrc_args mia32
;
1379 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1381 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1385 height
= mia32
.mi_height
;
1386 flags
= mia32
.mi_flags
;
1387 devpath
= mia32
.mi_devpath
;
1391 * For binary compatibility--assumes one level of nesting.
1394 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1398 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1401 /* munge into LP64 addr */
1402 devpath
= CAST_USER_ADDR_T(tmp
);
1410 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1414 error
= get_imgsrc_rootvnode(height
, &rvp
);
1416 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1420 IMGSRC_DEBUG("got root vnode.\n");
1422 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1424 /* Can only move once */
1425 mp
= vnode_mount(rvp
);
1426 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1427 IMGSRC_DEBUG("Already moved.\n");
1432 IMGSRC_DEBUG("Starting updated.\n");
1434 /* Get exclusive rwlock on mount, authorize update on mp */
1435 error
= mount_begin_update(mp
, ctx
, 0);
1437 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1442 * It can only be moved once. Flag is set under the rwlock,
1443 * so we're now safe to proceed.
1445 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1446 IMGSRC_DEBUG("Already moved [2]\n");
1451 IMGSRC_DEBUG("Preparing coveredvp.\n");
1453 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1454 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1456 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1460 IMGSRC_DEBUG("Covered vp OK.\n");
1462 /* Sanity check the name caller has provided */
1463 vfsp
= mp
->mnt_vtable
;
1464 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1465 IMGSRC_DEBUG("Wrong fs name.\n");
1470 /* Check the device vnode and update mount-from name, for local filesystems */
1471 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1472 IMGSRC_DEBUG("Local, doing device validation.\n");
1474 if (devpath
!= USER_ADDR_NULL
) {
1475 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1477 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1486 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1487 * and increment the name cache's mount generation
1490 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1491 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1498 strncpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1499 strncpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1501 /* Forbid future moves */
1503 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1506 /* Finally, add to mount list, completely ready to go */
1507 if (mount_list_add(mp
) != 0) {
1509 * The system is shutting down trying to umount
1510 * everything, so fail with a plausible errno.
1516 mount_end_update(mp
);
1518 FREE(old_mntonname
, M_TEMP
);
1520 vfs_notify_mount(pvp
);
1524 strncpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1527 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1532 * Placing the mp on the vnode clears VMOUNT,
1533 * so cleanup is different after that point
1536 /* Rele the vp, clear VMOUNT and v_mountedhere */
1537 undo_place_on_covered_vp(mp
, vp
);
1539 vnode_lock_spin(vp
);
1540 CLR(vp
->v_flag
, VMOUNT
);
1544 mount_end_update(mp
);
1548 FREE(old_mntonname
, M_TEMP
);
1552 #endif /* CONFIG_IMGSRC_ACCESS */
1555 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1557 struct nameidata qnd
;
1559 char qfpath
[MAXPATHLEN
];
1560 const char *qfname
= QUOTAFILENAME
;
1561 const char *qfopsname
= QUOTAOPSNAME
;
1562 const char *qfextension
[] = INITQFNAMES
;
1564 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1565 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1569 * Enable filesystem disk quotas if necessary.
1570 * We ignore errors as this should not interfere with final mount
1572 for (type
=0; type
< MAXQUOTAS
; type
++) {
1573 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1574 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1575 CAST_USER_ADDR_T(qfpath
), ctx
);
1576 if (namei(&qnd
) != 0)
1577 continue; /* option file to trigger quotas is not present */
1578 vnode_put(qnd
.ni_vp
);
1580 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1582 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1589 checkdirs_callback(proc_t p
, void * arg
)
1591 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1592 vnode_t olddp
= cdrp
->olddp
;
1593 vnode_t newdp
= cdrp
->newdp
;
1594 struct filedesc
*fdp
;
1598 int cdir_changed
= 0;
1599 int rdir_changed
= 0;
1602 * XXX Also needs to iterate each thread in the process to see if it
1603 * XXX is using a per-thread current working directory, and, if so,
1604 * XXX update that as well.
1609 if (fdp
== (struct filedesc
*)0) {
1611 return(PROC_RETURNED
);
1613 fdp_cvp
= fdp
->fd_cdir
;
1614 fdp_rvp
= fdp
->fd_rdir
;
1617 if (fdp_cvp
== olddp
) {
1624 if (fdp_rvp
== olddp
) {
1631 if (cdir_changed
|| rdir_changed
) {
1633 fdp
->fd_cdir
= fdp_cvp
;
1634 fdp
->fd_rdir
= fdp_rvp
;
1637 return(PROC_RETURNED
);
1643 * Scan all active processes to see if any of them have a current
1644 * or root directory onto which the new filesystem has just been
1645 * mounted. If so, replace them with the new mount point.
1648 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1653 struct cdirargs cdr
;
1654 struct uthread
* uth
= get_bsdthread_info(current_thread());
1656 if (olddp
->v_usecount
== 1)
1658 if (uth
!= (struct uthread
*)0)
1659 uth
->uu_notrigger
= 1;
1660 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1661 if (uth
!= (struct uthread
*)0)
1662 uth
->uu_notrigger
= 0;
1666 panic("mount: lost mount: error %d", err
);
1673 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1674 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1676 if (rootvnode
== olddp
) {
1688 * Unmount a file system.
1690 * Note: unmount takes a path to the vnode mounted on as argument,
1691 * not special file (as before).
1695 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1700 struct nameidata nd
;
1701 vfs_context_t ctx
= vfs_context_current();
1703 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, NOTRIGGER
| FOLLOW
| AUDITVNPATH1
,
1704 UIO_USERSPACE
, uap
->path
, ctx
);
1713 error
= mac_mount_check_umount(ctx
, mp
);
1720 * Must be the root of the filesystem
1722 if ((vp
->v_flag
& VROOT
) == 0) {
1728 /* safedounmount consumes the mount ref */
1729 return (safedounmount(mp
, uap
->flags
, ctx
));
1733 vfs_unmountbyfsid(fsid_t
* fsid
, int flags
, vfs_context_t ctx
)
1737 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1738 if (mp
== (mount_t
)0) {
1743 /* safedounmount consumes the mount ref */
1744 return(safedounmount(mp
, flags
, ctx
));
1749 * The mount struct comes with a mount ref which will be consumed.
1750 * Do the actual file system unmount, prevent some common foot shooting.
1753 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1756 proc_t p
= vfs_context_proc(ctx
);
1759 * If the file system is not responding and MNT_NOBLOCK
1760 * is set and not a forced unmount then return EBUSY.
1762 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1763 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1769 * Skip authorization if the mount is tagged as permissive and
1770 * this is not a forced-unmount attempt.
1772 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1774 * Only root, or the user that did the original mount is
1775 * permitted to unmount this filesystem.
1777 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1778 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1782 * Don't allow unmounting the root file system.
1784 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1785 error
= EBUSY
; /* the root is always busy */
1789 #ifdef CONFIG_IMGSRC_ACCESS
1790 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1794 #endif /* CONFIG_IMGSRC_ACCESS */
1796 return (dounmount(mp
, flags
, 1, ctx
));
1804 * Do the actual file system unmount.
1807 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1809 vnode_t coveredvp
= (vnode_t
)0;
1812 int forcedunmount
= 0;
1814 struct vnode
*devvp
= NULLVP
;
1816 proc_t p
= vfs_context_proc(ctx
);
1818 int pflags_save
= 0;
1819 #endif /* CONFIG_TRIGGERS */
1821 if (flags
& MNT_FORCE
)
1825 /* XXX post jaguar fix LK_DRAIN - then clean this up */
1826 if ((flags
& MNT_FORCE
)) {
1827 mp
->mnt_kern_flag
|= MNTK_FRCUNMOUNT
;
1828 mp
->mnt_lflag
|= MNT_LFORCE
;
1830 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1831 mp
->mnt_lflag
|= MNT_LWAIT
;
1834 msleep((caddr_t
)mp
, &mp
->mnt_mlock
, (PVFS
| PDROP
), "dounmount", NULL
);
1836 * The prior unmount attempt has probably succeeded.
1837 * Do not dereference mp here - returning EBUSY is safest.
1843 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1844 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1847 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1848 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1849 mp
->mnt_flag
&=~ MNT_ASYNC
;
1851 * anyone currently in the fast path that
1852 * trips over the cached rootvp will be
1853 * dumped out and forced into the slow path
1854 * to regenerate a new cached value
1856 mp
->mnt_realrootvp
= NULLVP
;
1860 * taking the name_cache_lock exclusively will
1861 * insure that everyone is out of the fast path who
1862 * might be trying to use a now stale copy of
1863 * vp->v_mountedhere->mnt_realrootvp
1864 * bumping mount_generation causes the cached values
1869 name_cache_unlock();
1872 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1876 fsevent_unmount(mp
); /* has to come first! */
1879 if (forcedunmount
== 0) {
1880 ubc_umount(mp
); /* release cached vnodes */
1881 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1882 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1885 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1886 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1887 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1894 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1898 lflags
|= FORCECLOSE
;
1899 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1900 if ((forcedunmount
== 0) && error
) {
1902 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1903 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1904 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1908 /* make sure there are no one in the mount iterations or lookup */
1909 mount_iterdrain(mp
);
1911 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1913 mount_iterreset(mp
);
1915 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1916 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1917 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1921 /* increment the operations count */
1923 OSAddAtomic(1, &vfs_nummntops
);
1925 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1926 /* hold an io reference and drop the usecount before close */
1927 devvp
= mp
->mnt_devvp
;
1928 vnode_getalways(devvp
);
1930 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1932 vnode_clearmountedon(devvp
);
1935 lck_rw_done(&mp
->mnt_rwlock
);
1936 mount_list_remove(mp
);
1937 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1939 /* mark the mount point hook in the vp but not drop the ref yet */
1940 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
1941 vnode_getwithref(coveredvp
);
1942 vnode_lock_spin(coveredvp
);
1945 coveredvp
->v_mountedhere
= (struct mount
*)0;
1947 vnode_unlock(coveredvp
);
1948 vnode_put(coveredvp
);
1952 mp
->mnt_vtable
->vfc_refcount
--;
1953 mount_list_unlock();
1955 cache_purgevfs(mp
); /* remove cache entries for this file sys */
1956 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
1958 mp
->mnt_lflag
|= MNT_LDEAD
;
1960 if (mp
->mnt_lflag
& MNT_LWAIT
) {
1962 * do the wakeup here
1963 * in case we block in mount_refdrain
1964 * which will drop the mount lock
1965 * and allow anyone blocked in vfs_busy
1966 * to wakeup and see the LDEAD state
1968 mp
->mnt_lflag
&= ~MNT_LWAIT
;
1969 wakeup((caddr_t
)mp
);
1973 if (mp
->mnt_lflag
& MNT_LWAIT
) {
1974 mp
->mnt_lflag
&= ~MNT_LWAIT
;
1979 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
1980 // Restore P_NOREMOTEHANG bit to its previous value
1981 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
1982 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
1986 * Callback and context are set together under the mount lock, and
1987 * never cleared, so we're safe to examine them here, drop the lock,
1990 if (mp
->mnt_triggercallback
!= NULL
) {
1993 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
1994 } else if (did_vflush
) {
1995 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2002 #endif /* CONFIG_TRIGGERS */
2004 lck_rw_done(&mp
->mnt_rwlock
);
2007 wakeup((caddr_t
)mp
);
2010 if ((coveredvp
!= NULLVP
)) {
2013 vnode_getwithref(coveredvp
);
2014 pvp
= vnode_getparent(coveredvp
);
2015 vnode_rele(coveredvp
);
2017 mount_dropcrossref(mp
, coveredvp
, 0);
2019 if (coveredvp
->v_resolve
)
2020 vnode_trigger_rearm(coveredvp
, ctx
);
2022 vnode_put(coveredvp
);
2025 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2028 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2029 mount_lock_destroy(mp
);
2031 mac_mount_label_destroy(mp
);
2033 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2035 panic("dounmount: no coveredvp");
2041 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2046 if (mp
->mnt_crossref
< 0)
2047 panic("mount cross refs -ve");
2049 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2052 vnode_put_locked(dp
);
2055 mount_lock_destroy(mp
);
2057 mac_mount_label_destroy(mp
);
2059 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2063 vnode_put_locked(dp
);
2069 * Sync each mounted filesystem.
2073 struct ctldebug debug0
= { "syncprt", &syncprt
};
2076 int print_vmpage_stat
=0;
2079 sync_callback(mount_t mp
, void * arg
)
2083 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2084 asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2085 mp
->mnt_flag
&= ~MNT_ASYNC
;
2086 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_current());
2088 mp
->mnt_flag
|= MNT_ASYNC
;
2090 return(VFS_RETURNED
);
2096 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2098 vfs_iterate(LK_NOWAIT
, sync_callback
, (void *)0);
2100 if(print_vmpage_stat
) {
2101 vm_countdirtypages();
2107 #endif /* DIAGNOSTIC */
2112 * Change filesystem quotas.
2115 static int quotactl_funneled(proc_t p
, struct quotactl_args
*uap
, int32_t *retval
);
2118 quotactl(proc_t p
, struct quotactl_args
*uap
, int32_t *retval
)
2120 boolean_t funnel_state
;
2123 funnel_state
= thread_funnel_set(kernel_flock
, TRUE
);
2124 error
= quotactl_funneled(p
, uap
, retval
);
2125 thread_funnel_set(kernel_flock
, funnel_state
);
2130 quotactl_funneled(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2133 int error
, quota_cmd
, quota_status
;
2136 struct nameidata nd
;
2137 vfs_context_t ctx
= vfs_context_current();
2138 struct dqblk my_dqblk
;
2140 AUDIT_ARG(uid
, uap
->uid
);
2141 AUDIT_ARG(cmd
, uap
->cmd
);
2142 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2147 mp
= nd
.ni_vp
->v_mount
;
2148 vnode_put(nd
.ni_vp
);
2151 /* copyin any data we will need for downstream code */
2152 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2154 switch (quota_cmd
) {
2156 /* uap->arg specifies a file from which to take the quotas */
2157 fnamelen
= MAXPATHLEN
;
2158 datap
= kalloc(MAXPATHLEN
);
2159 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2162 /* uap->arg is a pointer to a dqblk structure. */
2163 datap
= (caddr_t
) &my_dqblk
;
2167 /* uap->arg is a pointer to a dqblk structure. */
2168 datap
= (caddr_t
) &my_dqblk
;
2169 if (proc_is64bit(p
)) {
2170 struct user_dqblk my_dqblk64
;
2171 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2173 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2177 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2181 /* uap->arg is a pointer to an integer */
2182 datap
= (caddr_t
) "a_status
;
2190 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2193 switch (quota_cmd
) {
2196 kfree(datap
, MAXPATHLEN
);
2199 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2201 if (proc_is64bit(p
)) {
2202 struct user_dqblk my_dqblk64
;
2203 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2204 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2207 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2212 /* uap->arg is a pointer to an integer */
2214 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2225 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2227 return (EOPNOTSUPP
);
2232 * Get filesystem statistics.
2234 * Returns: 0 Success
2236 * vfs_update_vfsstat:???
2237 * munge_statfs:EFAULT
2241 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2244 struct vfsstatfs
*sp
;
2246 struct nameidata nd
;
2247 vfs_context_t ctx
= vfs_context_current();
2250 NDINIT(&nd
, LOOKUP
, OP_STATFS
, NOTRIGGER
| FOLLOW
| AUDITVNPATH1
,
2251 UIO_USERSPACE
, uap
->path
, ctx
);
2257 sp
= &mp
->mnt_vfsstat
;
2260 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2266 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2272 * Get filesystem statistics.
2276 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2280 struct vfsstatfs
*sp
;
2283 AUDIT_ARG(fd
, uap
->fd
);
2285 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2288 error
= vnode_getwithref(vp
);
2294 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2301 sp
= &mp
->mnt_vfsstat
;
2302 if ((error
= vfs_update_vfsstat(mp
,vfs_context_current(),VFS_USER_EVENT
)) != 0) {
2306 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2316 * Common routine to handle copying of statfs64 data to user space
2319 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2322 struct statfs64 sfs
;
2324 bzero(&sfs
, sizeof(sfs
));
2326 sfs
.f_bsize
= sfsp
->f_bsize
;
2327 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2328 sfs
.f_blocks
= sfsp
->f_blocks
;
2329 sfs
.f_bfree
= sfsp
->f_bfree
;
2330 sfs
.f_bavail
= sfsp
->f_bavail
;
2331 sfs
.f_files
= sfsp
->f_files
;
2332 sfs
.f_ffree
= sfsp
->f_ffree
;
2333 sfs
.f_fsid
= sfsp
->f_fsid
;
2334 sfs
.f_owner
= sfsp
->f_owner
;
2335 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2336 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2337 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2338 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2339 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2341 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2343 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2344 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2346 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2352 * Get file system statistics in 64-bit mode
2355 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2358 struct vfsstatfs
*sp
;
2360 struct nameidata nd
;
2361 vfs_context_t ctxp
= vfs_context_current();
2364 NDINIT(&nd
, LOOKUP
, OP_STATFS
, NOTRIGGER
| FOLLOW
| AUDITVNPATH1
,
2365 UIO_USERSPACE
, uap
->path
, ctxp
);
2371 sp
= &mp
->mnt_vfsstat
;
2374 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2380 error
= statfs64_common(mp
, sp
, uap
->buf
);
2387 * Get file system statistics in 64-bit mode
2390 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2394 struct vfsstatfs
*sp
;
2397 AUDIT_ARG(fd
, uap
->fd
);
2399 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2402 error
= vnode_getwithref(vp
);
2408 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2415 sp
= &mp
->mnt_vfsstat
;
2416 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2420 error
= statfs64_common(mp
, sp
, uap
->buf
);
2429 struct getfsstat_struct
{
2440 getfsstat_callback(mount_t mp
, void * arg
)
2443 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2444 struct vfsstatfs
*sp
;
2446 vfs_context_t ctx
= vfs_context_current();
2448 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2449 sp
= &mp
->mnt_vfsstat
;
2451 * If MNT_NOWAIT is specified, do not refresh the
2452 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2454 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2455 (error
= vfs_update_vfsstat(mp
, ctx
,
2457 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2458 return(VFS_RETURNED
);
2462 * Need to handle LP64 version of struct statfs
2464 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2466 fstp
->error
= error
;
2467 return(VFS_RETURNED_DONE
);
2469 fstp
->sfsp
+= my_size
;
2473 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2475 fstp
->error
= error
;
2476 return(VFS_RETURNED_DONE
);
2483 return(VFS_RETURNED
);
2487 * Get statistics on all filesystems.
2490 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2492 struct __mac_getfsstat_args muap
;
2494 muap
.buf
= uap
->buf
;
2495 muap
.bufsize
= uap
->bufsize
;
2496 muap
.mac
= USER_ADDR_NULL
;
2498 muap
.flags
= uap
->flags
;
2500 return (__mac_getfsstat(p
, &muap
, retval
));
2504 * __mac_getfsstat: Get MAC-related file system statistics
2506 * Parameters: p (ignored)
2507 * uap User argument descriptor (see below)
2508 * retval Count of file system statistics (N stats)
2510 * Indirect: uap->bufsize Buffer size
2511 * uap->macsize MAC info size
2512 * uap->buf Buffer where information will be returned
2514 * uap->flags File system flags
2517 * Returns: 0 Success
2522 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2526 size_t count
, maxcount
, bufsize
, macsize
;
2527 struct getfsstat_struct fst
;
2529 bufsize
= (size_t) uap
->bufsize
;
2530 macsize
= (size_t) uap
->macsize
;
2532 if (IS_64BIT_PROCESS(p
)) {
2533 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2536 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2544 if (uap
->mac
!= USER_ADDR_NULL
) {
2549 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2550 if (count
!= maxcount
)
2553 /* Copy in the array */
2554 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2559 error
= copyin(uap
->mac
, mp0
, macsize
);
2561 FREE(mp0
, M_MACTEMP
);
2565 /* Normalize to an array of user_addr_t */
2566 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2568 FREE(mp0
, M_MACTEMP
);
2572 for (i
= 0; i
< count
; i
++) {
2573 if (IS_64BIT_PROCESS(p
))
2574 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2576 mp
[i
] = (user_addr_t
)mp0
[i
];
2578 FREE(mp0
, M_MACTEMP
);
2585 fst
.flags
= uap
->flags
;
2588 fst
.maxcount
= maxcount
;
2591 vfs_iterate(0, getfsstat_callback
, &fst
);
2594 FREE(mp
, M_MACTEMP
);
2597 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2601 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2602 *retval
= fst
.maxcount
;
2604 *retval
= fst
.count
;
2609 getfsstat64_callback(mount_t mp
, void * arg
)
2611 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2612 struct vfsstatfs
*sp
;
2615 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2616 sp
= &mp
->mnt_vfsstat
;
2618 * If MNT_NOWAIT is specified, do not refresh the fsstat
2619 * cache. MNT_WAIT overrides MNT_NOWAIT.
2621 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2622 * getfsstat, since the constants are out of the same
2625 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2626 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2627 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2628 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2629 return(VFS_RETURNED
);
2632 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2634 fstp
->error
= error
;
2635 return(VFS_RETURNED_DONE
);
2637 fstp
->sfsp
+= sizeof(struct statfs64
);
2640 return(VFS_RETURNED
);
2644 * Get statistics on all file systems in 64 bit mode.
2647 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2650 int count
, maxcount
;
2651 struct getfsstat_struct fst
;
2653 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2659 fst
.flags
= uap
->flags
;
2662 fst
.maxcount
= maxcount
;
2664 vfs_iterate(0, getfsstat64_callback
, &fst
);
2667 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2671 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2672 *retval
= fst
.maxcount
;
2674 *retval
= fst
.count
;
2680 * Change current working directory to a given file descriptor.
2684 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
2686 struct filedesc
*fdp
= p
->p_fd
;
2692 vfs_context_t ctx
= vfs_context_current();
2694 AUDIT_ARG(fd
, uap
->fd
);
2695 if (per_thread
&& uap
->fd
== -1) {
2697 * Switching back from per-thread to per process CWD; verify we
2698 * in fact have one before proceeding. The only success case
2699 * for this code path is to return 0 preemptively after zapping
2700 * the thread structure contents.
2702 thread_t th
= vfs_context_thread(ctx
);
2704 uthread_t uth
= get_bsdthread_info(th
);
2706 uth
->uu_cdir
= NULLVP
;
2707 if (tvp
!= NULLVP
) {
2715 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2717 if ( (error
= vnode_getwithref(vp
)) ) {
2722 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
2724 if (vp
->v_type
!= VDIR
) {
2730 error
= mac_vnode_check_chdir(ctx
, vp
);
2734 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
2738 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
2739 if (vfs_busy(mp
, LK_NOWAIT
)) {
2743 error
= VFS_ROOT(mp
, &tdp
, ctx
);
2752 if ( (error
= vnode_ref(vp
)) )
2757 thread_t th
= vfs_context_thread(ctx
);
2759 uthread_t uth
= get_bsdthread_info(th
);
2762 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
2787 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
2789 return common_fchdir(p
, uap
, 0);
2793 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
2795 return common_fchdir(p
, (void *)uap
, 1);
2799 * Change current working directory (".").
2801 * Returns: 0 Success
2802 * change_dir:ENOTDIR
2804 * vnode_ref:ENOENT No such file or directory
2808 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
2810 struct filedesc
*fdp
= p
->p_fd
;
2812 struct nameidata nd
;
2814 vfs_context_t ctx
= vfs_context_current();
2816 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
2817 UIO_USERSPACE
, uap
->path
, ctx
);
2818 error
= change_dir(&nd
, ctx
);
2821 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
2822 vnode_put(nd
.ni_vp
);
2826 * drop the iocount we picked up in change_dir
2828 vnode_put(nd
.ni_vp
);
2831 thread_t th
= vfs_context_thread(ctx
);
2833 uthread_t uth
= get_bsdthread_info(th
);
2835 uth
->uu_cdir
= nd
.ni_vp
;
2836 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
2838 vnode_rele(nd
.ni_vp
);
2844 fdp
->fd_cdir
= nd
.ni_vp
;
2858 * Change current working directory (".") for the entire process
2860 * Parameters: p Process requesting the call
2861 * uap User argument descriptor (see below)
2864 * Indirect parameters: uap->path Directory path
2866 * Returns: 0 Success
2867 * common_chdir: ENOTDIR
2868 * common_chdir: ENOENT No such file or directory
2873 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
2875 return common_chdir(p
, (void *)uap
, 0);
2881 * Change current working directory (".") for a single thread
2883 * Parameters: p Process requesting the call
2884 * uap User argument descriptor (see below)
2887 * Indirect parameters: uap->path Directory path
2889 * Returns: 0 Success
2890 * common_chdir: ENOTDIR
2891 * common_chdir: ENOENT No such file or directory
2896 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
2898 return common_chdir(p
, (void *)uap
, 1);
2903 * Change notion of root (``/'') directory.
2907 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
2909 struct filedesc
*fdp
= p
->p_fd
;
2911 struct nameidata nd
;
2913 vfs_context_t ctx
= vfs_context_current();
2915 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
2918 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
2919 UIO_USERSPACE
, uap
->path
, ctx
);
2920 error
= change_dir(&nd
, ctx
);
2925 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
2928 vnode_put(nd
.ni_vp
);
2933 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
2934 vnode_put(nd
.ni_vp
);
2937 vnode_put(nd
.ni_vp
);
2941 fdp
->fd_rdir
= nd
.ni_vp
;
2942 fdp
->fd_flags
|= FD_CHROOT
;
2952 * Common routine for chroot and chdir.
2954 * Returns: 0 Success
2955 * ENOTDIR Not a directory
2956 * namei:??? [anything namei can return]
2957 * vnode_authorize:??? [anything vnode_authorize can return]
2960 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
2965 if ((error
= namei(ndp
)))
2970 if (vp
->v_type
!= VDIR
) {
2976 error
= mac_vnode_check_chdir(ctx
, vp
);
2983 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
2993 * Check permissions, allocate an open file structure,
2994 * and call the device open routine if any.
2996 * Returns: 0 Success
3007 * XXX Need to implement uid, gid
3010 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3011 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3014 proc_t p
= vfs_context_proc(ctx
);
3015 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3016 struct fileproc
*fp
;
3019 int type
, indx
, error
;
3021 int no_controlling_tty
= 0;
3022 int deny_controlling_tty
= 0;
3023 struct session
*sessp
= SESSION_NULL
;
3027 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3029 flags
= FFLAGS(uflags
);
3031 AUDIT_ARG(fflags
, oflags
);
3032 AUDIT_ARG(mode
, vap
->va_mode
);
3034 if ((error
= falloc_withalloc(p
,
3035 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3038 uu
->uu_dupfd
= -indx
- 1;
3040 if (!(p
->p_flag
& P_CONTROLT
)) {
3041 sessp
= proc_session(p
);
3042 no_controlling_tty
= 1;
3044 * If conditions would warrant getting a controlling tty if
3045 * the device being opened is a tty (see ttyopen in tty.c),
3046 * but the open flags deny it, set a flag in the session to
3049 if (SESS_LEADER(p
, sessp
) &&
3050 sessp
->s_ttyvp
== NULL
&&
3051 (flags
& O_NOCTTY
)) {
3052 session_lock(sessp
);
3053 sessp
->s_flags
|= S_NOCTTY
;
3054 session_unlock(sessp
);
3055 deny_controlling_tty
= 1;
3059 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3060 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3061 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3062 fp_drop(p
, indx
, NULL
, 0);
3064 if (deny_controlling_tty
) {
3065 session_lock(sessp
);
3066 sessp
->s_flags
&= ~S_NOCTTY
;
3067 session_unlock(sessp
);
3069 if (sessp
!= SESSION_NULL
)
3070 session_rele(sessp
);
3074 if (error
== ERESTART
)
3076 fp_free(p
, indx
, fp
);
3078 if (deny_controlling_tty
) {
3079 session_lock(sessp
);
3080 sessp
->s_flags
&= ~S_NOCTTY
;
3081 session_unlock(sessp
);
3083 if (sessp
!= SESSION_NULL
)
3084 session_rele(sessp
);
3090 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
);
3091 fp
->f_fglob
->fg_ops
= &vnops
;
3092 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3095 if (VATTR_IS_ACTIVE (vap
, va_dataprotect_flags
)) {
3096 if (vap
->va_dataprotect_flags
& VA_DP_RAWENCRYPTED
) {
3097 fp
->f_fglob
->fg_flag
|= FENCRYPTED
;
3102 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3103 lf
.l_whence
= SEEK_SET
;
3106 if (flags
& O_EXLOCK
)
3107 lf
.l_type
= F_WRLCK
;
3109 lf
.l_type
= F_RDLCK
;
3111 if ((flags
& FNONBLOCK
) == 0)
3114 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3119 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3121 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3124 /* try to truncate by setting the size attribute */
3125 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3129 * If the open flags denied the acquisition of a controlling tty,
3130 * clear the flag in the session structure that prevented the lower
3131 * level code from assigning one.
3133 if (deny_controlling_tty
) {
3134 session_lock(sessp
);
3135 sessp
->s_flags
&= ~S_NOCTTY
;
3136 session_unlock(sessp
);
3140 * If a controlling tty was set by the tty line discipline, then we
3141 * want to set the vp of the tty into the session structure. We have
3142 * a race here because we can't get to the vp for the tp in ttyopen,
3143 * because it's not passed as a parameter in the open path.
3145 if (no_controlling_tty
&& (p
->p_flag
& P_CONTROLT
)) {
3149 * We already have a ref from vn_open_auth(), so we can demand another reference.
3151 error
= vnode_ref_ext(vp
, 0, VNODE_REF_FORCE
);
3153 panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3156 session_lock(sessp
);
3157 ttyvp
= sessp
->s_ttyvp
;
3158 sessp
->s_ttyvp
= vp
;
3159 sessp
->s_ttyvid
= vnode_vid(vp
);
3160 session_unlock(sessp
);
3161 if (ttyvp
!= NULLVP
)
3168 if (flags
& O_CLOEXEC
)
3169 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3170 if (flags
& O_CLOFORK
)
3171 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3172 procfdtbl_releasefd(p
, indx
, NULL
);
3173 fp_drop(p
, indx
, fp
, 1);
3178 if (sessp
!= SESSION_NULL
)
3179 session_rele(sessp
);
3182 if (deny_controlling_tty
) {
3183 session_lock(sessp
);
3184 sessp
->s_flags
&= ~S_NOCTTY
;
3185 session_unlock(sessp
);
3187 if (sessp
!= SESSION_NULL
)
3188 session_rele(sessp
);
3190 struct vfs_context context
= *vfs_context_current();
3191 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3193 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3195 fp_free(p
, indx
, fp
);
3201 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3203 * Parameters: p Process requesting the open
3204 * uap User argument descriptor (see below)
3205 * retval Pointer to an area to receive the
3206 * return value from the system call
3208 * Indirect: uap->path Path to open (same as 'open')
3209 * uap->flags Flags to open (same as 'open')
3210 * uap->uid UID to set, if creating
3211 * uap->gid GID to set, if creating
3212 * uap->mode File mode, if creating (same as 'open')
3213 * uap->xsecurity ACL to set, if creating
3215 * Returns: 0 Success
3218 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3220 * XXX: We should enumerate the possible errno values here, and where
3221 * in the code they originated.
3224 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3226 struct filedesc
*fdp
= p
->p_fd
;
3228 kauth_filesec_t xsecdst
;
3229 struct vnode_attr va
;
3230 struct nameidata nd
;
3233 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3236 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3237 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3241 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3242 VATTR_SET(&va
, va_mode
, cmode
);
3243 if (uap
->uid
!= KAUTH_UID_NONE
)
3244 VATTR_SET(&va
, va_uid
, uap
->uid
);
3245 if (uap
->gid
!= KAUTH_GID_NONE
)
3246 VATTR_SET(&va
, va_gid
, uap
->gid
);
3247 if (xsecdst
!= NULL
)
3248 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3250 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3251 uap
->path
, vfs_context_current());
3253 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3254 fileproc_alloc_init
, NULL
, retval
);
3255 if (xsecdst
!= NULL
)
3256 kauth_filesec_free(xsecdst
);
3262 * Go through the data-protected atomically controlled open (2)
3264 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3266 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3267 int flags
= uap
->flags
;
3268 int class = uap
->class;
3269 int dpflags
= uap
->dpflags
;
3272 * Follow the same path as normal open(2)
3273 * Look up the item if it exists, and acquire the vnode.
3275 struct filedesc
*fdp
= p
->p_fd
;
3276 struct vnode_attr va
;
3277 struct nameidata nd
;
3282 /* Mask off all but regular access permissions */
3283 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3284 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3286 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3287 uap
->path
, vfs_context_current());
3290 * Initialize the extra fields in vnode_attr to pass down our
3292 * 1. target cprotect class.
3293 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3295 if (flags
& O_CREAT
) {
3296 VATTR_SET(&va
, va_dataprotect_class
, class);
3299 if (dpflags
& O_DP_GETRAWENCRYPTED
) {
3300 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3301 /* Not allowed to write raw encrypted bytes */
3304 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3307 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3308 fileproc_alloc_init
, NULL
, retval
);
3315 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3317 __pthread_testcancel(1);
3318 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3322 open_nocancel(proc_t p
, struct open_nocancel_args
*uap
, int32_t *retval
)
3324 struct filedesc
*fdp
= p
->p_fd
;
3325 struct vnode_attr va
;
3326 struct nameidata nd
;
3330 /* Mask off all but regular access permissions */
3331 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3332 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3334 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3335 uap
->path
, vfs_context_current());
3337 return (open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3338 fileproc_alloc_init
, NULL
, retval
));
3343 * Create a special file.
3345 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
/*
 * mknod: create a special file at uap->path.
 *
 * As visible in this listing: the requested mode (masked with ALLPERMS and
 * the process fd_cmask) and uap->dev are loaded into a vnode_attr.  A request
 * whose type bits equal S_IFIFO is handed to mkfifo1() instead.  Otherwise
 * the mode and device are audited, suser() is called on the context
 * credential, the path is prepared for a CREATE lookup with LOCKPARENT,
 * va_type is set from the S_IFMT bits (VBAD, VCHR or VBLK), and the node is
 * created via mac_vnode_check_create(), vnode_authorize(KAUTH_VNODE_ADD_FILE)
 * and vn_create().  Afterwards the vnode identity is refreshed when v_name or
 * v_parent is unset and an FSE_CREATE_FILE event is posted.
 *
 * NOTE(review): many source lines are absent from this listing (see the gaps
 * in the embedded line numbers), including the declarations of error, vp and
 * dvp, the case labels for VCHR/VBLK and all error/cleanup paths -- confirm
 * against the complete file.
 */
3348 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3350 struct vnode_attr va
;
3351 vfs_context_t ctx
= vfs_context_current();
3353 struct nameidata nd
;
3357 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3358 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3360 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3361 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3362 return(mkfifo1(ctx
, uap
->path
, &va
));
3364 AUDIT_ARG(mode
, uap
->mode
);
3365 AUDIT_ARG(value32
, uap
->dev
);
3367 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3369 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3370 UIO_USERSPACE
, uap
->path
, ctx
);
3382 switch (uap
->mode
& S_IFMT
) {
3383 case S_IFMT
: /* used by badsect to flag bad sectors */
3384 VATTR_SET(&va
, va_type
, VBAD
);
3387 VATTR_SET(&va
, va_type
, VCHR
);
3390 VATTR_SET(&va
, va_type
, VBLK
);
3398 error
= mac_vnode_check_create(ctx
,
3399 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3404 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3407 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3411 int update_flags
= 0;
3413 // Make sure the name & parent pointers are hooked up
3414 if (vp
->v_name
== NULL
)
3415 update_flags
|= VNODE_UPDATE_NAME
;
3416 if (vp
->v_parent
== NULLVP
)
3417 update_flags
|= VNODE_UPDATE_PARENT
;
3420 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3423 add_fsevent(FSE_CREATE_FILE
, ctx
,
3431 * nameidone has to happen before we vnode_put(dvp)
3432 * since it may need to release the fs_nodelock on the dvp
3444 * Create a named pipe.
3446 * Returns: 0 Success
3449 * vnode_authorize:???
/*
 * mkfifo1: common worker that creates a named pipe at the user path `upath`
 * using the attributes in `vap`.
 *
 * As visible in this listing: the path is prepared for a CREATE lookup
 * (OP_MKFIFO, LOCKPARENT | AUDITVNPATH1, user-space address), va_type is
 * forced to VFIFO, the creation is authorized with vn_authorize_create() on
 * the parent directory, and the node is created with vn_create().
 *
 * NOTE(review): the lookup call itself, the declarations of error, vp and
 * dvp, and the cleanup/return path are on lines missing from this listing.
 */
3453 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3457 struct nameidata nd
;
3459 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3460 UIO_USERSPACE
, upath
, ctx
);
3467 /* check that this is a new file and authorize addition */
3472 VATTR_SET(vap
, va_type
, VFIFO
);
3474 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
3477 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
3480 * nameidone has to happen before we vnode_put(dvp)
3481 * since it may need to release the fs_nodelock on the dvp
3494 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3496 * Parameters: p Process requesting the mkfifo
3497 * uap User argument descriptor (see below)
3500 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3501 * uap->uid UID to set
3502 * uap->gid GID to set
3503 * uap->mode File mode to set (same as 'mkfifo')
3504 * uap->xsecurity ACL to set, if creating
3506 * Returns: 0 Success
3509 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3511 * XXX: We should enumerate the possible errno values here, and where
3512 * in the code they originated.
3515 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
3518 kauth_filesec_t xsecdst
;
3519 struct vnode_attr va
;
3521 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3523 xsecdst
= KAUTH_FILESEC_NONE
;
3524 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
3525 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
3530 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3531 if (uap
->uid
!= KAUTH_UID_NONE
)
3532 VATTR_SET(&va
, va_uid
, uap
->uid
);
3533 if (uap
->gid
!= KAUTH_GID_NONE
)
3534 VATTR_SET(&va
, va_gid
, uap
->gid
);
3535 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3536 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3538 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
3540 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3541 kauth_filesec_free(xsecdst
);
3547 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
3549 struct vnode_attr va
;
3552 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3554 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
/*
 * my_strrchr: return a pointer to the last occurrence of `ch` in the
 * NUL-terminated string `p`, or NULL when `ch` does not occur.
 *
 * The terminator itself is compared before the loop exits, so searching for
 * '\0' yields a pointer to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *save;

	for (save = NULL;; ++p) {
		if (*p == ch)
			save = p;
		if (!*p)
			return (save);
	}
	/* NOTREACHED */
}
/*
 * safe_getpath: build the path of `leafname` inside directory `dvp` into
 * `path`, reporting via *truncated_path (0 or 1) whether the result is
 * incomplete.
 *
 * As visible in this listing:
 *   - vn_getpath(dvp) succeeded and left room (len < MAXPATHLEN - 1): the
 *     leaf name is appended with strlcpy(); if the combined length exceeds
 *     MAXPATHLEN the path is marked truncated, cut at the last '/' found by
 *     my_strrchr(), and len is recomputed with strlen().
 *   - vn_getpath(dvp) succeeded without room: the path is marked truncated.
 *   - vn_getpath(dvp) failed: a message is printed unless the error is
 *     ENOSPC, the path is marked truncated, and the code walks up through
 *     v_parent (falling back to the mount's f_mntonname, or "/") calling
 *     vn_getpath() again for as long as it returns ENOSPC.
 *
 * NOTE(review): the append bound uses MAXPATHLEN rather than `_len`; this
 * presumably relies on callers passing a MAXPATHLEN buffer -- verify.
 * NOTE(review): several lines (loop header, break statements, return) are
 * missing from this listing; confirm against the complete file.
 */
3572 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
3575 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
3577 int ret
, len
= _len
;
3579 *truncated_path
= 0;
3580 ret
= vn_getpath(dvp
, path
, &len
);
3581 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
3584 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
3585 if (len
> MAXPATHLEN
) {
3588 // the string got truncated!
3589 *truncated_path
= 1;
3590 ptr
= my_strrchr(path
, '/');
3592 *ptr
= '\0'; // chop off the string at the last directory component
3594 len
= strlen(path
) + 1;
3597 } else if (ret
== 0) {
3598 *truncated_path
= 1;
3599 } else if (ret
!= 0) {
3600 struct vnode
*mydvp
=dvp
;
3602 if (ret
!= ENOSPC
) {
3603 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3604 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
3606 *truncated_path
= 1;
3609 if (mydvp
->v_parent
!= NULL
) {
3610 mydvp
= mydvp
->v_parent
;
3611 } else if (mydvp
->v_mount
) {
3612 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
3615 // no parent and no mount point? only thing is to punt and say "/" changed
3616 strlcpy(path
, "/", _len
);
3621 if (mydvp
== NULL
) {
3626 ret
= vn_getpath(mydvp
, path
, &len
);
3627 } while (ret
== ENOSPC
);
3635 * Make a hard file link.
3637 * Returns: 0 Success
3642 * vnode_authorize:???
/*
 * link: make a hard link named uap->link to the existing object uap->path.
 *
 * As visible in this listing:
 *   - uap->path is looked up (FOLLOW) to obtain the object being linked to.
 *   - A directory source is refused with EPERM unless the filesystem
 *     advertises VFC_VFSDIRLINKS; for non-superuser callers the directory's
 *     va_uid is fetched and compared with the caller's uid.
 *   - The same nameidata is re-armed for a CREATE lookup of uap->link with
 *     LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK.
 *   - Checks performed before linking: mac_vnode_check_link(),
 *     vnode_authorize(vp, KAUTH_VNODE_LINKTARGET), the target must not exist
 *     (lvp), both vnodes must be on the same mount, and
 *     vnode_authorize(dvp, KAUTH_VNODE_ADD_FILE).
 *   - VNOP_LINK() makes the link, followed by mac_vnode_notify_link().
 *   - When fsevents are needed or kauth fileop listeners exist, the new
 *     link's path is built with safe_getpath(); listeners are notified with
 *     KAUTH_FILEOP_LINK, and FSE_CREATE_FILE / FSE_STAT_CHANGED events are
 *     posted (FSE_TRUNCATED_PATH is or'ed into finfo.mode on one branch).
 *   - target_path is released at the end if it was obtained.
 *
 * NOTE(review): the namei calls, error assignments, goto/cleanup code and
 * several declarations are on lines missing from this listing.
 */
3647 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
3649 vnode_t vp
, dvp
, lvp
;
3650 struct nameidata nd
;
3651 vfs_context_t ctx
= vfs_context_current();
3656 int need_event
, has_listeners
;
3657 char *target_path
= NULL
;
3660 vp
= dvp
= lvp
= NULLVP
;
3662 /* look up the object we are linking to */
3663 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
,
3664 UIO_USERSPACE
, uap
->path
, ctx
);
3673 * Normally, linking to directories is not supported.
3674 * However, some file systems may have limited support.
3676 if (vp
->v_type
== VDIR
) {
3677 if (!(vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSDIRLINKS
)) {
3678 error
= EPERM
; /* POSIX */
3681 /* Linking to a directory requires ownership. */
3682 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
3683 struct vnode_attr dva
;
3686 VATTR_WANTED(&dva
, va_uid
);
3687 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
3688 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
3689 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
3696 /* lookup the target node */
3700 nd
.ni_cnd
.cn_nameiop
= CREATE
;
3701 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
3702 nd
.ni_dirp
= uap
->link
;
3710 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
3714 /* or to anything that kauth doesn't want us to (eg. immutable items) */
3715 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
3718 /* target node must not exist */
3719 if (lvp
!= NULLVP
) {
3723 /* cannot link across mountpoints */
3724 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
3729 /* authorize creation of the target node */
3730 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3733 /* and finally make the link */
3734 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
3739 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
3743 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
3747 has_listeners
= kauth_authorize_fileop_has_listeners();
3749 if (need_event
|| has_listeners
) {
3750 char *link_to_path
= NULL
;
3751 int len
, link_name_len
;
3753 /* build the path to the new link file */
3754 GET_PATH(target_path
);
3755 if (target_path
== NULL
) {
3760 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
3762 if (has_listeners
) {
3763 /* build the path to file we are linking to */
3764 GET_PATH(link_to_path
);
3765 if (link_to_path
== NULL
) {
3770 link_name_len
= MAXPATHLEN
;
3771 vn_getpath(vp
, link_to_path
, &link_name_len
);
3774 * Call out to allow 3rd party notification of rename.
3775 * Ignore result of kauth_authorize_fileop call.
3777 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
3778 (uintptr_t)link_to_path
, (uintptr_t)target_path
);
3779 if (link_to_path
!= NULL
) {
3780 RELEASE_PATH(link_to_path
);
3785 /* construct fsevent */
3786 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
3788 finfo
.mode
|= FSE_TRUNCATED_PATH
;
3791 // build the path to the destination of the link
3792 add_fsevent(FSE_CREATE_FILE
, ctx
,
3793 FSE_ARG_STRING
, len
, target_path
,
3794 FSE_ARG_FINFO
, &finfo
,
3798 add_fsevent(FSE_STAT_CHANGED
, ctx
,
3799 FSE_ARG_VNODE
, vp
->v_parent
,
3807 * nameidone has to happen before we vnode_put(dvp)
3808 * since it may need to release the fs_nodelock on the dvp
3811 if (target_path
!= NULL
) {
3812 RELEASE_PATH(target_path
);
3824 * Make a symbolic link.
3826 * We could add support for ACLs here too...
/*
 * symlink: create a symbolic link at uap->link whose contents are the string
 * at uap->path.
 *
 * As visible in this listing:
 *   - A MAXPATHLEN buffer is allocated from the M_NAMEI zone and the link
 *     string is copied in with copyinstr(), then audited.
 *   - uap->link is prepared for a CREATE lookup (OP_SYMLINK, LOCKPARENT).
 *   - The new node's attributes are type VLNK and mode ACCESSPERMS with the
 *     process fd_cmask removed.
 *   - Creation goes through mac_vnode_check_create(),
 *     vnode_authorize(KAUTH_VNODE_ADD_FILE), vnode_authattr_new(),
 *     VNOP_SYMLINK(), vnode_label() and vnode_setattr_fallback().
 *   - The nameidata is then switched to a plain LOOKUP (cn_flags cleared).
 *   - The kauth KAUTH_FILEOP_SYMLINK notification is compiled out (#if 0).
 *   - The vnode identity is refreshed when v_name or v_parent is unset, an
 *     FSE_CREATE_FILE event is posted, and the path buffer is freed with
 *     FREE_ZONE().
 *
 * NOTE(review): the lookup calls, the conditions guarding each step, error
 * handling and several declarations are on lines missing from this listing.
 */
3830 symlink(proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
3832 struct vnode_attr va
;
3835 struct nameidata nd
;
3836 vfs_context_t ctx
= vfs_context_current();
3840 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
3841 error
= copyinstr(uap
->path
, path
, MAXPATHLEN
, &dummy
);
3844 AUDIT_ARG(text
, path
); /* This is the link string */
3846 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
3847 UIO_USERSPACE
, uap
->link
, ctx
);
3855 VATTR_SET(&va
, va_type
, VLNK
);
3856 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
3858 error
= mac_vnode_check_create(ctx
,
3859 dvp
, &nd
.ni_cnd
, &va
);
3872 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
3873 /* get default ownership, etc. */
3875 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
3877 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
3881 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
3884 /* do fallback attribute handling */
3886 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
3889 int update_flags
= 0;
3892 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
3894 nd
.ni_op
= OP_LOOKUP
;
3896 nd
.ni_cnd
.cn_flags
= 0;
3904 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3905 /* call out to allow 3rd party notification of rename.
3906 * Ignore result of kauth_authorize_fileop call.
3908 if (kauth_authorize_fileop_has_listeners() &&
3910 char *new_link_path
= NULL
;
3913 /* build the path to the new link file */
3914 new_link_path
= get_pathbuff();
3916 vn_getpath(dvp
, new_link_path
, &len
);
3917 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
3918 new_link_path
[len
- 1] = '/';
3919 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
3922 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
3923 (uintptr_t)path
, (uintptr_t)new_link_path
);
3924 if (new_link_path
!= NULL
)
3925 release_pathbuff(new_link_path
);
3928 // Make sure the name & parent pointers are hooked up
3929 if (vp
->v_name
== NULL
)
3930 update_flags
|= VNODE_UPDATE_NAME
;
3931 if (vp
->v_parent
== NULLVP
)
3932 update_flags
|= VNODE_UPDATE_PARENT
;
3935 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3938 add_fsevent(FSE_CREATE_FILE
, ctx
,
3946 * nameidone has to happen before we vnode_put(dvp)
3947 * since it may need to release the fs_nodelock on the dvp
3955 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
3961 * Delete a whiteout from the filesystem.
3962 * XXX authorization not implemented for whiteouts
/*
 * undelete: remove a whiteout entry for uap->path.
 *
 * As visible in this listing: the path is prepared for a DELETE lookup with
 * LOCKPARENT | DOWHITEOUT | AUDITVNPATH1.  When the lookup yields no vnode
 * and the component name is flagged ISWHITEOUT, the whiteout is removed via
 * VNOP_WHITEOUT(dvp, ..., DELETE, ctx).
 *
 * NOTE(review): the lookup call, the result for the non-whiteout case and
 * the cleanup/return path are on lines missing from this listing.
 */
3965 undelete(__unused proc_t p
, struct undelete_args
*uap
, __unused
int32_t *retval
)
3968 struct nameidata nd
;
3969 vfs_context_t ctx
= vfs_context_current();
3972 NDINIT(&nd
, DELETE
, OP_UNLINK
, LOCKPARENT
| DOWHITEOUT
| AUDITVNPATH1
,
3973 UIO_USERSPACE
, uap
->path
, ctx
);
3980 if (vp
== NULLVP
&& (nd
.ni_cnd
.cn_flags
& ISWHITEOUT
)) {
3981 error
= VNOP_WHITEOUT(dvp
, &nd
.ni_cnd
, DELETE
, ctx
);
3986 * nameidone has to happen before we vnode_put(dvp)
3987 * since it may need to release the fs_nodelock on the dvp
4000 * Delete a name from the filesystem.
/*
 * unlink1: common worker that deletes the name described by `ndp`.
 *
 * unlink_flags may carry VNODE_REMOVE_NODELETEBUSY (Carbon semantics: busy
 * files cannot be deleted) and VNODE_REMOVE_SKIP_NAMESPACE_EVENT; both are
 * copied into the flags given to the remove operation.
 *
 * As visible in this listing:
 *   - The lookup is marked CN_ALLOWRSRCFORK, LOCKPARENT and
 *     NAMEI_COMPOUNDREMOVE.
 *   - The root of a mounted filesystem (VROOT) is refused, and removal is
 *     authorized with vn_authorize_unlink().
 *   - A lookup that produced no vnode on a filesystem without compound
 *     remove support panics.
 *   - fsevent information is gathered up front for non-hardlinks; the path is
 *     built with safe_getpath() when an event is needed or kauth fileop
 *     listeners exist.
 *   - Resource-fork requests (CN_WANTSRSRCFORK) go through
 *     vnode_removenamedstream(); a call to vn_remove() is also visible.
 *   - EKEEPLOOKING from the remove re-enters the lookup (goto
 *     lookup_continue) after sanity panics.
 *   - Afterwards listeners receive KAUTH_FILEOP_DELETE, a hardlink vnode has
 *     its parent identity cleared, and an FSE_DELETE event is posted (with
 *     FSE_TRUNCATED_PATH when the path was truncated).
 *
 * NOTE(review): the lookup call, label definitions, most conditions and all
 * cleanup/return code are on lines missing from this listing.
 */
4004 unlink1(vfs_context_t ctx
, struct nameidata
*ndp
, int unlink_flags
)
4008 struct componentname
*cnp
;
4013 struct vnode_attr va
;
4017 int has_listeners
= 0;
4018 int truncated_path
=0;
4020 struct vnode_attr
*vap
= NULL
;
4023 /* unlink or delete is allowed on rsrc forks and named streams */
4024 ndp
->ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
4027 ndp
->ni_cnd
.cn_flags
|= LOCKPARENT
;
4028 ndp
->ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4040 /* With Carbon delete semantics, busy files cannot be deleted */
4041 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4042 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4045 /* Skip any potential upcalls if told to. */
4046 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4047 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4051 batched
= vnode_compound_remove_available(vp
);
4053 * The root of a mounted filesystem cannot be deleted.
4055 if (vp
->v_flag
& VROOT
) {
4060 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4068 if (!vnode_compound_remove_available(dvp
)) {
4069 panic("No vp, but no compound remove?");
4074 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4077 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4078 /* XXX need to get these data in batched VNOP */
4079 get_fse_info(vp
, &finfo
, ctx
);
4082 error
= vfs_get_notify_attributes(&va
);
4091 has_listeners
= kauth_authorize_fileop_has_listeners();
4092 if (need_event
|| has_listeners
) {
4100 len
= safe_getpath(dvp
, ndp
->ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4104 if (ndp
->ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4105 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4109 error
= vn_remove(dvp
, &ndp
->ni_vp
, ndp
, flags
, vap
, ctx
);
4111 if (error
== EKEEPLOOKING
) {
4113 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4116 if ((ndp
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4117 panic("EKEEPLOOKING, but continue flag not set?");
4120 if (vnode_isdir(vp
)) {
4124 goto lookup_continue
;
4129 * Call out to allow 3rd party notification of delete.
4130 * Ignore result of kauth_authorize_fileop call.
4133 if (has_listeners
) {
4134 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4135 KAUTH_FILEOP_DELETE
,
4140 if (vp
->v_flag
& VISHARDLINK
) {
4142 // if a hardlink gets deleted we want to blow away the
4143 // v_parent link because the path that got us to this
4144 // instance of the link is no longer valid. this will
4145 // force the next call to get the path to ask the file
4146 // system instead of just following the v_parent link.
4148 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4153 if (vp
->v_flag
& VISHARDLINK
) {
4154 get_fse_info(vp
, &finfo
, ctx
);
4156 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4158 if (truncated_path
) {
4159 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4161 add_fsevent(FSE_DELETE
, ctx
,
4162 FSE_ARG_STRING
, len
, path
,
4163 FSE_ARG_FINFO
, &finfo
,
4174 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4175 * will cause its shadow file to go away if necessary.
4177 if (vp
&& (vnode_isnamedstream(vp
)) &&
4178 (vp
->v_parent
!= NULLVP
) &&
4179 vnode_isshadow(vp
)) {
4184 * nameidone has to happen before we vnode_put(dvp)
4185 * since it may need to release the fs_nodelock on the dvp
4196 * Delete a name from the filesystem using POSIX semantics.
4199 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4201 struct nameidata nd
;
4202 vfs_context_t ctx
= vfs_context_current();
4204 NDINIT(&nd
, DELETE
, OP_UNLINK
, AUDITVNPATH1
, UIO_USERSPACE
,
4206 return unlink1(ctx
, &nd
, 0);
4210 * Delete a name from the filesystem using Carbon semantics.
4213 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4215 struct nameidata nd
;
4216 vfs_context_t ctx
= vfs_context_current();
4218 NDINIT(&nd
, DELETE
, OP_UNLINK
, AUDITVNPATH1
, UIO_USERSPACE
,
4220 return unlink1(ctx
, &nd
, VNODE_REMOVE_NODELETEBUSY
);
4224 * Reposition read/write file offset.
/*
 * lseek: reposition the read/write offset of the file open on uap->fd.
 *
 * As visible in this listing:
 *   - The descriptor is resolved to a fileproc and vnode with fp_getfvp();
 *     an ENOTSUP result and FIFO vnodes get special handling.
 *   - A MAC check is made: mac_file_check_get_offset() for the pure query
 *     form (whence == L_INCR with offset 0), mac_file_check_change_offset()
 *     otherwise.
 *   - An iocount is taken on the vnode with vnode_getwithref().
 *   - Depending on uap->whence the requested offset is made relative to the
 *     current fg_offset or to the file size from vnode_size().
 *   - A positive request that produced a negative result is treated as a
 *     move past the maximum size; a negative result is only accepted for
 *     character devices (VCHR).
 *   - The new offset is stored in fg_offset and returned through *retval.
 *   - post_event_if_success() is called with NOTE_NONE and the vnode is
 *     released with vnode_put().
 *
 * NOTE(review): the case labels, error values and return statements are on
 * lines missing from this listing.
 */
4227 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4229 struct fileproc
*fp
;
4231 struct vfs_context
*ctx
;
4232 off_t offset
= uap
->offset
, file_size
;
4235 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4236 if (error
== ENOTSUP
)
4240 if (vnode_isfifo(vp
)) {
4246 ctx
= vfs_context_current();
4248 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4249 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4252 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4259 if ( (error
= vnode_getwithref(vp
)) ) {
4264 switch (uap
->whence
) {
4266 offset
+= fp
->f_fglob
->fg_offset
;
4269 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4271 offset
+= file_size
;
4279 if (uap
->offset
> 0 && offset
< 0) {
4280 /* Incremented/relative move past max size */
4284 * Allow negative offsets on character devices, per
4285 * POSIX 1003.1-2001. Most likely for writing disk
4288 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4289 /* Decremented/relative move before start */
4293 fp
->f_fglob
->fg_offset
= offset
;
4294 *retval
= fp
->f_fglob
->fg_offset
;
4300 * An lseek can affect whether data is "available to read." Use
4301 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4303 post_event_if_success(vp
, error
, NOTE_NONE
);
4304 (void)vnode_put(vp
);
4311 * Check access permissions.
4313 * Returns: 0 Success
4314 * vnode_authorize:???
/*
 * access1: run one access check on `vp` (with optional parent `dvp`) for the
 * access(2)-style flags in `uflags`, using the credentials in `ctx`.
 *
 * As visible in this listing:
 *   - When no _ACCESS_EXTENDED_MASK bits are set, the classic bits are
 *     translated to kauth actions: read adds KAUTH_VNODE_READ_DATA; W_OK adds
 *     ADD_FILE | ADD_SUBDIRECTORY for directories and WRITE_DATA otherwise;
 *     X_OK adds SEARCH for directories and EXECUTE otherwise.
 *   - Otherwise the kauth action is taken directly as uflags >> 8.
 *   - mac_vnode_check_access() is consulted, then vnode_authorize() is
 *     called with the action or'ed with KAUTH_VNODE_ACCESS.
 *   - Per the original comment, action == 0 means "only check for existence".
 *
 * NOTE(review): the initialisation of `action`, the R_OK test, the else
 * branches and the return statements are on lines missing from this listing.
 */
4317 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4319 kauth_action_t action
;
4323 * If just the regular access bits, convert them to something
4324 * that vnode_authorize will understand.
4326 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4329 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4330 if (uflags
& W_OK
) {
4331 if (vnode_isdir(vp
)) {
4332 action
|= KAUTH_VNODE_ADD_FILE
|
4333 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4334 /* might want delete rights here too */
4336 action
|= KAUTH_VNODE_WRITE_DATA
;
4339 if (uflags
& X_OK
) {
4340 if (vnode_isdir(vp
)) {
4341 action
|= KAUTH_VNODE_SEARCH
;
4343 action
|= KAUTH_VNODE_EXECUTE
;
4347 /* take advantage of definition of uflags */
4348 action
= uflags
>> 8;
4352 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4357 /* action == 0 means only check for existence */
4359 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4370 * access_extended: Check access permissions in bulk.
4372 * Description: uap->entries Pointer to an array of accessx
4373 * descriptor structs, plus one or
4374 * more NULL terminated strings (see
4375 * "Notes" section below).
4376 * uap->size Size of the area pointed to by
4378 * uap->results Pointer to the results array.
4380 * Returns: 0 Success
4381 * ENOMEM Insufficient memory
4382 * EINVAL Invalid arguments
4383 * namei:EFAULT Bad address
4384 * namei:ENAMETOOLONG Filename too long
4385 * namei:ENOENT No such file or directory
4386 * namei:ELOOP Too many levels of symbolic links
4387 * namei:EBADF Bad file descriptor
4388 * namei:ENOTDIR Not a directory
4393 * uap->results Array contents modified
4395 * Notes: The uap->entries are structured as an arbitrary length array
4396 * of accessx descriptors, followed by one or more NULL terminated
4399 * struct accessx_descriptor[0]
4401 * struct accessx_descriptor[n]
4402 * char name_data[0];
4404 * We determine the entry count by walking the buffer containing
4405 * the uap->entries argument descriptor. For each descriptor we
4406 * see, the valid values for the offset ad_name_offset will be
4407 * in the byte range:
4409 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4411 * [ uap->entries + uap->size - 2 ]
4413 * since we must have at least one string, and the string must
4414 * be at least one character plus the NULL terminator in length.
4416 * XXX: Need to support the check-as uid argument
/*
 * access_extended: check access permissions for a batch of paths.
 *
 * The user buffer at uap->entries (uap->size bytes) holds an array of
 * struct accessx_descriptor followed by the NUL-terminated path strings they
 * refer to through ad_name_offset; one errno_t per descriptor is written to
 * uap->results.
 *
 * As visible in this listing:
 *   - uap->size must not exceed ACCESSX_MAX_TABLESIZE and must hold at least
 *     one descriptor plus two bytes.
 *   - Requests that fit in stack_input (ACCESSX_MAX_DESCR_ON_STACK entries)
 *     use the stack buffer; larger ones are MALLOC'ed from M_TEMP.
 *   - After copyin() the last byte of the buffer is forced to NUL so lookups
 *     cannot run past the end.
 *   - Checks use a local vfs_context carrying the caller's real credential
 *     (kauth_cred_copy_real) and the current thread.
 *   - The descriptor count starts at (size - 2) / sizeof(descriptor) and is
 *     lowered as string offsets are found to land inside that range; offsets
 *     beyond the maximum, or pointing at or before the current entry, are
 *     rejected.  An offset of 0 re-uses the previous entry's path.
 *   - The count is limited by ACCESSX_MAX_DESCRIPTORS and the result array is
 *     MALLOC'ed.
 *   - For each entry with a non-zero offset a new lookup is set up
 *     (UIO_SYSSPACE, FOLLOW | AUDITVNPATH1, plus WANTPARENT when this or a
 *     chained entry asks for _DELETE_OK); access1() fills in result[i].
 *   - Results are audited and copied out; heap input, the result array and
 *     the credential reference are released at the end.
 *
 * NOTE(review): error codes, goto/cleanup labels, the lookup call and a
 * number of declarations are on lines missing from this listing.
 */
4419 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
4421 struct accessx_descriptor
*input
= NULL
;
4422 errno_t
*result
= NULL
;
4425 unsigned int desc_max
, desc_actual
, i
, j
;
4426 struct vfs_context context
;
4427 struct nameidata nd
;
4431 #define ACCESSX_MAX_DESCR_ON_STACK 10
4432 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
4434 context
.vc_ucred
= NULL
;
4437 * Validate parameters; if valid, copy the descriptor array and string
4438 * arguments into local memory. Before proceeding, the following
4439 * conditions must have been met:
4441 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4442 * o There must be sufficient room in the request for at least one
4443 * descriptor and a one yte NUL terminated string.
4444 * o The allocation of local storage must not fail.
4446 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
4448 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
4450 if (uap
->size
<= sizeof (stack_input
)) {
4451 input
= stack_input
;
4453 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
4454 if (input
== NULL
) {
4459 error
= copyin(uap
->entries
, input
, uap
->size
);
4463 AUDIT_ARG(opaque
, input
, uap
->size
);
4466 * Force NUL termination of the copyin buffer to avoid nami() running
4467 * off the end. If the caller passes us bogus data, they may get a
4470 ((char *)input
)[uap
->size
- 1] = 0;
4473 * Access is defined as checking against the process' real identity,
4474 * even if operations are checking the effective identity. This
4475 * requires that we use a local vfs context.
4477 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
4478 context
.vc_thread
= current_thread();
4481 * Find out how many entries we have, so we can allocate the result
4482 * array by walking the list and adjusting the count downward by the
4483 * earliest string offset we see.
4485 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
4486 desc_actual
= desc_max
;
4487 for (i
= 0; i
< desc_actual
; i
++) {
4489 * Take the offset to the name string for this entry and
4490 * convert to an input array index, which would be one off
4491 * the end of the array if this entry was the lowest-addressed
4494 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
4497 * An offset greater than the max allowable offset is an error.
4498 * It is also an error for any valid entry to point
4499 * to a location prior to the end of the current entry, if
4500 * it's not a reference to the string of the previous entry.
4502 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
4508 * An offset of 0 means use the previous descriptor's offset;
4509 * this is used to chain multiple requests for the same file
4510 * to avoid multiple lookups.
4513 /* This is not valid for the first entry */
4522 * If the offset of the string for this descriptor is before
4523 * what we believe is the current actual last descriptor,
4524 * then we need to adjust our estimate downward; this permits
4525 * the string table following the last descriptor to be out
4526 * of order relative to the descriptor list.
4528 if (j
< desc_actual
)
4533 * We limit the actual number of descriptors we are willing to process
4534 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
4535 * requested does not exceed this limit,
4537 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
4541 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
4542 if (result
== NULL
) {
4548 * Do the work by iterating over the descriptor entries we know to
4549 * at least appear to contain valid data.
4552 for (i
= 0; i
< desc_actual
; i
++) {
4554 * If the ad_name_offset is 0, then we use the previous
4555 * results to make the check; otherwise, we are looking up
4558 if (input
[i
].ad_name_offset
!= 0) {
4559 /* discard old vnodes */
4570 * Scan forward in the descriptor list to see if we
4571 * need the parent vnode. We will need it if we are
4572 * deleting, since we must have rights to remove
4573 * entries in the parent directory, as well as the
4574 * rights to delete the object itself.
4576 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
4577 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
4578 if (input
[j
].ad_flags
& _DELETE_OK
)
4581 niopts
= FOLLOW
| AUDITVNPATH1
;
4583 /* need parent for vnode_authorize for deletion test */
4585 niopts
|= WANTPARENT
;
4588 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
4589 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
4601 * Handle lookup errors.
4611 /* run this access check */
4612 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
4615 /* fatal lookup error */
4621 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
4623 /* copy out results */
4624 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
4627 if (input
&& input
!= stack_input
)
4628 FREE(input
, M_TEMP
);
4630 FREE(result
, M_TEMP
);
4635 if (IS_VALID_CRED(context
.vc_ucred
))
4636 kauth_cred_unref(&context
.vc_ucred
);
4642 * Returns: 0 Success
4643 * namei:EFAULT Bad address
4644 * namei:ENAMETOOLONG Filename too long
4645 * namei:ENOENT No such file or directory
4646 * namei:ELOOP Too many levels of symbolic links
4647 * namei:EBADF Bad file descriptor
4648 * namei:ENOTDIR Not a directory
/*
 * access: check the caller's access permissions on uap->path.
 *
 * As visible in this listing:
 *   - The check runs under a local vfs_context holding the process' real
 *     credential (kauth_cred_copy_real) and the current thread.
 *   - The path is looked up from user space with FOLLOW | AUDITVNPATH1;
 *     WANTPARENT is added when uap->flags contains _DELETE_OK.
 *   - Pure existence checks (flags == F_OK) are allowed on resource forks
 *     (CN_ALLOWRSRCFORK).
 *   - A usecount is taken with vnode_ref() on a shadow named-stream vnode
 *     and dropped with vnode_rele() when is_namedstream is set.
 *   - access1() performs the actual check; the vnode (and the parent, for
 *     _DELETE_OK) is released with vnode_put() and the credential reference
 *     is dropped with kauth_cred_unref().
 *
 * NOTE(review): the lookup call, error handling and return statement are on
 * lines missing from this listing.
 */
4653 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
4656 struct nameidata nd
;
4658 struct vfs_context context
;
4660 int is_namedstream
= 0;
4664 * Access is defined as checking against the process'
4665 * real identity, even if operations are checking the
4666 * effective identity. So we need to tweak the credential
4669 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
4670 context
.vc_thread
= current_thread();
4672 niopts
= FOLLOW
| AUDITVNPATH1
;
4673 /* need parent for vnode_authorize for deletion test */
4674 if (uap
->flags
& _DELETE_OK
)
4675 niopts
|= WANTPARENT
;
4676 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_USERSPACE
,
4677 uap
->path
, &context
);
4680 /* access(F_OK) calls are allowed for resource forks. */
4681 if (uap
->flags
== F_OK
)
4682 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
4689 /* Grab reference on the shadow stream file vnode to
4690 * force an inactive on release which will mark it
4693 if (vnode_isnamedstream(nd
.ni_vp
) &&
4694 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
4695 vnode_isshadow(nd
.ni_vp
)) {
4697 vnode_ref(nd
.ni_vp
);
4701 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, uap
->flags
, &context
);
4704 if (is_namedstream
) {
4705 vnode_rele(nd
.ni_vp
);
4709 vnode_put(nd
.ni_vp
);
4710 if (uap
->flags
& _DELETE_OK
)
4711 vnode_put(nd
.ni_dvp
);
4715 kauth_cred_unref(&context
.vc_ucred
);
4721 * Returns: 0 Success
/*
 * stat2: common back end of the stat()/lstat() family.
 *
 * ctx            context used for vn_stat() and to pick the user ABI
 * ndp            caller-initialised nameidata; CN_ALLOWRSRCFORK is OR-ed
 *                into its cn_flags here
 * ub             user address that receives the stat structure
 * xsecurity      user buffer for extended security, or USER_ADDR_NULL
 * xsecurity_size user address of the size word for that buffer
 * isstat64       non-zero selects the sb64 / *_stat64 layouts
 *
 * vn_stat() is called on ndp->ni_vp and is only given &fsec when
 * xsecurity != USER_ADDR_NULL.  The spare fields are zeroed, the result
 * is converted by the munge_user{32,64}_stat[64]() routine selected by
 * isstat64 and IS_64BIT_PROCESS(), and then copied out to ub.  A regular
 * file that reports st_nlink == 0 has st_nlink set to 1.
 *
 * When xsecurity is supplied: with no filesec (KAUTH_FILESEC_NONE) a size
 * of 0 is stored through susize(); otherwise the caller's buffer size is
 * read with fusize(), KAUTH_FILESEC_COPYSIZE(fsec) is stored with
 * susize(), and the filesec is copied out only if the buffer is at least
 * that large.  A filesec other than KAUTH_FILESEC_NONE is released with
 * kauth_filesec_free().
 *
 * NOTE(review): not every statement of this function is visible in this
 * listing; the description covers only the visible calls.
 */
4728 stat2(vfs_context_t ctx
, struct nameidata
*ndp
, user_addr_t ub
, user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
)
4735 struct user64_stat user64_sb
;
4736 struct user32_stat user32_sb
;
4737 struct user64_stat64 user64_sb64
;
4738 struct user32_stat64 user32_sb64
;
4742 kauth_filesec_t fsec
;
4743 size_t xsecurity_bufsize
;
4747 int is_namedstream
= 0;
4748 /* stat calls are allowed for resource forks. */
4749 ndp
->ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
4754 fsec
= KAUTH_FILESEC_NONE
;
4756 statptr
= (void *)&source
;
4759 /* Grab reference on the shadow stream file vnode to
4760 * force an inactive on release which will mark it
4763 if (vnode_isnamedstream(ndp
->ni_vp
) &&
4764 (ndp
->ni_vp
->v_parent
!= NULLVP
) &&
4765 vnode_isshadow(ndp
->ni_vp
)) {
4767 vnode_ref(ndp
->ni_vp
);
4771 error
= vn_stat(ndp
->ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
4774 if (is_namedstream
) {
4775 vnode_rele(ndp
->ni_vp
);
4778 vnode_put(ndp
->ni_vp
);
4783 /* Zap spare fields */
4784 if (isstat64
!= 0) {
4785 source
.sb64
.st_lspare
= 0;
4786 source
.sb64
.st_qspare
[0] = 0LL;
4787 source
.sb64
.st_qspare
[1] = 0LL;
4788 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
4789 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
4790 my_size
= sizeof(dest
.user64_sb64
);
4791 sbp
= (caddr_t
)&dest
.user64_sb64
;
4793 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
4794 my_size
= sizeof(dest
.user32_sb64
);
4795 sbp
= (caddr_t
)&dest
.user32_sb64
;
4798 * Check if we raced (post lookup) against the last unlink of a file.
4800 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
4801 source
.sb64
.st_nlink
= 1;
4804 source
.sb
.st_lspare
= 0;
4805 source
.sb
.st_qspare
[0] = 0LL;
4806 source
.sb
.st_qspare
[1] = 0LL;
4807 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
4808 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
4809 my_size
= sizeof(dest
.user64_sb
);
4810 sbp
= (caddr_t
)&dest
.user64_sb
;
4812 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
4813 my_size
= sizeof(dest
.user32_sb
);
4814 sbp
= (caddr_t
)&dest
.user32_sb
;
4818 * Check if we raced (post lookup) against the last unlink of a file.
4820 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
4821 source
.sb
.st_nlink
= 1;
4824 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
4827 /* caller wants extended security information? */
4828 if (xsecurity
!= USER_ADDR_NULL
) {
4830 /* did we get any? */
4831 if (fsec
== KAUTH_FILESEC_NONE
) {
4832 if (susize(xsecurity_size
, 0) != 0) {
4837 /* find the user buffer size */
4838 xsecurity_bufsize
= fusize(xsecurity_size
);
4840 /* copy out the actual data size */
4841 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
4846 /* if the caller supplied enough room, copy out to it */
4847 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
4848 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
4852 if (fsec
!= KAUTH_FILESEC_NONE
)
4853 kauth_filesec_free(fsec
);
4858 * Get file status; this version follows links.
4860 * Returns: 0 Success
4861 * stat2:??? [see stat2() in this file]
/*
 * stat1: link-following front end for stat2().
 *
 * Initialises a LOOKUP/OP_GETATTR nameidata for the user-space 'path'
 * with NOTRIGGER | FOLLOW | AUDITVNPATH1 under vfs_context_current(),
 * then returns whatever stat2() returns for the same ub, xsecurity,
 * xsecurity_size and isstat64 arguments.
 */
4864 stat1(user_addr_t path
, user_addr_t ub
, user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
)
4866 struct nameidata nd
;
4867 vfs_context_t ctx
= vfs_context_current();
4869 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, NOTRIGGER
| FOLLOW
| AUDITVNPATH1
,
4870 UIO_USERSPACE
, path
, ctx
);
4871 return(stat2(ctx
, &nd
, ub
, xsecurity
, xsecurity_size
, isstat64
));
4875 * stat_extended: Get file status; with extended security (ACL).
4877 * Parameters: p (ignored)
4878 * uap User argument descriptor (see below)
4881 * Indirect: uap->path Path of file to get status from
4882 * uap->ub User buffer (holds file status info)
4883 * uap->xsecurity ACL to get (extended security)
4884 * uap->xsecurity_size Size of ACL
4886 * Returns: 0 Success
/*
 * stat_extended: forwards uap->path, uap->ub, uap->xsecurity and
 * uap->xsecurity_size to stat1() with isstat64 == 0 and returns its
 * result.  'p' and 'retval' are unused.
 */
4891 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
, __unused
int32_t *retval
)
4893 return (stat1(uap
->path
, uap
->ub
, uap
->xsecurity
, uap
->xsecurity_size
, 0));
4897 * Returns: 0 Success
4898 * stat1:??? [see stat1() in this file]
/*
 * stat: forwards uap->path and uap->ub to stat1() with no extended
 * security buffer (xsecurity and xsecurity_size both 0) and
 * isstat64 == 0, returning stat1()'s result.  'p' and 'retval' are unused.
 */
4901 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
4903 return(stat1(uap
->path
, uap
->ub
, 0, 0, 0));
/*
 * stat64: same as stat() except that isstat64 is passed as 1; xsecurity
 * and xsecurity_size are both 0.  Returns stat1()'s result.
 */
4907 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
4909 return(stat1(uap
->path
, uap
->ub
, 0, 0, 1));
4913 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4915 * Parameters: p (ignored)
4916 * uap User argument descriptor (see below)
4919 * Indirect: uap->path Path of file to get status from
4920 * uap->ub User buffer (holds file status info)
4921 * uap->xsecurity ACL to get (extended security)
4922 * uap->xsecurity_size Size of ACL
4924 * Returns: 0 Success
/*
 * stat64_extended: forwards uap->path, uap->ub, uap->xsecurity and
 * uap->xsecurity_size to stat1() with isstat64 == 1 and returns its
 * result.  'p' and 'retval' are unused.
 */
4929 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
4931 return (stat1(uap
->path
, uap
->ub
, uap
->xsecurity
, uap
->xsecurity_size
, 1));
4934 * Get file status; this version does not follow links.
/*
 * lstat1: non-following front end for stat2().
 *
 * Identical in shape to stat1() except that the nameidata is initialised
 * with NOFOLLOW instead of FOLLOW (NOTRIGGER | NOFOLLOW | AUDITVNPATH1).
 * Returns whatever stat2() returns.
 */
4937 lstat1(user_addr_t path
, user_addr_t ub
, user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
)
4939 struct nameidata nd
;
4940 vfs_context_t ctx
= vfs_context_current();
4942 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, NOTRIGGER
| NOFOLLOW
| AUDITVNPATH1
,
4943 UIO_USERSPACE
, path
, ctx
);
4945 return(stat2(ctx
, &nd
, ub
, xsecurity
, xsecurity_size
, isstat64
));
4949 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4951 * Parameters: p (ignored)
4952 * uap User argument descriptor (see below)
4955 * Indirect: uap->path Path of file to get status from
4956 * uap->ub User buffer (holds file status info)
4957 * uap->xsecurity ACL to get (extended security)
4958 * uap->xsecurity_size Size of ACL
4960 * Returns: 0 Success
/*
 * lstat_extended: forwards uap->path, uap->ub, uap->xsecurity and
 * uap->xsecurity_size to lstat1() with isstat64 == 0 and returns its
 * result.  'p' and 'retval' are unused.
 */
4965 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
4967 return (lstat1(uap
->path
, uap
->ub
, uap
->xsecurity
, uap
->xsecurity_size
, 0));
/*
 * lstat: forwards uap->path and uap->ub to lstat1() with xsecurity and
 * xsecurity_size both 0 and isstat64 == 0, returning lstat1()'s result.
 */
4971 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
4973 return(lstat1(uap
->path
, uap
->ub
, 0, 0, 0));
/*
 * lstat64: same as lstat() except that isstat64 is passed as 1.
 * Returns lstat1()'s result.
 */
4977 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
4979 return(lstat1(uap
->path
, uap
->ub
, 0, 0, 1));
4983 * lstat64_extended: Get file status; can handle large inode numbers; does not
4984 * follow links; with extended security (ACL).
4986 * Parameters: p (ignored)
4987 * uap User argument descriptor (see below)
4990 * Indirect: uap->path Path of file to get status from
4991 * uap->ub User buffer (holds file status info)
4992 * uap->xsecurity ACL to get (extended security)
4993 * uap->xsecurity_size Size of ACL
4995 * Returns: 0 Success
/*
 * lstat64_extended: forwards uap->path, uap->ub, uap->xsecurity and
 * uap->xsecurity_size to lstat1() with isstat64 == 1 and returns its
 * result.  'p' and 'retval' are unused.
 */
5000 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5002 return (lstat1(uap
->path
, uap
->ub
, uap
->xsecurity
, uap
->xsecurity_size
, 1));
5006 * Get configurable pathname variables.
5008 * Returns: 0 Success
5012 * Notes: Global implementation constants are intended to be
5013 * implemented in this function directly; all other constants
5014 * are per-FS implementation, and therefore must be handled in
5015 * each respective FS, instead.
5017 * XXX We implement some things globally right now that should actually be
5018 * XXX per-FS; we will need to deal with this at some point.
/*
 * pathconf: query a configurable pathname variable.
 *
 * uap->path is looked up (LOOKUP/OP_PATHCONF, FOLLOW | AUDITVNPATH1)
 * under vfs_context_current(); vn_pathconf() is then asked for uap->name
 * on the resulting vnode and writes its answer through 'retval'.  The
 * vnode is released with vnode_put().
 *
 * NOTE(review): the lookup call and return statement are not visible in
 * this listing.
 */
5022 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5025 struct nameidata nd
;
5026 vfs_context_t ctx
= vfs_context_current();
5028 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5029 UIO_USERSPACE
, uap
->path
, ctx
);
5034 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5036 vnode_put(nd
.ni_vp
);
5042 * Return target name of a symbolic link.
/*
 * readlink: copy the target of a symbolic link into a user buffer.
 *
 * uap->path is looked up with NOFOLLOW | AUDITVNPATH1 (LOOKUP /
 * OP_READLINK).  A single-iovec uio is built in the on-stack uio_buf,
 * using UIO_USERSPACE64 or UIO_USERSPACE32 according to proc_is64bit(p),
 * and pointed at uap->buf for uap->count bytes.  The vnode type is
 * compared against VLNK; the visible call sequence after that is
 * mac_vnode_check_readlink(), vnode_authorize() for
 * KAUTH_VNODE_READ_DATA, and VNOP_READLINK().  *retval receives
 * uap->count minus the uio's remaining residual.
 *
 * NOTE(review): the branch structure between these calls is not visible
 * in this listing.
 */
5046 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5050 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5052 struct nameidata nd
;
5053 vfs_context_t ctx
= vfs_context_current();
5054 char uio_buf
[ UIO_SIZEOF(1) ];
5056 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5057 UIO_USERSPACE
, uap
->path
, ctx
);
5065 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
5066 &uio_buf
[0], sizeof(uio_buf
));
5067 uio_addiov(auio
, uap
->buf
, uap
->count
);
5068 if (vp
->v_type
!= VLNK
)
5072 error
= mac_vnode_check_readlink(ctx
,
5076 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, ctx
);
5078 error
= VNOP_READLINK(vp
, auio
, ctx
);
5082 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
5083 *retval
= uap
->count
- (int)uio_resid(auio
);
5088 * Change file flags.
/*
 * chflags1: apply 'flags' to vnode 'vp' as the va_flags attribute.
 *
 * Visible sequence: VATTR_SET(va_flags), mac_vnode_check_setflags(),
 * vnode_authattr() to compute the required 'action', vnode_authorize()
 * with KAUTH_VNODE_NOIMMUTABLE OR-ed in (only when 'action' is non-zero),
 * then vnode_setattr().  After a successful vnode_setattr() the code
 * separately tests whether va_flags was reported as supported.
 *
 * NOTE(review): the error exits and the handling of the unsupported
 * va_flags case are not visible in this listing.
 */
5091 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5093 struct vnode_attr va
;
5094 kauth_action_t action
;
5098 VATTR_SET(&va
, va_flags
, flags
);
5101 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5106 /* request authorisation, disregard immutability */
5107 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5110 * Request that the auth layer disregard those file flags it's allowed to when
5111 * authorizing this operation; we need to do this in order to be able to
5112 * clear immutable flags.
5114 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5116 error
= vnode_setattr(vp
, &va
, ctx
);
5118 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5127 * Change flags of a file given a path name.
/*
 * chflags: change file flags by path.
 *
 * Audits uap->flags, initialises a LOOKUP/OP_SETATTR nameidata for
 * uap->path with FOLLOW | AUDITVNPATH1, and passes the vnode plus
 * uap->flags to chflags1() under vfs_context_current().
 *
 * NOTE(review): the lookup call and the release of the vnode are not
 * visible in this listing.
 */
5131 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5134 vfs_context_t ctx
= vfs_context_current();
5136 struct nameidata nd
;
5138 AUDIT_ARG(fflags
, uap
->flags
);
5139 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5140 UIO_USERSPACE
, uap
->path
, ctx
);
5147 error
= chflags1(vp
, uap
->flags
, ctx
);
5153 * Change flags of a file given a file descriptor.
/*
 * fchflags: change file flags by descriptor.
 *
 * Audits uap->fd and uap->flags, resolves the descriptor to a vnode with
 * file_vnode(), takes an iocount via vnode_getwithref(), audits the vnode
 * path, and calls chflags1() with vfs_context_current().
 *
 * NOTE(review): the error exits and descriptor/vnode release calls are
 * not visible in this listing.
 */
5157 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5162 AUDIT_ARG(fd
, uap
->fd
);
5163 AUDIT_ARG(fflags
, uap
->flags
);
5164 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5167 if ((error
= vnode_getwithref(vp
))) {
5172 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5174 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5181 * Change security information on a filesystem object.
5183 * Returns: 0 Success
5184 * EPERM Operation not permitted
5185 * vnode_authattr:??? [anything vnode_authattr can return]
5186 * vnode_authorize:??? [anything vnode_authorize can return]
5187 * vnode_setattr:??? [anything vnode_setattr can return]
5189 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5190 * translated to EPERM before being returned.
/*
 * chmod2: apply the security attributes in 'vap' to vnode 'vp'.
 *
 * Audits vap->va_mode, tests vp->v_flag for VISNAMEDSTREAM, and, when
 * va_mode is active in 'vap', calls mac_vnode_check_setmode().  It then
 * runs vnode_authattr() followed by vnode_authorize() with the computed
 * 'action'; a failure from either is checked for EACCES.  Finally
 * vnode_setattr() performs the change.
 *
 * NOTE(review): the statements executed for the named-stream case and
 * for the EACCES case are not visible in this listing.
 */
5193 chmod2(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5195 kauth_action_t action
;
5198 AUDIT_ARG(mode
, vap
->va_mode
);
5199 /* XXX audit new args */
5202 /* chmod calls are not allowed for resource forks. */
5203 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5209 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5210 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5214 /* make sure that the caller is allowed to set this security information */
5215 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5216 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5217 if (error
== EACCES
)
5222 error
= vnode_setattr(vp
, vap
, ctx
);
5229 * Change mode of a file given a path name.
5231 * Returns: 0 Success
5232 * namei:??? [anything namei can return]
5233 * chmod2:??? [anything chmod2 can return]
/*
 * chmod1: path-based wrapper around chmod2().
 *
 * Looks up 'path' with namei() (LOOKUP/OP_SETATTR, FOLLOW |
 * AUDITVNPATH1), passes nd.ni_vp and 'vap' to chmod2(), and releases the
 * vnode with vnode_put().
 *
 * NOTE(review): the failure path after namei() and the return statement
 * are not visible in this listing.
 */
5236 chmod1(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
)
5238 struct nameidata nd
;
5241 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5242 UIO_USERSPACE
, path
, ctx
);
5243 if ((error
= namei(&nd
)))
5245 error
= chmod2(ctx
, nd
.ni_vp
, vap
);
5246 vnode_put(nd
.ni_vp
);
5252 * chmod_extended: Change the mode of a file given a path name; with extended
5253 * argument list (including extended security (ACL)).
5255 * Parameters: p Process requesting the open
5256 * uap User argument descriptor (see below)
5259 * Indirect: uap->path Path to object (same as 'chmod')
5260 * uap->uid UID to set
5261 * uap->gid GID to set
5262 * uap->mode File mode to set (same as 'chmod')
5263 * uap->xsecurity ACL to set (or delete)
5265 * Returns: 0 Success
5268 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5270 * XXX: We should enumerate the possible errno values here, and where
5271 * in the code they originated.
/*
 * chmod_extended: path-based chmod with owner, group and ACL arguments.
 *
 * Builds a vnode_attr from uap:
 *   - va_mode = uap->mode & ALLPERMS, unless uap->mode == -1
 *   - va_uid  = uap->uid, unless it equals KAUTH_UID_NONE
 *   - va_gid  = uap->gid, unless it equals KAUTH_GID_NONE
 *   - uap->xsecurity: the value 1 (_FILESEC_REMOVE_ACL) sets va_acl to
 *     NULL; USER_ADDR_NULL has its own case; any other value is copied in
 *     with kauth_copyinfilesec() and va_acl is pointed at
 *     xsecdst->fsec_acl.
 * The attributes are applied through chmod1(), and xsecdst is released
 * with kauth_filesec_free() when it is non-NULL.
 *
 * NOTE(review): the body of the USER_ADDR_NULL case and the
 * initialisation of 'va' and 'xsecdst' are not visible in this listing.
 */
5274 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
5277 struct vnode_attr va
;
5278 kauth_filesec_t xsecdst
;
5280 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5283 if (uap
->mode
!= -1)
5284 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5285 if (uap
->uid
!= KAUTH_UID_NONE
)
5286 VATTR_SET(&va
, va_uid
, uap
->uid
);
5287 if (uap
->gid
!= KAUTH_GID_NONE
)
5288 VATTR_SET(&va
, va_gid
, uap
->gid
);
5291 switch(uap
->xsecurity
) {
5292 /* explicit remove request */
5293 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5294 VATTR_SET(&va
, va_acl
, NULL
);
5297 case USER_ADDR_NULL
:
5300 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5302 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5303 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
5306 error
= chmod1(vfs_context_current(), uap
->path
, &va
);
5308 if (xsecdst
!= NULL
)
5309 kauth_filesec_free(xsecdst
);
5314 * Returns: 0 Success
5315 * chmod1:??? [anything chmod1 can return]
/*
 * chmod: sets va_mode to uap->mode masked with ALLPERMS and returns the
 * result of chmod1() for uap->path under vfs_context_current().
 * 'p' and 'retval' are unused.
 */
5318 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
5320 struct vnode_attr va
;
5323 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5325 return(chmod1(vfs_context_current(), uap
->path
, &va
));
5329 * Change mode of a file given a file descriptor.
/*
 * fchmod1: descriptor-based wrapper around chmod2().
 *
 * Resolves 'fd' to a vnode with file_vnode(), takes an iocount with
 * vnode_getwithref(), audits the vnode path, applies 'vap' via chmod2()
 * under vfs_context_current(), and drops the iocount with vnode_put().
 *
 * NOTE(review): the error exits and the descriptor release are not
 * visible in this listing.
 */
5332 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
5339 if ((error
= file_vnode(fd
, &vp
)) != 0)
5341 if ((error
= vnode_getwithref(vp
)) != 0) {
5345 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5347 error
= chmod2(vfs_context_current(), vp
, vap
);
5348 (void)vnode_put(vp
);
5355 * fchmod_extended: Change mode of a file given a file descriptor; with
5356 * extended argument list (including extended security (ACL)).
5358 * Parameters: p Process requesting to change file mode
5359 * uap User argument descriptor (see below)
5362 * Indirect: uap->mode File mode to set (same as 'chmod')
5363 * uap->uid UID to set
5364 * uap->gid GID to set
5365 * uap->xsecurity ACL to set (or delete)
5366 * uap->fd File descriptor of file to change mode
5368 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based chmod with owner, group and ACL
 * arguments.
 *
 * Builds a vnode_attr from uap the same way chmod_extended() does for
 * mode, uid and gid.  For uap->xsecurity the first switch has cases for
 * USER_ADDR_NULL and the value 1 (_FILESEC_REMOVE_ACL), both of which set
 * va_acl to NULL, a separate case for the value -1, and otherwise copies
 * the filesec in with kauth_copyinfilesec() and points va_acl at
 * xsecdst->fsec_acl.  The attributes are applied through
 * fchmod1(p, uap->fd, &va).  A second switch on uap->xsecurity lists the
 * USER_ADDR_NULL and -1 cases ahead of the code that frees a non-NULL
 * xsecdst.
 *
 * NOTE(review): the break/default lines of both switches are not visible
 * in this listing.
 */
5373 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
5376 struct vnode_attr va
;
5377 kauth_filesec_t xsecdst
;
5379 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5382 if (uap
->mode
!= -1)
5383 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5384 if (uap
->uid
!= KAUTH_UID_NONE
)
5385 VATTR_SET(&va
, va_uid
, uap
->uid
);
5386 if (uap
->gid
!= KAUTH_GID_NONE
)
5387 VATTR_SET(&va
, va_gid
, uap
->gid
);
5390 switch(uap
->xsecurity
) {
5391 case USER_ADDR_NULL
:
5392 VATTR_SET(&va
, va_acl
, NULL
);
5394 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5395 VATTR_SET(&va
, va_acl
, NULL
);
5398 case CAST_USER_ADDR_T(-1):
5401 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5403 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5406 error
= fchmod1(p
, uap
->fd
, &va
);
5409 switch(uap
->xsecurity
) {
5410 case USER_ADDR_NULL
:
5411 case CAST_USER_ADDR_T(-1):
5414 if (xsecdst
!= NULL
)
5415 kauth_filesec_free(xsecdst
);
/*
 * fchmod: sets va_mode to uap->mode masked with ALLPERMS and returns the
 * result of fchmod1() for descriptor uap->fd.  'retval' is unused.
 */
5421 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
5423 struct vnode_attr va
;
5426 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5428 return(fchmod1(p
, uap
->fd
, &va
));
5433 * Set ownership given a path name.
/*
 * chown1: set owner and/or group of the object named by uap->path.
 *
 * 'follow' selects whether FOLLOW is included in the lookup flags (the
 * lookup always carries NOTRIGGER | AUDITVNPATH1).  va_uid and va_gid are
 * set from uap->uid / uap->gid unless the value equals VNOVAL.  Visible
 * sequence on the vnode: mac_vnode_check_setowner(), vnode_authattr(),
 * vnode_authorize() when 'action' is non-zero, vnode_setattr().  The
 * final error value is tested against EACCES.
 *
 * NOTE(review): the lookup call, the error exits and the statement
 * executed for EACCES are not visible in this listing.
 */
5437 chown1(vfs_context_t ctx
, struct chown_args
*uap
, __unused
int32_t *retval
, int follow
)
5440 struct vnode_attr va
;
5442 struct nameidata nd
;
5443 kauth_action_t action
;
5445 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5447 NDINIT(&nd
, LOOKUP
, OP_SETATTR
,
5448 (follow
? FOLLOW
: 0) | NOTRIGGER
| AUDITVNPATH1
,
5449 UIO_USERSPACE
, uap
->path
, ctx
);
5458 if (uap
->uid
!= VNOVAL
)
5459 VATTR_SET(&va
, va_uid
, uap
->uid
);
5460 if (uap
->gid
!= VNOVAL
)
5461 VATTR_SET(&va
, va_gid
, uap
->gid
);
5464 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
5469 /* preflight and authorize attribute changes */
5470 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5472 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
5474 error
= vnode_setattr(vp
, &va
, ctx
);
5478 * EACCES is only allowed from namei(); permissions failure should
5479 * return EPERM, so we need to translate the error code.
5481 if (error
== EACCES
)
/*
 * chown: returns chown1() for 'uap' under vfs_context_current() with
 * follow == 1, i.e. FOLLOW is included in the lookup flags.
 */
5489 chown(__unused proc_t p
, struct chown_args
*uap
, int32_t *retval
)
5491 return chown1(vfs_context_current(), uap
, retval
, 1);
/*
 * lchown: returns chown1() under vfs_context_current() with follow == 0.
 * 'uap' is cast from struct lchown_args * to struct chown_args *.
 */
5495 lchown(__unused proc_t p
, struct lchown_args
*uap
, int32_t *retval
)
5497 /* Argument list identical, but machine generated; cast for chown1() */
5498 return chown1(vfs_context_current(), (struct chown_args
*)uap
, retval
, 0);
5502 * Set ownership given a file descriptor.
/*
 * fchown: set owner and/or group of the object behind descriptor uap->fd.
 *
 * Audits the owner arguments and the descriptor, resolves the descriptor
 * with file_vnode(), takes an iocount with vnode_getwithref(), and audits
 * the vnode path.  va_uid and va_gid are set from uap->uid / uap->gid
 * unless the value equals VNOVAL.  The vnode is tested for
 * VISNAMEDSTREAM, then the visible sequence is
 * mac_vnode_check_setowner(), vnode_authattr(), vnode_authorize() when
 * 'action' is non-zero (with an EACCES check on failure),
 * vnode_setattr(), and vnode_put().
 *
 * NOTE(review): the statements executed for the named-stream and EACCES
 * cases, and the error exits, are not visible in this listing.
 */
5506 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
5508 struct vnode_attr va
;
5509 vfs_context_t ctx
= vfs_context_current();
5512 kauth_action_t action
;
5514 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5515 AUDIT_ARG(fd
, uap
->fd
);
5517 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5520 if ( (error
= vnode_getwithref(vp
)) ) {
5524 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5527 if (uap
->uid
!= VNOVAL
)
5528 VATTR_SET(&va
, va_uid
, uap
->uid
);
5529 if (uap
->gid
!= VNOVAL
)
5530 VATTR_SET(&va
, va_gid
, uap
->gid
);
5533 /* chown calls are not allowed for resource forks. */
5534 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5541 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
5546 /* preflight and authorize attribute changes */
5547 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5549 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5550 if (error
== EACCES
)
5554 error
= vnode_setattr(vp
, &va
, ctx
);
5557 (void)vnode_put(vp
);
/*
 * getutimes: fill tsp[0] and tsp[1] for the utimes family.
 *
 * When usrtvp is USER_ADDR_NULL a local struct timeval (old_tv) is
 * converted into tsp[0] with TIMEVAL_TO_TIMESPEC.  Otherwise two timeval
 * records are copied in from usrtvp, as struct user64_timeval or struct
 * user32_timeval depending on IS_64BIT_PROCESS(current_proc()), and each
 * is converted into the matching tsp[] slot.
 *
 * NOTE(review): how old_tv is filled, how tsp[1] is set in the NULL case,
 * and the copyin() error handling are not visible in this listing.
 */
5563 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
5567 if (usrtvp
== USER_ADDR_NULL
) {
5568 struct timeval old_tv
;
5569 /* XXX Y2038 bug because of microtime argument */
5571 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
5574 if (IS_64BIT_PROCESS(current_proc())) {
5575 struct user64_timeval tv
[2];
5576 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
5579 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
5580 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
5582 struct user32_timeval tv
[2];
5583 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
5586 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
5587 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: set access and modification times on vnode 'vp'.
 *
 * va_access_time is taken from ts[0] and va_modify_time from ts[1];
 * VA_UTIMES_NULL is OR-ed into va.va_vaflags (presumably only when
 * 'nullflag' is set -- the guarding condition is not visible here).
 * The vnode is tested for VISNAMEDSTREAM, then the visible sequence is
 * mac_vnode_check_setutimes(), vnode_authattr(), vnode_authorize() when
 * 'action' is non-zero, and vnode_setattr().  Both authorisation failure
 * paths test for "!nullflag && error == EACCES".
 *
 * NOTE(review): the end of the parameter list (where 'nullflag' is
 * declared) and the statements executed in the named-stream and EACCES
 * cases are not visible in this listing.
 */
5594 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
5598 struct vnode_attr va
;
5599 kauth_action_t action
;
5601 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5604 VATTR_SET(&va
, va_access_time
, ts
[0]);
5605 VATTR_SET(&va
, va_modify_time
, ts
[1]);
5607 va
.va_vaflags
|= VA_UTIMES_NULL
;
5610 /* utimes calls are not allowed for resource forks. */
5611 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5618 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
5622 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
5623 if (!nullflag
&& error
== EACCES
)
5628 /* since we may not need to auth anything, check here */
5629 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5630 if (!nullflag
&& error
== EACCES
)
5634 error
= vnode_setattr(vp
, &va
, ctx
);
5641 * Set the access and modification times of a file.
/*
 * utimes: set access and modification times by path.
 *
 * The nameidata for uap->path is set up first (LOOKUP/OP_SETATTR,
 * FOLLOW | AUDITVNPATH1) because auditing wants the path.  The times are
 * then obtained with getutimes(usrtvp, ts) and applied with setutimes(),
 * whose last argument is "usrtvp == USER_ADDR_NULL".  The vnode is
 * released with vnode_put().
 *
 * NOTE(review): the lookup call, the assignment of usrtvp and the error
 * exits are not visible in this listing.
 */
5645 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
5647 struct timespec ts
[2];
5650 struct nameidata nd
;
5651 vfs_context_t ctx
= vfs_context_current();
5654 * AUDIT: Needed to change the order of operations to do the
5655 * name lookup first because auditing wants the path.
5657 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5658 UIO_USERSPACE
, uap
->path
, ctx
);
5665 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
5666 * the current time instead.
5669 if ((error
= getutimes(usrtvp
, ts
)) != 0)
5672 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
5675 vnode_put(nd
.ni_vp
);
5680 * Set the access and modification times of a file.
/*
 * futimes: set access and modification times by descriptor.
 *
 * Audits uap->fd, obtains the times with getutimes(usrtvp, ts), resolves
 * the descriptor with file_vnode(), takes an iocount with
 * vnode_getwithref(), and applies the times with setutimes() under
 * vfs_context_current(); the last argument is "usrtvp == 0".
 *
 * NOTE(review): the assignment of usrtvp, the error exits and the
 * release calls are not visible in this listing.
 */
5684 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
5686 struct timespec ts
[2];
5691 AUDIT_ARG(fd
, uap
->fd
);
5693 if ((error
= getutimes(usrtvp
, ts
)) != 0)
5695 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
5697 if((error
= vnode_getwithref(vp
))) {
5702 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
5709 * Truncate a file given its path name.
/*
 * truncate: set the data size of the file named by uap->path.
 *
 * uap->length is tested for being negative before anything else.  The
 * path is looked up with namei() (LOOKUP/OP_TRUNCATE, FOLLOW |
 * AUDITVNPATH1).  va_data_size is set to uap->length, then the visible
 * sequence is mac_vnode_check_truncate(ctx, NOCRED, vp),
 * vnode_authattr(), vnode_authorize() when 'action' is non-zero, and
 * vnode_setattr().
 *
 * NOTE(review): the statement executed for a negative length, the error
 * exits and the vnode release are not visible in this listing.
 */
5713 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
5716 struct vnode_attr va
;
5717 vfs_context_t ctx
= vfs_context_current();
5719 struct nameidata nd
;
5720 kauth_action_t action
;
5722 if (uap
->length
< 0)
5724 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
5725 UIO_USERSPACE
, uap
->path
, ctx
);
5726 if ((error
= namei(&nd
)))
5733 VATTR_SET(&va
, va_data_size
, uap
->length
);
5736 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
5741 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5743 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
5745 error
= vnode_setattr(vp
, &va
, ctx
);
5752 * Truncate a file given a file descriptor.
/*
 * ftruncate: set the data size of the object behind descriptor uap->fd.
 *
 * Audits the descriptor, tests uap->length for being negative, and looks
 * the descriptor up with fp_lookup().  The code then switches on
 * FILEGLOB_DTYPE(fp->f_fglob): one case hands off to pshm_truncate();
 * the vnode path takes vp from fp->f_fglob->fg_data, tests that FWRITE
 * is set in fg_flag, takes an iocount with vnode_getwithref(), runs
 * mac_vnode_check_truncate() with the file's fg_cred, sets va_data_size
 * to uap->length, calls vnode_setattr(), and drops the iocount with
 * vnode_put().
 *
 * NOTE(review): the case labels, error exits and descriptor release are
 * not visible in this listing.
 */
5756 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
5758 vfs_context_t ctx
= vfs_context_current();
5759 struct vnode_attr va
;
5761 struct fileproc
*fp
;
5765 AUDIT_ARG(fd
, uap
->fd
);
5766 if (uap
->length
< 0)
5769 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
5773 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
5775 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
5784 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
5786 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
5787 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
5792 if ((error
= vnode_getwithref(vp
)) != 0) {
5796 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5799 error
= mac_vnode_check_truncate(ctx
,
5800 fp
->f_fglob
->fg_cred
, vp
);
5802 (void)vnode_put(vp
);
5807 VATTR_SET(&va
, va_data_size
, uap
->length
);
5808 error
= vnode_setattr(vp
, &va
, ctx
);
5809 (void)vnode_put(vp
);
5817 * Sync an open file with synchronized I/O _file_ integrity completion
/*
 * fsync: calls __pthread_testcancel(1) and then returns
 * fsync_common(p, uap, MNT_WAIT).  'retval' is unused.
 */
5821 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
5823 __pthread_testcancel(1);
5824 return(fsync_common(p
, uap
, MNT_WAIT
));
5829 * Sync an open file with synchronized I/O _file_ integrity completion
5831 * Notes: This is a legacy support function that does not test for
5832 * thread cancellation points.
/*
 * fsync_nocancel: returns fsync_common(p, uap, MNT_WAIT) without calling
 * __pthread_testcancel().  'uap' is cast from struct fsync_nocancel_args *
 * to struct fsync_args *.
 */
5836 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
5838 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
5843 * Sync an open file with synchronized I/O _data_ integrity completion
/*
 * fdatasync: calls __pthread_testcancel(1) and then returns
 * fsync_common() with MNT_DWAIT.  'uap' is cast from
 * struct fdatasync_args * to struct fsync_args *.
 */
5847 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
5849 __pthread_testcancel(1);
5850 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
5857 * Common fsync code to support both synchronized I/O file integrity completion
5858 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5860 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5861 * will only guarantee that the file data contents are retrievable. If
5862 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
5863 * includes additional metadata unnecessary for retrieving the file data
5864 * contents, such as atime, mtime, ctime, etc., also be committed to stable
5867 * Parameters: p The process
5868 * uap->fd The descriptor to synchronize
5869 * flags The data integrity flags
5871 * Returns: int Success
5872 * fp_getfvp:EBADF Bad file descriptor
5873 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5874 * VNOP_FSYNC:??? unspecified
5876 * Notes: We use struct fsync_args because it is a short name, and all
5877 * caller argument structures are otherwise identical.
/*
 * fsync_common: shared implementation behind fsync(), fsync_nocancel()
 * and fdatasync().
 *
 * Audits uap->fd, resolves it to a fileproc and vnode with fp_getfvp(),
 * takes an iocount with vnode_getwithref(), audits the vnode path, and
 * calls VNOP_FSYNC(vp, flags, ctx).  If the vnode is a named stream
 * (VISNAMEDSTREAM) with a non-NULL v_parent, is a shadow file, and
 * FP_WRITTEN is set in fp->f_flags, vnode_flushnamedstream() is called on
 * the parent and its result is discarded.  The iocount is dropped with
 * vnode_put().
 *
 * NOTE(review): the first operand(s) of the named-stream condition, the
 * error exits and the descriptor release are not visible in this listing.
 */
5880 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
5883 struct fileproc
*fp
;
5884 vfs_context_t ctx
= vfs_context_current();
5887 AUDIT_ARG(fd
, uap
->fd
);
5889 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
5891 if ( (error
= vnode_getwithref(vp
)) ) {
5896 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5898 error
= VNOP_FSYNC(vp
, flags
, ctx
);
5901 /* Sync resource fork shadow file if necessary. */
5903 (vp
->v_flag
& VISNAMEDSTREAM
) &&
5904 (vp
->v_parent
!= NULLVP
) &&
5905 vnode_isshadow(vp
) &&
5906 (fp
->f_flags
& FP_WRITTEN
)) {
5907 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
5911 (void)vnode_put(vp
);
5917 * Duplicate files. Source must be a file, target must be a file or
5920 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5921 * perform inheritance correctly.
/*
 * copyfile: duplicate the file uap->from as uap->to.
 *
 * uap->flags is first checked for bits outside CPF_MASK.  The source is
 * looked up with namei() (LOOKUP/OP_COPYFILE, SAVESTART | AUDITVNPATH1)
 * and the target with a second namei() (CREATE/OP_LINK, LOCKPARENT |
 * LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK).  The
 * code then tests CPF_OVERWRITE, tests whether the source or an existing
 * target is a directory (VDIR), authorises KAUTH_VNODE_ADD_FILE on the
 * target directory with vnode_authorize(), and performs the copy with
 * VNOP_COPYFILE().  sdvp is taken from tond.ni_startdir, and
 * fromnd.ni_startdir is released with vnode_put() when it is set.
 *
 * NOTE(review): the statements taken on each failed test, and the rest
 * of the cleanup, are not visible in this listing.
 */
5925 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
5927 vnode_t tvp
, fvp
, tdvp
, sdvp
;
5928 struct nameidata fromnd
, tond
;
5930 vfs_context_t ctx
= vfs_context_current();
5932 /* Check that the flags are valid. */
5934 if (uap
->flags
& ~CPF_MASK
) {
5938 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, SAVESTART
| AUDITVNPATH1
,
5939 UIO_USERSPACE
, uap
->from
, ctx
);
5940 if ((error
= namei(&fromnd
)))
5944 NDINIT(&tond
, CREATE
, OP_LINK
,
5945 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
5946 UIO_USERSPACE
, uap
->to
, ctx
);
5947 if ((error
= namei(&tond
))) {
5954 if (!(uap
->flags
& CPF_OVERWRITE
)) {
5959 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
5964 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
5970 * If source is the same as the destination (that is the
5971 * same inode number) then there is nothing to do.
5972 * (fixed to have POSIX semantics - CSM 3/2/98)
5977 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
5979 sdvp
= tond
.ni_startdir
;
5981 * nameidone has to happen before we vnode_put(tdvp)
5982 * since it may need to release the fs_nodelock on the tdvp
5993 if (fromnd
.ni_startdir
)
5994 vnode_put(fromnd
.ni_startdir
);
6004 * Rename files. Source and destination must either both be directories,
6005 * or both not be directories. If target is a directory, it must be empty.
6009 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
6013 struct nameidata
*fromnd
, *tond
;
6014 vfs_context_t ctx
= vfs_context_current();
6019 const char *oname
= NULL
;
6020 char *from_name
= NULL
, *to_name
= NULL
;
6021 int from_len
=0, to_len
=0;
6022 int holding_mntlock
;
6023 mount_t locked_mp
= NULL
;
6024 vnode_t oparent
= NULLVP
;
6026 fse_info from_finfo
, to_finfo
;
6028 int from_truncated
=0, to_truncated
;
6030 struct vnode_attr
*fvap
, *tvap
;
6032 /* carving out a chunk for structs that are too big to be on stack. */
6034 struct nameidata from_node
, to_node
;
6035 struct vnode_attr fv_attr
, tv_attr
;
6037 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
6038 fromnd
= &__rename_data
->from_node
;
6039 tond
= &__rename_data
->to_node
;
6041 holding_mntlock
= 0;
6049 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
6050 UIO_USERSPACE
, uap
->from
, ctx
);
6051 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6053 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6054 UIO_USERSPACE
, uap
->to
, ctx
);
6055 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6058 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6059 if ( (error
= namei(fromnd
)) )
6061 fdvp
= fromnd
->ni_dvp
;
6062 fvp
= fromnd
->ni_vp
;
6064 if (fvp
&& fvp
->v_type
== VDIR
)
6065 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
6068 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6069 if ( (error
= namei(tond
)) ) {
6071 * Translate error code for rename("dir1", "dir2/.").
6073 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
6077 tdvp
= tond
->ni_dvp
;
6081 batched
= vnode_compound_rename_available(fdvp
);
6084 * Claim: this check will never reject a valid rename.
6085 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6086 * Suppose fdvp and tdvp are not on the same mount.
6087 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6088 * then you can't move it to within another dir on the same mountpoint.
6089 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6091 * If this check passes, then we are safe to pass these vnodes to the same FS.
6093 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
6097 goto skipped_lookup
;
6101 error
= vn_authorize_rename(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, NULL
);
6103 if (error
== ENOENT
) {
6105 * We encountered a race where after doing the namei, tvp stops
6106 * being valid. If so, simply re-drive the rename call from the
6116 * If the source and destination are the same (i.e. they're
6117 * links to the same vnode) and the target file system is
6118 * case sensitive, then there is nothing to do.
6120 * XXX Come back to this.
6126 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6127 * then assume that this file system is case sensitive.
6129 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
6130 pathconf_val
!= 0) {
6136 * Allow the renaming of mount points.
6137 * - target must not exist
6138 * - target must reside in the same directory as source
6139 * - union mounts cannot be renamed
6140 * - "/" cannot be renamed
6142 * XXX Handle this in VFS after a continued lookup (if we missed
6143 * in the cache to start off)
6145 if ((fvp
->v_flag
& VROOT
) &&
6146 (fvp
->v_type
== VDIR
) &&
6148 (fvp
->v_mountedhere
== NULL
) &&
6150 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
6151 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
6154 /* switch fvp to the covered vnode */
6155 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
6156 if ( (vnode_getwithref(coveredvp
)) ) {
6166 * Check for cross-device rename.
6168 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
6169 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
6175 * If source is the same as the destination (that is the
6176 * same inode number) then there is nothing to do...
6177 * EXCEPT if the underlying file system supports case
6178 * insensitivity and is case preserving. In this case
6179 * the file system needs to handle the special case of
6180 * getting the same vnode as source (fvp) and target (tvp).
6182 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6183 * and _PC_CASE_PRESERVING can have this exception, and they need to
6184 * handle the special case of getting the same vnode as target and
6185 * source. NOTE: Then the target is unlocked going into vnop_rename,
6186 * so as not to cause locking problems. There is a single reference on tvp.
6188 * NOTE - that fvp == tvp also occurs if they are hard linked and
6189 * that correct behaviour then is just to return success without doing
6192 * XXX filesystem should take care of this itself, perhaps...
6194 if (fvp
== tvp
&& fdvp
== tdvp
) {
6195 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
6196 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
6197 fromnd
->ni_cnd
.cn_namelen
)) {
6202 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
6204 * we're holding a reference and lock
6205 * on locked_mp, but it no longer matches
6206 * what we want to do... so drop our hold
6208 mount_unlock_renames(locked_mp
);
6209 mount_drop(locked_mp
, 0);
6210 holding_mntlock
= 0;
6212 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
6214 * serialize renames that re-shape
6215 * the tree... if holding_mntlock is
6216 * set, then we're ready to go...
6218 * first need to drop the iocounts
6219 * we picked up, second take the
6220 * lock to serialize the access,
6221 * then finally start the lookup
6222 * process over with the lock held
6224 if (!holding_mntlock
) {
6226 * need to grab a reference on
6227 * the mount point before we
6228 * drop all the iocounts... once
6229 * the iocounts are gone, the mount
6232 locked_mp
= fvp
->v_mount
;
6233 mount_ref(locked_mp
, 0);
6236 * nameidone has to happen before we vnode_put(tvp)
6237 * since it may need to release the fs_nodelock on the tvp
6246 * nameidone has to happen before we vnode_put(fdvp)
6247 * since it may need to release the fs_nodelock on the fvp
6254 mount_lock_renames(locked_mp
);
6255 holding_mntlock
= 1;
6261 * when we dropped the iocounts to take
6262 * the lock, we allowed the identity of
6263 * the various vnodes to change... if they did,
6264 * we may no longer be dealing with a rename
6265 * that reshapes the tree... once we're holding
6266 * the iocounts, the vnodes can't change type
6267 * so we're free to drop the lock at this point
6270 if (holding_mntlock
) {
6271 mount_unlock_renames(locked_mp
);
6272 mount_drop(locked_mp
, 0);
6273 holding_mntlock
= 0;
6277 // save these off so we can later verify that fvp is the same
6278 oname
= fvp
->v_name
;
6279 oparent
= fvp
->v_parent
;
6283 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
6286 get_fse_info(fvp
, &from_finfo
, ctx
);
6288 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
6293 fvap
= &__rename_data
->fv_attr
;
6297 get_fse_info(tvp
, &to_finfo
, ctx
);
6298 } else if (batched
) {
6299 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
6304 tvap
= &__rename_data
->tv_attr
;
6309 #endif /* CONFIG_FSE */
6311 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
6312 if (from_name
== NULL
) {
6313 GET_PATH(from_name
);
6314 if (from_name
== NULL
) {
6320 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
6322 if (to_name
== NULL
) {
6324 if (to_name
== NULL
) {
6330 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
6333 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
6334 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
6337 if (holding_mntlock
) {
6339 * we can drop our serialization
6342 mount_unlock_renames(locked_mp
);
6343 mount_drop(locked_mp
, 0);
6344 holding_mntlock
= 0;
6347 if (error
== EKEEPLOOKING
) {
6348 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6349 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6350 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6354 fromnd
->ni_vp
= fvp
;
6357 goto continue_lookup
;
6361 * We may encounter a race in the VNOP where the destination didn't
6362 * exist when we did the namei, but it does by the time we go and
6363 * try to create the entry. In this case, we should re-drive this rename
6364 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6365 * but other filesystems susceptible to this race could return it, too.
6367 if (error
== ERECYCLE
) {
6374 /* call out to allow 3rd party notification of rename.
6375 * Ignore result of kauth_authorize_fileop call.
6377 kauth_authorize_fileop(vfs_context_ucred(ctx
),
6378 KAUTH_FILEOP_RENAME
,
6379 (uintptr_t)from_name
, (uintptr_t)to_name
);
6382 if (from_name
!= NULL
&& to_name
!= NULL
) {
6383 if (from_truncated
|| to_truncated
) {
6384 // set it here since only the from_finfo gets reported up to user space
6385 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
6389 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
6392 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
6396 add_fsevent(FSE_RENAME
, ctx
,
6397 FSE_ARG_STRING
, from_len
, from_name
,
6398 FSE_ARG_FINFO
, &from_finfo
,
6399 FSE_ARG_STRING
, to_len
, to_name
,
6400 FSE_ARG_FINFO
, &to_finfo
,
6403 add_fsevent(FSE_RENAME
, ctx
,
6404 FSE_ARG_STRING
, from_len
, from_name
,
6405 FSE_ARG_FINFO
, &from_finfo
,
6406 FSE_ARG_STRING
, to_len
, to_name
,
6410 #endif /* CONFIG_FSE */
6413 * update filesystem's mount point data
6416 char *cp
, *pathend
, *mpname
;
6422 mp
= fvp
->v_mountedhere
;
6424 if (vfs_busy(mp
, LK_NOWAIT
)) {
6428 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
6430 error
= copyinstr(uap
->to
, tobuf
, MAXPATHLEN
, &len
);
6432 /* find current mount point prefix */
6433 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
6434 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
6438 /* find last component of target name */
6439 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
6443 /* append name to prefix */
6444 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
6445 bzero(pathend
, maxlen
);
6446 strlcpy(pathend
, mpname
, maxlen
);
6448 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
6453 * fix up name & parent pointers. note that we first
6454 * check that fvp has the same name/parent pointers it
6455 * had before the rename call... this is a 'weak' check
6458 * XXX oparent and oname may not be set in the compound vnop case
6460 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
6463 update_flags
= VNODE_UPDATE_NAME
;
6466 update_flags
|= VNODE_UPDATE_PARENT
;
6468 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
6471 if (to_name
!= NULL
) {
6472 RELEASE_PATH(to_name
);
6475 if (from_name
!= NULL
) {
6476 RELEASE_PATH(from_name
);
6479 if (holding_mntlock
) {
6480 mount_unlock_renames(locked_mp
);
6481 mount_drop(locked_mp
, 0);
6482 holding_mntlock
= 0;
6486 * nameidone has to happen before we vnode_put(tdvp)
6487 * since it may need to release the fs_nodelock on the tdvp
6497 * nameidone has to happen before we vnode_put(fdvp)
6498 * since it may need to release the fs_nodelock on the fdvp
6509 * If things changed after we did the namei, then we will re-drive
6510 * this rename call from the top.
6517 FREE(__rename_data
, M_TEMP
);
6522 * Make a directory file.
6524 * Returns: 0 Success
6527 * vnode_authorize:???
// mkdir1 is the shared worker for directory creation: mkdir() and
// mkdir_extended() in this file both call it with a vnode_attr they filled in.
6532 mkdir1(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
)
6536 int update_flags
= 0;
6538 struct nameidata nd
;
6540 AUDIT_ARG(mode
, vap
->va_mode
);
// Prepare a CREATE lookup for OP_MKDIR; the path is a user-space address (UIO_USERSPACE).
6541 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, UIO_USERSPACE
,
// Flag the component as a directory-to-be and tag the nameidata with NAMEI_COMPOUNDMKDIR.
6543 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
6544 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
// batched records whether the parent vnode reports compound mkdir support.
6558 batched
= vnode_compound_mkdir_available(dvp
);
// Whatever the caller put in vap, the type is forced to VDIR here.
6560 VATTR_SET(vap
, va_type
, VDIR
);
6564 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
6565 * only get EXISTS or EISDIR for existing path components, and not that it could see
6566 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6567 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
6569 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
6570 if (error
== EACCES
|| error
== EPERM
) {
6578 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6579 * rather than EACCESS if the target exists.
6581 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, UIO_USERSPACE
,
6583 error2
= namei(&nd
);
6597 * make the directory
// vn_create performs the creation; an EKEEPLOOKING result jumps back to continue_lookup.
6599 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
6600 if (error
== EKEEPLOOKING
) {
6602 goto continue_lookup
;
6608 // Make sure the name & parent pointers are hooked up
6609 if (vp
->v_name
== NULL
)
6610 update_flags
|= VNODE_UPDATE_NAME
;
6611 if (vp
->v_parent
== NULLVP
)
6612 update_flags
|= VNODE_UPDATE_PARENT
;
6615 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
// Post an FSE_CREATE_DIR fsevent for the newly created vnode.
6618 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
6623 * nameidone has to happen before we vnode_put(dvp)
6624 * since it may need to release the fs_nodelock on the dvp
6637 * mkdir_extended: Create a directory; with extended security (ACL).
6639 * Parameters: p Process requesting to create the directory
6640 * uap User argument descriptor (see below)
6643 * Indirect: uap->path Path of directory to create
6644 * uap->mode Access permissions to set
6645 * uap->xsecurity ACL to set
6647 * Returns: 0 Success
// The directory creation itself is delegated to mkdir1(); this wrapper only
// prepares the vnode_attr (mode plus optional ACL).
6652 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
6655 kauth_filesec_t xsecdst
;
6656 struct vnode_attr va
;
// Record the requested owner and group for auditing.
6658 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
// The filesec is copied in from user space only when uap->xsecurity is not
// USER_ADDR_NULL; ciferror receives the copy-in result.
6661 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
6662 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
// Mode is restricted to the ACCESSPERMS bits and then has the process fd_cmask bits cleared.
6666 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
// Attach the ACL embedded in the copied-in filesec when one is present.
6667 if (xsecdst
!= NULL
)
6668 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
6670 ciferror
= mkdir1(vfs_context_current(), uap
->path
, &va
);
// Free the kernel copy of the filesec once mkdir1() has returned.
6671 if (xsecdst
!= NULL
)
6672 kauth_filesec_free(xsecdst
);
6677 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
// Plain mkdir: build a vnode_attr carrying only the masked mode and hand off to mkdir1().
6679 struct vnode_attr va
;
// Mode is restricted to the ACCESSPERMS bits and then has the process fd_cmask bits cleared.
6682 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
6684 return(mkdir1(vfs_context_current(), uap
->path
, &va
));
6688 * Remove a directory file.
6692 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
6696 struct nameidata nd
;
6699 int has_listeners
= 0;
6702 vfs_context_t ctx
= vfs_context_current();
6704 struct vnode_attr va
;
6705 #endif /* CONFIG_FSE */
6706 struct vnode_attr
*vap
= NULL
;
6712 * This loop exists to restart rmdir in the unlikely case that two
6713 * processes are simultaneously trying to remove the same directory
6714 * containing orphaned appleDouble files.
// Set up a DELETE lookup for OP_RMDIR on uap->path and tag it with NAMEI_COMPOUNDRMDIR.
6717 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
6718 UIO_USERSPACE
, uap
->path
, ctx
);
6719 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
// batched records whether vp reports compound rmdir support.
6732 batched
= vnode_compound_rmdir_available(vp
);
6734 if (vp
->v_flag
& VROOT
) {
6736 * The root of a mounted filesystem cannot be deleted.
6743 * Removed a check here; we used to abort if vp's vid
6744 * was not the same as what we'd seen the last time around.
6745 * I do not think that check was valid, because if we retry
6746 * and all dirents are gone, the directory could legitimately
6747 * be recycled but still be present in a situation where we would
6748 * have had permission to delete. Therefore, we won't make
6749 * an effort to preserve that check now that we may not have a
6754 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
6762 if (!vnode_compound_rmdir_available(dvp
)) {
6763 panic("No error, but no compound rmdir?");
6770 need_event
= need_fsevent(FSE_DELETE
, dvp
);
6773 get_fse_info(vp
, &finfo
, ctx
);
6775 error
= vfs_get_notify_attributes(&va
);
6784 has_listeners
= kauth_authorize_fileop_has_listeners();
6785 if (need_event
|| has_listeners
) {
6794 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
6797 finfo
.mode
|= FSE_TRUNCATED_PATH
;
6802 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
6805 /* Couldn't find a vnode */
// EKEEPLOOKING from vn_rmdir sends control back to continue_lookup.
6809 if (error
== EKEEPLOOKING
) {
6810 goto continue_lookup
;
6812 #if CONFIG_APPLEDOUBLE
6814 * Special case to remove orphaned AppleDouble
6815 * files. I don't like putting this in the kernel,
6816 * but carbon does not like putting this in carbon either,
// On ENOTEMPTY, try clearing orphaned AppleDouble files and then repeat the vn_rmdir call.
6819 if (error
== ENOTEMPTY
) {
6820 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
6821 if (error
== EBUSY
) {
6827 * Assuming everything went well, we will try the RMDIR again
6830 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
6832 #endif /* CONFIG_APPLEDOUBLE */
6834 * Call out to allow 3rd party notification of delete.
6835 * Ignore result of kauth_authorize_fileop call.
6838 if (has_listeners
) {
6839 kauth_authorize_fileop(vfs_context_ucred(ctx
),
6840 KAUTH_FILEOP_DELETE
,
6845 if (vp
->v_flag
& VISHARDLINK
) {
6846 // see the comment in unlink1() about why we update
6847 // the parent of a hard link when it is removed
6848 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
6854 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
6856 add_fsevent(FSE_DELETE
, ctx
,
6857 FSE_ARG_STRING
, len
, path
,
6858 FSE_ARG_FINFO
, &finfo
,
6870 * nameidone has to happen before we vnode_put(dvp)
6871 * since it may need to release the fs_nodelock on the dvp
// restart_flag controls the retry loop: when it is zero a waiter on vp is woken;
// the tsleep below is reached on the restart path (NOTE(review): the lines
// between the wakeup and the tsleep are not visible here - confirm).
6879 if (restart_flag
== 0) {
6880 wakeup_one((caddr_t
)vp
);
6883 tsleep(vp
, PVFS
, "rm AD", 1);
6885 } while (restart_flag
!= 0);
6891 /* Get direntry length padded to 8 byte alignment */
6892 #define DIRENT64_LEN(namlen) \
6893 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
// Reads directory entries in the extended (struct direntry) layout. When the
// filesystem does not handle the extended format itself, struct dirent records
// are read into a temporary buffer and converted one by one.
6896 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
6897 int *numdirent
, vfs_context_t ctxp
)
6899 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6900 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
6901 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
6902 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
6907 struct direntry
*entry64
;
6913 * Our kernel buffer needs to be smaller since re-packing
6914 * will expand each dirent. The worse case (when the name
6915 * length is 3) corresponds to a struct direntry size of 32
6916 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6917 * (4-byte aligned). So having a buffer that is 3/8 the size
6918 * will prevent us from reading more than we can pack.
6920 * Since this buffer is wired memory, we will limit the
6921 * buffer size to a maximum of 32K. We would really like to
6922 * use 32K in the MIN(), but we use magic number 87371 to
6923 * prevent uio_resid() * 3 / 8 from overflowing.
// 3 * 87371 / 8 = 32764, which keeps the temporary buffer just under 32K.
6925 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
6926 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
6927 if (bufptr
== NULL
) {
// Kernel-space uio over the temporary buffer, starting at the same offset as the caller uio.
6931 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
6932 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
6933 auio
->uio_offset
= uio
->uio_offset
;
6935 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
// NOTE(review): in the lines visible here, d_reclen and d_namlen supplied by the
// filesystem are used without validation; a zero d_reclen would leave dep
// unchanged in the conversion loop. Confirm whether a check exists elsewhere.
6937 dep
= (struct dirent
*)bufptr
;
6938 bytesread
= bufsize
- uio_resid(auio
);
6940 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
6943 * Convert all the entries and copy them out to user's buffer.
6945 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
6946 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
6948 bzero(entry64
, enbufsize
);
6949 /* Convert a dirent to a dirent64. */
6950 entry64
->d_ino
= dep
->d_ino
;
6951 entry64
->d_seekoff
= 0;
6952 entry64
->d_reclen
= enbufsize
;
6953 entry64
->d_namlen
= dep
->d_namlen
;
6954 entry64
->d_type
= dep
->d_type
;
6955 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
6957 /* Move to next entry. */
6958 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
6960 /* Copy entry64 to user's buffer. */
6961 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
6964 /* Update the real offset using the offset we got from VNOP_READDIR. */
6966 uio
->uio_offset
= auio
->uio_offset
;
// Release the temporary dirent buffer and the conversion scratch entry.
6969 FREE(bufptr
, M_TEMP
);
6970 FREE(entry64
, M_TEMP
);
6975 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
6978 * Read a block of directory entries in a file system independent format.
// Shared implementation behind getdirentries() and getdirentries64(); flags
// selects the extended record format (VNODE_READDIR_EXTENDED).
6981 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
6982 off_t
*offset
, int flags
)
6985 struct vfs_context context
= *vfs_context_current(); /* local copy */
6986 struct fileproc
*fp
;
6988 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
6990 int error
, eofflag
, numdirent
;
6991 char uio_buf
[ UIO_SIZEOF(1) ];
6993 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
// The descriptor must have been opened for reading (FREAD).
6997 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
6998 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
// Clamp the request to GETDIRENTRIES_MAXBUFSIZE (128 MiB).
7003 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
7004 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
7007 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
7011 if ( (error
= vnode_getwithref(vp
)) ) {
7014 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
// vp must be a directory; on the non-directory path vp is released with vnode_put().
7017 if (vp
->v_type
!= VDIR
) {
7018 (void)vnode_put(vp
);
7024 error
= mac_vnode_check_readdir(&context
, vp
);
7026 (void)vnode_put(vp
);
// Start reading at the current offset stored in the open file.
7031 loff
= fp
->f_fglob
->fg_offset
;
7032 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7033 uio_addiov(auio
, bufp
, bufsize
);
// Extended requests go through vnode_readdir64(); otherwise VNOP_READDIR is
// called directly. Either way the stored file offset is updated from the uio.
7035 if (flags
& VNODE_READDIR_EXTENDED
) {
7036 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
7037 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7039 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
7040 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7043 (void)vnode_put(vp
);
// Nothing was transferred (residual equals the requested size): consult the
// union-mount handling below.
7047 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
7048 if (union_dircheckp
) {
7049 error
= union_dircheckp(&vp
, fp
, &context
);
7056 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
7057 struct vnode
*tvp
= vp
;
7058 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
7060 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7061 fp
->f_fglob
->fg_offset
= 0;
// Report how many bytes were placed in the caller buffer.
7075 *bytesread
= bufsize
- uio_resid(auio
);
7083 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
// Reads directory entries through getdirentries_common() with flags = 0 and
// returns the byte count in *retval.
7089 AUDIT_ARG(fd
, uap
->fd
);
7090 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
// The offset is copied out to uap->basep as a long of the width used by the calling process.
7093 if (proc_is64bit(p
)) {
7094 user64_long_t base
= (user64_long_t
)offset
;
7095 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
7097 user32_long_t base
= (user32_long_t
)offset
;
7098 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
7100 *retval
= bytesread
;
7106 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
// Variant of getdirentries() that asks getdirentries_common() for
// VNODE_READDIR_EXTENDED records.
7112 AUDIT_ARG(fd
, uap
->fd
);
7113 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
7116 *retval
= bytesread
;
// The resulting offset is copied out to uap->position as an off_t.
7117 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
7124 * Set the mode mask for creation of filesystem nodes.
7125 * XXX implement xsecurity
7127 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
// Installs a new file-creation mask for the process and returns the previous
// one through *retval; the fsec argument is declared __unused.
7129 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
7131 struct filedesc
*fdp
;
7133 AUDIT_ARG(mask
, newmask
);
// Hand back the old mask first, then store the new one restricted to the ALLPERMS bits.
7136 *retval
= fdp
->fd_cmask
;
7137 fdp
->fd_cmask
= newmask
& ALLPERMS
;
7143 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7145 * Parameters: p Process requesting to set the umask
7146 * uap User argument descriptor (see below)
7147 * retval umask of the process (parameter p)
7149 * Indirect: uap->newmask umask to set
7150 * uap->xsecurity ACL to set
7152 * Returns: 0 Success
7157 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
7160 kauth_filesec_t xsecdst
;
// Start with no filesec; one is copied in only when uap->xsecurity is not USER_ADDR_NULL.
7162 xsecdst
= KAUTH_FILESEC_NONE
;
7163 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
7164 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
7167 xsecdst
= KAUTH_FILESEC_NONE
;
// The mask update itself is done by umask1(), which receives the filesec as its third argument.
7170 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
// Free the copied-in filesec, if any.
7172 if (xsecdst
!= KAUTH_FILESEC_NONE
)
7173 kauth_filesec_free(xsecdst
);
7178 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
// No extended security is supplied on this path: pass the UMASK_NOXSECURITY sentinel to umask1().
7180 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
7184 * Void all references to file by ripping underlying filesystem
7189 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
7192 struct vnode_attr va
;
7193 vfs_context_t ctx
= vfs_context_current();
7195 struct nameidata nd
;
// Look up the user-supplied path with the FOLLOW flag set.
7197 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
// Vnodes that are neither character nor block devices take the branch below.
7206 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
// Block devices that are mounted on are singled out (the branch body is not visible here).
7211 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
7217 error
= mac_vnode_check_revoke(ctx
, vp
);
// Fetch the owner uid: callers other than the owner must pass the suser() check.
7223 VATTR_WANTED(&va
, va_uid
);
7224 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
7226 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
7227 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
// VNOP_REVOKE is issued only when the vnode has a use count or is aliased.
7229 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
7230 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
7238 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7239 * The following system calls are designed to support features
7240 * which are specific to the HFS & HFS Plus volume formats
7245 * Obtain attribute information on objects in a directory while enumerating
// Enumerates the directory behind uap->fd and returns the requested attributes
// for each entry via VNOP_READDIRATTR.
7250 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
7253 struct fileproc
*fp
;
7255 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7256 uint32_t count
, savecount
;
7260 struct attrlist attributelist
;
7261 vfs_context_t ctx
= vfs_context_current();
7263 char uio_buf
[ UIO_SIZEOF(1) ];
7264 kauth_action_t action
;
7268 /* Get the attributes into kernel space */
7269 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
7272 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
7276 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
// The descriptor must have been opened for reading (FREAD).
7279 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7280 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7287 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
7294 if ( (error
= vnode_getwithref(vp
)) )
7297 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
// vp must be a directory; on the non-directory path vp is released with vnode_put().
7300 if (vp
->v_type
!= VDIR
) {
7301 (void)vnode_put(vp
);
7307 error
= mac_vnode_check_readdir(ctx
, vp
);
7309 (void)vnode_put(vp
);
7314 /* set up the uio structure which will contain the users return buffer */
7315 loff
= fp
->f_fglob
->fg_offset
;
7316 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7317 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
7320 * If the only item requested is file names, we can let that past with
7321 * just LIST_DIRECTORY. If they want any other attributes, that means
7322 * they need SEARCH as well.
7324 action
= KAUTH_VNODE_LIST_DIRECTORY
;
7325 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
7326 attributelist
.fileattr
|| attributelist
.dirattr
)
7327 action
|= KAUTH_VNODE_SEARCH
;
// The read is attempted only when vnode_authorize() grants the computed action.
7329 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
7331 /* Believe it or not, uap->options only has 32-bits of valid
7332 * info, so truncate before extending again */
7334 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
7335 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
7339 (void) vnode_put(vp
);
7344 * If we've got the last entry of a directory in a union mount
7345 * then reset the eofflag and pretend there's still more to come.
7346 * The next call will again set eofflag and the buffer will be empty,
7347 * so traverse to the underlying directory and do the directory
7350 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
7351 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
7353 } else { // Empty buffer
7354 struct vnode
*tvp
= vp
;
7355 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
7356 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
7357 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7358 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
7360 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
7368 (void)vnode_put(vp
);
7372 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
// Copy the entry count, the new state value and the starting offset (loff) back to user space.
7374 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
7376 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
7378 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
7381 *retval
= eofflag
; /* similar to getdirentries */
7385 return (error
); /* return error earlier, a retval of 0 or 1 now */
7387 } /* end of getdirentriesattr system call */
7390 * Exchange data between two files
// Looks up both paths, checks that they name two regular files on the same
// mount, and then calls VNOP_EXCHANGE on them.
7395 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
7398 struct nameidata fnd
, snd
;
7399 vfs_context_t ctx
= vfs_context_current();
7403 u_int32_t nameiflags
;
7407 int from_truncated
=0, to_truncated
=0;
7409 fse_info f_finfo
, s_finfo
;
// FOLLOW is added to the lookup flags unless FSOPT_NOFOLLOW is set in uap->options.
7413 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
7415 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
7416 UIO_USERSPACE
, uap
->path1
, ctx
);
7418 error
= namei(&fnd
);
7425 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
7426 UIO_USERSPACE
, uap
->path2
, ctx
);
7428 error
= namei(&snd
);
7437 * if the files are the same, return an inval error
7445 * if the files are on different volumes, return an error
7447 if (svp
->v_mount
!= fvp
->v_mount
) {
7452 /* If they're not files, return an error */
7453 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
7459 error
= mac_vnode_check_exchangedata(ctx
,
// Both vnodes must be authorized for reading and writing data.
7464 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
7465 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
7470 need_fsevent(FSE_EXCHANGE
, fvp
) ||
7472 kauth_authorize_fileop_has_listeners()) {
7475 if (fpath
== NULL
|| spath
== NULL
) {
7480 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
7481 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
7484 get_fse_info(fvp
, &f_finfo
, ctx
);
7485 get_fse_info(svp
, &s_finfo
, ctx
);
7486 if (from_truncated
|| to_truncated
) {
7487 // set it here since only the f_finfo gets reported up to user space
7488 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
7492 /* Ok, make the call */
7493 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
7496 const char *tmpname
;
7498 if (fpath
!= NULL
&& spath
!= NULL
) {
7499 /* call out to allow 3rd party notification of exchangedata.
7500 * Ignore result of kauth_authorize_fileop call.
7502 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
7503 (uintptr_t)fpath
, (uintptr_t)spath
);
// Swap the v_name pointers of the two vnodes, and their v_parent pointers when those differ.
7507 tmpname
= fvp
->v_name
;
7508 fvp
->v_name
= svp
->v_name
;
7509 svp
->v_name
= tmpname
;
7511 if (fvp
->v_parent
!= svp
->v_parent
) {
7514 tmp
= fvp
->v_parent
;
7515 fvp
->v_parent
= svp
->v_parent
;
7516 svp
->v_parent
= tmp
;
7518 name_cache_unlock();
7521 if (fpath
!= NULL
&& spath
!= NULL
) {
7522 add_fsevent(FSE_EXCHANGE
, ctx
,
7523 FSE_ARG_STRING
, flen
, fpath
,
7524 FSE_ARG_FINFO
, &f_finfo
,
7525 FSE_ARG_STRING
, slen
, spath
,
7526 FSE_ARG_FINFO
, &s_finfo
,
7534 RELEASE_PATH(fpath
);
7536 RELEASE_PATH(spath
);
7544 * Return (in MB) the amount of freespace on the given vnode's volume.
7546 uint32_t freespace_mb(vnode_t vp
);
7549 freespace_mb(vnode_t vp
)
// Refresh the cached vfsstat for the mount before reading it.
7551 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
// Available blocks times block size gives bytes (f_bavail is widened to 64 bits
// before the multiply); shifting right by 20 converts bytes to megabytes.
7552 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
7553 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
7561 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
7566 struct nameidata nd
;
7567 struct user64_fssearchblock searchblock
;
7568 struct searchstate
*state
;
7569 struct attrlist
*returnattrs
;
7570 struct timeval timelimit
;
7571 void *searchparams1
,*searchparams2
;
7573 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7574 uint32_t nummatches
;
7576 uint32_t nameiflags
;
7577 vfs_context_t ctx
= vfs_context_current();
7578 char uio_buf
[ UIO_SIZEOF(1) ];
7580 /* Start by copying in fsearchblock parameter list */
7581 if (IS_64BIT_PROCESS(p
)) {
7582 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
7583 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
7584 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
7587 struct user32_fssearchblock tmp_searchblock
;
7589 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
7590 // munge into 64-bit version
7591 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
7592 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
7593 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
7594 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
7596 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7597 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7599 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
7600 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
7601 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
7602 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
7603 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
7604 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
7605 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
7610 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7612 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
7613 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
7616 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7617 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
7618 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7621 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
7622 sizeof(struct attrlist
) + sizeof(struct searchstate
);
7624 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
7626 /* Now set up the various pointers to the correct place in our newly allocated memory */
7628 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
7629 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
7630 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
7632 /* Now copy in the stuff given our local variables. */
7634 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
7637 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
7640 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
7643 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
7647 * When searching a union mount, need to set the
7648 * start flag at the first call on each layer to
7649 * reset state for the new volume.
7651 if (uap
->options
& SRCHFS_START
)
7652 state
->ss_union_layer
= 0;
7654 uap
->options
|= state
->ss_union_flags
;
7655 state
->ss_union_flags
= 0;
7658 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7659 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7660 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7661 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7662 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7665 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
7666 attrreference_t
* string_ref
;
7667 u_int32_t
* start_length
;
7668 user64_size_t param_length
;
7670 /* validate searchparams1 */
7671 param_length
= searchblock
.sizeofsearchparams1
;
7672 /* skip the word that specifies length of the buffer */
7673 start_length
= (u_int32_t
*) searchparams1
;
7674 start_length
= start_length
+1;
7675 string_ref
= (attrreference_t
*) start_length
;
7677 /* ensure no negative offsets or too big offsets */
7678 if (string_ref
->attr_dataoffset
< 0 ) {
7682 if (string_ref
->attr_length
> MAXPATHLEN
) {
7687 /* Check for pointer overflow in the string ref */
7688 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
7693 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
7697 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
7703 /* set up the uio structure which will contain the users return buffer */
7704 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7705 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
7708 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
7709 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
7710 UIO_USERSPACE
, uap
->path
, ctx
);
7719 * Switch to the root vnode for the volume
7721 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
7728 * If it's a union mount, the path lookup takes
7729 * us to the top layer. But we may need to descend
7730 * to a lower layer. For non-union mounts the layer
7733 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
7734 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
7737 vp
= vp
->v_mount
->mnt_vnodecovered
;
7743 vnode_getwithref(vp
);
7748 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
7757 * If searchblock.maxmatches == 0, then skip the search. This has happened
7758 * before and sometimes the underlying code doesn't deal with it well.
7760 if (searchblock
.maxmatches
== 0) {
7766 * All right, we have everything we need, so let's make that call.
7768 * We keep special track of the return value from the file system:
7769 * EAGAIN is an acceptable error condition that shouldn't keep us
7770 * from copying out any results...
7773 fserror
= VNOP_SEARCHFS(vp
,
7776 &searchblock
.searchattrs
,
7777 (u_long
)searchblock
.maxmatches
,
7781 (u_long
)uap
->scriptcode
,
7782 (u_long
)uap
->options
,
7784 (struct searchstate
*) &state
->ss_fsstate
,
7788 * If it's a union mount we need to be called again
7789 * to search the mounted-on filesystem.
7791 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
7792 state
->ss_union_flags
= SRCHFS_START
;
7793 state
->ss_union_layer
++; // search next layer down
7801 /* Now copy out the stuff that needs copying out. That means the number of matches, the
7802 search state. Everything was already put into the return buffer by the vop call. */
7804 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
7807 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
7814 FREE(searchparams1
,M_TEMP
);
7819 } /* end of searchfs system call */
7821 #else /* CONFIG_SEARCHFS */
// searchfs() as compiled under the #else arm of CONFIG_SEARCHFS. All three
// parameters are marked __unused, so this variant ignores its arguments.
// NOTE(review): the body is not visible in this listing -- confirm the value it returns.
7824 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
7829 #endif /* CONFIG_SEARCHFS */
// Lock attribute/group objects and the two mutexes used by the namespace
// handler code; all of them are set up in nspace_handler_init().
7832 lck_grp_attr_t
* nspace_group_attr
;
7833 lck_attr_t
* nspace_lock_attr
;
7834 lck_grp_t
* nspace_mutex_group
;
// nspace_handler_lock is taken around scans and updates of nspace_items[].
// nspace_handler_exclusion_lock is taken around the handler_busy flag of
// nspace_handlers[] in wait_for_namespace_event().
7836 lck_mtx_t nspace_handler_lock
;
7837 lck_mtx_t nspace_handler_exclusion_lock
;
// snapshot_timestamp is stored by the FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
// fsctl and zeroed in nspace_proc_exit() when the snapshot handler exits; the
// values 0 and ~0 are tested together in several places.
7839 time_t snapshot_timestamp
=0;
// Stored by the FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS fsctl and consulted by
// resolve_nspace_item_ext() for snapshot events on MNTK_VIRTUALDEV mounts.
7840 int nspace_allow_virtual_devs
=0;
7842 void nspace_handler_init(void);
// One entry of the pending namespace-event table.
// NOTE(review): the member list is not visible in this listing; the code below
// accesses .vp, .arg, .op, .vid, .flags, .token and .refcount.
7844 typedef struct nspace_item_info
{
7854 #define MAX_NSPACE_ITEMS 128
// Fixed-size table of pending events; a slot whose flags are 0 is treated as free
// by resolve_nspace_item_ext().
7855 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
7856 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
// Incremented to produce the token handed to a handler; its address is also the
// sleep/wakeup channel used when waiting for a free table slot.
7857 uint32_t nspace_token_id
=0;
7858 uint32_t nspace_handler_timeout
= 15; // seconds
// Item state bits stored in nspace_item_info.flags.
7860 #define NSPACE_ITEM_NEW 0x0001
7861 #define NSPACE_ITEM_PROCESSING 0x0002
7862 #define NSPACE_ITEM_DEAD 0x0004
7863 #define NSPACE_ITEM_CANCELLED 0x0008
7864 #define NSPACE_ITEM_DONE 0x0010
7865 #define NSPACE_ITEM_RESET_TIMER 0x0020
// Event-type bits, stored in the same flags word as the state bits above.
7867 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
7868 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
7869 #define NSPACE_ITEM_TRACK_EVENT 0x0100
7871 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
7873 //#pragma optimization_level 0
// Handler kinds. The values are used directly as indices into nspace_handlers[],
// and NSPACE_HANDLER_COUNT sizes that array.
// NOTE(review): the enum and struct opening/closing lines are not visible in this listing.
7876 NSPACE_HANDLER_NSPACE
= 0,
7877 NSPACE_HANDLER_SNAPSHOT
= 1,
7878 NSPACE_HANDLER_TRACK
= 2,
7880 NSPACE_HANDLER_COUNT
,
// Per-handler registration: thread id and process recorded by
// wait_for_namespace_event() and cleared by nspace_proc_exit().
// NOTE(review): a handler_busy member is also used by wait_for_namespace_event()
// but its declaration is not visible here.
7884 uint64_t handler_tid
;
7885 struct proc
*handler_proc
;
7889 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
7891 /* namespace fsctl functions */
// Forward declarations for the file-local helpers defined below.
7892 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
7893 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
7894 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
7895 static nspace_type_t
nspace_type_for_op(uint64_t op
);
7896 static int nspace_is_special_process(struct proc
*proc
);
7897 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
7898 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
// NOTE(review): size is declared int here, while process_namespace_fsctl() below
// takes u_int size and forwards it to this function.
7899 static int validate_namespace_args (int is64bit
, int size
);
7900 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
// Tests whether an item's flags belong to the given handler type: the event-type
// bits of event_flags (masked with NSPACE_ITEM_ALL_EVENT_TYPES) must equal exactly
// the single event bit that corresponds to nspace_type.
// NOTE(review): the fall-through path after the printf is not visible in this
// listing -- confirm what is returned for an invalid type.
7903 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
7905 switch(nspace_type
) {
7906 case NSPACE_HANDLER_NSPACE
:
7907 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
7908 case NSPACE_HANDLER_SNAPSHOT
:
7909 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
7910 case NSPACE_HANDLER_TRACK
:
7911 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_TRACK_EVENT
;
// NOTE(review): the format uses %u but the argument is cast to int.
7913 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
// Maps a handler type to the NSPACE_ITEM_*_EVENT bit that marks items of that type.
// NOTE(review): the path after the printf is not visible in this listing --
// confirm what is returned for an invalid type.
7918 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
7920 switch(nspace_type
) {
7921 case NSPACE_HANDLER_NSPACE
:
7922 return NSPACE_ITEM_NSPACE_EVENT
;
7923 case NSPACE_HANDLER_SNAPSHOT
:
7924 return NSPACE_ITEM_SNAPSHOT_EVENT
;
7925 case NSPACE_HANDLER_TRACK
:
7926 return NSPACE_ITEM_TRACK_EVENT
;
// NOTE(review): the format uses %u but the argument is cast to int.
7928 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
// Returns the open mode used when a vnode is opened on behalf of a handler of the
// given type: read/write for the namespace handler, read-only for the snapshot and
// track handlers; O_EVTONLY is included in both cases.
// NOTE(review): the path after the printf is not visible in this listing --
// confirm what is returned for an invalid type.
7933 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
7935 switch(nspace_type
) {
7936 case NSPACE_HANDLER_NSPACE
:
7937 return FREAD
| FWRITE
| O_EVTONLY
;
7938 case NSPACE_HANDLER_SNAPSHOT
:
7939 case NSPACE_HANDLER_TRACK
:
7940 return FREAD
| O_EVTONLY
;
// NOTE(review): the format uses %u but the argument is cast to int.
7942 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
// Translates the event-type field of an op word (op masked with
// NAMESPACE_HANDLER_EVENT_TYPE_MASK) into the matching handler type.
// An unrecognised value is logged and NSPACE_HANDLER_NSPACE is returned.
7947 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
7949 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
7950 case NAMESPACE_HANDLER_NSPACE_EVENT
:
7951 return NSPACE_HANDLER_NSPACE
;
7952 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
7953 return NSPACE_HANDLER_SNAPSHOT
;
7954 case NAMESPACE_HANDLER_TRACK_EVENT
:
7955 return NSPACE_HANDLER_TRACK
;
7957 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
7958 return NSPACE_HANDLER_NSPACE
;
// Checks proc against the handler_proc registered in every nspace_handlers[] slot.
// NOTE(review): the return statements are not visible in this listing; callers
// treat a non-zero result as "proc is one of the registered handlers".
7962 static inline int nspace_is_special_process(struct proc
*proc
)
7965 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
7966 if (proc
== nspace_handlers
[i
].handler_proc
)
// One-time setup of the namespace-handler state: allocates the lock attribute,
// group attribute and the "nspace-mutex" lock group, initialises both mutexes
// with them, and zero-fills the whole nspace_items[] table.
7973 nspace_handler_init(void)
7975 nspace_lock_attr
= lck_attr_alloc_init();
7976 nspace_group_attr
= lck_grp_attr_alloc_init();
7977 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
7978 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
7979 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
7980 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
// Called with an exiting process p. Unregisters p from every nspace_handlers[]
// slot it occupies, then marks the NEW/PROCESSING items whose event type belonged
// to one of those slots as DONE and wakes their waiters.
// NOTE(review): several lines (braces, early return) are not visible in this
// listing; comments describe only the visible statements.
7984 nspace_proc_exit(struct proc
*p
)
7986 int i
, event_mask
= 0;
// Collect the event bits of every handler slot registered to p and clear the slot.
7988 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
7989 if (p
== nspace_handlers
[i
].handler_proc
) {
7990 event_mask
|= nspace_item_flags_for_type(i
);
7991 nspace_handlers
[i
].handler_tid
= 0;
7992 nspace_handlers
[i
].handler_proc
= NULL
;
// event_mask == 0 means p was not registered in any slot.
7996 if (event_mask
== 0) {
8000 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8001 // if this process was the snapshot handler, zero snapshot_timestamp
8002 snapshot_timestamp
= 0;
8006 // unblock anyone that's waiting for the handler that died
8008 lck_mtx_lock(&nspace_handler_lock
);
8009 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8010 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
8012 if ( nspace_items
[i
].flags
& event_mask
) {
// VNEEDSSNAPSHOT is tested before the vnode lock is taken and cleared under it.
8014 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
8015 vnode_lock_spin(nspace_items
[i
].vp
);
8016 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8017 vnode_unlock(nspace_items
[i
].vp
);
8019 nspace_items
[i
].vp
= NULL
;
8020 nspace_items
[i
].vid
= 0;
8021 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8022 nspace_items
[i
].token
= 0;
// Same channel that resolve_nspace_item_ext() sleeps on for this slot.
8024 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8029 wakeup((caddr_t
)&nspace_item_idx
);
8030 lck_mtx_unlock(&nspace_handler_lock
);
// Convenience wrapper: forwards to resolve_nspace_item_ext() with a NULL arg and
// returns its result.
8035 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
8037 return resolve_nspace_item_ext(vp
, op
, NULL
);
// Posts a namespace event for (vp, op) into nspace_items[] and sleeps until a
// handler marks the item DONE or CANCELLED, or the wait times out.
//   vp  - vnode the event refers to
//   op  - event word; its type field selects the handler (see nspace_type_for_op)
//   arg - stored in the item, except that NSPACE_REARM_NO_ARG is stored as NULL
// NOTE(review): many lines (braces, returns, gotos, some declarations such as ts)
// are not visible in this listing; comments describe only the visible statements
// and the return values of the early exits could not be confirmed.
8041 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
8043 int i
, error
, keep_waiting
;
8045 nspace_type_t nspace_type
= nspace_type_for_op(op
);
8047 // only allow namespace events on regular files, directories and symlinks.
8048 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
8053 // if this is a snapshot event and the vnode is on a
8054 // disk image just pretend nothing happened since any
8055 // change to the disk image will cause the disk image
8056 // itself to get backed up and this avoids multi-way
8057 // deadlocks between the snapshot handler and the ever
8058 // popular diskimages-helper process. the variable
8059 // nspace_allow_virtual_devs allows this behavior to
8060 // be overridden (for use by the Mobile TimeMachine
8061 // testing infrastructure which uses disk images)
// NOTE(review): this tests op with a plain bit-and, whereas nspace_type_for_op()
// compares the masked type field for equality -- confirm the two agree.
8063 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
8064 && (vp
->v_mount
!= NULL
)
8065 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
8066 && !nspace_allow_virtual_devs
) {
8071 // if (thread_tid(current_thread()) == namespace_handler_tid) {
// No handler process is registered for this event type.
8072 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
// The caller is itself one of the registered handler processes.
8076 if (nspace_is_special_process(current_proc())) {
8080 lck_mtx_lock(&nspace_handler_lock
);
// First look for an existing item for the same vnode and op.
8083 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8084 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
// No existing item: look for a free slot (flags == 0).
8089 if (i
>= MAX_NSPACE_ITEMS
) {
8090 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8091 if (nspace_items
[i
].flags
== 0) {
8096 nspace_items
[i
].refcount
++;
// Table full: sleep on nspace_token_id, which is woken when a slot is released below.
8099 if (i
>= MAX_NSPACE_ITEMS
) {
8100 ts
.tv_sec
= nspace_handler_timeout
;
8103 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
8105 // an entry got free'd up, go see if we can get a slot
8108 lck_mtx_unlock(&nspace_handler_lock
);
8114 // if it didn't already exist, add it. if it did exist
8115 // we'll get woken up when someone does a wakeup() on
8116 // the slot in the nspace_items table.
8118 if (vp
!= nspace_items
[i
].vp
) {
8119 nspace_items
[i
].vp
= vp
;
8120 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
8121 nspace_items
[i
].op
= op
;
8122 nspace_items
[i
].vid
= vnode_vid(vp
);
8123 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
8124 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
// Snapshot events additionally tag the vnode with VNEEDSSNAPSHOT under the vnode lock.
8125 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8127 vnode_lock_spin(vp
);
8128 vp
->v_flag
|= VNEEDSSNAPSHOT
;
8133 nspace_items
[i
].token
= 0;
8134 nspace_items
[i
].refcount
= 1;
// Wake any handler sleeping in wait_for_namespace_event().
8136 wakeup((caddr_t
)&nspace_item_idx
);
8140 // Now go to sleep until the handler does a wakeup on this
8141 // slot in the nspace_items table (or we timeout).
8144 while(keep_waiting
) {
8145 ts
.tv_sec
= nspace_handler_timeout
;
8147 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
8149 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
// On cancel, the handler stored its result value in the token field.
8151 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
8152 error
= nspace_items
[i
].token
;
// On timeout, a pending RESET_TIMER request from the handler is consumed here.
8153 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
8154 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
8155 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
8160 } else if (error
== 0) {
8161 // hmmm, why did we get woken up?
8162 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8163 nspace_items
[i
].token
);
// Last waiter releases the slot and wakes anyone waiting for a free slot.
8166 if (--nspace_items
[i
].refcount
== 0) {
8167 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
8168 nspace_items
[i
].arg
= NULL
;
8169 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
8170 nspace_items
[i
].flags
= 0; // this clears it for re-use
8172 wakeup(&nspace_token_id
);
8176 lck_mtx_unlock(&nspace_handler_lock
);
// Looks up vp in nspace_items[] under nspace_handler_lock and, when an entry is
// found, stores that entry's flags word in *status.
// NOTE(review): the return statements are not visible in this listing -- confirm
// the values returned for the found and not-found cases.
8183 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
8187 lck_mtx_lock(&nspace_handler_lock
);
8188 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8189 if (nspace_items
[i
].vp
== vp
) {
// Not found: drop the lock before leaving.
8194 if (i
>= MAX_NSPACE_ITEMS
) {
8195 lck_mtx_unlock(&nspace_handler_lock
);
8199 *status
= nspace_items
[i
].flags
;
8200 lck_mtx_unlock(&nspace_handler_lock
);
// Formats a "/.vol/<fsid>/<fileid>" path for vp into path.
//   path - output buffer
//   len  - in: size of path; out: snprintf's return value plus one
// If vnode_getattr() fails, a fixed placeholder path is written instead.
// NOTE(review): snprintf returns the length it would have written, so *len can
// exceed the buffer size on truncation -- confirm callers tolerate that.
8207 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
8209 struct vnode_attr va
;
8213 VATTR_WANTED(&va
, va_fsid
);
8214 VATTR_WANTED(&va
, va_fileid
);
8216 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
8217 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
8220 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
// Opens an already-resolved vnode with mode fmode in context ctx. Visible steps:
// superuser check, MAC open check, kauth authorization for the read/write/append
// actions implied by fmode, VNOP_OPEN, vnode_ref_ext (with VNOP_CLOSE if that
// fails), then the MAC and kauth open notifications.
// NOTE(review): error-return lines are not visible in this listing.
8229 // Note: this function does NOT check permissions on all of the
8230 // parent directories leading to this vnode. It should only be
8231 // called on behalf of a root process. Otherwise a process may
8232 // get access to a file because the file itself is readable even
8233 // though its parent directories would prevent access.
8236 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
8240 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8245 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
8250 /* compute action to be authorized */
8252 if (fmode
& FREAD
) {
8253 action
|= KAUTH_VNODE_READ_DATA
;
8255 if (fmode
& (FWRITE
| O_TRUNC
)) {
8257 * If we are writing, appending, and not truncating,
8258 * indicate that we are appending so that if the
8259 * UF_APPEND or SF_APPEND bits are set, we do not deny
8262 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
8263 action
|= KAUTH_VNODE_APPEND_DATA
;
8265 action
|= KAUTH_VNODE_WRITE_DATA
;
8269 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
8274 // if the vnode is tagged VOPENEVT and the current process
8275 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8276 // flag to the open mode so that this open won't count against
8277 // the vnode when carbon delete() does a vnode_isinuse() to see
8278 // if a file is currently in use. this allows spotlight
8279 // importers to not interfere with carbon apps that depend on
8280 // the no-delete-if-busy semantics of carbon delete().
8282 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
8286 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
8289 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
8290 VNOP_CLOSE(vp
, fmode
, ctx
);
8294 /* Call out to allow 3rd party notification of open.
8295 * Ignore result of kauth_authorize_fileop call.
8298 mac_vnode_notify_open(ctx
, vp
, fmode
);
8300 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
// Handler-side wait loop for one handler type. Marks the handler slot busy,
// registers the calling thread/process if the slot has no handler_proc, then scans
// nspace_items[] for a NEW item of this type. A found item is moved to PROCESSING,
// given a fresh token, its vnode is opened and attached to a new file descriptor,
// and token, op, fd (plus optional info/objid fields) are copied out through the
// user addresses held in nhd. With no item available it sleeps on nspace_item_idx.
//   nhd         - user-space destination addresses (token, flags, fdptr, infoptr, objid)
//   nspace_type - which handler table slot / event type to serve
// NOTE(review): many lines (braces, breaks, gotos, error checks) are not visible
// in this listing; comments describe only the visible statements.
8308 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
8310 int i
, error
=0, unblock
=0;
// handler_busy is tested and set under nspace_handler_exclusion_lock.
8313 lck_mtx_lock(&nspace_handler_exclusion_lock
);
8314 if (nspace_handlers
[nspace_type
].handler_busy
) {
8315 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8318 nspace_handlers
[nspace_type
].handler_busy
= 1;
8319 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8322 * Any process that gets here will be one of the namespace handlers.
8323 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8324 * as we can cause deadlocks to occur, because the namespace handler may prevent
8325 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8328 curtask
= current_task();
8329 bsd_set_dependency_capable (curtask
);
8331 lck_mtx_lock(&nspace_handler_lock
);
// First caller for this type becomes the registered handler.
8332 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8333 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
8334 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
8337 while (error
== 0) {
// Scan for a NEW item whose event type matches this handler.
8339 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8340 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
8341 if (!nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
// Claim the item: NEW -> PROCESSING, and assign the next token value.
8348 if (i
< MAX_NSPACE_ITEMS
) {
8349 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
8350 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
8351 nspace_items
[i
].token
= ++nspace_token_id
;
8353 if (nspace_items
[i
].vp
) {
8354 struct fileproc
*fp
;
8355 int32_t indx
, fmode
;
8356 struct proc
*p
= current_proc();
8357 vfs_context_t ctx
= vfs_context_current();
8358 struct vnode_attr va
;
8362 * Use vnode pointer to acquire a file descriptor for
8363 * hand-off to userland
8365 fmode
= nspace_open_flags_for_type(nspace_type
);
// The vid recorded when the item was posted is used to validate the vnode.
8366 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
8371 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
8374 vnode_put(nspace_items
[i
].vp
);
// If no file descriptor can be allocated, undo the open and drop the iocount.
8378 if ((error
= falloc(p
, &fp
, &indx
, ctx
))) {
8379 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
8380 vnode_put(nspace_items
[i
].vp
);
8385 fp
->f_fglob
->fg_flag
= fmode
;
8386 fp
->f_fglob
->fg_ops
= &vnops
;
8387 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
8390 procfdtbl_releasefd(p
, indx
, NULL
);
8391 fp_drop(p
, indx
, fp
, 1);
8395 * All variants of the namespace handler struct support these three fields:
8396 * token, flags, and the FD pointer
// NOTE(review): these three copyouts sit on consecutive original lines and each
// assigns error, so a failure of the first or second is overwritten by a later
// success -- confirm whether the results should be accumulated or checked.
8398 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
8399 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
8400 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
8403 * Handle optional fields:
8404 * extended version support an info ptr (offset, length), and the
8406 * namedata version supports a unique per-link object ID
8410 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
8411 uint64_t u_offset
, u_length
;
8414 u_offset
= uio_offset(uio
);
8415 u_length
= uio_resid(uio
);
// NOTE(review): same pattern as above -- the second copyout overwrites the
// result of the first.
8420 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
8421 error
= copyout(&u_length
, nhd
->infoptr
+sizeof(uint64_t), sizeof(uint64_t));
8426 VATTR_WANTED(&va
, va_linkid
);
8427 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
// linkid stays 0 when the filesystem does not report va_linkid.
8429 uint64_t linkid
= 0;
8430 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
8431 linkid
= (uint64_t)va
.va_linkid
;
8433 error
= copyout (&linkid
, nhd
->objid
, sizeof(uint64_t));
8438 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
8439 fp_free(p
, indx
, fp
);
8443 vnode_put(nspace_items
[i
].vp
);
8447 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8448 i
, nspace_items
[i
].vp
, error
, nspace_items
[i
].vp
->v_name
);
// Nothing to hand out: sleep (no timeout) until an item is posted or state changes.
8452 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
8453 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
// Release the claimed item: clear VNEEDSSNAPSHOT if set, mark DONE, wake its waiter.
8462 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
8463 vnode_lock_spin(nspace_items
[i
].vp
);
8464 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8465 vnode_unlock(nspace_items
[i
].vp
);
8467 nspace_items
[i
].vp
= NULL
;
8468 nspace_items
[i
].vid
= 0;
8469 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8470 nspace_items
[i
].token
= 0;
8472 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8475 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
8476 // just go through every snapshot event and unblock it immediately.
8477 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
8478 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8479 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
8480 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
8481 nspace_items
[i
].vp
= NULL
;
8482 nspace_items
[i
].vid
= 0;
8483 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8484 nspace_items
[i
].token
= 0;
8486 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8493 lck_mtx_unlock(&nspace_handler_lock
);
// Clear the busy flag so another call for this handler type may enter.
8495 lck_mtx_lock(&nspace_handler_exclusion_lock
);
8496 nspace_handlers
[nspace_type
].handler_busy
= 0;
8497 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
// Checks that size equals the size of one of the known namespace-handler
// structures: the user64_* variants are compared first, the user32_* variants
// under the "32 bit" comment.
// NOTE(review): the is64bit test and the return statements are not visible in
// this listing -- confirm the values returned on match and on mismatch.
8502 static inline int validate_namespace_args (int is64bit
, int size
) {
8505 /* Must be one of these */
8506 if (size
== sizeof(user64_namespace_handler_info
)) {
8509 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
8512 if (size
== sizeof(user64_namespace_handler_data
)) {
8518 /* 32 bit -- must be one of these */
8519 if (size
== sizeof(user32_namespace_handler_info
)) {
8522 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
8525 if (size
== sizeof(user32_namespace_handler_data
)) {
// Common entry for the "get next event" fsctls. Checks superuser and the argument
// size, converts the caller's 32- or 64-bit handler structure in data into a
// kernel namespace_handler_data of user addresses, and then blocks in
// wait_for_namespace_event().
//   nspace_type - handler kind being served
//   is64bit     - selects the user64_* or user32_* layout (the selecting branch
//                 itself is not visible in this listing)
//   size        - byte size of the structure in data
//   data        - kernel copy of the caller's structure
// NOTE(review): return lines for the early exits are not visible in this listing.
8537 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
8540 namespace_handler_data nhd
;
8542 bzero (&nhd
, sizeof(namespace_handler_data
));
// Snapshot handler requested while snapshot_timestamp is 0 or ~0.
8544 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
8545 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
8549 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8553 error
= validate_namespace_args (is64bit
, size
);
8558 /* Copy in the userland pointers into our kernel-only struct */
8561 /* 64 bit userland structures */
8562 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
8563 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
8564 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
8566 /* If the size is greater than the standard info struct, add in extra fields */
8567 if (size
> (sizeof(user64_namespace_handler_info
))) {
8568 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
8569 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
8571 if (size
== (sizeof(user64_namespace_handler_data
))) {
8572 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
8574 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8578 /* 32 bit userland structures */
8579 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
8580 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
8581 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
8583 if (size
> (sizeof(user32_namespace_handler_info
))) {
8584 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
8585 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
8587 if (size
== (sizeof(user32_namespace_handler_data
))) {
// NOTE(review): the other 32-bit fields use CAST_USER_ADDR_T; this one uses a
// plain (user_addr_t) cast -- confirm that is intended.
8588 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
8590 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8594 return wait_for_namespace_event(&nhd
, nspace_type
);
// Shared implementation behind the fsctl-style system calls. Sizes the argument
// buffer from the command word (stack buffer up to STK_PARAMS bytes, kalloc
// beyond), copies the argument in, dispatches on IOCBASECMD(cmd) to the generic
// commands handled here (volume sync, package extensions, namespace/snapshot/
// tracked handlers, fstypename override), passes everything else to the
// filesystem through VNOP_IOCTL, and copies the result back out for IOC_OUT
// commands.
//   p       - calling process (used for the 64-bit check and privilege checks)
//   arg_vp  - in/out pointer to the target vnode
//   cmd     - ioctl-encoded command word
//   udata   - user address of the argument
//   options - passed through to VNOP_IOCTL
//   ctx     - VFS context for the call
// NOTE(review): many lines (braces, else arms, early-exit assignments) are not
// visible in this listing; comments describe only the visible statements.
8598 * Make a filesystem-specific control call:
8602 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
8607 #define STK_PARAMS 128
8608 char stkbuf
[STK_PARAMS
];
8610 vnode_t vp
= *arg_vp
;
8612 size
= IOCPARM_LEN(cmd
);
8613 if (size
> IOCPARM_MAX
) return (EINVAL
);
8615 is64bit
= proc_is64bit(p
);
// Arguments larger than the stack buffer are staged in a kalloc'ed buffer, which
// is released at the end of the function.
8618 if (size
> sizeof (stkbuf
)) {
8619 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
8627 error
= copyin(udata
, data
, size
);
8628 if (error
) goto FSCtl_Exit
;
8631 *(user_addr_t
*)data
= udata
;
8634 *(uint32_t *)data
= (uint32_t)udata
;
8637 } else if ((cmd
& IOC_OUT
) && size
) {
8639 * Zero the buffer so the user always
8640 * gets back something deterministic.
8643 } else if (cmd
& IOC_VOID
) {
8645 *(user_addr_t
*)data
= udata
;
8648 *(uint32_t *)data
= (uint32_t)udata
;
8652 /* Check to see if it's a generic command */
8653 if (IOCBASECMD(cmd
) == FSCTL_SYNC_VOLUME
) {
8654 mount_t mp
= vp
->v_mount
;
8655 int arg
= *(uint32_t*)data
;
8657 /* record vid of vp so we can drop it below. */
8658 uint32_t vvid
= vp
->v_id
;
8661 * Then grab mount_iterref so that we can release the vnode.
8662 * Without this, a thread may call vnode_iterate_prepare then
8663 * get into a deadlock because we've never released the root vp
8665 error
= mount_iterref (mp
, 0);
8671 /* issue the sync for this volume */
8672 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
8675 * Then release the mount_iterref once we're done syncing; it's not
8676 * needed for the VNOP_IOCTL below
8680 if (arg
& FSCTL_SYNC_FULLSYNC
) {
8681 /* re-obtain vnode iocount on the root vp, if possible */
8682 error
= vnode_getwithvid (vp
, vvid
);
8684 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
8688 /* mark the argument VP as having been released */
8691 } else if (IOCBASECMD(cmd
) == FSCTL_SET_PACKAGE_EXTS
) {
8692 user_addr_t ext_strings
;
8693 uint32_t num_entries
;
8696 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
8697 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
8699 // either you're 64-bit and passed a 64-bit struct or
8700 // you're 32-bit and passed a 32-bit struct. otherwise
8707 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
8708 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
8709 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
8711 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
8712 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
8713 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
8716 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
8721 /* namespace handlers */
8722 else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_GET
) {
8723 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
8726 /* Snapshot handlers */
8727 else if (IOCBASECMD(cmd
) == FSCTL_OLD_SNAPSHOT_HANDLER_GET
) {
8728 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
8729 } else if (IOCBASECMD(cmd
) == FSCTL_SNAPSHOT_HANDLER_GET_EXT
) {
8730 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
8733 /* Tracked File Handlers */
8734 else if (IOCBASECMD(cmd
) == FSCTL_TRACKED_HANDLER_GET
) {
8735 error
= process_namespace_fsctl(NSPACE_HANDLER_TRACK
, is64bit
, size
, data
);
// NOTE(review): GETDATA is served as NSPACE_HANDLER_TRACK here, not
// NSPACE_HANDLER_NSPACE -- confirm this is intended.
8737 else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_GETDATA
) {
8738 error
= process_namespace_fsctl(NSPACE_HANDLER_TRACK
, is64bit
, size
, data
);
// UPDATE: a registered handler asks for more time on the item with this token.
8739 } else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_UPDATE
) {
8740 uint32_t token
, val
;
8743 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
8747 if (!nspace_is_special_process(p
)) {
// NOTE(review): reads two 32-bit words from data; no size check is visible in
// this listing -- confirm the command's encoded size guarantees 8 bytes.
8752 token
= ((uint32_t *)data
)[0];
8753 val
= ((uint32_t *)data
)[1];
8755 lck_mtx_lock(&nspace_handler_lock
);
8757 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8758 if (nspace_items
[i
].token
== token
) {
8763 if (i
>= MAX_NSPACE_ITEMS
) {
8767 // if this bit is set, when resolve_nspace_item() times out
8768 // it will loop and go back to sleep.
8770 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
8773 lck_mtx_unlock(&nspace_handler_lock
);
8776 printf("nspace-handler-update: did not find token %u\n", token
);
// UNBLOCK: a registered handler reports the item with this token as finished.
8779 } else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_UNBLOCK
) {
8780 uint32_t token
, val
;
8783 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
8787 if (!nspace_is_special_process(p
)) {
8792 token
= ((uint32_t *)data
)[0];
8793 val
= ((uint32_t *)data
)[1];
8795 lck_mtx_lock(&nspace_handler_lock
);
8797 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8798 if (nspace_items
[i
].token
== token
) {
8803 if (i
>= MAX_NSPACE_ITEMS
) {
8804 printf("nspace-handler-unblock: did not find token %u\n", token
);
// VNEEDSSNAPSHOT is cleared only when the handler passed val == 0.
8807 if (val
== 0 && nspace_items
[i
].vp
) {
8808 vnode_lock_spin(nspace_items
[i
].vp
);
8809 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8810 vnode_unlock(nspace_items
[i
].vp
);
8813 nspace_items
[i
].vp
= NULL
;
8814 nspace_items
[i
].arg
= NULL
;
8815 nspace_items
[i
].op
= 0;
8816 nspace_items
[i
].vid
= 0;
8817 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8818 nspace_items
[i
].token
= 0;
8820 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8823 lck_mtx_unlock(&nspace_handler_lock
);
// CANCEL: a registered handler cancels the item; val is stored in the token field,
// which resolve_nspace_item_ext() then uses as its error value.
8825 } else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_CANCEL
) {
8826 uint32_t token
, val
;
8829 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
8833 if (!nspace_is_special_process(p
)) {
8838 token
= ((uint32_t *)data
)[0];
8839 val
= ((uint32_t *)data
)[1];
8841 lck_mtx_lock(&nspace_handler_lock
);
8843 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8844 if (nspace_items
[i
].token
== token
) {
8849 if (i
>= MAX_NSPACE_ITEMS
) {
8850 printf("nspace-handler-cancel: did not find token %u\n", token
);
8853 if (nspace_items
[i
].vp
) {
8854 vnode_lock_spin(nspace_items
[i
].vp
);
8855 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8856 vnode_unlock(nspace_items
[i
].vp
);
8859 nspace_items
[i
].vp
= NULL
;
8860 nspace_items
[i
].arg
= NULL
;
8861 nspace_items
[i
].vid
= 0;
8862 nspace_items
[i
].token
= val
;
8863 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
8864 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
8866 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8869 lck_mtx_unlock(&nspace_handler_lock
);
8870 } else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
) {
8871 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8875 // we explicitly do not do the namespace_handler_proc check here
8877 lck_mtx_lock(&nspace_handler_lock
);
8878 snapshot_timestamp
= ((uint32_t *)data
)[0];
// Wake handlers sleeping in wait_for_namespace_event() so they see the new value.
8879 wakeup(&nspace_item_idx
);
8880 lck_mtx_unlock(&nspace_handler_lock
);
8881 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
8883 } else if (IOCBASECMD(cmd
) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
) {
8884 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8888 lck_mtx_lock(&nspace_handler_lock
);
8889 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
8890 lck_mtx_unlock(&nspace_handler_lock
);
8891 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8892 nspace_allow_virtual_devs
? "" : " NOT");
8895 } else if (IOCBASECMD(cmd
) == FSCTL_SET_FSTYPENAME_OVERRIDE
) {
8896 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8900 mount_lock(vp
->v_mount
);
// NOTE(review): strlcpy scans its source up to a NUL; data holds only size bytes
// copied from the caller -- confirm the buffer is guaranteed to be terminated.
8902 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
8903 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
// A read-only mount overridden to "mtmfs" gets extended security enabled and
// opaque authorization disabled.
8904 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
8905 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
8906 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
8909 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
8910 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
8912 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
8913 vp
->v_mount
->fstypename_override
[0] = '\0';
8915 mount_unlock(vp
->v_mount
);
8918 /* Invoke the filesystem-specific code */
8919 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
8924 * Copy any data to user, size was
8925 * already set and checked above.
8927 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
8928 error
= copyout(data
, udata
, size
);
// memp is only non-zero when the argument did not fit in stkbuf.
8931 if (memp
) kfree(memp
, size
);
8938 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
8941 struct nameidata nd
;
8944 vfs_context_t ctx
= vfs_context_current();
8946 AUDIT_ARG(cmd
, uap
->cmd
);
8947 AUDIT_ARG(value32
, uap
->options
);
8948 /* Get the vnode for the file we are getting info on: */
8950 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8951 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
8952 UIO_USERSPACE
, uap
->path
, ctx
);
8953 if ((error
= namei(&nd
))) goto done
;
8958 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
8964 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
8973 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
8977 vfs_context_t ctx
= vfs_context_current();
8980 AUDIT_ARG(fd
, uap
->fd
);
8981 AUDIT_ARG(cmd
, uap
->cmd
);
8982 AUDIT_ARG(value32
, uap
->options
);
8984 /* Get the vnode for the file we are getting info on: */
8985 if ((error
= file_vnode(uap
->fd
, &vp
)))
8988 if ((error
= vnode_getwithref(vp
))) {
8993 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
8999 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9009 /* end of fsctl system call */
9012 * An in-kernel sync for power management to call.
9014 __private_extern__
int
9019 struct sync_args data
;
9024 error
= sync(current_proc(), &data
, &retval
[0]);
9028 } /* end of sync_internal call */
9032 * Retrieve the data of an extended attribute.
9035 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
9038 struct nameidata nd
;
9039 char attrname
[XATTR_MAXNAMELEN
+1];
9040 vfs_context_t ctx
= vfs_context_current();
9042 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9043 size_t attrsize
= 0;
9045 u_int32_t nameiflags
;
9047 char uio_buf
[ UIO_SIZEOF(1) ];
9049 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9052 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9053 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9054 if ((error
= namei(&nd
))) {
9060 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9063 if (xattr_protected(attrname
)) {
9064 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
9070 * the specific check for 0xffffffff is a hack to preserve
9071 * binaray compatibilty in K64 with applications that discovered
9072 * that passing in a buf pointer and a size of -1 resulted in
9073 * just the size of the indicated extended attribute being returned.
9074 * this isn't part of the documented behavior, but because of the
9075 * original implemtation's check for "uap->size > 0", this behavior
9076 * was allowed. In K32 that check turned into a signed comparison
9077 * even though uap->size is unsigned... in K64, we blow by that
9078 * check because uap->size is unsigned and doesn't get sign smeared
9079 * in the munger for a 32 bit user app. we also need to add a
9080 * check to limit the maximum size of the buffer being passed in...
9081 * unfortunately, the underlying fileystems seem to just malloc
9082 * the requested size even if the actual extended attribute is tiny.
9083 * because that malloc is for kernel wired memory, we have to put a
9086 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9087 * U64 running on K64 will yield -1 (64 bits wide)
9088 * U32/U64 running on K32 will yield -1 (32 bits wide)
9090 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
9094 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
9095 uap
->size
= XATTR_MAXSIZE
;
9097 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9098 &uio_buf
[0], sizeof(uio_buf
));
9099 uio_addiov(auio
, uap
->value
, uap
->size
);
9102 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
9107 *retval
= uap
->size
- uio_resid(auio
);
9109 *retval
= (user_ssize_t
)attrsize
;
9116 * Retrieve the data of an extended attribute.
9119 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
9122 char attrname
[XATTR_MAXNAMELEN
+1];
9124 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9125 size_t attrsize
= 0;
9128 char uio_buf
[ UIO_SIZEOF(1) ];
9130 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9133 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9136 if ( (error
= vnode_getwithref(vp
)) ) {
9140 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9143 if (xattr_protected(attrname
)) {
9147 if (uap
->value
&& uap
->size
> 0) {
9148 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9149 &uio_buf
[0], sizeof(uio_buf
));
9150 uio_addiov(auio
, uap
->value
, uap
->size
);
9153 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
9155 (void)vnode_put(vp
);
9159 *retval
= uap
->size
- uio_resid(auio
);
9161 *retval
= (user_ssize_t
)attrsize
;
9167 * Set the data of an extended attribute.
9170 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
9173 struct nameidata nd
;
9174 char attrname
[XATTR_MAXNAMELEN
+1];
9175 vfs_context_t ctx
= vfs_context_current();
9177 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9179 u_int32_t nameiflags
;
9181 char uio_buf
[ UIO_SIZEOF(1) ];
9183 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9186 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9187 if (error
== EPERM
) {
9188 /* if the string won't fit in attrname, copyinstr emits EPERM */
9189 return (ENAMETOOLONG
);
9191 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9194 if (xattr_protected(attrname
))
9196 if (uap
->size
!= 0 && uap
->value
== 0) {
9200 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9201 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9202 if ((error
= namei(&nd
))) {
9208 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9209 &uio_buf
[0], sizeof(uio_buf
));
9210 uio_addiov(auio
, uap
->value
, uap
->size
);
9212 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
9215 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9226 * Set the data of an extended attribute.
9229 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
9232 char attrname
[XATTR_MAXNAMELEN
+1];
9234 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9237 char uio_buf
[ UIO_SIZEOF(1) ];
9239 vfs_context_t ctx
= vfs_context_current();
9242 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9245 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9248 if (xattr_protected(attrname
))
9250 if (uap
->size
!= 0 && uap
->value
== 0) {
9253 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9256 if ( (error
= vnode_getwithref(vp
)) ) {
9260 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9261 &uio_buf
[0], sizeof(uio_buf
));
9262 uio_addiov(auio
, uap
->value
, uap
->size
);
9264 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
9267 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9279 * Remove an extended attribute.
9280 * XXX Code duplication here.
9283 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
9286 struct nameidata nd
;
9287 char attrname
[XATTR_MAXNAMELEN
+1];
9288 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9289 vfs_context_t ctx
= vfs_context_current();
9291 u_int32_t nameiflags
;
9294 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9297 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9301 if (xattr_protected(attrname
))
9303 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9304 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9305 if ((error
= namei(&nd
))) {
9311 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
9314 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
9325 * Remove an extended attribute.
9326 * XXX Code duplication here.
9329 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
9332 char attrname
[XATTR_MAXNAMELEN
+1];
9336 vfs_context_t ctx
= vfs_context_current();
9339 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9342 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9346 if (xattr_protected(attrname
))
9348 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9351 if ( (error
= vnode_getwithref(vp
)) ) {
9356 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
9359 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
9371 * Retrieve the list of extended attribute names.
9372 * XXX Code duplication here.
9375 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
9378 struct nameidata nd
;
9379 vfs_context_t ctx
= vfs_context_current();
9381 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9382 size_t attrsize
= 0;
9383 u_int32_t nameiflags
;
9385 char uio_buf
[ UIO_SIZEOF(1) ];
9387 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9390 nameiflags
= ((uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
) | NOTRIGGER
;
9391 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9392 if ((error
= namei(&nd
))) {
9397 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
9398 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
9399 &uio_buf
[0], sizeof(uio_buf
));
9400 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
9403 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
9407 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
9409 *retval
= (user_ssize_t
)attrsize
;
9415 * Retrieve the list of extended attribute names.
9416 * XXX Code duplication here.
9419 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
9423 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9424 size_t attrsize
= 0;
9426 char uio_buf
[ UIO_SIZEOF(1) ];
9428 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9431 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9434 if ( (error
= vnode_getwithref(vp
)) ) {
9438 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
9439 auio
= uio_createwithbuffer(1, 0, spacetype
,
9440 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
9441 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
9444 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
9449 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
9451 *retval
= (user_ssize_t
)attrsize
;
9457 * Obtain the full pathname of a file system object by id.
9459 * This is a private SPI used by the File Manager.
9463 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
9466 struct mount
*mp
= NULL
;
9467 vfs_context_t ctx
= vfs_context_current();
9474 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
9477 AUDIT_ARG(value32
, fsid
.val
[0]);
9478 AUDIT_ARG(value64
, uap
->objid
);
9479 /* Restrict output buffer size for now. */
9480 if (uap
->bufsize
> PAGE_SIZE
) {
9483 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
9484 if (realpath
== NULL
) {
9487 /* Find the target mountpoint. */
9488 if ((mp
= mount_lookupby_volfsid(fsid
.val
[0], 1)) == NULL
) {
9489 error
= ENOTSUP
; /* unexpected failure */
9493 /* Find the target vnode. */
9494 if (uap
->objid
== 2) {
9495 error
= VFS_ROOT(mp
, &vp
, ctx
);
9497 error
= VFS_VGET(mp
, (ino64_t
)uap
->objid
, &vp
, ctx
);
9500 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
9502 * If the fileid isn't found and we're in a union
9503 * mount volume, then see if the fileid is in the
9504 * mounted-on volume.
9506 struct mount
*tmp
= mp
;
9507 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
9509 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
9518 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
9524 /* Obtain the absolute path to this vnode. */
9525 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
9526 bpflags
|= BUILDPATH_CHECK_MOVED
;
9527 error
= build_path(vp
, realpath
, uap
->bufsize
, &length
, bpflags
, ctx
);
9532 AUDIT_ARG(text
, realpath
);
9534 if (kdebug_enable
) {
9535 long dbg_parms
[NUMPARMS
];
9538 dbg_namelen
= (int)sizeof(dbg_parms
);
9540 if (length
< dbg_namelen
) {
9541 memcpy((char *)dbg_parms
, realpath
, length
);
9542 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
9544 dbg_namelen
= length
;
9546 memcpy((char *)dbg_parms
, realpath
+ (length
- dbg_namelen
), dbg_namelen
);
9548 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
9550 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
9552 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
9555 FREE(realpath
, M_TEMP
);
9561 * Common routine to handle various flavors of statfs data heading out
9564 * Returns: 0 Success
9568 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
9569 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
9570 boolean_t partial_copy
)
9573 int my_size
, copy_size
;
9576 struct user64_statfs sfs
;
9577 my_size
= copy_size
= sizeof(sfs
);
9578 bzero(&sfs
, my_size
);
9579 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
9580 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
9581 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
9582 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
9583 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
9584 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
9585 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
9586 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
9587 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
9588 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
9589 sfs
.f_fsid
= sfsp
->f_fsid
;
9590 sfs
.f_owner
= sfsp
->f_owner
;
9591 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
9592 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
9594 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
9596 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
9597 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
9600 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
9602 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
9605 struct user32_statfs sfs
;
9607 my_size
= copy_size
= sizeof(sfs
);
9608 bzero(&sfs
, my_size
);
9610 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
9611 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
9612 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
9615 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
9616 * have to fudge the numbers here in that case. We inflate the blocksize in order
9617 * to reflect the filesystem size as best we can.
9619 if ((sfsp
->f_blocks
> INT_MAX
)
9620 /* Hack for 4061702 . I think the real fix is for Carbon to
9621 * look for some volume capability and not depend on hidden
9622 * semantics agreed between a FS and carbon.
9623 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9624 * for Carbon to set bNoVolumeSizes volume attribute.
9625 * Without this the webdavfs files cannot be copied onto
9626 * disk as they look huge. This change should not affect
9627 * XSAN as they should not setting these to -1..
9629 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
9630 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
9631 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
9635 * Work out how far we have to shift the block count down to make it fit.
9636 * Note that it's possible to have to shift so far that the resulting
9637 * blocksize would be unreportably large. At that point, we will clip
9638 * any values that don't fit.
9640 * For safety's sake, we also ensure that f_iosize is never reported as
9641 * being smaller than f_bsize.
9643 for (shift
= 0; shift
< 32; shift
++) {
9644 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
9646 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
9649 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9650 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
9651 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
9652 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
9653 #undef __SHIFT_OR_CLIP
9654 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
9655 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
9657 /* filesystem is small enough to be reported honestly */
9658 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
9659 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
9660 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
9661 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
9662 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
9664 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
9665 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
9666 sfs
.f_fsid
= sfsp
->f_fsid
;
9667 sfs
.f_owner
= sfsp
->f_owner
;
9668 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
9669 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
9671 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
9673 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
9674 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
9677 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
9679 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
9682 if (sizep
!= NULL
) {
9689 * copy stat structure into user_stat structure.
9691 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
9693 bzero(usbp
, sizeof(*usbp
));
9695 usbp
->st_dev
= sbp
->st_dev
;
9696 usbp
->st_ino
= sbp
->st_ino
;
9697 usbp
->st_mode
= sbp
->st_mode
;
9698 usbp
->st_nlink
= sbp
->st_nlink
;
9699 usbp
->st_uid
= sbp
->st_uid
;
9700 usbp
->st_gid
= sbp
->st_gid
;
9701 usbp
->st_rdev
= sbp
->st_rdev
;
9702 #ifndef _POSIX_C_SOURCE
9703 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
9704 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
9705 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
9706 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
9707 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
9708 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
9710 usbp
->st_atime
= sbp
->st_atime
;
9711 usbp
->st_atimensec
= sbp
->st_atimensec
;
9712 usbp
->st_mtime
= sbp
->st_mtime
;
9713 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
9714 usbp
->st_ctime
= sbp
->st_ctime
;
9715 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
9717 usbp
->st_size
= sbp
->st_size
;
9718 usbp
->st_blocks
= sbp
->st_blocks
;
9719 usbp
->st_blksize
= sbp
->st_blksize
;
9720 usbp
->st_flags
= sbp
->st_flags
;
9721 usbp
->st_gen
= sbp
->st_gen
;
9722 usbp
->st_lspare
= sbp
->st_lspare
;
9723 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
9724 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
9727 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
9729 bzero(usbp
, sizeof(*usbp
));
9731 usbp
->st_dev
= sbp
->st_dev
;
9732 usbp
->st_ino
= sbp
->st_ino
;
9733 usbp
->st_mode
= sbp
->st_mode
;
9734 usbp
->st_nlink
= sbp
->st_nlink
;
9735 usbp
->st_uid
= sbp
->st_uid
;
9736 usbp
->st_gid
= sbp
->st_gid
;
9737 usbp
->st_rdev
= sbp
->st_rdev
;
9738 #ifndef _POSIX_C_SOURCE
9739 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
9740 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
9741 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
9742 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
9743 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
9744 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
9746 usbp
->st_atime
= sbp
->st_atime
;
9747 usbp
->st_atimensec
= sbp
->st_atimensec
;
9748 usbp
->st_mtime
= sbp
->st_mtime
;
9749 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
9750 usbp
->st_ctime
= sbp
->st_ctime
;
9751 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
9753 usbp
->st_size
= sbp
->st_size
;
9754 usbp
->st_blocks
= sbp
->st_blocks
;
9755 usbp
->st_blksize
= sbp
->st_blksize
;
9756 usbp
->st_flags
= sbp
->st_flags
;
9757 usbp
->st_gen
= sbp
->st_gen
;
9758 usbp
->st_lspare
= sbp
->st_lspare
;
9759 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
9760 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
9764 * copy stat64 structure into user_stat64 structure.
9766 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
9768 bzero(usbp
, sizeof(*usbp
));
9770 usbp
->st_dev
= sbp
->st_dev
;
9771 usbp
->st_ino
= sbp
->st_ino
;
9772 usbp
->st_mode
= sbp
->st_mode
;
9773 usbp
->st_nlink
= sbp
->st_nlink
;
9774 usbp
->st_uid
= sbp
->st_uid
;
9775 usbp
->st_gid
= sbp
->st_gid
;
9776 usbp
->st_rdev
= sbp
->st_rdev
;
9777 #ifndef _POSIX_C_SOURCE
9778 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
9779 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
9780 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
9781 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
9782 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
9783 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
9784 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
9785 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
9787 usbp
->st_atime
= sbp
->st_atime
;
9788 usbp
->st_atimensec
= sbp
->st_atimensec
;
9789 usbp
->st_mtime
= sbp
->st_mtime
;
9790 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
9791 usbp
->st_ctime
= sbp
->st_ctime
;
9792 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
9793 usbp
->st_birthtime
= sbp
->st_birthtime
;
9794 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
9796 usbp
->st_size
= sbp
->st_size
;
9797 usbp
->st_blocks
= sbp
->st_blocks
;
9798 usbp
->st_blksize
= sbp
->st_blksize
;
9799 usbp
->st_flags
= sbp
->st_flags
;
9800 usbp
->st_gen
= sbp
->st_gen
;
9801 usbp
->st_lspare
= sbp
->st_lspare
;
9802 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
9803 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
9806 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
9808 bzero(usbp
, sizeof(*usbp
));
9810 usbp
->st_dev
= sbp
->st_dev
;
9811 usbp
->st_ino
= sbp
->st_ino
;
9812 usbp
->st_mode
= sbp
->st_mode
;
9813 usbp
->st_nlink
= sbp
->st_nlink
;
9814 usbp
->st_uid
= sbp
->st_uid
;
9815 usbp
->st_gid
= sbp
->st_gid
;
9816 usbp
->st_rdev
= sbp
->st_rdev
;
9817 #ifndef _POSIX_C_SOURCE
9818 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
9819 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
9820 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
9821 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
9822 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
9823 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
9824 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
9825 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
9827 usbp
->st_atime
= sbp
->st_atime
;
9828 usbp
->st_atimensec
= sbp
->st_atimensec
;
9829 usbp
->st_mtime
= sbp
->st_mtime
;
9830 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
9831 usbp
->st_ctime
= sbp
->st_ctime
;
9832 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
9833 usbp
->st_birthtime
= sbp
->st_birthtime
;
9834 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
9836 usbp
->st_size
= sbp
->st_size
;
9837 usbp
->st_blocks
= sbp
->st_blocks
;
9838 usbp
->st_blksize
= sbp
->st_blksize
;
9839 usbp
->st_flags
= sbp
->st_flags
;
9840 usbp
->st_gen
= sbp
->st_gen
;
9841 usbp
->st_lspare
= sbp
->st_lspare
;
9842 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
9843 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
9847 * Purge buffer cache for simulating cold starts
9849 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
9851 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
9853 return VNODE_RETURNED
;
9856 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
9858 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
9860 return VFS_RETURNED
;
9864 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
9866 if (!kauth_cred_issuser(kauth_cred_get()))
9869 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);