2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <machine/cons.h>
104 #include <machine/limits.h>
105 #include <miscfs/specfs/specdev.h>
107 #include <security/audit/audit.h>
108 #include <bsm/audit_kevents.h>
110 #include <mach/mach_types.h>
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/task.h>
115 #include <vm/vm_pageout.h>
117 #include <libkern/OSAtomic.h>
118 #include <pexpert/pexpert.h>
119 #include <IOKit/IOBSD.h>
122 #include <security/mac.h>
123 #include <security/mac_framework.h>
/*
 * GET_PATH(x) / RELEASE_PATH(x): acquire and release a scratch path buffer.
 * Two alternative definitions are visible here: one assigns x from
 * get_pathbuff(), the other allocates MAXPATHLEN bytes of type M_NAMEI with
 * MALLOC_ZONE and frees them with FREE_ZONE.  The closing #endif is tagged
 * CONFIG_FSE, so the choice presumably depends on that option; the opening
 * conditional lines are not visible in this view.
 */
127 #define GET_PATH(x) \
128 (x) = get_pathbuff();
129 #define RELEASE_PATH(x) \
132 #define GET_PATH(x) \
133 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
134 #define RELEASE_PATH(x) \
135 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
136 #endif /* CONFIG_FSE */
138 /* struct for checkdirs iteration */
143 /* callback for checkdirs iteration */
144 static int checkdirs_callback(proc_t p
, void * arg
);
146 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
147 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
148 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
149 static int getfsstat_callback(mount_t mp
, void * arg
);
150 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
151 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
152 static int sync_callback(mount_t
, void *);
153 static void sync_thread(void *, __unused wait_result_t
);
154 static int sync_async(int);
155 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
156 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
157 boolean_t partial_copy
);
158 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
160 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
161 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
162 struct componentname
*cnp
, user_addr_t fsmountargs
,
163 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
165 void vfs_notify_mount(vnode_t pdvp
);
167 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
169 struct fd_vn_data
* fg_vn_data_alloc(void);
172 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
173 * Concurrent lookups (or lookups by ids) on hard links can cause the
174 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
175 * does) to return ENOENT as the path cannot be returned from the name cache
176 * alone. We have no option but to retry and hope to get one namei->reverse path
177 * generation done without an intervening lookup or lookup by id on the hard
178 * link item. This is only an issue for MAC hooks that cannot re-enter the
179 * filesystem; currently those are the MAC hooks for rename, unlink and rmdir.
181 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
183 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
185 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
187 #ifdef CONFIG_IMGSRC_ACCESS
188 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
189 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
190 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
191 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
192 static void mount_end_update(mount_t mp
);
193 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
194 #endif /* CONFIG_IMGSRC_ACCESS */
196 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
199 int sync_internal(void);
202 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
204 extern lck_grp_t
*fd_vn_lck_grp
;
205 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
206 extern lck_attr_t
*fd_vn_lck_attr
;
209 * incremented each time a mount or unmount operation occurs
210 * used to invalidate the cached value of the rootvp in the
211 * mount structure utilized by cache_lookup_path
/* Generation counter for mount/unmount activity; starts at 0. */
213 uint32_t mount_generation
= 0;
215 /* counts number of mount and unmount operations */
/* mount_common bumps this with OSAddAtomic(1, ...) on the fresh-mount path. */
216 unsigned int vfs_nummntops
=0;
218 extern const struct fileops vnops
;
219 #if CONFIG_APPLEDOUBLE
220 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
221 #endif /* CONFIG_APPLEDOUBLE */
223 typedef uint32_t vfs_rename_flags_t
;
224 #if CONFIG_SECLUDED_RENAME
226 VFS_SECLUDE_RENAME
= 0x00000001
231 * Virtual File System System Calls
234 #if NFSCLIENT || DEVFS
236 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount: report whether a mount was created from inside the
 * kernel.  Returns TRUE when MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag
 * and FALSE otherwise; mount_common sets that bit in its
 * NFSCLIENT || DEVFS section.
 */
240 vfs_iskernelmount(mount_t mp
)
242 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount: in-kernel entry point for mounting a file system.
 *
 * Initialises a nameidata for path in system space (UIO_SYSSPACE) with
 * FOLLOW | AUDITVNPATH1 | WANTPARENT and passes its component name
 * (&nd.ni_cnd) to mount_common with labelstr NULL and kernelmount TRUE.
 * The pnbuf assignments install the caller-supplied path as cn_pnbuf with
 * cn_pnlen = strlen(path) + 1; presumably that is the branch taken when the
 * covered vnode was supplied by the caller (the branch lines are not visible
 * in this view).
 *
 * Parameters:
 *   fstype         file system type name, forwarded as fstypename
 *   pvp, vp        parent of the covered vnode and the covered vnode
 *   path           kernel-space path of the mount point
 *   data           file-system-specific arguments, cast to a user address
 *   datalen        unused
 *   syscall_flags  forwarded as the generic mount flags
 *   kern_flags     forwarded as internal_flags (marked __unused in the
 *                  signature even though it is passed on)
 *   ctx            context of the caller
 */
247 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
248 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
254 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
255 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
258 * Get the vnode to be covered if it's not supplied
268 char *pnbuf
= CAST_DOWN(char *, path
);
270 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
271 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
275 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
276 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
286 #endif /* NFSCLIENT || DEVFS */
289 * Mount a file system.
/*
 * mount: system call entry that forwards to __mac_mount.
 *
 * Copies type, path, flags and data from the mount_args into a
 * __mac_mount_args, sets mac_p to USER_ADDR_NULL (no MAC label supplied),
 * and returns whatever __mac_mount returns.
 */
293 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
295 struct __mac_mount_args muap
;
297 muap
.type
= uap
->type
;
298 muap
.path
= uap
->path
;
299 muap
.flags
= uap
->flags
;
300 muap
.data
= uap
->data
;
301 muap
.mac_p
= USER_ADDR_NULL
;
302 return (__mac_mount(p
, &muap
, retval
));
/*
 * vfs_notify_mount: announce a mount to listeners.
 *
 * Signals VQ_MOUNT through vfs_event_signal and posts NOTE_WRITE on pdvp
 * through lock_vnode_and_post.  mount_common calls this with pvp, the
 * parent of the covered vnode.
 */
306 vfs_notify_mount(vnode_t pdvp
)
308 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
309 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
314 * Mount a file system taking into account MAC label behavior.
315 * See mount(2) man page for more information
317 * Parameters: p Process requesting the mount
318 * uap User argument descriptor (see below)
321 * Indirect: uap->type Filesystem type
322 * uap->path Path to mount
323 * uap->data Mount arguments
324 * uap->mac_p MAC info
325 * uap->flags Mount flags
/*
 * Starts FALSE.  __mac_mount sets it to TRUE, under
 * CHECK_CS_VALIDATION_BITMAP, when the requested mount flags lack MNT_RDONLY;
 * the note next to that code refers to attempts to mount the root FS RW.
 */
331 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount: mount system call body; mount() forwards here with
 * mac_p set to USER_ADDR_NULL.
 *
 * Visible steps, in order:
 *   - copy the file system type name in from uap->type (at most MFSNAMELEN
 *     bytes) with copyinstr;
 *   - initialise a lookup of uap->path (user space, FOLLOW | AUDITVNPATH1 |
 *     WANTPARENT) for the vnode to be covered;
 *   - under CONFIG_IMGSRC_ACCESS, a request whose flags equal
 *     MNT_IMGSRC_BY_INDEX exactly is handed to relocate_imageboot_source;
 *   - under CONFIG_MACF, when uap->mac_p is set, copy in the 64-bit or 32-bit
 *     mac descriptor, test m_buflen against the range
 *     2 .. MAC_MAX_LABEL_BUF_LEN, and copy the label string into a buffer
 *     obtained with MALLOC (released with FREE after the mount attempt);
 *   - apply the special cases for MNT_UNION and for the root vnode of the
 *     root file system (VROOT plus MNT_ROOTFS);
 *   - call mount_common with internal_flags 0 and kernelmount FALSE.
 *
 * NOTE(review): the error checks, gotos and final cleanup are not visible in
 * this view, so the exact error paths are not described here.
 */
334 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
338 int need_nameidone
= 0;
339 vfs_context_t ctx
= vfs_context_current();
340 char fstypename
[MFSNAMELEN
];
343 char *labelstr
= NULL
;
344 int flags
= uap
->flags
;
346 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
347 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
352 * Get the fs type name from user space
354 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
359 * Get the vnode to be covered
361 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
362 UIO_USERSPACE
, uap
->path
, ctx
);
371 #ifdef CONFIG_IMGSRC_ACCESS
372 /* Mounting image source cannot be batched with other operations */
/* Exact equality, not a bit test: no other flag may accompany the request. */
373 if (flags
== MNT_IMGSRC_BY_INDEX
) {
374 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
375 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
378 #endif /* CONFIG_IMGSRC_ACCESS */
382 * Get the label string (if any) from user space
384 if (uap
->mac_p
!= USER_ADDR_NULL
) {
389 struct user64_mac mac64
;
390 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
391 mac
.m_buflen
= mac64
.m_buflen
;
392 mac
.m_string
= mac64
.m_string
;
394 struct user32_mac mac32
;
395 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
396 mac
.m_buflen
= mac32
.m_buflen
;
397 mac
.m_string
= mac32
.m_string
;
401 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
402 (mac
.m_buflen
< 2)) {
406 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
407 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
411 AUDIT_ARG(mac_string
, labelstr
);
413 #endif /* CONFIG_MACF */
415 AUDIT_ARG(fflags
, flags
);
418 if (flags
& MNT_UNION
) {
419 /* No union mounts on release kernels */
425 if ((vp
->v_flag
& VROOT
) &&
426 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
427 if (!(flags
& MNT_UNION
)) {
432 * For a union mount on '/', treat it as fresh
433 * mount instead of update.
434 * Otherwise, union mouting on '/' used to panic the
435 * system before, since mnt_vnodecovered was found to
436 * be NULL for '/' which is required for unionlookup
437 * after it gets ENOENT on union mount.
439 flags
= (flags
& ~(MNT_UPDATE
));
443 if ((flags
& MNT_RDONLY
) == 0) {
444 /* Release kernels are not allowed to mount "/" as rw */
450 * See 7392553 for more details on why this check exists.
451 * Suffice to say: If this check is ON and something tries
452 * to mount the rootFS RW, we'll turn off the codesign
453 * bitmap optimization.
455 #if CHECK_CS_VALIDATION_BITMAP
456 if ((flags
& MNT_RDONLY
) == 0 ) {
457 root_fs_upgrade_try
= TRUE
;
462 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
463 labelstr
, FALSE
, ctx
);
469 FREE(labelstr
, M_MACTEMP
);
470 #endif /* CONFIG_MACF */
478 if (need_nameidone
) {
486 * common mount implementation (final stage of mounting)
489 * fstypename file system type (i.e. its vfs name)
490 * pvp parent of covered vnode
492 * cnp component name (i.e. path) of covered vnode
493 * flags generic mount flags
494 * fsmountargs file system specific data
495 * labelstr optional MAC label
496 * kernelmount TRUE for mounts initiated from inside the kernel
497 * ctx caller's context
/*
 * mount_common: shared final stage of mounting, called by __mac_mount and
 * kernel_mount.
 *
 * MNT_UPDATE in flags selects between two modes.
 *
 * Update of an existing mount.  The following are tested (the failure
 * handling is not visible in this view):
 *   - vp carries VROOT;
 *   - the mount does not have MNT_LUNMOUNT set (unmount in progress);
 *   - after taking mnt_rwlock exclusively: MNT_RELOAD only on a read-only
 *     mount; MNT_CPROTECT may not be dropped; under CONFIG_IMGSRC_ACCESS a
 *     writable mount with MNTK_BACKS_ROOT may not be made read-only;
 *   - the caller is the mount owner (f_owner) or passes suser();
 *   - mac_mount_check_remount.
 *
 * Fresh mount:
 *   - find fstypename in the vfsconf list and bump its vfc_refcount;
 *   - kernel mounts of VFC_VFSLOCALARGS file systems get EINVAL;
 *   - prepare_coveredvp checks the covered vnode (authorisation is skipped
 *     when internal_flags has KERNEL_MOUNT_NOAUTH);
 *   - a struct mount is allocated, zeroed and given default I/O limits,
 *     its rwlock is taken exclusively, and its type, mount-on name, covered
 *     vnode and owner are recorded.
 *
 * In both modes a caller that is neither a kernel mount nor superuser gets
 * MNT_NOSUID | MNT_NODEV added to flags.
 *
 * For VFC_VFSLOCALARGS file systems the device path is copied in from the
 * start of fsmountargs (a user_addr_t for 64-bit contexts, otherwise a
 * narrower value widened with CAST_USER_ADDR_T) and fsmountargs is advanced
 * past it.  The device is looked up with namei, tested for VBLK and for a
 * major number below nblkdev, and non-superuser callers must pass
 * vnode_authorize for read (and, unless read-only, write) access.  On a
 * first mount the device is also referenced, checked with vfs_mountedon and
 * vcount, flushed, invalidated, checked with mac_vnode_check_open and
 * opened.  When a read-only mount has MNTK_WANTRDWR set, the existing
 * mnt_devvp is instead re-opened FREAD | FWRITE through bdevsw.
 *
 * VFS_MOUNT then performs the file-system-specific work.  On the update path
 * the transient flags are cleared, VQ_UPDATE is signalled and the rwlock is
 * released.  On the fresh path the covered vnode gets VMOUNT cleared and
 * v_mountedhere set, checkdirs and VFS_START run, the mount joins the mount
 * list, capability bits are cached in mnt_kern_flag, vfs_nummntops is
 * incremented and vfs_notify_mount / IOBSDMountChange announce the mount.
 *
 * The tail of the function holds the error exits, which undo the steps
 * above: close and release the device vnode, clear VMOUNT / v_mountedhere,
 * drop mnt_rwlock if held, destroy and free the mount, and drop the vfstable
 * reference.
 */
500 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
501 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
502 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
505 #pragma unused(labelstr)
507 struct vnode
*devvp
= NULLVP
;
508 struct vnode
*device_vnode
= NULLVP
;
513 struct vfstable
*vfsp
= (struct vfstable
*)0;
514 struct proc
*p
= vfs_context_proc(ctx
);
516 user_addr_t devpath
= USER_ADDR_NULL
;
519 boolean_t vfsp_ref
= FALSE
;
520 boolean_t is_rwlock_locked
= FALSE
;
521 boolean_t did_rele
= FALSE
;
522 boolean_t have_usecount
= FALSE
;
525 * Process an update for an existing mount
527 if (flags
& MNT_UPDATE
) {
528 if ((vp
->v_flag
& VROOT
) == 0) {
534 /* unmount in progress return error */
536 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
542 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
543 is_rwlock_locked
= TRUE
;
545 * We only allow the filesystem to be reloaded if it
546 * is currently mounted read-only.
548 if ((flags
& MNT_RELOAD
) &&
549 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
555 * If content protection is enabled, update mounts are not
556 * allowed to turn it off.
558 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
559 ((flags
& MNT_CPROTECT
) == 0)) {
564 #ifdef CONFIG_IMGSRC_ACCESS
565 /* Can't downgrade the backer of the root FS */
566 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
567 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
571 #endif /* CONFIG_IMGSRC_ACCESS */
574 * Only root, or the user that did the original mount is
575 * permitted to update it.
577 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
578 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
582 error
= mac_mount_check_remount(ctx
, mp
);
588 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
589 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
591 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
592 flags
|= MNT_NOSUID
| MNT_NODEV
;
593 if (mp
->mnt_flag
& MNT_NOEXEC
)
600 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
602 vfsp
= mp
->mnt_vtable
;
606 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
607 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
610 flags
|= MNT_NOSUID
| MNT_NODEV
;
611 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
615 /* XXXAUDIT: Should we capture the type on the error path as well? */
616 AUDIT_ARG(text
, fstypename
);
/* Find the vfstable entry named fstypename and take a reference on it. */
618 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
619 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
620 vfsp
->vfc_refcount
++;
631 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
634 error
= EINVAL
; /* unsupported request */
638 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
644 * Allocate and initialize the filesystem (mount_t)
646 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
648 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
651 /* Initialize the default IO constraints */
652 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
653 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
654 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
655 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
656 mp
->mnt_devblocksize
= DEV_BSIZE
;
657 mp
->mnt_alignmentmask
= PAGE_MASK
;
658 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
661 mp
->mnt_realrootvp
= NULLVP
;
662 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
664 TAILQ_INIT(&mp
->mnt_vnodelist
);
665 TAILQ_INIT(&mp
->mnt_workerqueue
);
666 TAILQ_INIT(&mp
->mnt_newvnodes
);
668 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
669 is_rwlock_locked
= TRUE
;
670 mp
->mnt_op
= vfsp
->vfc_vfsops
;
671 mp
->mnt_vtable
= vfsp
;
672 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
673 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
674 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
675 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
676 mp
->mnt_vnodecovered
= vp
;
677 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
678 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
679 mp
->mnt_devbsdunit
= 0;
681 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
682 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
684 #if NFSCLIENT || DEVFS
686 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
687 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
688 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
689 #endif /* NFSCLIENT || DEVFS */
693 * Set the mount level flags.
695 if (flags
& MNT_RDONLY
)
696 mp
->mnt_flag
|= MNT_RDONLY
;
697 else if (mp
->mnt_flag
& MNT_RDONLY
) {
698 // disallow read/write upgrades of file systems that
699 // had the TYPENAME_OVERRIDE feature set.
700 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
704 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
/* Replace the option bits in mnt_flag with the ones requested in flags. */
706 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
707 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
708 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
709 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
710 MNT_QUARANTINE
| MNT_CPROTECT
);
711 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
712 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
713 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
714 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
715 MNT_QUARANTINE
| MNT_CPROTECT
);
718 if (flags
& MNT_MULTILABEL
) {
719 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
723 mp
->mnt_flag
|= MNT_MULTILABEL
;
727 * Process device path for local file systems if requested
729 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
730 if (vfs_context_is64bit(ctx
)) {
731 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
733 fsmountargs
+= sizeof(devpath
);
736 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
738 /* munge into LP64 addr */
739 devpath
= CAST_USER_ADDR_T(tmp
);
740 fsmountargs
+= sizeof(tmp
);
743 /* Lookup device and authorize access to it */
747 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
748 if ( (error
= namei(&nd
)) )
751 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
756 if (devvp
->v_type
!= VBLK
) {
760 if (major(devvp
->v_rdev
) >= nblkdev
) {
765 * If mount by non-root, then verify that user has necessary
766 * permissions on the device.
768 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
769 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
771 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
772 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
773 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
777 /* On first mount, preflight and open device */
778 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
779 if ( (error
= vnode_ref(devvp
)) )
782 * Disallow multiple mounts of the same device.
783 * Disallow mounting of a device that is currently in use
784 * (except for root, which might share swap device for miniroot).
785 * Flush out any old buffers remaining from a previous use.
787 if ( (error
= vfs_mountedon(devvp
)) )
790 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
794 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
798 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
801 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
803 error
= mac_vnode_check_open(ctx
,
805 ronly
? FREAD
: FREAD
|FWRITE
);
809 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
812 mp
->mnt_devvp
= devvp
;
813 device_vnode
= devvp
;
815 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
816 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
817 (device_vnode
= mp
->mnt_devvp
)) {
821 * If upgrade to read-write by non-root, then verify
822 * that user has necessary permissions on the device.
824 vnode_getalways(device_vnode
);
826 if (suser(vfs_context_ucred(ctx
), NULL
) &&
827 (error
= vnode_authorize(device_vnode
, NULL
,
828 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
830 vnode_put(device_vnode
);
834 /* Tell the device that we're upgrading */
835 dev
= (dev_t
)device_vnode
->v_rdev
;
838 if ((u_int
)maj
>= (u_int
)nblkdev
)
839 panic("Volume mounted on a device with invalid major number.");
841 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
842 vnode_put(device_vnode
);
843 device_vnode
= NULLVP
;
850 if ((flags
& MNT_UPDATE
) == 0) {
851 mac_mount_label_init(mp
);
852 mac_mount_label_associate(ctx
, mp
);
855 if ((flags
& MNT_UPDATE
) != 0) {
856 error
= mac_mount_check_label_update(ctx
, mp
);
863 * Mount the filesystem.
865 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
867 if (flags
& MNT_UPDATE
) {
868 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
869 mp
->mnt_flag
&= ~MNT_RDONLY
;
871 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
872 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
874 mp
->mnt_flag
= flag
; /* restore flag value */
875 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
876 lck_rw_done(&mp
->mnt_rwlock
);
877 is_rwlock_locked
= FALSE
;
879 enablequotas(mp
, ctx
);
884 * Put the new filesystem on the mount list after root.
887 struct vfs_attr vfsattr
;
889 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
890 error
= VFS_ROOT(mp
, &rvp
, ctx
);
892 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
895 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
897 * drop reference provided by VFS_ROOT
907 CLR(vp
->v_flag
, VMOUNT
);
908 vp
->v_mountedhere
= mp
;
912 * taking the name_cache_lock exclusively will
913 * insure that everyone is out of the fast path who
914 * might be trying to use a now stale copy of
915 * vp->v_mountedhere->mnt_realrootvp
916 * bumping mount_generation causes the cached values
923 error
= vnode_ref(vp
);
928 have_usecount
= TRUE
;
930 error
= checkdirs(vp
, ctx
);
932 /* Unmount the filesystem as cdir/rdirs cannot be updated */
936 * there is no cleanup code here so I have made it void
937 * we need to revisit this
939 (void)VFS_START(mp
, 0, ctx
);
941 if (mount_list_add(mp
) != 0) {
943 * The system is shutting down trying to umount
944 * everything, so fail with a plausible errno.
949 lck_rw_done(&mp
->mnt_rwlock
);
950 is_rwlock_locked
= FALSE
;
952 /* Check if this mounted file system supports EAs or named streams. */
953 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
954 VFSATTR_INIT(&vfsattr
);
955 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
956 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
957 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
958 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
959 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
960 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
961 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
964 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
965 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
966 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
969 /* Check if this file system supports path from id lookups. */
970 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
971 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
972 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
973 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
974 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
975 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
978 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
979 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
981 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
982 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
984 /* increment the operations count */
985 OSAddAtomic(1, &vfs_nummntops
);
986 enablequotas(mp
, ctx
);
989 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
992 * cache the IO attributes for the underlying physical media...
993 * an error return indicates the underlying driver doesn't
994 * support all the queries necessary... however, reasonable
995 * defaults will have been set, so no reason to bail or care
997 vfs_init_io_attributes(device_vnode
, mp
);
1000 /* Now that mount is setup, notify the listeners */
1001 vfs_notify_mount(pvp
);
1002 IOBSDMountChange(mp
, kIOMountChangeMount
);
1005 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1006 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
1007 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1008 mp
->mnt_vtable
->vfc_name
, error
);
1011 vnode_lock_spin(vp
);
1012 CLR(vp
->v_flag
, VMOUNT
);
1015 mp
->mnt_vtable
->vfc_refcount
--;
1016 mount_list_unlock();
1018 if (device_vnode
) {
1019 vnode_rele(device_vnode
);
1020 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1022 lck_rw_done(&mp
->mnt_rwlock
);
1023 is_rwlock_locked
= FALSE
;
1026 * if we get here, we have a mount structure that needs to be freed,
1027 * but since the coveredvp hasn't yet been updated to point at it,
1028 * no need to worry about other threads holding a crossref on this mp
1029 * so it's ok to just free it
1031 mount_lock_destroy(mp
);
1033 mac_mount_label_destroy(mp
);
1035 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1039 * drop I/O count on the device vp if there was one
1041 if (devpath
&& devvp
)
1046 /* Error condition exits */
1048 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1051 * If the mount has been placed on the covered vp,
1052 * it may have been discovered by now, so we have
1053 * to treat this just like an unmount
1055 mount_lock_spin(mp
);
1056 mp
->mnt_lflag
|= MNT_LDEAD
;
1059 if (device_vnode
!= NULLVP
) {
1060 vnode_rele(device_vnode
);
1061 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1066 vnode_lock_spin(vp
);
1069 vp
->v_mountedhere
= (mount_t
) 0;
1073 if (have_usecount
) {
1077 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1080 if (devpath
&& devvp
)
1083 /* Release mnt_rwlock only when it was taken */
1084 if (is_rwlock_locked
== TRUE
) {
1085 lck_rw_done(&mp
->mnt_rwlock
);
1089 if (mp
->mnt_crossref
)
1090 mount_dropcrossref(mp
, vp
, 0);
1092 mount_lock_destroy(mp
);
1094 mac_mount_label_destroy(mp
);
1096 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1101 vfsp
->vfc_refcount
--;
1102 mount_list_unlock();
1109 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: check vp as a mount point and mark it as having a mount
 * in progress.
 *
 * Visible steps, in order:
 *   - ownership: vnode_getattr fetches va_uid; the condition fires when that
 *     call fails or when the caller is neither the owner of vp nor superuser
 *     (skip_auth presumably bypasses this; the guarding line is not visible
 *     in this view);
 *   - VNOP_FSYNC(MNT_WAIT) and buf_invalidateblks(BUF_WRITE_DATA) flush and
 *     invalidate cached data for vp;
 *   - vp is tested for being a directory (VDIR);
 *   - a vnode that has VMOUNT set and a non-NULL v_mountedhere is singled
 *     out (presumably rejected as already mounted on);
 *   - mac_mount_check_mount is consulted;
 *   - VMOUNT is set on vp under the vnode spin lock.
 *
 * cnp and fsname are covered by a #pragma unused, presumably for builds in
 * which the MAC check is compiled out.
 */
1113 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1116 #pragma unused(cnp,fsname)
1118 struct vnode_attr va
;
1123 * If the user is not root, ensure that they own the directory
1124 * onto which we are attempting to mount.
1127 VATTR_WANTED(&va
, va_uid
);
1128 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1129 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1130 (!vfs_context_issuser(ctx
)))) {
1136 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1139 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1142 if (vp
->v_type
!= VDIR
) {
1147 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1153 error
= mac_mount_check_mount(ctx
, vp
,
1159 vnode_lock_spin(vp
);
1160 SET(vp
->v_flag
, VMOUNT
);
1167 #if CONFIG_IMGSRC_ACCESS
/*
 * IMGSRC_DEBUG: trace macro used by the image-source code that follows.
 * One variant forwards its arguments to printf, the other expands to an
 * empty do/while statement; the conditional that chooses between them is
 * not visible in this view.
 */
1170 #define IMGSRC_DEBUG(args...) printf(args)
1172 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: check that the user-supplied
 * devpath names the block device that already backs mp, and record that
 * path as the f_mntfromname of the mount.
 *
 * Visible steps: namei on devpath (user space, FOLLOW); the result is tested
 * with vnode_isblk; mp must have a backing mnt_devvp, on which an iocount is
 * taken with vnode_getwithref; the two vnodes are compared by
 * vnode_specrdev; the looked-up path is copied into f_mntfromname; callers
 * that are not superuser must pass vnode_authorize for read (and, unless the
 * mount is read-only, write) access.  The iocount on the real device vnode
 * is dropped with vnode_put.
 *
 * NOTE(review): how *devvpp is filled in and which errors are returned are
 * not visible in this view.
 */
1176 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1178 struct nameidata nd
;
1179 vnode_t vp
, realdevvp
;
1183 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1184 if ( (error
= namei(&nd
)) ) {
1185 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1191 if (!vnode_isblk(vp
)) {
1192 IMGSRC_DEBUG("Not block device.\n");
1197 realdevvp
= mp
->mnt_devvp
;
1198 if (realdevvp
== NULLVP
) {
1199 IMGSRC_DEBUG("No device backs the mount.\n");
1204 error
= vnode_getwithref(realdevvp
);
1206 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1210 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1211 IMGSRC_DEBUG("Wrong dev_t.\n");
1216 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1219 * If mount by non-root, then verify that user has necessary
1220 * permissions on the device.
1222 if (!vfs_context_issuser(ctx
)) {
1223 accessmode
= KAUTH_VNODE_READ_DATA
;
1224 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1225 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1226 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1227 IMGSRC_DEBUG("Access denied.\n");
1235 vnode_put(realdevvp
);
1246 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1247 * and call checkdirs()
/*
 * place_mount_and_checkdirs: attach mp to the covered vnode vp.
 *
 * Records vp as mp->mnt_vnodecovered, then under the vnode spin lock clears
 * VMOUNT and points vp->v_mountedhere at mp.  Afterwards it takes a usecount
 * on vp with vnode_ref and runs checkdirs.  mnt_vnodecovered is reset to
 * NULLVP near the end, presumably on the failure path (the surrounding
 * control flow is not visible in this view).
 */
1250 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1254 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1256 vnode_lock_spin(vp
);
1257 CLR(vp
->v_flag
, VMOUNT
);
1258 vp
->v_mountedhere
= mp
;
1262 * taking the name_cache_lock exclusively will
1263 * insure that everyone is out of the fast path who
1264 * might be trying to use a now stale copy of
1265 * vp->v_mountedhere->mnt_realrootvp
1266 * bumping mount_generation causes the cached values
1271 name_cache_unlock();
1273 error
= vnode_ref(vp
);
1278 error
= checkdirs(vp
, ctx
);
1280 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1287 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach mp from the covered vnode vp.
 *
 * Under the vnode spin lock, clears vp->v_mountedhere; then clears
 * mp->mnt_vnodecovered.  These are the two links that
 * place_mount_and_checkdirs establishes.
 */
1293 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1296 vnode_lock_spin(vp
);
1297 vp
->v_mountedhere
= (mount_t
)NULL
;
1300 mp
->mnt_vnodecovered
= NULLVP
;
1304 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1308 /* unmount in progress return error */
1309 mount_lock_spin(mp
);
1310 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1315 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1318 * We only allow the filesystem to be reloaded if it
1319 * is currently mounted read-only.
1321 if ((flags
& MNT_RELOAD
) &&
1322 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1328 * Only root, or the user that did the original mount is
1329 * permitted to update it.
1331 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1332 (!vfs_context_issuser(ctx
))) {
1337 error
= mac_mount_check_remount(ctx
, mp
);
1345 lck_rw_done(&mp
->mnt_rwlock
);
1352 mount_end_update(mount_t mp
)
1354 lck_rw_done(&mp
->mnt_rwlock
);
1358 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1362 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1366 vp
= imgsrc_rootvnodes
[height
];
1367 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1376 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1377 const char *fsname
, vfs_context_t ctx
,
1378 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1382 boolean_t placed
= FALSE
;
1383 vnode_t devvp
= NULLVP
;
1384 struct vfstable
*vfsp
;
1385 user_addr_t devpath
;
1386 char *old_mntonname
;
1391 /* If we didn't imageboot, nothing to move */
1392 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1396 /* Only root can do this */
1397 if (!vfs_context_issuser(ctx
)) {
1401 IMGSRC_DEBUG("looking for root vnode.\n");
1404 * Get root vnode of filesystem we're moving.
1408 struct user64_mnt_imgsrc_args mia64
;
1409 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1411 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1415 height
= mia64
.mi_height
;
1416 flags
= mia64
.mi_flags
;
1417 devpath
= mia64
.mi_devpath
;
1419 struct user32_mnt_imgsrc_args mia32
;
1420 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1422 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1426 height
= mia32
.mi_height
;
1427 flags
= mia32
.mi_flags
;
1428 devpath
= mia32
.mi_devpath
;
1432 * For binary compatibility--assumes one level of nesting.
1435 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1439 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1442 /* munge into LP64 addr */
1443 devpath
= CAST_USER_ADDR_T(tmp
);
1451 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1455 error
= get_imgsrc_rootvnode(height
, &rvp
);
1457 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1461 IMGSRC_DEBUG("got root vnode.\n");
1463 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1465 /* Can only move once */
1466 mp
= vnode_mount(rvp
);
1467 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1468 IMGSRC_DEBUG("Already moved.\n");
1473 IMGSRC_DEBUG("Starting updated.\n");
1475 /* Get exclusive rwlock on mount, authorize update on mp */
1476 error
= mount_begin_update(mp
, ctx
, 0);
1478 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1483 * It can only be moved once. Flag is set under the rwlock,
1484 * so we're now safe to proceed.
1486 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1487 IMGSRC_DEBUG("Already moved [2]\n");
1492 IMGSRC_DEBUG("Preparing coveredvp.\n");
1494 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1495 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1497 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1501 IMGSRC_DEBUG("Covered vp OK.\n");
1503 /* Sanity check the name caller has provided */
1504 vfsp
= mp
->mnt_vtable
;
1505 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1506 IMGSRC_DEBUG("Wrong fs name.\n");
1511 /* Check the device vnode and update mount-from name, for local filesystems */
1512 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1513 IMGSRC_DEBUG("Local, doing device validation.\n");
1515 if (devpath
!= USER_ADDR_NULL
) {
1516 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1518 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1527 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1528 * and increment the name cache's mount generation
1531 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1532 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1539 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1540 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1542 /* Forbid future moves */
1544 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1547 /* Finally, add to mount list, completely ready to go */
1548 if (mount_list_add(mp
) != 0) {
1550 * The system is shutting down trying to umount
1551 * everything, so fail with a plausible errno.
1557 mount_end_update(mp
);
1559 FREE(old_mntonname
, M_TEMP
);
1561 vfs_notify_mount(pvp
);
1565 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1568 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1573 * Placing the mp on the vnode clears VMOUNT,
1574 * so cleanup is different after that point
1577 /* Rele the vp, clear VMOUNT and v_mountedhere */
1578 undo_place_on_covered_vp(mp
, vp
);
1580 vnode_lock_spin(vp
);
1581 CLR(vp
->v_flag
, VMOUNT
);
1585 mount_end_update(mp
);
1589 FREE(old_mntonname
, M_TEMP
);
1593 #endif /* CONFIG_IMGSRC_ACCESS */
1596 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1598 struct nameidata qnd
;
1600 char qfpath
[MAXPATHLEN
];
1601 const char *qfname
= QUOTAFILENAME
;
1602 const char *qfopsname
= QUOTAOPSNAME
;
1603 const char *qfextension
[] = INITQFNAMES
;
1605 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1606 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1610 * Enable filesystem disk quotas if necessary.
1611 * We ignore errors as this should not interfere with final mount
1613 for (type
=0; type
< MAXQUOTAS
; type
++) {
1614 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1615 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1616 CAST_USER_ADDR_T(qfpath
), ctx
);
1617 if (namei(&qnd
) != 0)
1618 continue; /* option file to trigger quotas is not present */
1619 vnode_put(qnd
.ni_vp
);
1621 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1623 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1630 checkdirs_callback(proc_t p
, void * arg
)
1632 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1633 vnode_t olddp
= cdrp
->olddp
;
1634 vnode_t newdp
= cdrp
->newdp
;
1635 struct filedesc
*fdp
;
1639 int cdir_changed
= 0;
1640 int rdir_changed
= 0;
1643 * XXX Also needs to iterate each thread in the process to see if it
1644 * XXX is using a per-thread current working directory, and, if so,
1645 * XXX update that as well.
1650 if (fdp
== (struct filedesc
*)0) {
1652 return(PROC_RETURNED
);
1654 fdp_cvp
= fdp
->fd_cdir
;
1655 fdp_rvp
= fdp
->fd_rdir
;
1658 if (fdp_cvp
== olddp
) {
1665 if (fdp_rvp
== olddp
) {
1672 if (cdir_changed
|| rdir_changed
) {
1674 fdp
->fd_cdir
= fdp_cvp
;
1675 fdp
->fd_rdir
= fdp_rvp
;
1678 return(PROC_RETURNED
);
1684 * Scan all active processes to see if any of them have a current
1685 * or root directory onto which the new filesystem has just been
1686 * mounted. If so, replace them with the new mount point.
1689 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1694 struct cdirargs cdr
;
1696 if (olddp
->v_usecount
== 1)
1698 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1702 panic("mount: lost mount: error %d", err
);
1709 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1710 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1712 if (rootvnode
== olddp
) {
1724 * Unmount a file system.
1726 * Note: unmount takes a path to the vnode mounted on as argument,
1727 * not special file (as before).
1731 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1736 struct nameidata nd
;
1737 vfs_context_t ctx
= vfs_context_current();
1739 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1740 UIO_USERSPACE
, uap
->path
, ctx
);
1749 error
= mac_mount_check_umount(ctx
, mp
);
1756 * Must be the root of the filesystem
1758 if ((vp
->v_flag
& VROOT
) == 0) {
1764 /* safedounmount consumes the mount ref */
1765 return (safedounmount(mp
, uap
->flags
, ctx
));
1769 vfs_unmountbyfsid(fsid_t
* fsid
, int flags
, vfs_context_t ctx
)
1773 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1774 if (mp
== (mount_t
)0) {
1779 /* safedounmount consumes the mount ref */
1780 return(safedounmount(mp
, flags
, ctx
));
1785 * The mount struct comes with a mount ref which will be consumed.
1786 * Do the actual file system unmount, prevent some common foot shooting.
1789 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1792 proc_t p
= vfs_context_proc(ctx
);
1795 * If the file system is not responding and MNT_NOBLOCK
1796 * is set and not a forced unmount then return EBUSY.
1798 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1799 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1805 * Skip authorization if the mount is tagged as permissive and
1806 * this is not a forced-unmount attempt.
1808 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1810 * Only root, or the user that did the original mount is
1811 * permitted to unmount this filesystem.
1813 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1814 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1818 * Don't allow unmounting the root file system.
1820 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1821 error
= EBUSY
; /* the root is always busy */
1825 #ifdef CONFIG_IMGSRC_ACCESS
1826 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1830 #endif /* CONFIG_IMGSRC_ACCESS */
1832 return (dounmount(mp
, flags
, 1, ctx
));
1840 * Do the actual file system unmount.
1843 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1845 vnode_t coveredvp
= (vnode_t
)0;
1848 int forcedunmount
= 0;
1850 struct vnode
*devvp
= NULLVP
;
1852 proc_t p
= vfs_context_proc(ctx
);
1854 int pflags_save
= 0;
1855 #endif /* CONFIG_TRIGGERS */
1860 * If already an unmount in progress just return EBUSY.
1861 * Even a forced unmount cannot override.
1863 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1870 if (flags
& MNT_FORCE
) {
1872 mp
->mnt_lflag
|= MNT_LFORCE
;
1876 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1877 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1880 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1881 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1882 mp
->mnt_flag
&=~ MNT_ASYNC
;
1884 * anyone currently in the fast path that
1885 * trips over the cached rootvp will be
1886 * dumped out and forced into the slow path
1887 * to regenerate a new cached value
1889 mp
->mnt_realrootvp
= NULLVP
;
1892 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1894 * Force unmount any mounts in this filesystem.
1895 * If any unmounts fail - just leave them dangling.
1898 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1902 * taking the name_cache_lock exclusively will
1903 * ensure that everyone is out of the fast path who
1904 * might be trying to use a now stale copy of
1905 * vp->v_mountedhere->mnt_realrootvp
1906 * bumping mount_generation causes the cached values
1911 name_cache_unlock();
1914 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1918 fsevent_unmount(mp
); /* has to come first! */
1921 if (forcedunmount
== 0) {
1922 ubc_umount(mp
); /* release cached vnodes */
1923 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1924 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1927 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1928 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1929 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1935 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
1938 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1942 lflags
|= FORCECLOSE
;
1943 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1944 if ((forcedunmount
== 0) && error
) {
1946 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1947 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1948 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1952 /* make sure no one is in the mount iterations or lookup */
1953 mount_iterdrain(mp
);
1955 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1957 mount_iterreset(mp
);
1959 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1960 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1961 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1965 /* increment the operations count */
1967 OSAddAtomic(1, &vfs_nummntops
);
1969 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1970 /* hold an io reference and drop the usecount before close */
1971 devvp
= mp
->mnt_devvp
;
1972 vnode_getalways(devvp
);
1974 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1976 vnode_clearmountedon(devvp
);
1979 lck_rw_done(&mp
->mnt_rwlock
);
1980 mount_list_remove(mp
);
1981 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1983 /* mark the mount point hook in the vp but do not drop the ref yet */
1984 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
1986 * The covered vnode needs special handling. Trying to get an
1987 * iocount must not block here as this may lead to deadlocks
1988 * if the Filesystem to which the covered vnode belongs is
1989 * undergoing forced unmounts. Since we hold a usecount, the
1990 * vnode cannot be reused (it can, however, still be terminated)
1992 vnode_getalways(coveredvp
);
1993 vnode_lock_spin(coveredvp
);
1996 coveredvp
->v_mountedhere
= (struct mount
*)0;
1997 CLR(coveredvp
->v_flag
, VMOUNT
);
1999 vnode_unlock(coveredvp
);
2000 vnode_put(coveredvp
);
2004 mp
->mnt_vtable
->vfc_refcount
--;
2005 mount_list_unlock();
2007 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2008 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2010 mp
->mnt_lflag
|= MNT_LDEAD
;
2012 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2014 * do the wakeup here
2015 * in case we block in mount_refdrain
2016 * which will drop the mount lock
2017 * and allow anyone blocked in vfs_busy
2018 * to wakeup and see the LDEAD state
2020 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2021 wakeup((caddr_t
)mp
);
2025 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2026 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2031 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2032 // Restore P_NOREMOTEHANG bit to its previous value
2033 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2034 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2038 * Callback and context are set together under the mount lock, and
2039 * never cleared, so we're safe to examine them here, drop the lock,
2042 if (mp
->mnt_triggercallback
!= NULL
) {
2045 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2046 } else if (did_vflush
) {
2047 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2054 #endif /* CONFIG_TRIGGERS */
2056 lck_rw_done(&mp
->mnt_rwlock
);
2059 wakeup((caddr_t
)mp
);
2062 if ((coveredvp
!= NULLVP
)) {
2063 vnode_t pvp
= NULLVP
;
2066 * The covered vnode needs special handling. Trying to
2067 * get an iocount must not block here as this may lead
2068 * to deadlocks if the Filesystem to which the covered
2069 * vnode belongs is undergoing forced unmounts. Since we
2070 * hold a usecount, the vnode cannot be reused
2071 * (it can, however, still be terminated).
2073 vnode_getalways(coveredvp
);
2075 mount_dropcrossref(mp
, coveredvp
, 0);
2077 * We'll _try_ to detect if this really needs to be
2078 * done. The coveredvp can only be in termination (or
2079 * terminated) if the coveredvp's mount point is in a
2080 * forced unmount (or has been) since we still hold the
2083 if (!vnode_isrecycled(coveredvp
)) {
2084 pvp
= vnode_getparent(coveredvp
);
2086 if (coveredvp
->v_resolve
) {
2087 vnode_trigger_rearm(coveredvp
, ctx
);
2092 vnode_rele(coveredvp
);
2093 vnode_put(coveredvp
);
2097 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2100 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2101 mount_lock_destroy(mp
);
2103 mac_mount_label_destroy(mp
);
2105 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2107 panic("dounmount: no coveredvp");
2113 * Unmount any mounts in this filesystem.
2116 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2119 fsid_t
*fsids
, fsid
;
2121 int count
= 0, i
, m
= 0;
2126 // Get an array to hold the submounts fsids.
2127 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2129 fsids_sz
= count
* sizeof(fsid_t
);
2130 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2131 if (fsids
== NULL
) {
2132 mount_list_unlock();
2135 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2138 * Fill the array with submount fsids.
2139 * Since mounts are always added to the tail of the mount list, the
2140 * list is always in mount order.
2141 * For each mount check if the mounted-on vnode belongs to a
2142 * mount that's already added to our array of mounts to be unmounted.
2144 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2145 vp
= smp
->mnt_vnodecovered
;
2148 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2149 for (i
= 0; i
<= m
; i
++) {
2150 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2151 fsids
[i
].val
[1] == fsid
.val
[1]) {
2152 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2157 mount_list_unlock();
2159 // Unmount the submounts in reverse order. Ignore errors.
2160 for (i
= m
; i
> 0; i
--) {
2161 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2164 mount_iterdrop(smp
);
2165 (void) dounmount(smp
, flags
, 1, ctx
);
2170 FREE(fsids
, M_TEMP
);
2174 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2179 if (mp
->mnt_crossref
< 0)
2180 panic("mount cross refs -ve");
2182 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2185 vnode_put_locked(dp
);
2188 mount_lock_destroy(mp
);
2190 mac_mount_label_destroy(mp
);
2192 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2196 vnode_put_locked(dp
);
2202 * Sync each mounted filesystem.
2208 int print_vmpage_stat
=0;
2209 int sync_timeout
= 60; // Sync time limit (sec)
2212 sync_callback(mount_t mp
, __unused
void *arg
)
2214 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2215 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2217 mp
->mnt_flag
&= ~MNT_ASYNC
;
2218 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2220 mp
->mnt_flag
|= MNT_ASYNC
;
2223 return (VFS_RETURNED
);
2228 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2230 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2232 if (print_vmpage_stat
) {
2233 vm_countdirtypages();
2239 #endif /* DIAGNOSTIC */
2244 sync_thread(void *arg
, __unused wait_result_t wr
)
2246 int *timeout
= (int *) arg
;
2248 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2251 wakeup((caddr_t
) timeout
);
2252 if (print_vmpage_stat
) {
2253 vm_countdirtypages();
2259 #endif /* DIAGNOSTIC */
2263 * Sync in a separate thread so we can time out if it blocks.
2266 sync_async(int timeout
)
2270 struct timespec ts
= {timeout
, 0};
2272 lck_mtx_lock(sync_mtx_lck
);
2273 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2274 printf("sync_thread failed\n");
2275 lck_mtx_unlock(sync_mtx_lck
);
2279 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2281 printf("sync timed out: %d sec\n", timeout
);
2283 thread_deallocate(thd
);
2289 * An in-kernel sync for power management to call.
2291 __private_extern__
int
2294 (void) sync_async(sync_timeout
);
2297 } /* end of sync_internal call */
2300 * Change filesystem quotas.
2304 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2307 int error
, quota_cmd
, quota_status
;
2310 struct nameidata nd
;
2311 vfs_context_t ctx
= vfs_context_current();
2312 struct dqblk my_dqblk
;
2314 AUDIT_ARG(uid
, uap
->uid
);
2315 AUDIT_ARG(cmd
, uap
->cmd
);
2316 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2321 mp
= nd
.ni_vp
->v_mount
;
2322 vnode_put(nd
.ni_vp
);
2325 /* copyin any data we will need for downstream code */
2326 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2328 switch (quota_cmd
) {
2330 /* uap->arg specifies a file from which to take the quotas */
2331 fnamelen
= MAXPATHLEN
;
2332 datap
= kalloc(MAXPATHLEN
);
2333 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2336 /* uap->arg is a pointer to a dqblk structure. */
2337 datap
= (caddr_t
) &my_dqblk
;
2341 /* uap->arg is a pointer to a dqblk structure. */
2342 datap
= (caddr_t
) &my_dqblk
;
2343 if (proc_is64bit(p
)) {
2344 struct user_dqblk my_dqblk64
;
2345 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2347 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2351 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2355 /* uap->arg is a pointer to an integer */
2356 datap
= (caddr_t
) "a_status
;
2364 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2367 switch (quota_cmd
) {
2370 kfree(datap
, MAXPATHLEN
);
2373 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2375 if (proc_is64bit(p
)) {
2376 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2377 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2378 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2381 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2386 /* uap->arg is a pointer to an integer */
2388 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2399 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2401 return (EOPNOTSUPP
);
2406 * Get filesystem statistics.
2408 * Returns: 0 Success
2410 * vfs_update_vfsstat:???
2411 * munge_statfs:EFAULT
2415 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2418 struct vfsstatfs
*sp
;
2420 struct nameidata nd
;
2421 vfs_context_t ctx
= vfs_context_current();
2424 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2425 UIO_USERSPACE
, uap
->path
, ctx
);
2431 sp
= &mp
->mnt_vfsstat
;
2434 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2440 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2446 * Get filesystem statistics.
2450 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2454 struct vfsstatfs
*sp
;
2457 AUDIT_ARG(fd
, uap
->fd
);
2459 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2462 error
= vnode_getwithref(vp
);
2468 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2475 sp
= &mp
->mnt_vfsstat
;
2476 if ((error
= vfs_update_vfsstat(mp
,vfs_context_current(),VFS_USER_EVENT
)) != 0) {
2480 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2490 * Common routine to handle copying of statfs64 data to user space
2493 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2496 struct statfs64 sfs
;
2498 bzero(&sfs
, sizeof(sfs
));
2500 sfs
.f_bsize
= sfsp
->f_bsize
;
2501 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2502 sfs
.f_blocks
= sfsp
->f_blocks
;
2503 sfs
.f_bfree
= sfsp
->f_bfree
;
2504 sfs
.f_bavail
= sfsp
->f_bavail
;
2505 sfs
.f_files
= sfsp
->f_files
;
2506 sfs
.f_ffree
= sfsp
->f_ffree
;
2507 sfs
.f_fsid
= sfsp
->f_fsid
;
2508 sfs
.f_owner
= sfsp
->f_owner
;
2509 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2510 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2511 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2512 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2513 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2515 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2517 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2518 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2520 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2526 * Get file system statistics in 64-bit mode
2529 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2532 struct vfsstatfs
*sp
;
2534 struct nameidata nd
;
2535 vfs_context_t ctxp
= vfs_context_current();
2538 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2539 UIO_USERSPACE
, uap
->path
, ctxp
);
2545 sp
= &mp
->mnt_vfsstat
;
2548 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2554 error
= statfs64_common(mp
, sp
, uap
->buf
);
2561 * Get file system statistics in 64-bit mode
2564 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2568 struct vfsstatfs
*sp
;
2571 AUDIT_ARG(fd
, uap
->fd
);
2573 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2576 error
= vnode_getwithref(vp
);
2582 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2589 sp
= &mp
->mnt_vfsstat
;
2590 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2594 error
= statfs64_common(mp
, sp
, uap
->buf
);
2603 struct getfsstat_struct
{
2614 getfsstat_callback(mount_t mp
, void * arg
)
2617 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2618 struct vfsstatfs
*sp
;
2620 vfs_context_t ctx
= vfs_context_current();
2622 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2623 sp
= &mp
->mnt_vfsstat
;
2625 * If MNT_NOWAIT is specified, do not refresh the
2626 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2628 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2629 (error
= vfs_update_vfsstat(mp
, ctx
,
2631 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2632 return(VFS_RETURNED
);
2636 * Need to handle LP64 version of struct statfs
2638 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2640 fstp
->error
= error
;
2641 return(VFS_RETURNED_DONE
);
2643 fstp
->sfsp
+= my_size
;
2647 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2649 fstp
->error
= error
;
2650 return(VFS_RETURNED_DONE
);
2657 return(VFS_RETURNED
);
2661 * Get statistics on all filesystems.
2664 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2666 struct __mac_getfsstat_args muap
;
2668 muap
.buf
= uap
->buf
;
2669 muap
.bufsize
= uap
->bufsize
;
2670 muap
.mac
= USER_ADDR_NULL
;
2672 muap
.flags
= uap
->flags
;
2674 return (__mac_getfsstat(p
, &muap
, retval
));
2678 * __mac_getfsstat: Get MAC-related file system statistics
2680 * Parameters: p (ignored)
2681 * uap User argument descriptor (see below)
2682 * retval Count of file system statistics (N stats)
2684 * Indirect: uap->bufsize Buffer size
2685 * uap->macsize MAC info size
2686 * uap->buf Buffer where information will be returned
2688 * uap->flags File system flags
2691 * Returns: 0 Success
2696 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2700 size_t count
, maxcount
, bufsize
, macsize
;
2701 struct getfsstat_struct fst
;
2703 bufsize
= (size_t) uap
->bufsize
;
2704 macsize
= (size_t) uap
->macsize
;
2706 if (IS_64BIT_PROCESS(p
)) {
2707 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2710 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2718 if (uap
->mac
!= USER_ADDR_NULL
) {
2723 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2724 if (count
!= maxcount
)
2727 /* Copy in the array */
2728 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2733 error
= copyin(uap
->mac
, mp0
, macsize
);
2735 FREE(mp0
, M_MACTEMP
);
2739 /* Normalize to an array of user_addr_t */
2740 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2742 FREE(mp0
, M_MACTEMP
);
2746 for (i
= 0; i
< count
; i
++) {
2747 if (IS_64BIT_PROCESS(p
))
2748 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2750 mp
[i
] = (user_addr_t
)mp0
[i
];
2752 FREE(mp0
, M_MACTEMP
);
2759 fst
.flags
= uap
->flags
;
2762 fst
.maxcount
= maxcount
;
2765 vfs_iterate(0, getfsstat_callback
, &fst
);
2768 FREE(mp
, M_MACTEMP
);
2771 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2775 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2776 *retval
= fst
.maxcount
;
2778 *retval
= fst
.count
;
2783 getfsstat64_callback(mount_t mp
, void * arg
)
2785 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2786 struct vfsstatfs
*sp
;
2789 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2790 sp
= &mp
->mnt_vfsstat
;
2792 * If MNT_NOWAIT is specified, do not refresh the fsstat
2793 * cache. MNT_WAIT overrides MNT_NOWAIT.
2795 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2796 * getfsstat, since the constants are out of the same
2799 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2800 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2801 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2802 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2803 return(VFS_RETURNED
);
2806 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2808 fstp
->error
= error
;
2809 return(VFS_RETURNED_DONE
);
2811 fstp
->sfsp
+= sizeof(struct statfs64
);
2814 return(VFS_RETURNED
);
2818 * Get statistics on all file systems in 64 bit mode.
2821 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2824 int count
, maxcount
;
2825 struct getfsstat_struct fst
;
2827 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2833 fst
.flags
= uap
->flags
;
2836 fst
.maxcount
= maxcount
;
2838 vfs_iterate(0, getfsstat64_callback
, &fst
);
2841 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2845 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2846 *retval
= fst
.maxcount
;
2848 *retval
= fst
.count
;
2854 * gets the associated vnode with the file descriptor passed.
2858 * ctx - vfs context of caller
2859 * fd - file descriptor for which vnode is required.
2860 * vpp - Pointer to pointer to vnode to be returned.
2862 * The vnode is returned with an iocount so any vnode obtained
2863 * by this call needs a vnode_put
2867 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2871 struct fileproc
*fp
;
2872 proc_t p
= vfs_context_proc(ctx
);
2876 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2880 error
= vnode_getwithref(vp
);
2882 (void)fp_drop(p
, fd
, fp
, 0);
2886 (void)fp_drop(p
, fd
, fp
, 0);
2892 * Wrapper function around namei to start lookup from a directory
2893 * specified by a file descriptor ni_dirfd.
2895 * In addition to all the errors returned by namei, this call can
2896 * return ENOTDIR if the file descriptor does not refer to a directory,
2897 * and EBADF if the file descriptor is not valid.
2900 nameiat(struct nameidata
*ndp
, int dirfd
)
2902 if ((dirfd
!= AT_FDCWD
) &&
2903 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2904 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2908 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2909 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2913 c
= *((char *)(ndp
->ni_dirp
));
2919 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2924 if (vnode_vtype(dvp_at
) != VDIR
) {
2929 ndp
->ni_dvp
= dvp_at
;
2930 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2932 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
2938 return (namei(ndp
));
2942 * Change current working directory to a given file descriptor.
2946 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
2948 struct filedesc
*fdp
= p
->p_fd
;
2954 vfs_context_t ctx
= vfs_context_current();
2956 AUDIT_ARG(fd
, uap
->fd
);
2957 if (per_thread
&& uap
->fd
== -1) {
2959 * Switching back from per-thread to per process CWD; verify we
2960 * in fact have one before proceeding. The only success case
2961 * for this code path is to return 0 preemptively after zapping
2962 * the thread structure contents.
2964 thread_t th
= vfs_context_thread(ctx
);
2966 uthread_t uth
= get_bsdthread_info(th
);
2968 uth
->uu_cdir
= NULLVP
;
2969 if (tvp
!= NULLVP
) {
2977 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2979 if ( (error
= vnode_getwithref(vp
)) ) {
2984 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
2986 if (vp
->v_type
!= VDIR
) {
2992 error
= mac_vnode_check_chdir(ctx
, vp
);
2996 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3000 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
3001 if (vfs_busy(mp
, LK_NOWAIT
)) {
3005 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3014 if ( (error
= vnode_ref(vp
)) )
3019 thread_t th
= vfs_context_thread(ctx
);
3021 uthread_t uth
= get_bsdthread_info(th
);
3024 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3049 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3051 return common_fchdir(p
, uap
, 0);
3055 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3057 return common_fchdir(p
, (void *)uap
, 1);
3061 * Change current working directory (".").
3063 * Returns: 0 Success
3064 * change_dir:ENOTDIR
3066 * vnode_ref:ENOENT No such file or directory
3070 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3072 struct filedesc
*fdp
= p
->p_fd
;
3074 struct nameidata nd
;
3076 vfs_context_t ctx
= vfs_context_current();
3078 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3079 UIO_USERSPACE
, uap
->path
, ctx
);
3080 error
= change_dir(&nd
, ctx
);
3083 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3084 vnode_put(nd
.ni_vp
);
3088 * drop the iocount we picked up in change_dir
3090 vnode_put(nd
.ni_vp
);
3093 thread_t th
= vfs_context_thread(ctx
);
3095 uthread_t uth
= get_bsdthread_info(th
);
3097 uth
->uu_cdir
= nd
.ni_vp
;
3098 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3100 vnode_rele(nd
.ni_vp
);
3106 fdp
->fd_cdir
= nd
.ni_vp
;
3120 * Change current working directory (".") for the entire process
3122 * Parameters: p Process requesting the call
3123 * uap User argument descriptor (see below)
3126 * Indirect parameters: uap->path Directory path
3128 * Returns: 0 Success
3129 * common_chdir: ENOTDIR
3130 * common_chdir: ENOENT No such file or directory
3135 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3137 return common_chdir(p
, (void *)uap
, 0);
3143 * Change current working directory (".") for a single thread
3145 * Parameters: p Process requesting the call
3146 * uap User argument descriptor (see below)
3149 * Indirect parameters: uap->path Directory path
3151 * Returns: 0 Success
3152 * common_chdir: ENOTDIR
3153 * common_chdir: ENOENT No such file or directory
3158 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3160 return common_chdir(p
, (void *)uap
, 1);
3165 * Change notion of root (``/'') directory.
3169 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3171 struct filedesc
*fdp
= p
->p_fd
;
3173 struct nameidata nd
;
3175 vfs_context_t ctx
= vfs_context_current();
3177 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3180 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3181 UIO_USERSPACE
, uap
->path
, ctx
);
3182 error
= change_dir(&nd
, ctx
);
3187 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3190 vnode_put(nd
.ni_vp
);
3195 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3196 vnode_put(nd
.ni_vp
);
3199 vnode_put(nd
.ni_vp
);
3203 fdp
->fd_rdir
= nd
.ni_vp
;
3204 fdp
->fd_flags
|= FD_CHROOT
;
3214 * Common routine for chroot and chdir.
3216 * Returns: 0 Success
3217 * ENOTDIR Not a directory
3218 * namei:??? [anything namei can return]
3219 * vnode_authorize:??? [anything vnode_authorize can return]
3222 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3227 if ((error
= namei(ndp
)))
3232 if (vp
->v_type
!= VDIR
) {
3238 error
= mac_vnode_check_chdir(ctx
, vp
);
3245 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3255 * Allocate the vnode data (for directories) associated with the file glob.
3258 fg_vn_data_alloc(void)
3260 struct fd_vn_data
*fvdata
;
3262 /* Allocate per fd vnode data */
3263 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3264 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3265 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3270 * Free the vnode data (for directories) associated with the file glob.
3273 fg_vn_data_free(void *fgvndata
)
3275 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3278 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3279 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3280 FREE(fvdata
, M_FD_VN_DATA
);
3284 * Check permissions, allocate an open file structure,
3285 * and call the device open routine if any.
3287 * Returns: 0 Success
3298 * XXX Need to implement uid, gid
3301 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3302 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3305 proc_t p
= vfs_context_proc(ctx
);
3306 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3307 struct fileproc
*fp
;
3310 int type
, indx
, error
;
3312 struct vfs_context context
;
3316 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3319 flags
= FFLAGS(uflags
);
3320 CLR(flags
, FENCRYPTED
);
3321 CLR(flags
, FUNENCRYPTED
);
3323 AUDIT_ARG(fflags
, oflags
);
3324 AUDIT_ARG(mode
, vap
->va_mode
);
3326 if ((error
= falloc_withalloc(p
,
3327 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3330 uu
->uu_dupfd
= -indx
- 1;
3332 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3333 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3334 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3335 fp_drop(p
, indx
, NULL
, 0);
3340 if (error
== ERESTART
)
3342 fp_free(p
, indx
, fp
);
3348 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3349 fp
->f_fglob
->fg_ops
= &vnops
;
3350 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3352 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3353 lf
.l_whence
= SEEK_SET
;
3356 if (flags
& O_EXLOCK
)
3357 lf
.l_type
= F_WRLCK
;
3359 lf
.l_type
= F_RDLCK
;
3361 if ((flags
& FNONBLOCK
) == 0)
3364 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3369 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3371 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3374 /* try to truncate by setting the size attribute */
3375 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3379 * For directories we hold some additional information in the fd.
3381 if (vnode_vtype(vp
) == VDIR
) {
3382 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3384 fp
->f_fglob
->fg_vn_data
= NULL
;
3390 * The first terminal open (without a O_NOCTTY) by a session leader
3391 * results in it being set as the controlling terminal.
3393 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3394 !(flags
& O_NOCTTY
)) {
3397 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3398 (caddr_t
)&tmp
, ctx
);
3402 if (flags
& O_CLOEXEC
)
3403 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3404 if (flags
& O_CLOFORK
)
3405 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3406 procfdtbl_releasefd(p
, indx
, NULL
);
3407 fp_drop(p
, indx
, fp
, 1);
3414 context
= *vfs_context_current();
3415 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3417 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3418 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3419 lf
.l_whence
= SEEK_SET
;
3422 lf
.l_type
= F_UNLCK
;
3425 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3428 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3430 fp_free(p
, indx
, fp
);
3436 * While most of the *at syscall handlers can call nameiat() which
3437 * is a wrapper around namei, the use of namei and initialisation
3438 * of nameidata are far removed and in different functions - namei
3439 * gets called in vn_open_auth for open1. So we'll just do here what
3443 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3444 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3447 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3451 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3452 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3456 c
= *((char *)(ndp
->ni_dirp
));
3462 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3467 if (vnode_vtype(dvp_at
) != VDIR
) {
3472 ndp
->ni_dvp
= dvp_at
;
3473 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3474 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3481 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3485 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3487 * Parameters: p Process requesting the open
3488 * uap User argument descriptor (see below)
3489 * retval Pointer to an area to receive the
3490 * return value from the system call
3492 * Indirect: uap->path Path to open (same as 'open')
3493 * uap->flags Flags to open (same as 'open')
3494 * uap->uid UID to set, if creating
3495 * uap->gid GID to set, if creating
3496 * uap->mode File mode, if creating (same as 'open')
3497 * uap->xsecurity ACL to set, if creating
3499 * Returns: 0 Success
3502 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3504 * XXX: We should enumerate the possible errno values here, and where
3505 * in the code they originated.
3508 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3510 struct filedesc
*fdp
= p
->p_fd
;
3512 kauth_filesec_t xsecdst
;
3513 struct vnode_attr va
;
3514 struct nameidata nd
;
3517 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3520 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3521 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3525 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3526 VATTR_SET(&va
, va_mode
, cmode
);
3527 if (uap
->uid
!= KAUTH_UID_NONE
)
3528 VATTR_SET(&va
, va_uid
, uap
->uid
);
3529 if (uap
->gid
!= KAUTH_GID_NONE
)
3530 VATTR_SET(&va
, va_gid
, uap
->gid
);
3531 if (xsecdst
!= NULL
)
3532 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3534 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3535 uap
->path
, vfs_context_current());
3537 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3538 fileproc_alloc_init
, NULL
, retval
);
3539 if (xsecdst
!= NULL
)
3540 kauth_filesec_free(xsecdst
);
3546 * Go through the data-protected atomically controlled open (2)
3548 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3550 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3551 int flags
= uap
->flags
;
3552 int class = uap
->class;
3553 int dpflags
= uap
->dpflags
;
3556 * Follow the same path as normal open(2)
3557 * Look up the item if it exists, and acquire the vnode.
3559 struct filedesc
*fdp
= p
->p_fd
;
3560 struct vnode_attr va
;
3561 struct nameidata nd
;
3566 /* Mask off all but regular access permissions */
3567 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3568 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3570 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3571 uap
->path
, vfs_context_current());
3574 * Initialize the extra fields in vnode_attr to pass down our
3576 * 1. target cprotect class.
3577 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3579 if (flags
& O_CREAT
) {
3580 /* lower level kernel code validates that the class is valid before applying it. */
3581 if (class != PROTECTION_CLASS_DEFAULT
) {
3583 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3584 * file behave the same as open (2)
3586 VATTR_SET(&va
, va_dataprotect_class
, class);
3590 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3591 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3592 /* Not allowed to write raw encrypted bytes */
3595 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3596 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3598 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3599 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3603 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3604 fileproc_alloc_init
, NULL
, retval
);
3610 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3611 int fd
, enum uio_seg segflg
, int *retval
)
3613 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3614 struct vnode_attr va
;
3615 struct nameidata nd
;
3619 /* Mask off all but regular access permissions */
3620 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3621 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3623 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3626 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3631 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3633 __pthread_testcancel(1);
3634 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3638 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3641 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3642 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3646 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3649 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3650 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3654 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3656 __pthread_testcancel(1);
3657 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3661 * openbyid_np: open a file given a file system id and a file system object id
3662 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3663 * file systems that don't support object ids it is a node id (uint64_t).
3665 * Parameters: p Process requesting the open
3666 * uap User argument descriptor (see below)
3667 * retval Pointer to an area to receive the
3668 * return value from the system call
3670 * Indirect: uap->path Path to open (same as 'open')
3672 * uap->fsid id of target file system
3673 * uap->objid id of target file system object
3674 * uap->flags Flags to open (same as 'open')
3676 * Returns: 0 Success
3680 * XXX: We should enumerate the possible errno values here, and where
3681 * in the code they originated.
3684 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3690 int buflen
= MAXPATHLEN
;
3692 vfs_context_t ctx
= vfs_context_current();
3694 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3698 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3699 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3703 AUDIT_ARG(value32
, fsid
.val
[0]);
3704 AUDIT_ARG(value64
, objid
);
3706 /*resolve path from fsid, objid*/
3708 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3713 error
= fsgetpath_internal(
3714 ctx
, fsid
.val
[0], objid
,
3715 buflen
, buf
, &pathlen
);
3721 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3729 error
= openat_internal(
3730 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3739 * Create a special file.
3741 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
3744 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3746 struct vnode_attr va
;
3747 vfs_context_t ctx
= vfs_context_current();
3749 struct nameidata nd
;
3753 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3754 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3756 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3757 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3758 return(mkfifo1(ctx
, uap
->path
, &va
));
3760 AUDIT_ARG(mode
, uap
->mode
);
3761 AUDIT_ARG(value32
, uap
->dev
);
3763 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3765 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3766 UIO_USERSPACE
, uap
->path
, ctx
);
3778 switch (uap
->mode
& S_IFMT
) {
3780 VATTR_SET(&va
, va_type
, VCHR
);
3783 VATTR_SET(&va
, va_type
, VBLK
);
3791 error
= mac_vnode_check_create(ctx
,
3792 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3797 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3800 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3804 int update_flags
= 0;
3806 // Make sure the name & parent pointers are hooked up
3807 if (vp
->v_name
== NULL
)
3808 update_flags
|= VNODE_UPDATE_NAME
;
3809 if (vp
->v_parent
== NULLVP
)
3810 update_flags
|= VNODE_UPDATE_PARENT
;
3813 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3816 add_fsevent(FSE_CREATE_FILE
, ctx
,
3824 * nameidone has to happen before we vnode_put(dvp)
3825 * since it may need to release the fs_nodelock on the dvp
3837 * Create a named pipe.
3839 * Returns: 0 Success
3842 * vnode_authorize:???
3846 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3850 struct nameidata nd
;
3852 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3853 UIO_USERSPACE
, upath
, ctx
);
3860 /* check that this is a new file and authorize addition */
3865 VATTR_SET(vap
, va_type
, VFIFO
);
3867 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
3870 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
3873 * nameidone has to happen before we vnode_put(dvp)
3874 * since it may need to release the fs_nodelock on the dvp
3887 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3889 * Parameters: p Process requesting the open
3890 * uap User argument descriptor (see below)
3893 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3894 * uap->uid UID to set
3895 * uap->gid GID to set
3896 * uap->mode File mode to set (same as 'mkfifo')
3897 * uap->xsecurity ACL to set, if creating
3899 * Returns: 0 Success
3902 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3904 * XXX: We should enumerate the possible errno values here, and where
3905 * in the code they originated.
3908 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
3911 kauth_filesec_t xsecdst
;
3912 struct vnode_attr va
;
3914 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3916 xsecdst
= KAUTH_FILESEC_NONE
;
3917 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
3918 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
3923 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3924 if (uap
->uid
!= KAUTH_UID_NONE
)
3925 VATTR_SET(&va
, va_uid
, uap
->uid
);
3926 if (uap
->gid
!= KAUTH_GID_NONE
)
3927 VATTR_SET(&va
, va_gid
, uap
->gid
);
3928 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3929 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3931 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
3933 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3934 kauth_filesec_free(xsecdst
);
3940 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
3942 struct vnode_attr va
;
3945 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3947 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
3952 my_strrchr(char *p
, int ch
)
3956 for (save
= NULL
;; ++p
) {
3965 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
3968 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
3970 int ret
, len
= _len
;
3972 *truncated_path
= 0;
3973 ret
= vn_getpath(dvp
, path
, &len
);
3974 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
3977 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
3978 if (len
> MAXPATHLEN
) {
3981 // the string got truncated!
3982 *truncated_path
= 1;
3983 ptr
= my_strrchr(path
, '/');
3985 *ptr
= '\0'; // chop off the string at the last directory component
3987 len
= strlen(path
) + 1;
3990 } else if (ret
== 0) {
3991 *truncated_path
= 1;
3992 } else if (ret
!= 0) {
3993 struct vnode
*mydvp
=dvp
;
3995 if (ret
!= ENOSPC
) {
3996 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3997 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
3999 *truncated_path
= 1;
4002 if (mydvp
->v_parent
!= NULL
) {
4003 mydvp
= mydvp
->v_parent
;
4004 } else if (mydvp
->v_mount
) {
4005 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4008 // no parent and no mount point? only thing is to punt and say "/" changed
4009 strlcpy(path
, "/", _len
);
4014 if (mydvp
== NULL
) {
4019 ret
= vn_getpath(mydvp
, path
, &len
);
4020 } while (ret
== ENOSPC
);
4028 * Make a hard file link.
4030 * Returns: 0 Success
4035 * vnode_authorize:???
4040 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4041 user_addr_t link
, int flag
, enum uio_seg segflg
)
4043 vnode_t vp
, dvp
, lvp
;
4044 struct nameidata nd
;
4050 int need_event
, has_listeners
;
4051 char *target_path
= NULL
;
4054 vp
= dvp
= lvp
= NULLVP
;
4056 /* look up the object we are linking to */
4057 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4058 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4061 error
= nameiat(&nd
, fd1
);
4069 * Normally, linking to directories is not supported.
4070 * However, some file systems may have limited support.
4072 if (vp
->v_type
== VDIR
) {
4073 if (!(vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSDIRLINKS
)) {
4074 error
= EPERM
; /* POSIX */
4077 /* Linking to a directory requires ownership. */
4078 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4079 struct vnode_attr dva
;
4082 VATTR_WANTED(&dva
, va_uid
);
4083 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4084 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4085 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4092 /* lookup the target node */
4096 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4097 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4099 error
= nameiat(&nd
, fd2
);
4106 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4110 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4111 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4114 /* target node must not exist */
4115 if (lvp
!= NULLVP
) {
4119 /* cannot link across mountpoints */
4120 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4125 /* authorize creation of the target note */
4126 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4129 /* and finally make the link */
4130 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4135 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4139 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4143 has_listeners
= kauth_authorize_fileop_has_listeners();
4145 if (need_event
|| has_listeners
) {
4146 char *link_to_path
= NULL
;
4147 int len
, link_name_len
;
4149 /* build the path to the new link file */
4150 GET_PATH(target_path
);
4151 if (target_path
== NULL
) {
4156 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4158 if (has_listeners
) {
4159 /* build the path to file we are linking to */
4160 GET_PATH(link_to_path
);
4161 if (link_to_path
== NULL
) {
4166 link_name_len
= MAXPATHLEN
;
4167 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4169 * Call out to allow 3rd party notification of link.
4170 * Ignore result of kauth_authorize_fileop call.
4172 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4173 (uintptr_t)link_to_path
,
4174 (uintptr_t)target_path
);
4176 if (link_to_path
!= NULL
) {
4177 RELEASE_PATH(link_to_path
);
4182 /* construct fsevent */
4183 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4185 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4188 // build the path to the destination of the link
4189 add_fsevent(FSE_CREATE_FILE
, ctx
,
4190 FSE_ARG_STRING
, len
, target_path
,
4191 FSE_ARG_FINFO
, &finfo
,
4195 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4196 FSE_ARG_VNODE
, vp
->v_parent
,
4204 * nameidone has to happen before we vnode_put(dvp)
4205 * since it may need to release the fs_nodelock on the dvp
4208 if (target_path
!= NULL
) {
4209 RELEASE_PATH(target_path
);
4221 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4223 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4224 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4228 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4230 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4233 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4234 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4238 * Make a symbolic link.
4240 * We could add support for ACLs here too...
4244 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4245 user_addr_t link
, enum uio_seg segflg
)
4247 struct vnode_attr va
;
4250 struct nameidata nd
;
4252 uint32_t dfflags
; // Directory file flags
4257 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4258 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4259 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4261 path
= (char *)path_data
;
4265 AUDIT_ARG(text
, path
); /* This is the link string */
4267 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4270 error
= nameiat(&nd
, fd
);
4276 p
= vfs_context_proc(ctx
);
4278 VATTR_SET(&va
, va_type
, VLNK
);
4279 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4282 * Handle inheritance of restricted flag
4284 error
= vnode_flags(dvp
, &dfflags
, ctx
);
4287 if (dfflags
& SF_RESTRICTED
)
4288 VATTR_SET(&va
, va_flags
, SF_RESTRICTED
);
4291 error
= mac_vnode_check_create(ctx
,
4292 dvp
, &nd
.ni_cnd
, &va
);
4305 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4306 /* get default ownership, etc. */
4308 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4310 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4313 if (error
== 0 && vp
)
4314 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4317 /* do fallback attribute handling */
4318 if (error
== 0 && vp
)
4319 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4322 int update_flags
= 0;
4324 /*check if a new vnode was created, else try to get one*/
4326 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4328 nd
.ni_op
= OP_LOOKUP
;
4330 nd
.ni_cnd
.cn_flags
= 0;
4331 error
= nameiat(&nd
, fd
);
4338 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4339 /* call out to allow 3rd party notification of rename.
4340 * Ignore result of kauth_authorize_fileop call.
4342 if (kauth_authorize_fileop_has_listeners() &&
4344 char *new_link_path
= NULL
;
4347 /* build the path to the new link file */
4348 new_link_path
= get_pathbuff();
4350 vn_getpath(dvp
, new_link_path
, &len
);
4351 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4352 new_link_path
[len
- 1] = '/';
4353 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4356 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4357 (uintptr_t)path
, (uintptr_t)new_link_path
);
4358 if (new_link_path
!= NULL
)
4359 release_pathbuff(new_link_path
);
4362 // Make sure the name & parent pointers are hooked up
4363 if (vp
->v_name
== NULL
)
4364 update_flags
|= VNODE_UPDATE_NAME
;
4365 if (vp
->v_parent
== NULLVP
)
4366 update_flags
|= VNODE_UPDATE_PARENT
;
4369 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4372 add_fsevent(FSE_CREATE_FILE
, ctx
,
4380 * nameidone has to happen before we vnode_put(dvp)
4381 * since it may need to release the fs_nodelock on the dvp
4389 if (path
&& (path
!= (char *)path_data
))
4390 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4396 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4398 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4399 uap
->link
, UIO_USERSPACE
));
4403 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4404 __unused
int32_t *retval
)
4406 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4407 uap
->path2
, UIO_USERSPACE
));
4411 * Delete a whiteout from the filesystem.
4412 * No longer supported.
/*
 * undelete: whiteout-removal entry point. None of its arguments are
 * consumed: p, uap and retval are all marked __unused.
 */
4415 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4421 * Delete a name from the filesystem.
/*
 * unlinkat_internal: common implementation behind the unlink-style entry
 * points in this file.
 *
 * Parameters:	ctx		VFS context used for the lookup, the
 *				authorization call, kauth notification and
 *				fsevent posting
 *		fd		descriptor handed to nameiat()
 *		start_dvp	starting directory vnode; assigned to
 *				nd.ni_dvp (per the comment below it takes
 *				precedence over fd when supplied)
 *		path_arg	path handed to NDINIT()
 *		segflg		uio segment selector handed to NDINIT()
 *		unlink_flags	VNODE_REMOVE_* bits. NO_AUDIT_PATH suppresses
 *				AUDITVNPATH1 on the lookup; NODELETEBUSY and
 *				SKIP_NAMESPACE_EVENT are copied into the
 *				flags later given to vn_remove().
 */
4425 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4426 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4428 struct nameidata nd
;
4431 struct componentname
*cnp
;
4436 struct vnode_attr va
;
4443 struct vnode_attr
*vap
;
4445 int retry_count
= 0;
/* Lookup flags start as LOCKPARENT; path auditing is added unless the caller opted out. */
4448 cn_flags
= LOCKPARENT
;
4449 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4450 cn_flags
|= AUDITVNPATH1
;
4451 /* If a starting dvp is passed, it trumps any fd passed. */
4456 /* unlink or delete is allowed on rsrc forks and named streams */
4457 cn_flags
|= CN_ALLOWRSRCFORK
;
/* Set up a DELETE lookup tagged OP_UNLINK with the flags accumulated above. */
4468 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4470 nd
.ni_dvp
= start_dvp
;
/* NAMEI_COMPOUNDREMOVE is set on the nameidata before nameiat() runs. */
4471 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4475 error
= nameiat(&nd
, fd
);
4483 /* With Carbon delete semantics, busy files cannot be deleted */
4484 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4485 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4488 /* Skip any potential upcalls if told to. */
4489 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4490 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
/* batched records whether vp reports compound-remove support. */
4494 batched
= vnode_compound_remove_available(vp
);
4496 * The root of a mounted filesystem cannot be deleted.
4498 if (vp
->v_flag
& VROOT
) {
4503 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
/* ENOENT from authorization is handled under the MAX_AUTHORIZE_ENOENT_RETRIES bound on retry_count. */
4505 if (error
== ENOENT
) {
4506 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4507 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4518 if (!vnode_compound_remove_available(dvp
)) {
4519 panic("No vp, but no compound remove?");
4524 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4527 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4528 /* XXX need to get these data in batched VNOP */
4529 get_fse_info(vp
, &finfo
, ctx
);
4532 error
= vfs_get_notify_attributes(&va
);
/* Both an fsevent consumer and a kauth fileop listener are reasons to look at the path below. */
4541 has_listeners
= kauth_authorize_fileop_has_listeners();
4542 if (need_event
|| has_listeners
) {
4550 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
/* Resource-fork requests (CN_WANTSRSRCFORK) are removed via vnode_removenamedstream(); vn_remove() is the other visible removal call. */
4554 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4555 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4559 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4561 if (error
== EKEEPLOOKING
) {
4563 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4566 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4567 panic("EKEEPLOOKING, but continue flag not set?");
4570 if (vnode_isdir(vp
)) {
4574 goto lookup_continue
;
4575 } else if (error
== ENOENT
&& batched
) {
4576 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4577 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4579 * For compound VNOPs, the authorization callback may
4580 * return ENOENT in case of racing hardlink lookups
4581 * hitting the name cache, redrive the lookup.
4591 * Call out to allow 3rd party notification of delete.
4592 * Ignore result of kauth_authorize_fileop call.
4595 if (has_listeners
) {
4596 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4597 KAUTH_FILEOP_DELETE
,
4602 if (vp
->v_flag
& VISHARDLINK
) {
4604 // if a hardlink gets deleted we want to blow away the
4605 // v_parent link because the path that got us to this
4606 // instance of the link is no longer valid. this will
4607 // force the next call to get the path to ask the file
4608 // system instead of just following the v_parent link.
4610 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
/* Hard links get their fsevent info from get_fse_info(); the vap-based helper is the other visible source. */
4615 if (vp
->v_flag
& VISHARDLINK
) {
4616 get_fse_info(vp
, &finfo
, ctx
);
4618 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4620 if (truncated_path
) {
4621 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4623 add_fsevent(FSE_DELETE
, ctx
,
4624 FSE_ARG_STRING
, len
, path
,
4625 FSE_ARG_FINFO
, &finfo
,
4636 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4637 * will cause its shadow file to go away if necessary.
4639 if (vp
&& (vnode_isnamedstream(vp
)) &&
4640 (vp
->v_parent
!= NULLVP
) &&
4641 vnode_isshadow(vp
)) {
4646 * nameidone has to happen before we vnode_put(dvp)
4647 * since it may need to release the fs_nodelock on the dvp
/*
 * unlink1: wrapper around unlinkat_internal() that supplies AT_FDCWD in the
 * descriptor slot and passes ctx, start_dvp, path_arg and segflg through
 * unchanged.
 */
4663 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4664 enum uio_seg segflg
, int unlink_flags
)
4666 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4671 * Delete a name from the filesystem using Carbon semantics.
/*
 * delete: calls unlinkat_internal() on uap->path with AT_FDCWD, no starting
 * directory vnode (NULLVP), UIO_USERSPACE, and the
 * VNODE_REMOVE_NODELETEBUSY flag set. p and retval are marked __unused.
 */
4674 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4676 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4677 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4681 * Delete a name from the filesystem using POSIX semantics.
/*
 * unlink: calls unlinkat_internal() on uap->path with AT_FDCWD, no starting
 * directory vnode (NULLVP), UIO_USERSPACE, and no removal flags (0).
 * p and retval are marked __unused.
 */
4684 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4686 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4687 uap
->path
, UIO_USERSPACE
, 0));
/*
 * unlinkat: descriptor-relative removal. uap->flag is first tested for any
 * bit other than AT_REMOVEDIR. With AT_REMOVEDIR set the request is routed
 * to rmdirat_internal(); otherwise it goes to unlinkat_internal() with
 * uap->fd, NULLVP as the starting directory vnode, and no removal flags.
 * p and retval are marked __unused.
 */
4691 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4693 if (uap
->flag
& ~AT_REMOVEDIR
)
4696 if (uap
->flag
& AT_REMOVEDIR
)
4697 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4698 uap
->path
, UIO_USERSPACE
));
4700 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4701 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4705 * Reposition read/write file offset.
/*
 * lseek: reposition the offset stored in the file's fileglob.
 *
 * Parameters:	p	process whose descriptor table is consulted by
 *			fp_getfvp()
 *		uap	supplies fd, offset and whence
 *		retval	receives the resulting fg_offset
 *
 * The working offset starts as uap->offset; the switch on uap->whence adds
 * either the current fg_offset or the size reported by vnode_size() to it.
 * The result is then range-checked before being stored.
 */
4708 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4710 struct fileproc
*fp
;
4712 struct vfs_context
*ctx
;
4713 off_t offset
= uap
->offset
, file_size
;
/* Resolve the descriptor to its fileproc and vnode; ENOTSUP is singled out for separate handling. */
4716 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4717 if (error
== ENOTSUP
)
4721 if (vnode_isfifo(vp
)) {
4727 ctx
= vfs_context_current();
/* A zero-distance L_INCR only reads the offset, so it gets the get_offset MAC check rather than change_offset. */
4729 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4730 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4733 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4740 if ( (error
= vnode_getwithref(vp
)) ) {
4745 switch (uap
->whence
) {
4747 offset
+= fp
->f_fglob
->fg_offset
;
4750 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4752 offset
+= file_size
;
/* A positive request that produced a negative sum means the addition wrapped. */
4760 if (uap
->offset
> 0 && offset
< 0) {
4761 /* Incremented/relative move past max size */
4765 * Allow negative offsets on character devices, per
4766 * POSIX 1003.1-2001. Most likely for writing disk
4769 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4770 /* Decremented/relative move before start */
4774 fp
->f_fglob
->fg_offset
= offset
;
4775 *retval
= fp
->f_fglob
->fg_offset
;
4781 * An lseek can affect whether data is "available to read." Use
4782 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4784 post_event_if_success(vp
, error
, NOTE_NONE
);
4785 (void)vnode_put(vp
);
4792 * Check access permissions.
4794 * Returns: 0 Success
4795 * vnode_authorize:???
/*
 * access1: turn access-style request bits into a kauth action and
 * authorize it against vp.
 *
 * Parameters:	vp	vnode being checked
 *		dvp	passed to vnode_authorize() together with vp
 *		uflags	requested access bits
 *		ctx	context given to mac_vnode_check_access() and
 *			vnode_authorize()
 *
 * When uflags carries no _ACCESS_EXTENDED_MASK bits, the classic bits are
 * mapped to KAUTH_VNODE_* rights, with W_OK and X_OK choosing a different
 * right for directories than for other vnodes. The other visible
 * assignment derives the action directly as uflags >> 8.
 */
4798 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4800 kauth_action_t action
;
4804 * If just the regular access bits, convert them to something
4805 * that vnode_authorize will understand.
4807 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4810 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
/* Write access: adding entries for a directory, writing data otherwise. */
4811 if (uflags
& W_OK
) {
4812 if (vnode_isdir(vp
)) {
4813 action
|= KAUTH_VNODE_ADD_FILE
|
4814 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4815 /* might want delete rights here too */
4817 action
|= KAUTH_VNODE_WRITE_DATA
;
/* Execute access: search for a directory, execute otherwise. */
4820 if (uflags
& X_OK
) {
4821 if (vnode_isdir(vp
)) {
4822 action
|= KAUTH_VNODE_SEARCH
;
4824 action
|= KAUTH_VNODE_EXECUTE
;
4828 /* take advantage of definition of uflags */
4829 action
= uflags
>> 8;
4833 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4838 /* action == 0 means only check for existence */
/* KAUTH_VNODE_ACCESS is always OR-ed into the action handed to vnode_authorize(). */
4840 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4851 * access_extended: Check access permissions in bulk.
4853 * Description: uap->entries Pointer to an array of accessx
4854 * descriptor structs, plus one or
4855 * more NUL-terminated strings (see
4856 * "Notes" section below).
4857 * uap->size Size of the area pointed to by
4859 * uap->results Pointer to the results array.
4861 * Returns: 0 Success
4862 * ENOMEM Insufficient memory
4863 * EINVAL Invalid arguments
4864 * namei:EFAULT Bad address
4865 * namei:ENAMETOOLONG Filename too long
4866 * namei:ENOENT No such file or directory
4867 * namei:ELOOP Too many levels of symbolic links
4868 * namei:EBADF Bad file descriptor
4869 * namei:ENOTDIR Not a directory
4874 * uap->results Array contents modified
4876 * Notes: The uap->entries are structured as an arbitrary length array
4877 * of accessx descriptors, followed by one or more NUL-terminated
4880 * struct accessx_descriptor[0]
4882 * struct accessx_descriptor[n]
4883 * char name_data[0];
4885 * We determine the entry count by walking the buffer containing
4886 * the uap->entries argument descriptor. For each descriptor we
4887 * see, the valid values for the offset ad_name_offset will be
4888 * in the byte range:
4890 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4892 * [ uap->entries + uap->size - 2 ]
4894 * since we must have at least one string, and the string must
4895 * be at least one character plus the NUL terminator in length.
4897 * XXX: Need to support the check-as uid argument
/*
 * access_extended: bulk access check. Visible flow: validate uap->size,
 * copy the request in (small requests use the on-stack buffer, larger ones
 * are MALLOC'd), build a local context from the real credential, count the
 * descriptors, allocate one errno_t result slot per descriptor, run
 * access1() for each entry, copy the results out, then free the buffers and
 * drop the credential reference. p and retval are marked __unused.
 */
4900 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
4902 struct accessx_descriptor
*input
= NULL
;
4903 errno_t
*result
= NULL
;
4906 unsigned int desc_max
, desc_actual
, i
, j
;
4907 struct vfs_context context
;
4908 struct nameidata nd
;
4912 #define ACCESSX_MAX_DESCR_ON_STACK 10
4913 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
/* Cleared up front so the IS_VALID_CRED() test at the end is meaningful on early exits. */
4915 context
.vc_ucred
= NULL
;
4918 * Validate parameters; if valid, copy the descriptor array and string
4919 * arguments into local memory. Before proceeding, the following
4920 * conditions must have been met:
4922 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4923 * o There must be sufficient room in the request for at least one
4924 * descriptor and a one yte NUL terminated string.
4925 * o The allocation of local storage must not fail.
4927 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
4929 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
/* Requests that fit in stack_input avoid the heap allocation. */
4931 if (uap
->size
<= sizeof (stack_input
)) {
4932 input
= stack_input
;
4934 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
4935 if (input
== NULL
) {
4940 error
= copyin(uap
->entries
, input
, uap
->size
);
4944 AUDIT_ARG(opaque
, input
, uap
->size
);
4947 * Force NUL termination of the copyin buffer to avoid nami() running
4948 * off the end. If the caller passes us bogus data, they may get a
4951 ((char *)input
)[uap
->size
- 1] = 0;
4954 * Access is defined as checking against the process' real identity,
4955 * even if operations are checking the effective identity. This
4956 * requires that we use a local vfs context.
4958 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
4959 context
.vc_thread
= current_thread();
4962 * Find out how many entries we have, so we can allocate the result
4963 * array by walking the list and adjusting the count downward by the
4964 * earliest string offset we see.
/* Upper bound: every byte except the minimum 2-byte string is assumed to be descriptors. */
4966 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
4967 desc_actual
= desc_max
;
4968 for (i
= 0; i
< desc_actual
; i
++) {
4970 * Take the offset to the name string for this entry and
4971 * convert to an input array index, which would be one off
4972 * the end of the array if this entry was the lowest-addressed
4975 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
4978 * An offset greater than the max allowable offset is an error.
4979 * It is also an error for any valid entry to point
4980 * to a location prior to the end of the current entry, if
4981 * it's not a reference to the string of the previous entry.
4983 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
4989 * An offset of 0 means use the previous descriptor's offset;
4990 * this is used to chain multiple requests for the same file
4991 * to avoid multiple lookups.
4994 /* This is not valid for the first entry */
5003 * If the offset of the string for this descriptor is before
5004 * what we believe is the current actual last descriptor,
5005 * then we need to adjust our estimate downward; this permits
5006 * the string table following the last descriptor to be out
5007 * of order relative to the descriptor list.
5009 if (j
< desc_actual
)
5014 * We limit the actual number of descriptors we are willing to process
5015 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5016 * requested does not exceed this limit,
5018 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
/* One errno_t result per descriptor. */
5022 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5023 if (result
== NULL
) {
5029 * Do the work by iterating over the descriptor entries we know to
5030 * at least appear to contain valid data.
5033 for (i
= 0; i
< desc_actual
; i
++) {
5035 * If the ad_name_offset is 0, then we use the previous
5036 * results to make the check; otherwise, we are looking up
5039 if (input
[i
].ad_name_offset
!= 0) {
5040 /* discard old vnodes */
5051 * Scan forward in the descriptor list to see if we
5052 * need the parent vnode. We will need it if we are
5053 * deleting, since we must have rights to remove
5054 * entries in the parent directory, as well as the
5055 * rights to delete the object itself.
5057 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
/* Only the chained entries (ad_name_offset == 0) that follow entry i are scanned. */
5058 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5059 if (input
[j
].ad_flags
& _DELETE_OK
)
5062 niopts
= FOLLOW
| AUDITVNPATH1
;
5064 /* need parent for vnode_authorize for deletion test */
5066 niopts
|= WANTPARENT
;
/* The name lives in the copied-in buffer, hence UIO_SYSSPACE and an address computed from input. */
5069 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5070 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5082 * Handle lookup errors.
5092 /* run this access check */
5093 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5096 /* fatal lookup error */
5102 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5104 /* copy out results */
5105 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
/* stack_input must never be handed to FREE(); only a heap-allocated input is released. */
5108 if (input
&& input
!= stack_input
)
5109 FREE(input
, M_TEMP
);
5111 FREE(result
, M_TEMP
);
5116 if (IS_VALID_CRED(context
.vc_ucred
))
5117 kauth_cred_unref(&context
.vc_ucred
);
5123 * Returns: 0 Success
5124 * namei:EFAULT Bad address
5125 * namei:ENAMETOOLONG Filename too long
5126 * namei:ENOENT No such file or directory
5127 * namei:ELOOP Too many levels of symbolic links
5128 * namei:EBADF Bad file descriptor
5129 * namei:ENOTDIR Not a directory
/*
 * faccessat_internal: look up path relative to fd and run access1() on the
 * result.
 *
 * Parameters:	ctx	caller's context; its thread is always copied into
 *			the local context, its credential only on the
 *			other visible assignment to context.vc_ucred
 *		fd	descriptor handed to nameiat()
 *		path	path to look up
 *		amode	access bits passed to access1(); _DELETE_OK also
 *			requests the parent vnode
 *		flag	AT_EACCESS selects which credential is used
 *		segflg	uio segment selector handed to NDINIT()
 */
5134 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5135 int flag
, enum uio_seg segflg
)
5138 struct nameidata nd
;
5140 struct vfs_context context
;
5142 int is_namedstream
= 0;
5146 * Unless the AT_EACCESS option is used, Access is defined as checking
5147 * against the process' real identity, even if operations are checking
5148 * the effective identity. So we need to tweak the credential
5149 * in the context for that case.
5151 if (!(flag
& AT_EACCESS
))
5152 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5154 context
.vc_ucred
= ctx
->vc_ucred
;
5155 context
.vc_thread
= ctx
->vc_thread
;
5158 niopts
= FOLLOW
| AUDITVNPATH1
;
5159 /* need parent for vnode_authorize for deletion test */
5160 if (amode
& _DELETE_OK
)
5161 niopts
|= WANTPARENT
;
5162 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5166 /* access(F_OK) calls are allowed for resource forks. */
5168 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5170 error
= nameiat(&nd
, fd
);
5175 /* Grab reference on the shadow stream file vnode to
5176 * force an inactive on release which will mark it
5179 if (vnode_isnamedstream(nd
.ni_vp
) &&
5180 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5181 vnode_isshadow(nd
.ni_vp
)) {
5183 vnode_ref(nd
.ni_vp
);
/* The check runs with the local context, not the caller's ctx. */
5187 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
/* Balances the vnode_ref() taken above for shadow named streams. */
5190 if (is_namedstream
) {
5191 vnode_rele(nd
.ni_vp
);
5195 vnode_put(nd
.ni_vp
);
/* The parent was only requested (WANTPARENT) when _DELETE_OK was set, so it is only released in that case. */
5196 if (amode
& _DELETE_OK
)
5197 vnode_put(nd
.ni_dvp
);
/* Mirrors the kauth_cred_copy_real() at the top: only the copied credential is unreferenced. */
5201 if (!(flag
& AT_EACCESS
))
5202 kauth_cred_unref(&context
.vc_ucred
);
/*
 * access: forwards to faccessat_internal() with the current VFS context,
 * AT_FDCWD, uap->path, uap->flags as the access mode, a flag value of 0
 * (so AT_EACCESS is not set), and UIO_USERSPACE. p and retval are marked
 * __unused.
 */
5207 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5209 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5210 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
/*
 * faccessat: descriptor-relative access check. uap->flag is first tested
 * for any bit other than AT_EACCESS; the request is then forwarded to
 * faccessat_internal() with uap->fd, uap->path, uap->amode, uap->flag and
 * UIO_USERSPACE. p and retval are marked __unused.
 */
5214 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5215 __unused
int32_t *retval
)
5217 if (uap
->flag
& ~AT_EACCESS
)
5220 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5221 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5225 * Returns: 0 Success
/*
 * fstatat_internal: common implementation behind the stat-family entry
 * points in this file.
 *
 * Parameters:	ctx		context used for vn_stat() and to decide
 *				between the 32- and 64-bit user layouts
 *		path		path to look up
 *		ub		user buffer that receives the stat data
 *		xsecurity	user buffer for extended security data, or
 *				USER_ADDR_NULL when none is wanted
 *		xsecurity_size	user address of the size word for xsecurity;
 *				read with fusize() and written with susize()
 *		isstat64	non-zero selects the stat64 layout
 *		segflg		uio segment selector
 *		fd		descriptor handed to nameiat()
 *		flag		AT_SYMLINK_NOFOLLOW selects NOFOLLOW for the
 *				lookup; otherwise FOLLOW is used
 */
5232 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5233 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5234 enum uio_seg segflg
, int fd
, int flag
)
5236 struct nameidata nd
;
5243 struct user64_stat user64_sb
;
5244 struct user32_stat user32_sb
;
5245 struct user64_stat64 user64_sb64
;
5246 struct user32_stat64 user32_sb64
;
5250 kauth_filesec_t fsec
;
5251 size_t xsecurity_bufsize
;
5254 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5255 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5259 int is_namedstream
= 0;
5260 /* stat calls are allowed for resource forks. */
5261 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5263 error
= nameiat(&nd
, fd
);
5266 fsec
= KAUTH_FILESEC_NONE
;
5268 statptr
= (void *)&source
;
5271 /* Grab reference on the shadow stream file vnode to
5272 * force an inactive on release which will mark it
5275 if (vnode_isnamedstream(nd
.ni_vp
) &&
5276 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5277 vnode_isshadow(nd
.ni_vp
)) {
5279 vnode_ref(nd
.ni_vp
);
/* Security data is only requested from vn_stat() when the caller supplied an xsecurity buffer. */
5283 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5286 if (is_namedstream
) {
5287 vnode_rele(nd
.ni_vp
);
5290 vnode_put(nd
.ni_vp
);
5295 /* Zap spare fields */
/* stat64 layout: clear the spare fields, then convert to the caller's 64- or 32-bit structure. */
5296 if (isstat64
!= 0) {
5297 source
.sb64
.st_lspare
= 0;
5298 source
.sb64
.st_qspare
[0] = 0LL;
5299 source
.sb64
.st_qspare
[1] = 0LL;
5300 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5301 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5302 my_size
= sizeof(dest
.user64_sb64
);
5303 sbp
= (caddr_t
)&dest
.user64_sb64
;
5305 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5306 my_size
= sizeof(dest
.user32_sb64
);
5307 sbp
= (caddr_t
)&dest
.user32_sb64
;
5310 * Check if we raced (post lookup) against the last unlink of a file.
5312 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5313 source
.sb64
.st_nlink
= 1;
/* Same sequence for the non-64 stat layout. */
5316 source
.sb
.st_lspare
= 0;
5317 source
.sb
.st_qspare
[0] = 0LL;
5318 source
.sb
.st_qspare
[1] = 0LL;
5319 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5320 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5321 my_size
= sizeof(dest
.user64_sb
);
5322 sbp
= (caddr_t
)&dest
.user64_sb
;
5324 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5325 my_size
= sizeof(dest
.user32_sb
);
5326 sbp
= (caddr_t
)&dest
.user32_sb
;
5330 * Check if we raced (post lookup) against the last unlink of a file.
5332 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5333 source
.sb
.st_nlink
= 1;
/* sbp/my_size were set by whichever conversion branch ran above. */
5336 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5339 /* caller wants extended security information? */
5340 if (xsecurity
!= USER_ADDR_NULL
) {
5342 /* did we get any? */
5343 if (fsec
== KAUTH_FILESEC_NONE
) {
5344 if (susize(xsecurity_size
, 0) != 0) {
5349 /* find the user buffer size */
5350 xsecurity_bufsize
= fusize(xsecurity_size
);
5352 /* copy out the actual data size */
5353 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5358 /* if the caller supplied enough room, copy out to it */
5359 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5360 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5364 if (fsec
!= KAUTH_FILESEC_NONE
)
5365 kauth_filesec_free(fsec
);
5370 * stat_extended: Get file status; with extended security (ACL).
5372 * Parameters: p (ignored)
5373 * uap User argument descriptor (see below)
5376 * Indirect: uap->path Path of file to get status from
5377 * uap->ub User buffer (holds file status info)
5378 * uap->xsecurity ACL to get (extended security)
5379 * uap->xsecurity_size Size of ACL
5381 * Returns: 0 Success
/*
 * Forwards to fstatat_internal() with the caller's xsecurity buffer and
 * size pointer, isstat64 = 0, UIO_USERSPACE and AT_FDCWD.
 */
5386 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5387 __unused
int32_t *retval
)
5389 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5390 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5395 * Returns: 0 Success
5396 * fstatat_internal:??? [see fstatat_internal() in this file]
/*
 * stat: forwards to fstatat_internal() with no extended-security buffer
 * (xsecurity and xsecurity_size both 0), isstat64 = 0, UIO_USERSPACE,
 * AT_FDCWD and flag 0. p and retval are marked __unused.
 */
5399 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5401 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5402 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
/*
 * stat64: same forwarding as stat(), but with isstat64 = 1 so
 * fstatat_internal() produces the stat64 layout. p and retval are marked
 * __unused.
 */
5406 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5408 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5409 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5413 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5415 * Parameters: p (ignored)
5416 * uap User argument descriptor (see below)
5419 * Indirect: uap->path Path of file to get status from
5420 * uap->ub User buffer (holds file status info)
5421 * uap->xsecurity ACL to get (extended security)
5422 * uap->xsecurity_size Size of ACL
5424 * Returns: 0 Success
/*
 * Forwards to fstatat_internal() with the caller's xsecurity buffer and
 * size pointer, isstat64 = 1, UIO_USERSPACE and AT_FDCWD.
 */
5429 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5431 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5432 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5437 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5439 * Parameters: p (ignored)
5440 * uap User argument descriptor (see below)
5443 * Indirect: uap->path Path of file to get status from
5444 * uap->ub User buffer (holds file status info)
5445 * uap->xsecurity ACL to get (extended security)
5446 * uap->xsecurity_size Size of ACL
5448 * Returns: 0 Success
/*
 * Forwards to fstatat_internal() with the caller's xsecurity buffer and
 * size pointer, isstat64 = 0, UIO_USERSPACE, AT_FDCWD, and
 * AT_SYMLINK_NOFOLLOW so the lookup does not follow a trailing symlink.
 */
5453 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5455 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5456 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5457 AT_SYMLINK_NOFOLLOW
));
5461 * Get file status; this version does not follow links.
/*
 * lstat: forwards to fstatat_internal() with no extended-security buffer,
 * isstat64 = 0, UIO_USERSPACE, AT_FDCWD and AT_SYMLINK_NOFOLLOW.
 * p and retval are marked __unused.
 */
5464 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5466 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5467 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
/*
 * lstat64: same forwarding as lstat(), but with isstat64 = 1 so
 * fstatat_internal() produces the stat64 layout. p and retval are marked
 * __unused.
 */
5471 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5473 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5474 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5478 * lstat64_extended: Get file status; can handle large inode numbers; does not
5479 * follow links; with extended security (ACL).
5481 * Parameters: p (ignored)
5482 * uap User argument descriptor (see below)
5485 * Indirect: uap->path Path of file to get status from
5486 * uap->ub User buffer (holds file status info)
5487 * uap->xsecurity ACL to get (extended security)
5488 * uap->xsecurity_size Size of ACL
5490 * Returns: 0 Success
/*
 * Forwards to fstatat_internal() with the caller's xsecurity buffer and
 * size pointer, isstat64 = 1, UIO_USERSPACE, AT_FDCWD, and
 * AT_SYMLINK_NOFOLLOW so the lookup does not follow a trailing symlink.
 */
5495 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5497 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5498 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5499 AT_SYMLINK_NOFOLLOW
));
/*
 * fstatat: descriptor-relative stat. uap->flag is first tested for any bit
 * other than AT_SYMLINK_NOFOLLOW; the request is then forwarded to
 * fstatat_internal() with no extended-security buffer, isstat64 = 0,
 * UIO_USERSPACE, uap->fd and uap->flag. p and retval are marked __unused.
 */
5503 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5505 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5508 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5509 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
/*
 * fstatat64: same as fstatat(), but forwards with isstat64 = 1 so
 * fstatat_internal() produces the stat64 layout. uap->flag is first tested
 * for any bit other than AT_SYMLINK_NOFOLLOW. p and retval are marked
 * __unused.
 */
5513 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5514 __unused
int32_t *retval
)
5516 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5519 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5520 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5524 * Get configurable pathname variables.
5526 * Returns: 0 Success
5530 * Notes: Global implementation constants are intended to be
5531 * implemented in this function directly; all other constants
5532 * are per-FS implementation, and therefore must be handled in
5533 * each respective FS, instead.
5535 * XXX We implement some things globally right now that should actually be
5536 * XXX per-FS; we will need to deal with this at some point.
/*
 * pathconf: look up uap->path (following symlinks, user-space address) and
 * ask vn_pathconf() for the variable selected by uap->name. The answer is
 * written through retval by vn_pathconf(); the vnode obtained from the
 * lookup is released with vnode_put() afterwards. p is marked __unused.
 */
5540 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5543 struct nameidata nd
;
5544 vfs_context_t ctx
= vfs_context_current();
5546 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5547 UIO_USERSPACE
, uap
->path
, ctx
);
5552 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5554 vnode_put(nd
.ni_vp
);
5560 * Return target name of a symbolic link.
/*
 * readlinkat_internal: common implementation behind readlink() and
 * readlinkat().
 *
 * Parameters:	ctx	context used for the MAC check and VNOP_READLINK()
 *		fd	descriptor handed to nameiat()
 *		path	path of the link
 *		seg	uio segment selector for path
 *		buf	destination buffer for the link contents
 *		bufsize	size of buf in bytes
 *		bufseg	uio segment selector for buf
 *
 * The lookup uses NOFOLLOW, so the link itself is examined. On the
 * success path *retval is set to the number of bytes produced
 * (bufsize minus the uio residual).
 */
5564 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5565 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5571 struct nameidata nd
;
5572 char uio_buf
[ UIO_SIZEOF(1) ];
5574 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5577 error
= nameiat(&nd
, fd
);
/* Single-iovec read uio built in the on-stack uio_buf, pointing at the caller's buffer. */
5584 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5585 &uio_buf
[0], sizeof(uio_buf
));
5586 uio_addiov(auio
, buf
, bufsize
);
/* Anything that is not a symbolic link is handled separately from the read path. */
5587 if (vp
->v_type
!= VLNK
) {
5591 error
= mac_vnode_check_readlink(ctx
, vp
);
5594 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5597 error
= VNOP_READLINK(vp
, auio
, ctx
);
5601 *retval
= bufsize
- (int)uio_resid(auio
);
/*
 * readlink: picks UIO_USERSPACE64 or UIO_USERSPACE32 according to whether
 * p is a 64-bit process, then forwards to readlinkat_internal() with
 * AT_FDCWD. The same segment value is used for both the path and the
 * destination buffer; uap->count is the buffer size.
 */
5606 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5608 enum uio_seg procseg
;
5610 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5611 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5612 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5613 uap
->count
, procseg
, retval
));
/*
 * readlinkat: descriptor-relative variant of readlink(). Picks
 * UIO_USERSPACE64 or UIO_USERSPACE32 according to whether p is a 64-bit
 * process and forwards uap->fd, uap->path, uap->buf and uap->bufsize to
 * readlinkat_internal(), using the same segment value for path and buffer.
 */
5617 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5619 enum uio_seg procseg
;
5621 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5622 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5623 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5627 * Change file flags.
/*
 * chflags1: set the file flags of vp to flags.
 *
 * Visible sequence: stage va_flags in a vnode_attr, run the MAC setflags
 * check, ask vnode_authattr() which kauth action the change requires,
 * authorize that action with KAUTH_VNODE_NOIMMUTABLE added, apply the
 * change with vnode_setattr(), and finally test whether the filesystem
 * reported va_flags as supported.
 */
5630 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5632 struct vnode_attr va
;
5633 kauth_action_t action
;
5637 VATTR_SET(&va
, va_flags
, flags
);
5640 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5645 /* request authorisation, disregard immutability */
5646 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5649 * Request that the auth layer disregard those file flags it's allowed to when
5650 * authorizing this operation; we need to do this in order to be able to
5651 * clear immutable flags.
/* vnode_authorize() is only consulted when vnode_authattr() produced a non-zero action. */
5653 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5655 error
= vnode_setattr(vp
, &va
, ctx
);
/* A successful setattr that did not mark va_flags supported is treated as a special case. */
5657 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5666 * Change flags of a file given a path name.
/*
 * chflags: path-based flag change. Audits uap->flags, sets up a FOLLOW
 * lookup of uap->path (user-space address) tagged OP_SETATTR, and applies
 * the flags with chflags1(). p and retval are marked __unused.
 */
5670 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5673 vfs_context_t ctx
= vfs_context_current();
5675 struct nameidata nd
;
5677 AUDIT_ARG(fflags
, uap
->flags
);
5678 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5679 UIO_USERSPACE
, uap
->path
, ctx
);
5686 error
= chflags1(vp
, uap
->flags
, ctx
);
5692 * Change flags of a file given a file descriptor.
/*
 * fchflags: descriptor-based flag change. Audits uap->fd and uap->flags,
 * resolves the descriptor to a vnode with file_vnode(), takes an iocount
 * with vnode_getwithref(), audits the vnode path, and applies the flags
 * with chflags1() under the current VFS context. p and retval are marked
 * __unused.
 */
5696 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5701 AUDIT_ARG(fd
, uap
->fd
);
5702 AUDIT_ARG(fflags
, uap
->flags
);
5703 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5706 if ((error
= vnode_getwithref(vp
))) {
5711 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5713 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5720 * Change security information on a filesystem object.
5722 * Returns: 0 Success
5723 * EPERM Operation not permitted
5724 * vnode_authattr:??? [anything vnode_authattr can return]
5725 * vnode_authorize:??? [anything vnode_authorize can return]
5726 * vnode_setattr:??? [anything vnode_setattr can return]
5728 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5729 * translated to EPERM before being returned.
/*
 * chmod_vnode: apply the attributes staged in vap to vp.
 *
 * Visible sequence: audit the mode, test for a named-stream vnode, run the
 * MAC setmode check when va_mode is active, obtain the required kauth
 * action from vnode_authattr() and authorize it, then call
 * vnode_setattr(). An EACCES from the authorization step is singled out.
 */
5732 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5734 kauth_action_t action
;
5737 AUDIT_ARG(mode
, vap
->va_mode
);
5738 /* XXX audit new args */
5741 /* chmod calls are not allowed for resource forks. */
5742 if (vp
->v_flag
& VISNAMEDSTREAM
) {
/* The MAC mode check only runs when the caller actually staged a mode change. */
5748 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5749 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5753 /* make sure that the caller is allowed to set this security information */
/* Short-circuit: vnode_authorize() is skipped if vnode_authattr() already failed. */
5754 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5755 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5756 if (error
== EACCES
)
5761 error
= vnode_setattr(vp
, vap
, ctx
);
5768 * Change mode of a file given a path name.
5770 * Returns: 0 Success
5771 * namei:??? [anything namei can return]
5772 * chmod_vnode:??? [anything chmod_vnode can return]
/*
 * chmodat: look up path relative to fd and hand the resulting vnode to
 * chmod_vnode().
 *
 * Parameters:	ctx	context passed to chmod_vnode()
 *		path	path to look up
 *		vap	attributes to apply
 *		fd	descriptor handed to nameiat()
 *		flag	AT_SYMLINK_NOFOLLOW selects NOFOLLOW for the lookup;
 *			otherwise FOLLOW is used
 *		segflg	uio segment selector
 *
 * The vnode from the lookup is released with vnode_put() after
 * chmod_vnode() returns.
 */
5775 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5776 int fd
, int flag
, enum uio_seg segflg
)
5778 struct nameidata nd
;
5781 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5782 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5784 if ((error
= nameiat(&nd
, fd
)))
5786 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5787 vnode_put(nd
.ni_vp
);
5793 * chmod_extended: Change the mode of a file given a path name; with extended
5794 * argument list (including extended security (ACL)).
5796 * Parameters: p Process requesting the open
5797 * uap User argument descriptor (see below)
5800 * Indirect: uap->path Path to object (same as 'chmod')
5801 * uap->uid UID to set
5802 * uap->gid GID to set
5803 * uap->mode File mode to set (same as 'chmod')
5804 * uap->xsecurity ACL to set (or delete)
5806 * Returns: 0 Success
5809 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5811 * XXX: We should enumerate the possible errno values here, and where
5812 * in the code they originated.
/*
 * chmod_extended: path-based attribute change with optional ACL.
 *
 * Mode, uid and gid are each staged in va only when the caller supplied a
 * value other than the "leave alone" sentinel (-1, KAUTH_UID_NONE,
 * KAUTH_GID_NONE). uap->xsecurity then selects the ACL handling: the value
 * 1 (_FILESEC_REMOVE_ACL) stages a NULL ACL, USER_ADDR_NULL has its own
 * case, and the remaining visible path copies a filesec in from user space
 * and stages its ACL. The change is applied with chmodat() relative to
 * AT_FDCWD, after which any copied-in filesec is freed. p and retval are
 * marked __unused.
 */
5815 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
5818 struct vnode_attr va
;
5819 kauth_filesec_t xsecdst
;
5821 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5824 if (uap
->mode
!= -1)
5825 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5826 if (uap
->uid
!= KAUTH_UID_NONE
)
5827 VATTR_SET(&va
, va_uid
, uap
->uid
);
5828 if (uap
->gid
!= KAUTH_GID_NONE
)
5829 VATTR_SET(&va
, va_gid
, uap
->gid
);
5832 switch(uap
->xsecurity
) {
5833 /* explicit remove request */
5834 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5835 VATTR_SET(&va
, va_acl
, NULL
);
5838 case USER_ADDR_NULL
:
5841 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5843 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5844 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
5847 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
/* xsecdst is only non-NULL when a filesec was copied in above. */
5850 if (xsecdst
!= NULL
)
5851 kauth_filesec_free(xsecdst
);
5856 * Returns: 0 Success
5857 * chmodat:??? [anything chmodat can return]
/*
 * fchmodat_internal: stage mode (masked with ALLPERMS) as va_mode in a
 * local vnode_attr and apply it through chmodat(), passing ctx, path, fd,
 * flag and segflg through unchanged.
 */
5860 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
5861 int flag
, enum uio_seg segflg
)
5863 struct vnode_attr va
;
5866 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
5868 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
/*
 * chmod: forwards uap->path and uap->mode to fchmodat_internal() with the
 * current VFS context, AT_FDCWD, flag 0 and UIO_USERSPACE. p and retval
 * are marked __unused.
 */
5872 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
5874 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5875 AT_FDCWD
, 0, UIO_USERSPACE
));
/*
 * fchmodat: descriptor-relative chmod. uap->flag is first tested for any
 * bit other than AT_SYMLINK_NOFOLLOW; the request is then forwarded to
 * fchmodat_internal() with uap->path, uap->mode, uap->fd, uap->flag and
 * UIO_USERSPACE. p and retval are marked __unused.
 */
5879 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
5881 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5884 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5885 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
5889 * Change mode of a file given a file descriptor.
/*
 * fchmod1: descriptor-based helper used by fchmod() and fchmod_extended()
 * in this file.  Resolves fd to a vnode with file_vnode(), calls
 * vnode_getwithref() on it, audits the vnode path, applies *vap through
 * chmod_vnode() with the current VFS context, and finally calls
 * vnode_put().  The handling of a failed file_vnode() or
 * vnode_getwithref() is not visible in this view.
 */
5892 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
5899 if ((error
= file_vnode(fd
, &vp
)) != 0)
5901 if ((error
= vnode_getwithref(vp
)) != 0) {
5905 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5907 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
5908 (void)vnode_put(vp
);
5915 * fchmod_extended: Change mode of a file given a file descriptor; with
5916 * extended argument list (including extended security (ACL)).
5918 * Parameters: p Process requesting to change file mode
5919 * uap User argument descriptor (see below)
5922 * Indirect: uap->mode File mode to set (same as 'chmod')
5923 * uap->uid UID to set
5924 * uap->gid GID to set
5925 * uap->xsecurity ACL to set (or delete)
5926 * uap->fd File descriptor of file to change mode
5928 * Returns: 0 Success
/*
 * fchmod_extended: descriptor-based counterpart of chmod_extended.
 *
 * Visible behaviour: audits the requested owner and stages mode (masked
 * with ALLPERMS), uid and gid unless they equal -1, KAUTH_UID_NONE and
 * KAUTH_GID_NONE.  The first switch on uap->xsecurity stages a NULL ACL
 * for both USER_ADDR_NULL and (void *)1 (_FILESEC_REMOVE_ACL), has a
 * separate label for the value -1, and otherwise copies the filesec in
 * with kauth_copyinfilesec() and stages its fsec_acl.  The attributes are
 * applied with fchmod1().  A second switch groups USER_ADDR_NULL and -1
 * together; the remaining visible code frees a non-NULL xsecdst with
 * kauth_filesec_free().
 *
 * NOTE(review): unlike chmod_extended in this file, USER_ADDR_NULL here
 * stages a NULL ACL rather than having an empty-looking case; confirm that
 * the difference is intentional.  The initialisation of va and xsecdst is
 * not visible in this view.
 */
5933 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
5936 struct vnode_attr va
;
5937 kauth_filesec_t xsecdst
;
5939 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5942 if (uap
->mode
!= -1)
5943 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5944 if (uap
->uid
!= KAUTH_UID_NONE
)
5945 VATTR_SET(&va
, va_uid
, uap
->uid
);
5946 if (uap
->gid
!= KAUTH_GID_NONE
)
5947 VATTR_SET(&va
, va_gid
, uap
->gid
);
5950 switch(uap
->xsecurity
) {
5951 case USER_ADDR_NULL
:
5952 VATTR_SET(&va
, va_acl
, NULL
);
5954 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5955 VATTR_SET(&va
, va_acl
, NULL
);
5958 case CAST_USER_ADDR_T(-1):
5961 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5963 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5966 error
= fchmod1(p
, uap
->fd
, &va
);
5969 switch(uap
->xsecurity
) {
5970 case USER_ADDR_NULL
:
5971 case CAST_USER_ADDR_T(-1):
5974 if (xsecdst
!= NULL
)
5975 kauth_filesec_free(xsecdst
);
/*
 * fchmod: stages (uap->mode & ALLPERMS) as va_mode in a local vnode_attr
 * and returns the result of fchmod1() for descriptor uap->fd.
 *
 * NOTE(review): the initialisation of va is not visible in this view.
 */
5981 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
5983 struct vnode_attr va
;
5986 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5988 return(fchmod1(p
, uap
->fd
, &va
));
5993 * Set ownership given a path name.
/*
 * fchownat_internal: shared implementation behind chown(), lchown() and
 * fchownat() in this file.
 *
 * Visible behaviour: audits the requested owner, selects NOFOLLOW when
 * AT_SYMLINK_NOFOLLOW is set in flag and FOLLOW otherwise, and performs a
 * LOOKUP/OP_SETATTR name lookup with nameiat() relative to fd.  uid and gid
 * are staged into va only when they differ from (uid_t)VNOVAL and
 * (gid_t)VNOVAL.  The change is then checked with
 * mac_vnode_check_setowner(), preflighted with vnode_authattr(),
 * authorised with vnode_authorize() when an action was returned, and
 * applied with vnode_setattr().  A final test singles out EACCES; per the
 * inline comment it is translated to EPERM, although the assignment itself
 * is not visible in this view.
 */
5997 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
5998 gid_t gid
, int flag
, enum uio_seg segflg
)
6001 struct vnode_attr va
;
6003 struct nameidata nd
;
6005 kauth_action_t action
;
6007 AUDIT_ARG(owner
, uid
, gid
);
6009 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6010 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6012 error
= nameiat(&nd
, fd
);
6020 if (uid
!= (uid_t
)VNOVAL
)
6021 VATTR_SET(&va
, va_uid
, uid
);
6022 if (gid
!= (gid_t
)VNOVAL
)
6023 VATTR_SET(&va
, va_gid
, gid
);
6026 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6031 /* preflight and authorize attribute changes */
6032 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6034 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6036 error
= vnode_setattr(vp
, &va
, ctx
);
6040 * EACCES is only allowed from namei(); permissions failure should
6041 * return EPERM, so we need to translate the error code.
6043 if (error
== EACCES
)
/*
 * chown: forwards uap->path, uap->uid and uap->gid to fchownat_internal()
 * using the current VFS context, AT_FDCWD, a flag value of 0 and
 * UIO_USERSPACE, and returns its result.  p and retval are unused.
 */
6051 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6053 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6054 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
/*
 * lchown: same forwarding as chown() in this file, except that the
 * identifiers are read from uap->owner and uap->group and the flag passed
 * to fchownat_internal() is AT_SYMLINK_NOFOLLOW.  p and retval are unused.
 */
6058 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6060 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6061 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
/*
 * fchownat: tests uap->flag for any bit other than AT_SYMLINK_NOFOLLOW
 * (the action taken on a match is not visible in this view; presumably an
 * error return - confirm), then forwards uap->fd, uap->path, uap->uid,
 * uap->gid and uap->flag to fchownat_internal() with UIO_USERSPACE and
 * returns its result.  p and retval are unused.
 */
6065 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6067 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6070 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6071 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6075 * Set ownership given a file descriptor.
/*
 * fchown: descriptor-based ownership change.
 *
 * Visible behaviour: audits the requested owner and the descriptor,
 * resolves uap->fd with file_vnode(), calls vnode_getwithref(), and audits
 * the vnode path.  uid and gid are staged into va only when they differ
 * from VNOVAL.  A vnode flagged VISNAMEDSTREAM gets a dedicated branch
 * (the inline comment says chown is not allowed for resource forks; the
 * branch body is not visible in this view).  The change is checked with
 * mac_vnode_check_setowner(), preflighted with vnode_authattr(),
 * authorised with vnode_authorize() when an action was returned (with an
 * EACCES-specific test inside the failure branch), applied with
 * vnode_setattr(), and the vnode is released with vnode_put().
 *
 * NOTE(review): fchownat_internal in this file compares against
 * (uid_t)VNOVAL and (gid_t)VNOVAL while this function compares against an
 * uncast VNOVAL; confirm that the comparison behaves the same.
 */
6079 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6081 struct vnode_attr va
;
6082 vfs_context_t ctx
= vfs_context_current();
6085 kauth_action_t action
;
6087 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6088 AUDIT_ARG(fd
, uap
->fd
);
6090 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6093 if ( (error
= vnode_getwithref(vp
)) ) {
6097 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6100 if (uap
->uid
!= VNOVAL
)
6101 VATTR_SET(&va
, va_uid
, uap
->uid
);
6102 if (uap
->gid
!= VNOVAL
)
6103 VATTR_SET(&va
, va_gid
, uap
->gid
);
6106 /* chown calls are not allowed for resource forks. */
6107 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6114 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6119 /* preflight and authorize attribute changes */
6120 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6122 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6123 if (error
== EACCES
)
6127 error
= vnode_setattr(vp
, &va
, ctx
);
6130 (void)vnode_put(vp
);
/*
 * getutimes: fills the caller-supplied tsp array with the timestamps to
 * apply; used by utimes() and futimes() in this file.
 *
 * Visible behaviour: when usrtvp is USER_ADDR_NULL a local struct timeval
 * (old_tv) is converted into tsp[0] with TIMEVAL_TO_TIMESPEC; the call
 * that fills old_tv and any assignment to tsp[1] are not visible in this
 * view (the inline comment refers to microtime).  Otherwise two timevals
 * are copied in from usrtvp, laid out as struct user64_timeval when
 * IS_64BIT_PROCESS(current_proc()) is true and as struct user32_timeval
 * when it is not, and converted into tsp[0] and tsp[1].
 */
6136 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6140 if (usrtvp
== USER_ADDR_NULL
) {
6141 struct timeval old_tv
;
6142 /* XXX Y2038 bug because of microtime argument */
6144 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6147 if (IS_64BIT_PROCESS(current_proc())) {
6148 struct user64_timeval tv
[2];
6149 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6152 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6153 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6155 struct user32_timeval tv
[2];
6156 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6159 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6160 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
/*
 * setutimes: applies access time ts[0] and modification time ts[1] to vp;
 * used by utimes() and futimes() in this file.
 *
 * Visible behaviour: audits the vnode path, stages both timestamps into
 * va and ORs VA_UTIMES_NULL into va_vaflags (the condition guarding that
 * statement is not visible in this view; presumably nullflag - confirm).
 * A vnode flagged VISNAMEDSTREAM gets a dedicated branch (the inline
 * comment says utimes is not allowed for resource forks).  The change is
 * checked with mac_vnode_check_setutimes(), preflighted with
 * vnode_authattr() and authorised with vnode_authorize() when action is
 * non-zero; both failure branches single out EACCES when nullflag is
 * clear.  The attributes are applied with vnode_setattr().
 *
 * NOTE(review): the last parameter (referred to as nullflag in the body)
 * is declared on a line that is not visible in this view.
 */
6167 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6171 struct vnode_attr va
;
6172 kauth_action_t action
;
6174 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6177 VATTR_SET(&va
, va_access_time
, ts
[0]);
6178 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6180 va
.va_vaflags
|= VA_UTIMES_NULL
;
6183 /* utimes calls are not allowed for resource forks. */
6184 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6191 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6195 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6196 if (!nullflag
&& error
== EACCES
)
6201 /* since we may not need to auth anything, check here */
6202 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6203 if (!nullflag
&& error
== EACCES
)
6207 error
= vnode_setattr(vp
, &va
, ctx
);
6214 * Set the access and modification times of a file.
/*
 * utimes: path-based timestamp update.
 *
 * Visible behaviour: prepares a LOOKUP/OP_SETATTR name lookup that follows
 * symlinks for uap->path (the inline comment explains the lookup is done
 * first because auditing wants the path), obtains the timestamps with
 * getutimes(), applies them to nd.ni_vp with setutimes() - passing
 * (usrtvp == USER_ADDR_NULL) as the final argument - and releases the
 * vnode with vnode_put().
 *
 * NOTE(review): the lookup call itself and the assignment of usrtvp are
 * not visible in this view.
 */
6218 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6220 struct timespec ts
[2];
6223 struct nameidata nd
;
6224 vfs_context_t ctx
= vfs_context_current();
6227 * AUDIT: Needed to change the order of operations to do the
6228 * name lookup first because auditing wants the path.
6230 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6231 UIO_USERSPACE
, uap
->path
, ctx
);
6238 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6239 * the current time instead.
6242 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6245 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6248 vnode_put(nd
.ni_vp
);
6253 * Set the access and modification times of a file.
/*
 * futimes: descriptor-based timestamp update.
 *
 * Visible behaviour: audits uap->fd, obtains the timestamps with
 * getutimes(), resolves the descriptor with file_vnode(), calls
 * vnode_getwithref(), and applies the timestamps with setutimes() using
 * the current VFS context, passing (usrtvp == 0) as the final argument.
 *
 * NOTE(review): utimes() in this file spells the same test as
 * usrtvp == USER_ADDR_NULL.  The assignment of usrtvp and the release of
 * the vnode are not visible in this view.
 */
6257 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6259 struct timespec ts
[2];
6264 AUDIT_ARG(fd
, uap
->fd
);
6266 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6268 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6270 if((error
= vnode_getwithref(vp
))) {
6275 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6282 * Truncate a file given its path name.
/*
 * truncate: path-based resize.
 *
 * Visible behaviour: tests for a negative uap->length (the action taken is
 * not visible in this view; presumably an error return - confirm), looks
 * the path up with namei() as a LOOKUP/OP_TRUNCATE operation that follows
 * symlinks, stages uap->length as va_data_size, checks the operation with
 * mac_vnode_check_truncate() using NOCRED, preflights it with
 * vnode_authattr(), authorises it with vnode_authorize() when action is
 * non-zero, and applies it with vnode_setattr().
 */
6286 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6289 struct vnode_attr va
;
6290 vfs_context_t ctx
= vfs_context_current();
6292 struct nameidata nd
;
6293 kauth_action_t action
;
6295 if (uap
->length
< 0)
6297 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6298 UIO_USERSPACE
, uap
->path
, ctx
);
6299 if ((error
= namei(&nd
)))
6306 VATTR_SET(&va
, va_data_size
, uap
->length
);
6309 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6314 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6316 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6318 error
= vnode_setattr(vp
, &va
, ctx
);
6325 * Truncate a file given a file descriptor.
/*
 * ftruncate: descriptor-based resize.
 *
 * Visible behaviour: audits uap->fd, tests for a negative uap->length,
 * looks the descriptor up with fp_lookup() and switches on the file type
 * from FILEGLOB_DTYPE().  One branch delegates to pshm_truncate(); the
 * vnode path takes vp from fg_data, has a dedicated branch for a
 * descriptor opened without FWRITE (which audits the path with
 * vnpath_withref), calls vnode_getwithref(), audits the path, checks the
 * operation with mac_vnode_check_truncate() using the descriptor
 * credential fg_cred, stages uap->length as va_data_size, applies it with
 * vnode_setattr() and releases the vnode with vnode_put().
 *
 * NOTE(review): the case labels of the switch, the error values and the
 * release of fp are not visible in this view.
 */
6329 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6331 vfs_context_t ctx
= vfs_context_current();
6332 struct vnode_attr va
;
6334 struct fileproc
*fp
;
6338 AUDIT_ARG(fd
, uap
->fd
);
6339 if (uap
->length
< 0)
6342 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6346 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6348 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6357 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
6359 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6360 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6365 if ((error
= vnode_getwithref(vp
)) != 0) {
6369 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6372 error
= mac_vnode_check_truncate(ctx
,
6373 fp
->f_fglob
->fg_cred
, vp
);
6375 (void)vnode_put(vp
);
6380 VATTR_SET(&va
, va_data_size
, uap
->length
);
6381 error
= vnode_setattr(vp
, &va
, ctx
);
6382 (void)vnode_put(vp
);
6390 * Sync an open file with synchronized I/O _file_ integrity completion
/*
 * fsync: calls __pthread_testcancel(1) and then returns the result of
 * fsync_common() invoked with MNT_WAIT.
 */
6394 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6396 __pthread_testcancel(1);
6397 return(fsync_common(p
, uap
, MNT_WAIT
));
6402 * Sync an open file with synchronized I/O _file_ integrity completion
6404 * Notes: This is a legacy support function that does not test for
6405 * thread cancellation points.
/*
 * fsync_nocancel: returns the result of fsync_common() invoked with
 * MNT_WAIT, without the __pthread_testcancel() call that fsync() in this
 * file makes.  uap is cast to struct fsync_args * for the call.
 */
6409 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6411 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6416 * Sync an open file with synchronized I/O _data_ integrity completion
/*
 * fdatasync: calls __pthread_testcancel(1) and then returns the result of
 * fsync_common() invoked with MNT_DWAIT.  uap is cast to
 * struct fsync_args * for the call.
 */
6420 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6422 __pthread_testcancel(1);
6423 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6430 * Common fsync code to support both synchronized I/O file integrity completion
6431 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6433 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6434 * will only guarantee that the file data contents are retrievable. If
6435 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6436 * includes additional metadata unnecessary for retrieving the file data
6437 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6440 * Parameters: p The process
6441 * uap->fd The descriptor to synchronize
6442 * flags The data integrity flags
6444 * Returns: int Success
6445 * fp_getfvp:EBADF Bad file descriptor
6446 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6447 * VNOP_FSYNC:??? unspecified
6449 * Notes: We use struct fsync_args because it is a short name, and all
6450 * caller argument structures are otherwise identical.
/*
 * fsync_common: worker shared by fsync(), fsync_nocancel() and fdatasync()
 * in this file.
 *
 * Visible behaviour: audits uap->fd, resolves it to a fileproc and vnode
 * with fp_getfvp(), calls vnode_getwithref(), audits the vnode path and
 * issues VNOP_FSYNC() with the caller-supplied flags.  When the vnode is
 * flagged VISNAMEDSTREAM, has a non-NULL v_parent, is a shadow vnode and
 * the fileproc carries FP_WRITTEN, the parent is also flushed with
 * vnode_flushnamedstream() (result ignored).  The vnode is released with
 * vnode_put().
 *
 * NOTE(review): the first operand(s) of the named-stream condition and
 * the release of fp are not visible in this view.
 */
6453 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6456 struct fileproc
*fp
;
6457 vfs_context_t ctx
= vfs_context_current();
6460 AUDIT_ARG(fd
, uap
->fd
);
6462 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6464 if ( (error
= vnode_getwithref(vp
)) ) {
6469 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6471 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6474 /* Sync resource fork shadow file if necessary. */
6476 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6477 (vp
->v_parent
!= NULLVP
) &&
6478 vnode_isshadow(vp
) &&
6479 (fp
->f_flags
& FP_WRITTEN
)) {
6480 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6484 (void)vnode_put(vp
);
6490 * Duplicate files. Source must be a file, target must be a file or
6493 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6494 * perform inheritance correctly.
/*
 * copyfile: duplicates the file named by uap->from to uap->to.
 *
 * Visible behaviour: tests uap->flags for bits outside CPF_MASK, looks the
 * source up with namei() (LOOKUP/OP_COPYFILE) and the destination with a
 * second namei() (CREATE/OP_LINK, parent and leaf locked, SAVESTART).
 * There are dedicated branches for a missing CPF_OVERWRITE flag and for a
 * source or existing target of type VDIR.  The destination directory is
 * authorised for KAUTH_VNODE_ADD_FILE with vnode_authorize(), and the copy
 * is delegated to VNOP_COPYFILE() with uap->mode and uap->flags.  sdvp is
 * then taken from tond.ni_startdir.
 *
 * NOTE(review): the assignments of fvp, tvp and tdvp, the bodies of the
 * error branches and all cleanup are not visible in this view.
 */
6498 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6500 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6501 struct nameidata fromnd
, tond
;
6503 vfs_context_t ctx
= vfs_context_current();
6505 /* Check that the flags are valid. */
6507 if (uap
->flags
& ~CPF_MASK
) {
6511 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
,
6512 UIO_USERSPACE
, uap
->from
, ctx
);
6513 if ((error
= namei(&fromnd
)))
6517 NDINIT(&tond
, CREATE
, OP_LINK
,
6518 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6519 UIO_USERSPACE
, uap
->to
, ctx
);
6520 if ((error
= namei(&tond
))) {
6527 if (!(uap
->flags
& CPF_OVERWRITE
)) {
6532 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6537 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6543 * If source is the same as the destination (that is the
6544 * same inode number) then there is nothing to do.
6545 * (fixed to have POSIX semantics - CSM 3/2/98)
6550 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6552 sdvp
= tond
.ni_startdir
;
6554 * nameidone has to happen before we vnode_put(tdvp)
6555 * since it may need to release the fs_nodelock on the tdvp
6575 * Rename files. Source and destination must either both be directories,
6576 * or both not be directories. If target is a directory, it must be empty.
/*
 * renameat_internal: common implementation behind rename(), rename_ext()
 * and renameat(), all of which call it in this file.
 *
 * Steps visible in this view, in source order:
 *  - MALLOCs __rename_data, a scratch area holding two nameidata and two
 *    vnode_attr (the existing comment says they are too big for the
 *    stack), points fromnd/tond into it, and FREEs it near the end.
 *  - Initialises fromnd (DELETE/OP_UNLINK) and tond (RENAME/OP_RENAME),
 *    marks both NAMEI_COMPOUNDRENAME, and runs nameiat() on each relative
 *    to fromfd and tofd.  Each lookup runs when its NAMEI_CONTLOOKUP flag
 *    is set or when continuing is false.
 *  - Records in batched whether a compound rename is available on fdvp,
 *    compares the mounts of fdvp and tdvp, and authorises the operation
 *    with vn_authorize_rename(); an ENOENT result has a retry branch
 *    bounded by MAX_AUTHORIZE_ENOENT_RETRIES.
 *  - Performs the same-file, mount-point and cross-device tests that the
 *    inline comments describe.
 *  - When tdvp != fdvp and fvp is a VDIR, takes a mount reference
 *    (mount_ref) and the rename lock (mount_lock_renames) on the mount of
 *    fvp; the lock is dropped again with mount_unlock_renames/mount_drop
 *    at each visible point that tests holding_mntlock.
 *  - Collects fsevent info and, when an event or a kauth fileop listener
 *    needs them, the full source and destination paths via safe_getpath().
 *  - Calls vn_rename().  EKEEPLOOKING leads back to continue_lookup;
 *    ERECYCLE and, for batched operations, ENOENT have dedicated branches.
 *  - Afterwards calls kauth_authorize_fileop(KAUTH_FILEOP_RENAME), posts
 *    FSE_RENAME, rewrites the tail of f_mntonname from the new name when
 *    fvp has a mount on it (v_mountedhere), and updates the identity of
 *    fvp with vnode_update_identity().
 *
 * NOTE(review): many statements of this function (labels, error
 * assignments, gotos, vnode releases, several conditions) are not visible
 * in this view, so the list above is an outline rather than a complete
 * description of the control flow.
 */
6580 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
6581 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
6585 struct nameidata
*fromnd
, *tond
;
6591 const char *oname
= NULL
;
6592 char *from_name
= NULL
, *to_name
= NULL
;
6593 int from_len
=0, to_len
=0;
6594 int holding_mntlock
;
6595 mount_t locked_mp
= NULL
;
6596 vnode_t oparent
= NULLVP
;
6598 fse_info from_finfo
, to_finfo
;
6600 int from_truncated
=0, to_truncated
;
6602 struct vnode_attr
*fvap
, *tvap
;
6604 /* carving out a chunk for structs that are too big to be on stack. */
6606 struct nameidata from_node
, to_node
;
6607 struct vnode_attr fv_attr
, tv_attr
;
6609 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
6610 fromnd
= &__rename_data
->from_node
;
6611 tond
= &__rename_data
->to_node
;
6613 holding_mntlock
= 0;
6622 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
6624 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6626 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6628 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6631 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6632 if ( (error
= nameiat(fromnd
, fromfd
)) )
6634 fdvp
= fromnd
->ni_dvp
;
6635 fvp
= fromnd
->ni_vp
;
6637 if (fvp
&& fvp
->v_type
== VDIR
)
6638 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
6641 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6642 if ( (error
= nameiat(tond
, tofd
)) ) {
6644 * Translate error code for rename("dir1", "dir2/.").
6646 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
6650 tdvp
= tond
->ni_dvp
;
6654 batched
= vnode_compound_rename_available(fdvp
);
6657 * Claim: this check will never reject a valid rename.
6658 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6659 * Suppose fdvp and tdvp are not on the same mount.
6660 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6661 * then you can't move it to within another dir on the same mountpoint.
6662 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6664 * If this check passes, then we are safe to pass these vnodes to the same FS.
6666 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
6670 goto skipped_lookup
;
6674 error
= vn_authorize_rename(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, NULL
);
6676 if (error
== ENOENT
) {
6677 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
6678 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
6680 * We encountered a race where after doing the namei, tvp stops
6681 * being valid. If so, simply re-drive the rename call from the
6693 * If the source and destination are the same (i.e. they're
6694 * links to the same vnode) and the target file system is
6695 * case sensitive, then there is nothing to do.
6697 * XXX Come back to this.
6703 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6704 * then assume that this file system is case sensitive.
6706 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
6707 pathconf_val
!= 0) {
6713 * Allow the renaming of mount points.
6714 * - target must not exist
6715 * - target must reside in the same directory as source
6716 * - union mounts cannot be renamed
6717 * - "/" cannot be renamed
6719 * XXX Handle this in VFS after a continued lookup (if we missed
6720 * in the cache to start off)
6722 if ((fvp
->v_flag
& VROOT
) &&
6723 (fvp
->v_type
== VDIR
) &&
6725 (fvp
->v_mountedhere
== NULL
) &&
6727 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
6728 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
6731 /* switch fvp to the covered vnode */
6732 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
6733 if ( (vnode_getwithref(coveredvp
)) ) {
6743 * Check for cross-device rename.
6745 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
6746 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
6752 * If source is the same as the destination (that is the
6753 * same inode number) then there is nothing to do...
6754 * EXCEPT if the underlying file system supports case
6755 * insensitivity and is case preserving. In this case
6756 * the file system needs to handle the special case of
6757 * getting the same vnode as target (fvp) and source (tvp).
6759 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6760 * and _PC_CASE_PRESERVING can have this exception, and they need to
6761 * handle the special case of getting the same vnode as target and
6762 * source. NOTE: Then the target is unlocked going into vnop_rename,
6763 * so not to cause locking problems. There is a single reference on tvp.
6765 * NOTE - that fvp == tvp also occurs if they are hard linked and
6766 * that correct behaviour then is just to return success without doing
6769 * XXX filesystem should take care of this itself, perhaps...
6771 if (fvp
== tvp
&& fdvp
== tdvp
) {
6772 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
6773 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
6774 fromnd
->ni_cnd
.cn_namelen
)) {
6779 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
6781 * we're holding a reference and lock
6782 * on locked_mp, but it no longer matches
6783 * what we want to do... so drop our hold
6785 mount_unlock_renames(locked_mp
);
6786 mount_drop(locked_mp
, 0);
6787 holding_mntlock
= 0;
6789 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
6791 * serialize renames that re-shape
6792 * the tree... if holding_mntlock is
6793 * set, then we're ready to go...
6795 * first need to drop the iocounts
6796 * we picked up, second take the
6797 * lock to serialize the access,
6798 * then finally start the lookup
6799 * process over with the lock held
6801 if (!holding_mntlock
) {
6803 * need to grab a reference on
6804 * the mount point before we
6805 * drop all the iocounts... once
6806 * the iocounts are gone, the mount
6809 locked_mp
= fvp
->v_mount
;
6810 mount_ref(locked_mp
, 0);
6813 * nameidone has to happen before we vnode_put(tvp)
6814 * since it may need to release the fs_nodelock on the tvp
6823 * nameidone has to happen before we vnode_put(fdvp)
6824 * since it may need to release the fs_nodelock on the fvp
6831 mount_lock_renames(locked_mp
);
6832 holding_mntlock
= 1;
6838 * when we dropped the iocounts to take
6839 * the lock, we allowed the identity of
6840 * the various vnodes to change... if they did,
6841 * we may no longer be dealing with a rename
6842 * that reshapes the tree... once we're holding
6843 * the iocounts, the vnodes can't change type
6844 * so we're free to drop the lock at this point
6847 if (holding_mntlock
) {
6848 mount_unlock_renames(locked_mp
);
6849 mount_drop(locked_mp
, 0);
6850 holding_mntlock
= 0;
6854 // save these off so we can later verify that fvp is the same
6855 oname
= fvp
->v_name
;
6856 oparent
= fvp
->v_parent
;
6860 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
6863 get_fse_info(fvp
, &from_finfo
, ctx
);
6865 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
6870 fvap
= &__rename_data
->fv_attr
;
6874 get_fse_info(tvp
, &to_finfo
, ctx
);
6875 } else if (batched
) {
6876 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
6881 tvap
= &__rename_data
->tv_attr
;
6886 #endif /* CONFIG_FSE */
6888 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
6889 if (from_name
== NULL
) {
6890 GET_PATH(from_name
);
6891 if (from_name
== NULL
) {
6897 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
6899 if (to_name
== NULL
) {
6901 if (to_name
== NULL
) {
6907 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
6909 #if CONFIG_SECLUDED_RENAME
6910 if (flags
& VFS_SECLUDE_RENAME
) {
6911 fromnd
->ni_cnd
.cn_flags
|= CN_SECLUDE_RENAME
;
6914 #pragma unused(flags)
6916 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
6917 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
6920 if (holding_mntlock
) {
6922 * we can drop our serialization
6925 mount_unlock_renames(locked_mp
);
6926 mount_drop(locked_mp
, 0);
6927 holding_mntlock
= 0;
6930 if (error
== EKEEPLOOKING
) {
6931 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6932 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6933 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6937 fromnd
->ni_vp
= fvp
;
6940 goto continue_lookup
;
6944 * We may encounter a race in the VNOP where the destination didn't
6945 * exist when we did the namei, but it does by the time we go and
6946 * try to create the entry. In this case, we should re-drive this rename
6947 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6948 * but other filesystems susceptible to this race could return it, too.
6950 if (error
== ERECYCLE
) {
6955 * For compound VNOPs, the authorization callback may return
6956 * ENOENT in case of racing hardlink lookups hitting the name
6957 * cache, redrive the lookup.
6959 if (batched
&& error
== ENOENT
) {
6960 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
6961 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
6970 /* call out to allow 3rd party notification of rename.
6971 * Ignore result of kauth_authorize_fileop call.
6973 kauth_authorize_fileop(vfs_context_ucred(ctx
),
6974 KAUTH_FILEOP_RENAME
,
6975 (uintptr_t)from_name
, (uintptr_t)to_name
);
6978 if (from_name
!= NULL
&& to_name
!= NULL
) {
6979 if (from_truncated
|| to_truncated
) {
6980 // set it here since only the from_finfo gets reported up to user space
6981 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
6985 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
6988 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
6992 add_fsevent(FSE_RENAME
, ctx
,
6993 FSE_ARG_STRING
, from_len
, from_name
,
6994 FSE_ARG_FINFO
, &from_finfo
,
6995 FSE_ARG_STRING
, to_len
, to_name
,
6996 FSE_ARG_FINFO
, &to_finfo
,
6999 add_fsevent(FSE_RENAME
, ctx
,
7000 FSE_ARG_STRING
, from_len
, from_name
,
7001 FSE_ARG_FINFO
, &from_finfo
,
7002 FSE_ARG_STRING
, to_len
, to_name
,
7006 #endif /* CONFIG_FSE */
7009 * update filesystem's mount point data
7012 char *cp
, *pathend
, *mpname
;
7018 mp
= fvp
->v_mountedhere
;
7020 if (vfs_busy(mp
, LK_NOWAIT
)) {
7024 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7026 if (UIO_SEG_IS_USER_SPACE(segflg
))
7027 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7029 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7031 /* find current mount point prefix */
7032 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7033 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7037 /* find last component of target name */
7038 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7042 /* append name to prefix */
7043 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7044 bzero(pathend
, maxlen
);
7045 strlcpy(pathend
, mpname
, maxlen
);
7047 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7052 * fix up name & parent pointers. note that we first
7053 * check that fvp has the same name/parent pointers it
7054 * had before the rename call... this is a 'weak' check
7057 * XXX oparent and oname may not be set in the compound vnop case
7059 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7062 update_flags
= VNODE_UPDATE_NAME
;
7065 update_flags
|= VNODE_UPDATE_PARENT
;
7067 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7070 if (to_name
!= NULL
) {
7071 RELEASE_PATH(to_name
);
7074 if (from_name
!= NULL
) {
7075 RELEASE_PATH(from_name
);
7078 if (holding_mntlock
) {
7079 mount_unlock_renames(locked_mp
);
7080 mount_drop(locked_mp
, 0);
7081 holding_mntlock
= 0;
7085 * nameidone has to happen before we vnode_put(tdvp)
7086 * since it may need to release the fs_nodelock on the tdvp
7096 * nameidone has to happen before we vnode_put(fdvp)
7097 * since it may need to release the fs_nodelock on the fdvp
7107 * If things changed after we did the namei, then we will re-drive
7108 * this rename call from the top.
7115 FREE(__rename_data
, M_TEMP
);
/*
 * rename: forwards uap->from and uap->to to renameat_internal() using the
 * current VFS context, AT_FDCWD for both directory descriptors,
 * UIO_USERSPACE and a flags value of 0, and returns its result.  p and
 * retval are unused.
 */
7120 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7122 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7123 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
7126 #if CONFIG_SECLUDED_RENAME
/*
 * rename_ext: variant of rename() compiled under CONFIG_SECLUDED_RENAME
 * that passes the caller-supplied uap->flags through to
 * renameat_internal() instead of 0.  The source is uap->from relative to
 * AT_FDCWD with UIO_USERSPACE; the destination arguments are on a line
 * that is not visible in this view.  p and retval are unused.
 */
7127 int rename_ext(__unused proc_t p
, struct rename_ext_args
*uap
, __unused
int32_t *retval
)
7129 return renameat_internal(
7130 vfs_context_current(),
7131 AT_FDCWD
, uap
->from
,
7133 UIO_USERSPACE
, uap
->flags
);
/*
 * renameat: forwards uap->fromfd/uap->from and uap->tofd/uap->to to
 * renameat_internal() using the current VFS context, UIO_USERSPACE and a
 * flags value of 0, and returns its result.  p and retval are unused.
 */
7138 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7140 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7141 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7145 * Make a directory file.
7147 * Returns: 0 Success
7150 * vnode_authorize:???
/*
 * mkdir1at: common directory-creation helper used by mkdir_extended(),
 * mkdir() and mkdirat() in this file.
 *
 * Visible behaviour: audits vap->va_mode, prepares a CREATE/OP_MKDIR
 * lookup with the parent locked, sets WILLBEDIR and NAMEI_COMPOUNDMKDIR,
 * and runs nameiat() relative to fd.  It records in batched whether a
 * compound mkdir is available on dvp and stages VDIR as va_type.  The
 * request is authorised with vn_authorize_mkdir(); on EACCES or EPERM a
 * second, plain LOOKUP is issued (the inline comment says this is so that
 * an existing target yields EEXIST rather than an access error).  The
 * directory is created with vn_create(); EKEEPLOOKING jumps back to
 * continue_lookup.  Afterwards the name and parent of the new vnode are
 * hooked up with vnode_update_identity() when either is missing, and
 * FSE_CREATE_DIR is posted.
 *
 * NOTE(review): the conditions around the authorisation call, the error
 * returns and all cleanup are not visible in this view.
 */
7155 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7156 enum uio_seg segflg
)
7160 int update_flags
= 0;
7162 struct nameidata nd
;
7164 AUDIT_ARG(mode
, vap
->va_mode
);
7165 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7167 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7168 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7171 error
= nameiat(&nd
, fd
);
7182 batched
= vnode_compound_mkdir_available(dvp
);
7184 VATTR_SET(vap
, va_type
, VDIR
);
7188 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7189 * only get EXISTS or EISDIR for existing path components, and not that it could see
7190 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7191 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7193 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7194 if (error
== EACCES
|| error
== EPERM
) {
7202 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7203 * rather than EACCESS if the target exists.
7205 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7207 error2
= nameiat(&nd
, fd
);
7221 * make the directory
7223 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7224 if (error
== EKEEPLOOKING
) {
7226 goto continue_lookup
;
7232 // Make sure the name & parent pointers are hooked up
7233 if (vp
->v_name
== NULL
)
7234 update_flags
|= VNODE_UPDATE_NAME
;
7235 if (vp
->v_parent
== NULLVP
)
7236 update_flags
|= VNODE_UPDATE_PARENT
;
7239 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7242 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7247 * nameidone has to happen before we vnode_put(dvp)
7248 * since it may need to release the fs_nodelock on the dvp
7261 * mkdir_extended: Create a directory; with extended security (ACL).
7263 * Parameters: p Process requesting to create the directory
7264 * uap User argument descriptor (see below)
7267 * Indirect: uap->path Path of directory to create
7268 * uap->mode Access permissions to set
7269 * uap->xsecurity ACL to set
7271 * Returns: 0 Success
/*
 * mkdir_extended: creates a directory with an optional ACL.
 *
 * Visible behaviour: audits the requested owner; when uap->xsecurity is
 * not USER_ADDR_NULL the filesec is copied in with kauth_copyinfilesec().
 * The mode is staged as (uap->mode & ACCESSPERMS) with the bits of
 * p->p_fd->fd_cmask cleared, and the ACL is staged from xsecdst->fsec_acl
 * when xsecdst is non-NULL.  The directory is created with mkdir1at()
 * relative to AT_FDCWD, after which a non-NULL xsecdst is released with
 * kauth_filesec_free().
 *
 * NOTE(review): uap->uid and uap->gid are audited but no visible line
 * stages them into va.  The initialisation of va and xsecdst is not
 * visible in this view.
 */
7276 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7279 kauth_filesec_t xsecdst
;
7280 struct vnode_attr va
;
7282 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7285 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7286 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7290 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7291 if (xsecdst
!= NULL
)
7292 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7294 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7296 if (xsecdst
!= NULL
)
7297 kauth_filesec_free(xsecdst
);
/*
 * mkdir: stages the mode as (uap->mode & ACCESSPERMS) with the bits of
 * p->p_fd->fd_cmask cleared, and returns the result of mkdir1at() for
 * uap->path relative to AT_FDCWD.  The final argument of the call is on a
 * line that is not visible in this view.
 */
7302 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7304 struct vnode_attr va
;
7307 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7309 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
/*
 * mkdirat: same as mkdir() in this file except that the path is resolved
 * relative to uap->fd instead of AT_FDCWD.  The final argument of the
 * mkdir1at() call is on a line that is not visible in this view.
 */
7314 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7316 struct vnode_attr va
;
7319 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7321 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
7326 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7327 enum uio_seg segflg
)
7331 struct nameidata nd
;
7334 int has_listeners
= 0;
7338 struct vnode_attr va
;
7339 #endif /* CONFIG_FSE */
7340 struct vnode_attr
*vap
= NULL
;
7341 int restart_count
= 0;
7347 * This loop exists to restart rmdir in the unlikely case that two
7348 * processes are simultaneously trying to remove the same directory
7349 * containing orphaned appleDouble files.
7352 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7353 segflg
, dirpath
, ctx
);
7354 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7359 error
= nameiat(&nd
, fd
);
7367 batched
= vnode_compound_rmdir_available(vp
);
7369 if (vp
->v_flag
& VROOT
) {
7371 * The root of a mounted filesystem cannot be deleted.
7378 * Removed a check here; we used to abort if vp's vid
7379 * was not the same as what we'd seen the last time around.
7380 * I do not think that check was valid, because if we retry
7381 * and all dirents are gone, the directory could legitimately
7382 * be recycled but still be present in a situation where we would
7383 * have had permission to delete. Therefore, we won't make
7384 * an effort to preserve that check now that we may not have a
7389 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7391 if (error
== ENOENT
) {
7392 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7393 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7404 if (!vnode_compound_rmdir_available(dvp
)) {
7405 panic("No error, but no compound rmdir?");
7412 need_event
= need_fsevent(FSE_DELETE
, dvp
);
7415 get_fse_info(vp
, &finfo
, ctx
);
7417 error
= vfs_get_notify_attributes(&va
);
7426 has_listeners
= kauth_authorize_fileop_has_listeners();
7427 if (need_event
|| has_listeners
) {
7436 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
7439 finfo
.mode
|= FSE_TRUNCATED_PATH
;
7444 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7447 /* Couldn't find a vnode */
7451 if (error
== EKEEPLOOKING
) {
7452 goto continue_lookup
;
7453 } else if (batched
&& error
== ENOENT
) {
7454 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7455 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7457 * For compound VNOPs, the authorization callback
7458 * may return ENOENT in case of racing hard link lookups;
7459 * in that case, redrive the lookup.
7466 #if CONFIG_APPLEDOUBLE
7468 * Special case to remove orphaned AppleDouble
7469 * files. I don't like putting this in the kernel,
7470 * but carbon does not like putting this in carbon either,
7473 if (error
== ENOTEMPTY
) {
7474 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
7475 if (error
== EBUSY
) {
7481 * Assuming everything went well, we will try the RMDIR again
7484 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7486 #endif /* CONFIG_APPLEDOUBLE */
7488 * Call out to allow 3rd party notification of delete.
7489 * Ignore result of kauth_authorize_fileop call.
7492 if (has_listeners
) {
7493 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7494 KAUTH_FILEOP_DELETE
,
7499 if (vp
->v_flag
& VISHARDLINK
) {
7500 // see the comment in unlink1() about why we update
7501 // the parent of a hard link when it is removed
7502 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
7508 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
7510 add_fsevent(FSE_DELETE
, ctx
,
7511 FSE_ARG_STRING
, len
, path
,
7512 FSE_ARG_FINFO
, &finfo
,
7524 * nameidone has to happen before we vnode_put(dvp)
7525 * since it may need to release the fs_nodelock on the dvp
7533 if (restart_flag
== 0) {
7534 wakeup_one((caddr_t
)vp
);
7537 tsleep(vp
, PVFS
, "rm AD", 1);
7539 } while (restart_flag
!= 0);
7546 * Remove a directory file.
7550 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
7552 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
7553 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * Length of a struct direntry record whose name is `namlen` bytes long:
 * sizeof(struct direntry) less (MAXPATHLEN - 1), plus namlen, rounded up
 * to a multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	(((namlen) + (sizeof(struct direntry) - (MAXPATHLEN - 1)) + 7) & ~7)
7561 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
7562 int *numdirent
, vfs_context_t ctxp
)
7564 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
7565 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
7566 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
7567 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
7572 struct direntry
*entry64
;
7578 * Our kernel buffer needs to be smaller since re-packing
7579 * will expand each dirent. The worse case (when the name
7580 * length is 3) corresponds to a struct direntry size of 32
7581 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7582 * (4-byte aligned). So having a buffer that is 3/8 the size
7583 * will prevent us from reading more than we can pack.
7585 * Since this buffer is wired memory, we will limit the
7586 * buffer size to a maximum of 32K. We would really like to
7587 * use 32K in the MIN(), but we use magic number 87371 to
7588 * prevent uio_resid() * 3 / 8 from overflowing.
7590 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
7591 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
7592 if (bufptr
== NULL
) {
7596 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
7597 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
7598 auio
->uio_offset
= uio
->uio_offset
;
7600 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
7602 dep
= (struct dirent
*)bufptr
;
7603 bytesread
= bufsize
- uio_resid(auio
);
7605 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
7608 * Convert all the entries and copy them out to user's buffer.
7610 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
7611 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
7613 bzero(entry64
, enbufsize
);
7614 /* Convert a dirent to a dirent64. */
7615 entry64
->d_ino
= dep
->d_ino
;
7616 entry64
->d_seekoff
= 0;
7617 entry64
->d_reclen
= enbufsize
;
7618 entry64
->d_namlen
= dep
->d_namlen
;
7619 entry64
->d_type
= dep
->d_type
;
7620 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
7622 /* Move to next entry. */
7623 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
7625 /* Copy entry64 to user's buffer. */
7626 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
7629 /* Update the real offset using the offset we got from VNOP_READDIR. */
7631 uio
->uio_offset
= auio
->uio_offset
;
7634 FREE(bufptr
, M_TEMP
);
7635 FREE(entry64
, M_TEMP
);
/*
 * getdirentries_common: shared back end of the getdirentries() and
 * getdirentries64() system calls.
 *
 * NOTE(review): this listing of the function is incomplete (some
 * declarations, error returns and labels are not shown); the description
 * below covers only the statements that are visible.
 *
 * Parameters:
 *	fd		file descriptor to read directory entries from
 *	bufp		user-space address of the destination buffer
 *	bufsize		size of that buffer; values above
 *			GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024) are
 *			clamped to it
 *	bytesread	out: bufsize minus the residual count left in the uio
 *	offset		out: its assignment is not visible here; both callers
 *			copy the value out to user space afterwards
 *	flags		VNODE_READDIR_EXTENDED selects vnode_readdir64();
 *			otherwise VNOP_READDIR() is called directly
 *
 * Visible flow:
 *  - A local copy of the current vfs_context is used throughout.
 *  - fd is resolved to a fileproc/vnode pair with fp_getfvp(); when FREAD
 *    is not set on the open file the vnode path is audited.
 *  - mac_file_check_change_offset() is consulted, vnode_getwithref() is
 *    called on the vnode, a non-VDIR vnode is released with vnode_put(),
 *    and mac_vnode_check_readdir() is consulted.
 *  - A single-iovec uio over the user buffer is created at the open file's
 *    current fg_offset; after the read the uio's offset is stored back into
 *    fg_offset on both the extended and the plain path.
 *  - If nothing was transferred (residual == bufsize) the union_dircheckp
 *    hook is called when it is set, and on an MNT_UNION mount
 *    lookup_traverse_union() is tried; on success the open file's fg_data
 *    is pointed at the new vnode and fg_offset is reset to 0.
 */
7640 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7643 * Read a block of directory entries in a file system independent format.
7646 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
7647 off_t
*offset
, int flags
)
7650 struct vfs_context context
= *vfs_context_current(); /* local copy */
7651 struct fileproc
*fp
;
7653 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7655 int error
, eofflag
, numdirent
;
7656 char uio_buf
[ UIO_SIZEOF(1) ];
7658 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
7662 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7663 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7668 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
7669 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
7672 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
7676 if ( (error
= vnode_getwithref(vp
)) ) {
7679 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7682 if (vp
->v_type
!= VDIR
) {
7683 (void)vnode_put(vp
);
7689 error
= mac_vnode_check_readdir(&context
, vp
);
7691 (void)vnode_put(vp
);
7696 loff
= fp
->f_fglob
->fg_offset
;
7697 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7698 uio_addiov(auio
, bufp
, bufsize
);
7700 if (flags
& VNODE_READDIR_EXTENDED
) {
7701 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
7702 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7704 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
7705 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7708 (void)vnode_put(vp
);
7712 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
7713 if (union_dircheckp
) {
7714 error
= union_dircheckp(&vp
, fp
, &context
);
7721 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
7722 struct vnode
*tvp
= vp
;
7723 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
7725 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7726 fp
->f_fglob
->fg_offset
= 0;
7740 *bytesread
= bufsize
- uio_resid(auio
);
/*
 * getdirentries: system call returning directory entries read through
 * getdirentries_common() with flags == 0 (the non-extended path).
 *
 * Visible behaviour: uap->fd is audited, then uap->fd, uap->buf and
 * uap->count are passed to getdirentries_common().  The offset it reports
 * is copied out to uap->basep as a user64_long_t for a 64-bit process and
 * as a user32_long_t otherwise, and *retval is set to the byte count.
 *
 * NOTE(review): the conditions guarding the copyout and the assignment to
 * *retval are not visible in this listing.
 * NOTE(review): p is declared __unused but is passed to proc_is64bit().
 */
7748 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
7754 AUDIT_ARG(fd
, uap
->fd
);
7755 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
7758 if (proc_is64bit(p
)) {
7759 user64_long_t base
= (user64_long_t
)offset
;
7760 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
7762 user32_long_t base
= (user32_long_t
)offset
;
7763 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
7765 *retval
= bytesread
;
/*
 * getdirentries64: system call returning directory entries read through
 * getdirentries_common() with VNODE_READDIR_EXTENDED set.
 *
 * Visible behaviour: uap->fd is audited, then uap->fd, uap->buf and
 * uap->bufsize are passed to getdirentries_common().  *retval is set to the
 * byte count and the reported offset is copied out to uap->position as an
 * off_t; the result of that copyout is stored in error.
 *
 * NOTE(review): the condition guarding the *retval assignment and the
 * copyout is not visible in this listing.
 */
7771 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
7777 AUDIT_ARG(fd
, uap
->fd
);
7778 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
7781 *retval
= bytesread
;
7782 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
/*
 * umask1: common implementation behind umask() and umask_extended().
 *
 * Parameters:	p	process whose creation mask is changed
 *		newmask	new mask; only the ALLPERMS bits are kept
 *		fsec	extended security information (unused, see the XXX
 *			note below); callers may pass UMASK_NOXSECURITY
 *		retval	out: the mask that was in effect before the call
 *
 * Visible behaviour: newmask is audited, the previous fd_cmask is stored in
 * *retval and fd_cmask is replaced by (newmask & ALLPERMS).
 *
 * NOTE(review): how fdp is obtained, any locking around the update and the
 * return statement are not visible in this listing.
 */
7789 * Set the mode mask for creation of filesystem nodes.
7790 * XXX implement xsecurity
7792 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7794 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
7796 struct filedesc
*fdp
;
7798 AUDIT_ARG(mask
, newmask
);
7801 *retval
= fdp
->fd_cmask
;
7802 fdp
->fd_cmask
= newmask
& ALLPERMS
;
/*
 * Summary of the visible code: xsecdst starts as KAUTH_FILESEC_NONE.  When
 * uap->xsecurity is not USER_ADDR_NULL it is copied in with
 * kauth_copyinfilesec(); a non-zero result of that call is tested.  The
 * listing also shows xsecdst being set to KAUTH_FILESEC_NONE a second time;
 * the branch that contains that assignment is not visible.  umask1() is then
 * called with uap->newmask and xsecdst, and xsecdst is released with
 * kauth_filesec_free() when it is not KAUTH_FILESEC_NONE.
 *
 * NOTE(review): the error-return statements are not visible in this listing.
 */
7808 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7810 * Parameters: p Process requesting to set the umask
7811 * uap User argument descriptor (see below)
7812 * retval umask of the process (parameter p)
7814 * Indirect: uap->newmask umask to set
7815 * uap->xsecurity ACL to set
7817 * Returns: 0 Success
7822 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
7825 kauth_filesec_t xsecdst
;
7827 xsecdst
= KAUTH_FILESEC_NONE
;
7828 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
7829 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
7832 xsecdst
= KAUTH_FILESEC_NONE
;
7835 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
7837 if (xsecdst
!= KAUTH_FILESEC_NONE
)
7838 kauth_filesec_free(xsecdst
);
7843 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
7845 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
/*
 * revoke: system call operating on the file named by a user-space path.
 *
 * NOTE(review): this listing is incomplete (the lookup call, error returns
 * and cleanup are not shown); the steps below are the ones visible.
 *
 *  - The lookup is prepared with NDINIT() for LOOKUP / OP_REVOKE, following
 *    symbolic links (FOLLOW) and auditing the path (AUDITVNPATH1).
 *  - A vnode that is neither a character nor a block device is singled out,
 *    as is a block device that is currently mounted on.
 *  - mac_vnode_check_revoke() is consulted.
 *  - The owner (va_uid) is fetched with vnode_getattr(); when the caller's
 *    uid differs from it, suser() is called on the caller's credential.
 *  - VNOP_REVOKE(vp, REVOKEALL, ctx) is issued when the vnode has a
 *    non-zero v_usecount or is aliased.
 */
7849 * Void all references to file by ripping underlying filesystem
7854 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
7857 struct vnode_attr va
;
7858 vfs_context_t ctx
= vfs_context_current();
7860 struct nameidata nd
;
7862 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
7871 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
7876 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
7882 error
= mac_vnode_check_revoke(ctx
, vp
);
7888 VATTR_WANTED(&va
, va_uid
);
7889 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
7891 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
7892 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
7894 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
7895 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
7903 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7904 * The following system calls are designed to support features
7905 * which are specific to the HFS & HFS Plus volume formats
/*
 * getdirentriesattr: system call that enumerates a directory and returns
 * the requested attributes of its entries.
 *
 * NOTE(review): this listing is incomplete (several declarations, error
 * returns and labels are not shown); the steps below are the ones visible.
 *
 *  - The attribute list (uap->alist) and the entry count (uap->count) are
 *    copied in from user space.
 *  - The descriptor is resolved with fp_getfvp(); when FREAD is not set on
 *    the open file the vnode path is audited.
 *  - mac_file_check_change_offset() is consulted, vnode_getwithref() is
 *    called, a non-VDIR vnode is released with vnode_put(), and
 *    mac_vnode_check_readdir() is consulted.
 *  - A single-iovec uio over uap->buffer / uap->buffersize is created at the
 *    open file's current fg_offset.
 *  - Authorization: KAUTH_VNODE_LIST_DIRECTORY always; KAUTH_VNODE_SEARCH is
 *    added when any common attribute other than ATTR_CMN_NAME, or any file
 *    or directory attribute, is requested.
 *  - VNOP_READDIRATTR() is called with uap->options truncated to 32 bits;
 *    count is both the requested maximum and the returned number of entries.
 *  - Union mounts: when eofflag is set on an MNT_UNION mount and the buffer
 *    is still empty, lookup_traverse_union() is tried; on success the open
 *    file takes a reference on the new vnode (vnode_ref_ext), fg_data is
 *    pointed at it, fg_offset is reset to 0 and the reference on the old
 *    vnode is dropped (vnode_rele_internal).
 *  - fg_offset is updated from the uio, then count, newstate and loff are
 *    copied out to uap->count, uap->newstate and uap->basep; *retval is set
 *    to eofflag.
 */
7910 * Obtain attribute information on objects in a directory while enumerating
7915 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
7918 struct fileproc
*fp
;
7920 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7921 uint32_t count
, savecount
;
7925 struct attrlist attributelist
;
7926 vfs_context_t ctx
= vfs_context_current();
7928 char uio_buf
[ UIO_SIZEOF(1) ];
7929 kauth_action_t action
;
7933 /* Get the attributes into kernel space */
7934 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
7937 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
7941 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
7944 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7945 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7952 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
7959 if ( (error
= vnode_getwithref(vp
)) )
7962 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7965 if (vp
->v_type
!= VDIR
) {
7966 (void)vnode_put(vp
);
7972 error
= mac_vnode_check_readdir(ctx
, vp
);
7974 (void)vnode_put(vp
);
7979 /* set up the uio structure which will contain the users return buffer */
7980 loff
= fp
->f_fglob
->fg_offset
;
7981 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7982 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
7985 * If the only item requested is file names, we can let that past with
7986 * just LIST_DIRECTORY. If they want any other attributes, that means
7987 * they need SEARCH as well.
7989 action
= KAUTH_VNODE_LIST_DIRECTORY
;
7990 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
7991 attributelist
.fileattr
|| attributelist
.dirattr
)
7992 action
|= KAUTH_VNODE_SEARCH
;
7994 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
7996 /* Believe it or not, uap->options only has 32-bits of valid
7997 * info, so truncate before extending again */
7999 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
8000 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
8004 (void) vnode_put(vp
);
8009 * If we've got the last entry of a directory in a union mount
8010 * then reset the eofflag and pretend there's still more to come.
8011 * The next call will again set eofflag and the buffer will be empty,
8012 * so traverse to the underlying directory and do the directory
8015 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8016 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8018 } else { // Empty buffer
8019 struct vnode
*tvp
= vp
;
8020 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8021 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8022 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8023 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8025 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8033 (void)vnode_put(vp
);
8037 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8039 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8041 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8043 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8046 *retval
= eofflag
; /* similar to getdirentries */
8050 return (error
); /* return error earlier, an retval of 0 or 1 now */
8052 } /* end of getdirentriesattr system call */
/*
 * exchangedata: system call that exchanges the data of the two files named
 * by uap->path1 and uap->path2.
 *
 * NOTE(review): this listing is incomplete (several declarations, error
 * returns, labels and cleanup calls are not shown); the steps below are the
 * ones visible.
 *
 *  - Both paths are looked up with namei(); symbolic links are followed
 *    unless FSOPT_NOFOLLOW is set in uap->options.  The second lookup also
 *    passes CN_NBMOUNTLOOK.
 *  - Files on different mounts, and vnodes that are not regular files, are
 *    singled out as errors (per the comments next to those tests).
 *  - mac_vnode_check_exchangedata() is consulted and both vnodes must pass
 *    vnode_authorize() for KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA.
 *  - When an FSE_EXCHANGE event is needed or kauth file-op listeners exist,
 *    both paths are fetched with safe_getpath() and file info with
 *    get_fse_info(); if either path was truncated, FSE_TRUNCATED_PATH is set
 *    on f_finfo only.
 *  - VNOP_EXCHANGE(fvp, svp, 0, ctx) performs the exchange.
 *  - Afterwards listeners are notified through kauth_authorize_fileop()
 *    (result ignored), the v_name pointers of the two vnodes are swapped,
 *    their v_parent pointers are swapped when they differ, and
 *    name_cache_unlock() is called (the matching lock call is not visible).
 *  - An FSE_EXCHANGE event carrying both paths and both finfo records is
 *    posted with add_fsevent(), and both path buffers are released with
 *    RELEASE_PATH().
 */
8055 * Exchange data between two files
8060 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8063 struct nameidata fnd
, snd
;
8064 vfs_context_t ctx
= vfs_context_current();
8068 u_int32_t nameiflags
;
8072 int from_truncated
=0, to_truncated
=0;
8074 fse_info f_finfo
, s_finfo
;
8078 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8080 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8081 UIO_USERSPACE
, uap
->path1
, ctx
);
8083 error
= namei(&fnd
);
8090 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8091 UIO_USERSPACE
, uap
->path2
, ctx
);
8093 error
= namei(&snd
);
8102 * if the files are the same, return an inval error
8110 * if the files are on different volumes, return an error
8112 if (svp
->v_mount
!= fvp
->v_mount
) {
8117 /* If they're not files, return an error */
8118 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8124 error
= mac_vnode_check_exchangedata(ctx
,
8129 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8130 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8135 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8137 kauth_authorize_fileop_has_listeners()) {
8140 if (fpath
== NULL
|| spath
== NULL
) {
8145 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8146 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8149 get_fse_info(fvp
, &f_finfo
, ctx
);
8150 get_fse_info(svp
, &s_finfo
, ctx
);
8151 if (from_truncated
|| to_truncated
) {
8152 // set it here since only the f_finfo gets reported up to user space
8153 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8157 /* Ok, make the call */
8158 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8161 const char *tmpname
;
8163 if (fpath
!= NULL
&& spath
!= NULL
) {
8164 /* call out to allow 3rd party notification of exchangedata.
8165 * Ignore result of kauth_authorize_fileop call.
8167 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8168 (uintptr_t)fpath
, (uintptr_t)spath
);
8172 tmpname
= fvp
->v_name
;
8173 fvp
->v_name
= svp
->v_name
;
8174 svp
->v_name
= tmpname
;
8176 if (fvp
->v_parent
!= svp
->v_parent
) {
8179 tmp
= fvp
->v_parent
;
8180 fvp
->v_parent
= svp
->v_parent
;
8181 svp
->v_parent
= tmp
;
8183 name_cache_unlock();
8186 if (fpath
!= NULL
&& spath
!= NULL
) {
8187 add_fsevent(FSE_EXCHANGE
, ctx
,
8188 FSE_ARG_STRING
, flen
, fpath
,
8189 FSE_ARG_FINFO
, &f_finfo
,
8190 FSE_ARG_STRING
, slen
, spath
,
8191 FSE_ARG_FINFO
, &s_finfo
,
8199 RELEASE_PATH(fpath
);
8201 RELEASE_PATH(spath
);
8209 * Return (in MB) the amount of freespace on the given vnode's volume.
8211 uint32_t freespace_mb(vnode_t vp
);
8214 freespace_mb(vnode_t vp
)
8216 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8217 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8218 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
/*
 * searchfs: system call that searches a volume for objects matching
 * caller-supplied criteria (the variant with a real body; a second
 * definition with all-unused parameters follows after #else).
 *
 * NOTE(review): this listing is incomplete (several declarations, error
 * returns, labels and some call arguments are not shown); the steps below
 * are the ones visible.
 *
 *  1. Copy in the search block.  A 64-bit process supplies a
 *     user64_fssearchblock directly; otherwise a user32_fssearchblock is
 *     copied in and widened field by field.  The time limit is kept in a
 *     separate struct timeval.
 *  2. Test sizeofsearchparams1 and sizeofsearchparams2 against
 *     SEARCHFS_MAX_SEARCHPARMS.
 *  3. Allocate one M_TEMP buffer laid out as
 *	searchparams1 | searchparams2 | struct attrlist | struct searchstate
 *     plus 2 * sizeof(uint32_t) of slack, and copy the four pieces in from
 *     user space.
 *  4. Union-mount bookkeeping: SRCHFS_START in uap->options resets
 *     ss_union_layer; ss_union_flags from the copied-in state are OR-ed into
 *     uap->options and then cleared.
 *  5. When ATTR_CMN_NAME is among the search attributes, the attrreference_t
 *     that follows the leading length word of searchparams1 is checked for a
 *     negative offset, a length above MAXPATHLEN, pointer wrap-around, and a
 *     reference that extends beyond searchparams1 + sizeofsearchparams1.
 *     Only searchparams1 is examined in the visible code.
 *  6. Build the uio over the caller's return buffer, prepare the lookup of
 *     uap->path (FOLLOW unless FSOPT_NOFOLLOW) and obtain the volume's root
 *     vnode with VFS_ROOT().
 *  7. On MNT_UNION mounts step down up to ss_union_layer layers through
 *     mnt_vnodecovered.
 *  8. Consult mac_vnode_check_searchfs(), single out maxmatches == 0, and
 *     call VNOP_SEARCHFS(); its result is kept separately in fserror.
 *  9. If the mount is MNT_UNION and fserror is 0, store SRCHFS_START in
 *     ss_union_flags and advance ss_union_layer so the next call searches
 *     the layer below.
 * 10. Copy the search state and the match count back to user space and free
 *     the temporary buffer.
 *
 * NOTE(review): the return value of the vnode_getwithref() call made after
 * stepping to mnt_vnodecovered is not used in the visible code.
 */
8226 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8231 struct nameidata nd
;
8232 struct user64_fssearchblock searchblock
;
8233 struct searchstate
*state
;
8234 struct attrlist
*returnattrs
;
8235 struct timeval timelimit
;
8236 void *searchparams1
,*searchparams2
;
8238 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8239 uint32_t nummatches
;
8241 uint32_t nameiflags
;
8242 vfs_context_t ctx
= vfs_context_current();
8243 char uio_buf
[ UIO_SIZEOF(1) ];
8245 /* Start by copying in fsearchblock parameter list */
8246 if (IS_64BIT_PROCESS(p
)) {
8247 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8248 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8249 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8252 struct user32_fssearchblock tmp_searchblock
;
8254 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8255 // munge into 64-bit version
8256 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8257 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8258 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8259 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8261 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8262 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8264 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8265 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8266 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8267 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8268 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8269 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8270 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8275 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8277 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8278 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8281 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8282 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8283 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8286 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8287 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8288 /* assumes the size is still 556 bytes it will continue to work */
8290 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8291 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8293 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8295 /* Now set up the various pointers to the correct place in our newly allocated memory */
8297 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8298 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8299 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8301 /* Now copy in the stuff given our local variables. */
8303 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8306 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8309 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8312 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8316 * When searching a union mount, need to set the
8317 * start flag at the first call on each layer to
8318 * reset state for the new volume.
8320 if (uap
->options
& SRCHFS_START
)
8321 state
->ss_union_layer
= 0;
8323 uap
->options
|= state
->ss_union_flags
;
8324 state
->ss_union_flags
= 0;
8327 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8328 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8329 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8330 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8331 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8334 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8335 attrreference_t
* string_ref
;
8336 u_int32_t
* start_length
;
8337 user64_size_t param_length
;
8339 /* validate searchparams1 */
8340 param_length
= searchblock
.sizeofsearchparams1
;
8341 /* skip the word that specifies length of the buffer */
8342 start_length
= (u_int32_t
*) searchparams1
;
8343 start_length
= start_length
+1;
8344 string_ref
= (attrreference_t
*) start_length
;
8346 /* ensure no negative offsets or too big offsets */
8347 if (string_ref
->attr_dataoffset
< 0 ) {
8351 if (string_ref
->attr_length
> MAXPATHLEN
) {
8356 /* Check for pointer overflow in the string ref */
8357 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8362 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8366 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8372 /* set up the uio structure which will contain the users return buffer */
8373 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8374 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8377 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8378 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8379 UIO_USERSPACE
, uap
->path
, ctx
);
8388 * Switch to the root vnode for the volume
8390 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8397 * If it's a union mount, the path lookup takes
8398 * us to the top layer. But we may need to descend
8399 * to a lower layer. For non-union mounts the layer
8402 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
8403 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
8406 vp
= vp
->v_mount
->mnt_vnodecovered
;
8412 vnode_getwithref(vp
);
8417 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
8426 * If searchblock.maxmatches == 0, then skip the search. This has happened
8427 * before and sometimes the underlying code doesnt deal with it well.
8429 if (searchblock
.maxmatches
== 0) {
8435 * Allright, we have everything we need, so lets make that call.
8437 * We keep special track of the return value from the file system:
8438 * EAGAIN is an acceptable error condition that shouldn't keep us
8439 * from copying out any results...
8442 fserror
= VNOP_SEARCHFS(vp
,
8445 &searchblock
.searchattrs
,
8446 (u_long
)searchblock
.maxmatches
,
8450 (u_long
)uap
->scriptcode
,
8451 (u_long
)uap
->options
,
8453 (struct searchstate
*) &state
->ss_fsstate
,
8457 * If it's a union mount we need to be called again
8458 * to search the mounted-on filesystem.
8460 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
8461 state
->ss_union_flags
= SRCHFS_START
;
8462 state
->ss_union_layer
++; // search next layer down
8470 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8471 search state. Everything was already put into he return buffer by the vop call. */
8473 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
8476 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
8483 FREE(searchparams1
,M_TEMP
);
8488 } /* end of searchfs system call */
8490 #else /* CONFIG_SEARCHFS */
/*
 * searchfs: alternative definition used on the #else side of the
 * CONFIG_SEARCHFS conditional.  All three parameters are marked __unused.
 *
 * NOTE(review): the body (and therefore the value returned) is not visible
 * in this listing.
 */
8493 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
8498 #endif /* CONFIG_SEARCHFS */
/*
 * Namespace/snapshot handler state.
 *
 * The lock attribute, group attribute and group objects below are
 * allocated in nspace_handler_init() and used to initialise both mutexes.
 */
8501 lck_grp_attr_t
* nspace_group_attr
;
8502 lck_attr_t
* nspace_lock_attr
;
8503 lck_grp_t
* nspace_mutex_group
;
/* held around accesses to the nspace_items table in the functions below */
8505 lck_mtx_t nspace_handler_lock
;
/* held around reads/writes of nspace_handlers[].handler_busy */
8506 lck_mtx_t nspace_handler_exclusion_lock
;
/* a value of 0 or ~0 makes the snapshot handler paths bail out / unblock waiters */
8508 time_t snapshot_timestamp
=0;
/* when zero, snapshot events on MNTK_VIRTUALDEV mounts are not posted */
8509 int nspace_allow_virtual_devs
=0;
8511 void nspace_handler_init(void);
/*
 * One slot of the pending-event table.
 * NOTE(review): the member list is not visible in this excerpt; the code
 * below uses vp, arg, op, vid, flags, token and refcount.
 */
8513 typedef struct nspace_item_info
{
/* number of slots in the pending-event table */
8523 #define MAX_NSPACE_ITEMS 128
8524 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
8525 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
/* incremented to produce each item's token; its address is also a sleep channel for "table full" waiters */
8526 uint32_t nspace_token_id
=0;
/* used as the tv_sec of the msleep() timeouts in resolve_nspace_item_ext() */
8527 uint32_t nspace_handler_timeout
= 15; // seconds
/*
 * nspace_item_info.flags bits: item life-cycle state ...
 */
8529 #define NSPACE_ITEM_NEW 0x0001
8530 #define NSPACE_ITEM_PROCESSING 0x0002
8531 #define NSPACE_ITEM_DEAD 0x0004
8532 #define NSPACE_ITEM_CANCELLED 0x0008
8533 #define NSPACE_ITEM_DONE 0x0010
8534 #define NSPACE_ITEM_RESET_TIMER 0x0020
/* ... and the event type the item belongs to */
8536 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
8537 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
8539 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
8541 //#pragma optimization_level 0
/*
 * Handler types; NSPACE_HANDLER_COUNT sizes the nspace_handlers table.
 * NOTE(review): the enum/struct introducer lines are not visible here.
 */
8544 NSPACE_HANDLER_NSPACE
= 0,
8545 NSPACE_HANDLER_SNAPSHOT
= 1,
8547 NSPACE_HANDLER_COUNT
,
/* per-handler registration: thread id and process of the registered handler */
8551 uint64_t handler_tid
;
8552 struct proc
*handler_proc
;
8556 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
8558 /* namespace fsctl functions */
/*
 * Forward declarations for the file-local helpers defined below.
 * NOTE(review): validate_namespace_args() takes `int size` while
 * process_namespace_fsctl() receives (and forwards) a `u_int size`.
 */
8559 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
8560 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
8561 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
8562 static nspace_type_t
nspace_type_for_op(uint64_t op
);
8563 static int nspace_is_special_process(struct proc
*proc
);
8564 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
8565 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
8566 static int validate_namespace_args (int is64bit
, int size
);
8567 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
8570 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
8572 switch(nspace_type
) {
8573 case NSPACE_HANDLER_NSPACE
:
8574 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
8575 case NSPACE_HANDLER_SNAPSHOT
:
8576 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
8578 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
8583 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
8585 switch(nspace_type
) {
8586 case NSPACE_HANDLER_NSPACE
:
8587 return NSPACE_ITEM_NSPACE_EVENT
;
8588 case NSPACE_HANDLER_SNAPSHOT
:
8589 return NSPACE_ITEM_SNAPSHOT_EVENT
;
8591 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
8596 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
8598 switch(nspace_type
) {
8599 case NSPACE_HANDLER_NSPACE
:
8600 return FREAD
| FWRITE
| O_EVTONLY
;
8601 case NSPACE_HANDLER_SNAPSHOT
:
8602 return FREAD
| O_EVTONLY
;
8604 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
8609 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
8611 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
8612 case NAMESPACE_HANDLER_NSPACE_EVENT
:
8613 return NSPACE_HANDLER_NSPACE
;
8614 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
8615 return NSPACE_HANDLER_SNAPSHOT
;
8617 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
8618 return NSPACE_HANDLER_NSPACE
;
8622 static inline int nspace_is_special_process(struct proc
*proc
)
8625 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8626 if (proc
== nspace_handlers
[i
].handler_proc
)
8633 nspace_handler_init(void)
8635 nspace_lock_attr
= lck_attr_alloc_init();
8636 nspace_group_attr
= lck_grp_attr_alloc_init();
8637 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
8638 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
8639 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
8640 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
8644 nspace_proc_exit(struct proc
*p
)
8646 int i
, event_mask
= 0;
8648 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8649 if (p
== nspace_handlers
[i
].handler_proc
) {
8650 event_mask
|= nspace_item_flags_for_type(i
);
8651 nspace_handlers
[i
].handler_tid
= 0;
8652 nspace_handlers
[i
].handler_proc
= NULL
;
8656 if (event_mask
== 0) {
8660 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8661 // if this process was the snapshot handler, zero snapshot_timeout
8662 snapshot_timestamp
= 0;
8666 // unblock anyone that's waiting for the handler that died
8668 lck_mtx_lock(&nspace_handler_lock
);
8669 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8670 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
8672 if ( nspace_items
[i
].flags
& event_mask
) {
8674 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
8675 vnode_lock_spin(nspace_items
[i
].vp
);
8676 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8677 vnode_unlock(nspace_items
[i
].vp
);
8679 nspace_items
[i
].vp
= NULL
;
8680 nspace_items
[i
].vid
= 0;
8681 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8682 nspace_items
[i
].token
= 0;
8684 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8689 wakeup((caddr_t
)&nspace_item_idx
);
8690 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Convenience wrapper: post a namespace event for `vp`/`op` with no extra
 * argument by forwarding to resolve_nspace_item_ext() with arg == NULL, and
 * return its result.
 */
8695 resolve_nspace_item(struct vnode
*vp
, uint64_t op
)
8697 return resolve_nspace_item_ext(vp
, op
, NULL
);
/*
 * resolve_nspace_item_ext
 *
 * Post an event for `vp` in the nspace_items table and sleep until the
 * slot is marked done or cancelled, or the wait times out.
 *
 *   vp  - vnode the event is about (only VREG, VDIR and VLNK are accepted)
 *   op  - operation word; its event-type bits select the handler type
 *   arg - NULL, NSPACE_REARM_NO_ARG or a uio pointer (see the comment where
 *         it is stored in the slot)
 *
 * NOTE(review): the return statements and several early-exit bodies are
 * not visible in this excerpt.
 */
8701 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
8703 int i
, error
, keep_waiting
;
8705 nspace_type_t nspace_type
= nspace_type_for_op(op
);
8707 // only allow namespace events on regular files, directories and symlinks.
8708 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
8713 // if this is a snapshot event and the vnode is on a
8714 // disk image just pretend nothing happened since any
8715 // change to the disk image will cause the disk image
8716 // itself to get backed up and this avoids multi-way
8717 // deadlocks between the snapshot handler and the ever
8718 // popular diskimages-helper process. the variable
8719 // nspace_allow_virtual_devs allows this behavior to
8720 // be overridden (for use by the Mobile TimeMachine
8721 // testing infrastructure which uses disk images)
8723 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
8724 && (vp
->v_mount
!= NULL
)
8725 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
8726 && !nspace_allow_virtual_devs
) {
8731 // if (thread_tid(current_thread()) == namespace_handler_tid) {
/* no handler process is registered for this event type */
8732 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
/* the caller is itself a registered handler process */
8736 if (nspace_is_special_process(current_proc())) {
8740 lck_mtx_lock(&nspace_handler_lock
);
/* first pass: is there already a slot for this (vp, op) pair? */
8743 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8744 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
/* no existing slot: look for a free one (flags == 0) */
8749 if (i
>= MAX_NSPACE_ITEMS
) {
8750 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8751 if (nspace_items
[i
].flags
== 0) {
/* NOTE(review): presumably the existing-slot path (enclosing else not visible) -- one more waiter shares the slot */
8756 nspace_items
[i
].refcount
++;
/* table is full: sleep on nspace_token_id until a slot is released or the timeout expires */
8759 if (i
>= MAX_NSPACE_ITEMS
) {
8760 ts
.tv_sec
= nspace_handler_timeout
;
8763 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
8765 // an entry got free'd up, go see if we can get a slot
8768 lck_mtx_unlock(&nspace_handler_lock
);
8774 // if it didn't already exist, add it. if it did exist
8775 // we'll get woken up when someone does a wakeup() on
8776 // the slot in the nspace_items table.
8778 if (vp
!= nspace_items
[i
].vp
) {
8779 nspace_items
[i
].vp
= vp
;
8780 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
8781 nspace_items
[i
].op
= op
;
8782 nspace_items
[i
].vid
= vnode_vid(vp
);
8783 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
8784 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
/* snapshot items also tag the vnode itself with VNEEDSSNAPSHOT */
8785 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8787 vnode_lock_spin(vp
);
8788 vp
->v_flag
|= VNEEDSSNAPSHOT
;
8793 nspace_items
[i
].token
= 0;
8794 nspace_items
[i
].refcount
= 1;
/* wake handlers sleeping on nspace_item_idx in wait_for_namespace_event() */
8796 wakeup((caddr_t
)&nspace_item_idx
);
8800 // Now go to sleep until the handler does a wakeup on this
8801 // slot in the nspace_items table (or we timeout).
8804 while(keep_waiting
) {
8805 ts
.tv_sec
= nspace_handler_timeout
;
/* the sleep channel is the address of this slot's vp field */
8807 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
8809 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
/* on cancel the handler stores the value to return in the token field */
8811 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
8812 error
= nspace_items
[i
].token
;
/* timed out: if the handler asked for more time, consume the request bit */
8813 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
8814 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
8815 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
8820 } else if (error
== 0) {
8821 // hmmm, why did we get woken up?
8822 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8823 nspace_items
[i
].token
);
/* last waiter out recycles the slot and wakes anyone waiting for a free slot */
8826 if (--nspace_items
[i
].refcount
== 0) {
8827 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
8828 nspace_items
[i
].arg
= NULL
;
8829 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
8830 nspace_items
[i
].flags
= 0; // this clears it for re-use
8832 wakeup(&nspace_token_id
);
8836 lck_mtx_unlock(&nspace_handler_lock
);
8843 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
8847 lck_mtx_lock(&nspace_handler_lock
);
8848 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8849 if (nspace_items
[i
].vp
== vp
) {
8854 if (i
>= MAX_NSPACE_ITEMS
) {
8855 lck_mtx_unlock(&nspace_handler_lock
);
8859 *status
= nspace_items
[i
].flags
;
8860 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * build_volfs_path
 *
 * Format a "/.vol/<fsid>/<fileid>" path for `vp` into `path`.
 *
 *   vp   - vnode whose fsid and fileid are fetched with vnode_getattr()
 *   path - output buffer
 *   len  - in: size passed to snprintf(); out: snprintf()'s return value + 1
 *
 * If vnode_getattr() fails a fixed placeholder path is written instead.
 * NOTE(review): the value returned to the caller is not visible in this
 * excerpt.
 */
8867 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
8869 struct vnode_attr va
;
/* request only the two attributes used to build the path */
8873 VATTR_WANTED(&va
, va_fsid
);
8874 VATTR_WANTED(&va
, va_fileid
);
8876 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
8877 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
8880 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
/*
 * vn_open_with_vp
 *
 * Open an already-resolved vnode: checks that the caller is super-user,
 * runs the MAC open check, authorizes the access implied by `fmode`,
 * calls VNOP_OPEN and takes a usecount reference with vnode_ref_ext(),
 * then issues the open notifications.
 *
 *   vp    - vnode to open
 *   fmode - FREAD/FWRITE/O_* open mode
 *   ctx   - context used for the checks and the open
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
8889 // Note: this function does NOT check permissions on all of the
8890 // parent directories leading to this vnode. It should only be
8891 // called on behalf of a root process. Otherwise a process may
8892 // get access to a file because the file itself is readable even
8893 // though its parent directories would prevent access.
8896 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
8900 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8905 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
8910 /* compute action to be authorized */
8912 if (fmode
& FREAD
) {
8913 action
|= KAUTH_VNODE_READ_DATA
;
8915 if (fmode
& (FWRITE
| O_TRUNC
)) {
8917 * If we are writing, appending, and not truncating,
8918 * indicate that we are appending so that if the
8919 * UF_APPEND or SF_APPEND bits are set, we do not deny
8922 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
8923 action
|= KAUTH_VNODE_APPEND_DATA
;
8925 action
|= KAUTH_VNODE_WRITE_DATA
;
8929 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
8934 // if the vnode is tagged VOPENEVT and the current process
8935 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8936 // flag to the open mode so that this open won't count against
8937 // the vnode when carbon delete() does a vnode_isinuse() to see
8938 // if a file is currently in use. this allows spotlight
8939 // importers to not interfere with carbon apps that depend on
8940 // the no-delete-if-busy semantics of carbon delete().
8942 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
8946 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
/* could not take the reference: undo the VNOP_OPEN above */
8949 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
8950 VNOP_CLOSE(vp
, fmode
, ctx
);
8954 /* Call out to allow 3rd party notification of open.
8955 * Ignore result of kauth_authorize_fileop call.
8958 mac_vnode_notify_open(ctx
, vp
, fmode
);
8960 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
/*
 * wait_for_namespace_event
 *
 * Handler side of the namespace/snapshot protocol.  Marks the handler type
 * busy, registers the caller as the handler for `nspace_type` if none is
 * registered, then looks for a NEW item of that type in nspace_items.  For
 * such an item the vnode is opened, a file descriptor is allocated, and the
 * item's token, op word and descriptor (plus optional info/objid fields)
 * are copied out to the user addresses held in `nhd`.  When no item is
 * pending the caller sleeps on nspace_item_idx.
 *
 *   nhd         - kernel copy of the user-supplied result pointers
 *   nspace_type - which handler table entry / event type to service
 *
 * NOTE(review): return statements and several branch bodies are not
 * visible in this excerpt.
 */
8968 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
8970 int i
, error
=0, unblock
=0;
/* only one thread at a time may service a given handler type */
8973 lck_mtx_lock(&nspace_handler_exclusion_lock
);
8974 if (nspace_handlers
[nspace_type
].handler_busy
) {
8975 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8978 nspace_handlers
[nspace_type
].handler_busy
= 1;
8979 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8982 * Any process that gets here will be one of the namespace handlers.
8983 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8984 * as we can cause deadlocks to occur, because the namespace handler may prevent
8985 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8988 curtask
= current_task();
8989 bsd_set_dependency_capable (curtask
);
8991 lck_mtx_lock(&nspace_handler_lock
);
/* first caller for this type becomes the registered handler */
8992 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8993 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
8994 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
8997 while (error
== 0) {
/* scan for a NEW item whose event type matches this handler */
8999 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9000 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9001 if (!nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
/* found one: move it from NEW to PROCESSING and give it a fresh token */
9008 if (i
< MAX_NSPACE_ITEMS
) {
9009 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9010 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9011 nspace_items
[i
].token
= ++nspace_token_id
;
9013 if (nspace_items
[i
].vp
) {
9014 struct fileproc
*fp
;
9015 int32_t indx
, fmode
;
9016 struct proc
*p
= current_proc();
9017 vfs_context_t ctx
= vfs_context_current();
9018 struct vnode_attr va
;
9022 * Use vnode pointer to acquire a file descriptor for
9023 * hand-off to userland
9025 fmode
= nspace_open_flags_for_type(nspace_type
);
/* take an iocount only if the vnode still has the vid recorded when the item was posted */
9026 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9031 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9034 vnode_put(nspace_items
[i
].vp
);
/* descriptor allocation failed: close the vnode and drop the iocount */
9038 if ((error
= falloc(p
, &fp
, &indx
, ctx
))) {
9039 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9040 vnode_put(nspace_items
[i
].vp
);
/* wire the new fileglob to the vnode */
9045 fp
->f_fglob
->fg_flag
= fmode
;
9046 fp
->f_fglob
->fg_ops
= &vnops
;
9047 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9050 procfdtbl_releasefd(p
, indx
, NULL
);
9051 fp_drop(p
, indx
, fp
, 1);
9055 * All variants of the namespace handler struct support these three fields:
9056 * token, flags, and the FD pointer
/*
 * NOTE(review): each copyout() result is assigned to `error` in turn, so
 * in the lines visible here a failure of an earlier copyout is overwritten
 * by the next one -- confirm whether that is intended.
 */
9058 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9059 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9060 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9063 * Handle optional fields:
9064 * extended version support an info ptr (offset, length), and the
9066 * namedata version supports a unique per-link object ID
9070 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9071 uint64_t u_offset
, u_length
;
9074 u_offset
= uio_offset(uio
);
9075 u_length
= uio_resid(uio
);
/* offset goes to infoptr, length to the following 8 bytes */
9080 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9081 error
= copyout(&u_length
, nhd
->infoptr
+sizeof(uint64_t), sizeof(uint64_t));
9086 VATTR_WANTED(&va
, va_linkid
);
9087 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
/* linkid stays 0 when the filesystem does not supply va_linkid */
9089 uint64_t linkid
= 0;
9090 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9091 linkid
= (uint64_t)va
.va_linkid
;
9093 error
= copyout (&linkid
, nhd
->objid
, sizeof(uint64_t));
/* NOTE(review): presumably the failure path (guarding condition not visible) -- undo the open and free the descriptor */
9098 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9099 fp_free(p
, indx
, fp
);
9103 vnode_put(nspace_items
[i
].vp
);
9107 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9108 i
, nspace_items
[i
].vp
, error
, nspace_items
[i
].vp
->v_name
);
/* nothing pending: sleep (no timeout) until an item is posted or a signal arrives */
9112 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9113 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
/* release the waiter on slot i: clear VNEEDSSNAPSHOT, mark the slot DONE and wake it */
9122 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9123 vnode_lock_spin(nspace_items
[i
].vp
);
9124 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9125 vnode_unlock(nspace_items
[i
].vp
);
9127 nspace_items
[i
].vp
= NULL
;
9128 nspace_items
[i
].vid
= 0;
9129 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9130 nspace_items
[i
].token
= 0;
9132 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9135 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9136 // just go through every snapshot event and unblock it immediately.
9137 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9138 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9139 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9140 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9141 nspace_items
[i
].vp
= NULL
;
9142 nspace_items
[i
].vid
= 0;
9143 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9144 nspace_items
[i
].token
= 0;
9146 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9153 lck_mtx_unlock(&nspace_handler_lock
);
/* allow another thread to service this handler type */
9155 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9156 nspace_handlers
[nspace_type
].handler_busy
= 0;
9157 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9162 static inline int validate_namespace_args (int is64bit
, int size
) {
9165 /* Must be one of these */
9166 if (size
== sizeof(user64_namespace_handler_info
)) {
9169 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9172 if (size
== sizeof(user64_namespace_handler_data
)) {
9178 /* 32 bit -- must be one of these */
9179 if (size
== sizeof(user32_namespace_handler_info
)) {
9182 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9185 if (size
== sizeof(user32_namespace_handler_data
)) {
9197 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9200 namespace_handler_data nhd
;
9202 bzero (&nhd
, sizeof(namespace_handler_data
));
9204 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9205 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9209 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9213 error
= validate_namespace_args (is64bit
, size
);
9218 /* Copy in the userland pointers into our kernel-only struct */
9221 /* 64 bit userland structures */
9222 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9223 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9224 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9226 /* If the size is greater than the standard info struct, add in extra fields */
9227 if (size
> (sizeof(user64_namespace_handler_info
))) {
9228 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9229 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9231 if (size
== (sizeof(user64_namespace_handler_data
))) {
9232 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9234 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9238 /* 32 bit userland structures */
9239 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9240 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9241 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9243 if (size
> (sizeof(user32_namespace_handler_info
))) {
9244 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9245 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
9247 if (size
== (sizeof(user32_namespace_handler_data
))) {
9248 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9250 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9254 return wait_for_namespace_event(&nhd
, nspace_type
);
9258 * Make a filesystem-specific control call:
/*
 * fsctl_internal
 *
 * Shared implementation behind fsctl() and ffsctl().  Stages the command's
 * argument in a kernel buffer (a 128-byte stack buffer, or kalloc'ed
 * memory for larger sizes), handles the generic FSCTL_* commands in the
 * switch below, forwards everything else to the filesystem through
 * VNOP_IOCTL, and finally copies the buffer back out for IOC_OUT commands
 * when no error occurred.
 *
 *   p       - calling process (used for the 64-bit check and suser checks)
 *   arg_vp  - in/out pointer to the target vnode
 *   cmd     - ioctl-style command word (size and direction are encoded in it)
 *   udata   - user address of the argument
 *   options - passed through to VNOP_IOCTL
 *   ctx     - vfs context
 *
 * NOTE(review): many lines (returns, breaks, some conditions) are not
 * visible in this excerpt.
 */
9262 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
9267 #define STK_PARAMS 128
9268 char stkbuf
[STK_PARAMS
];
9270 vnode_t vp
= *arg_vp
;
/* the argument size is encoded in the command word; oversized requests are rejected */
9272 size
= IOCPARM_LEN(cmd
);
9273 if (size
> IOCPARM_MAX
) return (EINVAL
);
9275 is64bit
= proc_is64bit(p
);
9281 * ensure the buffer is large enough for underlying calls
9283 #ifndef HFSIOC_GETPATH
9284 typedef char pn_t
[MAXPATHLEN
];
9285 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9289 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9291 if (IOCBASECMD(cmd
) == HFS_GETPATH
) {
9292 /* Round up to MAXPATHLEN regardless of user input */
/* arguments that do not fit in stkbuf are staged in kalloc'ed memory */
9296 if (size
> sizeof (stkbuf
)) {
9297 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9305 error
= copyin(udata
, data
, size
);
/* NOTE(review): presumably the zero-size input case -- the user address itself is stored as the argument, pointer-sized or 32-bit */
9314 *(user_addr_t
*)data
= udata
;
9317 *(uint32_t *)data
= (uint32_t)udata
;
9320 } else if ((cmd
& IOC_OUT
) && size
) {
9322 * Zero the buffer so the user always
9323 * gets back something deterministic.
9326 } else if (cmd
& IOC_VOID
) {
9328 *(user_addr_t
*)data
= udata
;
9331 *(uint32_t *)data
= (uint32_t)udata
;
9335 /* Check to see if it's a generic command */
9336 switch (IOCBASECMD(cmd
)) {
9338 case FSCTL_SYNC_VOLUME
: {
9339 mount_t mp
= vp
->v_mount
;
9340 int arg
= *(uint32_t*)data
;
9342 /* record vid of vp so we can drop it below. */
9343 uint32_t vvid
= vp
->v_id
;
9346 * Then grab mount_iterref so that we can release the vnode.
9347 * Without this, a thread may call vnode_iterate_prepare then
9348 * get into a deadlock because we've never released the root vp
9350 error
= mount_iterref (mp
, 0);
9356 /* issue the sync for this volume */
9357 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
9360 * Then release the mount_iterref once we're done syncing; it's not
9361 * needed for the VNOP_IOCTL below
9365 if (arg
& FSCTL_SYNC_FULLSYNC
) {
9366 /* re-obtain vnode iocount on the root vp, if possible */
9367 error
= vnode_getwithvid (vp
, vvid
);
9369 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
9373 /* mark the argument VP as having been released */
9378 case FSCTL_SET_PACKAGE_EXTS
: {
9379 user_addr_t ext_strings
;
9380 uint32_t num_entries
;
9383 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
9384 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
9386 // either you're 64-bit and passed a 64-bit struct or
9387 // you're 32-bit and passed a 32-bit struct. otherwise
9394 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
9395 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
9396 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
9398 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
9399 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
9400 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
9402 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
9406 /* namespace handlers */
9407 case FSCTL_NAMESPACE_HANDLER_GET
: {
9408 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
9412 /* Snapshot handlers */
9413 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
9414 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
9418 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
9419 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
/*
 * Handler asks for more time on an item: find the slot by token and set
 * NSPACE_ITEM_RESET_TIMER on it.  Restricted to super-user handler processes.
 * NOTE(review): `val` is read below but not used in the lines visible here.
 */
9423 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
9424 uint32_t token
, val
;
9427 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9431 if (!nspace_is_special_process(p
)) {
9436 token
= ((uint32_t *)data
)[0];
9437 val
= ((uint32_t *)data
)[1];
9439 lck_mtx_lock(&nspace_handler_lock
);
9441 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9442 if (nspace_items
[i
].token
== token
) {
9443 break; /* exit for loop, not case stmt */
9447 if (i
>= MAX_NSPACE_ITEMS
) {
9451 // if this bit is set, when resolve_nspace_item() times out
9452 // it will loop and go back to sleep.
9454 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
9457 lck_mtx_unlock(&nspace_handler_lock
);
9460 printf("nspace-handler-update: did not find token %u\n", token
);
/*
 * Handler finished an item: find the slot by token, mark it DONE and wake
 * its waiter.  VNEEDSSNAPSHOT is cleared on the vnode only when val == 0.
 */
9465 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
9466 uint32_t token
, val
;
9469 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9473 if (!nspace_is_special_process(p
)) {
9478 token
= ((uint32_t *)data
)[0];
9479 val
= ((uint32_t *)data
)[1];
9481 lck_mtx_lock(&nspace_handler_lock
);
9483 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9484 if (nspace_items
[i
].token
== token
) {
9485 break; /* exit for loop, not case statement */
9489 if (i
>= MAX_NSPACE_ITEMS
) {
9490 printf("nspace-handler-unblock: did not find token %u\n", token
);
9493 if (val
== 0 && nspace_items
[i
].vp
) {
9494 vnode_lock_spin(nspace_items
[i
].vp
);
9495 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9496 vnode_unlock(nspace_items
[i
].vp
);
9499 nspace_items
[i
].vp
= NULL
;
9500 nspace_items
[i
].arg
= NULL
;
9501 nspace_items
[i
].op
= 0;
9502 nspace_items
[i
].vid
= 0;
9503 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9504 nspace_items
[i
].token
= 0;
9506 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9509 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Handler cancels an item: find the slot by token, store `val` in the
 * token field (the waiter returns it as its error), switch the slot from
 * PROCESSING to CANCELLED and wake the waiter.
 */
9513 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
9514 uint32_t token
, val
;
9517 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9521 if (!nspace_is_special_process(p
)) {
9526 token
= ((uint32_t *)data
)[0];
9527 val
= ((uint32_t *)data
)[1];
9529 lck_mtx_lock(&nspace_handler_lock
);
9531 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9532 if (nspace_items
[i
].token
== token
) {
9533 break; /* exit for loop, not case stmt */
9537 if (i
>= MAX_NSPACE_ITEMS
) {
9538 printf("nspace-handler-cancel: did not find token %u\n", token
);
9541 if (nspace_items
[i
].vp
) {
9542 vnode_lock_spin(nspace_items
[i
].vp
);
9543 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9544 vnode_unlock(nspace_items
[i
].vp
);
9547 nspace_items
[i
].vp
= NULL
;
9548 nspace_items
[i
].arg
= NULL
;
9549 nspace_items
[i
].vid
= 0;
9550 nspace_items
[i
].token
= val
;
9551 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
9552 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
9554 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9557 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Set snapshot_timestamp from the first 32-bit word of the argument and
 * wake any handler sleeping on nspace_item_idx.
 * NOTE(review): the value is read as uint32_t; if time_t is wider than 32
 * bits, a user value of 0xffffffff would not compare equal to the `~0`
 * tested elsewhere -- confirm.
 */
9561 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
9562 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9566 // we explicitly do not do the namespace_handler_proc check here
9568 lck_mtx_lock(&nspace_handler_lock
);
9569 snapshot_timestamp
= ((uint32_t *)data
)[0];
9570 wakeup(&nspace_item_idx
);
9571 lck_mtx_unlock(&nspace_handler_lock
);
9572 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
/* super-user only: set nspace_allow_virtual_devs from the first 32-bit word of the argument */
9577 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
9579 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9583 lck_mtx_lock(&nspace_handler_lock
);
9584 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
9585 lck_mtx_unlock(&nspace_handler_lock
);
9586 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9587 nspace_allow_virtual_devs
? "" : " NOT");
/*
 * Super-user only: set or clear the mount's fstypename override under the
 * mount lock.  The "mtmfs" name additionally toggles
 * MNTK_EXTENDED_SECURITY / MNTK_AUTH_OPAQUE as shown below.
 * NOTE(review): the condition choosing between the set and clear paths is
 * not visible in this excerpt.
 */
9593 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
9595 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9599 mount_lock(vp
->v_mount
);
9601 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
9602 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
9603 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9604 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
9605 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
9608 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9609 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
9611 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
9612 vp
->v_mount
->fstypename_override
[0] = '\0';
9614 mount_unlock(vp
->v_mount
);
9620 /* Invoke the filesystem-specific code */
9621 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
9624 } /* end switch stmt */
9627 * if no errors, copy any data to user. Size was
9628 * already set and checked above.
9630 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
9631 error
= copyout(data
, udata
, size
);
/*
 * fsctl system call: path-based filesystem control.
 *
 * Looks up uap->path (following a trailing symlink unless FSOPT_NOFOLLOW is
 * set in uap->options), runs the MAC fsctl check on the vnode's mount and
 * passes the command to fsctl_internal().
 *
 * NOTE(review): the cleanup at the `done` label and the return statement
 * are not visible in this excerpt.
 */
9642 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
9645 struct nameidata nd
;
9648 vfs_context_t ctx
= vfs_context_current();
9650 AUDIT_ARG(cmd
, uap
->cmd
);
9651 AUDIT_ARG(value32
, uap
->options
);
9652 /* Get the vnode for the file we are getting info on: */
9654 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
9655 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
9656 UIO_USERSPACE
, uap
->path
, ctx
);
9657 if ((error
= namei(&nd
))) goto done
;
9662 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
/* vp is passed by address: fsctl_internal() receives a pointer to it */
9668 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
/*
 * ffsctl system call: file-descriptor-based filesystem control.
 *
 * Resolves uap->fd to a vnode with file_vnode(), takes an iocount with
 * vnode_getwithref(), runs the MAC fsctl check on the vnode's mount and
 * passes the command to fsctl_internal().
 *
 * NOTE(review): the error-path cleanup and the return statement are not
 * visible in this excerpt.
 */
9677 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
9681 vfs_context_t ctx
= vfs_context_current();
9684 AUDIT_ARG(fd
, uap
->fd
);
9685 AUDIT_ARG(cmd
, uap
->cmd
);
9686 AUDIT_ARG(value32
, uap
->options
);
9688 /* Get the vnode for the file we are getting info on: */
9689 if ((error
= file_vnode(uap
->fd
, &vp
)))
9692 if ((error
= vnode_getwithref(vp
))) {
9698 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
9705 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9709 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
9716 /* end of fsctl system call */
9719 * Retrieve the data of an extended attribute.
9722 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
9725 struct nameidata nd
;
9726 char attrname
[XATTR_MAXNAMELEN
+1];
9727 vfs_context_t ctx
= vfs_context_current();
9729 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9730 size_t attrsize
= 0;
9732 u_int32_t nameiflags
;
9734 char uio_buf
[ UIO_SIZEOF(1) ];
9736 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9739 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9740 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9741 if ((error
= namei(&nd
))) {
9747 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9750 if (xattr_protected(attrname
)) {
9751 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
9757 * the specific check for 0xffffffff is a hack to preserve
9758 * binaray compatibilty in K64 with applications that discovered
9759 * that passing in a buf pointer and a size of -1 resulted in
9760 * just the size of the indicated extended attribute being returned.
9761 * this isn't part of the documented behavior, but because of the
9762 * original implemtation's check for "uap->size > 0", this behavior
9763 * was allowed. In K32 that check turned into a signed comparison
9764 * even though uap->size is unsigned... in K64, we blow by that
9765 * check because uap->size is unsigned and doesn't get sign smeared
9766 * in the munger for a 32 bit user app. we also need to add a
9767 * check to limit the maximum size of the buffer being passed in...
9768 * unfortunately, the underlying fileystems seem to just malloc
9769 * the requested size even if the actual extended attribute is tiny.
9770 * because that malloc is for kernel wired memory, we have to put a
9773 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9774 * U64 running on K64 will yield -1 (64 bits wide)
9775 * U32/U64 running on K32 will yield -1 (32 bits wide)
9777 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
9781 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
9782 uap
->size
= XATTR_MAXSIZE
;
9784 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9785 &uio_buf
[0], sizeof(uio_buf
));
9786 uio_addiov(auio
, uap
->value
, uap
->size
);
9789 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
9794 *retval
= uap
->size
- uio_resid(auio
);
9796 *retval
= (user_ssize_t
)attrsize
;
9803 * Retrieve the data of an extended attribute.
9806 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
9809 char attrname
[XATTR_MAXNAMELEN
+1];
9811 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9812 size_t attrsize
= 0;
9815 char uio_buf
[ UIO_SIZEOF(1) ];
9817 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9820 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9823 if ( (error
= vnode_getwithref(vp
)) ) {
9827 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9830 if (xattr_protected(attrname
)) {
9834 if (uap
->value
&& uap
->size
> 0) {
9835 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9836 &uio_buf
[0], sizeof(uio_buf
));
9837 uio_addiov(auio
, uap
->value
, uap
->size
);
9840 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
9842 (void)vnode_put(vp
);
9846 *retval
= uap
->size
- uio_resid(auio
);
9848 *retval
= (user_ssize_t
)attrsize
;
9854 * Set the data of an extended attribute.
9857 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
9860 struct nameidata nd
;
9861 char attrname
[XATTR_MAXNAMELEN
+1];
9862 vfs_context_t ctx
= vfs_context_current();
9864 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9866 u_int32_t nameiflags
;
9868 char uio_buf
[ UIO_SIZEOF(1) ];
9870 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9873 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9874 if (error
== EPERM
) {
9875 /* if the string won't fit in attrname, copyinstr emits EPERM */
9876 return (ENAMETOOLONG
);
9878 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9881 if (xattr_protected(attrname
))
9883 if (uap
->size
!= 0 && uap
->value
== 0) {
9887 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9888 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9889 if ((error
= namei(&nd
))) {
9895 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9896 &uio_buf
[0], sizeof(uio_buf
));
9897 uio_addiov(auio
, uap
->value
, uap
->size
);
9899 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
9902 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9913 * Set the data of an extended attribute.
9916 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
9919 char attrname
[XATTR_MAXNAMELEN
+1];
9921 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9924 char uio_buf
[ UIO_SIZEOF(1) ];
9926 vfs_context_t ctx
= vfs_context_current();
9929 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9932 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9933 if (error
== EPERM
) {
9934 /* if the string won't fit in attrname, copyinstr emits EPERM */
9935 return (ENAMETOOLONG
);
9937 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9940 if (xattr_protected(attrname
))
9942 if (uap
->size
!= 0 && uap
->value
== 0) {
9945 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9948 if ( (error
= vnode_getwithref(vp
)) ) {
9952 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9953 &uio_buf
[0], sizeof(uio_buf
));
9954 uio_addiov(auio
, uap
->value
, uap
->size
);
9956 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
9959 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9971 * Remove an extended attribute.
9972 * XXX Code duplication here.
9975 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
9978 struct nameidata nd
;
9979 char attrname
[XATTR_MAXNAMELEN
+1];
9980 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9981 vfs_context_t ctx
= vfs_context_current();
9983 u_int32_t nameiflags
;
9986 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9989 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9993 if (xattr_protected(attrname
))
9995 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9996 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9997 if ((error
= namei(&nd
))) {
10003 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10006 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10017 * Remove an extended attribute.
10018 * XXX Code duplication here.
10021 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10024 char attrname
[XATTR_MAXNAMELEN
+1];
10028 vfs_context_t ctx
= vfs_context_current();
10031 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10034 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10038 if (xattr_protected(attrname
))
10040 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10043 if ( (error
= vnode_getwithref(vp
)) ) {
10044 file_drop(uap
->fd
);
10048 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10051 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10057 file_drop(uap
->fd
);
10063 * Retrieve the list of extended attribute names.
10064 * XXX Code duplication here.
10067 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10070 struct nameidata nd
;
10071 vfs_context_t ctx
= vfs_context_current();
10073 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10074 size_t attrsize
= 0;
10075 u_int32_t nameiflags
;
10077 char uio_buf
[ UIO_SIZEOF(1) ];
10079 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10082 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10083 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10084 if ((error
= namei(&nd
))) {
10089 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10090 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10091 &uio_buf
[0], sizeof(uio_buf
));
10092 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10095 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10099 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10101 *retval
= (user_ssize_t
)attrsize
;
10107 * Retrieve the list of extended attribute names.
10108 * XXX Code duplication here.
10111 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10115 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10116 size_t attrsize
= 0;
10118 char uio_buf
[ UIO_SIZEOF(1) ];
10120 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10123 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10126 if ( (error
= vnode_getwithref(vp
)) ) {
10127 file_drop(uap
->fd
);
10130 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10131 auio
= uio_createwithbuffer(1, 0, spacetype
,
10132 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10133 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10136 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10139 file_drop(uap
->fd
);
10141 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10143 *retval
= (user_ssize_t
)attrsize
;
10148 static int fsgetpath_internal(
10149 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10150 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10153 struct mount
*mp
= NULL
;
10158 if (bufsize
> PAGE_SIZE
) {
10166 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10167 error
= ENOTSUP
; /* unexpected failure */
10173 error
= VFS_ROOT(mp
, &vp
, ctx
);
10175 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10178 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10180 * If the fileid isn't found and we're in a union
10181 * mount volume, then see if the fileid is in the
10182 * mounted-on volume.
10184 struct mount
*tmp
= mp
;
10185 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10187 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10198 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10205 /* Obtain the absolute path to this vnode. */
10206 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10207 bpflags
|= BUILDPATH_CHECK_MOVED
;
10208 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10215 AUDIT_ARG(text
, buf
);
10217 if (kdebug_enable
) {
10218 long dbg_parms
[NUMPARMS
];
10221 dbg_namelen
= (int)sizeof(dbg_parms
);
10223 if (length
< dbg_namelen
) {
10224 memcpy((char *)dbg_parms
, buf
, length
);
10225 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10227 dbg_namelen
= length
;
10229 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10232 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10235 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10242 * Obtain the full pathname of a file system object by id.
10244 * This is a private SPI used by the File Manager.
10248 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10250 vfs_context_t ctx
= vfs_context_current();
10256 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10259 AUDIT_ARG(value32
, fsid
.val
[0]);
10260 AUDIT_ARG(value64
, uap
->objid
);
10261 /* Restrict output buffer size for now. */
10263 if (uap
->bufsize
> PAGE_SIZE
) {
10266 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10267 if (realpath
== NULL
) {
10271 error
= fsgetpath_internal(
10272 ctx
, fsid
.val
[0], uap
->objid
,
10273 uap
->bufsize
, realpath
, &length
);
10279 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10281 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10284 FREE(realpath
, M_TEMP
);
10290 * Common routine to handle various flavors of statfs data heading out
10293 * Returns: 0 Success
10297 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10298 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10299 boolean_t partial_copy
)
10302 int my_size
, copy_size
;
10305 struct user64_statfs sfs
;
10306 my_size
= copy_size
= sizeof(sfs
);
10307 bzero(&sfs
, my_size
);
10308 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10309 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10310 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10311 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10312 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10313 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10314 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10315 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10316 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10317 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
10318 sfs
.f_fsid
= sfsp
->f_fsid
;
10319 sfs
.f_owner
= sfsp
->f_owner
;
10320 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10321 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10323 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10325 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10326 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10328 if (partial_copy
) {
10329 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10331 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10334 struct user32_statfs sfs
;
10336 my_size
= copy_size
= sizeof(sfs
);
10337 bzero(&sfs
, my_size
);
10339 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10340 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10341 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10344 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10345 * have to fudge the numbers here in that case. We inflate the blocksize in order
10346 * to reflect the filesystem size as best we can.
10348 if ((sfsp
->f_blocks
> INT_MAX
)
10349 /* Hack for 4061702 . I think the real fix is for Carbon to
10350 * look for some volume capability and not depend on hidden
10351 * semantics agreed between a FS and carbon.
10352 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10353 * for Carbon to set bNoVolumeSizes volume attribute.
10354 * Without this the webdavfs files cannot be copied onto
10355 * disk as they look huge. This change should not affect
10356 * XSAN as they should not setting these to -1..
10358 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
10359 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
10360 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
10364 * Work out how far we have to shift the block count down to make it fit.
10365 * Note that it's possible to have to shift so far that the resulting
10366 * blocksize would be unreportably large. At that point, we will clip
10367 * any values that don't fit.
10369 * For safety's sake, we also ensure that f_iosize is never reported as
10370 * being smaller than f_bsize.
10372 for (shift
= 0; shift
< 32; shift
++) {
10373 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
10375 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
10378 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10379 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
10380 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
10381 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
10382 #undef __SHIFT_OR_CLIP
10383 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
10384 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
10386 /* filesystem is small enough to be reported honestly */
10387 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
10388 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
10389 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
10390 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
10391 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
10393 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
10394 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
10395 sfs
.f_fsid
= sfsp
->f_fsid
;
10396 sfs
.f_owner
= sfsp
->f_owner
;
10397 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10398 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10400 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10402 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10403 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10405 if (partial_copy
) {
10406 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10408 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10411 if (sizep
!= NULL
) {
10418 * copy stat structure into user_stat structure.
10420 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
10422 bzero(usbp
, sizeof(*usbp
));
10424 usbp
->st_dev
= sbp
->st_dev
;
10425 usbp
->st_ino
= sbp
->st_ino
;
10426 usbp
->st_mode
= sbp
->st_mode
;
10427 usbp
->st_nlink
= sbp
->st_nlink
;
10428 usbp
->st_uid
= sbp
->st_uid
;
10429 usbp
->st_gid
= sbp
->st_gid
;
10430 usbp
->st_rdev
= sbp
->st_rdev
;
10431 #ifndef _POSIX_C_SOURCE
10432 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10433 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10434 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10435 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10436 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10437 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10439 usbp
->st_atime
= sbp
->st_atime
;
10440 usbp
->st_atimensec
= sbp
->st_atimensec
;
10441 usbp
->st_mtime
= sbp
->st_mtime
;
10442 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10443 usbp
->st_ctime
= sbp
->st_ctime
;
10444 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10446 usbp
->st_size
= sbp
->st_size
;
10447 usbp
->st_blocks
= sbp
->st_blocks
;
10448 usbp
->st_blksize
= sbp
->st_blksize
;
10449 usbp
->st_flags
= sbp
->st_flags
;
10450 usbp
->st_gen
= sbp
->st_gen
;
10451 usbp
->st_lspare
= sbp
->st_lspare
;
10452 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10453 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10456 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
10458 bzero(usbp
, sizeof(*usbp
));
10460 usbp
->st_dev
= sbp
->st_dev
;
10461 usbp
->st_ino
= sbp
->st_ino
;
10462 usbp
->st_mode
= sbp
->st_mode
;
10463 usbp
->st_nlink
= sbp
->st_nlink
;
10464 usbp
->st_uid
= sbp
->st_uid
;
10465 usbp
->st_gid
= sbp
->st_gid
;
10466 usbp
->st_rdev
= sbp
->st_rdev
;
10467 #ifndef _POSIX_C_SOURCE
10468 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10469 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10470 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10471 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10472 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10473 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10475 usbp
->st_atime
= sbp
->st_atime
;
10476 usbp
->st_atimensec
= sbp
->st_atimensec
;
10477 usbp
->st_mtime
= sbp
->st_mtime
;
10478 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10479 usbp
->st_ctime
= sbp
->st_ctime
;
10480 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10482 usbp
->st_size
= sbp
->st_size
;
10483 usbp
->st_blocks
= sbp
->st_blocks
;
10484 usbp
->st_blksize
= sbp
->st_blksize
;
10485 usbp
->st_flags
= sbp
->st_flags
;
10486 usbp
->st_gen
= sbp
->st_gen
;
10487 usbp
->st_lspare
= sbp
->st_lspare
;
10488 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10489 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10493 * copy stat64 structure into user_stat64 structure.
10495 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
10497 bzero(usbp
, sizeof(*usbp
));
10499 usbp
->st_dev
= sbp
->st_dev
;
10500 usbp
->st_ino
= sbp
->st_ino
;
10501 usbp
->st_mode
= sbp
->st_mode
;
10502 usbp
->st_nlink
= sbp
->st_nlink
;
10503 usbp
->st_uid
= sbp
->st_uid
;
10504 usbp
->st_gid
= sbp
->st_gid
;
10505 usbp
->st_rdev
= sbp
->st_rdev
;
10506 #ifndef _POSIX_C_SOURCE
10507 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10508 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10509 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10510 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10511 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10512 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10513 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10514 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10516 usbp
->st_atime
= sbp
->st_atime
;
10517 usbp
->st_atimensec
= sbp
->st_atimensec
;
10518 usbp
->st_mtime
= sbp
->st_mtime
;
10519 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10520 usbp
->st_ctime
= sbp
->st_ctime
;
10521 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10522 usbp
->st_birthtime
= sbp
->st_birthtime
;
10523 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10525 usbp
->st_size
= sbp
->st_size
;
10526 usbp
->st_blocks
= sbp
->st_blocks
;
10527 usbp
->st_blksize
= sbp
->st_blksize
;
10528 usbp
->st_flags
= sbp
->st_flags
;
10529 usbp
->st_gen
= sbp
->st_gen
;
10530 usbp
->st_lspare
= sbp
->st_lspare
;
10531 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10532 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10535 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
10537 bzero(usbp
, sizeof(*usbp
));
10539 usbp
->st_dev
= sbp
->st_dev
;
10540 usbp
->st_ino
= sbp
->st_ino
;
10541 usbp
->st_mode
= sbp
->st_mode
;
10542 usbp
->st_nlink
= sbp
->st_nlink
;
10543 usbp
->st_uid
= sbp
->st_uid
;
10544 usbp
->st_gid
= sbp
->st_gid
;
10545 usbp
->st_rdev
= sbp
->st_rdev
;
10546 #ifndef _POSIX_C_SOURCE
10547 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10548 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10549 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10550 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10551 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10552 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10553 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10554 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10556 usbp
->st_atime
= sbp
->st_atime
;
10557 usbp
->st_atimensec
= sbp
->st_atimensec
;
10558 usbp
->st_mtime
= sbp
->st_mtime
;
10559 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10560 usbp
->st_ctime
= sbp
->st_ctime
;
10561 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10562 usbp
->st_birthtime
= sbp
->st_birthtime
;
10563 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10565 usbp
->st_size
= sbp
->st_size
;
10566 usbp
->st_blocks
= sbp
->st_blocks
;
10567 usbp
->st_blksize
= sbp
->st_blksize
;
10568 usbp
->st_flags
= sbp
->st_flags
;
10569 usbp
->st_gen
= sbp
->st_gen
;
10570 usbp
->st_lspare
= sbp
->st_lspare
;
10571 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10572 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10576 * Purge buffer cache for simulating cold starts
10578 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
10580 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
10582 return VNODE_RETURNED
;
10585 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
10587 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
10589 return VFS_RETURNED
;
10593 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
10595 if (!kauth_cred_issuser(kauth_cred_get()))
10598 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);