2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <machine/cons.h>
104 #include <machine/limits.h>
105 #include <miscfs/specfs/specdev.h>
107 #include <security/audit/audit.h>
108 #include <bsm/audit_kevents.h>
110 #include <mach/mach_types.h>
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/task.h>
115 #include <vm/vm_pageout.h>
117 #include <libkern/OSAtomic.h>
118 #include <pexpert/pexpert.h>
119 #include <IOKit/IOBSD.h>
122 #include <security/mac.h>
123 #include <security/mac_framework.h>
127 #define GET_PATH(x) \
128 (x) = get_pathbuff();
129 #define RELEASE_PATH(x) \
132 #define GET_PATH(x) \
133 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
134 #define RELEASE_PATH(x) \
135 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
136 #endif /* CONFIG_FSE */
138 /* struct for checkdirs iteration */
143 /* callback for checkdirs iteration */
144 static int checkdirs_callback(proc_t p
, void * arg
);
146 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
147 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
148 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
149 static int getfsstat_callback(mount_t mp
, void * arg
);
150 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
151 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
152 static int sync_callback(mount_t
, void *);
153 static void sync_thread(void *, __unused wait_result_t
);
154 static int sync_async(int);
155 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
156 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
157 boolean_t partial_copy
);
158 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
160 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
161 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
162 struct componentname
*cnp
, user_addr_t fsmountargs
,
163 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
165 void vfs_notify_mount(vnode_t pdvp
);
167 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
169 struct fd_vn_data
* fg_vn_data_alloc(void);
172 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
173 * Concurrent lookups (or lookups by ids) on hard links can cause the
174 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
175 * does) to return ENOENT as the path cannot be returned from the name cache
176 * alone. We have no option but to retry and hope to get one namei->reverse path
177 * generation done without an intervening lookup, lookup by id on the hard link
178 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
179 * which currently are the MAC hooks for rename, unlink and rmdir.
181 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
183 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
185 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
187 #ifdef CONFIG_IMGSRC_ACCESS
188 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
189 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
190 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
191 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
192 static void mount_end_update(mount_t mp
);
193 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
194 #endif /* CONFIG_IMGSRC_ACCESS */
196 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
199 int sync_internal(void);
202 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
204 extern lck_grp_t
*fd_vn_lck_grp
;
205 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
206 extern lck_attr_t
*fd_vn_lck_attr
;
209 * incremented each time a mount or unmount operation occurs
210 * used to invalidate the cached value of the rootvp in the
211 * mount structure utilized by cache_lookup_path
213 uint32_t mount_generation
= 0;
215 /* counts number of mount and unmount operations */
216 unsigned int vfs_nummntops
=0;
218 extern const struct fileops vnops
;
219 #if CONFIG_APPLEDOUBLE
220 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
221 #endif /* CONFIG_APPLEDOUBLE */
223 typedef uint32_t vfs_rename_flags_t
;
224 #if CONFIG_SECLUDED_RENAME
226 VFS_SECLUDE_RENAME
= 0x00000001
231 * Virtual File System System Calls
234 #if NFSCLIENT || DEVFS
236 * Private in-kernel mounting spi (NFS only, not exported)
/*
 * vfs_iskernelmount: report whether a mount is flagged as a kernel mount.
 *
 * Parameters:	mp	mount whose mnt_kern_flag word is inspected
 *
 * Returns:	TRUE	MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag
 *		FALSE	the bit is clear
 *
 * NOTE(review): the lines holding the return type and the braces are
 * missing from this listing; restore them from the pristine source.
 */
240 vfs_iskernelmount(mount_t mp
)
242 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount: mount a file system on behalf of a caller inside the kernel.
 *
 * Parameters:	fstype		file system type name, forwarded to
 *				mount_common()
 *		pvp		forwarded to mount_common() as the parent vnode
 *		vp		forwarded to mount_common() as the covered vnode
 *		path		kernel-space path string; used both for the
 *				nameidata (UIO_SYSSPACE) and as the component
 *				name path buffer
 *		data		file system specific arguments, cast to a
 *				user_addr_t for mount_common()
 *		datalen		unused
 *		syscall_flags	forwarded as the mount_common() flags argument
 *		kern_flags	forwarded as the mount_common() internal_flags
 *		ctx		caller context
 *
 * The nameidata is initialised for a LOOKUP / OP_MOUNT of path with
 * FOLLOW | AUDITVNPATH1 | WANTPARENT.  One visible branch skips the lookup
 * and instead points nd.ni_cnd.cn_pnbuf at path, with cn_pnlen set to
 * strlen(path) + 1.  mount_common() is then called with a NULL label
 * string and kernelmount == TRUE.
 *
 * NOTE(review): kern_flags is annotated __unused yet is forwarded to
 * mount_common(); the annotation looks stale.
 * NOTE(review): the branch conditions, the lookup call, the cleanup and
 * the return statement sit on lines missing from this listing - confirm
 * against the pristine source.
 */
247 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
248 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
254 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
255 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
258 * Get the vnode to be covered if it's not supplied
268 char *pnbuf
= CAST_DOWN(char *, path
);
270 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
271 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
275 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
276 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
286 #endif /* NFSCLIENT || DEVFS */
289 * Mount a file system.
/*
 * mount: mount entry point for callers that supply no MAC label.
 *
 * Parameters:	p	calling process
 *		uap	user argument descriptor (type, path, flags, data)
 *		retval	forwarded untouched to __mac_mount()
 *
 * Returns:	the value returned by __mac_mount()
 *
 * Copies the four fields of uap into a struct __mac_mount_args, sets its
 * mac_p field to USER_ADDR_NULL (no label) and delegates to __mac_mount().
 *
 * NOTE(review): retval is annotated __unused yet is passed on to
 * __mac_mount().
 * NOTE(review): the return-type and brace lines are missing from this
 * listing.
 */
293 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
295 struct __mac_mount_args muap
;
297 muap
.type
= uap
->type
;
298 muap
.path
= uap
->path
;
299 muap
.flags
= uap
->flags
;
300 muap
.data
= uap
->data
;
301 muap
.mac_p
= USER_ADDR_NULL
;
302 return (__mac_mount(p
, &muap
, retval
));
/*
 * vfs_notify_mount: announce that a mount has taken place.
 *
 * Parameters:	pdvp	vnode on which NOTE_WRITE is posted (mount_common()
 *			passes the parent of the covered vnode)
 *
 * Signals VQ_MOUNT through vfs_event_signal() with a NULL first argument
 * and NULL data, then calls lock_vnode_and_post(pdvp, NOTE_WRITE).
 *
 * NOTE(review): the return-type and brace lines are missing from this
 * listing; the forward declaration earlier in the file gives void.
 */
306 vfs_notify_mount(vnode_t pdvp
)
308 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
309 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
314 * Mount a file system taking into account MAC label behavior.
315 * See mount(2) man page for more information
317 * Parameters: p Process requesting the mount
318 * uap User argument descriptor (see below)
321 * Indirect: uap->type Filesystem type
322 * uap->path Path to mount
323 * uap->data Mount arguments
324 * uap->mac_p MAC info
325 * uap->flags Mount flags
331 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount: mount entry point that also accepts an optional MAC label.
 *
 * Parameters:	p	process requesting the mount
 *		uap	user argument descriptor (type, path, flags, data,
 *			mac_p)
 *		retval	unused
 *
 * Steps that are visible in this listing, in order:
 *   1. The file system type name is copied in from uap->type into a
 *      MFSNAMELEN byte stack buffer with copyinstr().
 *   2. A nameidata is initialised for a LOOKUP / OP_MOUNT of the user-space
 *      path uap->path with FOLLOW | AUDITVNPATH1 | WANTPARENT.
 *   3. With CONFIG_IMGSRC_ACCESS, flags equal to exactly
 *      MNT_IMGSRC_BY_INDEX hands the request to
 *      relocate_imageboot_source().
 *   4. When uap->mac_p is not USER_ADDR_NULL, a user64_mac or user32_mac
 *      descriptor is copied in, m_buflen is checked against the range
 *      2 .. MAC_MAX_LABEL_BUF_LEN, and the label string is copied into a
 *      buffer allocated with MALLOC(..., M_MACTEMP, M_WAITOK).
 *   5. The flags are recorded for audit.
 *   6. When the covered vnode is the root (VROOT) of the MNT_ROOTFS mount,
 *      the MNT_UNION and MNT_RDONLY bits of flags are examined; one branch
 *      strips MNT_UPDATE from flags, and with CHECK_CS_VALIDATION_BITMAP a
 *      request lacking MNT_RDONLY sets root_fs_upgrade_try to TRUE.
 *   7. mount_common() is called with internal_flags 0 and
 *      kernelmount == FALSE.
 *   8. The label buffer is released with FREE(labelstr, M_MACTEMP).
 *
 * NOTE(review): the error checks, goto targets, else branches, several
 * local declarations (error, nd, vp, pvp, dummy, mac, ulen) and the final
 * return are on lines missing from this listing, so which branch each
 * visible statement belongs to must be confirmed against the pristine
 * source.
 */
334 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
338 int need_nameidone
= 0;
339 vfs_context_t ctx
= vfs_context_current();
340 char fstypename
[MFSNAMELEN
];
343 char *labelstr
= NULL
;
344 int flags
= uap
->flags
;
346 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
347 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
352 * Get the fs type name from user space
354 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
359 * Get the vnode to be covered
361 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
362 UIO_USERSPACE
, uap
->path
, ctx
);
371 #ifdef CONFIG_IMGSRC_ACCESS
372 /* Mounting image source cannot be batched with other operations */
373 if (flags
== MNT_IMGSRC_BY_INDEX
) {
374 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
375 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
378 #endif /* CONFIG_IMGSRC_ACCESS */
382 * Get the label string (if any) from user space
384 if (uap
->mac_p
!= USER_ADDR_NULL
) {
389 struct user64_mac mac64
;
390 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
391 mac
.m_buflen
= mac64
.m_buflen
;
392 mac
.m_string
= mac64
.m_string
;
394 struct user32_mac mac32
;
395 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
396 mac
.m_buflen
= mac32
.m_buflen
;
397 mac
.m_string
= mac32
.m_string
;
401 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
402 (mac
.m_buflen
< 2)) {
406 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
407 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
411 AUDIT_ARG(mac_string
, labelstr
);
413 #endif /* CONFIG_MACF */
415 AUDIT_ARG(fflags
, flags
);
417 if ((vp
->v_flag
& VROOT
) &&
418 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
419 if (!(flags
& MNT_UNION
)) {
424 * For a union mount on '/', treat it as fresh
425 * mount instead of update.
426 * Otherwise, union mouting on '/' used to panic the
427 * system before, since mnt_vnodecovered was found to
428 * be NULL for '/' which is required for unionlookup
429 * after it gets ENOENT on union mount.
431 flags
= (flags
& ~(MNT_UPDATE
));
435 if ((flags
& MNT_RDONLY
) == 0) {
436 /* Release kernels are not allowed to mount "/" as rw */
442 * See 7392553 for more details on why this check exists.
443 * Suffice to say: If this check is ON and something tries
444 * to mount the rootFS RW, we'll turn off the codesign
445 * bitmap optimization.
447 #if CHECK_CS_VALIDATION_BITMAP
448 if ((flags
& MNT_RDONLY
) == 0 ) {
449 root_fs_upgrade_try
= TRUE
;
454 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
455 labelstr
, FALSE
, ctx
);
461 FREE(labelstr
, M_MACTEMP
);
462 #endif /* CONFIG_MACF */
470 if (need_nameidone
) {
478 * common mount implementation (final stage of mounting)
481 * fstypename file system type (i.e. its vfs name)
482 * pvp parent of covered vnode
484 * cnp component name (ie path) of covered vnode
485 * flags generic mount flags
486 * fsmountargs file system specific data
487 * labelstr optional MAC label
488 * kernelmount TRUE for mounts initiated from inside the kernel
489 * ctx caller's context
492 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
493 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
494 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
497 #pragma unused(labelstr)
499 struct vnode
*devvp
= NULLVP
;
500 struct vnode
*device_vnode
= NULLVP
;
505 struct vfstable
*vfsp
= (struct vfstable
*)0;
506 struct proc
*p
= vfs_context_proc(ctx
);
508 user_addr_t devpath
= USER_ADDR_NULL
;
511 boolean_t vfsp_ref
= FALSE
;
512 boolean_t is_rwlock_locked
= FALSE
;
513 boolean_t did_rele
= FALSE
;
514 boolean_t have_usecount
= FALSE
;
517 * Process an update for an existing mount
519 if (flags
& MNT_UPDATE
) {
520 if ((vp
->v_flag
& VROOT
) == 0) {
526 /* unmount in progress return error */
528 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
534 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
535 is_rwlock_locked
= TRUE
;
537 * We only allow the filesystem to be reloaded if it
538 * is currently mounted read-only.
540 if ((flags
& MNT_RELOAD
) &&
541 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
547 * If content protection is enabled, update mounts are not
548 * allowed to turn it off.
550 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
551 ((flags
& MNT_CPROTECT
) == 0)) {
556 #ifdef CONFIG_IMGSRC_ACCESS
557 /* Can't downgrade the backer of the root FS */
558 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
559 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
563 #endif /* CONFIG_IMGSRC_ACCESS */
566 * Only root, or the user that did the original mount is
567 * permitted to update it.
569 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
570 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
574 error
= mac_mount_check_remount(ctx
, mp
);
580 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
581 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
583 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
584 flags
|= MNT_NOSUID
| MNT_NODEV
;
585 if (mp
->mnt_flag
& MNT_NOEXEC
)
592 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
594 vfsp
= mp
->mnt_vtable
;
598 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
599 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
601 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
602 flags
|= MNT_NOSUID
| MNT_NODEV
;
603 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
607 /* XXXAUDIT: Should we capture the type on the error path as well? */
608 AUDIT_ARG(text
, fstypename
);
610 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
611 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
612 vfsp
->vfc_refcount
++;
623 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
625 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
626 error
= EINVAL
; /* unsupported request */
630 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
636 * Allocate and initialize the filesystem (mount_t)
638 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
640 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
643 /* Initialize the default IO constraints */
644 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
645 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
646 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
647 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
648 mp
->mnt_devblocksize
= DEV_BSIZE
;
649 mp
->mnt_alignmentmask
= PAGE_MASK
;
650 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
653 mp
->mnt_realrootvp
= NULLVP
;
654 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
656 TAILQ_INIT(&mp
->mnt_vnodelist
);
657 TAILQ_INIT(&mp
->mnt_workerqueue
);
658 TAILQ_INIT(&mp
->mnt_newvnodes
);
660 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
661 is_rwlock_locked
= TRUE
;
662 mp
->mnt_op
= vfsp
->vfc_vfsops
;
663 mp
->mnt_vtable
= vfsp
;
664 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
665 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
666 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
667 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
668 mp
->mnt_vnodecovered
= vp
;
669 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
670 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
671 mp
->mnt_devbsdunit
= 0;
673 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
674 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
676 #if NFSCLIENT || DEVFS
678 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
679 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
680 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
681 #endif /* NFSCLIENT || DEVFS */
685 * Set the mount level flags.
687 if (flags
& MNT_RDONLY
)
688 mp
->mnt_flag
|= MNT_RDONLY
;
689 else if (mp
->mnt_flag
& MNT_RDONLY
) {
690 // disallow read/write upgrades of file systems that
691 // had the TYPENAME_OVERRIDE feature set.
692 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
696 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
698 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
699 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
700 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
701 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
702 MNT_QUARANTINE
| MNT_CPROTECT
);
703 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
704 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
705 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
706 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
707 MNT_QUARANTINE
| MNT_CPROTECT
);
710 if (flags
& MNT_MULTILABEL
) {
711 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
715 mp
->mnt_flag
|= MNT_MULTILABEL
;
719 * Process device path for local file systems if requested
721 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
722 if (vfs_context_is64bit(ctx
)) {
723 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
725 fsmountargs
+= sizeof(devpath
);
728 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
730 /* munge into LP64 addr */
731 devpath
= CAST_USER_ADDR_T(tmp
);
732 fsmountargs
+= sizeof(tmp
);
735 /* Lookup device and authorize access to it */
739 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
740 if ( (error
= namei(&nd
)) )
743 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
748 if (devvp
->v_type
!= VBLK
) {
752 if (major(devvp
->v_rdev
) >= nblkdev
) {
757 * If mount by non-root, then verify that user has necessary
758 * permissions on the device.
760 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
761 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
763 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
764 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
765 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
769 /* On first mount, preflight and open device */
770 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
771 if ( (error
= vnode_ref(devvp
)) )
774 * Disallow multiple mounts of the same device.
775 * Disallow mounting of a device that is currently in use
776 * (except for root, which might share swap device for miniroot).
777 * Flush out any old buffers remaining from a previous use.
779 if ( (error
= vfs_mountedon(devvp
)) )
782 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
786 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
790 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
793 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
795 error
= mac_vnode_check_open(ctx
,
797 ronly
? FREAD
: FREAD
|FWRITE
);
801 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
804 mp
->mnt_devvp
= devvp
;
805 device_vnode
= devvp
;
807 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
808 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
809 (device_vnode
= mp
->mnt_devvp
)) {
813 * If upgrade to read-write by non-root, then verify
814 * that user has necessary permissions on the device.
816 vnode_getalways(device_vnode
);
818 if (suser(vfs_context_ucred(ctx
), NULL
) &&
819 (error
= vnode_authorize(device_vnode
, NULL
,
820 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
822 vnode_put(device_vnode
);
826 /* Tell the device that we're upgrading */
827 dev
= (dev_t
)device_vnode
->v_rdev
;
830 if ((u_int
)maj
>= (u_int
)nblkdev
)
831 panic("Volume mounted on a device with invalid major number.");
833 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
834 vnode_put(device_vnode
);
835 device_vnode
= NULLVP
;
842 if ((flags
& MNT_UPDATE
) == 0) {
843 mac_mount_label_init(mp
);
844 mac_mount_label_associate(ctx
, mp
);
847 if ((flags
& MNT_UPDATE
) != 0) {
848 error
= mac_mount_check_label_update(ctx
, mp
);
855 * Mount the filesystem.
857 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
859 if (flags
& MNT_UPDATE
) {
860 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
861 mp
->mnt_flag
&= ~MNT_RDONLY
;
863 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
864 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
866 mp
->mnt_flag
= flag
; /* restore flag value */
867 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
868 lck_rw_done(&mp
->mnt_rwlock
);
869 is_rwlock_locked
= FALSE
;
871 enablequotas(mp
, ctx
);
876 * Put the new filesystem on the mount list after root.
879 struct vfs_attr vfsattr
;
881 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
882 error
= VFS_ROOT(mp
, &rvp
, ctx
);
884 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
887 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
889 * drop reference provided by VFS_ROOT
899 CLR(vp
->v_flag
, VMOUNT
);
900 vp
->v_mountedhere
= mp
;
904 * taking the name_cache_lock exclusively will
905 * insure that everyone is out of the fast path who
906 * might be trying to use a now stale copy of
907 * vp->v_mountedhere->mnt_realrootvp
908 * bumping mount_generation causes the cached values
915 error
= vnode_ref(vp
);
920 have_usecount
= TRUE
;
922 error
= checkdirs(vp
, ctx
);
924 /* Unmount the filesystem as cdir/rdirs cannot be updated */
928 * there is no cleanup code here so I have made it void
929 * we need to revisit this
931 (void)VFS_START(mp
, 0, ctx
);
933 if (mount_list_add(mp
) != 0) {
935 * The system is shutting down trying to umount
936 * everything, so fail with a plausible errno.
941 lck_rw_done(&mp
->mnt_rwlock
);
942 is_rwlock_locked
= FALSE
;
944 /* Check if this mounted file system supports EAs or named streams. */
945 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
946 VFSATTR_INIT(&vfsattr
);
947 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
948 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
949 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
950 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
951 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
952 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
953 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
956 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
957 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
958 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
961 /* Check if this file system supports path from id lookups. */
962 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
963 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
964 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
965 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
966 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
967 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
970 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
971 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
973 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
974 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
976 /* increment the operations count */
977 OSAddAtomic(1, &vfs_nummntops
);
978 enablequotas(mp
, ctx
);
981 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
984 * cache the IO attributes for the underlying physical media...
985 * an error return indicates the underlying driver doesn't
986 * support all the queries necessary... however, reasonable
987 * defaults will have been set, so no reason to bail or care
989 vfs_init_io_attributes(device_vnode
, mp
);
992 /* Now that mount is setup, notify the listeners */
993 vfs_notify_mount(pvp
);
994 IOBSDMountChange(mp
, kIOMountChangeMount
);
997 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
998 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
999 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1000 mp
->mnt_vtable
->vfc_name
, error
);
1003 vnode_lock_spin(vp
);
1004 CLR(vp
->v_flag
, VMOUNT
);
1007 mp
->mnt_vtable
->vfc_refcount
--;
1008 mount_list_unlock();
1010 if (device_vnode
) {
1011 vnode_rele(device_vnode
);
1012 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
1014 lck_rw_done(&mp
->mnt_rwlock
);
1015 is_rwlock_locked
= FALSE
;
1018 * if we get here, we have a mount structure that needs to be freed,
1019 * but since the coveredvp hasn't yet been updated to point at it,
1020 * no need to worry about other threads holding a crossref on this mp
1021 * so it's ok to just free it
1023 mount_lock_destroy(mp
);
1025 mac_mount_label_destroy(mp
);
1027 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1031 * drop I/O count on the device vp if there was one
1033 if (devpath
&& devvp
)
1038 /* Error condition exits */
1040 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1043 * If the mount has been placed on the covered vp,
1044 * it may have been discovered by now, so we have
1045 * to treat this just like an unmount
1047 mount_lock_spin(mp
);
1048 mp
->mnt_lflag
|= MNT_LDEAD
;
1051 if (device_vnode
!= NULLVP
) {
1052 vnode_rele(device_vnode
);
1053 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1058 vnode_lock_spin(vp
);
1061 vp
->v_mountedhere
= (mount_t
) 0;
1065 if (have_usecount
) {
1069 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1072 if (devpath
&& devvp
)
1075 /* Release mnt_rwlock only when it was taken */
1076 if (is_rwlock_locked
== TRUE
) {
1077 lck_rw_done(&mp
->mnt_rwlock
);
1081 if (mp
->mnt_crossref
)
1082 mount_dropcrossref(mp
, vp
, 0);
1084 mount_lock_destroy(mp
);
1086 mac_mount_label_destroy(mp
);
1088 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1093 vfsp
->vfc_refcount
--;
1094 mount_list_unlock();
1101 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: vet a vnode that is about to be covered by a mount
 * and mark it VMOUNT.
 *
 * Parameters:	vp		vnode to be covered
 *		ctx		caller context
 *		cnp		component name of vp; listed in a
 *				pragma unused, its only other visible use
 *				would be the MAC check
 *		fsname		file system type name; same remark as cnp
 *		skip_auth	NOTE(review): presumably bypasses the ownership
 *				check below - the line that tests it is missing
 *				from this listing
 *
 * Checks that are visible in this listing, in order:
 *   - va_uid is fetched with vnode_getattr(); the check fails when the
 *     fetch fails, or when the owner differs from the caller uid and the
 *     caller is not the superuser.
 *   - VNOP_FSYNC(vp, MNT_WAIT) followed by
 *     buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0) flush in-core data.
 *   - vp->v_type is compared against VDIR.
 *   - VMOUNT already set together with a non-NULL v_mountedhere is
 *     tested (a competing mount).
 *   - mac_mount_check_mount() is consulted.
 * Finally VMOUNT is set in vp->v_flag after vnode_lock_spin(vp).
 *
 * NOTE(review): the error values, goto targets, the matching unlock and
 * the return statement are on lines missing from this listing.
 */
1105 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1108 #pragma unused(cnp,fsname)
1110 struct vnode_attr va
;
1115 * If the user is not root, ensure that they own the directory
1116 * onto which we are attempting to mount.
1119 VATTR_WANTED(&va
, va_uid
);
1120 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1121 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1122 (!vfs_context_issuser(ctx
)))) {
1128 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1131 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1134 if (vp
->v_type
!= VDIR
) {
1139 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1145 error
= mac_mount_check_mount(ctx
, vp
,
1151 vnode_lock_spin(vp
);
1152 SET(vp
->v_flag
, VMOUNT
);
1159 #if CONFIG_IMGSRC_ACCESS
1162 #define IMGSRC_DEBUG(args...) printf(args)
1164 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: check that a user-supplied
 * device path names the device backing a mount, and record that path as
 * the mount f_mntfromname.
 *
 * Parameters:	mp	mount whose backing device (mp->mnt_devvp) is compared
 *		devpath	user-space path of the device node
 *		devvpp	NOTE(review): presumably receives the looked-up device
 *			vnode on success - the assignment is on a line missing
 *			from this listing
 *		ctx	caller context
 *
 * Steps that are visible in this listing, in order:
 *   - namei() lookup of devpath (LOOKUP / OP_LOOKUP, FOLLOW,
 *     UIO_USERSPACE).
 *   - The result must satisfy vnode_isblk().
 *   - mp->mnt_devvp must not be NULLVP, and vnode_getwithref() is called
 *     on it.
 *   - vnode_specrdev() of the looked-up vnode and of mp->mnt_devvp must
 *     match.
 *   - The looked-up path buffer is copied into
 *     mp->mnt_vfsstat.f_mntfromname with strlcpy() bounded by MAXPATHLEN.
 *   - For a caller that is not the superuser, vnode_authorize() is asked
 *     for KAUTH_VNODE_READ_DATA, plus KAUTH_VNODE_WRITE_DATA when the
 *     mount is not MNT_RDONLY.
 *   - vnode_put() drops the iocount taken on mp->mnt_devvp.
 *
 * NOTE(review): the error codes, goto targets, nameidone() and the return
 * statement are on lines missing from this listing.
 */
1168 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1170 struct nameidata nd
;
1171 vnode_t vp
, realdevvp
;
1175 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1176 if ( (error
= namei(&nd
)) ) {
1177 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1183 if (!vnode_isblk(vp
)) {
1184 IMGSRC_DEBUG("Not block device.\n");
1189 realdevvp
= mp
->mnt_devvp
;
1190 if (realdevvp
== NULLVP
) {
1191 IMGSRC_DEBUG("No device backs the mount.\n");
1196 error
= vnode_getwithref(realdevvp
);
1198 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1202 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1203 IMGSRC_DEBUG("Wrong dev_t.\n");
1208 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1211 * If mount by non-root, then verify that user has necessary
1212 * permissions on the device.
1214 if (!vfs_context_issuser(ctx
)) {
1215 accessmode
= KAUTH_VNODE_READ_DATA
;
1216 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1217 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1218 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1219 IMGSRC_DEBUG("Access denied.\n");
1227 vnode_put(realdevvp
);
1238 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1239 * and call checkdirs()
/*
 * place_mount_and_checkdirs: hook an existing mount onto a covered vnode.
 *
 * Parameters:	mp	mount being placed
 *		vp	vnode that will be covered by mp
 *		ctx	caller context, handed to checkdirs()
 *
 * Steps that are visible in this listing, in order:
 *   - mp->mnt_vnodecovered is set to vp.
 *   - After vnode_lock_spin(vp), VMOUNT is cleared from vp->v_flag and
 *     vp->v_mountedhere is set to mp.
 *   - name_cache_unlock() is called; the matching lock call and the
 *     mount_generation bump described by the comment are on lines missing
 *     from this listing.
 *   - vnode_ref(vp) takes a reference on the covered vnode.
 *   - checkdirs(vp, ctx) is run.
 *   - One path resets mp->mnt_vnodecovered to NULLVP.
 *     NOTE(review): presumably the failure path - confirm.
 *
 * NOTE(review): error checks, goto targets, unlock calls and the return
 * statement are on lines missing from this listing.
 */
1242 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1246 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1248 vnode_lock_spin(vp
);
1249 CLR(vp
->v_flag
, VMOUNT
);
1250 vp
->v_mountedhere
= mp
;
1254 * taking the name_cache_lock exclusively will
1255 * insure that everyone is out of the fast path who
1256 * might be trying to use a now stale copy of
1257 * vp->v_mountedhere->mnt_realrootvp
1258 * bumping mount_generation causes the cached values
1263 name_cache_unlock();
1265 error
= vnode_ref(vp
);
1270 error
= checkdirs(vp
, ctx
);
1272 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1279 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach a mount from the vnode it covers.
 *
 * Parameters:	mp	mount being detached
 *		vp	vnode that mp was covering
 *
 * After vnode_lock_spin(vp), vp->v_mountedhere is reset to NULL; then
 * mp->mnt_vnodecovered is reset to NULLVP.
 *
 * NOTE(review): listing lines 1287, 1290 and 1291 are missing here, so the
 * matching unlock and any reference release cannot be seen - confirm
 * against the pristine source.
 */
1285 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1288 vnode_lock_spin(vp
);
1289 vp
->v_mountedhere
= (mount_t
)NULL
;
1292 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update: gate and lock a mount before it is updated.
 *
 * Parameters:	mp	mount about to be updated
 *		ctx	caller context (credential and MAC checks)
 *		flags	mount flags of the request; only MNT_RELOAD is
 *			examined in the visible lines
 *
 * Steps that are visible in this listing, in order:
 *   - Under mount_lock_spin(mp), MNT_LUNMOUNT in mp->mnt_lflag is tested
 *     (unmount in progress).
 *   - mp->mnt_rwlock is taken exclusively.
 *   - MNT_RELOAD is only accepted while the mount is MNT_RDONLY.
 *   - The caller must be the uid recorded in mp->mnt_vfsstat.f_owner or
 *     the superuser.
 *   - mac_mount_check_remount() is consulted.
 *   - lck_rw_done() releases mp->mnt_rwlock on one path.
 *     NOTE(review): presumably only when a check failed, leaving the lock
 *     held on success for mount_end_update() - confirm.
 *
 * NOTE(review): the error values, goto targets, mount_unlock() and the
 * return statement are on lines missing from this listing.
 */
1296 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1300 /* unmount in progress return error */
1301 mount_lock_spin(mp
);
1302 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1307 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1310 * We only allow the filesystem to be reloaded if it
1311 * is currently mounted read-only.
1313 if ((flags
& MNT_RELOAD
) &&
1314 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1320 * Only root, or the user that did the original mount is
1321 * permitted to update it.
1323 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1324 (!vfs_context_issuser(ctx
))) {
1329 error
= mac_mount_check_remount(ctx
, mp
);
1337 lck_rw_done(&mp
->mnt_rwlock
);
1344 mount_end_update(mount_t mp
)
1346 lck_rw_done(&mp
->mnt_rwlock
);
1350 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1354 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1358 vp
= imgsrc_rootvnodes
[height
];
1359 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1368 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1369 const char *fsname
, vfs_context_t ctx
,
1370 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1374 boolean_t placed
= FALSE
;
1375 vnode_t devvp
= NULLVP
;
1376 struct vfstable
*vfsp
;
1377 user_addr_t devpath
;
1378 char *old_mntonname
;
1383 /* If we didn't imageboot, nothing to move */
1384 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1388 /* Only root can do this */
1389 if (!vfs_context_issuser(ctx
)) {
1393 IMGSRC_DEBUG("looking for root vnode.\n");
1396 * Get root vnode of filesystem we're moving.
1400 struct user64_mnt_imgsrc_args mia64
;
1401 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1403 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1407 height
= mia64
.mi_height
;
1408 flags
= mia64
.mi_flags
;
1409 devpath
= mia64
.mi_devpath
;
1411 struct user32_mnt_imgsrc_args mia32
;
1412 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1414 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1418 height
= mia32
.mi_height
;
1419 flags
= mia32
.mi_flags
;
1420 devpath
= mia32
.mi_devpath
;
1424 * For binary compatibility--assumes one level of nesting.
1427 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1431 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1434 /* munge into LP64 addr */
1435 devpath
= CAST_USER_ADDR_T(tmp
);
1443 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1447 error
= get_imgsrc_rootvnode(height
, &rvp
);
1449 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1453 IMGSRC_DEBUG("got root vnode.\n");
1455 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1457 /* Can only move once */
1458 mp
= vnode_mount(rvp
);
1459 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1460 IMGSRC_DEBUG("Already moved.\n");
1465 IMGSRC_DEBUG("Starting updated.\n");
1467 /* Get exclusive rwlock on mount, authorize update on mp */
1468 error
= mount_begin_update(mp
, ctx
, 0);
1470 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1475 * It can only be moved once. Flag is set under the rwlock,
1476 * so we're now safe to proceed.
1478 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1479 IMGSRC_DEBUG("Already moved [2]\n");
1484 IMGSRC_DEBUG("Preparing coveredvp.\n");
1486 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1487 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1489 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1493 IMGSRC_DEBUG("Covered vp OK.\n");
1495 /* Sanity check the name caller has provided */
1496 vfsp
= mp
->mnt_vtable
;
1497 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1498 IMGSRC_DEBUG("Wrong fs name.\n");
1503 /* Check the device vnode and update mount-from name, for local filesystems */
1504 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1505 IMGSRC_DEBUG("Local, doing device validation.\n");
1507 if (devpath
!= USER_ADDR_NULL
) {
1508 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1510 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1519 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1520 * and increment the name cache's mount generation
1523 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1524 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1531 strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1532 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1534 /* Forbid future moves */
1536 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1539 /* Finally, add to mount list, completely ready to go */
1540 if (mount_list_add(mp
) != 0) {
1542 * The system is shutting down trying to umount
1543 * everything, so fail with a plausible errno.
1549 mount_end_update(mp
);
1551 FREE(old_mntonname
, M_TEMP
);
1553 vfs_notify_mount(pvp
);
1557 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1560 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1565 * Placing the mp on the vnode clears VMOUNT,
1566 * so cleanup is different after that point
1569 /* Rele the vp, clear VMOUNT and v_mountedhere */
1570 undo_place_on_covered_vp(mp
, vp
);
1572 vnode_lock_spin(vp
);
1573 CLR(vp
->v_flag
, VMOUNT
);
1577 mount_end_update(mp
);
1581 FREE(old_mntonname
, M_TEMP
);
1585 #endif /* CONFIG_IMGSRC_ACCESS */
1588 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1590 struct nameidata qnd
;
1592 char qfpath
[MAXPATHLEN
];
1593 const char *qfname
= QUOTAFILENAME
;
1594 const char *qfopsname
= QUOTAOPSNAME
;
1595 const char *qfextension
[] = INITQFNAMES
;
1597 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1598 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1602 * Enable filesystem disk quotas if necessary.
1603 * We ignore errors as this should not interfere with final mount
1605 for (type
=0; type
< MAXQUOTAS
; type
++) {
1606 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1607 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1608 CAST_USER_ADDR_T(qfpath
), ctx
);
1609 if (namei(&qnd
) != 0)
1610 continue; /* option file to trigger quotas is not present */
1611 vnode_put(qnd
.ni_vp
);
1613 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1615 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1622 checkdirs_callback(proc_t p
, void * arg
)
1624 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1625 vnode_t olddp
= cdrp
->olddp
;
1626 vnode_t newdp
= cdrp
->newdp
;
1627 struct filedesc
*fdp
;
1631 int cdir_changed
= 0;
1632 int rdir_changed
= 0;
1635 * XXX Also needs to iterate each thread in the process to see if it
1636 * XXX is using a per-thread current working directory, and, if so,
1637 * XXX update that as well.
1642 if (fdp
== (struct filedesc
*)0) {
1644 return(PROC_RETURNED
);
1646 fdp_cvp
= fdp
->fd_cdir
;
1647 fdp_rvp
= fdp
->fd_rdir
;
1650 if (fdp_cvp
== olddp
) {
1657 if (fdp_rvp
== olddp
) {
1664 if (cdir_changed
|| rdir_changed
) {
1666 fdp
->fd_cdir
= fdp_cvp
;
1667 fdp
->fd_rdir
= fdp_rvp
;
1670 return(PROC_RETURNED
);
1676 * Scan all active processes to see if any of them have a current
1677 * or root directory onto which the new filesystem has just been
1678 * mounted. If so, replace them with the new mount point.
1681 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1686 struct cdirargs cdr
;
1688 if (olddp
->v_usecount
== 1)
1690 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1694 panic("mount: lost mount: error %d", err
);
1701 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1702 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1704 if (rootvnode
== olddp
) {
1716 * Unmount a file system.
1718 * Note: unmount takes a path to the vnode mounted on as argument,
1719 * not special file (as before).
1723 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1728 struct nameidata nd
;
1729 vfs_context_t ctx
= vfs_context_current();
1731 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1732 UIO_USERSPACE
, uap
->path
, ctx
);
1741 error
= mac_mount_check_umount(ctx
, mp
);
1748 * Must be the root of the filesystem
1750 if ((vp
->v_flag
& VROOT
) == 0) {
1756 /* safedounmount consumes the mount ref */
1757 return (safedounmount(mp
, uap
->flags
, ctx
));
1761 vfs_unmountbyfsid(fsid_t
* fsid
, int flags
, vfs_context_t ctx
)
1765 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1766 if (mp
== (mount_t
)0) {
1771 /* safedounmount consumes the mount ref */
1772 return(safedounmount(mp
, flags
, ctx
));
1777 * The mount struct comes with a mount ref which will be consumed.
1778 * Do the actual file system unmount, prevent some common foot shooting.
1781 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1784 proc_t p
= vfs_context_proc(ctx
);
1787 * If the file system is not responding and MNT_NOBLOCK
1788 * is set and not a forced unmount then return EBUSY.
1790 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1791 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1797 * Skip authorization if the mount is tagged as permissive and
1798 * this is not a forced-unmount attempt.
1800 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1802 * Only root, or the user that did the original mount is
1803 * permitted to unmount this filesystem.
1805 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1806 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1810 * Don't allow unmounting the root file system.
1812 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1813 error
= EBUSY
; /* the root is always busy */
1817 #ifdef CONFIG_IMGSRC_ACCESS
1818 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1822 #endif /* CONFIG_IMGSRC_ACCESS */
1824 return (dounmount(mp
, flags
, 1, ctx
));
1832 * Do the actual file system unmount.
1835 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1837 vnode_t coveredvp
= (vnode_t
)0;
1840 int forcedunmount
= 0;
1842 struct vnode
*devvp
= NULLVP
;
1844 proc_t p
= vfs_context_proc(ctx
);
1846 int pflags_save
= 0;
1847 #endif /* CONFIG_TRIGGERS */
1852 * If already an unmount in progress just return EBUSY.
1853 * Even a forced unmount cannot override.
1855 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1862 if (flags
& MNT_FORCE
) {
1864 mp
->mnt_lflag
|= MNT_LFORCE
;
1868 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1869 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1872 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1873 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1874 mp
->mnt_flag
&=~ MNT_ASYNC
;
1876 * anyone currently in the fast path that
1877 * trips over the cached rootvp will be
1878 * dumped out and forced into the slow path
1879 * to regenerate a new cached value
1881 mp
->mnt_realrootvp
= NULLVP
;
1884 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1886 * Force unmount any mounts in this filesystem.
1887 * If any unmounts fail - just leave them dangling.
1890 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1894 * taking the name_cache_lock exclusively will
1895 * ensure that everyone is out of the fast path who
1896 * might be trying to use a now stale copy of
1897 * vp->v_mountedhere->mnt_realrootvp
1898 * bumping mount_generation causes the cached values
1903 name_cache_unlock();
1906 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1910 fsevent_unmount(mp
); /* has to come first! */
1913 if (forcedunmount
== 0) {
1914 ubc_umount(mp
); /* release cached vnodes */
1915 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1916 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1919 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1920 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1921 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1927 IOBSDMountChange(mp
, kIOMountChangeUnmount
);
1930 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1934 lflags
|= FORCECLOSE
;
1935 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1936 if ((forcedunmount
== 0) && error
) {
1938 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1939 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1940 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1944 /* make sure there is no one in the mount iterations or lookup */
1945 mount_iterdrain(mp
);
1947 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1949 mount_iterreset(mp
);
1951 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1952 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1953 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1957 /* increment the operations count */
1959 OSAddAtomic(1, &vfs_nummntops
);
1961 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1962 /* hold an io reference and drop the usecount before close */
1963 devvp
= mp
->mnt_devvp
;
1964 vnode_getalways(devvp
);
1966 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1968 vnode_clearmountedon(devvp
);
1971 lck_rw_done(&mp
->mnt_rwlock
);
1972 mount_list_remove(mp
);
1973 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1975 /* mark the mount point hook in the vp but not drop the ref yet */
1976 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
1978 * The covered vnode needs special handling. Trying to get an
1979 * iocount must not block here as this may lead to deadlocks
1980 * if the Filesystem to which the covered vnode belongs is
1981 * undergoing forced unmounts. Since we hold a usecount, the
1982 * vnode cannot be reused (it can, however, still be terminated)
1984 vnode_getalways(coveredvp
);
1985 vnode_lock_spin(coveredvp
);
1988 coveredvp
->v_mountedhere
= (struct mount
*)0;
1989 CLR(coveredvp
->v_flag
, VMOUNT
);
1991 vnode_unlock(coveredvp
);
1992 vnode_put(coveredvp
);
1996 mp
->mnt_vtable
->vfc_refcount
--;
1997 mount_list_unlock();
1999 cache_purgevfs(mp
); /* remove cache entries for this file sys */
2000 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
2002 mp
->mnt_lflag
|= MNT_LDEAD
;
2004 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2006 * do the wakeup here
2007 * in case we block in mount_refdrain
2008 * which will drop the mount lock
2009 * and allow anyone blocked in vfs_busy
2010 * to wakeup and see the LDEAD state
2012 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2013 wakeup((caddr_t
)mp
);
2017 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2018 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2023 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2024 // Restore P_NOREMOTEHANG bit to its previous value
2025 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2026 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2030 * Callback and context are set together under the mount lock, and
2031 * never cleared, so we're safe to examine them here, drop the lock,
2034 if (mp
->mnt_triggercallback
!= NULL
) {
2037 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2038 } else if (did_vflush
) {
2039 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2046 #endif /* CONFIG_TRIGGERS */
2048 lck_rw_done(&mp
->mnt_rwlock
);
2051 wakeup((caddr_t
)mp
);
2054 if ((coveredvp
!= NULLVP
)) {
2055 vnode_t pvp
= NULLVP
;
2058 * The covered vnode needs special handling. Trying to
2059 * get an iocount must not block here as this may lead
2060 * to deadlocks if the Filesystem to which the covered
2061 * vnode belongs is undergoing forced unmounts. Since we
2062 * hold a usecount, the vnode cannot be reused
2063 * (it can, however, still be terminated).
2065 vnode_getalways(coveredvp
);
2067 mount_dropcrossref(mp
, coveredvp
, 0);
2069 * We'll _try_ to detect if this really needs to be
2070 * done. The coveredvp can only be in termination (or
2071 * terminated) if the coveredvp's mount point is in a
2072 * forced unmount (or has been) since we still hold the
2075 if (!vnode_isrecycled(coveredvp
)) {
2076 pvp
= vnode_getparent(coveredvp
);
2078 if (coveredvp
->v_resolve
) {
2079 vnode_trigger_rearm(coveredvp
, ctx
);
2084 vnode_rele(coveredvp
);
2085 vnode_put(coveredvp
);
2089 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2092 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2093 mount_lock_destroy(mp
);
2095 mac_mount_label_destroy(mp
);
2097 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2099 panic("dounmount: no coveredvp");
2105 * Unmount any mounts in this filesystem.
2108 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2111 fsid_t
*fsids
, fsid
;
2113 int count
= 0, i
, m
= 0;
2118 // Get an array to hold the submounts fsids.
2119 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2121 fsids_sz
= count
* sizeof(fsid_t
);
2122 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2123 if (fsids
== NULL
) {
2124 mount_list_unlock();
2127 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2130 * Fill the array with submount fsids.
2131 * Since mounts are always added to the tail of the mount list, the
2132 * list is always in mount order.
2133 * For each mount check if the mounted-on vnode belongs to a
2134 * mount that's already added to our array of mounts to be unmounted.
2136 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2137 vp
= smp
->mnt_vnodecovered
;
2140 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2141 for (i
= 0; i
<= m
; i
++) {
2142 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2143 fsids
[i
].val
[1] == fsid
.val
[1]) {
2144 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2149 mount_list_unlock();
2151 // Unmount the submounts in reverse order. Ignore errors.
2152 for (i
= m
; i
> 0; i
--) {
2153 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2156 mount_iterdrop(smp
);
2157 (void) dounmount(smp
, flags
, 1, ctx
);
2162 FREE(fsids
, M_TEMP
);
2166 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2171 if (mp
->mnt_crossref
< 0)
2172 panic("mount cross refs -ve");
2174 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2177 vnode_put_locked(dp
);
2180 mount_lock_destroy(mp
);
2182 mac_mount_label_destroy(mp
);
2184 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2188 vnode_put_locked(dp
);
2194 * Sync each mounted filesystem.
2200 int print_vmpage_stat
=0;
2201 int sync_timeout
= 60; // Sync time limit (sec)
2204 sync_callback(mount_t mp
, __unused
void *arg
)
2206 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2207 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2209 mp
->mnt_flag
&= ~MNT_ASYNC
;
2210 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2212 mp
->mnt_flag
|= MNT_ASYNC
;
2215 return (VFS_RETURNED
);
2220 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2222 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2224 if (print_vmpage_stat
) {
2225 vm_countdirtypages();
2231 #endif /* DIAGNOSTIC */
2236 sync_thread(void *arg
, __unused wait_result_t wr
)
2238 int *timeout
= (int *) arg
;
2240 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2243 wakeup((caddr_t
) timeout
);
2244 if (print_vmpage_stat
) {
2245 vm_countdirtypages();
2251 #endif /* DIAGNOSTIC */
2255 * Sync in a separate thread so we can time out if it blocks.
2258 sync_async(int timeout
)
2262 struct timespec ts
= {timeout
, 0};
2264 lck_mtx_lock(sync_mtx_lck
);
2265 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2266 printf("sync_thread failed\n");
2267 lck_mtx_unlock(sync_mtx_lck
);
2271 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2273 printf("sync timed out: %d sec\n", timeout
);
2275 thread_deallocate(thd
);
2281 * An in-kernel sync for power management to call.
2283 __private_extern__
int
2286 (void) sync_async(sync_timeout
);
2289 } /* end of sync_internal call */
2292 * Change filesystem quotas.
2296 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2299 int error
, quota_cmd
, quota_status
;
2302 struct nameidata nd
;
2303 vfs_context_t ctx
= vfs_context_current();
2304 struct dqblk my_dqblk
;
2306 AUDIT_ARG(uid
, uap
->uid
);
2307 AUDIT_ARG(cmd
, uap
->cmd
);
2308 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2313 mp
= nd
.ni_vp
->v_mount
;
2314 vnode_put(nd
.ni_vp
);
2317 /* copyin any data we will need for downstream code */
2318 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2320 switch (quota_cmd
) {
2322 /* uap->arg specifies a file from which to take the quotas */
2323 fnamelen
= MAXPATHLEN
;
2324 datap
= kalloc(MAXPATHLEN
);
2325 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2328 /* uap->arg is a pointer to a dqblk structure. */
2329 datap
= (caddr_t
) &my_dqblk
;
2333 /* uap->arg is a pointer to a dqblk structure. */
2334 datap
= (caddr_t
) &my_dqblk
;
2335 if (proc_is64bit(p
)) {
2336 struct user_dqblk my_dqblk64
;
2337 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2339 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2343 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2347 /* uap->arg is a pointer to an integer */
2348 datap
= (caddr_t
) &quota_status
;
2356 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2359 switch (quota_cmd
) {
2362 kfree(datap
, MAXPATHLEN
);
2365 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2367 if (proc_is64bit(p
)) {
2368 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2369 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2370 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2373 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2378 /* uap->arg is a pointer to an integer */
2380 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2391 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2393 return (EOPNOTSUPP
);
2398 * Get filesystem statistics.
2400 * Returns: 0 Success
2402 * vfs_update_vfsstat:???
2403 * munge_statfs:EFAULT
2407 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2410 struct vfsstatfs
*sp
;
2412 struct nameidata nd
;
2413 vfs_context_t ctx
= vfs_context_current();
2416 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2417 UIO_USERSPACE
, uap
->path
, ctx
);
2423 sp
= &mp
->mnt_vfsstat
;
2426 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2432 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2438 * Get filesystem statistics.
2442 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2446 struct vfsstatfs
*sp
;
2449 AUDIT_ARG(fd
, uap
->fd
);
2451 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2454 error
= vnode_getwithref(vp
);
2460 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2467 sp
= &mp
->mnt_vfsstat
;
2468 if ((error
= vfs_update_vfsstat(mp
,vfs_context_current(),VFS_USER_EVENT
)) != 0) {
2472 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2482 * Common routine to handle copying of statfs64 data to user space
2485 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2488 struct statfs64 sfs
;
2490 bzero(&sfs
, sizeof(sfs
));
2492 sfs
.f_bsize
= sfsp
->f_bsize
;
2493 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2494 sfs
.f_blocks
= sfsp
->f_blocks
;
2495 sfs
.f_bfree
= sfsp
->f_bfree
;
2496 sfs
.f_bavail
= sfsp
->f_bavail
;
2497 sfs
.f_files
= sfsp
->f_files
;
2498 sfs
.f_ffree
= sfsp
->f_ffree
;
2499 sfs
.f_fsid
= sfsp
->f_fsid
;
2500 sfs
.f_owner
= sfsp
->f_owner
;
2501 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2502 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2503 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2504 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2505 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2507 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2509 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2510 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2512 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2518 * Get file system statistics in 64-bit mode
2521 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2524 struct vfsstatfs
*sp
;
2526 struct nameidata nd
;
2527 vfs_context_t ctxp
= vfs_context_current();
2530 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2531 UIO_USERSPACE
, uap
->path
, ctxp
);
2537 sp
= &mp
->mnt_vfsstat
;
2540 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2546 error
= statfs64_common(mp
, sp
, uap
->buf
);
2553 * Get file system statistics in 64-bit mode
2556 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2560 struct vfsstatfs
*sp
;
2563 AUDIT_ARG(fd
, uap
->fd
);
2565 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2568 error
= vnode_getwithref(vp
);
2574 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2581 sp
= &mp
->mnt_vfsstat
;
2582 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2586 error
= statfs64_common(mp
, sp
, uap
->buf
);
2595 struct getfsstat_struct
{
2606 getfsstat_callback(mount_t mp
, void * arg
)
2609 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2610 struct vfsstatfs
*sp
;
2612 vfs_context_t ctx
= vfs_context_current();
2614 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2615 sp
= &mp
->mnt_vfsstat
;
2617 * If MNT_NOWAIT is specified, do not refresh the
2618 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2620 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2621 (error
= vfs_update_vfsstat(mp
, ctx
,
2623 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2624 return(VFS_RETURNED
);
2628 * Need to handle LP64 version of struct statfs
2630 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2632 fstp
->error
= error
;
2633 return(VFS_RETURNED_DONE
);
2635 fstp
->sfsp
+= my_size
;
2639 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2641 fstp
->error
= error
;
2642 return(VFS_RETURNED_DONE
);
2649 return(VFS_RETURNED
);
2653 * Get statistics on all filesystems.
2656 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2658 struct __mac_getfsstat_args muap
;
2660 muap
.buf
= uap
->buf
;
2661 muap
.bufsize
= uap
->bufsize
;
2662 muap
.mac
= USER_ADDR_NULL
;
2664 muap
.flags
= uap
->flags
;
2666 return (__mac_getfsstat(p
, &muap
, retval
));
2670 * __mac_getfsstat: Get MAC-related file system statistics
2672 * Parameters: p (ignored)
2673 * uap User argument descriptor (see below)
2674 * retval Count of file system statistics (N stats)
2676 * Indirect: uap->bufsize Buffer size
2677 * uap->macsize MAC info size
2678 * uap->buf Buffer where information will be returned
2680 * uap->flags File system flags
2683 * Returns: 0 Success
2688 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2692 size_t count
, maxcount
, bufsize
, macsize
;
2693 struct getfsstat_struct fst
;
2695 bufsize
= (size_t) uap
->bufsize
;
2696 macsize
= (size_t) uap
->macsize
;
2698 if (IS_64BIT_PROCESS(p
)) {
2699 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2702 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2710 if (uap
->mac
!= USER_ADDR_NULL
) {
2715 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2716 if (count
!= maxcount
)
2719 /* Copy in the array */
2720 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2725 error
= copyin(uap
->mac
, mp0
, macsize
);
2727 FREE(mp0
, M_MACTEMP
);
2731 /* Normalize to an array of user_addr_t */
2732 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2734 FREE(mp0
, M_MACTEMP
);
2738 for (i
= 0; i
< count
; i
++) {
2739 if (IS_64BIT_PROCESS(p
))
2740 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2742 mp
[i
] = (user_addr_t
)mp0
[i
];
2744 FREE(mp0
, M_MACTEMP
);
2751 fst
.flags
= uap
->flags
;
2754 fst
.maxcount
= maxcount
;
2757 vfs_iterate(0, getfsstat_callback
, &fst
);
2760 FREE(mp
, M_MACTEMP
);
2763 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2767 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2768 *retval
= fst
.maxcount
;
2770 *retval
= fst
.count
;
2775 getfsstat64_callback(mount_t mp
, void * arg
)
2777 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2778 struct vfsstatfs
*sp
;
2781 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2782 sp
= &mp
->mnt_vfsstat
;
2784 * If MNT_NOWAIT is specified, do not refresh the fsstat
2785 * cache. MNT_WAIT overrides MNT_NOWAIT.
2787 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2788 * getfsstat, since the constants are out of the same
2791 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2792 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2793 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2794 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2795 return(VFS_RETURNED
);
2798 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2800 fstp
->error
= error
;
2801 return(VFS_RETURNED_DONE
);
2803 fstp
->sfsp
+= sizeof(struct statfs64
);
2806 return(VFS_RETURNED
);
2810 * Get statistics on all file systems in 64 bit mode.
2813 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2816 int count
, maxcount
;
2817 struct getfsstat_struct fst
;
2819 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2825 fst
.flags
= uap
->flags
;
2828 fst
.maxcount
= maxcount
;
2830 vfs_iterate(0, getfsstat64_callback
, &fst
);
2833 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2837 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2838 *retval
= fst
.maxcount
;
2840 *retval
= fst
.count
;
2846 * Gets the vnode associated with the file descriptor passed.
2850 * ctx - vfs context of caller
2851 * fd - file descriptor for which vnode is required.
2852 * vpp - Pointer to pointer to vnode to be returned.
2854 * The vnode is returned with an iocount so any vnode obtained
2855 * by this call needs a vnode_put
2859 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2863 struct fileproc
*fp
;
2864 proc_t p
= vfs_context_proc(ctx
);
2868 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2872 error
= vnode_getwithref(vp
);
2874 (void)fp_drop(p
, fd
, fp
, 0);
2878 (void)fp_drop(p
, fd
, fp
, 0);
2884 * Wrapper function around namei to start lookup from a directory
2885 * specified by a file descriptor ni_dirfd.
2887 * In addition to all the errors returned by namei, this call can
2888 * return ENOTDIR if the file descriptor does not refer to a directory,
2889 * and EBADF if the file descriptor is not valid.
2892 nameiat(struct nameidata
*ndp
, int dirfd
)
2894 if ((dirfd
!= AT_FDCWD
) &&
2895 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2896 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2900 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2901 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2905 c
= *((char *)(ndp
->ni_dirp
));
2911 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2916 if (vnode_vtype(dvp_at
) != VDIR
) {
2921 ndp
->ni_dvp
= dvp_at
;
2922 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2924 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
2930 return (namei(ndp
));
2934 * Change current working directory to a given file descriptor.
2938 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
2940 struct filedesc
*fdp
= p
->p_fd
;
2946 vfs_context_t ctx
= vfs_context_current();
2948 AUDIT_ARG(fd
, uap
->fd
);
2949 if (per_thread
&& uap
->fd
== -1) {
2951 * Switching back from per-thread to per process CWD; verify we
2952 * in fact have one before proceeding. The only success case
2953 * for this code path is to return 0 preemptively after zapping
2954 * the thread structure contents.
2956 thread_t th
= vfs_context_thread(ctx
);
2958 uthread_t uth
= get_bsdthread_info(th
);
2960 uth
->uu_cdir
= NULLVP
;
2961 if (tvp
!= NULLVP
) {
2969 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2971 if ( (error
= vnode_getwithref(vp
)) ) {
2976 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
2978 if (vp
->v_type
!= VDIR
) {
2984 error
= mac_vnode_check_chdir(ctx
, vp
);
2988 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
2992 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
2993 if (vfs_busy(mp
, LK_NOWAIT
)) {
2997 error
= VFS_ROOT(mp
, &tdp
, ctx
);
3006 if ( (error
= vnode_ref(vp
)) )
3011 thread_t th
= vfs_context_thread(ctx
);
3013 uthread_t uth
= get_bsdthread_info(th
);
3016 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3041 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3043 return common_fchdir(p
, uap
, 0);
3047 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3049 return common_fchdir(p
, (void *)uap
, 1);
3053 * Change current working directory (".").
3055 * Returns: 0 Success
3056 * change_dir:ENOTDIR
3058 * vnode_ref:ENOENT No such file or directory
3062 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3064 struct filedesc
*fdp
= p
->p_fd
;
3066 struct nameidata nd
;
3068 vfs_context_t ctx
= vfs_context_current();
3070 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3071 UIO_USERSPACE
, uap
->path
, ctx
);
3072 error
= change_dir(&nd
, ctx
);
3075 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3076 vnode_put(nd
.ni_vp
);
3080 * drop the iocount we picked up in change_dir
3082 vnode_put(nd
.ni_vp
);
3085 thread_t th
= vfs_context_thread(ctx
);
3087 uthread_t uth
= get_bsdthread_info(th
);
3089 uth
->uu_cdir
= nd
.ni_vp
;
3090 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3092 vnode_rele(nd
.ni_vp
);
3098 fdp
->fd_cdir
= nd
.ni_vp
;
3112 * Change current working directory (".") for the entire process
3114 * Parameters: p Process requesting the call
3115 * uap User argument descriptor (see below)
3118 * Indirect parameters: uap->path Directory path
3120 * Returns: 0 Success
3121 * common_chdir: ENOTDIR
3122 * common_chdir: ENOENT No such file or directory
3127 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3129 return common_chdir(p
, (void *)uap
, 0);
3135 * Change current working directory (".") for a single thread
3137 * Parameters: p Process requesting the call
3138 * uap User argument descriptor (see below)
3141 * Indirect parameters: uap->path Directory path
3143 * Returns: 0 Success
3144 * common_chdir: ENOTDIR
3145 * common_chdir: ENOENT No such file or directory
3150 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3152 return common_chdir(p
, (void *)uap
, 1);
3157 * Change notion of root (``/'') directory.
3161 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3163 struct filedesc
*fdp
= p
->p_fd
;
3165 struct nameidata nd
;
3167 vfs_context_t ctx
= vfs_context_current();
3169 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3172 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3173 UIO_USERSPACE
, uap
->path
, ctx
);
3174 error
= change_dir(&nd
, ctx
);
3179 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3182 vnode_put(nd
.ni_vp
);
3187 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3188 vnode_put(nd
.ni_vp
);
3191 vnode_put(nd
.ni_vp
);
3195 fdp
->fd_rdir
= nd
.ni_vp
;
3196 fdp
->fd_flags
|= FD_CHROOT
;
3206 * Common routine for chroot and chdir.
3208 * Returns: 0 Success
3209 * ENOTDIR Not a directory
3210 * namei:??? [anything namei can return]
3211 * vnode_authorize:??? [anything vnode_authorize can return]
3214 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3219 if ((error
= namei(ndp
)))
3224 if (vp
->v_type
!= VDIR
) {
3230 error
= mac_vnode_check_chdir(ctx
, vp
);
3237 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3247 * Allocate the vnode data (for directories) associated with the file glob.
3250 fg_vn_data_alloc(void)
3252 struct fd_vn_data
*fvdata
;
3254 /* Allocate per fd vnode data */
3255 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3256 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3257 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3262 * Free the vnode data (for directories) associated with the file glob.
3265 fg_vn_data_free(void *fgvndata
)
3267 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3270 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3271 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3272 FREE(fvdata
, M_FD_VN_DATA
);
3276 * Check permissions, allocate an open file structure,
3277 * and call the device open routine if any.
3279 * Returns: 0 Success
3290 * XXX Need to implement uid, gid
3293 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3294 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3297 proc_t p
= vfs_context_proc(ctx
);
3298 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3299 struct fileproc
*fp
;
3302 int type
, indx
, error
;
3304 struct vfs_context context
;
3308 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3311 flags
= FFLAGS(uflags
);
3312 CLR(flags
, FENCRYPTED
);
3313 CLR(flags
, FUNENCRYPTED
);
3315 AUDIT_ARG(fflags
, oflags
);
3316 AUDIT_ARG(mode
, vap
->va_mode
);
3318 if ((error
= falloc_withalloc(p
,
3319 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3322 uu
->uu_dupfd
= -indx
- 1;
3324 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3325 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3326 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3327 fp_drop(p
, indx
, NULL
, 0);
3332 if (error
== ERESTART
)
3334 fp_free(p
, indx
, fp
);
3340 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
| FENCRYPTED
| FUNENCRYPTED
);
3341 fp
->f_fglob
->fg_ops
= &vnops
;
3342 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3344 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3345 lf
.l_whence
= SEEK_SET
;
3348 if (flags
& O_EXLOCK
)
3349 lf
.l_type
= F_WRLCK
;
3351 lf
.l_type
= F_RDLCK
;
3353 if ((flags
& FNONBLOCK
) == 0)
3356 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3361 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3363 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3366 /* try to truncate by setting the size attribute */
3367 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3371 * For directories we hold some additional information in the fd.
3373 if (vnode_vtype(vp
) == VDIR
) {
3374 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3376 fp
->f_fglob
->fg_vn_data
= NULL
;
3382 * The first terminal open (without a O_NOCTTY) by a session leader
3383 * results in it being set as the controlling terminal.
3385 if (vnode_istty(vp
) && !(p
->p_flag
& P_CONTROLT
) &&
3386 !(flags
& O_NOCTTY
)) {
3389 (void)(*fp
->f_fglob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
,
3390 (caddr_t
)&tmp
, ctx
);
3394 if (flags
& O_CLOEXEC
)
3395 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3396 if (flags
& O_CLOFORK
)
3397 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3398 procfdtbl_releasefd(p
, indx
, NULL
);
3399 fp_drop(p
, indx
, fp
, 1);
3406 context
= *vfs_context_current();
3407 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3409 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3410 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3411 lf
.l_whence
= SEEK_SET
;
3414 lf
.l_type
= F_UNLCK
;
3417 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3420 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3422 fp_free(p
, indx
, fp
);
3428 * While most of the *at syscall handlers can call nameiat() which
3429 * is a wrapper around namei, the use of namei and initialisation
3430 * of nameidata are far removed and in different functions - namei
3431 * gets called in vn_open_auth for open1. So we'll just do here what
3435 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3436 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3439 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3443 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3444 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3448 c
= *((char *)(ndp
->ni_dirp
));
3454 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3459 if (vnode_vtype(dvp_at
) != VDIR
) {
3464 ndp
->ni_dvp
= dvp_at
;
3465 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3466 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3473 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3477 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3479 * Parameters: p Process requesting the open
3480 * uap User argument descriptor (see below)
3481 * retval Pointer to an area to receive the
3482 * return value from the system call
3484 * Indirect: uap->path Path to open (same as 'open')
3485 * uap->flags Flags to open (same as 'open')
3486 * uap->uid UID to set, if creating
3487 * uap->gid GID to set, if creating
3488 * uap->mode File mode, if creating (same as 'open')
3489 * uap->xsecurity ACL to set, if creating
3491 * Returns: 0 Success
3494 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3496 * XXX: We should enumerate the possible errno values here, and where
3497 * in the code they originated.
3500 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3502 struct filedesc
*fdp
= p
->p_fd
;
3504 kauth_filesec_t xsecdst
;
3505 struct vnode_attr va
;
3506 struct nameidata nd
;
3509 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3512 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3513 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3517 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3518 VATTR_SET(&va
, va_mode
, cmode
);
3519 if (uap
->uid
!= KAUTH_UID_NONE
)
3520 VATTR_SET(&va
, va_uid
, uap
->uid
);
3521 if (uap
->gid
!= KAUTH_GID_NONE
)
3522 VATTR_SET(&va
, va_gid
, uap
->gid
);
3523 if (xsecdst
!= NULL
)
3524 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3526 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3527 uap
->path
, vfs_context_current());
3529 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3530 fileproc_alloc_init
, NULL
, retval
);
3531 if (xsecdst
!= NULL
)
3532 kauth_filesec_free(xsecdst
);
3538 * Go through the data-protected atomically controlled open (2)
3540 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3542 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3543 int flags
= uap
->flags
;
3544 int class = uap
->class;
3545 int dpflags
= uap
->dpflags
;
3548 * Follow the same path as normal open(2)
3549 * Look up the item if it exists, and acquire the vnode.
3551 struct filedesc
*fdp
= p
->p_fd
;
3552 struct vnode_attr va
;
3553 struct nameidata nd
;
3558 /* Mask off all but regular access permissions */
3559 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3560 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3562 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3563 uap
->path
, vfs_context_current());
3566 * Initialize the extra fields in vnode_attr to pass down our
3568 * 1. target cprotect class.
3569 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3571 if (flags
& O_CREAT
) {
3572 /* lower level kernel code validates that the class is valid before applying it. */
3573 if (class != PROTECTION_CLASS_DEFAULT
) {
3575 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3576 * file behave the same as open (2)
3578 VATTR_SET(&va
, va_dataprotect_class
, class);
3582 if (dpflags
& (O_DP_GETRAWENCRYPTED
|O_DP_GETRAWUNENCRYPTED
)) {
3583 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3584 /* Not allowed to write raw encrypted bytes */
3587 if (uap
->dpflags
& O_DP_GETRAWENCRYPTED
) {
3588 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3590 if (uap
->dpflags
& O_DP_GETRAWUNENCRYPTED
) {
3591 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
);
3595 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3596 fileproc_alloc_init
, NULL
, retval
);
3602 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3603 int fd
, enum uio_seg segflg
, int *retval
)
3605 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3606 struct vnode_attr va
;
3607 struct nameidata nd
;
3611 /* Mask off all but regular access permissions */
3612 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3613 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3615 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3618 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
3623 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3625 __pthread_testcancel(1);
3626 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
3630 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3633 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3634 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
3638 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3641 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3642 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
3646 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3648 __pthread_testcancel(1);
3649 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3653 * openbyid_np: open a file given a file system id and a file system object id
3654 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3655 * file systems that don't support object ids it is a node id (uint64_t).
3657 * Parameters: p Process requesting the open
3658 * uap User argument descriptor (see below)
3659 * retval Pointer to an area to receive the
3660 * return value from the system call
3662 * Indirect: uap->path Path to open (same as 'open')
3664 * uap->fsid id of target file system
3665 * uap->objid id of target file system object
3666 * uap->flags Flags to open (same as 'open')
3668 * Returns: 0 Success
3672 * XXX: We should enumerate the possible errno values here, and where
3673 * in the code they originated.
3676 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3682 int buflen
= MAXPATHLEN
;
3684 vfs_context_t ctx
= vfs_context_current();
3686 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3690 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3691 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3695 AUDIT_ARG(value32
, fsid
.val
[0]);
3696 AUDIT_ARG(value64
, objid
);
3698 /* resolve path from fsid, objid */
3700 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3705 error
= fsgetpath_internal(
3706 ctx
, fsid
.val
[0], objid
,
3707 buflen
, buf
, &pathlen
);
3713 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3721 error
= openat_internal(
3722 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3731 * Create a special file.
3733 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
3736 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3738 struct vnode_attr va
;
3739 vfs_context_t ctx
= vfs_context_current();
3741 struct nameidata nd
;
3745 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3746 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3748 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3749 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3750 return(mkfifo1(ctx
, uap
->path
, &va
));
3752 AUDIT_ARG(mode
, uap
->mode
);
3753 AUDIT_ARG(value32
, uap
->dev
);
3755 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3757 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3758 UIO_USERSPACE
, uap
->path
, ctx
);
3770 switch (uap
->mode
& S_IFMT
) {
3772 VATTR_SET(&va
, va_type
, VCHR
);
3775 VATTR_SET(&va
, va_type
, VBLK
);
3783 error
= mac_vnode_check_create(ctx
,
3784 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3789 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3792 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3796 int update_flags
= 0;
3798 // Make sure the name & parent pointers are hooked up
3799 if (vp
->v_name
== NULL
)
3800 update_flags
|= VNODE_UPDATE_NAME
;
3801 if (vp
->v_parent
== NULLVP
)
3802 update_flags
|= VNODE_UPDATE_PARENT
;
3805 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3808 add_fsevent(FSE_CREATE_FILE
, ctx
,
3816 * nameidone has to happen before we vnode_put(dvp)
3817 * since it may need to release the fs_nodelock on the dvp
3829 * Create a named pipe.
3831 * Returns: 0 Success
3834 * vnode_authorize:???
3838 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3842 struct nameidata nd
;
3844 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3845 UIO_USERSPACE
, upath
, ctx
);
3852 /* check that this is a new file and authorize addition */
3857 VATTR_SET(vap
, va_type
, VFIFO
);
3859 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
3862 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
3865 * nameidone has to happen before we vnode_put(dvp)
3866 * since it may need to release the fs_nodelock on the dvp
3879 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3881 * Parameters: p Process requesting the open
3882 * uap User argument descriptor (see below)
3885 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3886 * uap->uid UID to set
3887 * uap->gid GID to set
3888 * uap->mode File mode to set (same as 'mkfifo')
3889 * uap->xsecurity ACL to set, if creating
3891 * Returns: 0 Success
3894 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3896 * XXX: We should enumerate the possible errno values here, and where
3897 * in the code they originated.
3900 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
3903 kauth_filesec_t xsecdst
;
3904 struct vnode_attr va
;
3906 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3908 xsecdst
= KAUTH_FILESEC_NONE
;
3909 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
3910 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
3915 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3916 if (uap
->uid
!= KAUTH_UID_NONE
)
3917 VATTR_SET(&va
, va_uid
, uap
->uid
);
3918 if (uap
->gid
!= KAUTH_GID_NONE
)
3919 VATTR_SET(&va
, va_gid
, uap
->gid
);
3920 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3921 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3923 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
3925 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3926 kauth_filesec_free(xsecdst
);
3932 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
3934 struct vnode_attr va
;
3937 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3939 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
3944 my_strrchr(char *p
, int ch
)
3948 for (save
= NULL
;; ++p
) {
3957 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
3960 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
3962 int ret
, len
= _len
;
3964 *truncated_path
= 0;
3965 ret
= vn_getpath(dvp
, path
, &len
);
3966 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
3969 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
3970 if (len
> MAXPATHLEN
) {
3973 // the string got truncated!
3974 *truncated_path
= 1;
3975 ptr
= my_strrchr(path
, '/');
3977 *ptr
= '\0'; // chop off the string at the last directory component
3979 len
= strlen(path
) + 1;
3982 } else if (ret
== 0) {
3983 *truncated_path
= 1;
3984 } else if (ret
!= 0) {
3985 struct vnode
*mydvp
=dvp
;
3987 if (ret
!= ENOSPC
) {
3988 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3989 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
3991 *truncated_path
= 1;
3994 if (mydvp
->v_parent
!= NULL
) {
3995 mydvp
= mydvp
->v_parent
;
3996 } else if (mydvp
->v_mount
) {
3997 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4000 // no parent and no mount point? only thing is to punt and say "/" changed
4001 strlcpy(path
, "/", _len
);
4006 if (mydvp
== NULL
) {
4011 ret
= vn_getpath(mydvp
, path
, &len
);
4012 } while (ret
== ENOSPC
);
4020 * Make a hard file link.
4022 * Returns: 0 Success
4027 * vnode_authorize:???
4032 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4033 user_addr_t link
, int flag
, enum uio_seg segflg
)
4035 vnode_t vp
, dvp
, lvp
;
4036 struct nameidata nd
;
4042 int need_event
, has_listeners
;
4043 char *target_path
= NULL
;
4046 vp
= dvp
= lvp
= NULLVP
;
4048 /* look up the object we are linking to */
4049 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4050 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4053 error
= nameiat(&nd
, fd1
);
4061 * Normally, linking to directories is not supported.
4062 * However, some file systems may have limited support.
4064 if (vp
->v_type
== VDIR
) {
4065 if (!(vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSDIRLINKS
)) {
4066 error
= EPERM
; /* POSIX */
4069 /* Linking to a directory requires ownership. */
4070 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4071 struct vnode_attr dva
;
4074 VATTR_WANTED(&dva
, va_uid
);
4075 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4076 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4077 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4084 /* lookup the target node */
4088 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4089 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4091 error
= nameiat(&nd
, fd2
);
4098 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4102 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4103 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4106 /* target node must not exist */
4107 if (lvp
!= NULLVP
) {
4111 /* cannot link across mountpoints */
4112 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4117 /* authorize creation of the target node */
4118 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4121 /* and finally make the link */
4122 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4127 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4131 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4135 has_listeners
= kauth_authorize_fileop_has_listeners();
4137 if (need_event
|| has_listeners
) {
4138 char *link_to_path
= NULL
;
4139 int len
, link_name_len
;
4141 /* build the path to the new link file */
4142 GET_PATH(target_path
);
4143 if (target_path
== NULL
) {
4148 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4150 if (has_listeners
) {
4151 /* build the path to file we are linking to */
4152 GET_PATH(link_to_path
);
4153 if (link_to_path
== NULL
) {
4158 link_name_len
= MAXPATHLEN
;
4159 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4161 * Call out to allow 3rd party notification of link.
4162 * Ignore result of kauth_authorize_fileop call.
4164 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4165 (uintptr_t)link_to_path
,
4166 (uintptr_t)target_path
);
4168 if (link_to_path
!= NULL
) {
4169 RELEASE_PATH(link_to_path
);
4174 /* construct fsevent */
4175 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4177 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4180 // build the path to the destination of the link
4181 add_fsevent(FSE_CREATE_FILE
, ctx
,
4182 FSE_ARG_STRING
, len
, target_path
,
4183 FSE_ARG_FINFO
, &finfo
,
4187 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4188 FSE_ARG_VNODE
, vp
->v_parent
,
4196 * nameidone has to happen before we vnode_put(dvp)
4197 * since it may need to release the fs_nodelock on the dvp
4200 if (target_path
!= NULL
) {
4201 RELEASE_PATH(target_path
);
4213 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4215 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4216 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
4220 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4222 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4225 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4226 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4230 * Make a symbolic link.
4232 * We could add support for ACLs here too...
4236 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4237 user_addr_t link
, enum uio_seg segflg
)
4239 struct vnode_attr va
;
4242 struct nameidata nd
;
4244 uint32_t dfflags
; // Directory file flags
4249 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4250 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4251 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4253 path
= (char *)path_data
;
4257 AUDIT_ARG(text
, path
); /* This is the link string */
4259 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4262 error
= nameiat(&nd
, fd
);
4268 p
= vfs_context_proc(ctx
);
4270 VATTR_SET(&va
, va_type
, VLNK
);
4271 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4274 * Handle inheritance of restricted flag
4276 error
= vnode_flags(dvp
, &dfflags
, ctx
);
4279 if (dfflags
& SF_RESTRICTED
)
4280 VATTR_SET(&va
, va_flags
, SF_RESTRICTED
);
4283 error
= mac_vnode_check_create(ctx
,
4284 dvp
, &nd
.ni_cnd
, &va
);
4297 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4298 /* get default ownership, etc. */
4300 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4302 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4305 if (error
== 0 && vp
)
4306 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4309 /* do fallback attribute handling */
4310 if (error
== 0 && vp
)
4311 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4314 int update_flags
= 0;
4316 /*check if a new vnode was created, else try to get one*/
4318 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4320 nd
.ni_op
= OP_LOOKUP
;
4322 nd
.ni_cnd
.cn_flags
= 0;
4323 error
= nameiat(&nd
, fd
);
4330 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4331 /* call out to allow 3rd party notification of rename.
4332 * Ignore result of kauth_authorize_fileop call.
4334 if (kauth_authorize_fileop_has_listeners() &&
4336 char *new_link_path
= NULL
;
4339 /* build the path to the new link file */
4340 new_link_path
= get_pathbuff();
4342 vn_getpath(dvp
, new_link_path
, &len
);
4343 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4344 new_link_path
[len
- 1] = '/';
4345 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4348 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4349 (uintptr_t)path
, (uintptr_t)new_link_path
);
4350 if (new_link_path
!= NULL
)
4351 release_pathbuff(new_link_path
);
4354 // Make sure the name & parent pointers are hooked up
4355 if (vp
->v_name
== NULL
)
4356 update_flags
|= VNODE_UPDATE_NAME
;
4357 if (vp
->v_parent
== NULLVP
)
4358 update_flags
|= VNODE_UPDATE_PARENT
;
4361 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4364 add_fsevent(FSE_CREATE_FILE
, ctx
,
4372 * nameidone has to happen before we vnode_put(dvp)
4373 * since it may need to release the fs_nodelock on the dvp
4381 if (path
&& (path
!= (char *)path_data
))
4382 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
4388 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4390 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4391 uap
->link
, UIO_USERSPACE
));
4395 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4396 __unused
int32_t *retval
)
4398 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4399 uap
->path2
, UIO_USERSPACE
));
4403 * Delete a whiteout from the filesystem.
4404 * No longer supported.
4407 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4413 * Delete a name from the filesystem.
4417 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
,
4418 user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
)
4420 struct nameidata nd
;
4423 struct componentname
*cnp
;
4428 struct vnode_attr va
;
4435 struct vnode_attr
*vap
;
4437 int retry_count
= 0;
4440 cn_flags
= LOCKPARENT
;
4441 if (!(unlink_flags
& VNODE_REMOVE_NO_AUDIT_PATH
))
4442 cn_flags
|= AUDITVNPATH1
;
4443 /* If a starting dvp is passed, it trumps any fd passed. */
4448 /* unlink or delete is allowed on rsrc forks and named streams */
4449 cn_flags
|= CN_ALLOWRSRCFORK
;
4460 NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
);
4462 nd
.ni_dvp
= start_dvp
;
4463 nd
.ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4467 error
= nameiat(&nd
, fd
);
4475 /* With Carbon delete semantics, busy files cannot be deleted */
4476 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4477 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4480 /* Skip any potential upcalls if told to. */
4481 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4482 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4486 batched
= vnode_compound_remove_available(vp
);
4488 * The root of a mounted filesystem cannot be deleted.
4490 if (vp
->v_flag
& VROOT
) {
4495 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4497 if (error
== ENOENT
) {
4498 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4499 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4510 if (!vnode_compound_remove_available(dvp
)) {
4511 panic("No vp, but no compound remove?");
4516 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4519 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4520 /* XXX need to get these data in batched VNOP */
4521 get_fse_info(vp
, &finfo
, ctx
);
4524 error
= vfs_get_notify_attributes(&va
);
4533 has_listeners
= kauth_authorize_fileop_has_listeners();
4534 if (need_event
|| has_listeners
) {
4542 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4546 if (nd
.ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4547 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4551 error
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
);
4553 if (error
== EKEEPLOOKING
) {
4555 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4558 if ((nd
.ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4559 panic("EKEEPLOOKING, but continue flag not set?");
4562 if (vnode_isdir(vp
)) {
4566 goto lookup_continue
;
4567 } else if (error
== ENOENT
&& batched
) {
4568 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
4569 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
4571 * For compound VNOPs, the authorization callback may
4572 * return ENOENT in case of racing hardlink lookups
4573 * hitting the name cache, redrive the lookup.
4583 * Call out to allow 3rd party notification of delete.
4584 * Ignore result of kauth_authorize_fileop call.
4587 if (has_listeners
) {
4588 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4589 KAUTH_FILEOP_DELETE
,
4594 if (vp
->v_flag
& VISHARDLINK
) {
4596 // if a hardlink gets deleted we want to blow away the
4597 // v_parent link because the path that got us to this
4598 // instance of the link is no longer valid. this will
4599 // force the next call to get the path to ask the file
4600 // system instead of just following the v_parent link.
4602 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4607 if (vp
->v_flag
& VISHARDLINK
) {
4608 get_fse_info(vp
, &finfo
, ctx
);
4610 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4612 if (truncated_path
) {
4613 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4615 add_fsevent(FSE_DELETE
, ctx
,
4616 FSE_ARG_STRING
, len
, path
,
4617 FSE_ARG_FINFO
, &finfo
,
4628 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4629 * will cause its shadow file to go away if necessary.
4631 if (vp
&& (vnode_isnamedstream(vp
)) &&
4632 (vp
->v_parent
!= NULLVP
) &&
4633 vnode_isshadow(vp
)) {
4638 * nameidone has to happen before we vnode_put(dvp)
4639 * since it may need to release the fs_nodelock on the dvp
4655 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
,
4656 enum uio_seg segflg
, int unlink_flags
)
4658 return (unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
,
4663 * Delete a name from the filesystem using Carbon semantics.
4666 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4668 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4669 uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
));
4673 * Delete a name from the filesystem using POSIX semantics.
4676 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4678 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
,
4679 uap
->path
, UIO_USERSPACE
, 0));
4683 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4685 if (uap
->flag
& ~AT_REMOVEDIR
)
4688 if (uap
->flag
& AT_REMOVEDIR
)
4689 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4690 uap
->path
, UIO_USERSPACE
));
4692 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4693 NULLVP
, uap
->path
, UIO_USERSPACE
, 0));
4697 * Reposition read/write file offset.
4700 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4702 struct fileproc
*fp
;
4704 struct vfs_context
*ctx
;
4705 off_t offset
= uap
->offset
, file_size
;
4708 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4709 if (error
== ENOTSUP
)
4713 if (vnode_isfifo(vp
)) {
4719 ctx
= vfs_context_current();
4721 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4722 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4725 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4732 if ( (error
= vnode_getwithref(vp
)) ) {
4737 switch (uap
->whence
) {
4739 offset
+= fp
->f_fglob
->fg_offset
;
4742 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4744 offset
+= file_size
;
4752 if (uap
->offset
> 0 && offset
< 0) {
4753 /* Incremented/relative move past max size */
4757 * Allow negative offsets on character devices, per
4758 * POSIX 1003.1-2001. Most likely for writing disk
4761 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4762 /* Decremented/relative move before start */
4766 fp
->f_fglob
->fg_offset
= offset
;
4767 *retval
= fp
->f_fglob
->fg_offset
;
4773 * An lseek can affect whether data is "available to read." Use
4774 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4776 post_event_if_success(vp
, error
, NOTE_NONE
);
4777 (void)vnode_put(vp
);
4784 * Check access permissions.
4786 * Returns: 0 Success
4787 * vnode_authorize:???
4790 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4792 kauth_action_t action
;
4796 * If just the regular access bits, convert them to something
4797 * that vnode_authorize will understand.
4799 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4802 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4803 if (uflags
& W_OK
) {
4804 if (vnode_isdir(vp
)) {
4805 action
|= KAUTH_VNODE_ADD_FILE
|
4806 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4807 /* might want delete rights here too */
4809 action
|= KAUTH_VNODE_WRITE_DATA
;
4812 if (uflags
& X_OK
) {
4813 if (vnode_isdir(vp
)) {
4814 action
|= KAUTH_VNODE_SEARCH
;
4816 action
|= KAUTH_VNODE_EXECUTE
;
4820 /* take advantage of definition of uflags */
4821 action
= uflags
>> 8;
4825 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4830 /* action == 0 means only check for existence */
4832 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4843 * access_extended: Check access permissions in bulk.
4845 * Description: uap->entries Pointer to an array of accessx
4846 * descriptor structs, plus one or
4847 * more NULL terminated strings (see
4848 * "Notes" section below).
4849 * uap->size Size of the area pointed to by
4851 * uap->results Pointer to the results array.
4853 * Returns: 0 Success
4854 * ENOMEM Insufficient memory
4855 * EINVAL Invalid arguments
4856 * namei:EFAULT Bad address
4857 * namei:ENAMETOOLONG Filename too long
4858 * namei:ENOENT No such file or directory
4859 * namei:ELOOP Too many levels of symbolic links
4860 * namei:EBADF Bad file descriptor
4861 * namei:ENOTDIR Not a directory
4866 * uap->results Array contents modified
4868 * Notes: The uap->entries are structured as an arbitrary length array
4869 * of accessx descriptors, followed by one or more NULL terminated
4872 * struct accessx_descriptor[0]
4874 * struct accessx_descriptor[n]
4875 * char name_data[0];
4877 * We determine the entry count by walking the buffer containing
4878 * the uap->entries argument descriptor. For each descriptor we
4879 * see, the valid values for the offset ad_name_offset will be
4880 * in the byte range:
4882 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4884 * [ uap->entries + uap->size - 2 ]
4886 * since we must have at least one string, and the string must
4887 * be at least one character plus the NULL terminator in length.
4889 * XXX: Need to support the check-as uid argument
4892 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
4894 struct accessx_descriptor
*input
= NULL
;
4895 errno_t
*result
= NULL
;
4898 unsigned int desc_max
, desc_actual
, i
, j
;
4899 struct vfs_context context
;
4900 struct nameidata nd
;
4904 #define ACCESSX_MAX_DESCR_ON_STACK 10
4905 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
4907 context
.vc_ucred
= NULL
;
4910 * Validate parameters; if valid, copy the descriptor array and string
4911 * arguments into local memory. Before proceeding, the following
4912 * conditions must have been met:
4914 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4915 * o There must be sufficient room in the request for at least one
4916 * descriptor and a one byte NUL terminated string.
4917 * o The allocation of local storage must not fail.
4919 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
4921 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
4923 if (uap
->size
<= sizeof (stack_input
)) {
4924 input
= stack_input
;
4926 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
4927 if (input
== NULL
) {
4932 error
= copyin(uap
->entries
, input
, uap
->size
);
4936 AUDIT_ARG(opaque
, input
, uap
->size
);
4939 * Force NUL termination of the copyin buffer to avoid namei() running
4940 * off the end. If the caller passes us bogus data, they may get a
4943 ((char *)input
)[uap
->size
- 1] = 0;
4946 * Access is defined as checking against the process' real identity,
4947 * even if operations are checking the effective identity. This
4948 * requires that we use a local vfs context.
4950 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
4951 context
.vc_thread
= current_thread();
4954 * Find out how many entries we have, so we can allocate the result
4955 * array by walking the list and adjusting the count downward by the
4956 * earliest string offset we see.
4958 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
4959 desc_actual
= desc_max
;
4960 for (i
= 0; i
< desc_actual
; i
++) {
4962 * Take the offset to the name string for this entry and
4963 * convert to an input array index, which would be one off
4964 * the end of the array if this entry was the lowest-addressed
4967 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
4970 * An offset greater than the max allowable offset is an error.
4971 * It is also an error for any valid entry to point
4972 * to a location prior to the end of the current entry, if
4973 * it's not a reference to the string of the previous entry.
4975 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
4981 * An offset of 0 means use the previous descriptor's offset;
4982 * this is used to chain multiple requests for the same file
4983 * to avoid multiple lookups.
4986 /* This is not valid for the first entry */
4995 * If the offset of the string for this descriptor is before
4996 * what we believe is the current actual last descriptor,
4997 * then we need to adjust our estimate downward; this permits
4998 * the string table following the last descriptor to be out
4999 * of order relative to the descriptor list.
5001 if (j
< desc_actual
)
5006 * We limit the actual number of descriptors we are willing to process
5007 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5008 * requested does not exceed this limit,
5010 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5014 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5015 if (result
== NULL
) {
5021 * Do the work by iterating over the descriptor entries we know to
5022 * at least appear to contain valid data.
5025 for (i
= 0; i
< desc_actual
; i
++) {
5027 * If the ad_name_offset is 0, then we use the previous
5028 * results to make the check; otherwise, we are looking up
5031 if (input
[i
].ad_name_offset
!= 0) {
5032 /* discard old vnodes */
5043 * Scan forward in the descriptor list to see if we
5044 * need the parent vnode. We will need it if we are
5045 * deleting, since we must have rights to remove
5046 * entries in the parent directory, as well as the
5047 * rights to delete the object itself.
5049 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5050 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5051 if (input
[j
].ad_flags
& _DELETE_OK
)
5054 niopts
= FOLLOW
| AUDITVNPATH1
;
5056 /* need parent for vnode_authorize for deletion test */
5058 niopts
|= WANTPARENT
;
5061 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5062 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5074 * Handle lookup errors.
5084 /* run this access check */
5085 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5088 /* fatal lookup error */
5094 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5096 /* copy out results */
5097 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5100 if (input
&& input
!= stack_input
)
5101 FREE(input
, M_TEMP
);
5103 FREE(result
, M_TEMP
);
5108 if (IS_VALID_CRED(context
.vc_ucred
))
5109 kauth_cred_unref(&context
.vc_ucred
);
5115 * Returns: 0 Success
5116 * namei:EFAULT Bad address
5117 * namei:ENAMETOOLONG Filename too long
5118 * namei:ENOENT No such file or directory
5119 * namei:ELOOP Too many levels of symbolic links
5120 * namei:EBADF Bad file descriptor
5121 * namei:ENOTDIR Not a directory
5126 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5127 int flag
, enum uio_seg segflg
)
5130 struct nameidata nd
;
5132 struct vfs_context context
;
5134 int is_namedstream
= 0;
5138 * Unless the AT_EACCESS option is used, Access is defined as checking
5139 * against the process' real identity, even if operations are checking
5140 * the effective identity. So we need to tweak the credential
5141 * in the context for that case.
5143 if (!(flag
& AT_EACCESS
))
5144 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5146 context
.vc_ucred
= ctx
->vc_ucred
;
5147 context
.vc_thread
= ctx
->vc_thread
;
5150 niopts
= FOLLOW
| AUDITVNPATH1
;
5151 /* need parent for vnode_authorize for deletion test */
5152 if (amode
& _DELETE_OK
)
5153 niopts
|= WANTPARENT
;
5154 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5158 /* access(F_OK) calls are allowed for resource forks. */
5160 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5162 error
= nameiat(&nd
, fd
);
5167 /* Grab reference on the shadow stream file vnode to
5168 * force an inactive on release which will mark it
5171 if (vnode_isnamedstream(nd
.ni_vp
) &&
5172 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5173 vnode_isshadow(nd
.ni_vp
)) {
5175 vnode_ref(nd
.ni_vp
);
5179 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5182 if (is_namedstream
) {
5183 vnode_rele(nd
.ni_vp
);
5187 vnode_put(nd
.ni_vp
);
5188 if (amode
& _DELETE_OK
)
5189 vnode_put(nd
.ni_dvp
);
5193 if (!(flag
& AT_EACCESS
))
5194 kauth_cred_unref(&context
.vc_ucred
);
5199 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5201 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5202 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5206 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5207 __unused
int32_t *retval
)
5209 if (uap
->flag
& ~AT_EACCESS
)
5212 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5213 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5217 * Returns: 0 Success
5224 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5225 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5226 enum uio_seg segflg
, int fd
, int flag
)
5228 struct nameidata nd
;
5235 struct user64_stat user64_sb
;
5236 struct user32_stat user32_sb
;
5237 struct user64_stat64 user64_sb64
;
5238 struct user32_stat64 user32_sb64
;
5242 kauth_filesec_t fsec
;
5243 size_t xsecurity_bufsize
;
5246 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5247 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5251 int is_namedstream
= 0;
5252 /* stat calls are allowed for resource forks. */
5253 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5255 error
= nameiat(&nd
, fd
);
5258 fsec
= KAUTH_FILESEC_NONE
;
5260 statptr
= (void *)&source
;
5263 /* Grab reference on the shadow stream file vnode to
5264 * force an inactive on release which will mark it
5267 if (vnode_isnamedstream(nd
.ni_vp
) &&
5268 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5269 vnode_isshadow(nd
.ni_vp
)) {
5271 vnode_ref(nd
.ni_vp
);
5275 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5278 if (is_namedstream
) {
5279 vnode_rele(nd
.ni_vp
);
5282 vnode_put(nd
.ni_vp
);
5287 /* Zap spare fields */
5288 if (isstat64
!= 0) {
5289 source
.sb64
.st_lspare
= 0;
5290 source
.sb64
.st_qspare
[0] = 0LL;
5291 source
.sb64
.st_qspare
[1] = 0LL;
5292 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5293 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5294 my_size
= sizeof(dest
.user64_sb64
);
5295 sbp
= (caddr_t
)&dest
.user64_sb64
;
5297 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5298 my_size
= sizeof(dest
.user32_sb64
);
5299 sbp
= (caddr_t
)&dest
.user32_sb64
;
5302 * Check if we raced (post lookup) against the last unlink of a file.
5304 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5305 source
.sb64
.st_nlink
= 1;
5308 source
.sb
.st_lspare
= 0;
5309 source
.sb
.st_qspare
[0] = 0LL;
5310 source
.sb
.st_qspare
[1] = 0LL;
5311 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5312 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5313 my_size
= sizeof(dest
.user64_sb
);
5314 sbp
= (caddr_t
)&dest
.user64_sb
;
5316 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5317 my_size
= sizeof(dest
.user32_sb
);
5318 sbp
= (caddr_t
)&dest
.user32_sb
;
5322 * Check if we raced (post lookup) against the last unlink of a file.
5324 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5325 source
.sb
.st_nlink
= 1;
5328 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5331 /* caller wants extended security information? */
5332 if (xsecurity
!= USER_ADDR_NULL
) {
5334 /* did we get any? */
5335 if (fsec
== KAUTH_FILESEC_NONE
) {
5336 if (susize(xsecurity_size
, 0) != 0) {
5341 /* find the user buffer size */
5342 xsecurity_bufsize
= fusize(xsecurity_size
);
5344 /* copy out the actual data size */
5345 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5350 /* if the caller supplied enough room, copy out to it */
5351 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5352 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5356 if (fsec
!= KAUTH_FILESEC_NONE
)
5357 kauth_filesec_free(fsec
);
5362 * stat_extended: Get file status; with extended security (ACL).
5364 * Parameters: p (ignored)
5365 * uap User argument descriptor (see below)
5368 * Indirect: uap->path Path of file to get status from
5369 * uap->ub User buffer (holds file status info)
5370 * uap->xsecurity ACL to get (extended security)
5371 * uap->xsecurity_size Size of ACL
5373 * Returns: 0 Success
5378 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5379 __unused
int32_t *retval
)
5381 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5382 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5387 * Returns: 0 Success
5388 * fstatat_internal:??? [see fstatat_internal() in this file]
5391 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5393 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5394 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5398 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5400 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5401 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5405 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5407 * Parameters: p (ignored)
5408 * uap User argument descriptor (see below)
5411 * Indirect: uap->path Path of file to get status from
5412 * uap->ub User buffer (holds file status info)
5413 * uap->xsecurity ACL to get (extended security)
5414 * uap->xsecurity_size Size of ACL
5416 * Returns: 0 Success
5421 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5423 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5424 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5429 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5431 * Parameters: p (ignored)
5432 * uap User argument descriptor (see below)
5435 * Indirect: uap->path Path of file to get status from
5436 * uap->ub User buffer (holds file status info)
5437 * uap->xsecurity ACL to get (extended security)
5438 * uap->xsecurity_size Size of ACL
5440 * Returns: 0 Success
5445 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5447 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5448 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5449 AT_SYMLINK_NOFOLLOW
));
5453 * Get file status; this version does not follow links.
5456 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5458 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5459 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5463 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5465 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5466 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5470 * lstat64_extended: Get file status; can handle large inode numbers; does not
5471 * follow links; with extended security (ACL).
5473 * Parameters: p (ignored)
5474 * uap User argument descriptor (see below)
5477 * Indirect: uap->path Path of file to get status from
5478 * uap->ub User buffer (holds file status info)
5479 * uap->xsecurity ACL to get (extended security)
5480 * uap->xsecurity_size Size of ACL
5482 * Returns: 0 Success
5487 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5489 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5490 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5491 AT_SYMLINK_NOFOLLOW
));
5495 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5497 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5500 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5501 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5505 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5506 __unused
int32_t *retval
)
5508 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5511 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5512 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5516 * Get configurable pathname variables.
5518 * Returns: 0 Success
5522 * Notes: Global implementation constants are intended to be
5523 * implemented in this function directly; all other constants
5524 * are per-FS implementation, and therefore must be handled in
5525 * each respective FS, instead.
5527 * XXX We implement some things globally right now that should actually be
5528 * XXX per-FS; we will need to deal with this at some point.
5532 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5535 struct nameidata nd
;
5536 vfs_context_t ctx
= vfs_context_current();
5538 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5539 UIO_USERSPACE
, uap
->path
, ctx
);
5544 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5546 vnode_put(nd
.ni_vp
);
5552 * Return target name of a symbolic link.
5556 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5557 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5563 struct nameidata nd
;
5564 char uio_buf
[ UIO_SIZEOF(1) ];
5566 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5569 error
= nameiat(&nd
, fd
);
5576 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5577 &uio_buf
[0], sizeof(uio_buf
));
5578 uio_addiov(auio
, buf
, bufsize
);
5579 if (vp
->v_type
!= VLNK
) {
5583 error
= mac_vnode_check_readlink(ctx
, vp
);
5586 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5589 error
= VNOP_READLINK(vp
, auio
, ctx
);
5593 *retval
= bufsize
- (int)uio_resid(auio
);
5598 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5600 enum uio_seg procseg
;
5602 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5603 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5604 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5605 uap
->count
, procseg
, retval
));
5609 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5611 enum uio_seg procseg
;
5613 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5614 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5615 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5619 * Change file flags.
5622 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5624 struct vnode_attr va
;
5625 kauth_action_t action
;
5629 VATTR_SET(&va
, va_flags
, flags
);
5632 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5637 /* request authorisation, disregard immutability */
5638 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5641 * Request that the auth layer disregard those file flags it's allowed to when
5642 * authorizing this operation; we need to do this in order to be able to
5643 * clear immutable flags.
5645 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5647 error
= vnode_setattr(vp
, &va
, ctx
);
5649 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5658 * Change flags of a file given a path name.
5662 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5665 vfs_context_t ctx
= vfs_context_current();
5667 struct nameidata nd
;
5669 AUDIT_ARG(fflags
, uap
->flags
);
5670 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5671 UIO_USERSPACE
, uap
->path
, ctx
);
5678 error
= chflags1(vp
, uap
->flags
, ctx
);
5684 * Change flags of a file given a file descriptor.
5688 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5693 AUDIT_ARG(fd
, uap
->fd
);
5694 AUDIT_ARG(fflags
, uap
->flags
);
5695 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5698 if ((error
= vnode_getwithref(vp
))) {
5703 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5705 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5712 * Change security information on a filesystem object.
5714 * Returns: 0 Success
5715 * EPERM Operation not permitted
5716 * vnode_authattr:??? [anything vnode_authattr can return]
5717 * vnode_authorize:??? [anything vnode_authorize can return]
5718 * vnode_setattr:??? [anything vnode_setattr can return]
5720 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5721 * translated to EPERM before being returned.
5724 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5726 kauth_action_t action
;
5729 AUDIT_ARG(mode
, vap
->va_mode
);
5730 /* XXX audit new args */
5733 /* chmod calls are not allowed for resource forks. */
5734 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5740 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5741 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5745 /* make sure that the caller is allowed to set this security information */
5746 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5747 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5748 if (error
== EACCES
)
5753 error
= vnode_setattr(vp
, vap
, ctx
);
5760 * Change mode of a file given a path name.
5762 * Returns: 0 Success
5763 * namei:??? [anything namei can return]
5764 * chmod_vnode:??? [anything chmod_vnode can return]
5767 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5768 int fd
, int flag
, enum uio_seg segflg
)
5770 struct nameidata nd
;
5773 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5774 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5776 if ((error
= nameiat(&nd
, fd
)))
5778 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5779 vnode_put(nd
.ni_vp
);
5785 * chmod_extended: Change the mode of a file given a path name; with extended
5786 * argument list (including extended security (ACL)).
5788 * Parameters: p Process requesting the open
5789 * uap User argument descriptor (see below)
5792 * Indirect: uap->path Path to object (same as 'chmod')
5793 * uap->uid UID to set
5794 * uap->gid GID to set
5795 * uap->mode File mode to set (same as 'chmod')
5796 * uap->xsecurity ACL to set (or delete)
5798 * Returns: 0 Success
5801 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5803 * XXX: We should enumerate the possible errno values here, and where
5804 * in the code they originated.
5807 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
5810 struct vnode_attr va
;
5811 kauth_filesec_t xsecdst
;
5813 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5816 if (uap
->mode
!= -1)
5817 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5818 if (uap
->uid
!= KAUTH_UID_NONE
)
5819 VATTR_SET(&va
, va_uid
, uap
->uid
);
5820 if (uap
->gid
!= KAUTH_GID_NONE
)
5821 VATTR_SET(&va
, va_gid
, uap
->gid
);
5824 switch(uap
->xsecurity
) {
5825 /* explicit remove request */
5826 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5827 VATTR_SET(&va
, va_acl
, NULL
);
5830 case USER_ADDR_NULL
:
5833 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5835 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5836 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
5839 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
5842 if (xsecdst
!= NULL
)
5843 kauth_filesec_free(xsecdst
);
5848 * Returns: 0 Success
5849 * chmodat:??? [anything chmodat can return]
5852 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
5853 int flag
, enum uio_seg segflg
)
5855 struct vnode_attr va
;
5858 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
5860 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
5864 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
5866 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5867 AT_FDCWD
, 0, UIO_USERSPACE
));
5871 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
5873 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5876 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5877 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
5881 * Change mode of a file given a file descriptor.
5884 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
5891 if ((error
= file_vnode(fd
, &vp
)) != 0)
5893 if ((error
= vnode_getwithref(vp
)) != 0) {
5897 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5899 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
5900 (void)vnode_put(vp
);
5907 * fchmod_extended: Change mode of a file given a file descriptor; with
5908 * extended argument list (including extended security (ACL)).
5910 * Parameters: p Process requesting to change file mode
5911 * uap User argument descriptor (see below)
5914 * Indirect: uap->mode File mode to set (same as 'chmod')
5915 * uap->uid UID to set
5916 * uap->gid GID to set
5917 * uap->xsecurity ACL to set (or delete)
5918 * uap->fd File descriptor of file to change mode
5920 * Returns: 0 Success
5925 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
5928 struct vnode_attr va
;
5929 kauth_filesec_t xsecdst
;
5931 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5934 if (uap
->mode
!= -1)
5935 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5936 if (uap
->uid
!= KAUTH_UID_NONE
)
5937 VATTR_SET(&va
, va_uid
, uap
->uid
);
5938 if (uap
->gid
!= KAUTH_GID_NONE
)
5939 VATTR_SET(&va
, va_gid
, uap
->gid
);
5942 switch(uap
->xsecurity
) {
5943 case USER_ADDR_NULL
:
5944 VATTR_SET(&va
, va_acl
, NULL
);
5946 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5947 VATTR_SET(&va
, va_acl
, NULL
);
5950 case CAST_USER_ADDR_T(-1):
5953 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5955 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5958 error
= fchmod1(p
, uap
->fd
, &va
);
5961 switch(uap
->xsecurity
) {
5962 case USER_ADDR_NULL
:
5963 case CAST_USER_ADDR_T(-1):
5966 if (xsecdst
!= NULL
)
5967 kauth_filesec_free(xsecdst
);
5973 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
5975 struct vnode_attr va
;
5978 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5980 return(fchmod1(p
, uap
->fd
, &va
));
5985 * Set ownership given a path name.
5989 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
5990 gid_t gid
, int flag
, enum uio_seg segflg
)
5993 struct vnode_attr va
;
5995 struct nameidata nd
;
5997 kauth_action_t action
;
5999 AUDIT_ARG(owner
, uid
, gid
);
6001 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6002 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6004 error
= nameiat(&nd
, fd
);
6012 if (uid
!= (uid_t
)VNOVAL
)
6013 VATTR_SET(&va
, va_uid
, uid
);
6014 if (gid
!= (gid_t
)VNOVAL
)
6015 VATTR_SET(&va
, va_gid
, gid
);
6018 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6023 /* preflight and authorize attribute changes */
6024 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6026 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6028 error
= vnode_setattr(vp
, &va
, ctx
);
6032 * EACCES is only allowed from namei(); permissions failure should
6033 * return EPERM, so we need to translate the error code.
6035 if (error
== EACCES
)
6043 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6045 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6046 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
6050 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6052 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6053 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
6057 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6059 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6062 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6063 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6067 * Set ownership given a file descriptor.
6071 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6073 struct vnode_attr va
;
6074 vfs_context_t ctx
= vfs_context_current();
6077 kauth_action_t action
;
6079 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6080 AUDIT_ARG(fd
, uap
->fd
);
6082 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6085 if ( (error
= vnode_getwithref(vp
)) ) {
6089 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6092 if (uap
->uid
!= VNOVAL
)
6093 VATTR_SET(&va
, va_uid
, uap
->uid
);
6094 if (uap
->gid
!= VNOVAL
)
6095 VATTR_SET(&va
, va_gid
, uap
->gid
);
6098 /* chown calls are not allowed for resource forks. */
6099 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6106 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6111 /* preflight and authorize attribute changes */
6112 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6114 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6115 if (error
== EACCES
)
6119 error
= vnode_setattr(vp
, &va
, ctx
);
6122 (void)vnode_put(vp
);
6128 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6132 if (usrtvp
== USER_ADDR_NULL
) {
6133 struct timeval old_tv
;
6134 /* XXX Y2038 bug because of microtime argument */
6136 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6139 if (IS_64BIT_PROCESS(current_proc())) {
6140 struct user64_timeval tv
[2];
6141 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6144 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6145 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6147 struct user32_timeval tv
[2];
6148 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6151 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6152 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6159 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6163 struct vnode_attr va
;
6164 kauth_action_t action
;
6166 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6169 VATTR_SET(&va
, va_access_time
, ts
[0]);
6170 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6172 va
.va_vaflags
|= VA_UTIMES_NULL
;
6175 /* utimes calls are not allowed for resource forks. */
6176 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6183 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6187 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6188 if (!nullflag
&& error
== EACCES
)
6193 /* since we may not need to auth anything, check here */
6194 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6195 if (!nullflag
&& error
== EACCES
)
6199 error
= vnode_setattr(vp
, &va
, ctx
);
6206 * Set the access and modification times of a file.
6210 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6212 struct timespec ts
[2];
6215 struct nameidata nd
;
6216 vfs_context_t ctx
= vfs_context_current();
6219 * AUDIT: Needed to change the order of operations to do the
6220 * name lookup first because auditing wants the path.
6222 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6223 UIO_USERSPACE
, uap
->path
, ctx
);
6230 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6231 * the current time instead.
6234 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6237 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6240 vnode_put(nd
.ni_vp
);
6245 * Set the access and modification times of a file.
6249 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6251 struct timespec ts
[2];
6256 AUDIT_ARG(fd
, uap
->fd
);
6258 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6260 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6262 if((error
= vnode_getwithref(vp
))) {
6267 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6274 * Truncate a file given its path name.
6278 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6281 struct vnode_attr va
;
6282 vfs_context_t ctx
= vfs_context_current();
6284 struct nameidata nd
;
6285 kauth_action_t action
;
6287 if (uap
->length
< 0)
6289 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6290 UIO_USERSPACE
, uap
->path
, ctx
);
6291 if ((error
= namei(&nd
)))
6298 VATTR_SET(&va
, va_data_size
, uap
->length
);
6301 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6306 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6308 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6310 error
= vnode_setattr(vp
, &va
, ctx
);
6317 * Truncate a file given a file descriptor.
6321 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6323 vfs_context_t ctx
= vfs_context_current();
6324 struct vnode_attr va
;
6326 struct fileproc
*fp
;
6330 AUDIT_ARG(fd
, uap
->fd
);
6331 if (uap
->length
< 0)
6334 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6338 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6340 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6349 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
6351 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6352 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6357 if ((error
= vnode_getwithref(vp
)) != 0) {
6361 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6364 error
= mac_vnode_check_truncate(ctx
,
6365 fp
->f_fglob
->fg_cred
, vp
);
6367 (void)vnode_put(vp
);
6372 VATTR_SET(&va
, va_data_size
, uap
->length
);
6373 error
= vnode_setattr(vp
, &va
, ctx
);
6374 (void)vnode_put(vp
);
6382 * Sync an open file with synchronized I/O _file_ integrity completion
6386 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6388 __pthread_testcancel(1);
6389 return(fsync_common(p
, uap
, MNT_WAIT
));
6394 * Sync an open file with synchronized I/O _file_ integrity completion
6396 * Notes: This is a legacy support function that does not test for
6397 * thread cancellation points.
6401 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6403 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6408 * Sync an open file with synchronized I/O _data_ integrity completion
6412 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6414 __pthread_testcancel(1);
6415 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6422 * Common fsync code to support both synchronized I/O file integrity completion
6423 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6425 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6426 * will only guarantee that the file data contents are retrievable. If
6427 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6428 * includes additional metadata unnecessary for retrieving the file data
6429 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6432 * Parameters: p The process
6433 * uap->fd The descriptor to synchronize
6434 * flags The data integrity flags
6436 * Returns: int Success
6437 * fp_getfvp:EBADF Bad file descriptor
6438 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6439 * VNOP_FSYNC:??? unspecified
6441 * Notes: We use struct fsync_args because it is a short name, and all
6442 * caller argument structures are otherwise identical.
6445 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6448 struct fileproc
*fp
;
6449 vfs_context_t ctx
= vfs_context_current();
6452 AUDIT_ARG(fd
, uap
->fd
);
6454 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6456 if ( (error
= vnode_getwithref(vp
)) ) {
6461 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6463 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6466 /* Sync resource fork shadow file if necessary. */
6468 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6469 (vp
->v_parent
!= NULLVP
) &&
6470 vnode_isshadow(vp
) &&
6471 (fp
->f_flags
& FP_WRITTEN
)) {
6472 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6476 (void)vnode_put(vp
);
6482 * Duplicate files. Source must be a file, target must be a file or
6485 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6486 * perform inheritance correctly.
6490 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6492 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6493 struct nameidata fromnd
, tond
;
6495 vfs_context_t ctx
= vfs_context_current();
6497 /* Check that the flags are valid. */
6499 if (uap
->flags
& ~CPF_MASK
) {
6503 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, SAVESTART
| AUDITVNPATH1
,
6504 UIO_USERSPACE
, uap
->from
, ctx
);
6505 if ((error
= namei(&fromnd
)))
6509 NDINIT(&tond
, CREATE
, OP_LINK
,
6510 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6511 UIO_USERSPACE
, uap
->to
, ctx
);
6512 if ((error
= namei(&tond
))) {
6519 if (!(uap
->flags
& CPF_OVERWRITE
)) {
6524 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6529 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6535 * If source is the same as the destination (that is the
6536 * same inode number) then there is nothing to do.
6537 * (fixed to have POSIX semantics - CSM 3/2/98)
6542 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6544 sdvp
= tond
.ni_startdir
;
6546 * nameidone has to happen before we vnode_put(tdvp)
6547 * since it may need to release the fs_nodelock on the tdvp
6558 if (fromnd
.ni_startdir
)
6559 vnode_put(fromnd
.ni_startdir
);
6569 * Rename files. Source and destination must either both be directories,
6570 * or both not be directories. If target is a directory, it must be empty.
6574 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
6575 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
6579 struct nameidata
*fromnd
, *tond
;
6585 const char *oname
= NULL
;
6586 char *from_name
= NULL
, *to_name
= NULL
;
6587 int from_len
=0, to_len
=0;
6588 int holding_mntlock
;
6589 mount_t locked_mp
= NULL
;
6590 vnode_t oparent
= NULLVP
;
6592 fse_info from_finfo
, to_finfo
;
6594 int from_truncated
=0, to_truncated
;
6596 struct vnode_attr
*fvap
, *tvap
;
6598 /* carving out a chunk for structs that are too big to be on stack. */
6600 struct nameidata from_node
, to_node
;
6601 struct vnode_attr fv_attr
, tv_attr
;
6603 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
6604 fromnd
= &__rename_data
->from_node
;
6605 tond
= &__rename_data
->to_node
;
6607 holding_mntlock
= 0;
6616 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
6618 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6620 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6622 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6625 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6626 if ( (error
= nameiat(fromnd
, fromfd
)) )
6628 fdvp
= fromnd
->ni_dvp
;
6629 fvp
= fromnd
->ni_vp
;
6631 if (fvp
&& fvp
->v_type
== VDIR
)
6632 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
6635 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6636 if ( (error
= nameiat(tond
, tofd
)) ) {
6638 * Translate error code for rename("dir1", "dir2/.").
6640 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
6644 tdvp
= tond
->ni_dvp
;
6648 batched
= vnode_compound_rename_available(fdvp
);
6651 * Claim: this check will never reject a valid rename.
6652 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6653 * Suppose fdvp and tdvp are not on the same mount.
6654 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6655 * then you can't move it to within another dir on the same mountpoint.
6656 * If fvp sits atop a vnode on the same mount as tdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6658 * If this check passes, then we are safe to pass these vnodes to the same FS.
6660 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
6664 goto skipped_lookup
;
6668 error
= vn_authorize_rename(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, NULL
);
6670 if (error
== ENOENT
) {
6671 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
6672 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
6674 * We encountered a race where after doing the namei, tvp stops
6675 * being valid. If so, simply re-drive the rename call from the
6687 * If the source and destination are the same (i.e. they're
6688 * links to the same vnode) and the target file system is
6689 * case sensitive, then there is nothing to do.
6691 * XXX Come back to this.
6697 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6698 * then assume that this file system is case sensitive.
6700 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
6701 pathconf_val
!= 0) {
6707 * Allow the renaming of mount points.
6708 * - target must not exist
6709 * - target must reside in the same directory as source
6710 * - union mounts cannot be renamed
6711 * - "/" cannot be renamed
6713 * XXX Handle this in VFS after a continued lookup (if we missed
6714 * in the cache to start off)
6716 if ((fvp
->v_flag
& VROOT
) &&
6717 (fvp
->v_type
== VDIR
) &&
6719 (fvp
->v_mountedhere
== NULL
) &&
6721 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
6722 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
6725 /* switch fvp to the covered vnode */
6726 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
6727 if ( (vnode_getwithref(coveredvp
)) ) {
6737 * Check for cross-device rename.
6739 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
6740 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
6746 * If source is the same as the destination (that is the
6747 * same inode number) then there is nothing to do...
6748 * EXCEPT if the underlying file system supports case
6749 * insensitivity and is case preserving. In this case
6750 * the file system needs to handle the special case of
6751 * getting the same vnode as source (fvp) and target (tvp).
6753 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6754 * and _PC_CASE_PRESERVING can have this exception, and they need to
6755 * handle the special case of getting the same vnode as target and
6756 * source. NOTE: Then the target is unlocked going into vnop_rename,
6757 * so not to cause locking problems. There is a single reference on tvp.
6759 * NOTE - that fvp == tvp also occurs if they are hard linked and
6760 * that correct behaviour then is just to return success without doing
6763 * XXX filesystem should take care of this itself, perhaps...
6765 if (fvp
== tvp
&& fdvp
== tdvp
) {
6766 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
6767 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
6768 fromnd
->ni_cnd
.cn_namelen
)) {
6773 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
6775 * we're holding a reference and lock
6776 * on locked_mp, but it no longer matches
6777 * what we want to do... so drop our hold
6779 mount_unlock_renames(locked_mp
);
6780 mount_drop(locked_mp
, 0);
6781 holding_mntlock
= 0;
6783 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
6785 * serialize renames that re-shape
6786 * the tree... if holding_mntlock is
6787 * set, then we're ready to go...
6789 * first need to drop the iocounts
6790 * we picked up, second take the
6791 * lock to serialize the access,
6792 * then finally start the lookup
6793 * process over with the lock held
6795 if (!holding_mntlock
) {
6797 * need to grab a reference on
6798 * the mount point before we
6799 * drop all the iocounts... once
6800 * the iocounts are gone, the mount
6803 locked_mp
= fvp
->v_mount
;
6804 mount_ref(locked_mp
, 0);
6807 * nameidone has to happen before we vnode_put(tvp)
6808 * since it may need to release the fs_nodelock on the tvp
6817 * nameidone has to happen before we vnode_put(fdvp)
6818 * since it may need to release the fs_nodelock on the fvp
6825 mount_lock_renames(locked_mp
);
6826 holding_mntlock
= 1;
6832 * when we dropped the iocounts to take
6833 * the lock, we allowed the identity of
6834 * the various vnodes to change... if they did,
6835 * we may no longer be dealing with a rename
6836 * that reshapes the tree... once we're holding
6837 * the iocounts, the vnodes can't change type
6838 * so we're free to drop the lock at this point
6841 if (holding_mntlock
) {
6842 mount_unlock_renames(locked_mp
);
6843 mount_drop(locked_mp
, 0);
6844 holding_mntlock
= 0;
6848 // save these off so we can later verify that fvp is the same
6849 oname
= fvp
->v_name
;
6850 oparent
= fvp
->v_parent
;
6854 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
6857 get_fse_info(fvp
, &from_finfo
, ctx
);
6859 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
6864 fvap
= &__rename_data
->fv_attr
;
6868 get_fse_info(tvp
, &to_finfo
, ctx
);
6869 } else if (batched
) {
6870 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
6875 tvap
= &__rename_data
->tv_attr
;
6880 #endif /* CONFIG_FSE */
6882 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
6883 if (from_name
== NULL
) {
6884 GET_PATH(from_name
);
6885 if (from_name
== NULL
) {
6891 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
6893 if (to_name
== NULL
) {
6895 if (to_name
== NULL
) {
6901 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
6903 #if CONFIG_SECLUDED_RENAME
6904 if (flags
& VFS_SECLUDE_RENAME
) {
6905 fromnd
->ni_cnd
.cn_flags
|= CN_SECLUDE_RENAME
;
6908 #pragma unused(flags)
6910 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
6911 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
6914 if (holding_mntlock
) {
6916 * we can drop our serialization
6919 mount_unlock_renames(locked_mp
);
6920 mount_drop(locked_mp
, 0);
6921 holding_mntlock
= 0;
6924 if (error
== EKEEPLOOKING
) {
6925 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6926 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6927 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6931 fromnd
->ni_vp
= fvp
;
6934 goto continue_lookup
;
6938 * We may encounter a race in the VNOP where the destination didn't
6939 * exist when we did the namei, but it does by the time we go and
6940 * try to create the entry. In this case, we should re-drive this rename
6941 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6942 * but other filesystems susceptible to this race could return it, too.
6944 if (error
== ERECYCLE
) {
6949 * For compound VNOPs, the authorization callback may return
6950 * ENOENT in case of racing hardlink lookups hitting the name
6951 * cache, redrive the lookup.
6953 if (batched
&& error
== ENOENT
) {
6954 assert(retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
6955 if (retry_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
6964 /* call out to allow 3rd party notification of rename.
6965 * Ignore result of kauth_authorize_fileop call.
6967 kauth_authorize_fileop(vfs_context_ucred(ctx
),
6968 KAUTH_FILEOP_RENAME
,
6969 (uintptr_t)from_name
, (uintptr_t)to_name
);
6972 if (from_name
!= NULL
&& to_name
!= NULL
) {
6973 if (from_truncated
|| to_truncated
) {
6974 // set it here since only the from_finfo gets reported up to user space
6975 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
6979 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
6982 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
6986 add_fsevent(FSE_RENAME
, ctx
,
6987 FSE_ARG_STRING
, from_len
, from_name
,
6988 FSE_ARG_FINFO
, &from_finfo
,
6989 FSE_ARG_STRING
, to_len
, to_name
,
6990 FSE_ARG_FINFO
, &to_finfo
,
6993 add_fsevent(FSE_RENAME
, ctx
,
6994 FSE_ARG_STRING
, from_len
, from_name
,
6995 FSE_ARG_FINFO
, &from_finfo
,
6996 FSE_ARG_STRING
, to_len
, to_name
,
7000 #endif /* CONFIG_FSE */
7003 * update filesystem's mount point data
7006 char *cp
, *pathend
, *mpname
;
7012 mp
= fvp
->v_mountedhere
;
7014 if (vfs_busy(mp
, LK_NOWAIT
)) {
7018 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7020 if (UIO_SEG_IS_USER_SPACE(segflg
))
7021 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7023 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7025 /* find current mount point prefix */
7026 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7027 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7031 /* find last component of target name */
7032 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7036 /* append name to prefix */
7037 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7038 bzero(pathend
, maxlen
);
7039 strlcpy(pathend
, mpname
, maxlen
);
7041 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7046 * fix up name & parent pointers. note that we first
7047 * check that fvp has the same name/parent pointers it
7048 * had before the rename call... this is a 'weak' check
7051 * XXX oparent and oname may not be set in the compound vnop case
7053 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7056 update_flags
= VNODE_UPDATE_NAME
;
7059 update_flags
|= VNODE_UPDATE_PARENT
;
7061 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7064 if (to_name
!= NULL
) {
7065 RELEASE_PATH(to_name
);
7068 if (from_name
!= NULL
) {
7069 RELEASE_PATH(from_name
);
7072 if (holding_mntlock
) {
7073 mount_unlock_renames(locked_mp
);
7074 mount_drop(locked_mp
, 0);
7075 holding_mntlock
= 0;
7079 * nameidone has to happen before we vnode_put(tdvp)
7080 * since it may need to release the fs_nodelock on the tdvp
7090 * nameidone has to happen before we vnode_put(fdvp)
7091 * since it may need to release the fs_nodelock on the fdvp
7101 * If things changed after we did the namei, then we will re-drive
7102 * this rename call from the top.
7109 FREE(__rename_data
, M_TEMP
);
7114 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7116 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7117 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
7120 #if CONFIG_SECLUDED_RENAME
7121 int rename_ext(__unused proc_t p
, struct rename_ext_args
*uap
, __unused
int32_t *retval
)
7123 return renameat_internal(
7124 vfs_context_current(),
7125 AT_FDCWD
, uap
->from
,
7127 UIO_USERSPACE
, uap
->flags
);
7132 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7134 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7135 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7139 * Make a directory file.
7141 * Returns: 0 Success
7144 * vnode_authorize:???
7149 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7150 enum uio_seg segflg
)
7154 int update_flags
= 0;
7156 struct nameidata nd
;
7158 AUDIT_ARG(mode
, vap
->va_mode
);
7159 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7161 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7162 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7165 error
= nameiat(&nd
, fd
);
7176 batched
= vnode_compound_mkdir_available(dvp
);
7178 VATTR_SET(vap
, va_type
, VDIR
);
7182 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7183 * only get EEXIST or EISDIR for existing path components, and not that it could see
7184 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7185 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7187 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7188 if (error
== EACCES
|| error
== EPERM
) {
7196 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7197 * rather than EACCES if the target exists.
7199 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7201 error2
= nameiat(&nd
, fd
);
7215 * make the directory
7217 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7218 if (error
== EKEEPLOOKING
) {
7220 goto continue_lookup
;
7226 // Make sure the name & parent pointers are hooked up
7227 if (vp
->v_name
== NULL
)
7228 update_flags
|= VNODE_UPDATE_NAME
;
7229 if (vp
->v_parent
== NULLVP
)
7230 update_flags
|= VNODE_UPDATE_PARENT
;
7233 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7236 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7241 * nameidone has to happen before we vnode_put(dvp)
7242 * since it may need to release the fs_nodelock on the dvp
7255 * mkdir_extended: Create a directory; with extended security (ACL).
7257 * Parameters: p Process requesting to create the directory
7258 * uap User argument descriptor (see below)
7261 * Indirect: uap->path Path of directory to create
7262 * uap->mode Access permissions to set
7263 * uap->xsecurity ACL to set
7265 * Returns: 0 Success
7270 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7273 kauth_filesec_t xsecdst
;
7274 struct vnode_attr va
;
7276 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7279 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7280 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7284 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7285 if (xsecdst
!= NULL
)
7286 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7288 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7290 if (xsecdst
!= NULL
)
7291 kauth_filesec_free(xsecdst
);
7296 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7298 struct vnode_attr va
;
7301 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7303 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7308 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7310 struct vnode_attr va
;
7313 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7315 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
7320 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7321 enum uio_seg segflg
)
7325 struct nameidata nd
;
7328 int has_listeners
= 0;
7332 struct vnode_attr va
;
7333 #endif /* CONFIG_FSE */
7334 struct vnode_attr
*vap
= NULL
;
7335 int restart_count
= 0;
7341 * This loop exists to restart rmdir in the unlikely case that two
7342 * processes are simultaneously trying to remove the same directory
7343 * containing orphaned appleDouble files.
7346 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7347 segflg
, dirpath
, ctx
);
7348 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7353 error
= nameiat(&nd
, fd
);
7361 batched
= vnode_compound_rmdir_available(vp
);
7363 if (vp
->v_flag
& VROOT
) {
7365 * The root of a mounted filesystem cannot be deleted.
7372 * Removed a check here; we used to abort if vp's vid
7373 * was not the same as what we'd seen the last time around.
7374 * I do not think that check was valid, because if we retry
7375 * and all dirents are gone, the directory could legitimately
7376 * be recycled but still be present in a situation where we would
7377 * have had permission to delete. Therefore, we won't make
7378 * an effort to preserve that check now that we may not have a
7383 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7385 if (error
== ENOENT
) {
7386 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7387 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7398 if (!vnode_compound_rmdir_available(dvp
)) {
7399 panic("No error, but no compound rmdir?");
7406 need_event
= need_fsevent(FSE_DELETE
, dvp
);
7409 get_fse_info(vp
, &finfo
, ctx
);
7411 error
= vfs_get_notify_attributes(&va
);
7420 has_listeners
= kauth_authorize_fileop_has_listeners();
7421 if (need_event
|| has_listeners
) {
7430 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
7433 finfo
.mode
|= FSE_TRUNCATED_PATH
;
7438 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7441 /* Couldn't find a vnode */
7445 if (error
== EKEEPLOOKING
) {
7446 goto continue_lookup
;
7447 } else if (batched
&& error
== ENOENT
) {
7448 assert(restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
);
7449 if (restart_count
< MAX_AUTHORIZE_ENOENT_RETRIES
) {
7451 * For compound VNOPs, the authorization callback
7452 * may return ENOENT in case of racing hard link lookups;
7453 * redrive the lookup.
7460 #if CONFIG_APPLEDOUBLE
7462 * Special case to remove orphaned AppleDouble
7463 * files. I don't like putting this in the kernel,
7464 * but carbon does not like putting this in carbon either,
7467 if (error
== ENOTEMPTY
) {
7468 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
7469 if (error
== EBUSY
) {
7475 * Assuming everything went well, we will try the RMDIR again
7478 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7480 #endif /* CONFIG_APPLEDOUBLE */
7482 * Call out to allow 3rd party notification of delete.
7483 * Ignore result of kauth_authorize_fileop call.
7486 if (has_listeners
) {
7487 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7488 KAUTH_FILEOP_DELETE
,
7493 if (vp
->v_flag
& VISHARDLINK
) {
7494 // see the comment in unlink1() about why we update
7495 // the parent of a hard link when it is removed
7496 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
7502 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
7504 add_fsevent(FSE_DELETE
, ctx
,
7505 FSE_ARG_STRING
, len
, path
,
7506 FSE_ARG_FINFO
, &finfo
,
7518 * nameidone has to happen before we vnode_put(dvp)
7519 * since it may need to release the fs_nodelock on the dvp
7527 if (restart_flag
== 0) {
7528 wakeup_one((caddr_t
)vp
);
7531 tsleep(vp
, PVFS
, "rm AD", 1);
7533 } while (restart_flag
!= 0);
7540 * Remove a directory file.
7544 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
7546 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
7547 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * Length of a struct direntry record holding a name of `namlen' bytes:
 * the structure size, less (MAXPATHLEN - 1), plus the name length,
 * rounded up to a multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	(((namlen) + sizeof(struct direntry) - (MAXPATHLEN - 1) + 7) & ~7)
7555 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
7556 int *numdirent
, vfs_context_t ctxp
)
7558 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
7559 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
7560 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
7561 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
7566 struct direntry
*entry64
;
7572 * Our kernel buffer needs to be smaller since re-packing
7573 * will expand each dirent. The worse case (when the name
7574 * length is 3) corresponds to a struct direntry size of 32
7575 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7576 * (4-byte aligned). So having a buffer that is 3/8 the size
7577 * will prevent us from reading more than we can pack.
7579 * Since this buffer is wired memory, we will limit the
7580 * buffer size to a maximum of 32K. We would really like to
7581 * use 32K in the MIN(), but we use magic number 87371 to
7582 * prevent uio_resid() * 3 / 8 from overflowing.
7584 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
7585 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
7586 if (bufptr
== NULL
) {
7590 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
7591 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
7592 auio
->uio_offset
= uio
->uio_offset
;
7594 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
7596 dep
= (struct dirent
*)bufptr
;
7597 bytesread
= bufsize
- uio_resid(auio
);
7599 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
7602 * Convert all the entries and copy them out to user's buffer.
7604 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
7605 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
7607 bzero(entry64
, enbufsize
);
7608 /* Convert a dirent to a dirent64. */
7609 entry64
->d_ino
= dep
->d_ino
;
7610 entry64
->d_seekoff
= 0;
7611 entry64
->d_reclen
= enbufsize
;
7612 entry64
->d_namlen
= dep
->d_namlen
;
7613 entry64
->d_type
= dep
->d_type
;
7614 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
7616 /* Move to next entry. */
7617 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
7619 /* Copy entry64 to user's buffer. */
7620 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
7623 /* Update the real offset using the offset we got from VNOP_READDIR. */
7625 uio
->uio_offset
= auio
->uio_offset
;
7628 FREE(bufptr
, M_TEMP
);
7629 FREE(entry64
, M_TEMP
);
7634 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7637 * Read a block of directory entries in a file system independent format.
7640 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
7641 off_t
*offset
, int flags
)
7644 struct vfs_context context
= *vfs_context_current(); /* local copy */
7645 struct fileproc
*fp
;
7647 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7649 int error
, eofflag
, numdirent
;
7650 char uio_buf
[ UIO_SIZEOF(1) ];
7652 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
7656 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7657 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7662 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
7663 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
7666 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
7670 if ( (error
= vnode_getwithref(vp
)) ) {
7673 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7676 if (vp
->v_type
!= VDIR
) {
7677 (void)vnode_put(vp
);
7683 error
= mac_vnode_check_readdir(&context
, vp
);
7685 (void)vnode_put(vp
);
7690 loff
= fp
->f_fglob
->fg_offset
;
7691 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7692 uio_addiov(auio
, bufp
, bufsize
);
7694 if (flags
& VNODE_READDIR_EXTENDED
) {
7695 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
7696 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7698 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
7699 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7702 (void)vnode_put(vp
);
7706 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
7707 if (union_dircheckp
) {
7708 error
= union_dircheckp(&vp
, fp
, &context
);
7715 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
7716 struct vnode
*tvp
= vp
;
7717 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
7719 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7720 fp
->f_fglob
->fg_offset
= 0;
7734 *bytesread
= bufsize
- uio_resid(auio
);
7742 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
7748 AUDIT_ARG(fd
, uap
->fd
);
7749 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
7752 if (proc_is64bit(p
)) {
7753 user64_long_t base
= (user64_long_t
)offset
;
7754 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
7756 user32_long_t base
= (user32_long_t
)offset
;
7757 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
7759 *retval
= bytesread
;
7765 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
7771 AUDIT_ARG(fd
, uap
->fd
);
7772 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
7775 *retval
= bytesread
;
7776 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
7783 * Set the mode mask for creation of filesystem nodes.
7784 * XXX implement xsecurity
7786 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7788 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
7790 struct filedesc
*fdp
;
7792 AUDIT_ARG(mask
, newmask
);
7795 *retval
= fdp
->fd_cmask
;
7796 fdp
->fd_cmask
= newmask
& ALLPERMS
;
7802 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7804 * Parameters: p Process requesting to set the umask
7805 * uap User argument descriptor (see below)
7806 * retval umask of the process (parameter p)
7808 * Indirect: uap->newmask umask to set
7809 * uap->xsecurity ACL to set
7811 * Returns: 0 Success
7816 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
7819 kauth_filesec_t xsecdst
;
7821 xsecdst
= KAUTH_FILESEC_NONE
;
7822 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
7823 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
7826 xsecdst
= KAUTH_FILESEC_NONE
;
7829 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
7831 if (xsecdst
!= KAUTH_FILESEC_NONE
)
7832 kauth_filesec_free(xsecdst
);
7837 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
7839 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
7843 * Void all references to file by ripping underlying filesystem
7848 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
7851 struct vnode_attr va
;
7852 vfs_context_t ctx
= vfs_context_current();
7854 struct nameidata nd
;
7856 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
7865 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
7870 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
7876 error
= mac_vnode_check_revoke(ctx
, vp
);
7882 VATTR_WANTED(&va
, va_uid
);
7883 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
7885 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
7886 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
7888 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
7889 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
7897 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7898 * The following system calls are designed to support features
7899 * which are specific to the HFS & HFS Plus volume formats
7904 * Obtain attribute information on objects in a directory while enumerating
7909 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
7912 struct fileproc
*fp
;
7914 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7915 uint32_t count
, savecount
;
7919 struct attrlist attributelist
;
7920 vfs_context_t ctx
= vfs_context_current();
7922 char uio_buf
[ UIO_SIZEOF(1) ];
7923 kauth_action_t action
;
7927 /* Get the attributes into kernel space */
7928 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
7931 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
7935 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
7938 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7939 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7946 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
7953 if ( (error
= vnode_getwithref(vp
)) )
7956 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7959 if (vp
->v_type
!= VDIR
) {
7960 (void)vnode_put(vp
);
7966 error
= mac_vnode_check_readdir(ctx
, vp
);
7968 (void)vnode_put(vp
);
7973 /* set up the uio structure which will contain the users return buffer */
7974 loff
= fp
->f_fglob
->fg_offset
;
7975 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7976 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
7979 * If the only item requested is file names, we can let that past with
7980 * just LIST_DIRECTORY. If they want any other attributes, that means
7981 * they need SEARCH as well.
7983 action
= KAUTH_VNODE_LIST_DIRECTORY
;
7984 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
7985 attributelist
.fileattr
|| attributelist
.dirattr
)
7986 action
|= KAUTH_VNODE_SEARCH
;
7988 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
7990 /* Believe it or not, uap->options only has 32-bits of valid
7991 * info, so truncate before extending again */
7993 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
7994 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
7998 (void) vnode_put(vp
);
8003 * If we've got the last entry of a directory in a union mount
8004 * then reset the eofflag and pretend there's still more to come.
8005 * The next call will again set eofflag and the buffer will be empty,
8006 * so traverse to the underlying directory and do the directory
8009 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
8010 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
8012 } else { // Empty buffer
8013 struct vnode
*tvp
= vp
;
8014 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
8015 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
8016 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
8017 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
8019 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
8027 (void)vnode_put(vp
);
8031 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
8033 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8035 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8037 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8040 *retval
= eofflag
; /* similar to getdirentries */
8044 return (error
); /* return error earlier, an retval of 0 or 1 now */
8046 } /* end of getdirentriesattr system call */
8049 * Exchange data between two files
8054 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8057 struct nameidata fnd
, snd
;
8058 vfs_context_t ctx
= vfs_context_current();
8062 u_int32_t nameiflags
;
8066 int from_truncated
=0, to_truncated
=0;
8068 fse_info f_finfo
, s_finfo
;
8072 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8074 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8075 UIO_USERSPACE
, uap
->path1
, ctx
);
8077 error
= namei(&fnd
);
8084 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8085 UIO_USERSPACE
, uap
->path2
, ctx
);
8087 error
= namei(&snd
);
8096 * if the files are the same, return an inval error
8104 * if the files are on different volumes, return an error
8106 if (svp
->v_mount
!= fvp
->v_mount
) {
8111 /* If they're not files, return an error */
8112 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8118 error
= mac_vnode_check_exchangedata(ctx
,
8123 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8124 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8129 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8131 kauth_authorize_fileop_has_listeners()) {
8134 if (fpath
== NULL
|| spath
== NULL
) {
8139 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8140 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8143 get_fse_info(fvp
, &f_finfo
, ctx
);
8144 get_fse_info(svp
, &s_finfo
, ctx
);
8145 if (from_truncated
|| to_truncated
) {
8146 // set it here since only the f_finfo gets reported up to user space
8147 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8151 /* Ok, make the call */
8152 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8155 const char *tmpname
;
8157 if (fpath
!= NULL
&& spath
!= NULL
) {
8158 /* call out to allow 3rd party notification of exchangedata.
8159 * Ignore result of kauth_authorize_fileop call.
8161 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8162 (uintptr_t)fpath
, (uintptr_t)spath
);
8166 tmpname
= fvp
->v_name
;
8167 fvp
->v_name
= svp
->v_name
;
8168 svp
->v_name
= tmpname
;
8170 if (fvp
->v_parent
!= svp
->v_parent
) {
8173 tmp
= fvp
->v_parent
;
8174 fvp
->v_parent
= svp
->v_parent
;
8175 svp
->v_parent
= tmp
;
8177 name_cache_unlock();
8180 if (fpath
!= NULL
&& spath
!= NULL
) {
8181 add_fsevent(FSE_EXCHANGE
, ctx
,
8182 FSE_ARG_STRING
, flen
, fpath
,
8183 FSE_ARG_FINFO
, &f_finfo
,
8184 FSE_ARG_STRING
, slen
, spath
,
8185 FSE_ARG_FINFO
, &s_finfo
,
8193 RELEASE_PATH(fpath
);
8195 RELEASE_PATH(spath
);
8203 * Return (in MB) the amount of freespace on the given vnode's volume.
8205 uint32_t freespace_mb(vnode_t vp
);
8208 freespace_mb(vnode_t vp
)
8210 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8211 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8212 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
8220 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8225 struct nameidata nd
;
8226 struct user64_fssearchblock searchblock
;
8227 struct searchstate
*state
;
8228 struct attrlist
*returnattrs
;
8229 struct timeval timelimit
;
8230 void *searchparams1
,*searchparams2
;
8232 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8233 uint32_t nummatches
;
8235 uint32_t nameiflags
;
8236 vfs_context_t ctx
= vfs_context_current();
8237 char uio_buf
[ UIO_SIZEOF(1) ];
8239 /* Start by copying in fsearchblock parameter list */
8240 if (IS_64BIT_PROCESS(p
)) {
8241 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8242 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8243 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8246 struct user32_fssearchblock tmp_searchblock
;
8248 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8249 // munge into 64-bit version
8250 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8251 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8252 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8253 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8255 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8256 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8258 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8259 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8260 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8261 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8262 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8263 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8264 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8269 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8271 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8272 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8275 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8276 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
8277 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8280 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8281 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8282 /* assumes the size is still 556 bytes it will continue to work */
8284 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8285 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8287 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8289 /* Now set up the various pointers to the correct place in our newly allocated memory */
8291 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8292 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8293 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8295 /* Now copy in the stuff given our local variables. */
8297 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8300 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8303 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8306 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8310 * When searching a union mount, need to set the
8311 * start flag at the first call on each layer to
8312 * reset state for the new volume.
8314 if (uap
->options
& SRCHFS_START
)
8315 state
->ss_union_layer
= 0;
8317 uap
->options
|= state
->ss_union_flags
;
8318 state
->ss_union_flags
= 0;
8321 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8322 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8323 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8324 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8325 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8328 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8329 attrreference_t
* string_ref
;
8330 u_int32_t
* start_length
;
8331 user64_size_t param_length
;
8333 /* validate searchparams1 */
8334 param_length
= searchblock
.sizeofsearchparams1
;
8335 /* skip the word that specifies length of the buffer */
8336 start_length
= (u_int32_t
*) searchparams1
;
8337 start_length
= start_length
+1;
8338 string_ref
= (attrreference_t
*) start_length
;
8340 /* ensure no negative offsets or too big offsets */
8341 if (string_ref
->attr_dataoffset
< 0 ) {
8345 if (string_ref
->attr_length
> MAXPATHLEN
) {
8350 /* Check for pointer overflow in the string ref */
8351 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8356 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8360 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8366 /* set up the uio structure which will contain the users return buffer */
8367 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8368 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8371 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8372 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8373 UIO_USERSPACE
, uap
->path
, ctx
);
8382 * Switch to the root vnode for the volume
8384 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8391 * If it's a union mount, the path lookup takes
8392 * us to the top layer. But we may need to descend
8393 * to a lower layer. For non-union mounts the layer
8396 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
8397 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
8400 vp
= vp
->v_mount
->mnt_vnodecovered
;
8406 vnode_getwithref(vp
);
8411 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
8420 * If searchblock.maxmatches == 0, then skip the search. This has happened
8421 * before and sometimes the underlying code doesn't deal with it well.
8423 if (searchblock
.maxmatches
== 0) {
8429 * All right, we have everything we need, so let's make that call.
8431 * We keep special track of the return value from the file system:
8432 * EAGAIN is an acceptable error condition that shouldn't keep us
8433 * from copying out any results...
8436 fserror
= VNOP_SEARCHFS(vp
,
8439 &searchblock
.searchattrs
,
8440 (u_long
)searchblock
.maxmatches
,
8444 (u_long
)uap
->scriptcode
,
8445 (u_long
)uap
->options
,
8447 (struct searchstate
*) &state
->ss_fsstate
,
8451 * If it's a union mount we need to be called again
8452 * to search the mounted-on filesystem.
8454 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
8455 state
->ss_union_flags
= SRCHFS_START
;
8456 state
->ss_union_layer
++; // search next layer down
8464 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8465 search state. Everything was already put into the return buffer by the vop call. */
8467 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
8470 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
8477 FREE(searchparams1
,M_TEMP
);
8482 } /* end of searchfs system call */
8484 #else /* CONFIG_SEARCHFS */
8487 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
8492 #endif /* CONFIG_SEARCHFS */
8495 lck_grp_attr_t
* nspace_group_attr
;
8496 lck_attr_t
* nspace_lock_attr
;
8497 lck_grp_t
* nspace_mutex_group
;
8499 lck_mtx_t nspace_handler_lock
;
8500 lck_mtx_t nspace_handler_exclusion_lock
;
8502 time_t snapshot_timestamp
=0;
8503 int nspace_allow_virtual_devs
=0;
8505 void nspace_handler_init(void);
8507 typedef struct nspace_item_info
{
8517 #define MAX_NSPACE_ITEMS 128
8518 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
8519 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
8520 uint32_t nspace_token_id
=0;
8521 uint32_t nspace_handler_timeout
= 15; // seconds
8523 #define NSPACE_ITEM_NEW 0x0001
8524 #define NSPACE_ITEM_PROCESSING 0x0002
8525 #define NSPACE_ITEM_DEAD 0x0004
8526 #define NSPACE_ITEM_CANCELLED 0x0008
8527 #define NSPACE_ITEM_DONE 0x0010
8528 #define NSPACE_ITEM_RESET_TIMER 0x0020
8530 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
8531 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
8533 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
8535 //#pragma optimization_level 0
8538 NSPACE_HANDLER_NSPACE
= 0,
8539 NSPACE_HANDLER_SNAPSHOT
= 1,
8541 NSPACE_HANDLER_COUNT
,
8545 uint64_t handler_tid
;
8546 struct proc
*handler_proc
;
8550 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
8552 /* namespace fsctl functions */
8553 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
8554 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
8555 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
8556 static nspace_type_t
nspace_type_for_op(uint64_t op
);
8557 static int nspace_is_special_process(struct proc
*proc
);
8558 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
8559 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
8560 static int validate_namespace_args (int is64bit
, int size
);
8561 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
8564 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
8566 switch(nspace_type
) {
8567 case NSPACE_HANDLER_NSPACE
:
8568 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
8569 case NSPACE_HANDLER_SNAPSHOT
:
8570 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
8572 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
8577 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
8579 switch(nspace_type
) {
8580 case NSPACE_HANDLER_NSPACE
:
8581 return NSPACE_ITEM_NSPACE_EVENT
;
8582 case NSPACE_HANDLER_SNAPSHOT
:
8583 return NSPACE_ITEM_SNAPSHOT_EVENT
;
8585 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
8590 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
8592 switch(nspace_type
) {
8593 case NSPACE_HANDLER_NSPACE
:
8594 return FREAD
| FWRITE
| O_EVTONLY
;
8595 case NSPACE_HANDLER_SNAPSHOT
:
8596 return FREAD
| O_EVTONLY
;
8598 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
8603 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
8605 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
8606 case NAMESPACE_HANDLER_NSPACE_EVENT
:
8607 return NSPACE_HANDLER_NSPACE
;
8608 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
8609 return NSPACE_HANDLER_SNAPSHOT
;
8611 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
8612 return NSPACE_HANDLER_NSPACE
;
8616 static inline int nspace_is_special_process(struct proc
*proc
)
8619 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8620 if (proc
== nspace_handlers
[i
].handler_proc
)
8627 nspace_handler_init(void)
8629 nspace_lock_attr
= lck_attr_alloc_init();
8630 nspace_group_attr
= lck_grp_attr_alloc_init();
8631 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
8632 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
8633 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
8634 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
8638 nspace_proc_exit(struct proc
*p
)
8640 int i
, event_mask
= 0;
8642 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8643 if (p
== nspace_handlers
[i
].handler_proc
) {
8644 event_mask
|= nspace_item_flags_for_type(i
);
8645 nspace_handlers
[i
].handler_tid
= 0;
8646 nspace_handlers
[i
].handler_proc
= NULL
;
8650 if (event_mask
== 0) {
8654 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8655 // if this process was the snapshot handler, zero snapshot_timeout
8656 snapshot_timestamp
= 0;
8660 // unblock anyone that's waiting for the handler that died
8662 lck_mtx_lock(&nspace_handler_lock
);
8663 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8664 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
8666 if ( nspace_items
[i
].flags
& event_mask
) {
8668 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
8669 vnode_lock_spin(nspace_items
[i
].vp
);
8670 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8671 vnode_unlock(nspace_items
[i
].vp
);
8673 nspace_items
[i
].vp
= NULL
;
8674 nspace_items
[i
].vid
= 0;
8675 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8676 nspace_items
[i
].token
= 0;
8678 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8683 wakeup((caddr_t
)&nspace_item_idx
);
8684 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * resolve_nspace_item
 *
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument to pass along; forwards 'vp' and 'op' with a NULL arg and
 * returns whatever the extended routine returns.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
8695 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
8697 int i
, error
, keep_waiting
;
8699 nspace_type_t nspace_type
= nspace_type_for_op(op
);
8701 // only allow namespace events on regular files, directories and symlinks.
8702 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
8707 // if this is a snapshot event and the vnode is on a
8708 // disk image just pretend nothing happened since any
8709 // change to the disk image will cause the disk image
8710 // itself to get backed up and this avoids multi-way
8711 // deadlocks between the snapshot handler and the ever
8712 // popular diskimages-helper process. the variable
8713 // nspace_allow_virtual_devs allows this behavior to
8714 // be overridden (for use by the Mobile TimeMachine
8715 // testing infrastructure which uses disk images)
8717 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
8718 && (vp
->v_mount
!= NULL
)
8719 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
8720 && !nspace_allow_virtual_devs
) {
8725 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8726 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8730 if (nspace_is_special_process(current_proc())) {
8734 lck_mtx_lock(&nspace_handler_lock
);
8737 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8738 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
8743 if (i
>= MAX_NSPACE_ITEMS
) {
8744 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8745 if (nspace_items
[i
].flags
== 0) {
8750 nspace_items
[i
].refcount
++;
8753 if (i
>= MAX_NSPACE_ITEMS
) {
8754 ts
.tv_sec
= nspace_handler_timeout
;
8757 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
8759 // an entry got free'd up, go see if we can get a slot
8762 lck_mtx_unlock(&nspace_handler_lock
);
8768 // if it didn't already exist, add it. if it did exist
8769 // we'll get woken up when someone does a wakeup() on
8770 // the slot in the nspace_items table.
8772 if (vp
!= nspace_items
[i
].vp
) {
8773 nspace_items
[i
].vp
= vp
;
8774 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
8775 nspace_items
[i
].op
= op
;
8776 nspace_items
[i
].vid
= vnode_vid(vp
);
8777 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
8778 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
8779 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8781 vnode_lock_spin(vp
);
8782 vp
->v_flag
|= VNEEDSSNAPSHOT
;
8787 nspace_items
[i
].token
= 0;
8788 nspace_items
[i
].refcount
= 1;
8790 wakeup((caddr_t
)&nspace_item_idx
);
8794 // Now go to sleep until the handler does a wakeup on this
8795 // slot in the nspace_items table (or we timeout).
8798 while(keep_waiting
) {
8799 ts
.tv_sec
= nspace_handler_timeout
;
8801 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
8803 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
8805 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
8806 error
= nspace_items
[i
].token
;
8807 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
8808 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
8809 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
8814 } else if (error
== 0) {
8815 // hmmm, why did we get woken up?
8816 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8817 nspace_items
[i
].token
);
8820 if (--nspace_items
[i
].refcount
== 0) {
8821 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
8822 nspace_items
[i
].arg
= NULL
;
8823 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
8824 nspace_items
[i
].flags
= 0; // this clears it for re-use
8826 wakeup(&nspace_token_id
);
8830 lck_mtx_unlock(&nspace_handler_lock
);
8837 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
8841 lck_mtx_lock(&nspace_handler_lock
);
8842 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8843 if (nspace_items
[i
].vp
== vp
) {
8848 if (i
>= MAX_NSPACE_ITEMS
) {
8849 lck_mtx_unlock(&nspace_handler_lock
);
8853 *status
= nspace_items
[i
].flags
;
8854 lck_mtx_unlock(&nspace_handler_lock
);
8861 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
8863 struct vnode_attr va
;
8867 VATTR_WANTED(&va
, va_fsid
);
8868 VATTR_WANTED(&va
, va_fileid
);
8870 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
8871 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
8874 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
8883 // Note: this function does NOT check permissions on all of the
8884 // parent directories leading to this vnode. It should only be
8885 // called on behalf of a root process. Otherwise a process may
8886 // get access to a file because the file itself is readable even
8887 // though its parent directories would prevent access.
8890 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
8894 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8899 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
8904 /* compute action to be authorized */
8906 if (fmode
& FREAD
) {
8907 action
|= KAUTH_VNODE_READ_DATA
;
8909 if (fmode
& (FWRITE
| O_TRUNC
)) {
8911 * If we are writing, appending, and not truncating,
8912 * indicate that we are appending so that if the
8913 * UF_APPEND or SF_APPEND bits are set, we do not deny
8916 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
8917 action
|= KAUTH_VNODE_APPEND_DATA
;
8919 action
|= KAUTH_VNODE_WRITE_DATA
;
8923 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
8928 // if the vnode is tagged VOPENEVT and the current process
8929 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8930 // flag to the open mode so that this open won't count against
8931 // the vnode when carbon delete() does a vnode_isinuse() to see
8932 // if a file is currently in use. this allows spotlight
8933 // importers to not interfere with carbon apps that depend on
8934 // the no-delete-if-busy semantics of carbon delete().
8936 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
8940 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
8943 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
8944 VNOP_CLOSE(vp
, fmode
, ctx
);
8948 /* Call out to allow 3rd party notification of open.
8949 * Ignore result of kauth_authorize_fileop call.
8952 mac_vnode_notify_open(ctx
, vp
, fmode
);
8954 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
8962 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
8964 int i
, error
=0, unblock
=0;
8967 lck_mtx_lock(&nspace_handler_exclusion_lock
);
8968 if (nspace_handlers
[nspace_type
].handler_busy
) {
8969 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8972 nspace_handlers
[nspace_type
].handler_busy
= 1;
8973 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8976 * Any process that gets here will be one of the namespace handlers.
8977 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8978 * as we can cause deadlocks to occur, because the namespace handler may prevent
8979 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8982 curtask
= current_task();
8983 bsd_set_dependency_capable (curtask
);
8985 lck_mtx_lock(&nspace_handler_lock
);
8986 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8987 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
8988 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
8991 while (error
== 0) {
8993 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8994 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
8995 if (!nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9002 if (i
< MAX_NSPACE_ITEMS
) {
9003 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
9004 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
9005 nspace_items
[i
].token
= ++nspace_token_id
;
9007 if (nspace_items
[i
].vp
) {
9008 struct fileproc
*fp
;
9009 int32_t indx
, fmode
;
9010 struct proc
*p
= current_proc();
9011 vfs_context_t ctx
= vfs_context_current();
9012 struct vnode_attr va
;
9016 * Use vnode pointer to acquire a file descriptor for
9017 * hand-off to userland
9019 fmode
= nspace_open_flags_for_type(nspace_type
);
9020 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
9025 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
9028 vnode_put(nspace_items
[i
].vp
);
9032 if ((error
= falloc(p
, &fp
, &indx
, ctx
))) {
9033 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9034 vnode_put(nspace_items
[i
].vp
);
9039 fp
->f_fglob
->fg_flag
= fmode
;
9040 fp
->f_fglob
->fg_ops
= &vnops
;
9041 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9044 procfdtbl_releasefd(p
, indx
, NULL
);
9045 fp_drop(p
, indx
, fp
, 1);
9049 * All variants of the namespace handler struct support these three fields:
9050 * token, flags, and the FD pointer
9052 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9053 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9054 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9057 * Handle optional fields:
9058 * extended version support an info ptr (offset, length), and the
9060 * namedata version supports a unique per-link object ID
9064 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9065 uint64_t u_offset
, u_length
;
9068 u_offset
= uio_offset(uio
);
9069 u_length
= uio_resid(uio
);
9074 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9075 error
= copyout(&u_length
, nhd
->infoptr
+sizeof(uint64_t), sizeof(uint64_t));
9080 VATTR_WANTED(&va
, va_linkid
);
9081 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9083 uint64_t linkid
= 0;
9084 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9085 linkid
= (uint64_t)va
.va_linkid
;
9087 error
= copyout (&linkid
, nhd
->objid
, sizeof(uint64_t));
9092 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9093 fp_free(p
, indx
, fp
);
9097 vnode_put(nspace_items
[i
].vp
);
9101 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9102 i
, nspace_items
[i
].vp
, error
, nspace_items
[i
].vp
->v_name
);
9106 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9107 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9116 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9117 vnode_lock_spin(nspace_items
[i
].vp
);
9118 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9119 vnode_unlock(nspace_items
[i
].vp
);
9121 nspace_items
[i
].vp
= NULL
;
9122 nspace_items
[i
].vid
= 0;
9123 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9124 nspace_items
[i
].token
= 0;
9126 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9129 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9130 // just go through every snapshot event and unblock it immediately.
9131 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9132 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9133 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9134 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9135 nspace_items
[i
].vp
= NULL
;
9136 nspace_items
[i
].vid
= 0;
9137 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9138 nspace_items
[i
].token
= 0;
9140 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9147 lck_mtx_unlock(&nspace_handler_lock
);
9149 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9150 nspace_handlers
[nspace_type
].handler_busy
= 0;
9151 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9156 static inline int validate_namespace_args (int is64bit
, int size
) {
9159 /* Must be one of these */
9160 if (size
== sizeof(user64_namespace_handler_info
)) {
9163 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9166 if (size
== sizeof(user64_namespace_handler_data
)) {
9172 /* 32 bit -- must be one of these */
9173 if (size
== sizeof(user32_namespace_handler_info
)) {
9176 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9179 if (size
== sizeof(user32_namespace_handler_data
)) {
9191 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9194 namespace_handler_data nhd
;
9196 bzero (&nhd
, sizeof(namespace_handler_data
));
9198 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9199 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9203 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9207 error
= validate_namespace_args (is64bit
, size
);
9212 /* Copy in the userland pointers into our kernel-only struct */
9215 /* 64 bit userland structures */
9216 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9217 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9218 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9220 /* If the size is greater than the standard info struct, add in extra fields */
9221 if (size
> (sizeof(user64_namespace_handler_info
))) {
9222 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9223 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9225 if (size
== (sizeof(user64_namespace_handler_data
))) {
9226 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9228 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9232 /* 32 bit userland structures */
9233 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9234 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9235 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9237 if (size
> (sizeof(user32_namespace_handler_info
))) {
9238 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9239 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
9241 if (size
== (sizeof(user32_namespace_handler_data
))) {
9242 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9244 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9248 return wait_for_namespace_event(&nhd
, nspace_type
);
9252 * Make a filesystem-specific control call:
9256 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
9261 #define STK_PARAMS 128
9262 char stkbuf
[STK_PARAMS
];
9264 vnode_t vp
= *arg_vp
;
9266 size
= IOCPARM_LEN(cmd
);
9267 if (size
> IOCPARM_MAX
) return (EINVAL
);
9269 is64bit
= proc_is64bit(p
);
9275 * ensure the buffer is large enough for underlying calls
9277 #ifndef HFSIOC_GETPATH
9278 typedef char pn_t
[MAXPATHLEN
];
9279 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9283 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9285 if (IOCBASECMD(cmd
) == HFS_GETPATH
) {
9286 /* Round up to MAXPATHLEN regardless of user input */
9290 if (size
> sizeof (stkbuf
)) {
9291 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9299 error
= copyin(udata
, data
, size
);
9308 *(user_addr_t
*)data
= udata
;
9311 *(uint32_t *)data
= (uint32_t)udata
;
9314 } else if ((cmd
& IOC_OUT
) && size
) {
9316 * Zero the buffer so the user always
9317 * gets back something deterministic.
9320 } else if (cmd
& IOC_VOID
) {
9322 *(user_addr_t
*)data
= udata
;
9325 *(uint32_t *)data
= (uint32_t)udata
;
9329 /* Check to see if it's a generic command */
9330 switch (IOCBASECMD(cmd
)) {
9332 case FSCTL_SYNC_VOLUME
: {
9333 mount_t mp
= vp
->v_mount
;
9334 int arg
= *(uint32_t*)data
;
9336 /* record vid of vp so we can drop it below. */
9337 uint32_t vvid
= vp
->v_id
;
9340 * Then grab mount_iterref so that we can release the vnode.
9341 * Without this, a thread may call vnode_iterate_prepare then
9342 * get into a deadlock because we've never released the root vp
9344 error
= mount_iterref (mp
, 0);
9350 /* issue the sync for this volume */
9351 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
9354 * Then release the mount_iterref once we're done syncing; it's not
9355 * needed for the VNOP_IOCTL below
9359 if (arg
& FSCTL_SYNC_FULLSYNC
) {
9360 /* re-obtain vnode iocount on the root vp, if possible */
9361 error
= vnode_getwithvid (vp
, vvid
);
9363 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
9367 /* mark the argument VP as having been released */
9372 case FSCTL_SET_PACKAGE_EXTS
: {
9373 user_addr_t ext_strings
;
9374 uint32_t num_entries
;
9377 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
9378 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
9380 // either you're 64-bit and passed a 64-bit struct or
9381 // you're 32-bit and passed a 32-bit struct. otherwise
9388 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
9389 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
9390 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
9392 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
9393 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
9394 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
9396 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
9400 /* namespace handlers */
9401 case FSCTL_NAMESPACE_HANDLER_GET
: {
9402 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
9406 /* Snapshot handlers */
9407 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
9408 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
9412 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
9413 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
9417 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
9418 uint32_t token
, val
;
9421 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9425 if (!nspace_is_special_process(p
)) {
9430 token
= ((uint32_t *)data
)[0];
9431 val
= ((uint32_t *)data
)[1];
9433 lck_mtx_lock(&nspace_handler_lock
);
9435 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9436 if (nspace_items
[i
].token
== token
) {
9437 break; /* exit for loop, not case stmt */
9441 if (i
>= MAX_NSPACE_ITEMS
) {
9445 // if this bit is set, when resolve_nspace_item() times out
9446 // it will loop and go back to sleep.
9448 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
9451 lck_mtx_unlock(&nspace_handler_lock
);
9454 printf("nspace-handler-update: did not find token %u\n", token
);
9459 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
9460 uint32_t token
, val
;
9463 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9467 if (!nspace_is_special_process(p
)) {
9472 token
= ((uint32_t *)data
)[0];
9473 val
= ((uint32_t *)data
)[1];
9475 lck_mtx_lock(&nspace_handler_lock
);
9477 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9478 if (nspace_items
[i
].token
== token
) {
9479 break; /* exit for loop, not case statement */
9483 if (i
>= MAX_NSPACE_ITEMS
) {
9484 printf("nspace-handler-unblock: did not find token %u\n", token
);
9487 if (val
== 0 && nspace_items
[i
].vp
) {
9488 vnode_lock_spin(nspace_items
[i
].vp
);
9489 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9490 vnode_unlock(nspace_items
[i
].vp
);
9493 nspace_items
[i
].vp
= NULL
;
9494 nspace_items
[i
].arg
= NULL
;
9495 nspace_items
[i
].op
= 0;
9496 nspace_items
[i
].vid
= 0;
9497 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9498 nspace_items
[i
].token
= 0;
9500 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9503 lck_mtx_unlock(&nspace_handler_lock
);
9507 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
9508 uint32_t token
, val
;
9511 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9515 if (!nspace_is_special_process(p
)) {
9520 token
= ((uint32_t *)data
)[0];
9521 val
= ((uint32_t *)data
)[1];
9523 lck_mtx_lock(&nspace_handler_lock
);
9525 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9526 if (nspace_items
[i
].token
== token
) {
9527 break; /* exit for loop, not case stmt */
9531 if (i
>= MAX_NSPACE_ITEMS
) {
9532 printf("nspace-handler-cancel: did not find token %u\n", token
);
9535 if (nspace_items
[i
].vp
) {
9536 vnode_lock_spin(nspace_items
[i
].vp
);
9537 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9538 vnode_unlock(nspace_items
[i
].vp
);
9541 nspace_items
[i
].vp
= NULL
;
9542 nspace_items
[i
].arg
= NULL
;
9543 nspace_items
[i
].vid
= 0;
9544 nspace_items
[i
].token
= val
;
9545 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
9546 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
9548 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9551 lck_mtx_unlock(&nspace_handler_lock
);
9555 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
9556 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9560 // we explicitly do not do the namespace_handler_proc check here
9562 lck_mtx_lock(&nspace_handler_lock
);
9563 snapshot_timestamp
= ((uint32_t *)data
)[0];
9564 wakeup(&nspace_item_idx
);
9565 lck_mtx_unlock(&nspace_handler_lock
);
9566 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
9571 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
9573 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9577 lck_mtx_lock(&nspace_handler_lock
);
9578 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
9579 lck_mtx_unlock(&nspace_handler_lock
);
9580 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9581 nspace_allow_virtual_devs
? "" : " NOT");
9587 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
9589 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9593 mount_lock(vp
->v_mount
);
9595 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
9596 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
9597 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9598 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
9599 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
9602 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9603 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
9605 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
9606 vp
->v_mount
->fstypename_override
[0] = '\0';
9608 mount_unlock(vp
->v_mount
);
9614 /* Invoke the filesystem-specific code */
9615 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
9618 } /* end switch stmt */
9621 * if no errors, copy any data to user. Size was
9622 * already set and checked above.
9624 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
9625 error
= copyout(data
, udata
, size
);
9636 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
9639 struct nameidata nd
;
9642 vfs_context_t ctx
= vfs_context_current();
9644 AUDIT_ARG(cmd
, uap
->cmd
);
9645 AUDIT_ARG(value32
, uap
->options
);
9646 /* Get the vnode for the file we are getting info on: */
9648 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
9649 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
9650 UIO_USERSPACE
, uap
->path
, ctx
);
9651 if ((error
= namei(&nd
))) goto done
;
9656 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
9662 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9671 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
9675 vfs_context_t ctx
= vfs_context_current();
9678 AUDIT_ARG(fd
, uap
->fd
);
9679 AUDIT_ARG(cmd
, uap
->cmd
);
9680 AUDIT_ARG(value32
, uap
->options
);
9682 /* Get the vnode for the file we are getting info on: */
9683 if ((error
= file_vnode(uap
->fd
, &vp
)))
9686 if ((error
= vnode_getwithref(vp
))) {
9692 if ((error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) {
9699 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9703 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
9710 /* end of fsctl system call */
9713 * Retrieve the data of an extended attribute.
9716 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
9719 struct nameidata nd
;
9720 char attrname
[XATTR_MAXNAMELEN
+1];
9721 vfs_context_t ctx
= vfs_context_current();
9723 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9724 size_t attrsize
= 0;
9726 u_int32_t nameiflags
;
9728 char uio_buf
[ UIO_SIZEOF(1) ];
9730 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9733 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9734 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9735 if ((error
= namei(&nd
))) {
9741 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9744 if (xattr_protected(attrname
)) {
9745 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
9751 * the specific check for 0xffffffff is a hack to preserve
9752 * binary compatibility in K64 with applications that discovered
9753 * that passing in a buf pointer and a size of -1 resulted in
9754 * just the size of the indicated extended attribute being returned.
9755 * this isn't part of the documented behavior, but because of the
9756 * original implementation's check for "uap->size > 0", this behavior
9757 * was allowed. In K32 that check turned into a signed comparison
9758 * even though uap->size is unsigned... in K64, we blow by that
9759 * check because uap->size is unsigned and doesn't get sign smeared
9760 * in the munger for a 32 bit user app. we also need to add a
9761 * check to limit the maximum size of the buffer being passed in...
9762 * unfortunately, the underlying filesystems seem to just malloc
9763 * the requested size even if the actual extended attribute is tiny.
9764 * because that malloc is for kernel wired memory, we have to put a
9767 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9768 * U64 running on K64 will yield -1 (64 bits wide)
9769 * U32/U64 running on K32 will yield -1 (32 bits wide)
9771 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
9775 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
9776 uap
->size
= XATTR_MAXSIZE
;
9778 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9779 &uio_buf
[0], sizeof(uio_buf
));
9780 uio_addiov(auio
, uap
->value
, uap
->size
);
9783 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
9788 *retval
= uap
->size
- uio_resid(auio
);
9790 *retval
= (user_ssize_t
)attrsize
;
9797 * Retrieve the data of an extended attribute.
9800 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
9803 char attrname
[XATTR_MAXNAMELEN
+1];
9805 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9806 size_t attrsize
= 0;
9809 char uio_buf
[ UIO_SIZEOF(1) ];
9811 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9814 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9817 if ( (error
= vnode_getwithref(vp
)) ) {
9821 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9824 if (xattr_protected(attrname
)) {
9828 if (uap
->value
&& uap
->size
> 0) {
9829 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9830 &uio_buf
[0], sizeof(uio_buf
));
9831 uio_addiov(auio
, uap
->value
, uap
->size
);
9834 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
9836 (void)vnode_put(vp
);
9840 *retval
= uap
->size
- uio_resid(auio
);
9842 *retval
= (user_ssize_t
)attrsize
;
9848 * Set the data of an extended attribute.
9851 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
9854 struct nameidata nd
;
9855 char attrname
[XATTR_MAXNAMELEN
+1];
9856 vfs_context_t ctx
= vfs_context_current();
9858 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9860 u_int32_t nameiflags
;
9862 char uio_buf
[ UIO_SIZEOF(1) ];
9864 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9867 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9868 if (error
== EPERM
) {
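9869 /* error holds the 0/1 result of the "!= 0" test above, not copyinstr's own return value; 1 is EPERM's value on Darwin, so any copyinstr failure (e.g. a name that won't fit in attrname) lands here */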
9870 return (ENAMETOOLONG
);
9872 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9875 if (xattr_protected(attrname
))
9877 if (uap
->size
!= 0 && uap
->value
== 0) {
9881 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9882 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9883 if ((error
= namei(&nd
))) {
9889 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9890 &uio_buf
[0], sizeof(uio_buf
));
9891 uio_addiov(auio
, uap
->value
, uap
->size
);
9893 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
9896 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9907 * Set the data of an extended attribute.
9910 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
9913 char attrname
[XATTR_MAXNAMELEN
+1];
9915 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9918 char uio_buf
[ UIO_SIZEOF(1) ];
9920 vfs_context_t ctx
= vfs_context_current();
9923 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9926 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9927 if (error
== EPERM
) {
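9928 /* error holds the 0/1 result of the "!= 0" test above, not copyinstr's own return value; 1 is EPERM's value on Darwin, so any copyinstr failure (e.g. a name that won't fit in attrname) lands here */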
9929 return (ENAMETOOLONG
);
9931 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9934 if (xattr_protected(attrname
))
9936 if (uap
->size
!= 0 && uap
->value
== 0) {
9939 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9942 if ( (error
= vnode_getwithref(vp
)) ) {
9946 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9947 &uio_buf
[0], sizeof(uio_buf
));
9948 uio_addiov(auio
, uap
->value
, uap
->size
);
9950 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
9953 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9965 * Remove an extended attribute.
9966 * XXX Code duplication here.
9969 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
9972 struct nameidata nd
;
9973 char attrname
[XATTR_MAXNAMELEN
+1];
9974 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9975 vfs_context_t ctx
= vfs_context_current();
9977 u_int32_t nameiflags
;
9980 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9983 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9987 if (xattr_protected(attrname
))
9989 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9990 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9991 if ((error
= namei(&nd
))) {
9997 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
10000 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10011 * Remove an extended attribute.
10012 * XXX Code duplication here.
10015 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
10018 char attrname
[XATTR_MAXNAMELEN
+1];
10022 vfs_context_t ctx
= vfs_context_current();
10025 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10028 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
10032 if (xattr_protected(attrname
))
10034 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10037 if ( (error
= vnode_getwithref(vp
)) ) {
10038 file_drop(uap
->fd
);
10042 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
10045 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
10051 file_drop(uap
->fd
);
10057 * Retrieve the list of extended attribute names.
10058 * XXX Code duplication here.
10061 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10064 struct nameidata nd
;
10065 vfs_context_t ctx
= vfs_context_current();
10067 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10068 size_t attrsize
= 0;
10069 u_int32_t nameiflags
;
10071 char uio_buf
[ UIO_SIZEOF(1) ];
10073 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10076 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10077 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10078 if ((error
= namei(&nd
))) {
10083 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10084 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10085 &uio_buf
[0], sizeof(uio_buf
));
10086 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10089 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10093 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10095 *retval
= (user_ssize_t
)attrsize
;
10101 * Retrieve the list of extended attribute names.
10102 * XXX Code duplication here.
10105 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10109 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10110 size_t attrsize
= 0;
10112 char uio_buf
[ UIO_SIZEOF(1) ];
10114 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10117 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10120 if ( (error
= vnode_getwithref(vp
)) ) {
10121 file_drop(uap
->fd
);
10124 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10125 auio
= uio_createwithbuffer(1, 0, spacetype
,
10126 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10127 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10130 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10133 file_drop(uap
->fd
);
10135 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10137 *retval
= (user_ssize_t
)attrsize
;
10142 static int fsgetpath_internal(
10143 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10144 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10147 struct mount
*mp
= NULL
;
10152 if (bufsize
> PAGE_SIZE
) {
10160 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10161 error
= ENOTSUP
; /* unexpected failure */
10167 error
= VFS_ROOT(mp
, &vp
, ctx
);
10169 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10172 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10174 * If the fileid isn't found and we're in a union
10175 * mount volume, then see if the fileid is in the
10176 * mounted-on volume.
10178 struct mount
*tmp
= mp
;
10179 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10181 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10192 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10199 /* Obtain the absolute path to this vnode. */
10200 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10201 bpflags
|= BUILDPATH_CHECK_MOVED
;
10202 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10209 AUDIT_ARG(text
, buf
);
10211 if (kdebug_enable
) {
10212 long dbg_parms
[NUMPARMS
];
10215 dbg_namelen
= (int)sizeof(dbg_parms
);
10217 if (length
< dbg_namelen
) {
10218 memcpy((char *)dbg_parms
, buf
, length
);
10219 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10221 dbg_namelen
= length
;
10223 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10226 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10229 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10236 * Obtain the full pathname of a file system object by id.
10238 * This is a private SPI used by the File Manager.
10242 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10244 vfs_context_t ctx
= vfs_context_current();
10250 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10253 AUDIT_ARG(value32
, fsid
.val
[0]);
10254 AUDIT_ARG(value64
, uap
->objid
);
10255 /* Restrict output buffer size for now. */
10257 if (uap
->bufsize
> PAGE_SIZE
) {
10260 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10261 if (realpath
== NULL
) {
10265 error
= fsgetpath_internal(
10266 ctx
, fsid
.val
[0], uap
->objid
,
10267 uap
->bufsize
, realpath
, &length
);
10273 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10275 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10278 FREE(realpath
, M_TEMP
);
10284 * Common routine to handle various flavors of statfs data heading out
10287 * Returns: 0 Success
10291 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10292 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10293 boolean_t partial_copy
)
10296 int my_size
, copy_size
;
10299 struct user64_statfs sfs
;
10300 my_size
= copy_size
= sizeof(sfs
);
10301 bzero(&sfs
, my_size
);
10302 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10303 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10304 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10305 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10306 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10307 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10308 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10309 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10310 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10311 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
10312 sfs
.f_fsid
= sfsp
->f_fsid
;
10313 sfs
.f_owner
= sfsp
->f_owner
;
10314 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10315 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10317 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10319 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10320 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10322 if (partial_copy
) {
10323 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10325 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10328 struct user32_statfs sfs
;
10330 my_size
= copy_size
= sizeof(sfs
);
10331 bzero(&sfs
, my_size
);
10333 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10334 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10335 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10338 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
10339 * have to fudge the numbers here in that case. We inflate the blocksize in order
10340 * to reflect the filesystem size as best we can.
10342 if ((sfsp
->f_blocks
> INT_MAX
)
10343 /* Hack for 4061702 . I think the real fix is for Carbon to
10344 * look for some volume capability and not depend on hidden
10345 * semantics agreed between a FS and carbon.
10346 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10347 * for Carbon to set bNoVolumeSizes volume attribute.
10348 * Without this the webdavfs files cannot be copied onto
10349 * disk as they look huge. This change should not affect
10350 * XSAN as they should not be setting these to -1.
10352 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
10353 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
10354 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
10358 * Work out how far we have to shift the block count down to make it fit.
10359 * Note that it's possible to have to shift so far that the resulting
10360 * blocksize would be unreportably large. At that point, we will clip
10361 * any values that don't fit.
10363 * For safety's sake, we also ensure that f_iosize is never reported as
10364 * being smaller than f_bsize.
10366 for (shift
= 0; shift
< 32; shift
++) {
10367 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
10369 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
10372 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10373 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
10374 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
10375 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
10376 #undef __SHIFT_OR_CLIP
10377 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
10378 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
10380 /* filesystem is small enough to be reported honestly */
10381 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
10382 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
10383 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
10384 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
10385 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
10387 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
10388 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
10389 sfs
.f_fsid
= sfsp
->f_fsid
;
10390 sfs
.f_owner
= sfsp
->f_owner
;
10391 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10392 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10394 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10396 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10397 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10399 if (partial_copy
) {
10400 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10402 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10405 if (sizep
!= NULL
) {
10412 * copy stat structure into user_stat structure.
10414 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
10416 bzero(usbp
, sizeof(*usbp
));
10418 usbp
->st_dev
= sbp
->st_dev
;
10419 usbp
->st_ino
= sbp
->st_ino
;
10420 usbp
->st_mode
= sbp
->st_mode
;
10421 usbp
->st_nlink
= sbp
->st_nlink
;
10422 usbp
->st_uid
= sbp
->st_uid
;
10423 usbp
->st_gid
= sbp
->st_gid
;
10424 usbp
->st_rdev
= sbp
->st_rdev
;
10425 #ifndef _POSIX_C_SOURCE
10426 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10427 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10428 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10429 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10430 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10431 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10433 usbp
->st_atime
= sbp
->st_atime
;
10434 usbp
->st_atimensec
= sbp
->st_atimensec
;
10435 usbp
->st_mtime
= sbp
->st_mtime
;
10436 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10437 usbp
->st_ctime
= sbp
->st_ctime
;
10438 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10440 usbp
->st_size
= sbp
->st_size
;
10441 usbp
->st_blocks
= sbp
->st_blocks
;
10442 usbp
->st_blksize
= sbp
->st_blksize
;
10443 usbp
->st_flags
= sbp
->st_flags
;
10444 usbp
->st_gen
= sbp
->st_gen
;
10445 usbp
->st_lspare
= sbp
->st_lspare
;
10446 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10447 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10450 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
10452 bzero(usbp
, sizeof(*usbp
));
10454 usbp
->st_dev
= sbp
->st_dev
;
10455 usbp
->st_ino
= sbp
->st_ino
;
10456 usbp
->st_mode
= sbp
->st_mode
;
10457 usbp
->st_nlink
= sbp
->st_nlink
;
10458 usbp
->st_uid
= sbp
->st_uid
;
10459 usbp
->st_gid
= sbp
->st_gid
;
10460 usbp
->st_rdev
= sbp
->st_rdev
;
10461 #ifndef _POSIX_C_SOURCE
10462 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10463 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10464 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10465 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10466 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10467 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10469 usbp
->st_atime
= sbp
->st_atime
;
10470 usbp
->st_atimensec
= sbp
->st_atimensec
;
10471 usbp
->st_mtime
= sbp
->st_mtime
;
10472 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10473 usbp
->st_ctime
= sbp
->st_ctime
;
10474 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10476 usbp
->st_size
= sbp
->st_size
;
10477 usbp
->st_blocks
= sbp
->st_blocks
;
10478 usbp
->st_blksize
= sbp
->st_blksize
;
10479 usbp
->st_flags
= sbp
->st_flags
;
10480 usbp
->st_gen
= sbp
->st_gen
;
10481 usbp
->st_lspare
= sbp
->st_lspare
;
10482 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10483 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10487 * copy stat64 structure into user_stat64 structure.
10489 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
10491 bzero(usbp
, sizeof(*usbp
));
10493 usbp
->st_dev
= sbp
->st_dev
;
10494 usbp
->st_ino
= sbp
->st_ino
;
10495 usbp
->st_mode
= sbp
->st_mode
;
10496 usbp
->st_nlink
= sbp
->st_nlink
;
10497 usbp
->st_uid
= sbp
->st_uid
;
10498 usbp
->st_gid
= sbp
->st_gid
;
10499 usbp
->st_rdev
= sbp
->st_rdev
;
10500 #ifndef _POSIX_C_SOURCE
10501 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10502 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10503 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10504 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10505 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10506 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10507 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10508 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10510 usbp
->st_atime
= sbp
->st_atime
;
10511 usbp
->st_atimensec
= sbp
->st_atimensec
;
10512 usbp
->st_mtime
= sbp
->st_mtime
;
10513 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10514 usbp
->st_ctime
= sbp
->st_ctime
;
10515 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10516 usbp
->st_birthtime
= sbp
->st_birthtime
;
10517 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10519 usbp
->st_size
= sbp
->st_size
;
10520 usbp
->st_blocks
= sbp
->st_blocks
;
10521 usbp
->st_blksize
= sbp
->st_blksize
;
10522 usbp
->st_flags
= sbp
->st_flags
;
10523 usbp
->st_gen
= sbp
->st_gen
;
10524 usbp
->st_lspare
= sbp
->st_lspare
;
10525 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10526 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10529 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
10531 bzero(usbp
, sizeof(*usbp
));
10533 usbp
->st_dev
= sbp
->st_dev
;
10534 usbp
->st_ino
= sbp
->st_ino
;
10535 usbp
->st_mode
= sbp
->st_mode
;
10536 usbp
->st_nlink
= sbp
->st_nlink
;
10537 usbp
->st_uid
= sbp
->st_uid
;
10538 usbp
->st_gid
= sbp
->st_gid
;
10539 usbp
->st_rdev
= sbp
->st_rdev
;
10540 #ifndef _POSIX_C_SOURCE
10541 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10542 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10543 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10544 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10545 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10546 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10547 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10548 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10550 usbp
->st_atime
= sbp
->st_atime
;
10551 usbp
->st_atimensec
= sbp
->st_atimensec
;
10552 usbp
->st_mtime
= sbp
->st_mtime
;
10553 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10554 usbp
->st_ctime
= sbp
->st_ctime
;
10555 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10556 usbp
->st_birthtime
= sbp
->st_birthtime
;
10557 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10559 usbp
->st_size
= sbp
->st_size
;
10560 usbp
->st_blocks
= sbp
->st_blocks
;
10561 usbp
->st_blksize
= sbp
->st_blksize
;
10562 usbp
->st_flags
= sbp
->st_flags
;
10563 usbp
->st_gen
= sbp
->st_gen
;
10564 usbp
->st_lspare
= sbp
->st_lspare
;
10565 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10566 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10570 * Purge buffer cache for simulating cold starts
10572 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
10574 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
10576 return VNODE_RETURNED
;
10579 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
10581 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
10583 return VFS_RETURNED
;
10587 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
10589 if (!kauth_cred_issuser(kauth_cred_get()))
10592 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);