/*
 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.41 (Berkeley) 6/15/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/vnode_internal.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/uio_internal.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/sysctl.h>
#include <sys/quota.h>
#include <sys/kdebug.h>
#include <sys/fsevents.h>
#include <sys/imgsrc.h>
#include <sys/sysproto.h>
#include <sys/xattr.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/ubc_internal.h>
#include <sys/disk.h>
#include <machine/cons.h>
#include <machine/limits.h>
#include <miscfs/specfs/specdev.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/task.h>

#include <vm/vm_pageout.h>

#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>

#include <security/mac.h>
#include <security/mac_framework.h>
#if CONFIG_FSE
#define GET_PATH(x) \
	(x) = get_pathbuff();
#define RELEASE_PATH(x) \
	release_pathbuff(x);
#else
#define GET_PATH(x) \
	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x) \
	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
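
/*
 * Illustrative sketch (not part of the original source): the GET_PATH /
 * RELEASE_PATH macros above are meant to be used as a pair around any code
 * that needs a MAXPATHLEN-sized scratch buffer.  The helper name below is
 * hypothetical and only shown to clarify the pairing; vn_getpath() and
 * strlcpy() are existing kernel interfaces.
 */
#if 0	/* example only, not compiled */
static int
example_copy_vnode_path(vnode_t vp, char *out, int outlen)
{
	char *path;
	int len = MAXPATHLEN;
	int error;

	GET_PATH(path);			/* acquire a MAXPATHLEN buffer */
	if (path == NULL)
		return (ENOMEM);

	error = vn_getpath(vp, path, &len);
	if (error == 0)
		strlcpy(out, path, outlen);

	RELEASE_PATH(path);		/* always release what GET_PATH gave us */
	return (error);
}
#endif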
/* struct for checkdirs iteration */
struct cdirargs {
	vnode_t olddp;
	vnode_t newdp;
};
/* callback for checkdirs iteration */
static int checkdirs_callback(proc_t p, void *arg);

static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
static int checkdirs(vnode_t olddp, vfs_context_t ctx);
void enablequotas(struct mount *mp, vfs_context_t ctx);
static int getfsstat_callback(mount_t mp, void *arg);
static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
static int sync_callback(mount_t, void *);
static void sync_thread(void *, __unused wait_result_t);
static int sync_async(int);
static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy);
static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp);
static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
    struct componentname *cnp, user_addr_t fsmountargs,
    int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
    vfs_context_t ctx);
void vfs_notify_mount(vnode_t pdvp);

int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);

struct fd_vn_data *fg_vn_data_alloc(void);
/*
 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}.
 * Concurrent lookups (or lookups by id) on hard links can cause vn_getpath
 * (which does not re-enter the filesystem, as vn_getpath_fsenter does) to
 * return ENOENT, because the path cannot be produced from the name cache
 * alone.  We have no option but to retry and hope to complete one
 * namei->reverse-path generation without an intervening lookup or
 * lookup-by-id on the hard-linked item.  This is only an issue for the MAC
 * hooks that cannot re-enter the filesystem, which currently are the MAC
 * hooks for rename, unlink and rmdir.
 */
#define MAX_AUTHORIZE_ENOENT_RETRIES	1024
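
/*
 * Illustrative sketch (not part of the original source) of the retry
 * pattern the constant above is sized for: callers of the non-reentrant
 * MAC authorization hooks loop while the hook keeps failing with ENOENT
 * because the path could not be produced from the name cache alone.  The
 * functions "lookup_target" and "authorize_target" are placeholders, not
 * real kernel interfaces.
 */
#if 0	/* example only, not compiled */
	int retry_count = 0;
	int error;

retry:
	error = lookup_target(/* ... */);		/* placeholder */
	if (error == 0) {
		error = authorize_target(/* ... */);	/* placeholder MAC hook */
		if (error == ENOENT &&
		    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
			retry_count++;
			goto retry;	/* hope for a clean reverse-path pass */
		}
	}
#endif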
static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);

static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);

#ifdef CONFIG_IMGSRC_ACCESS
static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
static void mount_end_update(mount_t mp);
static int relocate_imageboot_source(vnode_t pvp, vnode_t vp,
    struct componentname *cnp, const char *fsname, vfs_context_t ctx,
    boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
#endif /* CONFIG_IMGSRC_ACCESS */

int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);

int sync_internal(void);

int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

extern lck_grp_t *fd_vn_lck_grp;
extern lck_grp_attr_t *fd_vn_lck_grp_attr;
extern lck_attr_t *fd_vn_lck_attr;

/*
 * incremented each time a mount or unmount operation occurs
 * used to invalidate the cached value of the rootvp in the
 * mount structure utilized by cache_lookup_path
 */
uint32_t mount_generation = 0;

/* counts number of mount and unmount operations */
unsigned int vfs_nummntops = 0;

extern const struct fileops vnops;
#if CONFIG_APPLEDOUBLE
extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
#endif /* CONFIG_APPLEDOUBLE */

typedef uint32_t vfs_rename_flags_t;
#if CONFIG_SECLUDED_RENAME
enum {
	VFS_SECLUDE_RENAME = 0x00000001
};
#endif
/*
 * Virtual File System System Calls
 */

#if NFSCLIENT || DEVFS
/*
 * Private in-kernel mounting spi (NFS only, not exported)
 */
boolean_t
vfs_iskernelmount(mount_t mp)
{
	return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
}

int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
    void *data, __unused size_t datalen, int syscall_flags,
    __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	char *pnbuf = CAST_DOWN(char *, path);

	nd.ni_cnd.cn_pnbuf = pnbuf;
	nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;

	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	    syscall_flags, kern_flags, NULL, TRUE, ctx);

	return (error);
}
#endif /* NFSCLIENT || DEVFS */
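
/*
 * Illustrative sketch (not part of the original source): how an in-kernel
 * client of the private mounting SPI above might invoke kernel_mount().
 * The "devfs" type, the "/dev" path and the flag choices are assumptions
 * made for the example; the real in-kernel callers (NFS, devfs) pass their
 * own arguments.
 */
#if 0	/* example only, not compiled */
static int
example_mount_devfs(vfs_context_t ctx)
{
	/* Cover "/dev" with a devfs instance; the path is looked up in
	 * system space because no covered vnode is supplied. */
	return kernel_mount("devfs", NULLVP, NULLVP, "/dev",
	    NULL, 0, MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, ctx);
}
#endif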
/*
 * Mount a file system.
 */
int
mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
{
	struct __mac_mount_args muap;

	muap.type = uap->type;
	muap.path = uap->path;
	muap.flags = uap->flags;
	muap.data = uap->data;
	muap.mac_p = USER_ADDR_NULL;
	return (__mac_mount(p, &muap, retval));
}
void
vfs_notify_mount(vnode_t pdvp)
{
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
/*
 * Mount a file system taking into account MAC label behavior.
 * See mount(2) man page for more information.
 *
 * Parameters:	p		Process requesting the mount
 *		uap		User argument descriptor (see below)
 *
 * Indirect:	uap->type	Filesystem type
 *		uap->path	Path to mount
 *		uap->data	Mount arguments
 *		uap->mac_p	MAC info
 *		uap->flags	Mount flags
 */
boolean_t root_fs_upgrade_try = FALSE;
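
/*
 * Illustrative sketch (not part of the original source): the user-space
 * view of the plain mount(2) path handled below, shown only to clarify
 * which arguments end up in uap->type, uap->path, uap->flags and uap->data.
 * The filesystem name "exampfs" and the device path are made-up examples.
 */
#if 0	/* user-space example, not kernel code */
#include <sys/mount.h>

int
example_mount_readonly(void)
{
	/* For a local filesystem (VFC_VFSLOCALARGS) the first word of the
	 * mount-args block is the device path, which is what the copyin of
	 * `devpath` in mount_common() below picks up. */
	const char *dev = "/dev/disk2s1";	/* made-up device */

	return mount("exampfs", "/mnt", MNT_RDONLY, &dev);
}
#endif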
int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp, vp;
	struct nameidata nd;
	size_t dummy = 0;
	int error;
	int need_nameidone = 0;
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	char *labelstr = NULL;
	int flags = uap->flags;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#endif

	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		goto out;
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
		    ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct mac mac;
		size_t ulen = 0;

		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		if (error)
			goto out;
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error)
			goto out;
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

	if ((vp->v_flag & VROOT) &&
	    (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		if (!(flags & MNT_UNION)) {
			flags |= MNT_UPDATE;
		} else {
			/*
			 * For a union mount on '/', treat it as a fresh
			 * mount instead of an update.
			 * Otherwise, union mounting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/', which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
		}
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	    labelstr, FALSE, ctx);

out:
#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}
/*
 * common mount implementation (final stage of mounting)
 *
 * Arguments:
 *  fstypename	file system type (i.e., its vfs name)
 *  pvp		parent of covered vnode
 *  vp		the covered vnode
 *  cnp		component name (i.e., path) of covered vnode
 *  flags	generic mount flags
 *  fsmountargs	file system specific data
 *  labelstr	optional MAC label
 *  kernelmount	TRUE for mounts initiated from inside the kernel
 *  ctx		caller's context
 */
static int
mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
    struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
    char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
{
#if !CONFIG_MACF
#pragma unused(labelstr)
#endif
	struct vnode *devvp = NULLVP;
	struct vnode *device_vnode = NULLVP;
	struct mount *mp;
	struct vfstable *vfsp = (struct vfstable *)0;
	struct proc *p = vfs_context_proc(ctx);
	int error, flag = 0;
	int ronly = 0;
	user_addr_t devpath = USER_ADDR_NULL;
	boolean_t vfsp_ref = FALSE;
	boolean_t is_rwlock_locked = FALSE;
	boolean_t did_rele = FALSE;
	boolean_t have_usecount = FALSE;
	/*
	 * Process an update for an existing mount
	 */
	if (flags & MNT_UPDATE) {
		if ((vp->v_flag & VROOT) == 0) {
			error = EINVAL;
			goto out1;
		}
		mp = vp->v_mount;

		/* unmount in progress return error */
		mount_lock_spin(mp);
		if (mp->mnt_lflag & MNT_LUNMOUNT) {
			mount_unlock(mp);
			error = EBUSY;
			goto out1;
		}
		mount_unlock(mp);
		lck_rw_lock_exclusive(&mp->mnt_rwlock);
		is_rwlock_locked = TRUE;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			error = ENOTSUP;
			goto out1;
		}

		/*
		 * If content protection is enabled, update mounts are not
		 * allowed to turn it off.
		 */
		if ((mp->mnt_flag & MNT_CPROTECT) &&
		    ((flags & MNT_CPROTECT) == 0)) {
			error = EINVAL;
			goto out1;
		}

#ifdef CONFIG_IMGSRC_ACCESS
		/* Can't downgrade the backer of the root FS */
		if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
		    (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
			error = ENOTSUP;
			goto out1;
		}
#endif /* CONFIG_IMGSRC_ACCESS */

		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		    (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
			goto out1;
		}
#if CONFIG_MACF
		error = mac_mount_check_remount(ctx, mp);
		if (error != 0) {
			goto out1;
		}
#endif
		/*
		 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
		 * and MNT_NOEXEC if the mount point is already MNT_NOEXEC.
		 */
		if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
			flags |= MNT_NOSUID | MNT_NODEV;
			if (mp->mnt_flag & MNT_NOEXEC)
				flags |= MNT_NOEXEC;
		}
		flag = mp->mnt_flag;

		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);

		vfsp = mp->mnt_vtable;
		goto update;
	}
	/*
	 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
	 * MNT_NOEXEC if the mount point is already MNT_NOEXEC.
	 */
	if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
		flags |= MNT_NOSUID | MNT_NODEV;
		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
			flags |= MNT_NOEXEC;
	}

	/* XXXAUDIT: Should we capture the type on the error path as well? */
	AUDIT_ARG(text, fstypename);
	mount_list_lock();
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
			vfsp->vfc_refcount++;
			vfsp_ref = TRUE;
			break;
		}
	mount_list_unlock();

	/*
	 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
	 */
	if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
		error = EINVAL;  /* unsupported request */
		goto out1;
	}

	error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
	if (error != 0) {
		goto out1;
	}

	/*
	 * Allocate and initialize the filesystem (mount_t)
	 */
	MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
	    M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_int32_t)sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
	mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
	mp->mnt_devblocksize = DEV_BSIZE;
	mp->mnt_alignmentmask = PAGE_MASK;
	mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	lck_rw_lock_exclusive(&mp->mnt_rwlock);
	is_rwlock_locked = TRUE;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vtable = vfsp;
	//mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
	mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
	mp->mnt_devbsdunit = 0;

	/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
	vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);

#if NFSCLIENT || DEVFS
	if (kernelmount)
		mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
	if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
		mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
#endif /* NFSCLIENT || DEVFS */

update:
	/*
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY) {
		// disallow read/write upgrades of file systems that
		// had the TYPENAME_OVERRIDE feature set.
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			error = EPERM;
			goto out1;
		}
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	}
	mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
	    MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
	    MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
	    MNT_QUARANTINE | MNT_CPROTECT);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
	    MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
	    MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
	    MNT_QUARANTINE | MNT_CPROTECT);

#if CONFIG_MACF
	if (flags & MNT_MULTILABEL) {
		if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
			error = EINVAL;
			goto out1;
		}
		mp->mnt_flag |= MNT_MULTILABEL;
	}
#endif
	/*
	 * Process device path for local file systems if requested
	 */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		if (vfs_context_is64bit(ctx)) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				goto out1;
			fsmountargs += sizeof(devpath);
		} else {
			user32_addr_t tmp;

			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				goto out1;
			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
			fsmountargs += sizeof(tmp);
		}

		/* Lookup device and authorize access to it */
		if ((devpath)) {
			struct nameidata nd;

			NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
			if ( (error = namei(&nd)) )
				goto out1;

			strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
			devvp = nd.ni_vp;
			nameidone(&nd);

			if (devvp->v_type != VBLK) {
				error = ENOTBLK;
				goto out2;
			}
			if (major(devvp->v_rdev) >= nblkdev) {
				error = ENXIO;
				goto out2;
			}
			/*
			 * If mount by non-root, then verify that user has necessary
			 * permissions on the device.
			 */
			if (suser(vfs_context_ucred(ctx), NULL) != 0) {
				mode_t accessmode = KAUTH_VNODE_READ_DATA;

				if ((mp->mnt_flag & MNT_RDONLY) == 0)
					accessmode |= KAUTH_VNODE_WRITE_DATA;
				if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
					goto out2;
			}
		}
		/* On first mount, preflight and open device */
		if (devpath && ((flags & MNT_UPDATE) == 0)) {
			if ( (error = vnode_ref(devvp)) )
				goto out2;
			/*
			 * Disallow multiple mounts of the same device.
			 * Disallow mounting of a device that is currently in use
			 * (except for root, which might share swap device for miniroot).
			 * Flush out any old buffers remaining from a previous use.
			 */
			if ( (error = vfs_mountedon(devvp)) )
				goto out3;

			if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
				error = EBUSY;
				goto out3;
			}
			if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
				error = ENOTBLK;
				goto out3;
			}
			if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
				goto out3;

			ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
#if CONFIG_MACF
			error = mac_vnode_check_open(ctx,
			    devvp,
			    ronly ? FREAD : FREAD|FWRITE);
			if (error)
				goto out3;
#endif /* MAC */
			if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
				goto out3;

			mp->mnt_devvp = devvp;
			device_vnode = devvp;

		} else if ((mp->mnt_flag & MNT_RDONLY) &&
		           (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
		           (device_vnode = mp->mnt_devvp)) {
			dev_t dev;
			int maj;
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vnode_getalways(device_vnode);

			if (suser(vfs_context_ucred(ctx), NULL) &&
			    (error = vnode_authorize(device_vnode, NULL,
			     KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
			     ctx)) != 0) {
				vnode_put(device_vnode);
				goto out2;
			}

			/* Tell the device that we're upgrading */
			dev = (dev_t)device_vnode->v_rdev;
			maj = major(dev);

			if ((u_int)maj >= (u_int)nblkdev)
				panic("Volume mounted on a device with invalid major number.");

			error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
			vnode_put(device_vnode);
			device_vnode = NULLVP;
			if (error != 0) {
				goto out2;
			}
		}
	}
#if CONFIG_MACF
	if ((flags & MNT_UPDATE) == 0) {
		mac_mount_label_init(mp);
		mac_mount_label_associate(ctx, mp);
	}
	if ((flags & MNT_UPDATE) != 0) {
		error = mac_mount_check_label_update(ctx, mp);
		if (error != 0)
			goto out3;
	}
#endif
	/*
	 * Mount the filesystem.
	 */
	error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);

	if (flags & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~
		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error)
			mp->mnt_flag = flag;  /* restore flag value */
		vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;
		if (!error)
			enablequotas(mp, ctx);
		goto exit;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	if (error == 0) {
		struct vfs_attr vfsattr;
#if CONFIG_MACF
		if (vfs_flags(mp) & MNT_MULTILABEL) {
			vnode_t rvp;

			error = VFS_ROOT(mp, &rvp, ctx);
			if (error) {
				printf("%s() VFS_ROOT returned %d\n", __func__, error);
				goto out3;
			}
			error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
			/*
			 * drop reference provided by VFS_ROOT
			 */
			vnode_put(rvp);

			if (error)
				goto out3;
		}
#endif /* MAC */

		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vp->v_mountedhere = mp;
		vnode_unlock(vp);

		/*
		 * taking the name_cache_lock exclusively will
		 * ensure that everyone is out of the fast path who
		 * might be trying to use a now stale copy of
		 * vp->v_mountedhere->mnt_realrootvp
		 * bumping mount_generation causes the cached values
		 * to be invalidated
		 */
		name_cache_lock();
		mount_generation++;
		name_cache_unlock();

		error = vnode_ref(vp);
		if (error != 0) {
			goto out4;
		}
		have_usecount = TRUE;

		error = checkdirs(vp, ctx);
		if (error != 0) {
			/* Unmount the filesystem as cdir/rdirs cannot be updated */
			goto out4;
		}
		/*
		 * there is no cleanup code here so I have made it void
		 * we need to revisit this
		 */
		(void)VFS_START(mp, 0, ctx);

		if (mount_list_add(mp) != 0) {
			/*
			 * The system is shutting down trying to umount
			 * everything, so fail with a plausible errno.
			 */
			error = EBUSY;
			goto out4;
		}
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;

		/* Check if this mounted file system supports EAs or named streams. */
		/* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
		VFSATTR_INIT(&vfsattr);
		VFSATTR_WANTED(&vfsattr, f_capabilities);
		if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
		    vfs_getattr(mp, &vfsattr, ctx) == 0 &&
		    VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
				mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
			}
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
				mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
			}
			/* Check if this file system supports path from id lookups. */
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
			} else if (mp->mnt_flag & MNT_DOVOLFS) {
				/* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
			}
		}
		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
			mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
		}
		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
			mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
		}

		/* increment the operations count */
		OSAddAtomic(1, &vfs_nummntops);
		enablequotas(mp, ctx);

		if (device_vnode) {
			device_vnode->v_specflags |= SI_MOUNTEDON;

			/*
			 * cache the IO attributes for the underlying physical media...
			 * an error return indicates the underlying driver doesn't
			 * support all the queries necessary... however, reasonable
			 * defaults will have been set, so no reason to bail or care
			 */
			vfs_init_io_attributes(device_vnode, mp);
		}

		/* Now that mount is setup, notify the listeners */
		vfs_notify_mount(pvp);
	} else {
		/* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
		if (mp->mnt_vnodelist.tqh_first != NULL) {
			panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
			    mp->mnt_vtable->vfc_name, error);
		}

		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
		mount_list_lock();
		mp->mnt_vtable->vfc_refcount--;
		mount_list_unlock();

		if (device_vnode) {
			vnode_rele(device_vnode);
			VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
		}
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;

		/*
		 * if we get here, we have a mount structure that needs to be freed,
		 * but since the coveredvp hasn't yet been updated to point at it,
		 * no need to worry about other threads holding a crossref on this mp
		 * so it's ok to just free it
		 */
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
	}
exit:
	/*
	 * drop I/O count on the device vp if there was one
	 */
	if (devpath && devvp)
		vnode_put(devvp);

	return (error);

	/* Error condition exits */
out4:
	(void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);

	/*
	 * If the mount has been placed on the covered vp,
	 * it may have been discovered by now, so we have
	 * to treat this just like an unmount
	 */
	mount_lock_spin(mp);
	mp->mnt_lflag |= MNT_LDEAD;
	mount_unlock(mp);

	if (device_vnode != NULLVP) {
		vnode_rele(device_vnode);
		VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		    ctx);
		did_rele = TRUE;
	}

	vnode_lock_spin(vp);
	CLR(vp->v_flag, VMOUNT);
	vp->v_mountedhere = (mount_t) 0;
	vnode_unlock(vp);

	if (have_usecount) {
		vnode_rele(vp);
	}
out3:
	if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
		vnode_rele(devvp);
out2:
	if (devpath && devvp)
		vnode_put(devvp);
out1:
	/* Release mnt_rwlock only when it was taken */
	if (is_rwlock_locked == TRUE) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	if (mp->mnt_crossref)
		mount_dropcrossref(mp, vp, 0);
	else {
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
	}
	if (vfsp_ref) {
		mount_list_lock();
		vfsp->vfc_refcount--;
		mount_list_unlock();
	}

	return (error);
}
/*
 * Flush in-core data, check for competing mount attempts,
 * and set VMOUNT
 */
int
prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
{
#if !CONFIG_MACF
#pragma unused(cnp,fsname)
#endif
	struct vnode_attr va;
	int error;

	if (!skip_auth) {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		if ((error = vnode_getattr(vp, &va, ctx)) ||
		    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		     (!vfs_context_issuser(ctx)))) {
			error = EPERM;
			goto out;
		}
	}

	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
		goto out;

	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
		goto out;

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto out;
	}
#if CONFIG_MACF
	error = mac_mount_check_mount(ctx, vp,
	    cnp, fsname);
	if (error != 0)
		goto out;
#endif
	vnode_lock_spin(vp);
	SET(vp->v_flag, VMOUNT);
	vnode_unlock(vp);

out:
	return error;
}
#if CONFIG_IMGSRC_ACCESS

#if DEBUG
#define IMGSRC_DEBUG(args...) printf(args)
#else
#define IMGSRC_DEBUG(args...) do { } while(0)
#endif
static int
authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
{
	struct nameidata nd;
	vnode_t vp, realdevvp;
	mode_t accessmode;
	int error;

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
	if ( (error = namei(&nd)) ) {
		IMGSRC_DEBUG("namei() failed with %d\n", error);
		return error;
	}

	vp = nd.ni_vp;

	if (!vnode_isblk(vp)) {
		IMGSRC_DEBUG("Not block device.\n");
		error = ENOTSUP;
		goto out;
	}

	realdevvp = mp->mnt_devvp;
	if (realdevvp == NULLVP) {
		IMGSRC_DEBUG("No device backs the mount.\n");
		error = ENOTSUP;
		goto out;
	}

	error = vnode_getwithref(realdevvp);
	if (error != 0) {
		IMGSRC_DEBUG("Couldn't get iocount on device.\n");
		goto out;
	}

	if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
		IMGSRC_DEBUG("Wrong dev_t.\n");
		error = ENOTSUP;
		goto out1;
	}

	strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (!vfs_context_issuser(ctx)) {
		accessmode = KAUTH_VNODE_READ_DATA;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= KAUTH_VNODE_WRITE_DATA;
		if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
			IMGSRC_DEBUG("Access denied.\n");
			goto out1;
		}
	}

	*devvpp = vp;

out1:
	vnode_put(realdevvp);
out:
	nameidone(&nd);
	if (error) {
		vnode_put(vp);
	}
	return error;
}
/*
 * Clear VMOUNT, set v_mountedhere and mnt_vnodecovered, ref the vnode,
 * and call checkdirs()
 */
static int
place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
{
	int error;

	mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */

	vnode_lock_spin(vp);
	CLR(vp->v_flag, VMOUNT);
	vp->v_mountedhere = mp;
	vnode_unlock(vp);

	/*
	 * taking the name_cache_lock exclusively will
	 * ensure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();

	error = vnode_ref(vp);
	if (error != 0) {
		goto out;
	}

	error = checkdirs(vp, ctx);
	if (error != 0) {
		/* Unmount the filesystem as cdir/rdirs cannot be updated */
		goto out;
	}

	return 0;

out:
	mp->mnt_vnodecovered = NULLVP;
	return error;
}

static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
static int
mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
{
	int error = 0;

	/* unmount in progress return error */
	mount_lock_spin(mp);
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		mount_unlock(mp);
		return EBUSY;
	}
	mount_unlock(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.
	 */
	if ((flags & MNT_RELOAD) &&
	    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
		error = ENOTSUP;
		goto out;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to update it.
	 */
	if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
	    (!vfs_context_issuser(ctx))) {
		error = EPERM;
		goto out;
	}
#if CONFIG_MACF
	error = mac_mount_check_remount(ctx, mp);
	if (error != 0) {
		goto out;
	}
#endif

out:
	if (error) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	return error;
}

static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
static int
get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
{
	vnode_t vp;

	if (height >= MAX_IMAGEBOOT_NESTING) {
		return EINVAL;
	}

	vp = imgsrc_rootvnodes[height];
	if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
		*rvpp = vp;
		return 0;
	} else {
		return ENOENT;
	}
}
static int
relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
    const char *fsname, vfs_context_t ctx,
    boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
	int error;
	boolean_t placed = FALSE;
	vnode_t devvp = NULLVP;
	vnode_t rvp = NULLVP;
	struct vfstable *vfsp;
	user_addr_t devpath;
	char *old_mntonname;
	mount_t mp;
	uint32_t height;
	uint32_t flags;

	/* If we didn't imageboot, nothing to move */
	if (imgsrc_rootvnodes[0] == NULLVP) {
		return EINVAL;
	}

	/* Only root can do this */
	if (!vfs_context_issuser(ctx)) {
		return EPERM;
	}

	IMGSRC_DEBUG("looking for root vnode.\n");

	/*
	 * Get root vnode of filesystem we're moving.
	 */
	if (by_index) {
		if (is64bit) {
			struct user64_mnt_imgsrc_args mia64;
			error = copyin(fsmountargs, &mia64, sizeof(mia64));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia64.mi_height;
			flags = mia64.mi_flags;
			devpath = mia64.mi_devpath;
		} else {
			struct user32_mnt_imgsrc_args mia32;
			error = copyin(fsmountargs, &mia32, sizeof(mia32));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia32.mi_height;
			flags = mia32.mi_flags;
			devpath = mia32.mi_devpath;
		}
	} else {
		/*
		 * For binary compatibility--assumes one level of nesting.
		 */
		if (is64bit) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				return error;
		} else {
			user32_addr_t tmp;

			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				return error;

			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
		}

		height = 0;
		flags = 0;
	}

	if (flags != 0) {
		IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
		return EINVAL;
	}

	error = get_imgsrc_rootvnode(height, &rvp);
	if (error != 0) {
		IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
		return error;
	}

	IMGSRC_DEBUG("got root vnode.\n");

	MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);

	/* Can only move once */
	mp = vnode_mount(rvp);
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved.\n");
		error = EBUSY;
		goto out0;
	}

	IMGSRC_DEBUG("Starting updated.\n");

	/* Get exclusive rwlock on mount, authorize update on mp */
	error = mount_begin_update(mp, ctx, 0);
	if (error != 0) {
		IMGSRC_DEBUG("Starting updated failed with %d\n", error);
		goto out0;
	}

	/*
	 * It can only be moved once. Flag is set under the rwlock,
	 * so we're now safe to proceed.
	 */
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved [2]\n");
		goto out1;
	}

	IMGSRC_DEBUG("Preparing coveredvp.\n");

	/* Mark covered vnode as mount in progress, authorize placing mount on top */
	error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
	if (error != 0) {
		IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
		goto out1;
	}

	IMGSRC_DEBUG("Covered vp OK.\n");

	/* Sanity check the name the caller has provided */
	vfsp = mp->mnt_vtable;
	if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
		IMGSRC_DEBUG("Wrong fs name.\n");
		error = EINVAL;
		goto out2;
	}

	/* Check the device vnode and update mount-from name, for local filesystems */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		IMGSRC_DEBUG("Local, doing device validation.\n");

		if (devpath != USER_ADDR_NULL) {
			error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
			if (error) {
				IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
				goto out2;
			}
		}
	}

	/*
	 * Place mp on top of vnode, ref the vnode, call checkdirs(),
	 * and increment the name cache's mount generation
	 */
	IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
	error = place_mount_and_checkdirs(mp, vp, ctx);
	if (error != 0) {
		goto out2;
	}

	placed = TRUE;

	strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
	strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);

	/* Forbid future moves */
	mount_lock(mp);
	mp->mnt_kern_flag |= MNTK_HAS_MOVED;
	mount_unlock(mp);

	/* Finally, add to mount list, completely ready to go */
	if (mount_list_add(mp) != 0) {
		/*
		 * The system is shutting down trying to umount
		 * everything, so fail with a plausible errno.
		 */
		error = EBUSY;
		goto out3;
	}

	mount_end_update(mp);
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);

	vfs_notify_mount(pvp);

	return 0;

out3:
	strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);

	mount_lock(mp);
	mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
	mount_unlock(mp);

out2:
	/*
	 * Placing the mp on the vnode clears VMOUNT,
	 * so cleanup is different after that point
	 */
	if (placed) {
		/* Rele the vp, clear VMOUNT and v_mountedhere */
		undo_place_on_covered_vp(mp, vp);
	} else {
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
	}
out1:
	mount_end_update(mp);

out0:
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);
	return error;
}

#endif /* CONFIG_IMGSRC_ACCESS */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type = 0; type < MAXQUOTAS; type++) {
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		    CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue;	/* option file to trigger quotas is not present */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);

		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
static int
checkdirs_callback(proc_t p, void *arg)
{
	struct cdirargs *cdrp = (struct cdirargs *)arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */
	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	if (fdp_cvp == olddp) {
		vnode_ref(newdp);
		fdp_cvp = newdp;
		cdir_changed = 1;
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		fdp_rvp = newdp;
		rdir_changed = 1;
	}
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 */
static int
checkdirs(vnode_t olddp, vfs_context_t ctx)
{
	vnode_t newdp;
	int err;
	struct cdirargs cdr;

	if (olddp->v_usecount == 1)
		return(0);
	err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
	if (err != 0) {
		panic("mount: lost mount: error %d", err);
	}

	cdr.olddp = olddp;
	cdr.newdp = newdp;
	/* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);

	if (rootvnode == olddp) {
		vnode_ref(newdp);
		vnode_rele(rootvnode);
		rootvnode = newdp;
	}

	vnode_put(newdp);
	return(0);
}
/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
int
unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	nameidone(&nd);

#if CONFIG_MACF
	error = mac_mount_check_umount(ctx, mp);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}
#endif
	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & VROOT) == 0) {
		vnode_put(vp);
		return (EINVAL);
	}
	mount_ref(mp, 0);
	vnode_put(vp);
	/* safedounmount consumes the mount ref */
	return (safedounmount(mp, uap->flags, ctx));
}
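
/*
 * Illustrative sketch (not part of the original source): the user-space
 * counterpart of the unmount path above.  The mount point path is a made-up
 * example.
 */
#if 0	/* user-space example, not kernel code */
#include <sys/mount.h>
#include <errno.h>

int
example_unmount(void)
{
	/* Try a plain unmount first; fall back to a forced unmount, which
	 * maps to the MNT_FORCE handling in dounmount() below. */
	if (unmount("/mnt", 0) == 0)
		return 0;
	if (errno == EBUSY)
		return unmount("/mnt", MNT_FORCE);
	return -1;
}
#endif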
int
vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
{
	mount_t mp;

	mp = mount_list_lookupby_fsid(fsid, 0, 1);
	if (mp == (mount_t)0) {
		return(ENOENT);
	}
	mount_ref(mp, 0);
	mount_iterdrop(mp);
	/* safedounmount consumes the mount ref */
	return(safedounmount(mp, flags, ctx));
}
/*
 * The mount struct comes with a mount ref which will be consumed.
 * Do the actual file system unmount, prevent some common foot shooting.
 */
int
safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
{
	int error;
	proc_t p = vfs_context_proc(ctx);

	/*
	 * If the file system is not responding and MNT_NOBLOCK
	 * is set and not a forced unmount then return EBUSY.
	 */
	if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
	    (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Skip authorization if the mount is tagged as permissive and
	 * this is not a forced-unmount attempt.
	 */
	if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to unmount this filesystem.
		 */
		if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
		    (error = suser(kauth_cred_get(), &p->p_acflag)))
			goto out;
	}
	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EBUSY; /* the root is always busy */
		goto out;
	}

#ifdef CONFIG_IMGSRC_ACCESS
	if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
		error = EBUSY;
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

	return (dounmount(mp, flags, 1, ctx));

out:
	mount_drop(mp, 0);
	return(error);
}
/*
 * Do the actual file system unmount.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
	vnode_t coveredvp = (vnode_t)0;
	int error;
	int needwakeup = 0;
	int forcedunmount = 0;
	int lflags = 0;
	struct vnode *devvp = NULLVP;
#if CONFIG_TRIGGERS
	proc_t p = vfs_context_proc(ctx);
	int did_vflush = 0;
	int pflags_save = 0;
#endif /* CONFIG_TRIGGERS */

	mount_lock(mp);

	/*
	 * If already an unmount in progress just return EBUSY.
	 * Even a forced unmount cannot override.
	 */
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		if (withref != 0)
			mount_drop(mp, 1);
		mount_unlock(mp);
		return (EBUSY);
	}

	if (flags & MNT_FORCE) {
		forcedunmount = 1;
		mp->mnt_lflag |= MNT_LFORCE;
	}

#if CONFIG_TRIGGERS
	if (flags & MNT_NOBLOCK && p != kernproc)
		pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
#endif

	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	mp->mnt_lflag |= MNT_LUNMOUNT;
	mp->mnt_flag &=~ MNT_ASYNC;
	/*
	 * anyone currently in the fast path that
	 * trips over the cached rootvp will be
	 * dumped out and forced into the slow path
	 * to regenerate a new cached value
	 */
	mp->mnt_realrootvp = NULLVP;
	mount_unlock(mp);

	if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
		/*
		 * Force unmount any mounts in this filesystem.
		 * If any unmounts fail - just leave them dangling.
		 */
		(void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
	}

	/*
	 * taking the name_cache_lock exclusively will
	 * ensure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();

	if (withref != 0)
		mount_drop(mp, 0);

	lck_rw_lock_exclusive(&mp->mnt_rwlock);

#if CONFIG_FSE
	fsevent_unmount(mp);  /* has to come first! */
#endif

	error = 0;
	if (forcedunmount == 0) {
		ubc_umount(mp); /* release cached vnodes */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = VFS_SYNC(mp, MNT_WAIT, ctx);
			if (error) {
				mount_lock(mp);
				mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
				mp->mnt_lflag &= ~MNT_LUNMOUNT;
				mp->mnt_lflag &= ~MNT_LFORCE;
				goto out;
			}
		}
	}

#if CONFIG_TRIGGERS
	vfs_nested_trigger_unmounts(mp, flags, ctx);
	did_vflush = 1;
#endif
	if (forcedunmount)
		lflags |= FORCECLOSE;
	error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
	if ((forcedunmount == 0) && error) {
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* make sure no one is in the mount iterations or lookup */
	mount_iterdrain(mp);

	error = VFS_UNMOUNT(mp, flags, ctx);
	if (error) {
		mount_iterreset(mp);
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* increment the operations count */
	OSAddAtomic(1, &vfs_nummntops);

	if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
		/* hold an io reference and drop the usecount before close */
		devvp = mp->mnt_devvp;
		vnode_getalways(devvp);
		vnode_rele(devvp);
		VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		    ctx);
		vnode_clearmountedon(devvp);
		vnode_put(devvp);
		mp->mnt_devvp = NULLVP;
	}
	lck_rw_done(&mp->mnt_rwlock);
	mount_list_remove(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/* mark the mount point hook in the vp but not drop the ref yet */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		/*
		 * The covered vnode needs special handling. Trying to get an
		 * iocount must not block here as this may lead to deadlocks
		 * if the Filesystem to which the covered vnode belongs is
		 * undergoing forced unmounts. Since we hold a usecount, the
		 * vnode cannot be reused (it can, however, still be terminated)
		 */
		vnode_getalways(coveredvp);
		vnode_lock_spin(coveredvp);

		mp->mnt_crossref++;
		coveredvp->v_mountedhere = (struct mount *)0;
		CLR(coveredvp->v_flag, VMOUNT);

		vnode_unlock(coveredvp);
		vnode_put(coveredvp);
	}

	mount_list_lock();
	mp->mnt_vtable->vfc_refcount--;
	mount_list_unlock();

	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
	mount_lock(mp);
	mp->mnt_lflag |= MNT_LDEAD;

	if (mp->mnt_lflag & MNT_LWAIT) {
		/*
		 * do the wakeup here
		 * in case we block in mount_refdrain
		 * which will drop the mount lock
		 * and allow anyone blocked in vfs_busy
		 * to wakeup and see the LDEAD state
		 */
		mp->mnt_lflag &= ~MNT_LWAIT;
		wakeup((caddr_t)mp);
	}
	mount_refdrain(mp);
out:
	if (mp->mnt_lflag & MNT_LWAIT) {
		mp->mnt_lflag &= ~MNT_LWAIT;
		needwakeup = 1;
	}

#if CONFIG_TRIGGERS
	if (flags & MNT_NOBLOCK && p != kernproc) {
		// Restore P_NOREMOTEHANG bit to its previous value
		if ((pflags_save & P_NOREMOTEHANG) == 0)
			OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
	}

	/*
	 * Callback and context are set together under the mount lock, and
	 * never cleared, so we're safe to examine them here, drop the lock,
	 * and call out.
	 */
	if (mp->mnt_triggercallback != NULL) {
		mount_unlock(mp);
		if (error == 0) {
			mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
		} else if (did_vflush) {
			mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
		}
	} else {
		mount_unlock(mp);
	}
#else
	mount_unlock(mp);
#endif /* CONFIG_TRIGGERS */

	lck_rw_done(&mp->mnt_rwlock);

	if (needwakeup)
		wakeup((caddr_t)mp);

	if (!error) {
		if ((coveredvp != NULLVP)) {
			vnode_t pvp = NULLVP;

			/*
			 * The covered vnode needs special handling. Trying to
			 * get an iocount must not block here as this may lead
			 * to deadlocks if the Filesystem to which the covered
			 * vnode belongs is undergoing forced unmounts. Since we
			 * hold a usecount, the vnode cannot be reused
			 * (it can, however, still be terminated).
			 */
			vnode_getalways(coveredvp);

			mount_dropcrossref(mp, coveredvp, 0);
			/*
			 * We'll _try_ to detect if this really needs to be
			 * done. The coveredvp can only be in termination (or
			 * terminated) if the coveredvp's mount point is in a
			 * forced unmount (or has been) since we still hold the
			 * usecount.
			 */
			if (!vnode_isrecycled(coveredvp)) {
				pvp = vnode_getparent(coveredvp);
			}
#if CONFIG_TRIGGERS
			if (coveredvp->v_resolve) {
				vnode_trigger_rearm(coveredvp, ctx);
			}
#endif
			vnode_rele(coveredvp);
			vnode_put(coveredvp);
			coveredvp = NULLVP;

			if (pvp) {
				lock_vnode_and_post(pvp, NOTE_WRITE);
				vnode_put(pvp);
			}
		} else if (mp->mnt_flag & MNT_ROOTFS) {
			mount_lock_destroy(mp);
#if CONFIG_MACF
			mac_mount_label_destroy(mp);
#endif
			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		} else
			panic("dounmount: no coveredvp");
	}
	return (error);
}
/*
 * Unmount any mounts in this filesystem.
 */
void
dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
{
	mount_t smp;
	fsid_t *fsids, fsid;
	int fsids_sz;
	int count = 0, i, m = 0;
	vnode_t vp;

	mount_list_lock();

	// Get an array to hold the submounts fsids.
	TAILQ_FOREACH(smp, &mountlist, mnt_list)
		count++;
	fsids_sz = count * sizeof(fsid_t);
	MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
	if (fsids == NULL) {
		mount_list_unlock();
		goto out;
	}
	fsids[0] = mp->mnt_vfsstat.f_fsid;	// Prime the pump

	/*
	 * Fill the array with submount fsids.
	 * Since mounts are always added to the tail of the mount list, the
	 * list is always in mount order.
	 * For each mount check if the mounted-on vnode belongs to a
	 * mount that's already added to our array of mounts to be unmounted.
	 */
	for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
		vp = smp->mnt_vnodecovered;
		if (vp == NULL)
			continue;
		fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid;	// Underlying fsid
		for (i = 0; i <= m; i++) {
			if (fsids[i].val[0] == fsid.val[0] &&
			    fsids[i].val[1] == fsid.val[1]) {
				fsids[++m] = smp->mnt_vfsstat.f_fsid;
				break;
			}
		}
	}
	mount_list_unlock();

	// Unmount the submounts in reverse order. Ignore errors.
	for (i = m; i > 0; i--) {
		smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
		if (smp) {
			mount_ref(smp, 0);
			mount_iterdrop(smp);
			(void) dounmount(smp, flags, 1, ctx);
		}
	}
out:
	if (fsids)
		FREE(fsids, M_TEMP);
}
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
/*
 * Sync each mounted filesystem.
 */
#if DIAGNOSTIC
int syncprt = 0;
#endif

int print_vmpage_stat = 0;
int sync_timeout = 60;    // Sync time limit (sec)

static int
sync_callback(mount_t mp, __unused void *arg)
{
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		int asyncflag = mp->mnt_flag & MNT_ASYNC;

		mp->mnt_flag &= ~MNT_ASYNC;
		VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
		if (asyncflag)
			mp->mnt_flag |= MNT_ASYNC;
	}

	return (VFS_RETURNED);
}

/* ARGSUSED */
int
sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
{
	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
	return 0;
}

static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}

/*
 * Sync in a separate thread so we can time out if it blocks.
 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};

	lck_mtx_lock(sync_mtx_lck);
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		printf("sync timed out: %d sec\n", timeout);
	}
	thread_deallocate(thd);

	return (0);
}

/*
 * An in-kernel sync for power management to call.
 */
__private_extern__
int
sync_internal(void)
{
	(void) sync_async(sync_timeout);

	return 0;
} /* end of sync_internal call */
/*
 * Change filesystem quotas.
 */
#if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		} else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			} else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
#else
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	return (EOPNOTSUPP);
}
#endif /* QUOTA */
/*
 * Get filesystem statistics.
 *
 * Returns:	0			Success
 *	vfs_update_vfsstat:???
 *	munge_statfs:EFAULT
 */
/* ARGSUSED */
int
statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;

	NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	sp = &mp->mnt_vfsstat;
	nameidone(&nd);

	error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
	vnode_put(vp);
	return (error);
}
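
/*
 * Illustrative sketch (not part of the original source): the user-space
 * statfs(2) call serviced by the handler above.  The volume path is a
 * made-up example.
 */
#if 0	/* user-space example, not kernel code */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

void
example_statfs(void)
{
	struct statfs sfs;

	/* munge_statfs() in the kernel fills this structure for us. */
	if (statfs("/Volumes/Example", &sfs) == 0)
		printf("%s on %s: %llu free blocks of %u bytes\n",
		    sfs.f_mntfromname, sfs.f_mntonname,
		    (unsigned long long)sfs.f_bfree, sfs.f_bsize);
}
#endif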
/*
 * Get filesystem statistics.
 */
/* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
/*
 * Common routine to handle copying of statfs64 data to user space
 */
static int
statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
{
	int error;
	struct statfs64 sfs;

	bzero(&sfs, sizeof(sfs));

	sfs.f_bsize = sfsp->f_bsize;
	sfs.f_iosize = (int32_t)sfsp->f_iosize;
	sfs.f_blocks = sfsp->f_blocks;
	sfs.f_bfree = sfsp->f_bfree;
	sfs.f_bavail = sfsp->f_bavail;
	sfs.f_files = sfsp->f_files;
	sfs.f_ffree = sfsp->f_ffree;
	sfs.f_fsid = sfsp->f_fsid;
	sfs.f_owner = sfsp->f_owner;
	sfs.f_type = mp->mnt_vtable->vfc_typenum;
	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	sfs.f_fssubtype = sfsp->f_fssubtype;
	if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
		strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
	} else {
		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
	}
	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);

	error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));

	return (error);
}
/*
 * Get file system statistics in 64-bit mode
 */
int
statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;
	struct nameidata nd;
	vfs_context_t ctxp = vfs_context_current();
	vnode_t vp;

	NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctxp);
	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	sp = &mp->mnt_vfsstat;
	nameidone(&nd);

	error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}

	error = statfs64_common(mp, sp, uap->buf);
	vnode_put(vp);

	return (error);
}
/*
 * Get file system statistics in 64-bit mode
 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2589 struct getfsstat_struct {
2600 getfsstat_callback(mount_t mp, void * arg)
2603 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2604 struct vfsstatfs *sp;
2606 vfs_context_t ctx = vfs_context_current();
2608 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2609 sp = &mp->mnt_vfsstat;
2611 * If MNT_NOWAIT is specified, do not refresh the
2612 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2614 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2615 (error = vfs_update_vfsstat(mp, ctx,
2617 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2618 return(VFS_RETURNED);
2622 * Need to handle LP64 version of struct statfs
2624 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2626 fstp->error = error;
2627 return(VFS_RETURNED_DONE);
2629 fstp->sfsp += my_size;
2633 error = mac_mount_label_get(mp, *fstp->mp);
2635 fstp->error = error;
2636 return(VFS_RETURNED_DONE);
2643 return(VFS_RETURNED);
2647 * Get statistics on all filesystems.
2650 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2652 struct __mac_getfsstat_args muap;
2654 muap.buf = uap->buf;
2655 muap.bufsize = uap->bufsize;
2656 muap.mac = USER_ADDR_NULL;
2658 muap.flags = uap->flags;
2660 return (__mac_getfsstat(p, &muap, retval));
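/*
 * Illustrative userland sketch (not part of this file): the common
 * size-then-fill pattern for getfsstat(2), which ends up in the iterator
 * above. The helper name list_mounts() is hypothetical; fenced with #if 0.
 */
#if 0
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

int
list_mounts(void)
{
	int i, n;
	struct statfs *bufs;

	/* First call with a NULL buffer returns the mount count. */
	if ((n = getfsstat(NULL, 0, MNT_NOWAIT)) == -1)
		return (-1);
	if ((bufs = calloc(n, sizeof(*bufs))) == NULL)
		return (-1);
	/* Second call copies out up to n entries without refreshing stats. */
	n = getfsstat(bufs, n * (int)sizeof(*bufs), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%s on %s (%s)\n", bufs[i].f_mntfromname,
		    bufs[i].f_mntonname, bufs[i].f_fstypename);
	free(bufs);
	return (n);
}
#endif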
2664 * __mac_getfsstat: Get MAC-related file system statistics
2666 * Parameters: p (ignored)
2667 * uap User argument descriptor (see below)
2668 * retval Count of file system statistics (N stats)
2670 * Indirect: uap->bufsize Buffer size
2671 * uap->macsize MAC info size
2672 * uap->buf Buffer where information will be returned
2674 * uap->flags File system flags
2677 * Returns: 0 Success
2682 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2686 size_t count, maxcount, bufsize, macsize;
2687 struct getfsstat_struct fst;
2689 bufsize = (size_t) uap->bufsize;
2690 macsize = (size_t) uap->macsize;
2692 if (IS_64BIT_PROCESS(p)) {
2693 maxcount = bufsize / sizeof(struct user64_statfs);
2696 maxcount = bufsize / sizeof(struct user32_statfs);
2704 if (uap->mac != USER_ADDR_NULL) {
2709 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2710 if (count != maxcount)
2713 /* Copy in the array */
2714 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2719 error = copyin(uap->mac, mp0, macsize);
2721 FREE(mp0, M_MACTEMP);
2725 /* Normalize to an array of user_addr_t */
2726 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2728 FREE(mp0, M_MACTEMP);
2732 for (i = 0; i < count; i++) {
2733 if (IS_64BIT_PROCESS(p))
2734 mp[i] = ((user_addr_t *)mp0)[i];
2736 mp[i] = (user_addr_t)mp0[i];
2738 FREE(mp0, M_MACTEMP);
2745 fst.flags = uap->flags;
2748 fst.maxcount = maxcount;
2751 vfs_iterate(0, getfsstat_callback, &fst);
2754 FREE(mp, M_MACTEMP);
2757 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2761 if (fst.sfsp && fst.count > fst.maxcount)
2762 *retval = fst.maxcount;
2764 *retval = fst.count;
2769 getfsstat64_callback(mount_t mp, void * arg)
2771 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2772 struct vfsstatfs *sp;
2775 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2776 sp = &mp->mnt_vfsstat;
2778 * If MNT_NOWAIT is specified, do not refresh the fsstat
2779 * cache. MNT_WAIT overrides MNT_NOWAIT.
2781 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2782 * getfsstat, since the constants are out of the same
2785 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2786 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2787 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2788 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2789 return(VFS_RETURNED);
2792 error = statfs64_common(mp, sp, fstp->sfsp);
2794 fstp->error = error;
2795 return(VFS_RETURNED_DONE);
2797 fstp->sfsp += sizeof(struct statfs64);
2800 return(VFS_RETURNED);
2804 * Get statistics on all file systems in 64 bit mode.
2807 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2810 int count, maxcount;
2811 struct getfsstat_struct fst;
2813 maxcount = uap->bufsize / sizeof(struct statfs64);
2819 fst.flags = uap->flags;
2822 fst.maxcount = maxcount;
2824 vfs_iterate(0, getfsstat64_callback, &fst);
2827 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2831 if (fst.sfsp && fst.count > fst.maxcount)
2832 *retval = fst.maxcount;
2834 *retval = fst.count;
2840 * Gets the vnode associated with the file descriptor passed.
2844 * ctx - vfs context of caller
2845 * fd - file descriptor for which vnode is required.
2846 * vpp - Pointer to pointer to vnode to be returned.
2848 * The vnode is returned with an iocount so any vnode obtained
2849 * by this call needs a vnode_put
2853 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2857 struct fileproc *fp;
2858 proc_t p = vfs_context_proc(ctx);
2862 error = fp_getfvp(p, fd, &fp, &vp);
2866 error = vnode_getwithref(vp);
2868 (void)fp_drop(p, fd, fp, 0);
2872 (void)fp_drop(p, fd, fp, 0);
2878 * Wrapper function around namei to start lookup from a directory
2879 * specified by a file descriptor ni_dirfd.
2881 * In addition to all the errors returned by namei, this call can
2882 * return ENOTDIR if the file descriptor does not refer to a directory,
2883 * and EBADF if the file descriptor is not valid.
2886 nameiat(struct nameidata *ndp, int dirfd)
2888 if ((dirfd != AT_FDCWD) &&
2889 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2890 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2894 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2895 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2899 c = *((char *)(ndp->ni_dirp));
2905 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2910 if (vnode_vtype(dvp_at) != VDIR) {
2915 ndp->ni_dvp = dvp_at;
2916 ndp->ni_cnd.cn_flags |= USEDVP;
2918 ndp->ni_cnd.cn_flags &= ~USEDVP;
2924 return (namei(ndp));
2928 * Change current working directory to a given file descriptor.
2932 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2934 struct filedesc *fdp = p->p_fd;
2940 vfs_context_t ctx = vfs_context_current();
2942 AUDIT_ARG(fd, uap->fd);
2943 if (per_thread && uap->fd == -1) {
2945 * Switching back from per-thread to per process CWD; verify we
2946 * in fact have one before proceeding. The only success case
2947 * for this code path is to return 0 preemptively after zapping
2948 * the thread structure contents.
2950 thread_t th = vfs_context_thread(ctx);
2952 uthread_t uth = get_bsdthread_info(th);
2954 uth->uu_cdir = NULLVP;
2955 if (tvp != NULLVP) {
2963 if ( (error = file_vnode(uap->fd, &vp)) )
2965 if ( (error = vnode_getwithref(vp)) ) {
2970 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2972 if (vp->v_type != VDIR) {
2978 error = mac_vnode_check_chdir(ctx, vp);
2982 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2986 while (!error && (mp = vp->v_mountedhere) != NULL) {
2987 if (vfs_busy(mp, LK_NOWAIT)) {
2991 error = VFS_ROOT(mp, &tdp, ctx);
3000 if ( (error = vnode_ref(vp)) )
3005 thread_t th = vfs_context_thread(ctx);
3007 uthread_t uth = get_bsdthread_info(th);
3010 OSBitOrAtomic(P_THCWD, &p->p_flag);
3035 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3037 return common_fchdir(p, uap, 0);
3041 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3043 return common_fchdir(p, (void *)uap, 1);
3047 * Change current working directory (".").
3049 * Returns: 0 Success
3050 * change_dir:ENOTDIR
3052 * vnode_ref:ENOENT No such file or directory
3056 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3058 struct filedesc *fdp = p->p_fd;
3060 struct nameidata nd;
3062 vfs_context_t ctx = vfs_context_current();
3064 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3065 UIO_USERSPACE, uap->path, ctx);
3066 error = change_dir(&nd, ctx);
3069 if ( (error = vnode_ref(nd.ni_vp)) ) {
3070 vnode_put(nd.ni_vp);
3074 * drop the iocount we picked up in change_dir
3076 vnode_put(nd.ni_vp);
3079 thread_t th = vfs_context_thread(ctx);
3081 uthread_t uth = get_bsdthread_info(th);
3083 uth->uu_cdir = nd.ni_vp;
3084 OSBitOrAtomic(P_THCWD, &p->p_flag);
3086 vnode_rele(nd.ni_vp);
3092 fdp->fd_cdir = nd.ni_vp;
3106 * Change current working directory (".") for the entire process
3108 * Parameters: p Process requesting the call
3109 * uap User argument descriptor (see below)
3112 * Indirect parameters: uap->path Directory path
3114 * Returns: 0 Success
3115 * common_chdir: ENOTDIR
3116 * common_chdir: ENOENT No such file or directory
3121 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3123 return common_chdir(p, (void *)uap, 0);
3129 * Change current working directory (".") for a single thread
3131 * Parameters: p Process requesting the call
3132 * uap User argument descriptor (see below)
3135 * Indirect parameters: uap->path Directory path
3137 * Returns: 0 Success
3138 * common_chdir: ENOTDIR
3139 * common_chdir: ENOENT No such file or directory
3144 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3146 return common_chdir(p, (void *)uap, 1);
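/*
 * Illustrative userland sketch (not part of this file): how chdir(2) and
 * fchdir(2), handled by common_chdir()/common_fchdir() above, are typically
 * paired to restore the original working directory. with_cwd() is a
 * hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
with_cwd(const char *dir)
{
	int olddir, error;

	/* Keep a descriptor on the current directory so we can return to it. */
	olddir = open(".", O_RDONLY);
	if (olddir == -1)
		return (-1);
	error = chdir(dir);
	if (error == 0) {
		/* ... do work relative to dir ... */
		error = fchdir(olddir);
	}
	close(olddir);
	return (error);
}
#endif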
3151 * Change notion of root (``/'') directory.
3155 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3157 struct filedesc *fdp = p->p_fd;
3159 struct nameidata nd;
3161 vfs_context_t ctx = vfs_context_current();
3163 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3166 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3167 UIO_USERSPACE, uap->path, ctx);
3168 error = change_dir(&nd, ctx);
3173 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3176 vnode_put(nd.ni_vp);
3181 if ( (error = vnode_ref(nd.ni_vp)) ) {
3182 vnode_put(nd.ni_vp);
3185 vnode_put(nd.ni_vp);
3189 fdp->fd_rdir = nd.ni_vp;
3190 fdp->fd_flags |= FD_CHROOT;
3200 * Common routine for chroot and chdir.
3202 * Returns: 0 Success
3203 * ENOTDIR Not a directory
3204 * namei:??? [anything namei can return]
3205 * vnode_authorize:??? [anything vnode_authorize can return]
3208 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3213 if ((error = namei(ndp)))
3218 if (vp->v_type != VDIR) {
3224 error = mac_vnode_check_chdir(ctx, vp);
3231 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3241 * Allocate the vnode data (for directories) associated with the file glob.
3244 fg_vn_data_alloc(void)
3246 struct fd_vn_data *fvdata;
3248 /* Allocate per fd vnode data */
3249 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3250 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3251 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3256 * Free the vnode data (for directories) associated with the file glob.
3259 fg_vn_data_free(void *fgvndata)
3261 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3264 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3265 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3266 FREE(fvdata, M_FD_VN_DATA);
3270 * Check permissions, allocate an open file structure,
3271 * and call the device open routine if any.
3273 * Returns: 0 Success
3284 * XXX Need to implement uid, gid
3287 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3288 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3291 proc_t p = vfs_context_proc(ctx);
3292 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3293 struct fileproc *fp;
3296 int type, indx, error;
3298 int no_controlling_tty = 0;
3299 int deny_controlling_tty = 0;
3300 struct session *sessp = SESSION_NULL;
3304 if ((oflags & O_ACCMODE) == O_ACCMODE)
3306 flags = FFLAGS(uflags);
3308 AUDIT_ARG(fflags, oflags);
3309 AUDIT_ARG(mode, vap->va_mode);
3311 if ((error = falloc_withalloc(p,
3312 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3315 uu->uu_dupfd = -indx - 1;
3317 if (!(p->p_flag & P_CONTROLT)) {
3318 sessp = proc_session(p);
3319 no_controlling_tty = 1;
3321 * If conditions would warrant getting a controlling tty if
3322 * the device being opened is a tty (see ttyopen in tty.c),
3323 * but the open flags deny it, set a flag in the session to
3326 if (SESS_LEADER(p, sessp) &&
3327 sessp->s_ttyvp == NULL &&
3328 (flags & O_NOCTTY)) {
3329 session_lock(sessp);
3330 sessp->s_flags |= S_NOCTTY;
3331 session_unlock(sessp);
3332 deny_controlling_tty = 1;
3336 if ((error = vn_open_auth(ndp, &flags, vap))) {
3337 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3338 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3339 fp_drop(p, indx, NULL, 0);
3341 if (deny_controlling_tty) {
3342 session_lock(sessp);
3343 sessp->s_flags &= ~S_NOCTTY;
3344 session_unlock(sessp);
3346 if (sessp != SESSION_NULL)
3347 session_rele(sessp);
3351 if (error == ERESTART)
3353 fp_free(p, indx, fp);
3355 if (deny_controlling_tty) {
3356 session_lock(sessp);
3357 sessp->s_flags &= ~S_NOCTTY;
3358 session_unlock(sessp);
3360 if (sessp != SESSION_NULL)
3361 session_rele(sessp);
3367 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3368 fp->f_fglob->fg_ops = &vnops;
3369 fp->f_fglob->fg_data = (caddr_t)vp;
3372 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3373 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3374 fp->f_fglob->fg_flag |= FENCRYPTED;
3379 if (flags & (O_EXLOCK | O_SHLOCK)) {
3380 lf.l_whence = SEEK_SET;
3383 if (flags & O_EXLOCK)
3384 lf.l_type = F_WRLCK;
3386 lf.l_type = F_RDLCK;
3388 if ((flags & FNONBLOCK) == 0)
3391 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3396 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3398 fp->f_fglob->fg_flag |= FHASLOCK;
3401 /* try to truncate by setting the size attribute */
3402 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3406 * If the open flags denied the acquisition of a controlling tty,
3407 * clear the flag in the session structure that prevented the lower
3408 * level code from assigning one.
3410 if (deny_controlling_tty) {
3411 session_lock(sessp);
3412 sessp->s_flags &= ~S_NOCTTY;
3413 session_unlock(sessp);
3417 * If a controlling tty was set by the tty line discipline, then we
3418 * want to set the vp of the tty into the session structure. We have
3419 * a race here because we can't get to the vp for the tp in ttyopen,
3420 * because it's not passed as a parameter in the open path.
3422 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3425 session_lock(sessp);
3426 ttyvp = sessp->s_ttyvp;
3427 sessp->s_ttyvp = vp;
3428 sessp->s_ttyvid = vnode_vid(vp);
3429 session_unlock(sessp);
3433 * For directories we hold some additional information in the fd.
3435 if (vnode_vtype(vp) == VDIR) {
3436 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3438 fp->f_fglob->fg_vn_data = NULL;
3444 if (flags & O_CLOEXEC)
3445 *fdflags(p, indx) |= UF_EXCLOSE;
3446 if (flags & O_CLOFORK)
3447 *fdflags(p, indx) |= UF_FORKCLOSE;
3448 procfdtbl_releasefd(p, indx, NULL);
3449 fp_drop(p, indx, fp, 1);
3454 if (sessp != SESSION_NULL)
3455 session_rele(sessp);
3458 if (deny_controlling_tty) {
3459 session_lock(sessp);
3460 sessp->s_flags &= ~S_NOCTTY;
3461 session_unlock(sessp);
3463 if (sessp != SESSION_NULL)
3464 session_rele(sessp);
3466 struct vfs_context context = *vfs_context_current();
3467 context.vc_ucred = fp->f_fglob->fg_cred;
3469 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3470 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3471 lf.l_whence = SEEK_SET;
3474 lf.l_type = F_UNLCK;
3477 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3480 vn_close(vp, fp->f_fglob->fg_flag, &context);
3482 fp_free(p, indx, fp);
3488 * While most of the *at syscall handlers can call nameiat() which
3489 * is a wrapper around namei, the use of namei and initialisation
3490 * of nameidata are far removed and in different functions - namei
3491 * gets called in vn_open_auth for open1. So we'll just do here what
3495 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3496 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3499 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3503 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3504 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3508 c = *((char *)(ndp->ni_dirp));
3514 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3519 if (vnode_vtype(dvp_at) != VDIR) {
3524 ndp->ni_dvp = dvp_at;
3525 ndp->ni_cnd.cn_flags |= USEDVP;
3526 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3533 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3537 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3539 * Parameters: p Process requesting the open
3540 * uap User argument descriptor (see below)
3541 * retval Pointer to an area to receive the
3542 * return value from the system call
3544 * Indirect: uap->path Path to open (same as 'open')
3545 * uap->flags Flags to open (same as 'open')
3546 * uap->uid UID to set, if creating
3547 * uap->gid GID to set, if creating
3548 * uap->mode File mode, if creating (same as 'open')
3549 * uap->xsecurity ACL to set, if creating
3551 * Returns: 0 Success
3554 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3556 * XXX: We should enumerate the possible errno values here, and where
3557 * in the code they originated.
3560 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3562 struct filedesc *fdp = p->p_fd;
3564 kauth_filesec_t xsecdst;
3565 struct vnode_attr va;
3566 struct nameidata nd;
3569 AUDIT_ARG(owner, uap->uid, uap->gid);
3572 if ((uap->xsecurity != USER_ADDR_NULL) &&
3573 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3577 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3578 VATTR_SET(&va, va_mode, cmode);
3579 if (uap->uid != KAUTH_UID_NONE)
3580 VATTR_SET(&va, va_uid, uap->uid);
3581 if (uap->gid != KAUTH_GID_NONE)
3582 VATTR_SET(&va, va_gid, uap->gid);
3583 if (xsecdst != NULL)
3584 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3586 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3587 uap->path, vfs_context_current());
3589 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3590 fileproc_alloc_init, NULL, retval);
3591 if (xsecdst != NULL)
3592 kauth_filesec_free(xsecdst);
3598 * Go through the data-protected atomically controlled open (2)
3600 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3602 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3603 int flags = uap->flags;
3604 int class = uap->class;
3605 int dpflags = uap->dpflags;
3608 * Follow the same path as normal open(2)
3609 * Look up the item if it exists, and acquire the vnode.
3611 struct filedesc *fdp = p->p_fd;
3612 struct vnode_attr va;
3613 struct nameidata nd;
3618 /* Mask off all but regular access permissions */
3619 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3620 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3622 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3623 uap->path, vfs_context_current());
3626 * Initialize the extra fields in vnode_attr to pass down our
3628 * 1. target cprotect class.
3629 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3631 if (flags & O_CREAT) {
3632 VATTR_SET(&va, va_dataprotect_class, class);
3635 if (dpflags & O_DP_GETRAWENCRYPTED) {
3636 if ( flags & (O_RDWR | O_WRONLY)) {
3637 /* Not allowed to write raw encrypted bytes */
3640 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3643 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3644 fileproc_alloc_init, NULL, retval);
3650 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3651 int fd, enum uio_seg segflg, int *retval)
3653 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3654 struct vnode_attr va;
3655 struct nameidata nd;
3659 /* Mask off all but regular access permissions */
3660 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3661 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3663 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3666 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3671 open(proc_t p, struct open_args *uap, int32_t *retval)
3673 __pthread_testcancel(1);
3674 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3678 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3681 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3682 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3686 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3689 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3690 uap->mode, uap->fd, UIO_USERSPACE, retval));
3694 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3696 __pthread_testcancel(1);
3697 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
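/*
 * Illustrative userland sketch (not part of this file): creating a file
 * relative to an already-open directory descriptor, which lands in
 * openat_nocancel()/openat_internal() above. create_in_dir() is a
 * hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
create_in_dir(int dirfd, const char *name)
{
	int fd;

	/* O_EXCL makes creation fail if 'name' already exists under dirfd. */
	fd = openat(dirfd, name, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0644);
	if (fd == -1)
		return (-1);
	/* ... write initial contents ... */
	close(fd);
	return (0);
}
#endif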
3701 * openbyid_np: open a file given a file system id and a file system object id
3702 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3703 * for file systems that don't support object ids it is a node id (uint64_t).
3705 * Parameters: p Process requesting the open
3706 * uap User argument descriptor (see below)
3707 * retval Pointer to an area to receive the
3708 * return value from the system call
3710 * Indirect: uap->path Path to open (same as 'open')
3712 * uap->fsid id of target file system
3713 * uap->objid id of target file system object
3714 * uap->flags Flags to open (same as 'open')
3716 * Returns: 0 Success
3720 * XXX: We should enumerate the possible errno values here, and where
3721 * in the code they originated.
3724 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3730 int buflen = MAXPATHLEN;
3732 vfs_context_t ctx = vfs_context_current();
3734 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3738 /* uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3739 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3743 AUDIT_ARG(value32, fsid.val[0]);
3744 AUDIT_ARG(value64, objid);
3746 /* resolve path from fsid, objid */
3748 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3753 error = fsgetpath_internal(
3754 ctx, fsid.val[0], objid,
3755 buflen, buf, &pathlen);
3761 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3769 error = openat_internal(
3770 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3779 * Create a special file.
3781 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3784 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3786 struct vnode_attr va;
3787 vfs_context_t ctx = vfs_context_current();
3789 struct nameidata nd;
3793 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3794 VATTR_SET(&va, va_rdev, uap->dev);
3796 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3797 if ((uap->mode & S_IFMT) == S_IFIFO)
3798 return(mkfifo1(ctx, uap->path, &va));
3800 AUDIT_ARG(mode, uap->mode);
3801 AUDIT_ARG(value32, uap->dev);
3803 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3805 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3806 UIO_USERSPACE, uap->path, ctx);
3818 switch (uap->mode & S_IFMT) {
3819 case S_IFMT: /* used by badsect to flag bad sectors */
3820 VATTR_SET(&va, va_type, VBAD);
3823 VATTR_SET(&va, va_type, VCHR);
3826 VATTR_SET(&va, va_type, VBLK);
3834 error = mac_vnode_check_create(ctx,
3835 nd.ni_dvp, &nd.ni_cnd, &va);
3840 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3843 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3847 int update_flags = 0;
3849 // Make sure the name & parent pointers are hooked up
3850 if (vp->v_name == NULL)
3851 update_flags |= VNODE_UPDATE_NAME;
3852 if (vp->v_parent == NULLVP)
3853 update_flags |= VNODE_UPDATE_PARENT;
3856 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3859 add_fsevent(FSE_CREATE_FILE, ctx,
3867 * nameidone has to happen before we vnode_put(dvp)
3868 * since it may need to release the fs_nodelock on the dvp
3880 * Create a named pipe.
3882 * Returns: 0 Success
3885 * vnode_authorize:???
3889 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3893 struct nameidata nd;
3895 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3896 UIO_USERSPACE, upath, ctx);
3903 /* check that this is a new file and authorize addition */
3908 VATTR_SET(vap, va_type, VFIFO);
3910 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3913 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3916 * nameidone has to happen before we vnode_put(dvp)
3917 * since it may need to release the fs_nodelock on the dvp
3930 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3932 * Parameters: p Process requesting the open
3933 * uap User argument descriptor (see below)
3936 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3937 * uap->uid UID to set
3938 * uap->gid GID to set
3939 * uap->mode File mode to set (same as 'mkfifo')
3940 * uap->xsecurity ACL to set, if creating
3942 * Returns: 0 Success
3945 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3947 * XXX: We should enumerate the possible errno values here, and where
3948 * in the code they originated.
3951 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3954 kauth_filesec_t xsecdst;
3955 struct vnode_attr va;
3957 AUDIT_ARG(owner, uap->uid, uap->gid);
3959 xsecdst = KAUTH_FILESEC_NONE;
3960 if (uap->xsecurity != USER_ADDR_NULL) {
3961 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3966 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3967 if (uap->uid != KAUTH_UID_NONE)
3968 VATTR_SET(&va, va_uid, uap->uid);
3969 if (uap->gid != KAUTH_GID_NONE)
3970 VATTR_SET(&va, va_gid, uap->gid);
3971 if (xsecdst != KAUTH_FILESEC_NONE)
3972 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3974 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3976 if (xsecdst != KAUTH_FILESEC_NONE)
3977 kauth_filesec_free(xsecdst);
3983 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3985 struct vnode_attr va;
3988 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3990 return(mkfifo1(vfs_context_current(), uap->path, &va));
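/*
 * Illustrative userland sketch (not part of this file): mkfifo(2), which the
 * mkfifo()/mkfifo1() handlers above implement. ensure_fifo() is a
 * hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>

int
ensure_fifo(const char *path)
{
	/* Create a named pipe, tolerating one that already exists. */
	if (mkfifo(path, 0600) == -1 && errno != EEXIST)
		return (-1);
	return (0);
}
#endif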
3995 my_strrchr(char *p, int ch)
3999 for (save = NULL;; ++p) {
4008 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4011 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4013 int ret, len = _len;
4015 *truncated_path = 0;
4016 ret = vn_getpath(dvp, path, &len);
4017 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4020 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4021 if (len > MAXPATHLEN) {
4024 // the string got truncated!
4025 *truncated_path = 1;
4026 ptr = my_strrchr(path, '/');
4028 *ptr = '\0'; // chop off the string at the last directory component
4030 len = strlen(path) + 1;
4033 } else if (ret == 0) {
4034 *truncated_path = 1;
4035 } else if (ret != 0) {
4036 struct vnode *mydvp = dvp;
4038 if (ret != ENOSPC) {
4039 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4040 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4042 *truncated_path = 1;
4045 if (mydvp->v_parent != NULL) {
4046 mydvp = mydvp->v_parent;
4047 } else if (mydvp->v_mount) {
4048 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4051 // no parent and no mount point? only thing is to punt and say "/" changed
4052 strlcpy(path, "/", _len);
4057 if (mydvp == NULL) {
4062 ret = vn_getpath(mydvp, path, &len);
4063 } while (ret == ENOSPC);
4071 * Make a hard file link.
4073 * Returns: 0 Success
4078 * vnode_authorize:???
4083 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4084 user_addr_t link, int flag, enum uio_seg segflg)
4086 vnode_t vp, dvp, lvp;
4087 struct nameidata nd;
4093 int need_event, has_listeners;
4094 char *target_path = NULL;
4097 vp = dvp = lvp = NULLVP;
4099 /* look up the object we are linking to */
4100 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4101 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4104 error = nameiat(&nd, fd1);
4112 * Normally, linking to directories is not supported.
4113 * However, some file systems may have limited support.
4115 if (vp->v_type == VDIR) {
4116 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
4117 error = EPERM; /* POSIX */
4120 /* Linking to a directory requires ownership. */
4121 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4122 struct vnode_attr dva;
4125 VATTR_WANTED(&dva, va_uid);
4126 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4127 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4128 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4135 /* lookup the target node */
4139 nd.ni_cnd.cn_nameiop = CREATE;
4140 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4142 error = nameiat(&nd, fd2);
4149 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4153 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4154 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4157 /* target node must not exist */
4158 if (lvp != NULLVP) {
4162 /* cannot link across mountpoints */
4163 if (vnode_mount(vp) != vnode_mount(dvp)) {
4168 /* authorize creation of the target node */
4169 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4172 /* and finally make the link */
4173 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4178 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4182 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4186 has_listeners = kauth_authorize_fileop_has_listeners();
4188 if (need_event || has_listeners) {
4189 char *link_to_path = NULL;
4190 int len, link_name_len;
4192 /* build the path to the new link file */
4193 GET_PATH(target_path);
4194 if (target_path == NULL) {
4199 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4201 if (has_listeners) {
4202 /* build the path to file we are linking to */
4203 GET_PATH(link_to_path);
4204 if (link_to_path == NULL) {
4209 link_name_len = MAXPATHLEN;
4210 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4212 * Call out to allow 3rd party notification of rename.
4213 * Ignore result of kauth_authorize_fileop call.
4215 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4216 (uintptr_t)link_to_path,
4217 (uintptr_t)target_path);
4219 if (link_to_path != NULL) {
4220 RELEASE_PATH(link_to_path);
4225 /* construct fsevent */
4226 if (get_fse_info(vp, &finfo, ctx) == 0) {
4228 finfo.mode |= FSE_TRUNCATED_PATH;
4231 // build the path to the destination of the link
4232 add_fsevent(FSE_CREATE_FILE, ctx,
4233 FSE_ARG_STRING, len, target_path,
4234 FSE_ARG_FINFO, &finfo,
4238 add_fsevent(FSE_STAT_CHANGED, ctx,
4239 FSE_ARG_VNODE, vp->v_parent,
4247 * nameidone has to happen before we vnode_put(dvp)
4248 * since it may need to release the fs_nodelock on the dvp
4251 if (target_path != NULL) {
4252 RELEASE_PATH(target_path);
4264 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4266 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4267 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4271 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4273 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4276 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4277 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
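/*
 * Illustrative userland sketch (not part of this file): linkat(2) with
 * AT_SYMLINK_FOLLOW, the only flag linkat() above accepts. The helper name
 * link_symlink_target() is hypothetical; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
link_symlink_target(int dirfd, const char *symlink_name, const char *newname)
{
	/* Resolve the symlink first, then hard-link its target as newname. */
	return (linkat(dirfd, symlink_name, dirfd, newname, AT_SYMLINK_FOLLOW));
}
#endif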
4281 * Make a symbolic link.
4283 * We could add support for ACLs here too...
4287 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4288 user_addr_t link, enum uio_seg segflg)
4290 struct vnode_attr va;
4293 struct nameidata nd;
4295 uint32_t dfflags; // Directory file flags
4300 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4301 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4302 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4304 path = (char *)path_data;
4308 AUDIT_ARG(text, path); /* This is the link string */
4310 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4313 error = nameiat(&nd, fd);
4319 p = vfs_context_proc(ctx);
4321 VATTR_SET(&va, va_type, VLNK);
4322 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4325 * Handle inheritance of restricted flag
4327 error = vnode_flags(dvp, &dfflags, ctx);
4330 if (dfflags & SF_RESTRICTED)
4331 VATTR_SET(&va, va_flags, SF_RESTRICTED);
4334 error = mac_vnode_check_create(ctx,
4335 dvp, &nd.ni_cnd, &va);
4348 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4349 /* get default ownership, etc. */
4351 error = vnode_authattr_new(dvp, &va, 0, ctx);
4353 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4357 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4360 /* do fallback attribute handling */
4362 error = vnode_setattr_fallback(vp, &va, ctx);
4365 int update_flags = 0;
4368 nd.ni_cnd.cn_nameiop = LOOKUP;
4370 nd.ni_op = OP_LOOKUP;
4372 nd.ni_cnd.cn_flags = 0;
4373 error = nameiat(&nd, fd);
4380 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4381 /* call out to allow 3rd party notification of rename.
4382 * Ignore result of kauth_authorize_fileop call.
4384 if (kauth_authorize_fileop_has_listeners() &&
4386 char *new_link_path = NULL;
4389 /* build the path to the new link file */
4390 new_link_path = get_pathbuff();
4392 vn_getpath(dvp, new_link_path, &len);
4393 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4394 new_link_path[len - 1] = '/';
4395 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4398 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4399 (uintptr_t)path, (uintptr_t)new_link_path);
4400 if (new_link_path != NULL)
4401 release_pathbuff(new_link_path);
4404 // Make sure the name & parent pointers are hooked up
4405 if (vp->v_name == NULL)
4406 update_flags |= VNODE_UPDATE_NAME;
4407 if (vp->v_parent == NULLVP)
4408 update_flags |= VNODE_UPDATE_PARENT;
4411 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4414 add_fsevent(FSE_CREATE_FILE, ctx,
4422 * nameidone has to happen before we vnode_put(dvp)
4423 * since it may need to release the fs_nodelock on the dvp
4431 if (path && (path != (char *)path_data))
4432 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4438 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4440 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4441 uap->link, UIO_USERSPACE));
4445 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4446 __unused int32_t *retval)
4448 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4449 uap->path2, UIO_USERSPACE));
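/*
 * Illustrative userland sketch (not part of this file): symlinkat(2), where
 * path1 is the link contents and path2 names the new link relative to fd,
 * matching the uap->path1/uap->fd/uap->path2 order above.
 * make_relative_symlink() is a hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
make_relative_symlink(int dirfd, const char *target, const char *name)
{
	/* Create "name" -> "target" inside the already-open directory dirfd. */
	return (symlinkat(target, dirfd, name));
}
#endif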
4453 * Delete a whiteout from the filesystem.
4454 * No longer supported.
4457 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4463 * Delete a name from the filesystem.
4467 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4468 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4470 struct nameidata nd;
4473 struct componentname *cnp;
4478 struct vnode_attr va;
4485 struct vnode_attr *vap;
4487 int retry_count = 0;
4490 cn_flags = LOCKPARENT;
4491 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4492 cn_flags |= AUDITVNPATH1;
4493 /* If a starting dvp is passed, it trumps any fd passed. */
4498 /* unlink or delete is allowed on rsrc forks and named streams */
4499 cn_flags |= CN_ALLOWRSRCFORK;
4510 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4512 nd.ni_dvp = start_dvp;
4513 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4517 error = nameiat(&nd, fd);
4525 /* With Carbon delete semantics, busy files cannot be deleted */
4526 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4527 flags |= VNODE_REMOVE_NODELETEBUSY;
4530 /* Skip any potential upcalls if told to. */
4531 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4532 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4536 batched = vnode_compound_remove_available(vp);
4538 * The root of a mounted filesystem cannot be deleted.
4540 if (vp->v_flag & VROOT) {
4545 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4547 if (error == ENOENT &&
4548 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4558 if (!vnode_compound_remove_available(dvp)) {
4559 panic("No vp, but no compound remove?");
4564 need_event = need_fsevent(FSE_DELETE, dvp);
4567 if ((vp->v_flag & VISHARDLINK) == 0) {
4568 /* XXX need to get these data in batched VNOP */
4569 get_fse_info(vp, &finfo, ctx);
4572 error = vfs_get_notify_attributes(&va);
4581 has_listeners = kauth_authorize_fileop_has_listeners();
4582 if (need_event || has_listeners) {
4590 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4594 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4595 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4599 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4601 if (error == EKEEPLOOKING) {
4603 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4606 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4607 panic("EKEEPLOOKING, but continue flag not set?");
4610 if (vnode_isdir(vp)) {
4614 goto lookup_continue;
4615 } else if (error == ENOENT && batched &&
4616 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4618 * For compound VNOPs, the authorization callback may
4619 * return ENOENT in case of racing hardlink lookups
4620 * hitting the name cache, redrive the lookup.
4629 * Call out to allow 3rd party notification of delete.
4630 * Ignore result of kauth_authorize_fileop call.
4633 if (has_listeners) {
4634 kauth_authorize_fileop(vfs_context_ucred(ctx),
4635 KAUTH_FILEOP_DELETE,
4640 if (vp->v_flag & VISHARDLINK) {
4642 // if a hardlink gets deleted we want to blow away the
4643 // v_parent link because the path that got us to this
4644 // instance of the link is no longer valid. this will
4645 // force the next call to get the path to ask the file
4646 // system instead of just following the v_parent link.
4648 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4653 if (vp->v_flag & VISHARDLINK) {
4654 get_fse_info(vp, &finfo, ctx);
4656 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4658 if (truncated_path) {
4659 finfo.mode |= FSE_TRUNCATED_PATH;
4661 add_fsevent(FSE_DELETE, ctx,
4662 FSE_ARG_STRING, len, path,
4663 FSE_ARG_FINFO, &finfo,
4674 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4675 * will cause its shadow file to go away if necessary.
4677 if (vp && (vnode_isnamedstream(vp)) &&
4678 (vp->v_parent != NULLVP) &&
4679 vnode_isshadow(vp)) {
4684 * nameidone has to happen before we vnode_put(dvp)
4685 * since it may need to release the fs_nodelock on the dvp
4701 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4702 enum uio_seg segflg, int unlink_flags)
4704 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4709 * Delete a name from the filesystem using Carbon semantics.
4712 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4714 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4715 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4719 * Delete a name from the filesystem using POSIX semantics.
4722 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4724 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4725 uap->path, UIO_USERSPACE, 0));
4729 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4731 if (uap->flag & ~AT_REMOVEDIR)
4734 if (uap->flag & AT_REMOVEDIR)
4735 return (rmdirat_internal(vfs_context_current(), uap->fd,
4736 uap->path, UIO_USERSPACE));
4738 return (unlinkat_internal(vfs_context_current(), uap->fd,
4739 NULLVP, uap->path, UIO_USERSPACE, 0));
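/*
 * Illustrative userland sketch (not part of this file): unlinkat(2), where
 * AT_REMOVEDIR routes the call to rmdirat_internal() as shown above.
 * remove_at() is a hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
remove_at(int dirfd, const char *name, int is_dir)
{
	/* Remove a name relative to dirfd; directories need AT_REMOVEDIR. */
	return (unlinkat(dirfd, name, is_dir ? AT_REMOVEDIR : 0));
}
#endif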
4743 * Reposition read/write file offset.
4746 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4748 struct fileproc *fp;
4750 struct vfs_context *ctx;
4751 off_t offset = uap->offset, file_size;
4754 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) {
4755 if (error == ENOTSUP)
4759 if (vnode_isfifo(vp)) {
4765 ctx = vfs_context_current();
4767 if (uap->whence == L_INCR && uap->offset == 0)
4768 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4771 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4778 if ( (error = vnode_getwithref(vp)) ) {
4783 switch (uap->whence) {
4785 offset += fp->f_fglob->fg_offset;
4788 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4790 offset += file_size;
4798 if (uap->offset > 0 && offset < 0) {
4799 /* Incremented/relative move past max size */
4803 * Allow negative offsets on character devices, per
4804 * POSIX 1003.1-2001. Most likely for writing disk
4807 if (offset < 0 && vp->v_type != VCHR) {
4808 /* Decremented/relative move before start */
4812 fp->f_fglob->fg_offset = offset;
4813 *retval = fp->f_fglob->fg_offset;
4819 * An lseek can affect whether data is "available to read." Use
4820 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4822 post_event_if_success(vp, error, NOTE_NONE);
4823 (void)vnode_put(vp);
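/*
 * Illustrative userland sketch (not part of this file): using lseek(2),
 * handled above, to read a file's size while preserving the current offset.
 * file_size_fd() is a hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <sys/types.h>
#include <unistd.h>

off_t
file_size_fd(int fd)
{
	off_t cur, end;

	cur = lseek(fd, 0, SEEK_CUR);	/* remember the current offset */
	if (cur == -1)
		return (-1);
	end = lseek(fd, 0, SEEK_END);	/* offset of end-of-file == size */
	if (end == -1)
		return (-1);
	if (lseek(fd, cur, SEEK_SET) == -1)	/* restore the offset */
		return (-1);
	return (end);
}
#endif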
4830 * Check access permissions.
4832 * Returns: 0 Success
4833 * vnode_authorize:???
4836 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4838 kauth_action_t action;
4842 * If just the regular access bits, convert them to something
4843 * that vnode_authorize will understand.
4845 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4848 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4849 if (uflags & W_OK) {
4850 if (vnode_isdir(vp)) {
4851 action |= KAUTH_VNODE_ADD_FILE |
4852 KAUTH_VNODE_ADD_SUBDIRECTORY;
4853 /* might want delete rights here too */
4855 action |= KAUTH_VNODE_WRITE_DATA;
4858 if (uflags & X_OK) {
4859 if (vnode_isdir(vp)) {
4860 action |= KAUTH_VNODE_SEARCH;
4862 action |= KAUTH_VNODE_EXECUTE;
4866 /* take advantage of definition of uflags */
4867 action = uflags >> 8;
4871 error = mac_vnode_check_access(ctx, vp, uflags);
4876 /* action == 0 means only check for existence */
4878 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4889 * access_extended: Check access permissions in bulk.
4891 * Description: uap->entries Pointer to an array of accessx
4892 * descriptor structs, plus one or
4893 * more NULL terminated strings (see
4894 * "Notes" section below).
4895 * uap->size Size of the area pointed to by
4897 * uap->results Pointer to the results array.
4899 * Returns: 0 Success
4900 * ENOMEM Insufficient memory
4901 * EINVAL Invalid arguments
4902 * namei:EFAULT Bad address
4903 * namei:ENAMETOOLONG Filename too long
4904 * namei:ENOENT No such file or directory
4905 * namei:ELOOP Too many levels of symbolic links
4906 * namei:EBADF Bad file descriptor
4907 * namei:ENOTDIR Not a directory
4912 * uap->results Array contents modified
4914 * Notes: The uap->entries are structured as an arbitrary length array
4915 * of accessx descriptors, followed by one or more NULL terminated
4918 * struct accessx_descriptor[0]
4920 * struct accessx_descriptor[n]
4921 * char name_data[0];
4923 * We determine the entry count by walking the buffer containing
4924 * the uap->entries argument descriptor. For each descriptor we
4925 * see, the valid values for the offset ad_name_offset will be
4926 * in the byte range:
4928 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4930 * [ uap->entries + uap->size - 2 ]
4932 * since we must have at least one string, and the string must
4933 * be at least one character plus the NULL terminator in length.
4935 * XXX: Need to support the check-as uid argument
4938 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4940 struct accessx_descriptor *input = NULL;
4941 errno_t *result = NULL;
4944 unsigned int desc_max, desc_actual, i, j;
4945 struct vfs_context context;
4946 struct nameidata nd;
4950 #define ACCESSX_MAX_DESCR_ON_STACK 10
4951 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4953 context.vc_ucred = NULL;
4956 * Validate parameters; if valid, copy the descriptor array and string
4957 * arguments into local memory. Before proceeding, the following
4958 * conditions must have been met:
4960 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4961 * o There must be sufficient room in the request for at least one
4962 * descriptor and a one byte NUL terminated string.
4963 * o The allocation of local storage must not fail.
4965 if (uap->size > ACCESSX_MAX_TABLESIZE)
4967 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4969 if (uap->size <= sizeof (stack_input)) {
4970 input = stack_input;
4972 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4973 if (input == NULL) {
4978 error = copyin(uap->entries, input, uap->size);
4982 AUDIT_ARG(opaque, input, uap->size);
4985 * Force NUL termination of the copyin buffer to avoid namei() running
4986 * off the end. If the caller passes us bogus data, they may get a
4989 ((char *)input)[uap->size - 1] = 0;
4992 * Access is defined as checking against the process' real identity,
4993 * even if operations are checking the effective identity. This
4994 * requires that we use a local vfs context.
4996 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4997 context.vc_thread = current_thread();
5000 * Find out how many entries we have, so we can allocate the result
5001 * array by walking the list and adjusting the count downward by the
5002 * earliest string offset we see.
5004 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5005 desc_actual = desc_max;
5006 for (i = 0; i < desc_actual; i++) {
5008 * Take the offset to the name string for this entry and
5009 * convert to an input array index, which would be one off
5010 * the end of the array if this entry was the lowest-addressed
5013 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5016 * An offset greater than the max allowable offset is an error.
5017 * It is also an error for any valid entry to point
5018 * to a location prior to the end of the current entry, if
5019 * it's not a reference to the string of the previous entry.
5021 if (j > desc_max || (j != 0 && j <= i)) {
5027 * An offset of 0 means use the previous descriptor's offset;
5028 * this is used to chain multiple requests for the same file
5029 * to avoid multiple lookups.
5032 /* This is not valid for the first entry */
5041 * If the offset of the string for this descriptor is before
5042 * what we believe is the current actual last descriptor,
5043 * then we need to adjust our estimate downward; this permits
5044 * the string table following the last descriptor to be out
5045 * of order relative to the descriptor list.
5047 if (j < desc_actual)
5052 * We limit the actual number of descriptors we are willing to process
5053 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5054 * requested does not exceed this limit,
5056 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5060 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5061 if (result == NULL) {
5067 * Do the work by iterating over the descriptor entries we know to
5068 * at least appear to contain valid data.
5071 for (i = 0; i < desc_actual; i++) {
5073 * If the ad_name_offset is 0, then we use the previous
5074 * results to make the check; otherwise, we are looking up
5077 if (input[i].ad_name_offset != 0) {
5078 /* discard old vnodes */
5089 * Scan forward in the descriptor list to see if we
5090 * need the parent vnode. We will need it if we are
5091 * deleting, since we must have rights to remove
5092 * entries in the parent directory, as well as the
5093 * rights to delete the object itself.
5095 wantdelete = input[i].ad_flags & _DELETE_OK;
5096 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5097 if (input[j].ad_flags & _DELETE_OK)
5100 niopts = FOLLOW | AUDITVNPATH1;
5102 /* need parent for vnode_authorize for deletion test */
5104 niopts |= WANTPARENT;
5107 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5108 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5120 * Handle lookup errors.
5130 /* run this access check */
5131 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5134 /* fatal lookup error */
5140 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5142 /* copy out results */
5143 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5146 if (input && input != stack_input)
5147 FREE(input, M_TEMP);
5149 FREE(result, M_TEMP);
5154 if (IS_VALID_CRED(context.vc_ucred))
5155 kauth_cred_unref(&context.vc_ucred);
5161 * Returns: 0 Success
5162 * namei:EFAULT Bad address
5163 * namei:ENAMETOOLONG Filename too long
5164 * namei:ENOENT No such file or directory
5165 * namei:ELOOP Too many levels of symbolic links
5166 * namei:EBADF Bad file descriptor
5167 * namei:ENOTDIR Not a directory
5172 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5173 int flag, enum uio_seg segflg)
5176 struct nameidata nd;
5178 struct vfs_context context;
5180 int is_namedstream = 0;
5184 * Unless the AT_EACCESS option is used, Access is defined as checking
5185 * against the process' real identity, even if operations are checking
5186 * the effective identity. So we need to tweak the credential
5187 * in the context for that case.
5189 if (!(flag & AT_EACCESS))
5190 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5192 context.vc_ucred = ctx->vc_ucred;
5193 context.vc_thread = ctx->vc_thread;
5196 niopts = FOLLOW | AUDITVNPATH1;
5197 /* need parent for vnode_authorize for deletion test */
5198 if (amode & _DELETE_OK)
5199 niopts |= WANTPARENT;
5200 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5204 /* access(F_OK) calls are allowed for resource forks. */
5206 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5208 error = nameiat(&nd, fd);
5213 /* Grab reference on the shadow stream file vnode to
5214 * force an inactive on release which will mark it
5217 if (vnode_isnamedstream(nd.ni_vp) &&
5218 (nd.ni_vp->v_parent != NULLVP) &&
5219 vnode_isshadow(nd.ni_vp)) {
5221 vnode_ref(nd.ni_vp);
5225 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5228 if (is_namedstream) {
5229 vnode_rele(nd.ni_vp);
5233 vnode_put(nd.ni_vp);
5234 if (amode & _DELETE_OK)
5235 vnode_put(nd.ni_dvp);
5239 if (!(flag & AT_EACCESS))
5240 kauth_cred_unref(&context.vc_ucred);
5245 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5247 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5248 uap->path, uap->flags, 0, UIO_USERSPACE));
5252 faccessat(__unused proc_t p, struct faccessat_args *uap,
5253 __unused int32_t *retval)
5255 if (uap->flag & ~AT_EACCESS)
5258 return (faccessat_internal(vfs_context_current(), uap->fd,
5259 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
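/*
 * Illustrative userland sketch (not part of this file): faccessat(2) with
 * AT_EACCESS, which makes the check use the effective rather than the real
 * identity, as the handler above describes. can_write_effective() is a
 * hypothetical helper name; fenced with #if 0.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
can_write_effective(int dirfd, const char *name)
{
	/* Returns 0 when the effective credentials allow writing 'name'. */
	return (faccessat(dirfd, name, W_OK, AT_EACCESS));
}
#endif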
5263 * Returns: 0 Success
5270 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5271 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5272 enum uio_seg segflg, int fd, int flag)
5274 struct nameidata nd;
5281 struct user64_stat user64_sb;
5282 struct user32_stat user32_sb;
5283 struct user64_stat64 user64_sb64;
5284 struct user32_stat64 user32_sb64;
5288 kauth_filesec_t fsec;
5289 size_t xsecurity_bufsize;
5292 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5293 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5297 int is_namedstream = 0;
5298 /* stat calls are allowed for resource forks. */
5299 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5301 error = nameiat(&nd, fd);
5304 fsec = KAUTH_FILESEC_NONE;
5306 statptr = (void *)&source;
5309 /* Grab reference on the shadow stream file vnode to
5310 * force an inactive on release which will mark it
5313 if (vnode_isnamedstream(nd.ni_vp) &&
5314 (nd.ni_vp->v_parent != NULLVP) &&
5315 vnode_isshadow(nd.ni_vp)) {
5317 vnode_ref(nd.ni_vp);
5321 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5324 if (is_namedstream) {
5325 vnode_rele(nd.ni_vp);
5328 vnode_put(nd.ni_vp);
5333 /* Zap spare fields */
5334 if (isstat64 != 0) {
5335 source.sb64.st_lspare = 0;
5336 source.sb64.st_qspare[0] = 0LL;
5337 source.sb64.st_qspare[1] = 0LL;
5338 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5339 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5340 my_size = sizeof(dest.user64_sb64);
5341 sbp = (caddr_t)&dest.user64_sb64;
5343 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5344 my_size = sizeof(dest.user32_sb64);
5345 sbp = (caddr_t)&dest.user32_sb64;
5348 * Check if we raced (post lookup) against the last unlink of a file.
5350 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5351 source.sb64.st_nlink = 1;
5354 source.sb.st_lspare = 0;
5355 source.sb.st_qspare[0] = 0LL;
5356 source.sb.st_qspare[1] = 0LL;
5357 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5358 munge_user64_stat(&source.sb, &dest.user64_sb);
5359 my_size = sizeof(dest.user64_sb);
5360 sbp = (caddr_t)&dest.user64_sb;
5362 munge_user32_stat(&source.sb, &dest.user32_sb);
5363 my_size = sizeof(dest.user32_sb);
5364 sbp = (caddr_t)&dest.user32_sb;
5368 * Check if we raced (post lookup) against the last unlink of a file.
5370 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5371 source.sb.st_nlink = 1;
5374 if ((error = copyout(sbp, ub, my_size)) != 0)
5377 /* caller wants extended security information? */
5378 if (xsecurity != USER_ADDR_NULL) {
5380 /* did we get any? */
5381 if (fsec == KAUTH_FILESEC_NONE) {
5382 if (susize(xsecurity_size, 0) != 0) {
5387 /* find the user buffer size */
5388 xsecurity_bufsize = fusize(xsecurity_size);
5390 /* copy out the actual data size */
5391 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5396 /* if the caller supplied enough room, copy out to it */
5397 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5398 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5402 if (fsec != KAUTH_FILESEC_NONE)
5403 kauth_filesec_free(fsec);
5408 * stat_extended: Get file status; with extended security (ACL).
5410 * Parameters: p (ignored)
5411 * uap User argument descriptor (see below)
5414 * Indirect: uap->path Path of file to get status from
5415 * uap->ub User buffer (holds file status info)
5416 * uap->xsecurity ACL to get (extended security)
5417 * uap->xsecurity_size Size of ACL
5419 * Returns: 0 Success
5424 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5425 __unused int32_t *retval)
5427 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5428 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5433 * Returns: 0 Success
5434 * fstatat_internal:??? [see fstatat_internal() in this file]
5437 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5439 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5440 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5444 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5446 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5447 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5451 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5453 * Parameters: p (ignored)
5454 * uap User argument descriptor (see below)
5457 * Indirect: uap->path Path of file to get status from
5458 * uap->ub User buffer (holds file status info)
5459 * uap->xsecurity ACL to get (extended security)
5460 * uap->xsecurity_size Size of ACL
5462 * Returns: 0 Success
5467 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5469 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5470 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5475 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5477 * Parameters: p (ignored)
5478 * uap User argument descriptor (see below)
5481 * Indirect: uap->path Path of file to get status from
5482 * uap->ub User buffer (holds file status info)
5483 * uap->xsecurity ACL to get (extended security)
5484 * uap->xsecurity_size Size of ACL
5486 * Returns: 0 Success
5491 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5493 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5494 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5495 AT_SYMLINK_NOFOLLOW));
5499 * Get file status; this version does not follow links.
5502 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5504 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5505 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5509 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5511 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5512 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5516 * lstat64_extended: Get file status; can handle large inode numbers; does not
5517 * follow links; with extended security (ACL).
5519 * Parameters: p (ignored)
5520 * uap User argument descriptor (see below)
5523 * Indirect: uap->path Path of file to get status from
5524 * uap->ub User buffer (holds file status info)
5525 * uap->xsecurity ACL to get (extended security)
5526 * uap->xsecurity_size Size of ACL
5528 * Returns: 0 Success
5533 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5535 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5536 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5537 AT_SYMLINK_NOFOLLOW));
5541 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5543 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5546 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5547 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5551 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5552 __unused int32_t *retval)
5554 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5557 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5558 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
/*
 * Get configurable pathname variables.
 *
 * Returns:	0			Success
 *
 * Notes:	Global implementation constants are intended to be
 *		implemented in this function directly; all other constants
 *		are per-FS implementation, and therefore must be handled in
 *		each respective FS, instead.
 *
 * XXX We implement some things globally right now that should actually be
 * XXX per-FS; we will need to deal with this at some point.
 */
pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
	vnode_put(nd.ni_vp);
/*
 * Return target name of a symbolic link.
 */
readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
	struct nameidata nd;
	char uio_buf[ UIO_SIZEOF(1) ];

	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
	error = nameiat(&nd, fd);
	auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, buf, bufsize);
	if (vp->v_type != VLNK) {
	error = mac_vnode_check_readlink(ctx, vp);
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
	error = VNOP_READLINK(vp, auio, ctx);
	*retval = bufsize - (int)uio_resid(auio);

readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
	enum uio_seg procseg;

	procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
	    CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
	    uap->count, procseg, retval));

readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
	enum uio_seg procseg;

	procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
	    procseg, uap->buf, uap->bufsize, procseg, retval));
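/*
 * Illustrative userspace sketch for readlinkat_internal() and its wrappers
 * above (assumes the standard libc wrappers; not part of this file).  The
 * returned buffer is not NUL-terminated; the byte count comes back in
 * *retval:
 *
 *	char target[MAXPATHLEN];
 *	ssize_t n = readlink("/tmp/link", target, sizeof(target));
 *	if (n >= 0)
 *		target[n] = '\0';
 */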
/*
 * Change file flags.
 */
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
	struct vnode_attr va;
	kauth_action_t action;

	VATTR_SET(&va, va_flags, flags);
	error = mac_vnode_check_setflags(ctx, vp, flags);
	/* request authorisation, disregard immutability */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
	error = vnode_setattr(vp, &va, ctx);
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {

/*
 * Change flags of a file given a path name.
 */
chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
	vfs_context_t ctx = vfs_context_current();
	struct nameidata nd;

	AUDIT_ARG(fflags, uap->flags);
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = chflags1(vp, uap->flags, ctx);

/*
 * Change flags of a file given a file descriptor.
 */
fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(fflags, uap->flags);
	if ( (error = file_vnode(uap->fd, &vp)) )
	if ((error = vnode_getwithref(vp))) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	error = chflags1(vp, uap->flags, vfs_context_current());
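/*
 * Both chflags() and fchflags() reduce to chflags1(), which asks the
 * authorization layer to disregard immutability (KAUTH_VNODE_NOIMMUTABLE)
 * so that an immutable flag can be cleared as well as set.
 */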
/*
 * Change security information on a filesystem object.
 *
 * Returns:	0			Success
 *		EPERM			Operation not permitted
 *		vnode_authattr:???	[anything vnode_authattr can return]
 *		vnode_authorize:???	[anything vnode_authorize can return]
 *		vnode_setattr:???	[anything vnode_setattr can return]
 *
 * Notes:	If vnode_authattr or vnode_authorize return EACCES, it will be
 *		translated to EPERM before being returned.
 */
chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
	kauth_action_t action;

	AUDIT_ARG(mode, vap->va_mode);
	/* XXX audit new args */
	/* chmod calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
	if (VATTR_IS_ACTIVE(vap, va_mode) &&
	    (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
	/* make sure that the caller is allowed to set this security information */
	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (error == EACCES)
	error = vnode_setattr(vp, vap, ctx);

/*
 * Change mode of a file given a path name.
 *
 * Returns:	0			Success
 *		namei:???		[anything namei can return]
 *		chmod_vnode:???		[anything chmod_vnode can return]
 */
chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
    int fd, int flag, enum uio_seg segflg)
	struct nameidata nd;

	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
	if ((error = nameiat(&nd, fd)))
	error = chmod_vnode(ctx, nd.ni_vp, vap);
	vnode_put(nd.ni_vp);
/*
 * chmod_extended: Change the mode of a file given a path name; with extended
 * argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *
 * Indirect:	uap->path		Path to object (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->mode		File mode to set (same as 'chmod')
 *		uap->xsecurity		ACL to set (or delete)
 *
 * Returns:	0			Success
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX: We should enumerate the possible errno values here, and where
 * in the code they originated.
 */
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
	case USER_ADDR_NULL:
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

/*
 * Returns:	0			Success
 *		chmodat:???		[anything chmodat can return]
 */
fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
    int flag, enum uio_seg segflg)
	struct vnode_attr va;

	VATTR_SET(&va, va_mode, mode & ALLPERMS);
	return (chmodat(ctx, path, &va, fd, flag, segflg));

chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
	return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
	    AT_FDCWD, 0, UIO_USERSPACE));

fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
	if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
	return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
	    uap->fd, uap->flag, UIO_USERSPACE));

/*
 * Change mode of a file given a file descriptor.
 */
fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
	if ((error = file_vnode(fd, &vp)) != 0)
	if ((error = vnode_getwithref(vp)) != 0) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	error = chmod_vnode(vfs_context_current(), vp, vap);
	(void)vnode_put(vp);

/*
 * fchmod_extended: Change mode of a file given a file descriptor; with
 * extended argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting to change file mode
 *		uap			User argument descriptor (see below)
 *
 * Indirect:	uap->mode		File mode to set (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->xsecurity		ACL to set (or delete)
 *		uap->fd			File descriptor of file to change mode
 *
 * Returns:	0			Success
 */
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
	case CAST_USER_ADDR_T(-1):
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	error = fchmod1(p, uap->fd, &va);
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);

fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
	struct vnode_attr va;

	VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	return(fchmod1(p, uap->fd, &va));
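/*
 * The xsecurity argument to chmod_extended() and fchmod_extended() is
 * overloaded: CAST_USER_ADDR_T((void *)1) (_FILESEC_REMOVE_ACL) deletes the
 * ACL, an ordinary user address is copied in via kauth_copyinfilesec(), and
 * USER_ADDR_NULL / CAST_USER_ADDR_T(-1) are sentinels whose handling differs
 * slightly between the two switch statements above.
 */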
/*
 * Set ownership given a path name.
 */
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
	struct vnode_attr va;
	struct nameidata nd;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	error = nameiat(&nd, fd);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
	error = vnode_setattr(vp, &va, ctx);
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)

chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
	return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
	    uap->uid, uap->gid, 0, UIO_USERSPACE));

lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
	return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
	    uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));

fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
	if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
	return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
	    uap->uid, uap->gid, uap->flag, UIO_USERSPACE));

/*
 * Set ownership given a file descriptor.
 */
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);
	if ( (error = file_vnode(uap->fd, &vp)) )
	if ( (error = vnode_getwithref(vp)) ) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (error == EACCES)
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
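/*
 * For the chown family above, a uid or gid of VNOVAL (-1) means "leave this
 * id unchanged"; only the ids that differ from VNOVAL are staged into the
 * vnode_attr before vnode_setattr() is called.
 */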
getutimes(user_addr_t usrtvp, struct timespec *tsp)
	if (usrtvp == USER_ADDR_NULL) {
		struct timeval old_tv;
		/* XXX Y2038 bug because of microtime argument */
		TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
	if (IS_64BIT_PROCESS(current_proc())) {
		struct user64_timeval tv[2];
		error = copyin(usrtvp, (void *)tv, sizeof(tv));
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
		struct user32_timeval tv[2];
		error = copyin(usrtvp, (void *)tv, sizeof(tv));
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);

setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	va.va_vaflags |= VA_UTIMES_NULL;
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		if (!nullflag && error == EACCES)
	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
	error = vnode_setattr(vp, &va, ctx);

/*
 * Set the access and modification times of a file.
 */
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
	struct timespec ts[2];
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	if ((error = getutimes(usrtvp, ts)) != 0)
	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
	vnode_put(nd.ni_vp);

/*
 * Set the access and modification times of a file.
 */
futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
	struct timespec ts[2];

	AUDIT_ARG(fd, uap->fd);
	if ((error = getutimes(usrtvp, ts)) != 0)
	if ((error = file_vnode(uap->fd, &vp)) != 0)
	if ((error = vnode_getwithref(vp))) {
	error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
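/*
 * Illustrative userspace sketch for utimes()/futimes() (assumes the standard
 * libc wrappers; not part of this file).  Passing a NULL timeval pointer sets
 * both timestamps to "now", which getutimes() implements by sampling the
 * current time and setutimes() marks with VA_UTIMES_NULL:
 *
 *	struct timeval tv[2] = { { 1000000000, 0 }, { 1000000000, 0 } };
 *	utimes("/tmp/f", tv);		// explicit atime/mtime
 *	utimes("/tmp/f", NULL);		// both set to the current time
 */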
/*
 * Truncate a file given its path name.
 */
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	struct nameidata nd;
	kauth_action_t action;

	if (uap->length < 0)
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
	VATTR_SET(&va, va_data_size, uap->length);
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
	error = vnode_setattr(vp, &va, ctx);

/*
 * Truncate a file given a file descriptor.
 */
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	struct fileproc *fp;

	AUDIT_ARG(fd, uap->fd);
	if (uap->length < 0)
	if ( (error = fp_lookup(p, fd, &fp, 0)) ) {
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
	vp = (vnode_t)fp->f_fglob->fg_data;
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	if ((error = vnode_getwithref(vp)) != 0) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	(void)vnode_put(vp);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
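/*
 * Both truncate() and ftruncate() are implemented as a vnode_setattr() of
 * va_data_size; negative lengths are rejected up front, and ftruncate()
 * additionally requires the descriptor to have been opened with FWRITE.
 */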
/*
 * Sync an open file with synchronized I/O _file_ integrity completion
 */
fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
	__pthread_testcancel(1);
	return(fsync_common(p, uap, MNT_WAIT));

/*
 * Sync an open file with synchronized I/O _file_ integrity completion
 *
 * Notes:	This is a legacy support function that does not test for
 *		thread cancellation points.
 */
fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
	return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));

/*
 * Sync an open file with synchronized I/O _data_ integrity completion
 */
fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
	__pthread_testcancel(1);
	return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));

/*
 * Common fsync code to support both synchronized I/O file integrity completion
 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
 *
 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
 * will only guarantee that the file data contents are retrievable. If
 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
 * requires that additional metadata unnecessary for retrieving the file data
 * contents, such as atime, mtime, ctime, etc., also be committed to stable
 * storage.
 *
 * Parameters:	p			The process
 *		uap->fd			The descriptor to synchronize
 *		flags			The data integrity flags
 *
 * Returns:	int			Success
 *		fp_getfvp:EBADF		Bad file descriptor
 *		fp_getfvp:ENOTSUP	fd does not refer to a vnode
 *		VNOP_FSYNC:???		unspecified
 *
 * Notes:	We use struct fsync_args because it is a short name, and all
 *		caller argument structures are otherwise identical.
 */
fsync_common(proc_t p, struct fsync_args *uap, int flags)
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
	if ( (error = vnode_getwithref(vp)) ) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	error = VNOP_FSYNC(vp, flags, ctx);

	/* Sync resource fork shadow file if necessary. */
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	(void)vnode_put(vp);
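/*
 * Illustrative userspace sketch of the distinction (assumes the standard libc
 * wrappers; not part of this file): fdatasync() maps to MNT_DWAIT (data
 * integrity only), while fsync() maps to MNT_WAIT (data plus the remaining
 * metadata such as timestamps):
 *
 *	int fd = open("/tmp/log", O_WRONLY | O_APPEND);
 *	write(fd, buf, len);
 *	fdatasync(fd);		// cheaper: file contents retrievable
 *	fsync(fd);		// stronger: full file integrity
 */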
/*
 * Duplicate files. Source must be a file, target must be a file or
 *
 * XXX Copyfile authorisation checking is woefully inadequate, and will not
 * perform inheritance correctly.
 */
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	vfs_context_t ctx = vfs_context_current();

	/* Check that the flags are valid. */
	if (uap->flags & ~CPF_MASK) {
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
	if (!(uap->flags & CPF_OVERWRITE)) {
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	if (fromnd.ni_startdir)
		vnode_put(fromnd.ni_startdir);
/*
 * Rename files. Source and destination must either both be directories,
 * or both not be directories. If target is a directory, it must be empty.
 */
renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
    int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
	struct nameidata *fromnd, *tond;
	const char *oname = NULL;
	char *from_name = NULL, *to_name = NULL;
	int from_len = 0, to_len = 0;
	int holding_mntlock;
	mount_t locked_mp = NULL;
	vnode_t oparent = NULLVP;
	fse_info from_finfo, to_finfo;
	int from_truncated = 0, to_truncated;
	struct vnode_attr *fvap, *tvap;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct nameidata from_node, to_node;
	struct vnode_attr fv_attr, tv_attr;

	MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
	fromnd = &__rename_data->from_node;
	tond = &__rename_data->to_node;
	holding_mntlock = 0;

	NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
	fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
	NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	tond->ni_flag = NAMEI_COMPOUNDRENAME;

	if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(fromnd, fromfd)) )
		fdvp = fromnd->ni_dvp;
		fvp = fromnd->ni_vp;
		if (fvp && fvp->v_type == VDIR)
			tond->ni_cnd.cn_flags |= WILLBEDIR;
	if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(tond, tofd)) ) {
			/*
			 * Translate error code for rename("dir1", "dir2/.").
			 */
			if (error == EISDIR && fvp->v_type == VDIR)
		tdvp = tond->ni_dvp;

	batched = vnode_compound_rename_available(fdvp);

	/*
	 * Claim: this check will never reject a valid rename.
	 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
	 * Suppose fdvp and tdvp are not on the same mount.
	 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
	 * then you can't move it to within another dir on the same mountpoint.
	 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
	 *
	 * If this check passes, then we are safe to pass these vnodes to the same FS.
	 */
	if (fdvp->v_mount != tdvp->v_mount) {
		goto skipped_lookup;

	error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
	if (error == ENOENT &&
	    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
		/*
		 * We encountered a race where after doing the namei, tvp stops
		 * being valid. If so, simply re-drive the rename call from the
		 */

	/*
	 * If the source and destination are the same (i.e. they're
	 * links to the same vnode) and the target file system is
	 * case sensitive, then there is nothing to do.
	 *
	 * XXX Come back to this.
	 *
	 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
	 * then assume that this file system is case sensitive.
	 */
	if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
	    pathconf_val != 0) {

	/*
	 * Allow the renaming of mount points.
	 * - target must not exist
	 * - target must reside in the same directory as source
	 * - union mounts cannot be renamed
	 * - "/" cannot be renamed
	 *
	 * XXX Handle this in VFS after a continued lookup (if we missed
	 * in the cache to start off)
	 */
	if ((fvp->v_flag & VROOT) &&
	    (fvp->v_type == VDIR) &&
	    (fvp->v_mountedhere == NULL) &&
	    ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
	    (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
		/* switch fvp to the covered vnode */
		coveredvp = fvp->v_mount->mnt_vnodecovered;
		if ( (vnode_getwithref(coveredvp)) ) {

	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {

	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do...
	 * EXCEPT if the underlying file system supports case
	 * insensitivity and is case preserving. In this case
	 * the file system needs to handle the special case of
	 * getting the same vnode as target (fvp) and source (tvp).
	 *
	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
	 * and _PC_CASE_PRESERVING can have this exception, and they need to
	 * handle the special case of getting the same vnode as target and
	 * source. NOTE: Then the target is unlocked going into vnop_rename,
	 * so not to cause locking problems. There is a single reference on tvp.
	 *
	 * NOTE - that fvp == tvp also occurs if they are hard linked and
	 * that correct behaviour then is just to return success without doing
	 *
	 * XXX filesystem should take care of this itself, perhaps...
	 */
	if (fvp == tvp && fdvp == tdvp) {
		if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
		    !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
		    fromnd->ni_cnd.cn_namelen)) {

	if (holding_mntlock && fvp->v_mount != locked_mp) {
		/*
		 * we're holding a reference and lock
		 * on locked_mp, but it no longer matches
		 * what we want to do... so drop our hold
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	if (tdvp != fdvp && fvp->v_type == VDIR) {
		/*
		 * serialize renames that re-shape
		 * the tree... if holding_mntlock is
		 * set, then we're ready to go...
		 *
		 * first need to drop the iocounts
		 * we picked up, second take the
		 * lock to serialize the access,
		 * then finally start the lookup
		 * process over with the lock held
		 */
		if (!holding_mntlock) {
			/*
			 * need to grab a reference on
			 * the mount point before we
			 * drop all the iocounts... once
			 * the iocounts are gone, the mount
			 */
			locked_mp = fvp->v_mount;
			mount_ref(locked_mp, 0);
			/*
			 * nameidone has to happen before we vnode_put(tvp)
			 * since it may need to release the fs_nodelock on the tvp
			 */
			/*
			 * nameidone has to happen before we vnode_put(fdvp)
			 * since it may need to release the fs_nodelock on the fvp
			 */
			mount_lock_renames(locked_mp);
			holding_mntlock = 1;
		/*
		 * when we dropped the iocounts to take
		 * the lock, we allowed the identity of
		 * the various vnodes to change... if they did,
		 * we may no longer be dealing with a rename
		 * that reshapes the tree... once we're holding
		 * the iocounts, the vnodes can't change type
		 * so we're free to drop the lock at this point
		 */
		if (holding_mntlock) {
			mount_unlock_renames(locked_mp);
			mount_drop(locked_mp, 0);
			holding_mntlock = 0;

	// save these off so we can later verify that fvp is the same
	oname = fvp->v_name;
	oparent = fvp->v_parent;

	need_event = need_fsevent(FSE_RENAME, fdvp);
	get_fse_info(fvp, &from_finfo, ctx);
	error = vfs_get_notify_attributes(&__rename_data->fv_attr);
	fvap = &__rename_data->fv_attr;
	get_fse_info(tvp, &to_finfo, ctx);
	} else if (batched) {
		error = vfs_get_notify_attributes(&__rename_data->tv_attr);
		tvap = &__rename_data->tv_attr;
#endif /* CONFIG_FSE */

	if (need_event || kauth_authorize_fileop_has_listeners()) {
		if (from_name == NULL) {
			GET_PATH(from_name);
			if (from_name == NULL) {
		from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
		if (to_name == NULL) {
			if (to_name == NULL) {
		to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
#if CONFIG_SECLUDED_RENAME
	if (flags & VFS_SECLUDE_RENAME) {
		fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
#pragma unused(flags)
	error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
	    tdvp, &tvp, &tond->ni_cnd, tvap,
	if (holding_mntlock) {
		/*
		 * we can drop our serialization
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	if (error == EKEEPLOOKING) {
		if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
			if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
				panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
		fromnd->ni_vp = fvp;
		goto continue_lookup;
	/*
	 * We may encounter a race in the VNOP where the destination didn't
	 * exist when we did the namei, but it does by the time we go and
	 * try to create the entry. In this case, we should re-drive this rename
	 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
	 * but other filesystems susceptible to this race could return it, too.
	 */
	if (error == ERECYCLE) {
	/*
	 * For compound VNOPs, the authorization callback may return
	 * ENOENT in case of racing hardlink lookups hitting the name
	 * cache, redrive the lookup.
	 */
	if (batched && error == ENOENT &&
	    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {

	/* call out to allow 3rd party notification of rename.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	kauth_authorize_fileop(vfs_context_ucred(ctx),
	    KAUTH_FILEOP_RENAME,
	    (uintptr_t)from_name, (uintptr_t)to_name);
	if (from_name != NULL && to_name != NULL) {
		if (from_truncated || to_truncated) {
			// set it here since only the from_finfo gets reported up to user space
			from_finfo.mode |= FSE_TRUNCATED_PATH;
		vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
		vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
		add_fsevent(FSE_RENAME, ctx,
		    FSE_ARG_STRING, from_len, from_name,
		    FSE_ARG_FINFO, &from_finfo,
		    FSE_ARG_STRING, to_len, to_name,
		    FSE_ARG_FINFO, &to_finfo,
		add_fsevent(FSE_RENAME, ctx,
		    FSE_ARG_STRING, from_len, from_name,
		    FSE_ARG_FINFO, &from_finfo,
		    FSE_ARG_STRING, to_len, to_name,
#endif /* CONFIG_FSE */

	/*
	 * update filesystem's mount point data
	 */
	char *cp, *pathend, *mpname;

	mp = fvp->v_mountedhere;
	if (vfs_busy(mp, LK_NOWAIT)) {
	MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
	if (UIO_SEG_IS_USER_SPACE(segflg))
		error = copyinstr(to, tobuf, MAXPATHLEN, &len);
		error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
	/* find current mount point prefix */
	pathend = &mp->mnt_vfsstat.f_mntonname[0];
	for (cp = pathend; *cp != '\0'; ++cp) {
	/* find last component of target name */
	for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
	/* append name to prefix */
	maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
	bzero(pathend, maxlen);
	strlcpy(pathend, mpname, maxlen);
	FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);

	/*
	 * fix up name & parent pointers. note that we first
	 * check that fvp has the same name/parent pointers it
	 * had before the rename call... this is a 'weak' check
	 *
	 * XXX oparent and oname may not be set in the compound vnop case
	 */
	if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
		update_flags = VNODE_UPDATE_NAME;
		update_flags |= VNODE_UPDATE_PARENT;
		vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
	if (to_name != NULL) {
		RELEASE_PATH(to_name);
	if (from_name != NULL) {
		RELEASE_PATH(from_name);
	if (holding_mntlock) {
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	/*
	 * nameidone has to happen before we vnode_put(fdvp)
	 * since it may need to release the fs_nodelock on the fdvp
	 */
	/*
	 * If things changed after we did the namei, then we will re-drive
	 * this rename call from the top.
	 */
	FREE(__rename_data, M_TEMP);
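/*
 * renameat_internal() can be re-driven in three situations seen above:
 * EKEEPLOOKING (a compound VNOP wants the lookup continued), ERECYCLE (the
 * destination appeared between the namei and the VNOP), and ENOENT from the
 * authorization callback while retry_count < MAX_AUTHORIZE_ENOENT_RETRIES.
 */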
rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
	return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
	    AT_FDCWD, uap->to, UIO_USERSPACE, 0));

#if CONFIG_SECLUDED_RENAME
int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
	return renameat_internal(
	    vfs_context_current(),
	    AT_FDCWD, uap->from,
	    UIO_USERSPACE, uap->flags);

renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
	return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
	    uap->tofd, uap->to, UIO_USERSPACE, 0));
/*
 * Make a directory file.
 *
 * Returns:	0			Success
 *		vnode_authorize:???
 */
mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
    enum uio_seg segflg)
	int update_flags = 0;
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;
	error = nameiat(&nd, fd);
	batched = vnode_compound_mkdir_available(dvp);
	VATTR_SET(vap, va_type, VDIR);

	/*
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCES if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
			error2 = nameiat(&nd, fd);

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		if (error == EKEEPLOOKING) {
			goto continue_lookup;

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;
	vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */

/*
 * mkdir_extended: Create a directory; with extended security (ACL).
 *
 * Parameters:	p			Process requesting to create the directory
 *		uap			User argument descriptor (see below)
 *
 * Indirect:	uap->path		Path of directory to create
 *		uap->mode		Access permissions to set
 *		uap->xsecurity		ACL to set
 *
 * Returns:	0			Success
 */
mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
	kauth_filesec_t xsecdst;
	struct vnode_attr va;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	if ((uap->xsecurity != USER_ADDR_NULL) &&
	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
	VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
	if (xsecdst != NULL)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
	struct vnode_attr va;

	VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
	return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,

mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
	struct vnode_attr va;

	VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
	return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
	struct nameidata nd;
	int has_listeners = 0;
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
	    segflg, dirpath, ctx);
	nd.ni_flag = NAMEI_COMPOUNDRMDIR;
	error = nameiat(&nd, fd);
	batched = vnode_compound_rmdir_available(vp);
	if (vp->v_flag & VROOT) {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 */
	/*
	 * Removed a check here; we used to abort if vp's vid
	 * was not the same as what we'd seen the last time around.
	 * I do not think that check was valid, because if we retry
	 * and all dirents are gone, the directory could legitimately
	 * be recycled but still be present in a situation where we would
	 * have had permission to delete. Therefore, we won't make
	 * an effort to preserve that check now that we may not have a
	 */
	error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
	if (error == ENOENT &&
	    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
	if (!vnode_compound_rmdir_available(dvp)) {
		panic("No error, but no compound rmdir?");
	need_event = need_fsevent(FSE_DELETE, dvp);
	get_fse_info(vp, &finfo, ctx);
	error = vfs_get_notify_attributes(&va);
	has_listeners = kauth_authorize_fileop_has_listeners();
	if (need_event || has_listeners) {
		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
		finfo.mode |= FSE_TRUNCATED_PATH;
	error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
	/* Couldn't find a vnode */
	if (error == EKEEPLOOKING) {
		goto continue_lookup;
	} else if (batched && error == ENOENT &&
	    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
		/*
		 * For compound VNOPs, the authorization callback
		 * may return ENOENT in case of racing hard link lookups
		 * redrive the lookup.
		 */
#if CONFIG_APPLEDOUBLE
	/*
	 * Special case to remove orphaned AppleDouble
	 * files. I don't like putting this in the kernel,
	 * but carbon does not like putting this in carbon either,
	 */
	if (error == ENOTEMPTY) {
		error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
		if (error == EBUSY) {
		/*
		 * Assuming everything went well, we will try the RMDIR again
		 */
		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
#endif /* CONFIG_APPLEDOUBLE */
	/*
	 * Call out to allow 3rd party notification of delete.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	if (has_listeners) {
		kauth_authorize_fileop(vfs_context_ucred(ctx),
		    KAUTH_FILEOP_DELETE,
	if (vp->v_flag & VISHARDLINK) {
		// see the comment in unlink1() about why we update
		// the parent of a hard link when it is removed
		vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
	vnode_get_fse_info_from_vap(vp, &finfo, vap);
	add_fsevent(FSE_DELETE, ctx,
	    FSE_ARG_STRING, len, path,
	    FSE_ARG_FINFO, &finfo,
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	if (restart_flag == 0) {
		wakeup_one((caddr_t)vp);
	tsleep(vp, PVFS, "rm AD", 1);
	} while (restart_flag != 0);

/*
 * Remove a directory file.
 */
rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
	return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
	    CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
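/*
 * The do/while surrounding the body of rmdirat_internal() exists primarily
 * for the CONFIG_APPLEDOUBLE case: when ENOTEMPTY is caused by orphaned
 * AppleDouble files, they are removed and restart_flag forces another pass.
 */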
/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
    int *numdirent, vfs_context_t ctxp)
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
	    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	struct direntry *entry64;

	/*
	 * Our kernel buffer needs to be smaller since re-packing
	 * will expand each dirent. The worst case (when the name
	 * length is 3) corresponds to a struct direntry size of 32
	 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
	 * (4-byte aligned). So having a buffer that is 3/8 the size
	 * will prevent us from reading more than we can pack.
	 *
	 * Since this buffer is wired memory, we will limit the
	 * buffer size to a maximum of 32K. We would really like to
	 * use 32K in the MIN(), but we use magic number 87371 to
	 * prevent uio_resid() * 3 / 8 from overflowing.
	 */
	bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
	MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
	if (bufptr == NULL) {
	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	uio_addiov(auio, (uintptr_t)bufptr, bufsize);
	auio->uio_offset = uio->uio_offset;
	error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
	dep = (struct dirent *)bufptr;
	bytesread = bufsize - uio_resid(auio);
	MALLOC(entry64, struct direntry *, sizeof(struct direntry),
	/*
	 * Convert all the entries and copy them out to user's buffer.
	 */
	while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
		size_t enbufsize = DIRENT64_LEN(dep->d_namlen);

		bzero(entry64, enbufsize);
		/* Convert a dirent to a dirent64. */
		entry64->d_ino = dep->d_ino;
		entry64->d_seekoff = 0;
		entry64->d_reclen = enbufsize;
		entry64->d_namlen = dep->d_namlen;
		entry64->d_type = dep->d_type;
		bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
		/* Move to next entry. */
		dep = (struct dirent *)((char *)dep + dep->d_reclen);
		/* Copy entry64 to user's buffer. */
		error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
	/* Update the real offset using the offset we got from VNOP_READDIR. */
	uio->uio_offset = auio->uio_offset;
	FREE(bufptr, M_TEMP);
	FREE(entry64, M_TEMP);
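/*
 * Worked example of the 3/8 sizing above (numbers follow directly from the
 * structure sizes quoted in the original comment): a 3-character name packs
 * into a 12-byte struct dirent but expands to a 32-byte struct direntry, so
 * bufsize = 3/8 * uio_resid() guarantees the repacked output never exceeds
 * the caller's buffer; MIN(..., 87371u) caps the wired buffer near 32K
 * (3 * 87371 / 8 = 32764) and keeps the multiplication from overflowing.
 */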
#define GETDIRENTRIES_MAXBUFSIZE	(128 * 1024 * 1024U)

/*
 * Read a block of directory entries in a file system independent format.
 */
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
    off_t *offset, int flags)
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if ( (error = vnode_getwithref(vp)) ) {
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
	error = mac_vnode_check_readdir(&context, vp);
		(void)vnode_put(vp);
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);
	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	(void)vnode_put(vp);
	if ((user_ssize_t)bufsize == uio_resid(auio)) {
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
	*bytesread = bufsize - uio_resid(auio);

getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
	AUDIT_ARG(fd, uap->fd);
	error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
	if (proc_is64bit(p)) {
		user64_long_t base = (user64_long_t)offset;
		error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
		user32_long_t base = (user32_long_t)offset;
		error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
	*retval = bytesread;

getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
	AUDIT_ARG(fd, uap->fd);
	error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
	*retval = bytesread;
	error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
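/*
 * getdirentries() and getdirentries64() differ only in the flags passed to
 * getdirentries_common(): the 64-bit variant requests VNODE_READDIR_EXTENDED
 * and copies the final offset out through uap->position instead of a long
 * through uap->basep.
 */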
/*
 * Set the mode mask for creation of filesystem nodes.
 * XXX implement xsecurity
 */
#define UMASK_NOXSECURITY	 (void *)1	/* leave existing xsecurity alone */
umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
	struct filedesc *fdp;

	AUDIT_ARG(mask, newmask);
	*retval = fdp->fd_cmask;
	fdp->fd_cmask = newmask & ALLPERMS;

/*
 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
 *
 * Parameters:	p			Process requesting to set the umask
 *		uap			User argument descriptor (see below)
 *		retval			umask of the process (parameter p)
 *
 * Indirect:	uap->newmask		umask to set
 *		uap->xsecurity		ACL to set
 *
 * Returns:	0			Success
 */
umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
	kauth_filesec_t xsecdst;

	xsecdst = KAUTH_FILESEC_NONE;
	if (uap->xsecurity != USER_ADDR_NULL) {
		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
		xsecdst = KAUTH_FILESEC_NONE;
	ciferror = umask1(p, uap->newmask, xsecdst, retval);
	if (xsecdst != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsecdst);

umask(proc_t p, struct umask_args *uap, int32_t *retval)
	return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
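/*
 * Illustrative userspace sketch for umask() (assumes the standard libc
 * wrapper; not part of this file).  umask1() returns the previous mask in
 * *retval, so the call doubles as a query:
 *
 *	mode_t old = umask(022);	// set new mask, remember the old one
 *	umask(old);			// restore
 */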
/*
 * Void all references to file by ripping underlying filesystem
 */
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
	error = mac_vnode_check_revoke(ctx, vp);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
/*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
 * The following system calls are designed to support features
 * which are specific to the HFS & HFS Plus volume formats
 */

/*
 * Obtain attribute information on objects in a directory while enumerating
 */
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
	struct fileproc *fp;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count, savecount;
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	if ( (error = vnode_getwithref(vp)) )
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
	error = mac_vnode_check_readdir(ctx, vp);
		(void)vnode_put(vp);
	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);
	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY. If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;
	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */
		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		    (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	(void) vnode_put(vp);
	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) {	// Got some entries
		} else {	// Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;	// reset index for new dir
			vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
	(void)vnode_put(vp);
	fp->f_fglob->fg_offset = uio_offset(auio);	/* should be multiple of dirent, not variable */
	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
	*retval = eofflag;	/* similar to getdirentries */
	return (error);	/* return error earlier, a retval of 0 or 1 now */
} /* end of getdirentriesattr system call */
/*
 * Exchange data between two files
 */
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	u_int32_t nameiflags;
	int from_truncated = 0, to_truncated = 0;
	fse_info f_finfo, s_finfo;

	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path1, ctx);
	error = namei(&fnd);
	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	    UIO_USERSPACE, uap->path2, ctx);
	error = namei(&snd);
	/*
	 * if the files are the same, return an inval error
	 */
	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
	error = mac_vnode_check_exchangedata(ctx,
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
	    need_fsevent(FSE_EXCHANGE, fvp) ||
	    kauth_authorize_fileop_has_listeners()) {
		if (fpath == NULL || spath == NULL) {
		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
	const char *tmpname;

	if (fpath != NULL && spath != NULL) {
		/* call out to allow 3rd party notification of exchangedata.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
		    (uintptr_t)fpath, (uintptr_t)spath);
	tmpname = fvp->v_name;
	fvp->v_name = svp->v_name;
	svp->v_name = tmpname;
	if (fvp->v_parent != svp->v_parent) {
		tmp = fvp->v_parent;
		fvp->v_parent = svp->v_parent;
		svp->v_parent = tmp;
	name_cache_unlock();
	if (fpath != NULL && spath != NULL) {
		add_fsevent(FSE_EXCHANGE, ctx,
		    FSE_ARG_STRING, flen, fpath,
		    FSE_ARG_FINFO, &f_finfo,
		    FSE_ARG_STRING, slen, spath,
		    FSE_ARG_FINFO, &s_finfo,
	RELEASE_PATH(fpath);
	RELEASE_PATH(spath);
/*
 * Return (in MB) the amount of freespace on the given vnode's volume.
 */
uint32_t freespace_mb(vnode_t vp);

freespace_mb(vnode_t vp)
{
	vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
	return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
	         vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
}
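/*
 * Editorial note (illustrative numbers, not from the source): with
 * f_bavail = 1,000,000 free blocks and f_bsize = 4096 bytes, the product is
 * 4,096,000,000 bytes; shifting right by 20 divides by 1,048,576, giving
 * roughly 3906 MB.  Doing the multiply in uint64_t avoids overflowing 32 bits
 * before the shift.
 */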
searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
{
	struct nameidata nd;
	struct user64_fssearchblock searchblock;
	struct searchstate *state;
	struct attrlist *returnattrs;
	struct timeval timelimit;
	void *searchparams1, *searchparams2;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t nummatches;
	uint32_t nameiflags;
	vfs_context_t ctx = vfs_context_current();
	char uio_buf[ UIO_SIZEOF(1) ];

	/* Start by copying in fsearchblock parameter list */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
		timelimit.tv_sec = searchblock.timelimit.tv_sec;
		timelimit.tv_usec = searchblock.timelimit.tv_usec;
		struct user32_fssearchblock tmp_searchblock;

		error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
		// munge into 64-bit version
		searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
		searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
		searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
		searchblock.maxmatches = tmp_searchblock.maxmatches;
		/*
		 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
		 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
		 */
		timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
		timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
		searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
		searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
		searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
		searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
		searchblock.searchattrs = tmp_searchblock.searchattrs;

	/* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. */
	if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
	    searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)

	/* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
	/* It all has to go into local memory and it's not that big so we might as well put it all together.  */
	/* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated */
	/* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
	/* due to the changes in rdar://problem/12438273.  That way if a 3rd party file system */
	/* assumes the size is still 556 bytes it will continue to work */
	mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
	    sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));

	MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);

	/* Now set up the various pointers to the correct place in our newly allocated memory */
	searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
	returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
	state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));

	/* Now copy in the stuff given our local variables. */
	if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
	if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
	if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
	if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))

	/*
	 * When searching a union mount, need to set the
	 * start flag at the first call on each layer to
	 * reset state for the new volume.
	 */
	if (uap->options & SRCHFS_START)
		state->ss_union_layer = 0;
	uap->options |= state->ss_union_flags;
	state->ss_union_flags = 0;

	/*
	 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
	 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
	 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
	 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
	 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
	 */
	if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
		attrreference_t *string_ref;
		u_int32_t *start_length;
		user64_size_t param_length;

		/* validate searchparams1 */
		param_length = searchblock.sizeofsearchparams1;
		/* skip the word that specifies length of the buffer */
		start_length = (u_int32_t *) searchparams1;
		start_length = start_length + 1;
		string_ref = (attrreference_t *) start_length;

		/* ensure no negative offsets or too big offsets */
		if (string_ref->attr_dataoffset < 0 ) {
		if (string_ref->attr_length > MAXPATHLEN) {

		/* Check for pointer overflow in the string ref */
		if (((char *) string_ref + string_ref->attr_dataoffset) < (char *) string_ref) {
		if (((char *) string_ref + string_ref->attr_dataoffset) > ((char *)searchparams1 + param_length)) {
		if (((char *)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char *)searchparams1 + param_length)) {

	/* set up the uio structure which will contain the users return buffer */
	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);

	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);

	/*
	 * Switch to the root vnode for the volume
	 */
	error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);

	/*
	 * If it's a union mount, the path lookup takes
	 * us to the top layer. But we may need to descend
	 * to a lower layer. For non-union mounts the layer
	 */
	for (i = 0; i < (int) state->ss_union_layer; i++) {
		if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
		vp = vp->v_mount->mnt_vnodecovered;
	vnode_getwithref(vp);

	error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);

	/*
	 * If searchblock.maxmatches == 0, then skip the search. This has happened
	 * before and sometimes the underlying code doesn't deal with it well.
	 */
	if (searchblock.maxmatches == 0) {

	/*
	 * Alright, we have everything we need, so let's make that call.
	 *
	 * We keep special track of the return value from the file system:
	 * EAGAIN is an acceptable error condition that shouldn't keep us
	 * from copying out any results...
	 */
	fserror = VNOP_SEARCHFS(vp,
	                        &searchblock.searchattrs,
	                        (u_long)searchblock.maxmatches,
	                        (u_long)uap->scriptcode,
	                        (u_long)uap->options,
	                        (struct searchstate *) &state->ss_fsstate,

	/*
	 * If it's a union mount we need to be called again
	 * to search the mounted-on filesystem.
	 */
	if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
		state->ss_union_flags = SRCHFS_START;
		state->ss_union_layer++;	// search next layer down

	/* Now copy out the stuff that needs copying out. That means the number of matches, the
	   search state. Everything was already put into the return buffer by the vop call. */
	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
	if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)

	FREE(searchparams1, M_TEMP);

} /* end of searchfs system call */
#else /* CONFIG_SEARCHFS */

searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)

#endif /* CONFIG_SEARCHFS */
lck_grp_attr_t *  nspace_group_attr;
lck_attr_t *      nspace_lock_attr;
lck_grp_t *       nspace_mutex_group;

lck_mtx_t         nspace_handler_lock;
lck_mtx_t         nspace_handler_exclusion_lock;

time_t snapshot_timestamp = 0;
int    nspace_allow_virtual_devs = 0;

void nspace_handler_init(void);

typedef struct nspace_item_info {

#define MAX_NSPACE_ITEMS   128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t         nspace_item_idx = 0;          // also used as the sleep/wakeup rendezvous address
uint32_t         nspace_token_id = 0;
uint32_t         nspace_handler_timeout = 15;  // seconds

#define NSPACE_ITEM_NEW          0x0001
#define NSPACE_ITEM_PROCESSING   0x0002
#define NSPACE_ITEM_DEAD         0x0004
#define NSPACE_ITEM_CANCELLED    0x0008
#define NSPACE_ITEM_DONE         0x0010
#define NSPACE_ITEM_RESET_TIMER  0x0020

#define NSPACE_ITEM_NSPACE_EVENT   0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
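/*
 * Editorial note, summarizing how the flags above are used in the code below:
 * an item enters the table as NSPACE_ITEM_NEW, a handler thread marks it
 * NSPACE_ITEM_PROCESSING, and it finishes as either NSPACE_ITEM_DONE or
 * NSPACE_ITEM_CANCELLED.  NSPACE_ITEM_RESET_TIMER lets the handler extend a
 * waiter's timeout, and the *_EVENT bits select which handler (namespace vs.
 * snapshot) the item belongs to.
 */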
//#pragma optimization_level 0

	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,

	uint64_t handler_tid;
	struct proc *handler_proc;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
{
	switch(nspace_type) {
	case NSPACE_HANDLER_NSPACE:
		return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
	case NSPACE_HANDLER_SNAPSHOT:
		return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
		printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);

static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
{
	switch(nspace_type) {
	case NSPACE_HANDLER_NSPACE:
		return NSPACE_ITEM_NSPACE_EVENT;
	case NSPACE_HANDLER_SNAPSHOT:
		return NSPACE_ITEM_SNAPSHOT_EVENT;
		printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);

static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
{
	switch(nspace_type) {
	case NSPACE_HANDLER_NSPACE:
		return FREAD | FWRITE | O_EVTONLY;
	case NSPACE_HANDLER_SNAPSHOT:
		return FREAD | O_EVTONLY;
		printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);

static inline nspace_type_t nspace_type_for_op(uint64_t op)
{
	switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
	case NAMESPACE_HANDLER_NSPACE_EVENT:
		return NSPACE_HANDLER_NSPACE;
	case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
		return NSPACE_HANDLER_SNAPSHOT;
		printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
		return NSPACE_HANDLER_NSPACE;

static inline int nspace_is_special_process(struct proc *proc)
{
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (proc == nspace_handlers[i].handler_proc)
nspace_handler_init(void)
{
	nspace_lock_attr   = lck_attr_alloc_init();
	nspace_group_attr  = lck_grp_attr_alloc_init();
	nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
	memset(&nspace_items[0], 0, sizeof(nspace_items));

nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;

	if (event_mask == 0) {

	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;

	// unblock anyone that's waiting for the handler that died
	lck_mtx_lock(&nspace_handler_lock);
	for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
			if ( nspace_items[i].flags & event_mask ) {

				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);

				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				wakeup((caddr_t)&(nspace_items[i].vp));

	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);

resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {

	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	if (nspace_handlers[nspace_type].handler_proc == NULL) {

	if (nspace_is_special_process(current_proc())) {

	lck_mtx_lock(&nspace_handler_lock);

	for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {

	if (i >= MAX_NSPACE_ITEMS) {
		for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
		nspace_items[i].refcount++;

	if (i >= MAX_NSPACE_ITEMS) {
		ts.tv_sec = nspace_handler_timeout;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
			// an entry got free'd up, go see if we can get a slot

		lck_mtx_unlock(&nspace_handler_lock);

	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			vnode_lock_spin(vp);
			vp->v_flag |= VNEEDSSNAPSHOT;

	nspace_items[i].token = 0;
	nspace_items[i].refcount = 1;

	wakeup((caddr_t)&nspace_item_idx);

	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;

		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);

	if (--nspace_items[i].refcount == 0) {
		nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
		nspace_items[i].arg = NULL;
		nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
		nspace_items[i].flags = 0;     // this clears it for re-use

	wakeup(&nspace_token_id);

	lck_mtx_unlock(&nspace_handler_lock);
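/*
 * Editorial note: resolve_nspace_item_ext() and the handler side use the
 * rendezvous addresses visible above -- the requesting thread sleeps on
 * &nspace_items[i].vp until the handler marks the slot DONE or CANCELLED,
 * while &nspace_token_id wakes threads waiting for a free slot once a
 * refcount drops to zero.  New-item arrival is signalled to the handler by
 * waking &nspace_item_idx.
 */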
get_nspace_item_status(struct vnode *vp, int32_t *status)
{
	lck_mtx_lock(&nspace_handler_lock);
	for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].vp == vp) {

	if (i >= MAX_NSPACE_ITEMS) {
		lck_mtx_unlock(&nspace_handler_lock);

	*status = nspace_items[i].flags;
	lck_mtx_unlock(&nspace_handler_lock);
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
// Note: this function does NOT check permissions on all of the
// parent directories leading to this vnode.  It should only be
// called on behalf of a root process.  Otherwise a process may
// get access to a file because the file itself is readable even
// though its parent directories would prevent access.
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {

	error = mac_vnode_check_open(ctx, vp, fmode);

	/* compute action to be authorized */
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)

	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		VNOP_CLOSE(vp, fmode, ctx);

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	mac_vnode_notify_open(ctx, vp, fmode);
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
{
	int i, error = 0, unblock = 0;

	lck_mtx_lock(&nspace_handler_exclusion_lock);
	if (nspace_handlers[nspace_type].handler_busy) {
		lck_mtx_unlock(&nspace_handler_exclusion_lock);
	nspace_handlers[nspace_type].handler_busy = 1;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	/*
	 * Any process that gets here will be one of the namespace handlers.
	 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
	 * as we can cause deadlocks to occur, because the namespace handler may prevent
	 * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE
	 */
	curtask = current_task();
	bsd_set_dependency_capable (curtask);

	lck_mtx_lock(&nspace_handler_lock);
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
		nspace_handlers[nspace_type].handler_proc = current_proc();

	while (error == 0) {
		for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
				if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {

		if (i < MAX_NSPACE_ITEMS) {
			nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
			nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
			nspace_items[i].token = ++nspace_token_id;

			if (nspace_items[i].vp) {
				struct fileproc *fp;
				int32_t indx, fmode;
				struct proc *p = current_proc();
				vfs_context_t ctx = vfs_context_current();
				struct vnode_attr va;

				/*
				 * Use vnode pointer to acquire a file descriptor for
				 * hand-off to userland
				 */
				fmode = nspace_open_flags_for_type(nspace_type);
				error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
				error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
					vnode_put(nspace_items[i].vp);

				if ((error = falloc(p, &fp, &indx, ctx))) {
					vn_close(nspace_items[i].vp, fmode, ctx);
					vnode_put(nspace_items[i].vp);

				fp->f_fglob->fg_flag = fmode;
				fp->f_fglob->fg_ops = &vnops;
				fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;

				procfdtbl_releasefd(p, indx, NULL);
				fp_drop(p, indx, fp, 1);

				/*
				 * All variants of the namespace handler struct support these three fields:
				 * token, flags, and the FD pointer
				 */
				error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
				error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
				error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));

				/*
				 * Handle optional fields:
				 * extended version support an info ptr (offset, length), and the
				 * namedata version supports a unique per-link object ID
				 */
					uio_t uio = (uio_t)nspace_items[i].arg;
					uint64_t u_offset, u_length;

					u_offset = uio_offset(uio);
					u_length = uio_resid(uio);

					error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
					error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));

					VATTR_WANTED(&va, va_linkid);
					error = vnode_getattr(nspace_items[i].vp, &va, ctx);
						uint64_t linkid = 0;
						if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
							linkid = (uint64_t)va.va_linkid;
						error = copyout (&linkid, nhd->objid, sizeof(uint64_t));

					vn_close(nspace_items[i].vp, fmode, ctx);
					fp_free(p, indx, fp);

				vnode_put(nspace_items[i].vp);

				printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
				       i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);

			error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
			if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {

			if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);

			nspace_items[i].vp = NULL;
			nspace_items[i].vid = 0;
			nspace_items[i].flags = NSPACE_ITEM_DONE;
			nspace_items[i].token = 0;

			wakeup((caddr_t)&(nspace_items[i].vp));

	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
		// just go through every snapshot event and unblock it immediately.
		if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
			for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
					if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
						nspace_items[i].vp = NULL;
						nspace_items[i].vid = 0;
						nspace_items[i].flags = NSPACE_ITEM_DONE;
						nspace_items[i].token = 0;

						wakeup((caddr_t)&(nspace_items[i].vp));

	lck_mtx_unlock(&nspace_handler_lock);

	lck_mtx_lock(&nspace_handler_exclusion_lock);
	nspace_handlers[nspace_type].handler_busy = 0;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);
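/*
 * Editorial note: wait_for_namespace_event() is the handler side of the
 * protocol sketched above.  It claims a NEW item, opens the item's vnode with
 * vn_open_with_vp(), wraps it in a fresh file descriptor via falloc(), and
 * copies the token, the op flags and that descriptor index out through the
 * namespace_handler_data pointers before sleeping on &nspace_item_idx for the
 * next item.
 */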
static inline int validate_namespace_args (int is64bit, int size) {

	/* Must be one of these */
	if (size == sizeof(user64_namespace_handler_info)) {
	if (size == sizeof(user64_namespace_handler_info_ext)) {
	if (size == sizeof(user64_namespace_handler_data)) {

	/* 32 bit -- must be one of these */
	if (size == sizeof(user32_namespace_handler_info)) {
	if (size == sizeof(user32_namespace_handler_info_ext)) {
	if (size == sizeof(user32_namespace_handler_data)) {
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
{
	namespace_handler_data nhd;

	bzero (&nhd, sizeof(namespace_handler_data));

	if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
	    (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {

	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {

	error = validate_namespace_args (is64bit, size);

	/* Copy in the userland pointers into our kernel-only struct */

		/* 64 bit userland structures */
		nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
		nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
		nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;

		/* If the size is greater than the standard info struct, add in extra fields */
		if (size > (sizeof(user64_namespace_handler_info))) {
			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
			if (size == (sizeof(user64_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user64_namespace_handler_data *)data)->objid;
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */

		/* 32 bit userland structures */
		nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
		nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
		nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);

		if (size > (sizeof(user32_namespace_handler_info))) {
			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
			if (size == (sizeof(user32_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user32_namespace_handler_data *)data)->objid;
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */

	return wait_for_namespace_event(&nhd, nspace_type);
/*
 * Make a filesystem-specific control call:
 */
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];
	vnode_t vp = *arg_vp;

	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	/*
	 * ensure the buffer is large enough for underlying calls
	 */
#ifndef HFSIOC_GETPATH
	typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
#define HFS_GETPATH  IOCBASECMD(HFSIOC_GETPATH)
#endif
	if (IOCBASECMD(cmd) == HFS_GETPATH) {
		/* Round up to MAXPATHLEN regardless of user input */

	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;

		error = copyin(udata, data, size);

			*(user_addr_t *)data = udata;
			*(uint32_t *)data = (uint32_t)udata;
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
	} else if (cmd & IOC_VOID) {
			*(user_addr_t *)data = udata;
			*(uint32_t *)data = (uint32_t)udata;

	/* Check to see if it's a generic command */
	switch (IOCBASECMD(cmd)) {

	case FSCTL_SYNC_VOLUME: {
		mount_t mp = vp->v_mount;
		int arg = *(uint32_t*)data;

		/* record vid of vp so we can drop it below. */
		uint32_t vvid = vp->v_id;

		/*
		 * Then grab mount_iterref so that we can release the vnode.
		 * Without this, a thread may call vnode_iterate_prepare then
		 * get into a deadlock because we've never released the root vp
		 */
		error = mount_iterref (mp, 0);

		/* issue the sync for this volume */
		(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

		/*
		 * Then release the mount_iterref once we're done syncing; it's not
		 * needed for the VNOP_IOCTL below
		 */
		if (arg & FSCTL_SYNC_FULLSYNC) {
			/* re-obtain vnode iocount on the root vp, if possible */
			error = vnode_getwithvid (vp, vvid);
			error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);

		/* mark the argument VP as having been released */

	case FSCTL_SET_PACKAGE_EXTS: {
		user_addr_t ext_strings;
		uint32_t num_entries;

		if (   (is64bit && size != sizeof(user64_package_ext_info))
		    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
			// either you're 64-bit and passed a 64-bit struct or
			// you're 32-bit and passed a 32-bit struct.  otherwise

			ext_strings = ((user64_package_ext_info *)data)->strings;
			num_entries = ((user64_package_ext_info *)data)->num_entries;
			max_width = ((user64_package_ext_info *)data)->max_width;

			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
			num_entries = ((user32_package_ext_info *)data)->num_entries;
			max_width = ((user32_package_ext_info *)data)->max_width;

		error = set_package_extensions_table(ext_strings, num_entries, max_width);

	/* namespace handlers */
	case FSCTL_NAMESPACE_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);

	/* Snapshot handlers */
	case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);

	case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);

	case FSCTL_NAMESPACE_HANDLER_UPDATE: {
		uint32_t token, val;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
		if (!nspace_is_special_process(p)) {

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */

		if (i >= MAX_NSPACE_ITEMS) {
			// if this bit is set, when resolve_nspace_item() times out
			// it will loop and go back to sleep.
			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;

		lck_mtx_unlock(&nspace_handler_lock);

			printf("nspace-handler-update: did not find token %u\n", token);

	case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
		uint32_t token, val;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
		if (!nspace_is_special_process(p)) {

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case statement */

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-unblock: did not find token %u\n", token);

		if (val == 0 && nspace_items[i].vp) {
			vnode_lock_spin(nspace_items[i].vp);
			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
			vnode_unlock(nspace_items[i].vp);

		nspace_items[i].vp = NULL;
		nspace_items[i].arg = NULL;
		nspace_items[i].op = 0;
		nspace_items[i].vid = 0;
		nspace_items[i].flags = NSPACE_ITEM_DONE;
		nspace_items[i].token = 0;

		wakeup((caddr_t)&(nspace_items[i].vp));

		lck_mtx_unlock(&nspace_handler_lock);

	case FSCTL_NAMESPACE_HANDLER_CANCEL: {
		uint32_t token, val;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
		if (!nspace_is_special_process(p)) {

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-cancel: did not find token %u\n", token);

		if (nspace_items[i].vp) {
			vnode_lock_spin(nspace_items[i].vp);
			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
			vnode_unlock(nspace_items[i].vp);

		nspace_items[i].vp = NULL;
		nspace_items[i].arg = NULL;
		nspace_items[i].vid = 0;
		nspace_items[i].token = val;
		nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
		nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

		wakeup((caddr_t)&(nspace_items[i].vp));

		lck_mtx_unlock(&nspace_handler_lock);

	case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {

		// we explicitly do not do the namespace_handler_proc check here
		lck_mtx_lock(&nspace_handler_lock);
		snapshot_timestamp = ((uint32_t *)data)[0];
		wakeup(&nspace_item_idx);
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

	case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {

		lck_mtx_lock(&nspace_handler_lock);
		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
		       nspace_allow_virtual_devs ? "" : " NOT");

	case FSCTL_SET_FSTYPENAME_OVERRIDE:
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {

		mount_lock(vp->v_mount);
			strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
			vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
			if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
				vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
				vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;

			if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
				vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
			vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
			vp->v_mount->fstypename_override[0] = '\0';
		mount_unlock(vp->v_mount);

		/* Invoke the filesystem-specific code */
		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);

	} /* end switch stmt */

	/*
	 * if no errors, copy any data to user.  Size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);
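/*
 * Editorial note: fsctl_internal() follows the usual ioctl argument
 * convention visible above -- IOCPARM_LEN(cmd) gives the argument size
 * (stack buffer up to STK_PARAMS bytes, kalloc'd otherwise), input-direction
 * arguments are copied in from udata before the switch, and IOC_OUT
 * arguments are copied back to udata at the end when no error occurred.
 */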
fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
{
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);
	/* Get the vnode for the file we are getting info on:  */
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd))) goto done;

	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);

	/* Get the vnode for the file we are getting info on:  */
	if ((error = file_vnode(uap->fd, &vp)))
	if ((error = vnode_getwithref(vp))) {

	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

/* end of fsctl system call */
/*
 * Retrieve the data of an extended attribute.
 */
getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
{
	struct nameidata nd;
	char attrname[XATTR_MAXNAMELEN+1];
	vfs_context_t ctx = vfs_context_current();
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	u_int32_t nameiflags;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))

	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
	if ((error = namei(&nd))) {

	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
	if (xattr_protected(attrname)) {
		if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {

	/*
	 * the specific check for 0xffffffff is a hack to preserve
	 * binary compatibility in K64 with applications that discovered
	 * that passing in a buf pointer and a size of -1 resulted in
	 * just the size of the indicated extended attribute being returned.
	 * this isn't part of the documented behavior, but because of the
	 * original implementation's check for "uap->size > 0", this behavior
	 * was allowed. In K32 that check turned into a signed comparison
	 * even though uap->size is unsigned... in K64, we blow by that
	 * check because uap->size is unsigned and doesn't get sign smeared
	 * in the munger for a 32 bit user app.  we also need to add a
	 * check to limit the maximum size of the buffer being passed in...
	 * unfortunately, the underlying filesystems seem to just malloc
	 * the requested size even if the actual extended attribute is tiny.
	 * because that malloc is for kernel wired memory, we have to put a
	 *
	 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
	 * U64 running on K64 will yield -1 (64 bits wide)
	 * U32/U64 running on K32 will yield -1 (32 bits wide)
	 */
	if (uap->size == 0xffffffff || uap->size == (size_t)-1)

	if (uap->size > (size_t)XATTR_MAXSIZE)
		uap->size = XATTR_MAXSIZE;

	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
	                            &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->value, uap->size);

	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);

		*retval = uap->size - uio_resid(auio);
		*retval = (user_ssize_t)attrsize;
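/*
 * Editorial note on the retval convention used above: when the caller
 * supplied a buffer, *retval is the number of bytes actually copied
 * (uap->size minus the uio residual); otherwise *retval reports the
 * attribute's size so callers can size a buffer and call again.
 */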
/*
 * Retrieve the data of an extended attribute.
 */
fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
{
	char attrname[XATTR_MAXNAMELEN+1];
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))

	if ( (error = file_vnode(uap->fd, &vp)) ) {
	if ( (error = vnode_getwithref(vp)) ) {
	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
	if (xattr_protected(attrname)) {

	if (uap->value && uap->size > 0) {
		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
		                            &uio_buf[0], sizeof(uio_buf));
		uio_addiov(auio, uap->value, uap->size);

	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());

	(void)vnode_put(vp);

		*retval = uap->size - uio_resid(auio);
		*retval = (user_ssize_t)attrsize;
/*
 * Set the data of an extended attribute.
 */
setxattr(proc_t p, struct setxattr_args *uap, int *retval)
{
	struct nameidata nd;
	char attrname[XATTR_MAXNAMELEN+1];
	vfs_context_t ctx = vfs_context_current();
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	u_int32_t nameiflags;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))

	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
		if (error == EPERM) {
			/* if the string won't fit in attrname, copyinstr emits EPERM */
			return (ENAMETOOLONG);
		/* Otherwise return the default error from copyinstr to detect ERANGE, etc */
	if (xattr_protected(attrname))

	if (uap->size != 0 && uap->value == 0) {

	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
	if ((error = namei(&nd))) {

	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
	                            &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->value, uap->size);

	error = vn_setxattr(vp, attrname, auio, uap->options, ctx);

	add_fsevent(FSE_XATTR_MODIFIED, ctx,
/*
 * Set the data of an extended attribute.
 */
fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
{
	char attrname[XATTR_MAXNAMELEN+1];
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	char uio_buf[ UIO_SIZEOF(1) ];
	vfs_context_t ctx = vfs_context_current();

	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))

	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
	if (xattr_protected(attrname))
	if (uap->size != 0 && uap->value == 0) {

	if ( (error = file_vnode(uap->fd, &vp)) ) {
	if ( (error = vnode_getwithref(vp)) ) {

	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
	                            &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->value, uap->size);

	error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());

	add_fsevent(FSE_XATTR_MODIFIED, ctx,
/*
 * Remove an extended attribute.
 * XXX Code duplication here.
 */
removexattr(proc_t p, struct removexattr_args *uap, int *retval)
{
	struct nameidata nd;
	char attrname[XATTR_MAXNAMELEN+1];
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	vfs_context_t ctx = vfs_context_current();
	u_int32_t nameiflags;

	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))

	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);

	if (xattr_protected(attrname))

	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
	if ((error = namei(&nd))) {

	error = vn_removexattr(vp, attrname, uap->options, ctx);

	add_fsevent(FSE_XATTR_REMOVED, ctx,
/*
 * Remove an extended attribute.
 * XXX Code duplication here.
 */
fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
{
	char attrname[XATTR_MAXNAMELEN+1];
	vfs_context_t ctx = vfs_context_current();

	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))

	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);

	if (xattr_protected(attrname))

	if ( (error = file_vnode(uap->fd, &vp)) ) {
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);

	error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());

	add_fsevent(FSE_XATTR_REMOVED, ctx,

	file_drop(uap->fd);
/*
 * Retrieve the list of extended attribute names.
 * XXX Code duplication here.
 */
listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
{
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	u_int32_t nameiflags;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))

	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
	if ((error = namei(&nd))) {

	if (uap->namebuf != 0 && uap->bufsize > 0) {
		auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
		                            &uio_buf[0], sizeof(uio_buf));
		uio_addiov(auio, uap->namebuf, uap->bufsize);

	error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);

		*retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
		*retval = (user_ssize_t)attrsize;
/*
 * Retrieve the list of extended attribute names.
 * XXX Code duplication here.
 */
flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
{
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))

	if ( (error = file_vnode(uap->fd, &vp)) ) {
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);

	if (uap->namebuf != 0 && uap->bufsize > 0) {
		auio = uio_createwithbuffer(1, 0, spacetype,
		                            UIO_READ, &uio_buf[0], sizeof(uio_buf));
		uio_addiov(auio, uap->namebuf, uap->bufsize);

	error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());

	file_drop(uap->fd);

		*retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
		*retval = (user_ssize_t)attrsize;
static int fsgetpath_internal(
	vfs_context_t ctx, int volfs_id, uint64_t objid,
	vm_size_t bufsize, caddr_t buf, int *pathlen)
{
	struct mount *mp = NULL;

	if (bufsize > PAGE_SIZE) {

	if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
		error = ENOTSUP;  /* unexpected failure */

	error = VFS_ROOT(mp, &vp, ctx);
	error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		if (vfs_busy(mp, LK_NOWAIT) == 0)

	error = mac_vnode_check_fsgetpath(ctx, vp);

	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);

	AUDIT_ARG(text, buf);

	if (kdebug_enable) {
		long dbg_parms[NUMPARMS];

		dbg_namelen = (int)sizeof(dbg_parms);

		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, buf, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
			memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);

		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);

	*pathlen = (user_ssize_t)length;  /* may be superseded by error */
/*
 * Obtain the full pathname of a file system object by id.
 *
 * This is a private SPI used by the File Manager.
 */
fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {

	AUDIT_ARG(value32, fsid.val[0]);
	AUDIT_ARG(value64, uap->objid);
	/* Restrict output buffer size for now. */
	if (uap->bufsize > PAGE_SIZE) {
	MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
	if (realpath == NULL) {

	error = fsgetpath_internal(
		ctx, fsid.val[0], uap->objid,
		uap->bufsize, realpath, &length);

	error = copyout((caddr_t)realpath, uap->buf, length);

	*retval = (user_ssize_t)length; /* may be superseded by error */

	FREE(realpath, M_TEMP);
/*
 * Common routine to handle various flavors of statfs data heading out
 *
 * Returns:	0	Success
 */
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
             user_addr_t bufp, int *sizep, boolean_t is_64_bit,
             boolean_t partial_copy)
{
	int my_size, copy_size;

		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		error = copyout((caddr_t)&sfs, bufp, copy_size);

		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case.  We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702 . I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge. This change should not affect
			 * XSAN as they should not be setting these to -1..
			 */
		    && (sfsp->f_blocks != 0xffffffffffffffffULL)
		    && (sfsp->f_bfree != 0xffffffffffffffffULL)
		    && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;

		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		error = copyout((caddr_t)&sfs, bufp, copy_size);

	if (sizep != NULL) {

/*
 * copy stat structure into user_stat structure.
 */
void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
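
/*
 * Editorial note (not part of the original source): the munge_* helpers in
 * this file all follow the same pattern; they bzero() the ABI-sized user
 * structure first, then copy every field individually from the kernel's
 * struct stat or stat64.  The zeroing ensures that structure padding and
 * unused fields read back as zero when the caller later copyout()s the
 * result; the user64 variants preserve field widths, while the user32
 * variants rely on implicit narrowing conversions.
 */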
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
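
/*
 * Editorial note (not part of the original source): the user32 layout
 * follows the ILP32 ABI, so any member that is narrower than its kernel
 * counterpart (for example the 32-bit tv_sec seconds fields) is truncated
 * by the implicit conversions above; callers that need the wider ranges
 * are expected to go through the stat64 paths below.
 */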

/*
 * copy stat64 structure into user_stat64 structure.
 */
void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
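
/*
 * Illustrative sketch (hypothetical, not part of the original source):
 * a stat64-family syscall path would typically fill a kernel struct stat64,
 * pick the munge routine matching the caller's ABI, and copy the result
 * out, roughly:
 *
 *	struct stat64 sb;
 *	struct user64_stat64 user_sb;
 *
 *	// ... kernel stat path fills sb ...
 *	munge_user64_stat64(&sb, &user_sb);
 *	error = copyout(&user_sb, uap->ub, sizeof(user_sb));
 *
 * (uap->ub stands in for whatever user address the syscall was given.)
 */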

/*
 * Purge buffer cache for simulating cold starts
 */
static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
{
	ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);

	return VNODE_RETURNED;
}
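
/*
 * Editorial note (not part of the original source): UBC_PUSHALL together
 * with UBC_INVALIDATE asks ubc_msync() to write back all dirty pages of the
 * vnode and then drop its cached pages, so subsequent accesses must go back
 * to the underlying filesystem; that is what makes this purge approximate a
 * cold start.
 */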

static int vfs_purge_callback(mount_t mp, __unused void * arg)
{
	vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);

	return VFS_RETURNED;
}

int
vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
{
	if (!kauth_cred_issuser(kauth_cred_get()))
		return EPERM;

	vfs_iterate(0/* flags */, vfs_purge_callback, NULL);