2 * Copyright (c) 1995-2014 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
88 #include <sys/dirent.h>
90 #include <sys/sysctl.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
114 #include <vm/vm_pageout.h>
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
136 /* struct for checkdirs iteration */
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p
, void * arg
);
144 static int change_dir(struct nameidata
*ndp
, vfs_context_t ctx
);
145 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
);
146 void enablequotas(struct mount
*mp
, vfs_context_t ctx
);
147 static int getfsstat_callback(mount_t mp
, void * arg
);
148 static int getutimes(user_addr_t usrtvp
, struct timespec
*tsp
);
149 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
, int nullflag
);
150 static int sync_callback(mount_t
, void *);
151 static void sync_thread(void *, __unused wait_result_t
);
152 static int sync_async(int);
153 static int munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
154 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
155 boolean_t partial_copy
);
156 static int statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
,
158 static int fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
);
159 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
160 struct componentname
*cnp
, user_addr_t fsmountargs
,
161 int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
,
163 void vfs_notify_mount(vnode_t pdvp
);
165 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
);
167 struct fd_vn_data
* fg_vn_data_alloc(void);
169 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
);
171 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, int *);
173 #ifdef CONFIG_IMGSRC_ACCESS
174 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
);
175 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
);
176 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
);
177 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
);
178 static void mount_end_update(mount_t mp
);
179 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
);
180 #endif /* CONFIG_IMGSRC_ACCESS */
182 int (*union_dircheckp
)(struct vnode
**, struct fileproc
*, vfs_context_t
);
185 int sync_internal(void);
188 int unlink1(vfs_context_t
, struct nameidata
*, int);
190 extern lck_grp_t
*fd_vn_lck_grp
;
191 extern lck_grp_attr_t
*fd_vn_lck_grp_attr
;
192 extern lck_attr_t
*fd_vn_lck_attr
;
195 * incremented each time a mount or unmount operation occurs
196 * used to invalidate the cached value of the rootvp in the
197 * mount structure utilized by cache_lookup_path
199 uint32_t mount_generation
= 0;
201 /* counts number of mount and unmount operations */
202 unsigned int vfs_nummntops
=0;
204 extern const struct fileops vnops
;
205 #if CONFIG_APPLEDOUBLE
206 extern errno_t
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);
207 #endif /* CONFIG_APPLEDOUBLE */
209 typedef uint32_t vfs_rename_flags_t
;
210 #if CONFIG_SECLUDED_RENAME
212 VFS_SECLUDE_RENAME
= 0x00000001
217 * Virtual File System System Calls
220 #if NFSCLIENT || DEVFS
222 * Private in-kernel mounting spi (NFS only, not exported)
226 vfs_iskernelmount(mount_t mp
)
228 return ((mp
->mnt_kern_flag
& MNTK_KERNEL_MOUNT
) ? TRUE
: FALSE
);
/*
 * kernel_mount: in-kernel entry point for mounting a file system.
 *
 * What the visible lines do:
 *  - initialize a nameidata for a LOOKUP/OP_MOUNT of the kernel-space
 *    string `path` (UIO_SYSSPACE) with FOLLOW | AUDITVNPATH1 | WANTPARENT;
 *  - point the component-name buffer directly at the caller's path
 *    (cn_pnbuf = path, cn_pnlen = strlen(path) + 1);
 *  - call mount_common() with `data` as the file-system-specific
 *    arguments, `syscall_flags` as the generic mount flags, `kern_flags`
 *    as the internal flags, no MAC label (NULL) and kernelmount = TRUE.
 *
 * NOTE(review): kern_flags is annotated __unused but is forwarded to
 * mount_common() below; the annotation looks stale.
 * NOTE(review): the handling of a covered vnode that is not supplied by
 * the caller is not fully visible in this excerpt; confirm against the
 * complete source before relying on this summary.
 */
233 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
,
234 void *data
, __unused
size_t datalen
, int syscall_flags
, __unused
uint32_t kern_flags
, vfs_context_t ctx
)
240 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
241 UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
);
244 * Get the vnode to be covered if it's not supplied
254 char *pnbuf
= CAST_DOWN(char *, path
);
256 nd
.ni_cnd
.cn_pnbuf
= pnbuf
;
257 nd
.ni_cnd
.cn_pnlen
= strlen(pnbuf
) + 1;
261 error
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
),
262 syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
);
272 #endif /* NFSCLIENT || DEVFS */
275 * Mount a file system.
279 mount(proc_t p
, struct mount_args
*uap
, __unused
int32_t *retval
)
281 struct __mac_mount_args muap
;
283 muap
.type
= uap
->type
;
284 muap
.path
= uap
->path
;
285 muap
.flags
= uap
->flags
;
286 muap
.data
= uap
->data
;
287 muap
.mac_p
= USER_ADDR_NULL
;
288 return (__mac_mount(p
, &muap
, retval
));
292 vfs_notify_mount(vnode_t pdvp
)
294 vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
);
295 lock_vnode_and_post(pdvp
, NOTE_WRITE
);
300 * Mount a file system taking into account MAC label behavior.
301 * See mount(2) man page for more information
303 * Parameters: p Process requesting the mount
304 * uap User argument descriptor (see below)
307 * Indirect: uap->type Filesystem type
308 * uap->path Path to mount
309 * uap->data Mount arguments
310 * uap->mac_p MAC info
311 * uap->flags Mount flags
317 boolean_t root_fs_upgrade_try
= FALSE
;
/*
 * __mac_mount: mount system call that accepts an optional MAC label.
 *
 * Steps visible in this excerpt (error exits and some branch bodies are
 * not shown, so this is a map rather than a full contract):
 *  - copy the file system type name from uap->type into fstypename
 *    (at most MFSNAMELEN bytes);
 *  - set up a LOOKUP/OP_MOUNT nameidata for the user-space path
 *    uap->path (the vnode to be covered);
 *  - under CONFIG_IMGSRC_ACCESS, when flags is exactly
 *    MNT_IMGSRC_BY_INDEX, call relocate_imageboot_source();
 *  - when uap->mac_p is set, copy in a user64_mac or user32_mac
 *    descriptor (presumably selected by is_64bit -- TODO confirm), test
 *    m_buflen against the range [2, MAC_MAX_LABEL_BUF_LEN], allocate
 *    labelstr from M_MACTEMP and copy the label string in;
 *  - for the root vnode of the MNT_ROOTFS mount, adjust flags: the
 *    visible branch clears MNT_UPDATE for a union mount on the root;
 *    with CHECK_CS_VALIDATION_BITMAP a non-read-only request sets
 *    root_fs_upgrade_try;
 *  - call mount_common() with internal_flags 0, the label string and
 *    kernelmount = FALSE, then FREE the label buffer.
 */
320 __mac_mount(struct proc
*p
, register struct __mac_mount_args
*uap
, __unused
int32_t *retval
)
324 int need_nameidone
= 0;
325 vfs_context_t ctx
= vfs_context_current();
326 char fstypename
[MFSNAMELEN
];
329 char *labelstr
= NULL
;
330 int flags
= uap
->flags
;
332 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
333 boolean_t is_64bit
= IS_64BIT_PROCESS(p
);
338 * Get the fs type name from user space
340 error
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
);
345 * Get the vnode to be covered
347 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
| AUDITVNPATH1
| WANTPARENT
,
348 UIO_USERSPACE
, uap
->path
, ctx
);
357 #ifdef CONFIG_IMGSRC_ACCESS
358 /* Mounting image source cannot be batched with other operations */
359 if (flags
== MNT_IMGSRC_BY_INDEX
) {
360 error
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
,
361 ctx
, is_64bit
, uap
->data
, (flags
== MNT_IMGSRC_BY_INDEX
));
364 #endif /* CONFIG_IMGSRC_ACCESS */
368 * Get the label string (if any) from user space
370 if (uap
->mac_p
!= USER_ADDR_NULL
) {
375 struct user64_mac mac64
;
376 error
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
));
377 mac
.m_buflen
= mac64
.m_buflen
;
378 mac
.m_string
= mac64
.m_string
;
380 struct user32_mac mac32
;
381 error
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
));
382 mac
.m_buflen
= mac32
.m_buflen
;
383 mac
.m_string
= mac32
.m_string
;
387 if ((mac
.m_buflen
> MAC_MAX_LABEL_BUF_LEN
) ||
388 (mac
.m_buflen
< 2)) {
392 MALLOC(labelstr
, char *, mac
.m_buflen
, M_MACTEMP
, M_WAITOK
);
393 error
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
);
397 AUDIT_ARG(mac_string
, labelstr
);
399 #endif /* CONFIG_MACF */
401 AUDIT_ARG(fflags
, flags
);
403 if ((vp
->v_flag
& VROOT
) &&
404 (vp
->v_mount
->mnt_flag
& MNT_ROOTFS
)) {
405 if (!(flags
& MNT_UNION
)) {
410 * For a union mount on '/', treat it as fresh
411 * mount instead of update.
412 * Otherwise, union mouting on '/' used to panic the
413 * system before, since mnt_vnodecovered was found to
414 * be NULL for '/' which is required for unionlookup
415 * after it gets ENOENT on union mount.
417 flags
= (flags
& ~(MNT_UPDATE
));
421 if ((flags
& MNT_RDONLY
) == 0) {
422 /* Release kernels are not allowed to mount "/" as rw */
428 * See 7392553 for more details on why this check exists.
429 * Suffice to say: If this check is ON and something tries
430 * to mount the rootFS RW, we'll turn off the codesign
431 * bitmap optimization.
433 #if CHECK_CS_VALIDATION_BITMAP
434 if ((flags
& MNT_RDONLY
) == 0 ) {
435 root_fs_upgrade_try
= TRUE
;
440 error
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0,
441 labelstr
, FALSE
, ctx
);
447 FREE(labelstr
, M_MACTEMP
);
448 #endif /* CONFIG_MACF */
456 if (need_nameidone
) {
464 * common mount implementation (final stage of mounting)
467 * fstypename file system type (i.e. its vfs name)
468 * pvp parent of covered vnode
470 * cnp component name (ie path) of covered vnode
471 * flags generic mount flags
472 * fsmountargs file system specific data
473 * labelstr optional MAC label
474 * kernelmount TRUE for mounts initiated from inside the kernel
475 * ctx caller's context
/*
 * mount_common: shared back end for mount requests; in this file it is
 * called from kernel_mount() and __mac_mount().
 *
 * Map of what is visible in this excerpt.  Several local declarations,
 * branch bodies, labels and error exits are not shown, so the ordering
 * below is reliable but the exact error codes and jump targets are not.
 *
 *  1. MNT_UPDATE requests: vp is tested for VROOT, MNT_LUNMOUNT is
 *     tested on the mount, mnt_rwlock is taken exclusively, and the
 *     request is vetted: MNT_RELOAD only on a read-only mount,
 *     MNT_CPROTECT may not be dropped, (CONFIG_IMGSRC_ACCESS) the backer
 *     of the root FS may not be downgraded, the caller must own the
 *     mount or pass suser(), and mac_mount_check_remount() is consulted.
 *     Non-superuser, non-kernel callers get MNT_NOSUID | MNT_NODEV added.
 *     The mount then records flags & (MNT_RELOAD | MNT_FORCE |
 *     MNT_UPDATE) and vfsp is taken from mp->mnt_vtable.
 *  2. Fresh mounts: the same non-superuser flag enforcement, a walk of
 *     vfsconf comparing vfc_name with fstypename (vfc_refcount is bumped
 *     on a match), rejection of kernel mounts for VFC_VFSLOCALARGS file
 *     systems (EINVAL), prepare_coveredvp(), then allocation, zeroing
 *     and default initialization of the struct mount, with mnt_rwlock
 *     taken exclusively.
 *  3. Mount-level flags are derived from `flags`.
 *  4. For VFC_VFSLOCALARGS file systems the device path is copied in
 *     from fsmountargs, looked up, checked (VBLK, major < nblkdev,
 *     vnode_authorize for non-superusers) and, on a first mount,
 *     referenced, flushed and opened; on a read-only to read-write
 *     upgrade the block device d_open is called with FREAD | FWRITE.
 *  5. MAC mount label setup or update check, then VFS_MOUNT().
 *  6. Update completion: flag fix-up, VQ_UPDATE event, lock release,
 *     enablequotas().
 *  7. Fresh-mount completion: optional root vnode labelling, placing the
 *     mount on vp (v_mountedhere), vnode_ref(), checkdirs(), VFS_START(),
 *     mount_list_add(), capability probing through vfs_getattr(),
 *     vfs_init_io_attributes() and vfs_notify_mount().
 *  8. Failure handling: device close/release, lock release, mount
 *     structure teardown and vfc_refcount decrement.
 */
478 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
,
479 struct componentname
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
,
480 char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
)
483 #pragma unused(labelstr)
485 struct vnode
*devvp
= NULLVP
;
486 struct vnode
*device_vnode
= NULLVP
;
491 struct vfstable
*vfsp
= (struct vfstable
*)0;
492 struct proc
*p
= vfs_context_proc(ctx
);
494 user_addr_t devpath
= USER_ADDR_NULL
;
497 boolean_t vfsp_ref
= FALSE
;
498 boolean_t is_rwlock_locked
= FALSE
;
499 boolean_t did_rele
= FALSE
;
500 boolean_t have_usecount
= FALSE
;
503 * Process an update for an existing mount
505 if (flags
& MNT_UPDATE
) {
506 if ((vp
->v_flag
& VROOT
) == 0) {
512 /* unmount in progress return error */
514 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
520 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
521 is_rwlock_locked
= TRUE
;
523 * We only allow the filesystem to be reloaded if it
524 * is currently mounted read-only.
526 if ((flags
& MNT_RELOAD
) &&
527 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
533 * If content protection is enabled, update mounts are not
534 * allowed to turn it off.
536 if ((mp
->mnt_flag
& MNT_CPROTECT
) &&
537 ((flags
& MNT_CPROTECT
) == 0)) {
542 #ifdef CONFIG_IMGSRC_ACCESS
543 /* Can't downgrade the backer of the root FS */
544 if ((mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) &&
545 (!vfs_isrdonly(mp
)) && (flags
& MNT_RDONLY
)) {
549 #endif /* CONFIG_IMGSRC_ACCESS */
552 * Only root, or the user that did the original mount is
553 * permitted to update it.
555 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
556 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) {
560 error
= mac_mount_check_remount(ctx
, mp
);
566 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
567 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
569 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
570 flags
|= MNT_NOSUID
| MNT_NODEV
;
571 if (mp
->mnt_flag
& MNT_NOEXEC
)
578 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
580 vfsp
= mp
->mnt_vtable
;
584 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
585 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
587 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) {
588 flags
|= MNT_NOSUID
| MNT_NODEV
;
589 if (vp
->v_mount
->mnt_flag
& MNT_NOEXEC
)
593 /* XXXAUDIT: Should we capture the type on the error path as well? */
594 AUDIT_ARG(text
, fstypename
);
/* Find the file system type by name; a match takes a vfc_refcount reference. */
596 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
597 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) {
598 vfsp
->vfc_refcount
++;
609 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
611 if (kernelmount
&& (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
)) {
612 error
= EINVAL
; /* unsupported request */
/* Authorization is skipped when internal_flags carries KERNEL_MOUNT_NOAUTH. */
616 error
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags
& KERNEL_MOUNT_NOAUTH
) != 0));
622 * Allocate and initialize the filesystem (mount_t)
624 MALLOC_ZONE(mp
, struct mount
*, (u_int32_t
)sizeof(struct mount
),
626 bzero((char *)mp
, (u_int32_t
)sizeof(struct mount
));
629 /* Initialize the default IO constraints */
630 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
631 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
632 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
633 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
634 mp
->mnt_devblocksize
= DEV_BSIZE
;
635 mp
->mnt_alignmentmask
= PAGE_MASK
;
636 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
639 mp
->mnt_realrootvp
= NULLVP
;
640 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
642 TAILQ_INIT(&mp
->mnt_vnodelist
);
643 TAILQ_INIT(&mp
->mnt_workerqueue
);
644 TAILQ_INIT(&mp
->mnt_newvnodes
);
646 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
647 is_rwlock_locked
= TRUE
;
648 mp
->mnt_op
= vfsp
->vfc_vfsops
;
649 mp
->mnt_vtable
= vfsp
;
650 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
651 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
652 strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
);
653 strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
654 mp
->mnt_vnodecovered
= vp
;
655 mp
->mnt_vfsstat
.f_owner
= kauth_cred_getuid(vfs_context_ucred(ctx
));
656 mp
->mnt_throttle_mask
= LOWPRI_MAX_NUM_DEV
- 1;
657 mp
->mnt_devbsdunit
= 0;
659 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
660 vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
);
662 #if NFSCLIENT || DEVFS
664 mp
->mnt_kern_flag
|= MNTK_KERNEL_MOUNT
;
665 if ((internal_flags
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0)
666 mp
->mnt_kern_flag
|= MNTK_PERMIT_UNMOUNT
;
667 #endif /* NFSCLIENT || DEVFS */
671 * Set the mount level flags.
673 if (flags
& MNT_RDONLY
)
674 mp
->mnt_flag
|= MNT_RDONLY
;
675 else if (mp
->mnt_flag
& MNT_RDONLY
) {
676 // disallow read/write upgrades of file systems that
677 // had the TYPENAME_OVERRIDE feature set.
678 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
682 mp
->mnt_kern_flag
|= MNTK_WANTRDWR
;
/* Replace the caller-controllable flag bits: clear the whole set, then copy the requested ones. */
684 mp
->mnt_flag
&= ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
685 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
686 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
687 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
688 MNT_QUARANTINE
| MNT_CPROTECT
);
689 mp
->mnt_flag
|= flags
& (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
690 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
|
691 MNT_UNKNOWNPERMISSIONS
| MNT_DONTBROWSE
|
692 MNT_AUTOMOUNTED
| MNT_DEFWRITE
| MNT_NOATIME
|
693 MNT_QUARANTINE
| MNT_CPROTECT
);
696 if (flags
& MNT_MULTILABEL
) {
697 if (vfsp
->vfc_vfsflags
& VFC_VFSNOMACLABEL
) {
701 mp
->mnt_flag
|= MNT_MULTILABEL
;
705 * Process device path for local file systems if requested
707 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
708 if (vfs_context_is64bit(ctx
)) {
709 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
711 fsmountargs
+= sizeof(devpath
);
714 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
716 /* munge into LP64 addr */
717 devpath
= CAST_USER_ADDR_T(tmp
);
718 fsmountargs
+= sizeof(tmp
);
721 /* Lookup device and authorize access to it */
725 NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
726 if ( (error
= namei(&nd
)) )
/*
 * NOTE(review): strncpy() does not NUL-terminate the destination when
 * the source is MAXPATHLEN bytes or longer, whereas the f_mntonname
 * copy earlier in this function uses strlcpy().  Consider strlcpy()
 * here for consistency.
 */
729 strncpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
734 if (devvp
->v_type
!= VBLK
) {
738 if (major(devvp
->v_rdev
) >= nblkdev
) {
743 * If mount by non-root, then verify that user has necessary
744 * permissions on the device.
746 if (suser(vfs_context_ucred(ctx
), NULL
) != 0) {
747 mode_t accessmode
= KAUTH_VNODE_READ_DATA
;
749 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
750 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
751 if ((error
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0)
755 /* On first mount, preflight and open device */
756 if (devpath
&& ((flags
& MNT_UPDATE
) == 0)) {
757 if ( (error
= vnode_ref(devvp
)) )
760 * Disallow multiple mounts of the same device.
761 * Disallow mounting of a device that is currently in use
762 * (except for root, which might share swap device for miniroot).
763 * Flush out any old buffers remaining from a previous use.
765 if ( (error
= vfs_mountedon(devvp
)) )
768 if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) {
772 if ( (error
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
)) ) {
776 if ( (error
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0)) )
779 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
781 error
= mac_vnode_check_open(ctx
,
783 ronly
? FREAD
: FREAD
|FWRITE
);
787 if ( (error
= VNOP_OPEN(devvp
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
)) )
790 mp
->mnt_devvp
= devvp
;
791 device_vnode
= devvp
;
793 } else if ((mp
->mnt_flag
& MNT_RDONLY
) &&
794 (mp
->mnt_kern_flag
& MNTK_WANTRDWR
) &&
795 (device_vnode
= mp
->mnt_devvp
)) {
799 * If upgrade to read-write by non-root, then verify
800 * that user has necessary permissions on the device.
802 vnode_getalways(device_vnode
);
804 if (suser(vfs_context_ucred(ctx
), NULL
) &&
805 (error
= vnode_authorize(device_vnode
, NULL
,
806 KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
,
808 vnode_put(device_vnode
);
812 /* Tell the device that we're upgrading */
813 dev
= (dev_t
)device_vnode
->v_rdev
;
816 if ((u_int
)maj
>= (u_int
)nblkdev
)
817 panic("Volume mounted on a device with invalid major number.");
819 error
= bdevsw
[maj
].d_open(dev
, FREAD
| FWRITE
, S_IFBLK
, p
);
820 vnode_put(device_vnode
);
821 device_vnode
= NULLVP
;
828 if ((flags
& MNT_UPDATE
) == 0) {
829 mac_mount_label_init(mp
);
830 mac_mount_label_associate(ctx
, mp
);
833 if ((flags
& MNT_UPDATE
) != 0) {
834 error
= mac_mount_check_label_update(ctx
, mp
);
841 * Mount the filesystem.
843 error
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
);
845 if (flags
& MNT_UPDATE
) {
846 if (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)
847 mp
->mnt_flag
&= ~MNT_RDONLY
;
849 (MNT_UPDATE
| MNT_RELOAD
| MNT_FORCE
);
850 mp
->mnt_kern_flag
&=~ MNTK_WANTRDWR
;
852 mp
->mnt_flag
= flag
; /* restore flag value */
853 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
);
854 lck_rw_done(&mp
->mnt_rwlock
);
855 is_rwlock_locked
= FALSE
;
857 enablequotas(mp
, ctx
);
862 * Put the new filesystem on the mount list after root.
865 struct vfs_attr vfsattr
;
867 if (vfs_flags(mp
) & MNT_MULTILABEL
) {
868 error
= VFS_ROOT(mp
, &rvp
, ctx
);
870 printf("%s() VFS_ROOT returned %d\n", __func__
, error
);
873 error
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
);
875 * drop reference provided by VFS_ROOT
885 CLR(vp
->v_flag
, VMOUNT
);
886 vp
->v_mountedhere
= mp
;
890 * taking the name_cache_lock exclusively will
891 * insure that everyone is out of the fast path who
892 * might be trying to use a now stale copy of
893 * vp->v_mountedhere->mnt_realrootvp
894 * bumping mount_generation causes the cached values
901 error
= vnode_ref(vp
);
906 have_usecount
= TRUE
;
908 error
= checkdirs(vp
, ctx
);
910 /* Unmount the filesystem as cdir/rdirs cannot be updated */
914 * there is no cleanup code here so I have made it void
915 * we need to revisit this
917 (void)VFS_START(mp
, 0, ctx
);
919 if (mount_list_add(mp
) != 0) {
921 * The system is shutting down trying to umount
922 * everything, so fail with a plausible errno.
927 lck_rw_done(&mp
->mnt_rwlock
);
928 is_rwlock_locked
= FALSE
;
930 /* Check if this mounted file system supports EAs or named streams. */
931 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
932 VFSATTR_INIT(&vfsattr
);
933 VFSATTR_WANTED(&vfsattr
, f_capabilities
);
934 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 &&
935 vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&
936 VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) {
/* A capability counts only when both its capabilities bit and its valid bit are set. */
937 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) &&
938 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) {
939 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
942 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) &&
943 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) {
944 mp
->mnt_kern_flag
|= MNTK_NAMED_STREAMS
;
947 /* Check if this file system supports path from id lookups. */
948 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) &&
949 (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) {
950 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
951 } else if (mp
->mnt_flag
& MNT_DOVOLFS
) {
952 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
953 mp
->mnt_kern_flag
|= MNTK_PATH_FROM_ID
;
956 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSNATIVEXATTR
) {
957 mp
->mnt_kern_flag
|= MNTK_EXTENDED_ATTRS
;
959 if (mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSPREFLIGHT
) {
960 mp
->mnt_kern_flag
|= MNTK_UNMOUNT_PREFLIGHT
;
962 /* increment the operations count */
963 OSAddAtomic(1, &vfs_nummntops
);
964 enablequotas(mp
, ctx
);
967 device_vnode
->v_specflags
|= SI_MOUNTEDON
;
970 * cache the IO attributes for the underlying physical media...
971 * an error return indicates the underlying driver doesn't
972 * support all the queries necessary... however, reasonable
973 * defaults will have been set, so no reason to bail or care
975 vfs_init_io_attributes(device_vnode
, mp
);
978 /* Now that mount is setup, notify the listeners */
979 vfs_notify_mount(pvp
);
981 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
982 if (mp
->mnt_vnodelist
.tqh_first
!= NULL
) {
983 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
984 mp
->mnt_vtable
->vfc_name
, error
);
988 CLR(vp
->v_flag
, VMOUNT
);
991 mp
->mnt_vtable
->vfc_refcount
--;
995 vnode_rele(device_vnode
);
996 VNOP_CLOSE(device_vnode
, ronly
? FREAD
: FREAD
|FWRITE
, ctx
);
998 lck_rw_done(&mp
->mnt_rwlock
);
999 is_rwlock_locked
= FALSE
;
1002 * if we get here, we have a mount structure that needs to be freed,
1003 * but since the coveredvp hasn't yet been updated to point at it,
1004 * no need to worry about other threads holding a crossref on this mp
1005 * so it's ok to just free it
1007 mount_lock_destroy(mp
);
1009 mac_mount_label_destroy(mp
);
1011 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1015 * drop I/O count on the device vp if there was one
1017 if (devpath
&& devvp
)
1022 /* Error condition exits */
1024 (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
);
1027 * If the mount has been placed on the covered vp,
1028 * it may have been discovered by now, so we have
1029 * to treat this just like an unmount
1031 mount_lock_spin(mp
);
1032 mp
->mnt_lflag
|= MNT_LDEAD
;
1035 if (device_vnode
!= NULLVP
) {
1036 vnode_rele(device_vnode
);
1037 VNOP_CLOSE(device_vnode
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1042 vnode_lock_spin(vp
);
1045 vp
->v_mountedhere
= (mount_t
) 0;
1049 if (have_usecount
) {
1053 if (devpath
&& ((flags
& MNT_UPDATE
) == 0) && (!did_rele
))
1056 if (devpath
&& devvp
)
1059 /* Release mnt_rwlock only when it was taken */
1060 if (is_rwlock_locked
== TRUE
) {
1061 lck_rw_done(&mp
->mnt_rwlock
);
1065 if (mp
->mnt_crossref
)
1066 mount_dropcrossref(mp
, vp
, 0);
1068 mount_lock_destroy(mp
);
1070 mac_mount_label_destroy(mp
);
1072 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
1077 vfsp
->vfc_refcount
--;
1078 mount_list_unlock();
1085 * Flush in-core data, check for competing mount attempts,
/*
 * prepare_coveredvp: vet and mark the vnode that a new mount will cover.
 *
 * Checks visible in this excerpt, in order:
 *  - fetch va_uid for vp and compare it with the caller's uid; the
 *    comparison is bypassed for the superuser (how skip_auth gates this
 *    step is not visible here -- TODO confirm);
 *  - VNOP_FSYNC(vp, MNT_WAIT) and buf_invalidateblks() to flush in-core
 *    data;
 *  - vp must be a directory (VDIR);
 *  - vp is tested for VMOUNT being set together with a non-NULL
 *    v_mountedhere;
 *  - mac_mount_check_mount() is consulted;
 *  - finally VMOUNT is set on vp after vnode_lock_spin().
 *
 * cnp and fsname are declared unused via #pragma unused, yet are passed
 * to nothing visible except (presumably) the MAC check -- TODO confirm.
 * Error returns are not shown in this excerpt.
 */
1089 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname
*cnp
, const char *fsname
, boolean_t skip_auth
)
1092 #pragma unused(cnp,fsname)
1094 struct vnode_attr va
;
1099 * If the user is not root, ensure that they own the directory
1100 * onto which we are attempting to mount.
1103 VATTR_WANTED(&va
, va_uid
);
1104 if ((error
= vnode_getattr(vp
, &va
, ctx
)) ||
1105 (va
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1106 (!vfs_context_issuser(ctx
)))) {
1112 if ( (error
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
)) )
1115 if ( (error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0)) )
1118 if (vp
->v_type
!= VDIR
) {
1123 if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere
!= NULL
)) {
1129 error
= mac_mount_check_mount(ctx
, vp
,
1135 vnode_lock_spin(vp
);
1136 SET(vp
->v_flag
, VMOUNT
);
1143 #if CONFIG_IMGSRC_ACCESS
1146 #define IMGSRC_DEBUG(args...) printf(args)
1148 #define IMGSRC_DEBUG(args...) do { } while(0)
/*
 * authorize_devpath_and_update_mntfromname: check that the user-supplied
 * device path names the device backing `mp`, and record that path as
 * the mount's f_mntfromname.
 *
 * Steps visible in this excerpt:
 *  - look up devpath (user space, FOLLOW);
 *  - require the resulting vnode to be a block device;
 *  - require mp->mnt_devvp to be non-NULL and take a reference on it
 *    with vnode_getwithref();
 *  - require vnode_specrdev() of the looked-up vnode to equal that of
 *    the mount's device vnode;
 *  - strlcpy() the looked-up path name into f_mntfromname;
 *  - for non-superusers, vnode_authorize() READ_DATA, plus WRITE_DATA
 *    when the mount is not read-only;
 *  - vnode_put() the mount's device vnode.
 *
 * The error codes returned, and what is stored through devvpp, are not
 * visible here.
 */
1152 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t
*devvpp
, vfs_context_t ctx
)
1154 struct nameidata nd
;
1155 vnode_t vp
, realdevvp
;
1159 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, UIO_USERSPACE
, devpath
, ctx
);
1160 if ( (error
= namei(&nd
)) ) {
1161 IMGSRC_DEBUG("namei() failed with %d\n", error
);
1167 if (!vnode_isblk(vp
)) {
1168 IMGSRC_DEBUG("Not block device.\n");
1173 realdevvp
= mp
->mnt_devvp
;
1174 if (realdevvp
== NULLVP
) {
1175 IMGSRC_DEBUG("No device backs the mount.\n");
1180 error
= vnode_getwithref(realdevvp
);
1182 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1186 if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) {
1187 IMGSRC_DEBUG("Wrong dev_t.\n");
1192 strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
);
1195 * If mount by non-root, then verify that user has necessary
1196 * permissions on the device.
1198 if (!vfs_context_issuser(ctx
)) {
1199 accessmode
= KAUTH_VNODE_READ_DATA
;
1200 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
1201 accessmode
|= KAUTH_VNODE_WRITE_DATA
;
1202 if ((error
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) {
1203 IMGSRC_DEBUG("Access denied.\n");
1211 vnode_put(realdevvp
);
1222 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1223 * and call checkdirs()
/*
 * place_mount_and_checkdirs: attach an existing mount to a new covered
 * vnode.
 *
 * Visible sequence: record vp as mp->mnt_vnodecovered; under
 * vnode_lock_spin(vp) clear VMOUNT and set vp->v_mountedhere = mp; a
 * name-cache critical section ends with name_cache_unlock() (its start
 * and the mount_generation bump described in the residue below are not
 * visible); take a reference with vnode_ref(vp); run checkdirs(vp, ctx).
 * On the failure path shown, mnt_vnodecovered is reset to NULLVP.
 * The remaining undo steps and the return value are not visible.
 */
1226 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
)
1230 mp
->mnt_vnodecovered
= vp
; /* XXX This is normally only set at init-time ... */
1232 vnode_lock_spin(vp
);
1233 CLR(vp
->v_flag
, VMOUNT
);
1234 vp
->v_mountedhere
= mp
;
1238 * taking the name_cache_lock exclusively will
1239 * insure that everyone is out of the fast path who
1240 * might be trying to use a now stale copy of
1241 * vp->v_mountedhere->mnt_realrootvp
1242 * bumping mount_generation causes the cached values
1247 name_cache_unlock();
1249 error
= vnode_ref(vp
);
1254 error
= checkdirs(vp
, ctx
);
1256 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1263 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * undo_place_on_covered_vp: detach `mp` from the covered vnode `vp`.
 *
 * Visible effect: after vnode_lock_spin(vp), vp->v_mountedhere is reset
 * to NULL, and mp->mnt_vnodecovered is reset to NULLVP.
 * NOTE(review): the matching unlock and any reference release are not
 * visible in this excerpt; confirm against the complete source.
 */
1269 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
)
1272 vnode_lock_spin(vp
);
1273 vp
->v_mountedhere
= (mount_t
)NULL
;
1276 mp
->mnt_vnodecovered
= NULLVP
;
/*
 * mount_begin_update: take the mount's rwlock exclusively and authorize
 * an update of `mp`.
 *
 * Visible sequence: under mount_lock_spin(mp), test MNT_LUNMOUNT (an
 * unmount in progress is an error); take mnt_rwlock exclusively; test
 * that MNT_RELOAD is only requested for a read-only mount; test that
 * the caller is the mount owner (f_owner) or the superuser; consult
 * mac_mount_check_remount().  The lck_rw_done() near the end is
 * presumably the failure path -- TODO confirm; on success the lock is
 * expected to be dropped later by mount_end_update().
 */
1280 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
)
1284 /* unmount in progress return error */
1285 mount_lock_spin(mp
);
1286 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1291 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1294 * We only allow the filesystem to be reloaded if it
1295 * is currently mounted read-only.
1297 if ((flags
& MNT_RELOAD
) &&
1298 ((mp
->mnt_flag
& MNT_RDONLY
) == 0)) {
1304 * Only root, or the user that did the original mount is
1305 * permitted to update it.
1307 if (mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) &&
1308 (!vfs_context_issuser(ctx
))) {
1313 error
= mac_mount_check_remount(ctx
, mp
);
1321 lck_rw_done(&mp
->mnt_rwlock
);
1328 mount_end_update(mount_t mp
)
1330 lck_rw_done(&mp
->mnt_rwlock
);
/*
 * get_imgsrc_rootvnode: look up the imageboot source root vnode at
 * nesting level `height`.
 *
 * Visible logic: `height` is range-checked against
 * MAX_IMAGEBOOT_NESTING, the candidate is read from
 * imgsrc_rootvnodes[height], and success requires it to be non-NULL and
 * vnode_get() on it to return 0.  Presumably the vnode is handed back
 * through rvpp -- the store and the return values are not visible in
 * this excerpt.
 */
1334 get_imgsrc_rootvnode(uint32_t height
, vnode_t
*rvpp
)
1338 if (height
>= MAX_IMAGEBOOT_NESTING
) {
1342 vp
= imgsrc_rootvnodes
[height
];
1343 if ((vp
!= NULLVP
) && (vnode_get(vp
) == 0)) {
1352 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname
*cnp
,
1353 const char *fsname
, vfs_context_t ctx
,
1354 boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
)
1358 boolean_t placed
= FALSE
;
1359 vnode_t devvp
= NULLVP
;
1360 struct vfstable
*vfsp
;
1361 user_addr_t devpath
;
1362 char *old_mntonname
;
1367 /* If we didn't imageboot, nothing to move */
1368 if (imgsrc_rootvnodes
[0] == NULLVP
) {
1372 /* Only root can do this */
1373 if (!vfs_context_issuser(ctx
)) {
1377 IMGSRC_DEBUG("looking for root vnode.\n");
1380 * Get root vnode of filesystem we're moving.
1384 struct user64_mnt_imgsrc_args mia64
;
1385 error
= copyin(fsmountargs
, &mia64
, sizeof(mia64
));
1387 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1391 height
= mia64
.mi_height
;
1392 flags
= mia64
.mi_flags
;
1393 devpath
= mia64
.mi_devpath
;
1395 struct user32_mnt_imgsrc_args mia32
;
1396 error
= copyin(fsmountargs
, &mia32
, sizeof(mia32
));
1398 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1402 height
= mia32
.mi_height
;
1403 flags
= mia32
.mi_flags
;
1404 devpath
= mia32
.mi_devpath
;
1408 * For binary compatibility--assumes one level of nesting.
1411 if ( (error
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
))) )
1415 if ( (error
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
))) )
1418 /* munge into LP64 addr */
1419 devpath
= CAST_USER_ADDR_T(tmp
);
1427 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
);
1431 error
= get_imgsrc_rootvnode(height
, &rvp
);
1433 IMGSRC_DEBUG("getting root vnode failed with %d\n", error
);
1437 IMGSRC_DEBUG("got root vnode.\n");
1439 MALLOC(old_mntonname
, char*, MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1441 /* Can only move once */
1442 mp
= vnode_mount(rvp
);
1443 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1444 IMGSRC_DEBUG("Already moved.\n");
1449 IMGSRC_DEBUG("Starting updated.\n");
1451 /* Get exclusive rwlock on mount, authorize update on mp */
1452 error
= mount_begin_update(mp
, ctx
, 0);
1454 IMGSRC_DEBUG("Starting updated failed with %d\n", error
);
1459 * It can only be moved once. Flag is set under the rwlock,
1460 * so we're now safe to proceed.
1462 if ((mp
->mnt_kern_flag
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) {
1463 IMGSRC_DEBUG("Already moved [2]\n");
1468 IMGSRC_DEBUG("Preparing coveredvp.\n");
1470 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1471 error
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
);
1473 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
);
1477 IMGSRC_DEBUG("Covered vp OK.\n");
1479 /* Sanity check the name caller has provided */
1480 vfsp
= mp
->mnt_vtable
;
1481 if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) {
1482 IMGSRC_DEBUG("Wrong fs name.\n");
1487 /* Check the device vnode and update mount-from name, for local filesystems */
1488 if (vfsp
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1489 IMGSRC_DEBUG("Local, doing device validation.\n");
1491 if (devpath
!= USER_ADDR_NULL
) {
1492 error
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
);
1494 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1503 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1504 * and increment the name cache's mount generation
1507 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1508 error
= place_mount_and_checkdirs(mp
, vp
, ctx
);
1515 strncpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
);
1516 strncpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
);
1518 /* Forbid future moves */
1520 mp
->mnt_kern_flag
|= MNTK_HAS_MOVED
;
1523 /* Finally, add to mount list, completely ready to go */
1524 if (mount_list_add(mp
) != 0) {
1526 * The system is shutting down trying to umount
1527 * everything, so fail with a plausible errno.
1533 mount_end_update(mp
);
1535 FREE(old_mntonname
, M_TEMP
);
1537 vfs_notify_mount(pvp
);
1541 strncpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
);
1544 mp
->mnt_kern_flag
&= ~(MNTK_HAS_MOVED
);
1549 * Placing the mp on the vnode clears VMOUNT,
1550 * so cleanup is different after that point
1553 /* Rele the vp, clear VMOUNT and v_mountedhere */
1554 undo_place_on_covered_vp(mp
, vp
);
1556 vnode_lock_spin(vp
);
1557 CLR(vp
->v_flag
, VMOUNT
);
1561 mount_end_update(mp
);
1565 FREE(old_mntonname
, M_TEMP
);
1569 #endif /* CONFIG_IMGSRC_ACCESS */
1572 enablequotas(struct mount
*mp
, vfs_context_t ctx
)
1574 struct nameidata qnd
;
1576 char qfpath
[MAXPATHLEN
];
1577 const char *qfname
= QUOTAFILENAME
;
1578 const char *qfopsname
= QUOTAOPSNAME
;
1579 const char *qfextension
[] = INITQFNAMES
;
1581 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1582 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0 ) {
1586 * Enable filesystem disk quotas if necessary.
1587 * We ignore errors as this should not interfere with final mount
1589 for (type
=0; type
< MAXQUOTAS
; type
++) {
1590 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]);
1591 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
,
1592 CAST_USER_ADDR_T(qfpath
), ctx
);
1593 if (namei(&qnd
) != 0)
1594 continue; /* option file to trigger quotas is not present */
1595 vnode_put(qnd
.ni_vp
);
1597 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]);
1599 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
);
1606 checkdirs_callback(proc_t p
, void * arg
)
1608 struct cdirargs
* cdrp
= (struct cdirargs
* )arg
;
1609 vnode_t olddp
= cdrp
->olddp
;
1610 vnode_t newdp
= cdrp
->newdp
;
1611 struct filedesc
*fdp
;
1615 int cdir_changed
= 0;
1616 int rdir_changed
= 0;
1619 * XXX Also needs to iterate each thread in the process to see if it
1620 * XXX is using a per-thread current working directory, and, if so,
1621 * XXX update that as well.
1626 if (fdp
== (struct filedesc
*)0) {
1628 return(PROC_RETURNED
);
1630 fdp_cvp
= fdp
->fd_cdir
;
1631 fdp_rvp
= fdp
->fd_rdir
;
1634 if (fdp_cvp
== olddp
) {
1641 if (fdp_rvp
== olddp
) {
1648 if (cdir_changed
|| rdir_changed
) {
1650 fdp
->fd_cdir
= fdp_cvp
;
1651 fdp
->fd_rdir
= fdp_rvp
;
1654 return(PROC_RETURNED
);
1660 * Scan all active processes to see if any of them have a current
1661 * or root directory onto which the new filesystem has just been
1662 * mounted. If so, replace them with the new mount point.
1665 checkdirs(vnode_t olddp
, vfs_context_t ctx
)
1670 struct cdirargs cdr
;
1672 if (olddp
->v_usecount
== 1)
1674 err
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
);
1678 panic("mount: lost mount: error %d", err
);
1685 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1686 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
);
1688 if (rootvnode
== olddp
) {
1700 * Unmount a file system.
1702 * Note: unmount takes a path to the vnode mounted on as argument,
1703 * not special file (as before).
1707 unmount(__unused proc_t p
, struct unmount_args
*uap
, __unused
int32_t *retval
)
1712 struct nameidata nd
;
1713 vfs_context_t ctx
= vfs_context_current();
1715 NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW
| AUDITVNPATH1
,
1716 UIO_USERSPACE
, uap
->path
, ctx
);
1725 error
= mac_mount_check_umount(ctx
, mp
);
1732 * Must be the root of the filesystem
1734 if ((vp
->v_flag
& VROOT
) == 0) {
1740 /* safedounmount consumes the mount ref */
1741 return (safedounmount(mp
, uap
->flags
, ctx
));
1745 vfs_unmountbyfsid(fsid_t
* fsid
, int flags
, vfs_context_t ctx
)
1749 mp
= mount_list_lookupby_fsid(fsid
, 0, 1);
1750 if (mp
== (mount_t
)0) {
1755 /* safedounmount consumes the mount ref */
1756 return(safedounmount(mp
, flags
, ctx
));
1761 * The mount struct comes with a mount ref which will be consumed.
1762 * Do the actual file system unmount, prevent some common foot shooting.
1765 safedounmount(struct mount
*mp
, int flags
, vfs_context_t ctx
)
1768 proc_t p
= vfs_context_proc(ctx
);
1771 * If the file system is not responding and MNT_NOBLOCK
1772 * is set and not a forced unmount then return EBUSY.
1774 if ((mp
->mnt_kern_flag
& MNT_LNOTRESP
) &&
1775 (flags
& MNT_NOBLOCK
) && ((flags
& MNT_FORCE
) == 0)) {
1781 * Skip authorization if the mount is tagged as permissive and
1782 * this is not a forced-unmount attempt.
1784 if (!(((mp
->mnt_kern_flag
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags
& MNT_FORCE
) == 0))) {
1786 * Only root, or the user that did the original mount is
1787 * permitted to unmount this filesystem.
1789 if ((mp
->mnt_vfsstat
.f_owner
!= kauth_cred_getuid(kauth_cred_get())) &&
1790 (error
= suser(kauth_cred_get(), &p
->p_acflag
)))
1794 * Don't allow unmounting the root file system.
1796 if (mp
->mnt_flag
& MNT_ROOTFS
) {
1797 error
= EBUSY
; /* the root is always busy */
1801 #ifdef CONFIG_IMGSRC_ACCESS
1802 if (mp
->mnt_kern_flag
& MNTK_BACKS_ROOT
) {
1806 #endif /* CONFIG_IMGSRC_ACCESS */
1808 return (dounmount(mp
, flags
, 1, ctx
));
1816 * Do the actual file system unmount.
1819 dounmount(struct mount
*mp
, int flags
, int withref
, vfs_context_t ctx
)
1821 vnode_t coveredvp
= (vnode_t
)0;
1824 int forcedunmount
= 0;
1826 struct vnode
*devvp
= NULLVP
;
1828 proc_t p
= vfs_context_proc(ctx
);
1830 int pflags_save
= 0;
1831 #endif /* CONFIG_TRIGGERS */
1836 * If already an unmount in progress just return EBUSY.
1837 * Even a forced unmount cannot override.
1839 if (mp
->mnt_lflag
& MNT_LUNMOUNT
) {
1846 if (flags
& MNT_FORCE
) {
1848 mp
->mnt_lflag
|= MNT_LFORCE
;
1852 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
)
1853 pflags_save
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
);
1856 mp
->mnt_kern_flag
|= MNTK_UNMOUNT
;
1857 mp
->mnt_lflag
|= MNT_LUNMOUNT
;
1858 mp
->mnt_flag
&=~ MNT_ASYNC
;
1860 * anyone currently in the fast path that
1861 * trips over the cached rootvp will be
1862 * dumped out and forced into the slow path
1863 * to regenerate a new cached value
1865 mp
->mnt_realrootvp
= NULLVP
;
1868 if (forcedunmount
&& (flags
& MNT_LNOSUB
) == 0) {
1870 * Force unmount any mounts in this filesystem.
1871 * If any unmounts fail - just leave them dangling.
1874 (void) dounmount_submounts(mp
, flags
| MNT_LNOSUB
, ctx
);
1878 * taking the name_cache_lock exclusively will
1879 * ensure that everyone is out of the fast path who
1880 * might be trying to use a now stale copy of
1881 * vp->v_mountedhere->mnt_realrootvp
1882 * bumping mount_generation causes the cached values
1887 name_cache_unlock();
1890 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1894 fsevent_unmount(mp
); /* has to come first! */
1897 if (forcedunmount
== 0) {
1898 ubc_umount(mp
); /* release cached vnodes */
1899 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
1900 error
= VFS_SYNC(mp
, MNT_WAIT
, ctx
);
1903 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1904 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1905 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1912 vfs_nested_trigger_unmounts(mp
, flags
, ctx
);
1916 lflags
|= FORCECLOSE
;
1917 error
= vflush(mp
, NULLVP
, SKIPSWAP
| SKIPSYSTEM
| SKIPROOT
| lflags
);
1918 if ((forcedunmount
== 0) && error
) {
1920 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1921 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1922 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1926 /* make sure there is no one in the mount iterations or lookup */
1927 mount_iterdrain(mp
);
1929 error
= VFS_UNMOUNT(mp
, flags
, ctx
);
1931 mount_iterreset(mp
);
1933 mp
->mnt_kern_flag
&= ~MNTK_UNMOUNT
;
1934 mp
->mnt_lflag
&= ~MNT_LUNMOUNT
;
1935 mp
->mnt_lflag
&= ~MNT_LFORCE
;
1939 /* increment the operations count */
1941 OSAddAtomic(1, &vfs_nummntops
);
1943 if ( mp
->mnt_devvp
&& mp
->mnt_vtable
->vfc_vfsflags
& VFC_VFSLOCALARGS
) {
1944 /* hold an io reference and drop the usecount before close */
1945 devvp
= mp
->mnt_devvp
;
1946 vnode_getalways(devvp
);
1948 VNOP_CLOSE(devvp
, mp
->mnt_flag
& MNT_RDONLY
? FREAD
: FREAD
|FWRITE
,
1950 vnode_clearmountedon(devvp
);
1953 lck_rw_done(&mp
->mnt_rwlock
);
1954 mount_list_remove(mp
);
1955 lck_rw_lock_exclusive(&mp
->mnt_rwlock
);
1957 /* mark the mount point hook in the vp but do not drop the ref yet */
1958 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
) {
1960 * The covered vnode needs special handling. Trying to get an
1961 * iocount must not block here as this may lead to deadlocks
1962 * if the Filesystem to which the covered vnode belongs is
1963 * undergoing forced unmounts. Since we hold a usecount, the
1964 * vnode cannot be reused (it can, however, still be terminated)
1966 vnode_getalways(coveredvp
);
1967 vnode_lock_spin(coveredvp
);
1970 coveredvp
->v_mountedhere
= (struct mount
*)0;
1971 CLR(coveredvp
->v_flag
, VMOUNT
);
1973 vnode_unlock(coveredvp
);
1974 vnode_put(coveredvp
);
1978 mp
->mnt_vtable
->vfc_refcount
--;
1979 mount_list_unlock();
1981 cache_purgevfs(mp
); /* remove cache entries for this file sys */
1982 vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
);
1984 mp
->mnt_lflag
|= MNT_LDEAD
;
1986 if (mp
->mnt_lflag
& MNT_LWAIT
) {
1988 * do the wakeup here
1989 * in case we block in mount_refdrain
1990 * which will drop the mount lock
1991 * and allow anyone blocked in vfs_busy
1992 * to wakeup and see the LDEAD state
1994 mp
->mnt_lflag
&= ~MNT_LWAIT
;
1995 wakeup((caddr_t
)mp
);
1999 if (mp
->mnt_lflag
& MNT_LWAIT
) {
2000 mp
->mnt_lflag
&= ~MNT_LWAIT
;
2005 if (flags
& MNT_NOBLOCK
&& p
!= kernproc
) {
2006 // Restore P_NOREMOTEHANG bit to its previous value
2007 if ((pflags_save
& P_NOREMOTEHANG
) == 0)
2008 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
);
2012 * Callback and context are set together under the mount lock, and
2013 * never cleared, so we're safe to examine them here, drop the lock,
2016 if (mp
->mnt_triggercallback
!= NULL
) {
2019 mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
);
2020 } else if (did_vflush
) {
2021 mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
);
2028 #endif /* CONFIG_TRIGGERS */
2030 lck_rw_done(&mp
->mnt_rwlock
);
2033 wakeup((caddr_t
)mp
);
2036 if ((coveredvp
!= NULLVP
)) {
2037 vnode_t pvp
= NULLVP
;
2040 * The covered vnode needs special handling. Trying to
2041 * get an iocount must not block here as this may lead
2042 * to deadlocks if the Filesystem to which the covered
2043 * vnode belongs is undergoing forced unmounts. Since we
2044 * hold a usecount, the vnode cannot be reused
2045 * (it can, however, still be terminated).
2047 vnode_getalways(coveredvp
);
2049 mount_dropcrossref(mp
, coveredvp
, 0);
2051 * We'll _try_ to detect if this really needs to be
2052 * done. The coveredvp can only be in termination (or
2053 * terminated) if the coveredvp's mount point is in a
2054 * forced unmount (or has been) since we still hold the
2057 if (!vnode_isrecycled(coveredvp
)) {
2058 pvp
= vnode_getparent(coveredvp
);
2060 if (coveredvp
->v_resolve
) {
2061 vnode_trigger_rearm(coveredvp
, ctx
);
2066 vnode_rele(coveredvp
);
2067 vnode_put(coveredvp
);
2071 lock_vnode_and_post(pvp
, NOTE_WRITE
);
2074 } else if (mp
->mnt_flag
& MNT_ROOTFS
) {
2075 mount_lock_destroy(mp
);
2077 mac_mount_label_destroy(mp
);
2079 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2081 panic("dounmount: no coveredvp");
2087 * Unmount any mounts in this filesystem.
2090 dounmount_submounts(struct mount
*mp
, int flags
, vfs_context_t ctx
)
2093 fsid_t
*fsids
, fsid
;
2095 int count
= 0, i
, m
= 0;
2100 // Get an array to hold the submounts fsids.
2101 TAILQ_FOREACH(smp
, &mountlist
, mnt_list
)
2103 fsids_sz
= count
* sizeof(fsid_t
);
2104 MALLOC(fsids
, fsid_t
*, fsids_sz
, M_TEMP
, M_NOWAIT
);
2105 if (fsids
== NULL
) {
2106 mount_list_unlock();
2109 fsids
[0] = mp
->mnt_vfsstat
.f_fsid
; // Prime the pump
2112 * Fill the array with submount fsids.
2113 * Since mounts are always added to the tail of the mount list, the
2114 * list is always in mount order.
2115 * For each mount check if the mounted-on vnode belongs to a
2116 * mount that's already added to our array of mounts to be unmounted.
2118 for (smp
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp
= TAILQ_NEXT(smp
, mnt_list
)) {
2119 vp
= smp
->mnt_vnodecovered
;
2122 fsid
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
; // Underlying fsid
2123 for (i
= 0; i
<= m
; i
++) {
2124 if (fsids
[i
].val
[0] == fsid
.val
[0] &&
2125 fsids
[i
].val
[1] == fsid
.val
[1]) {
2126 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
;
2131 mount_list_unlock();
2133 // Unmount the submounts in reverse order. Ignore errors.
2134 for (i
= m
; i
> 0; i
--) {
2135 smp
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1);
2138 mount_iterdrop(smp
);
2139 (void) dounmount(smp
, flags
, 1, ctx
);
2144 FREE(fsids
, M_TEMP
);
2148 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
)
2153 if (mp
->mnt_crossref
< 0)
2154 panic("mount cross refs -ve");
2156 if ((mp
!= dp
->v_mountedhere
) && (mp
->mnt_crossref
== 0)) {
2159 vnode_put_locked(dp
);
2162 mount_lock_destroy(mp
);
2164 mac_mount_label_destroy(mp
);
2166 FREE_ZONE((caddr_t
)mp
, sizeof (struct mount
), M_MOUNT
);
2170 vnode_put_locked(dp
);
2176 * Sync each mounted filesystem.
2182 int print_vmpage_stat
=0;
2183 int sync_timeout
= 60; // Sync time limit (sec)
2186 sync_callback(mount_t mp
, __unused
void *arg
)
2188 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
2189 int asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
2191 mp
->mnt_flag
&= ~MNT_ASYNC
;
2192 VFS_SYNC(mp
, arg
? MNT_WAIT
: MNT_NOWAIT
, vfs_context_kernel());
2194 mp
->mnt_flag
|= MNT_ASYNC
;
2197 return (VFS_RETURNED
);
2202 sync(__unused proc_t p
, __unused
struct sync_args
*uap
, __unused
int32_t *retval
)
2204 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2206 if (print_vmpage_stat
) {
2207 vm_countdirtypages();
2213 #endif /* DIAGNOSTIC */
2218 sync_thread(void *arg
, __unused wait_result_t wr
)
2220 int *timeout
= (int *) arg
;
2222 vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
);
2225 wakeup((caddr_t
) timeout
);
2226 if (print_vmpage_stat
) {
2227 vm_countdirtypages();
2233 #endif /* DIAGNOSTIC */
2237 * Sync in a separate thread so we can time out if it blocks.
2240 sync_async(int timeout
)
2244 struct timespec ts
= {timeout
, 0};
2246 lck_mtx_lock(sync_mtx_lck
);
2247 if (kernel_thread_start(sync_thread
, &timeout
, &thd
) != KERN_SUCCESS
) {
2248 printf("sync_thread failed\n");
2249 lck_mtx_unlock(sync_mtx_lck
);
2253 error
= msleep((caddr_t
) &timeout
, sync_mtx_lck
, (PVFS
| PDROP
| PCATCH
), "sync_thread", &ts
);
2255 printf("sync timed out: %d sec\n", timeout
);
2257 thread_deallocate(thd
);
2263 * An in-kernel sync for power management to call.
2265 __private_extern__
int
2268 (void) sync_async(sync_timeout
);
2271 } /* end of sync_internal call */
2274 * Change filesystem quotas.
2278 quotactl(proc_t p
, struct quotactl_args
*uap
, __unused
int32_t *retval
)
2281 int error
, quota_cmd
, quota_status
;
2284 struct nameidata nd
;
2285 vfs_context_t ctx
= vfs_context_current();
2286 struct dqblk my_dqblk
;
2288 AUDIT_ARG(uid
, uap
->uid
);
2289 AUDIT_ARG(cmd
, uap
->cmd
);
2290 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
2295 mp
= nd
.ni_vp
->v_mount
;
2296 vnode_put(nd
.ni_vp
);
2299 /* copyin any data we will need for downstream code */
2300 quota_cmd
= uap
->cmd
>> SUBCMDSHIFT
;
2302 switch (quota_cmd
) {
2304 /* uap->arg specifies a file from which to take the quotas */
2305 fnamelen
= MAXPATHLEN
;
2306 datap
= kalloc(MAXPATHLEN
);
2307 error
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
);
2310 /* uap->arg is a pointer to a dqblk structure. */
2311 datap
= (caddr_t
) &my_dqblk
;
2315 /* uap->arg is a pointer to a dqblk structure. */
2316 datap
= (caddr_t
) &my_dqblk
;
2317 if (proc_is64bit(p
)) {
2318 struct user_dqblk my_dqblk64
;
2319 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof (my_dqblk64
));
2321 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
);
2325 error
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof (my_dqblk
));
2329 /* uap->arg is a pointer to an integer */
2330 datap
= (caddr_t
) "a_status
;
2338 error
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
);
2341 switch (quota_cmd
) {
2344 kfree(datap
, MAXPATHLEN
);
2347 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2349 if (proc_is64bit(p
)) {
2350 struct user_dqblk my_dqblk64
= {.dqb_bhardlimit
= 0};
2351 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
);
2352 error
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof (my_dqblk64
));
2355 error
= copyout(datap
, uap
->arg
, sizeof (struct dqblk
));
2360 /* uap->arg is a pointer to an integer */
2362 error
= copyout(datap
, uap
->arg
, sizeof(quota_status
));
2373 quotactl(__unused proc_t p
, __unused
struct quotactl_args
*uap
, __unused
int32_t *retval
)
2375 return (EOPNOTSUPP
);
2380 * Get filesystem statistics.
2382 * Returns: 0 Success
2384 * vfs_update_vfsstat:???
2385 * munge_statfs:EFAULT
2389 statfs(__unused proc_t p
, struct statfs_args
*uap
, __unused
int32_t *retval
)
2392 struct vfsstatfs
*sp
;
2394 struct nameidata nd
;
2395 vfs_context_t ctx
= vfs_context_current();
2398 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2399 UIO_USERSPACE
, uap
->path
, ctx
);
2405 sp
= &mp
->mnt_vfsstat
;
2408 error
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
);
2414 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2420 * Get filesystem statistics.
2424 fstatfs(__unused proc_t p
, struct fstatfs_args
*uap
, __unused
int32_t *retval
)
2428 struct vfsstatfs
*sp
;
2431 AUDIT_ARG(fd
, uap
->fd
);
2433 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2436 error
= vnode_getwithref(vp
);
2442 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2449 sp
= &mp
->mnt_vfsstat
;
2450 if ((error
= vfs_update_vfsstat(mp
,vfs_context_current(),VFS_USER_EVENT
)) != 0) {
2454 error
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
);
2464 * Common routine to handle copying of statfs64 data to user space
2467 statfs64_common(struct mount
*mp
, struct vfsstatfs
*sfsp
, user_addr_t bufp
)
2470 struct statfs64 sfs
;
2472 bzero(&sfs
, sizeof(sfs
));
2474 sfs
.f_bsize
= sfsp
->f_bsize
;
2475 sfs
.f_iosize
= (int32_t)sfsp
->f_iosize
;
2476 sfs
.f_blocks
= sfsp
->f_blocks
;
2477 sfs
.f_bfree
= sfsp
->f_bfree
;
2478 sfs
.f_bavail
= sfsp
->f_bavail
;
2479 sfs
.f_files
= sfsp
->f_files
;
2480 sfs
.f_ffree
= sfsp
->f_ffree
;
2481 sfs
.f_fsid
= sfsp
->f_fsid
;
2482 sfs
.f_owner
= sfsp
->f_owner
;
2483 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
2484 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
2485 sfs
.f_fssubtype
= sfsp
->f_fssubtype
;
2486 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
2487 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
);
2489 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSTYPENAMELEN
);
2491 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MAXPATHLEN
);
2492 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MAXPATHLEN
);
2494 error
= copyout((caddr_t
)&sfs
, bufp
, sizeof(sfs
));
2500 * Get file system statistics in 64-bit mode
2503 statfs64(__unused
struct proc
*p
, struct statfs64_args
*uap
, __unused
int32_t *retval
)
2506 struct vfsstatfs
*sp
;
2508 struct nameidata nd
;
2509 vfs_context_t ctxp
= vfs_context_current();
2512 NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW
| AUDITVNPATH1
,
2513 UIO_USERSPACE
, uap
->path
, ctxp
);
2519 sp
= &mp
->mnt_vfsstat
;
2522 error
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
);
2528 error
= statfs64_common(mp
, sp
, uap
->buf
);
2535 * Get file system statistics in 64-bit mode
2538 fstatfs64(__unused
struct proc
*p
, struct fstatfs64_args
*uap
, __unused
int32_t *retval
)
2542 struct vfsstatfs
*sp
;
2545 AUDIT_ARG(fd
, uap
->fd
);
2547 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2550 error
= vnode_getwithref(vp
);
2556 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
2563 sp
= &mp
->mnt_vfsstat
;
2564 if ((error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) {
2568 error
= statfs64_common(mp
, sp
, uap
->buf
);
2577 struct getfsstat_struct
{
2588 getfsstat_callback(mount_t mp
, void * arg
)
2591 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2592 struct vfsstatfs
*sp
;
2594 vfs_context_t ctx
= vfs_context_current();
2596 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2597 sp
= &mp
->mnt_vfsstat
;
2599 * If MNT_NOWAIT is specified, do not refresh the
2600 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2602 if (((fstp
->flags
& MNT_NOWAIT
) == 0 || (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2603 (error
= vfs_update_vfsstat(mp
, ctx
,
2605 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2606 return(VFS_RETURNED
);
2610 * Need to handle LP64 version of struct statfs
2612 error
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
);
2614 fstp
->error
= error
;
2615 return(VFS_RETURNED_DONE
);
2617 fstp
->sfsp
+= my_size
;
2621 error
= mac_mount_label_get(mp
, *fstp
->mp
);
2623 fstp
->error
= error
;
2624 return(VFS_RETURNED_DONE
);
2631 return(VFS_RETURNED
);
2635 * Get statistics on all filesystems.
2638 getfsstat(__unused proc_t p
, struct getfsstat_args
*uap
, int *retval
)
2640 struct __mac_getfsstat_args muap
;
2642 muap
.buf
= uap
->buf
;
2643 muap
.bufsize
= uap
->bufsize
;
2644 muap
.mac
= USER_ADDR_NULL
;
2646 muap
.flags
= uap
->flags
;
2648 return (__mac_getfsstat(p
, &muap
, retval
));
2652 * __mac_getfsstat: Get MAC-related file system statistics
2654 * Parameters: p (ignored)
2655 * uap User argument descriptor (see below)
2656 * retval Count of file system statistics (N stats)
2658 * Indirect: uap->bufsize Buffer size
2659 * uap->macsize MAC info size
2660 * uap->buf Buffer where information will be returned
2662 * uap->flags File system flags
2665 * Returns: 0 Success
2670 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args
*uap
, int *retval
)
2674 size_t count
, maxcount
, bufsize
, macsize
;
2675 struct getfsstat_struct fst
;
2677 bufsize
= (size_t) uap
->bufsize
;
2678 macsize
= (size_t) uap
->macsize
;
2680 if (IS_64BIT_PROCESS(p
)) {
2681 maxcount
= bufsize
/ sizeof(struct user64_statfs
);
2684 maxcount
= bufsize
/ sizeof(struct user32_statfs
);
2692 if (uap
->mac
!= USER_ADDR_NULL
) {
2697 count
= (macsize
/ (IS_64BIT_PROCESS(p
) ? 8 : 4));
2698 if (count
!= maxcount
)
2701 /* Copy in the array */
2702 MALLOC(mp0
, u_int32_t
*, macsize
, M_MACTEMP
, M_WAITOK
);
2707 error
= copyin(uap
->mac
, mp0
, macsize
);
2709 FREE(mp0
, M_MACTEMP
);
2713 /* Normalize to an array of user_addr_t */
2714 MALLOC(mp
, user_addr_t
*, count
* sizeof(user_addr_t
), M_MACTEMP
, M_WAITOK
);
2716 FREE(mp0
, M_MACTEMP
);
2720 for (i
= 0; i
< count
; i
++) {
2721 if (IS_64BIT_PROCESS(p
))
2722 mp
[i
] = ((user_addr_t
*)mp0
)[i
];
2724 mp
[i
] = (user_addr_t
)mp0
[i
];
2726 FREE(mp0
, M_MACTEMP
);
2733 fst
.flags
= uap
->flags
;
2736 fst
.maxcount
= maxcount
;
2739 vfs_iterate(0, getfsstat_callback
, &fst
);
2742 FREE(mp
, M_MACTEMP
);
2745 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2749 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2750 *retval
= fst
.maxcount
;
2752 *retval
= fst
.count
;
2757 getfsstat64_callback(mount_t mp
, void * arg
)
2759 struct getfsstat_struct
*fstp
= (struct getfsstat_struct
*)arg
;
2760 struct vfsstatfs
*sp
;
2763 if (fstp
->sfsp
&& fstp
->count
< fstp
->maxcount
) {
2764 sp
= &mp
->mnt_vfsstat
;
2766 * If MNT_NOWAIT is specified, do not refresh the fsstat
2767 * cache. MNT_WAIT overrides MNT_NOWAIT.
2769 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2770 * getfsstat, since the constants are out of the same
2773 if (((fstp
->flags
& MNT_NOWAIT
) == 0 ||
2774 (fstp
->flags
& (MNT_WAIT
| MNT_DWAIT
))) &&
2775 (error
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
))) {
2776 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
);
2777 return(VFS_RETURNED
);
2780 error
= statfs64_common(mp
, sp
, fstp
->sfsp
);
2782 fstp
->error
= error
;
2783 return(VFS_RETURNED_DONE
);
2785 fstp
->sfsp
+= sizeof(struct statfs64
);
2788 return(VFS_RETURNED
);
2792 * Get statistics on all file systems in 64 bit mode.
2795 getfsstat64(__unused proc_t p
, struct getfsstat64_args
*uap
, int *retval
)
2798 int count
, maxcount
;
2799 struct getfsstat_struct fst
;
2801 maxcount
= uap
->bufsize
/ sizeof(struct statfs64
);
2807 fst
.flags
= uap
->flags
;
2810 fst
.maxcount
= maxcount
;
2812 vfs_iterate(0, getfsstat64_callback
, &fst
);
2815 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
);
2819 if (fst
.sfsp
&& fst
.count
> fst
.maxcount
)
2820 *retval
= fst
.maxcount
;
2822 *retval
= fst
.count
;
2828 * Gets the vnode associated with the file descriptor passed in.
2832 * ctx - vfs context of caller
2833 * fd - file descriptor for which vnode is required.
2834 * vpp - Pointer to pointer to vnode to be returned.
2836 * The vnode is returned with an iocount so any vnode obtained
2837 * by this call needs a vnode_put
2841 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t
*vpp
)
2845 struct fileproc
*fp
;
2846 proc_t p
= vfs_context_proc(ctx
);
2850 error
= fp_getfvp(p
, fd
, &fp
, &vp
);
2854 error
= vnode_getwithref(vp
);
2856 (void)fp_drop(p
, fd
, fp
, 0);
2860 (void)fp_drop(p
, fd
, fp
, 0);
2866 * Wrapper function around namei to start lookup from a directory
2867 * specified by a file descriptor ni_dirfd.
2869 * In addition to all the errors returned by namei, this call can
2870 * return ENOTDIR if the file descriptor does not refer to a directory,
2871 * and EBADF if the file descriptor is not valid.
2874 nameiat(struct nameidata
*ndp
, int dirfd
)
2876 if ((dirfd
!= AT_FDCWD
) &&
2877 !(ndp
->ni_flag
& NAMEI_CONTLOOKUP
) &&
2878 !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
2882 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
2883 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
2887 c
= *((char *)(ndp
->ni_dirp
));
2893 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
2898 if (vnode_vtype(dvp_at
) != VDIR
) {
2903 ndp
->ni_dvp
= dvp_at
;
2904 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
2906 ndp
->ni_cnd
.cn_flags
&= ~USEDVP
;
2912 return (namei(ndp
));
2916 * Change current working directory to a given file descriptor.
2920 common_fchdir(proc_t p
, struct fchdir_args
*uap
, int per_thread
)
2922 struct filedesc
*fdp
= p
->p_fd
;
2928 vfs_context_t ctx
= vfs_context_current();
2930 AUDIT_ARG(fd
, uap
->fd
);
2931 if (per_thread
&& uap
->fd
== -1) {
2933 * Switching back from per-thread to per process CWD; verify we
2934 * in fact have one before proceeding. The only success case
2935 * for this code path is to return 0 preemptively after zapping
2936 * the thread structure contents.
2938 thread_t th
= vfs_context_thread(ctx
);
2940 uthread_t uth
= get_bsdthread_info(th
);
2942 uth
->uu_cdir
= NULLVP
;
2943 if (tvp
!= NULLVP
) {
2951 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
2953 if ( (error
= vnode_getwithref(vp
)) ) {
2958 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
2960 if (vp
->v_type
!= VDIR
) {
2966 error
= mac_vnode_check_chdir(ctx
, vp
);
2970 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
2974 while (!error
&& (mp
= vp
->v_mountedhere
) != NULL
) {
2975 if (vfs_busy(mp
, LK_NOWAIT
)) {
2979 error
= VFS_ROOT(mp
, &tdp
, ctx
);
2988 if ( (error
= vnode_ref(vp
)) )
2993 thread_t th
= vfs_context_thread(ctx
);
2995 uthread_t uth
= get_bsdthread_info(th
);
2998 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3023 fchdir(proc_t p
, struct fchdir_args
*uap
, __unused
int32_t *retval
)
3025 return common_fchdir(p
, uap
, 0);
3029 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args
*uap
, __unused
int32_t *retval
)
3031 return common_fchdir(p
, (void *)uap
, 1);
3035 * Change current working directory (".").
3037 * Returns: 0 Success
3038 * change_dir:ENOTDIR
3040 * vnode_ref:ENOENT No such file or directory
/*
 * common_chdir: shared worker for chdir() (per_thread == 0) and
 * __pthread_chdir() (per_thread == 1).
 *
 * Looks up the user-space path uap->path (FOLLOW | AUDITVNPATH1) through
 * change_dir(), takes a long-term reference with vnode_ref() and drops the
 * lookup iocount with vnode_put().  nd.ni_vp is then stored either in the
 * calling thread's uu_cdir (also setting P_THCWD in p->p_flag) or in
 * fdp->fd_cdir.
 * NOTE(review): the branch choosing between the two stores is not visible
 * in this excerpt; presumably it keys off per_thread -- confirm.
 */
3044 common_chdir(proc_t p
, struct chdir_args
*uap
, int per_thread
)
3046 struct filedesc
*fdp
= p
->p_fd
;
3048 struct nameidata nd
;
3050 vfs_context_t ctx
= vfs_context_current();
3052 NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW
| AUDITVNPATH1
,
3053 UIO_USERSPACE
, uap
->path
, ctx
);
3054 error
= change_dir(&nd
, ctx
);
3057 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3058 vnode_put(nd
.ni_vp
);
3062 * drop the iocount we picked up in change_dir
3064 vnode_put(nd
.ni_vp
);
3067 thread_t th
= vfs_context_thread(ctx
);
3069 uthread_t uth
= get_bsdthread_info(th
);
3071 uth
->uu_cdir
= nd
.ni_vp
;
3072 OSBitOrAtomic(P_THCWD
, &p
->p_flag
);
3074 vnode_rele(nd
.ni_vp
);
3080 fdp
->fd_cdir
= nd
.ni_vp
;
3094 * Change current working directory (".") for the entire process
3096 * Parameters: p Process requesting the call
3097 * uap User argument descriptor (see below)
3100 * Indirect parameters: uap->path Directory path
3102 * Returns: 0 Success
3103 * common_chdir: ENOTDIR
3104 * common_chdir: ENOENT No such file or directory
/*
 * chdir: process-wide entry point; forwards to common_chdir() with
 * per_thread = 0.  retval is unused.
 */
3109 chdir(proc_t p
, struct chdir_args
*uap
, __unused
int32_t *retval
)
3111 return common_chdir(p
, (void *)uap
, 0);
3117 * Change current working directory (".") for a single thread
3119 * Parameters: p Process requesting the call
3120 * uap User argument descriptor (see below)
3123 * Indirect parameters: uap->path Directory path
3125 * Returns: 0 Success
3126 * common_chdir: ENOTDIR
3127 * common_chdir: ENOENT No such file or directory
/*
 * __pthread_chdir: per-thread entry point; forwards to common_chdir() with
 * per_thread = 1.  The argument structure is passed through a void * cast
 * because common_chdir() is declared to take struct chdir_args.
 */
3132 __pthread_chdir(proc_t p
, struct __pthread_chdir_args
*uap
, __unused
int32_t *retval
)
3134 return common_chdir(p
, (void *)uap
, 1);
3139 * Change notion of root (``/'') directory.
/*
 * chroot: change the process root directory to uap->path.
 *
 * Visible steps: a suser() check on the current credential, a FOLLOW
 * lookup of the user-space path through change_dir(), a
 * mac_vnode_check_chroot() call, vnode_ref() to take a long-term
 * reference (with vnode_put() dropping the lookup iocount), and finally
 * storing nd.ni_vp in fdp->fd_rdir and setting FD_CHROOT in fd_flags.
 */
3143 chroot(proc_t p
, struct chroot_args
*uap
, __unused
int32_t *retval
)
3145 struct filedesc
*fdp
= p
->p_fd
;
3147 struct nameidata nd
;
3149 vfs_context_t ctx
= vfs_context_current();
3151 if ((error
= suser(kauth_cred_get(), &p
->p_acflag
)))
3154 NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW
| AUDITVNPATH1
,
3155 UIO_USERSPACE
, uap
->path
, ctx
);
3156 error
= change_dir(&nd
, ctx
);
3161 error
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
,
3164 vnode_put(nd
.ni_vp
);
3169 if ( (error
= vnode_ref(nd
.ni_vp
)) ) {
3170 vnode_put(nd
.ni_vp
);
3173 vnode_put(nd
.ni_vp
);
3177 fdp
->fd_rdir
= nd
.ni_vp
;
3178 fdp
->fd_flags
|= FD_CHROOT
;
3188 * Common routine for chroot and chdir.
3190 * Returns: 0 Success
3191 * ENOTDIR Not a directory
3192 * namei:??? [anything namei can return]
3193 * vnode_authorize:??? [anything vnode_authorize can return]
/*
 * change_dir: lookup helper used by common_chdir() and chroot().
 *
 * Runs namei() on the caller-initialized nameidata, tests the resulting
 * vnode's type against VDIR, then applies mac_vnode_check_chdir() and
 * vnode_authorize(KAUTH_VNODE_SEARCH) in the supplied context.
 */
3196 change_dir(struct nameidata
*ndp
, vfs_context_t ctx
)
3201 if ((error
= namei(ndp
)))
3206 if (vp
->v_type
!= VDIR
) {
3212 error
= mac_vnode_check_chdir(ctx
, vp
);
3219 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
);
3229 * Allocate the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_alloc: allocate a zero-filled struct fd_vn_data
 * (M_WAITOK | M_ZERO, type M_FD_VN_DATA) and initialize its fv_lock mutex
 * with the fd_vn lock group and attributes.
 * NOTE(review): the return statement is not visible in this excerpt;
 * open1() assigns the result to fg_vn_data, so it presumably returns the
 * new structure -- confirm.
 */
3232 fg_vn_data_alloc(void)
3234 struct fd_vn_data
*fvdata
;
3236 /* Allocate per fd vnode data */
3237 MALLOC(fvdata
, struct fd_vn_data
*, (sizeof(struct fd_vn_data
)),
3238 M_FD_VN_DATA
, M_WAITOK
| M_ZERO
);
3239 lck_mtx_init(&fvdata
->fv_lock
, fd_vn_lck_grp
, fd_vn_lck_attr
);
3244 * Free the vnode data (for directories) associated with the file glob.
/*
 * fg_vn_data_free: tear down a struct fd_vn_data passed as an opaque
 * pointer.  Frees the fv_buf buffer (M_FD_DIRBUF), destroys the fv_lock
 * mutex, then frees the structure itself (M_FD_VN_DATA).
 */
3247 fg_vn_data_free(void *fgvndata
)
3249 struct fd_vn_data
*fvdata
= (struct fd_vn_data
*)fgvndata
;
3252 FREE(fvdata
->fv_buf
, M_FD_DIRBUF
);
3253 lck_mtx_destroy(&fvdata
->fv_lock
, fd_vn_lck_grp
);
3254 FREE(fvdata
, M_FD_VN_DATA
);
3258 * Check permissions, allocate an open file structure,
3259 * and call the device open routine if any.
3261 * Returns: 0 Success
3272 * XXX Need to implement uid, gid
/*
 * open1: common open path.  Steps visible in this excerpt, in order:
 *
 *  - test the access-mode bits against O_ACCMODE and convert the user
 *    flags with FFLAGS();
 *  - allocate a fileproc/descriptor with falloc_withalloc() and stash
 *    -indx - 1 in uu_dupfd;
 *  - if the process has no controlling tty (P_CONTROLT clear), take a
 *    session reference; a session leader without s_ttyvp opening with
 *    O_NOCTTY gets S_NOCTTY set on the session for the duration;
 *  - vn_open_auth(); on ENODEV/ENXIO with uu_dupfd >= 0, dupfdopen() is
 *    tried instead; failure paths clear S_NOCTTY, release the session
 *    and fp_free() the descriptor;
 *  - on success fill fg_flag/fg_ops/fg_data, mark FENCRYPTED for
 *    VA_DP_RAWENCRYPTED, take the O_EXLOCK/O_SHLOCK advisory lock via
 *    VNOP_ADVLOCK() (setting FHASLOCK), and honor O_TRUNC through
 *    vnode_setsize();
 *  - clear S_NOCTTY, record the tty vnode in the session when
 *    P_CONTROLT became set, attach fg_vn_data for directories, apply
 *    O_CLOEXEC/O_CLOFORK, then procfdtbl_releasefd() and fp_drop();
 *  - the trailing cleanup path releases an FHASLOCK lock, vn_close()s the
 *    vnode using a context carrying fg_cred, and fp_free()s the slot.
 */
3275 open1(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3276 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
,
3279 proc_t p
= vfs_context_proc(ctx
);
3280 uthread_t uu
= get_bsdthread_info(vfs_context_thread(ctx
));
3281 struct fileproc
*fp
;
3284 int type
, indx
, error
;
3286 int no_controlling_tty
= 0;
3287 int deny_controlling_tty
= 0;
3288 struct session
*sessp
= SESSION_NULL
;
3292 if ((oflags
& O_ACCMODE
) == O_ACCMODE
)
3294 flags
= FFLAGS(uflags
);
3296 AUDIT_ARG(fflags
, oflags
);
3297 AUDIT_ARG(mode
, vap
->va_mode
);
3299 if ((error
= falloc_withalloc(p
,
3300 &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) {
3303 uu
->uu_dupfd
= -indx
- 1;
3305 if (!(p
->p_flag
& P_CONTROLT
)) {
3306 sessp
= proc_session(p
);
3307 no_controlling_tty
= 1;
3309 * If conditions would warrant getting a controlling tty if
3310 * the device being opened is a tty (see ttyopen in tty.c),
3311 * but the open flags deny it, set a flag in the session to
3314 if (SESS_LEADER(p
, sessp
) &&
3315 sessp
->s_ttyvp
== NULL
&&
3316 (flags
& O_NOCTTY
)) {
3317 session_lock(sessp
);
3318 sessp
->s_flags
|= S_NOCTTY
;
3319 session_unlock(sessp
);
3320 deny_controlling_tty
= 1;
3324 if ((error
= vn_open_auth(ndp
, &flags
, vap
))) {
3325 if ((error
== ENODEV
|| error
== ENXIO
) && (uu
->uu_dupfd
>= 0)){ /* XXX from fdopen */
3326 if ((error
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) {
3327 fp_drop(p
, indx
, NULL
, 0);
3329 if (deny_controlling_tty
) {
3330 session_lock(sessp
);
3331 sessp
->s_flags
&= ~S_NOCTTY
;
3332 session_unlock(sessp
);
3334 if (sessp
!= SESSION_NULL
)
3335 session_rele(sessp
);
3339 if (error
== ERESTART
)
3341 fp_free(p
, indx
, fp
);
3343 if (deny_controlling_tty
) {
3344 session_lock(sessp
);
3345 sessp
->s_flags
&= ~S_NOCTTY
;
3346 session_unlock(sessp
);
3348 if (sessp
!= SESSION_NULL
)
3349 session_rele(sessp
);
3355 fp
->f_fglob
->fg_flag
= flags
& (FMASK
| O_EVTONLY
);
3356 fp
->f_fglob
->fg_ops
= &vnops
;
3357 fp
->f_fglob
->fg_data
= (caddr_t
)vp
;
3360 if (VATTR_IS_ACTIVE (vap
, va_dataprotect_flags
)) {
3361 if (vap
->va_dataprotect_flags
& VA_DP_RAWENCRYPTED
) {
3362 fp
->f_fglob
->fg_flag
|= FENCRYPTED
;
3367 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
3368 lf
.l_whence
= SEEK_SET
;
3371 if (flags
& O_EXLOCK
)
3372 lf
.l_type
= F_WRLCK
;
3374 lf
.l_type
= F_RDLCK
;
3376 if ((flags
& FNONBLOCK
) == 0)
3379 error
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->f_fglob
,
3384 if ((error
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->f_fglob
, F_SETLK
, &lf
, type
, ctx
, NULL
)))
3386 fp
->f_fglob
->fg_flag
|= FHASLOCK
;
3389 /* try to truncate by setting the size attribute */
3390 if ((flags
& O_TRUNC
) && ((error
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0))
3394 * If the open flags denied the acquisition of a controlling tty,
3395 * clear the flag in the session structure that prevented the lower
3396 * level code from assigning one.
3398 if (deny_controlling_tty
) {
3399 session_lock(sessp
);
3400 sessp
->s_flags
&= ~S_NOCTTY
;
3401 session_unlock(sessp
);
3405 * If a controlling tty was set by the tty line discipline, then we
3406 * want to set the vp of the tty into the session structure. We have
3407 * a race here because we can't get to the vp for the tp in ttyopen,
3408 * because it's not passed as a parameter in the open path.
3410 if (no_controlling_tty
&& (p
->p_flag
& P_CONTROLT
)) {
3413 session_lock(sessp
);
3414 ttyvp
= sessp
->s_ttyvp
;
3415 sessp
->s_ttyvp
= vp
;
3416 sessp
->s_ttyvid
= vnode_vid(vp
);
3417 session_unlock(sessp
);
3421 * For directories we hold some additional information in the fd.
3423 if (vnode_vtype(vp
) == VDIR
) {
3424 fp
->f_fglob
->fg_vn_data
= fg_vn_data_alloc();
3426 fp
->f_fglob
->fg_vn_data
= NULL
;
3432 if (flags
& O_CLOEXEC
)
3433 *fdflags(p
, indx
) |= UF_EXCLOSE
;
3434 if (flags
& O_CLOFORK
)
3435 *fdflags(p
, indx
) |= UF_FORKCLOSE
;
3436 procfdtbl_releasefd(p
, indx
, NULL
);
3437 fp_drop(p
, indx
, fp
, 1);
3442 if (sessp
!= SESSION_NULL
)
3443 session_rele(sessp
);
3446 if (deny_controlling_tty
) {
3447 session_lock(sessp
);
3448 sessp
->s_flags
&= ~S_NOCTTY
;
3449 session_unlock(sessp
);
3451 if (sessp
!= SESSION_NULL
)
3452 session_rele(sessp
);
3454 struct vfs_context context
= *vfs_context_current();
3455 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3457 if ((fp
->f_fglob
->fg_flag
& FHASLOCK
) &&
3458 (FILEGLOB_DTYPE(fp
->f_fglob
) == DTYPE_VNODE
)) {
3459 lf
.l_whence
= SEEK_SET
;
3462 lf
.l_type
= F_UNLCK
;
3465 vp
, (caddr_t
)fp
->f_fglob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
);
3468 vn_close(vp
, fp
->f_fglob
->fg_flag
, &context
);
3470 fp_free(p
, indx
, fp
);
3476 * While most of the *at syscall handlers can call nameiat() which
3477 * is a wrapper around namei, the use of namei and initialisation
3478 * of nameidata are far removed and in different functions - namei
3479 * gets called in vn_open_auth for open1. So we'll just do here what
/*
 * open1at: directory-fd aware front end to open1().
 *
 * When dirfd is not AT_FDCWD and the caller has not already set USEDVP,
 * the first character of the path is read (copyin() for user-space
 * segments, a direct dereference otherwise), the directory vnode for
 * dirfd is obtained with vnode_getfromfd() and tested for VDIR, and it is
 * installed as ndp->ni_dvp with USEDVP set before calling open1().
 * In the remaining case open1() is called directly.
 * NOTE(review): how the peeked character gates the dirfd lookup is not
 * visible in this excerpt.
 */
3483 open1at(vfs_context_t ctx
, struct nameidata
*ndp
, int uflags
,
3484 struct vnode_attr
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
,
3487 if ((dirfd
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags
& USEDVP
)) {
3491 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) {
3492 error
= copyin(ndp
->ni_dirp
, &c
, sizeof(char));
3496 c
= *((char *)(ndp
->ni_dirp
));
3502 error
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
,
3507 if (vnode_vtype(dvp_at
) != VDIR
) {
3512 ndp
->ni_dvp
= dvp_at
;
3513 ndp
->ni_cnd
.cn_flags
|= USEDVP
;
3514 error
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
,
3521 return (open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
));
3525 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3527 * Parameters: p Process requesting the open
3528 * uap User argument descriptor (see below)
3529 * retval Pointer to an area to receive the
3530 * return value from the system call
3532 * Indirect: uap->path Path to open (same as 'open')
3533 * uap->flags Flags to open (same as 'open')
3534 * uap->uid UID to set, if creating
3535 * uap->gid GID to set, if creating
3536 * uap->mode File mode, if creating (same as 'open')
3537 * uap->xsecurity ACL to set, if creating
3539 * Returns: 0 Success
3542 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3544 * XXX: We should enumerate the possible errno values here, and where
3545 * in the code they originated.
/*
 * open_extended: open(2) with explicit owner, group and ACL.
 *
 * Copies in the optional kauth_filesec_t from uap->xsecurity, builds a
 * vnode_attr carrying the mode (umask applied through fd_cmask, limited
 * to ALLPERMS, S_ISTXT stripped) plus uid/gid when they are not the
 * KAUTH_*_NONE sentinels and the ACL when one was supplied, then calls
 * open1() on the user-space path.  The copied-in filesec is released with
 * kauth_filesec_free() afterwards.
 */
3548 open_extended(proc_t p
, struct open_extended_args
*uap
, int32_t *retval
)
3550 struct filedesc
*fdp
= p
->p_fd
;
3552 kauth_filesec_t xsecdst
;
3553 struct vnode_attr va
;
3554 struct nameidata nd
;
3557 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3560 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
3561 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
3565 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3566 VATTR_SET(&va
, va_mode
, cmode
);
3567 if (uap
->uid
!= KAUTH_UID_NONE
)
3568 VATTR_SET(&va
, va_uid
, uap
->uid
);
3569 if (uap
->gid
!= KAUTH_GID_NONE
)
3570 VATTR_SET(&va
, va_gid
, uap
->gid
);
3571 if (xsecdst
!= NULL
)
3572 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3574 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3575 uap
->path
, vfs_context_current());
3577 ciferror
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3578 fileproc_alloc_init
, NULL
, retval
);
3579 if (xsecdst
!= NULL
)
3580 kauth_filesec_free(xsecdst
);
3586 * Go through the data-protected atomically controlled open (2)
3588 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
/*
 * open_dprotected_np: open(2) variant carrying data-protection arguments.
 *
 * Builds the same mode-masked vnode_attr as a normal open (additionally
 * limited to ACCESSPERMS), records the protection class in
 * va_dataprotect_class when O_CREAT is requested, and sets
 * VA_DP_RAWENCRYPTED in va_dataprotect_flags when O_DP_GETRAWENCRYPTED is
 * passed; that request is tested against O_RDWR | O_WRONLY first (see the
 * inline comment).  The open itself is performed by open1().
 * NOTE(review): p is tagged __unused in the signature but p->p_fd is read
 * below.
 */
3590 int open_dprotected_np (__unused proc_t p
, struct open_dprotected_np_args
*uap
, int32_t *retval
) {
3591 int flags
= uap
->flags
;
3592 int class = uap
->class;
3593 int dpflags
= uap
->dpflags
;
3596 * Follow the same path as normal open(2)
3597 * Look up the item if it exists, and acquire the vnode.
3599 struct filedesc
*fdp
= p
->p_fd
;
3600 struct vnode_attr va
;
3601 struct nameidata nd
;
3606 /* Mask off all but regular access permissions */
3607 cmode
= ((uap
->mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3608 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3610 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
3611 uap
->path
, vfs_context_current());
3614 * Initialize the extra fields in vnode_attr to pass down our
3616 * 1. target cprotect class.
3617 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3619 if (flags
& O_CREAT
) {
3620 VATTR_SET(&va
, va_dataprotect_class
, class);
3623 if (dpflags
& O_DP_GETRAWENCRYPTED
) {
3624 if ( flags
& (O_RDWR
| O_WRONLY
)) {
3625 /* Not allowed to write raw encrypted bytes */
3628 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
3631 error
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
,
3632 fileproc_alloc_init
, NULL
, retval
);
/*
 * openat_internal: shared body of open()/openat()/openbyid_np().
 *
 * Applies the process umask (fd_cmask) to mode, restricts it to
 * ALLPERMS without S_ISTXT and then to ACCESSPERMS, initializes a FOLLOW
 * lookup, and hands off to open1at() with the default fileproc allocator.
 */
3638 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
,
3639 int fd
, enum uio_seg segflg
, int *retval
)
3641 struct filedesc
*fdp
= (vfs_context_proc(ctx
))->p_fd
;
3642 struct vnode_attr va
;
3643 struct nameidata nd
;
3647 /* Mask off all but regular access permissions */
3648 cmode
= ((mode
&~ fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
;
3649 VATTR_SET(&va
, va_mode
, cmode
& ACCESSPERMS
);
3651 NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW
| AUDITVNPATH1
,
3654 return (open1at(ctx
, &nd
, flags
, &va
, fileproc_alloc_init
, NULL
,
/*
 * open: cancellation-point wrapper; calls __pthread_testcancel(1) and
 * then forwards to open_nocancel() with the same arguments.
 */
3659 open(proc_t p
, struct open_args
*uap
, int32_t *retval
)
3661 __pthread_testcancel(1);
3662 return(open_nocancel(p
, (struct open_nocancel_args
*)uap
, retval
));
/*
 * open_nocancel: open relative to the current working directory; calls
 * openat_internal() with AT_FDCWD and a user-space path.
 */
3666 open_nocancel(__unused proc_t p
, struct open_nocancel_args
*uap
,
3669 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3670 uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
));
/*
 * openat_nocancel: open relative to the directory descriptor uap->fd;
 * calls openat_internal() with a user-space path.
 */
3674 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args
*uap
,
3677 return (openat_internal(vfs_context_current(), uap
->path
, uap
->flags
,
3678 uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
));
/*
 * openat: cancellation-point wrapper; calls __pthread_testcancel(1) and
 * then forwards to openat_nocancel() with the same arguments.
 */
3682 openat(proc_t p
, struct openat_args
*uap
, int32_t *retval
)
3684 __pthread_testcancel(1);
3685 return(openat_nocancel(p
, (struct openat_nocancel_args
*)uap
, retval
));
3689 * openbyid_np: open a file given a file system id and a file system object id
3690 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3691 * file systems that don't support object ids it is a node id (uint64_t).
3693 * Parameters: p Process requesting the open
3694 * uap User argument descriptor (see below)
3695 * retval Pointer to an area to receive the
3696 * return value from the system call
3698 * Indirect: uap->path Path to open (same as 'open')
3700 * uap->fsid id of target file system
3701 * uap->objid id of target file system object
3702 * uap->flags Flags to open (same as 'open')
3704 * Returns: 0 Success
3708 * XXX: We should enumerate the possible errno values here, and where
3709 * in the code they originated.
/*
 * openbyid_np: open a file identified by (fsid, objid) instead of a path.
 *
 * Copies in the fsid and a 64-bit object id from user space, then
 * resolves them to a path with fsgetpath_internal() into a kernel buffer
 * of buflen + 1 bytes.  The resolution sits in a do/while loop that
 * repeats on ENOSPC after growing buflen by MAXPATHLEN.  The resulting
 * kernel-space path is opened through openat_internal() with AT_FDCWD,
 * UIO_SYSSPACE and a mode of 0.
 */
3712 openbyid_np(__unused proc_t p
, struct openbyid_np_args
*uap
, int *retval
)
3718 int buflen
= MAXPATHLEN
;
3720 vfs_context_t ctx
= vfs_context_current();
3722 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
3726 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3727 if ((error
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) {
3731 AUDIT_ARG(value32
, fsid
.val
[0]);
3732 AUDIT_ARG(value64
, objid
);
3734 /* resolve path from fsid, objid */
3736 MALLOC(buf
, char *, buflen
+ 1, M_TEMP
, M_WAITOK
);
3741 error
= fsgetpath_internal(
3742 ctx
, fsid
.val
[0], objid
,
3743 buflen
, buf
, &pathlen
);
3749 } while (error
== ENOSPC
&& (buflen
+= MAXPATHLEN
));
3757 error
= openat_internal(
3758 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
);
3767 * Create a special file.
/* Forward declaration: mknod() below calls mkfifo1() before its definition. */
3769 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
);
/*
 * mknod: create a special file at uap->path.
 *
 * The requested mode (ALLPERMS, umask applied) and device number are
 * loaded into a vnode_attr.  A FIFO request (S_IFIFO) is delegated to
 * mkfifo1() before any privilege check.  All other types require suser();
 * the S_IFMT switch maps the type bits to VBAD, VCHR or VBLK.  Creation is
 * gated by mac_vnode_check_create() and
 * vnode_authorize(KAUTH_VNODE_ADD_FILE) on the parent, performed with
 * vn_create(), and followed by vnode_update_identity() for a missing
 * name/parent and an FSE_CREATE_FILE event.
 */
3772 mknod(proc_t p
, struct mknod_args
*uap
, __unused
int32_t *retval
)
3774 struct vnode_attr va
;
3775 vfs_context_t ctx
= vfs_context_current();
3777 struct nameidata nd
;
3781 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3782 VATTR_SET(&va
, va_rdev
, uap
->dev
);
3784 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3785 if ((uap
->mode
& S_IFMT
) == S_IFIFO
)
3786 return(mkfifo1(ctx
, uap
->path
, &va
));
3788 AUDIT_ARG(mode
, uap
->mode
);
3789 AUDIT_ARG(value32
, uap
->dev
);
3791 if ((error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
3793 NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT
| AUDITVNPATH1
,
3794 UIO_USERSPACE
, uap
->path
, ctx
);
3806 switch (uap
->mode
& S_IFMT
) {
3807 case S_IFMT
: /* used by badsect to flag bad sectors */
3808 VATTR_SET(&va
, va_type
, VBAD
);
3811 VATTR_SET(&va
, va_type
, VCHR
);
3814 VATTR_SET(&va
, va_type
, VBLK
);
3822 error
= mac_vnode_check_create(ctx
,
3823 nd
.ni_dvp
, &nd
.ni_cnd
, &va
);
3828 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
3831 if ((error
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0)
3835 int update_flags
= 0;
3837 // Make sure the name & parent pointers are hooked up
3838 if (vp
->v_name
== NULL
)
3839 update_flags
|= VNODE_UPDATE_NAME
;
3840 if (vp
->v_parent
== NULLVP
)
3841 update_flags
|= VNODE_UPDATE_PARENT
;
3844 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
3847 add_fsevent(FSE_CREATE_FILE
, ctx
,
3855 * nameidone has to happen before we vnode_put(dvp)
3856 * since it may need to release the fs_nodelock on the dvp
3868 * Create a named pipe.
3870 * Returns: 0 Success
3873 * vnode_authorize:???
/*
 * mkfifo1: common FIFO creation used by mknod(), mkfifo() and
 * mkfifo_extended().
 *
 * Performs a CREATE lookup (LOCKPARENT) of the user-space path upath,
 * forces va_type to VFIFO in the caller's vnode_attr, authorizes the
 * creation with vn_authorize_create() and creates the node with
 * vn_create().
 */
3877 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr
*vap
)
3881 struct nameidata nd
;
3883 NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT
| AUDITVNPATH1
,
3884 UIO_USERSPACE
, upath
, ctx
);
3891 /* check that this is a new file and authorize addition */
3896 VATTR_SET(vap
, va_type
, VFIFO
);
3898 if ((error
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0)
3901 error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
);
3904 * nameidone has to happen before we vnode_put(dvp)
3905 * since it may need to release the fs_nodelock on the dvp
3918 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3924 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3925 * uap->uid UID to set
3926 * uap->gid GID to set
3927 * uap->mode File mode to set (same as 'mkfifo')
3928 * uap->xsecurity ACL to set, if creating
3930 * Returns: 0 Success
3933 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3935 * XXX: We should enumerate the possible errno values here, and where
3936 * in the code they originated.
/*
 * mkfifo_extended: mkfifo(2) with explicit owner, group and ACL.
 *
 * Copies in the optional kauth_filesec_t from uap->xsecurity, fills a
 * vnode_attr with the umask-adjusted mode plus uid/gid when they are not
 * the KAUTH_*_NONE sentinels and the ACL when one was supplied, and
 * delegates creation to mkfifo1().  The copied-in filesec is released
 * with kauth_filesec_free() afterwards.
 */
3939 mkfifo_extended(proc_t p
, struct mkfifo_extended_args
*uap
, __unused
int32_t *retval
)
3942 kauth_filesec_t xsecdst
;
3943 struct vnode_attr va
;
3945 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
3947 xsecdst
= KAUTH_FILESEC_NONE
;
3948 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
3949 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
3954 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3955 if (uap
->uid
!= KAUTH_UID_NONE
)
3956 VATTR_SET(&va
, va_uid
, uap
->uid
);
3957 if (uap
->gid
!= KAUTH_GID_NONE
)
3958 VATTR_SET(&va
, va_gid
, uap
->gid
);
3959 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3960 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
3962 ciferror
= mkfifo1(vfs_context_current(), uap
->path
, &va
);
3964 if (xsecdst
!= KAUTH_FILESEC_NONE
)
3965 kauth_filesec_free(xsecdst
);
/*
 * mkfifo: create a named pipe at uap->path.  Sets the umask-adjusted mode
 * (ALLPERMS & ~fd_cmask) in a vnode_attr and delegates to mkfifo1().
 */
3971 mkfifo(proc_t p
, struct mkfifo_args
*uap
, __unused
int32_t *retval
)
3973 struct vnode_attr va
;
3976 VATTR_SET(&va
, va_mode
, (uap
->mode
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
);
3978 return(mkfifo1(vfs_context_current(), uap
->path
, &va
));
/*
 * my_strrchr: local character-search helper used by safe_getpath() to
 * find the last '/' in a path.  It walks p one character at a time with
 * `save` starting out as NULL.
 * NOTE(review): the loop body and return are not visible in this excerpt;
 * presumably it mirrors strrchr() -- confirm.
 */
3983 my_strrchr(char *p
, int ch
)
3987 for (save
= NULL
;; ++p
) {
/*
 * safe_getpath: build a best-effort path for `leafname` inside `dvp`.
 *
 * *truncated_path is cleared on entry.  When vn_getpath(dvp) succeeds and
 * leaves room, the leaf name is appended with strlcpy(); if the combined
 * length exceeds MAXPATHLEN the result is flagged as truncated and cut
 * back at the last '/'.  When vn_getpath() succeeds without room, only the
 * truncation flag is set.  When vn_getpath() fails, a message is logged
 * for errors other than ENOSPC, the path is flagged as truncated, and the
 * code walks up v_parent retrying vn_getpath() while it returns ENOSPC,
 * falling back to the mount point name or "/" when there is no parent.
 */
3996 extern int safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
);
3999 safe_getpath(struct vnode
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
)
4001 int ret
, len
= _len
;
4003 *truncated_path
= 0;
4004 ret
= vn_getpath(dvp
, path
, &len
);
4005 if (ret
== 0 && len
< (MAXPATHLEN
- 1)) {
4008 len
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN
-len
) + 1;
4009 if (len
> MAXPATHLEN
) {
4012 // the string got truncated!
4013 *truncated_path
= 1;
4014 ptr
= my_strrchr(path
, '/');
4016 *ptr
= '\0'; // chop off the string at the last directory component
4018 len
= strlen(path
) + 1;
4021 } else if (ret
== 0) {
4022 *truncated_path
= 1;
4023 } else if (ret
!= 0) {
4024 struct vnode
*mydvp
=dvp
;
4026 if (ret
!= ENOSPC
) {
4027 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4028 dvp
, dvp
->v_name
? dvp
->v_name
: "no-name", ret
);
4030 *truncated_path
= 1;
4033 if (mydvp
->v_parent
!= NULL
) {
4034 mydvp
= mydvp
->v_parent
;
4035 } else if (mydvp
->v_mount
) {
4036 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
);
4039 // no parent and no mount point? only thing is to punt and say "/" changed
4040 strlcpy(path
, "/", _len
);
4045 if (mydvp
== NULL
) {
4050 ret
= vn_getpath(mydvp
, path
, &len
);
4051 } while (ret
== ENOSPC
);
4059 * Make a hard file link.
4061 * Returns: 0 Success
4066 * vnode_authorize:???
/*
 * linkat_internal: shared body of link() and linkat().
 *
 * Looks up the existing object (path relative to fd1), following a
 * trailing symlink only when AT_SYMLINK_FOLLOW is set in flag.
 * Directories are tested against VFC_VFSDIRLINKS (EPERM is assigned when
 * the filesystem lacks it) and, for non-superusers, against ownership via
 * va_uid.  The nameidata is then reused for a CREATE lookup of the new
 * name relative to fd2.  Visible checks before VNOP_LINK(): the MAC link
 * check, vnode_authorize(KAUTH_VNODE_LINKTARGET) on the source, a test
 * that the target does not already exist, a same-mount comparison, and
 * vnode_authorize(KAUTH_VNODE_ADD_FILE) on the parent.  Afterwards the
 * MAC layer, kauth file-op listeners (KAUTH_FILEOP_LINK) and fsevents
 * (FSE_CREATE_FILE, FSE_STAT_CHANGED) are notified, using paths built
 * with safe_getpath()/vn_getpath().
 */
4071 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
,
4072 user_addr_t link
, int flag
, enum uio_seg segflg
)
4074 vnode_t vp
, dvp
, lvp
;
4075 struct nameidata nd
;
4081 int need_event
, has_listeners
;
4082 char *target_path
= NULL
;
4085 vp
= dvp
= lvp
= NULLVP
;
4087 /* look up the object we are linking to */
4088 follow
= (flag
& AT_SYMLINK_FOLLOW
) ? FOLLOW
: NOFOLLOW
;
4089 NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1
| follow
,
4092 error
= nameiat(&nd
, fd1
);
4100 * Normally, linking to directories is not supported.
4101 * However, some file systems may have limited support.
4103 if (vp
->v_type
== VDIR
) {
4104 if (!(vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSDIRLINKS
)) {
4105 error
= EPERM
; /* POSIX */
4108 /* Linking to a directory requires ownership. */
4109 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) {
4110 struct vnode_attr dva
;
4113 VATTR_WANTED(&dva
, va_uid
);
4114 if (vnode_getattr(vp
, &dva
, ctx
) != 0 ||
4115 !VATTR_IS_SUPPORTED(&dva
, va_uid
) ||
4116 (dva
.va_uid
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) {
4123 /* lookup the target node */
4127 nd
.ni_cnd
.cn_nameiop
= CREATE
;
4128 nd
.ni_cnd
.cn_flags
= LOCKPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
;
4130 error
= nameiat(&nd
, fd2
);
4137 if ((error
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0)
4141 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4142 if ((error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0)
4145 /* target node must not exist */
4146 if (lvp
!= NULLVP
) {
4150 /* cannot link across mountpoints */
4151 if (vnode_mount(vp
) != vnode_mount(dvp
)) {
4156 /* authorize creation of the target node */
4157 if ((error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
4160 /* and finally make the link */
4161 error
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
);
4166 (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
);
4170 need_event
= need_fsevent(FSE_CREATE_FILE
, dvp
);
4174 has_listeners
= kauth_authorize_fileop_has_listeners();
4176 if (need_event
|| has_listeners
) {
4177 char *link_to_path
= NULL
;
4178 int len
, link_name_len
;
4180 /* build the path to the new link file */
4181 GET_PATH(target_path
);
4182 if (target_path
== NULL
) {
4187 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
);
4189 if (has_listeners
) {
4190 /* build the path to file we are linking to */
4191 GET_PATH(link_to_path
);
4192 if (link_to_path
== NULL
) {
4197 link_name_len
= MAXPATHLEN
;
4198 if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) {
4200 * Call out to allow 3rd party notification of rename.
4201 * Ignore result of kauth_authorize_fileop call.
4203 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
,
4204 (uintptr_t)link_to_path
,
4205 (uintptr_t)target_path
);
4207 if (link_to_path
!= NULL
) {
4208 RELEASE_PATH(link_to_path
);
4213 /* construct fsevent */
4214 if (get_fse_info(vp
, &finfo
, ctx
) == 0) {
4216 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4219 // build the path to the destination of the link
4220 add_fsevent(FSE_CREATE_FILE
, ctx
,
4221 FSE_ARG_STRING
, len
, target_path
,
4222 FSE_ARG_FINFO
, &finfo
,
4226 add_fsevent(FSE_STAT_CHANGED
, ctx
,
4227 FSE_ARG_VNODE
, vp
->v_parent
,
4235 * nameidone has to happen before we vnode_put(dvp)
4236 * since it may need to release the fs_nodelock on the dvp
4239 if (target_path
!= NULL
) {
4240 RELEASE_PATH(target_path
);
/*
 * link: hard-link system call; calls linkat_internal() with AT_FDCWD for
 * both directory descriptors and AT_SYMLINK_FOLLOW as the flag.
 */
4252 link(__unused proc_t p
, struct link_args
*uap
, __unused
int32_t *retval
)
4254 return (linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4255 AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
));
/*
 * linkat: hard-link system call relative to directory descriptors.
 * uap->flag is tested for any bit other than AT_SYMLINK_FOLLOW before the
 * request is passed to linkat_internal().
 * NOTE(review): the action taken on an unsupported flag bit is not
 * visible in this excerpt.
 */
4259 linkat(__unused proc_t p
, struct linkat_args
*uap
, __unused
int32_t *retval
)
4261 if (uap
->flag
& ~AT_SYMLINK_FOLLOW
)
4264 return (linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
,
4265 uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
));
4269 * Make a symbolic link.
4271 * We could add support for ACLs here too...
/*
 * symlinkat_internal: shared body of symlink() and symlinkat().
 *
 * path_data is the link contents; for user-space segments it is copied
 * into a MAXPATHLEN M_NAMEI buffer with copyinstr(), otherwise it is used
 * in place.  `link` is the name to create, looked up relative to fd with
 * a CREATE/LOCKPARENT lookup.  The new node gets type VLNK and mode
 * ACCESSPERMS with the umask applied, and inherits SF_RESTRICTED when the
 * parent directory's flags carry it.  Creation is gated by
 * mac_vnode_check_create(), vnode_authorize(KAUTH_VNODE_ADD_FILE) and
 * vnode_authattr_new(), performed by VNOP_SYMLINK(), and followed by
 * labeling, fallback attribute handling, a fresh LOOKUP of the new name,
 * vnode_update_identity() and an FSE_CREATE_FILE event.  The copied-in
 * path buffer is freed at the end only when it was allocated here.
 */
4275 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
,
4276 user_addr_t link
, enum uio_seg segflg
)
4278 struct vnode_attr va
;
4281 struct nameidata nd
;
4283 uint32_t dfflags
; // Directory file flags
4288 if (UIO_SEG_IS_USER_SPACE(segflg
)) {
4289 MALLOC_ZONE(path
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
4290 error
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
);
4292 path
= (char *)path_data
;
4296 AUDIT_ARG(text
, path
); /* This is the link string */
4298 NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT
| AUDITVNPATH1
,
4301 error
= nameiat(&nd
, fd
);
4307 p
= vfs_context_proc(ctx
);
4309 VATTR_SET(&va
, va_type
, VLNK
);
4310 VATTR_SET(&va
, va_mode
, ACCESSPERMS
& ~p
->p_fd
->fd_cmask
);
4313 * Handle inheritance of restricted flag
4315 error
= vnode_flags(dvp
, &dfflags
, ctx
);
4318 if (dfflags
& SF_RESTRICTED
)
4319 VATTR_SET(&va
, va_flags
, SF_RESTRICTED
);
4322 error
= mac_vnode_check_create(ctx
,
4323 dvp
, &nd
.ni_cnd
, &va
);
4336 error
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
);
4337 /* get default ownership, etc. */
4339 error
= vnode_authattr_new(dvp
, &va
, 0, ctx
);
4341 error
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
);
4345 error
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
);
4348 /* do fallback attribute handling */
4350 error
= vnode_setattr_fallback(vp
, &va
, ctx
);
4353 int update_flags
= 0;
4356 nd
.ni_cnd
.cn_nameiop
= LOOKUP
;
4358 nd
.ni_op
= OP_LOOKUP
;
4360 nd
.ni_cnd
.cn_flags
= 0;
4361 error
= nameiat(&nd
, fd
);
4368 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4369 /* call out to allow 3rd party notification of rename.
4370 * Ignore result of kauth_authorize_fileop call.
4372 if (kauth_authorize_fileop_has_listeners() &&
4374 char *new_link_path
= NULL
;
4377 /* build the path to the new link file */
4378 new_link_path
= get_pathbuff();
4380 vn_getpath(dvp
, new_link_path
, &len
);
4381 if ((len
+ 1 + nd
.ni_cnd
.cn_namelen
+ 1) < MAXPATHLEN
) {
4382 new_link_path
[len
- 1] = '/';
4383 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN
-len
);
4386 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
,
4387 (uintptr_t)path
, (uintptr_t)new_link_path
);
4388 if (new_link_path
!= NULL
)
4389 release_pathbuff(new_link_path
);
4392 // Make sure the name & parent pointers are hooked up
4393 if (vp
->v_name
== NULL
)
4394 update_flags
|= VNODE_UPDATE_NAME
;
4395 if (vp
->v_parent
== NULLVP
)
4396 update_flags
|= VNODE_UPDATE_PARENT
;
4399 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
4402 add_fsevent(FSE_CREATE_FILE
, ctx
,
4410 * nameidone has to happen before we vnode_put(dvp)
4411 * since it may need to release the fs_nodelock on the dvp
4419 if (path
&& (path
!= (char *)path_data
))
4420 FREE_ZONE(path
, MAXPATHLEN
, M_NAMEI
);
/*
 * symlink: symbolic-link system call; calls symlinkat_internal() with
 * uap->path as the link contents, AT_FDCWD as the directory and uap->link
 * as the name to create.
 */
4426 symlink(__unused proc_t p
, struct symlink_args
*uap
, __unused
int32_t *retval
)
4428 return (symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
,
4429 uap
->link
, UIO_USERSPACE
));
/*
 * symlinkat: symbolic-link system call relative to a directory
 * descriptor; calls symlinkat_internal() with uap->path1 as the link
 * contents, uap->fd as the directory and uap->path2 as the name to create.
 */
4433 symlinkat(__unused proc_t p
, struct symlinkat_args
*uap
,
4434 __unused
int32_t *retval
)
4436 return (symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
,
4437 uap
->path2
, UIO_USERSPACE
));
4441 * Delete a whiteout from the filesystem.
4442 * No longer supported.
/*
 * undelete: whiteout removal system call, described above as no longer
 * supported.  All three parameters are marked __unused.
 * NOTE(review): the body is not visible in this excerpt, so the value it
 * returns cannot be stated here.
 */
4445 undelete(__unused proc_t p
, __unused
struct undelete_args
*uap
, __unused
int32_t *retval
)
4451 * Delete a name from the filesystem.
4455 unlink1at(vfs_context_t ctx
, struct nameidata
*ndp
, int unlink_flags
, int fd
)
4459 struct componentname
*cnp
;
4464 struct vnode_attr va
;
4468 int has_listeners
= 0;
4469 int truncated_path
=0;
4471 struct vnode_attr
*vap
= NULL
;
4474 /* unlink or delete is allowed on rsrc forks and named streams */
4475 ndp
->ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
4478 ndp
->ni_cnd
.cn_flags
|= LOCKPARENT
;
4479 ndp
->ni_flag
|= NAMEI_COMPOUNDREMOVE
;
4483 error
= nameiat(ndp
, fd
);
4491 /* With Carbon delete semantics, busy files cannot be deleted */
4492 if (unlink_flags
& VNODE_REMOVE_NODELETEBUSY
) {
4493 flags
|= VNODE_REMOVE_NODELETEBUSY
;
4496 /* Skip any potential upcalls if told to. */
4497 if (unlink_flags
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) {
4498 flags
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
;
4502 batched
= vnode_compound_remove_available(vp
);
4504 * The root of a mounted filesystem cannot be deleted.
4506 if (vp
->v_flag
& VROOT
) {
4511 error
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
);
4519 if (!vnode_compound_remove_available(dvp
)) {
4520 panic("No vp, but no compound remove?");
4525 need_event
= need_fsevent(FSE_DELETE
, dvp
);
4528 if ((vp
->v_flag
& VISHARDLINK
) == 0) {
4529 /* XXX need to get these data in batched VNOP */
4530 get_fse_info(vp
, &finfo
, ctx
);
4533 error
= vfs_get_notify_attributes(&va
);
4542 has_listeners
= kauth_authorize_fileop_has_listeners();
4543 if (need_event
|| has_listeners
) {
4551 len
= safe_getpath(dvp
, ndp
->ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
);
4555 if (ndp
->ni_cnd
.cn_flags
& CN_WANTSRSRCFORK
)
4556 error
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
);
4560 error
= vn_remove(dvp
, &ndp
->ni_vp
, ndp
, flags
, vap
, ctx
);
4562 if (error
== EKEEPLOOKING
) {
4564 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4567 if ((ndp
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
4568 panic("EKEEPLOOKING, but continue flag not set?");
4571 if (vnode_isdir(vp
)) {
4575 goto lookup_continue
;
4580 * Call out to allow 3rd party notification of delete.
4581 * Ignore result of kauth_authorize_fileop call.
4584 if (has_listeners
) {
4585 kauth_authorize_fileop(vfs_context_ucred(ctx
),
4586 KAUTH_FILEOP_DELETE
,
4591 if (vp
->v_flag
& VISHARDLINK
) {
4593 // if a hardlink gets deleted we want to blow away the
4594 // v_parent link because the path that got us to this
4595 // instance of the link is no longer valid. this will
4596 // force the next call to get the path to ask the file
4597 // system instead of just following the v_parent link.
4599 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
4604 if (vp
->v_flag
& VISHARDLINK
) {
4605 get_fse_info(vp
, &finfo
, ctx
);
4607 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
4609 if (truncated_path
) {
4610 finfo
.mode
|= FSE_TRUNCATED_PATH
;
4612 add_fsevent(FSE_DELETE
, ctx
,
4613 FSE_ARG_STRING
, len
, path
,
4614 FSE_ARG_FINFO
, &finfo
,
4625 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4626 * will cause its shadow file to go away if necessary.
4628 if (vp
&& (vnode_isnamedstream(vp
)) &&
4629 (vp
->v_parent
!= NULLVP
) &&
4630 vnode_isshadow(vp
)) {
4635 * nameidone has to happen before we vnode_put(dvp)
4636 * since it may need to release the fs_nodelock on the dvp
4647 unlink1(vfs_context_t ctx
, struct nameidata
*ndp
, int unlink_flags
)
4649 return (unlink1at(ctx
, ndp
, unlink_flags
, AT_FDCWD
));
4653 * Delete a name from the filesystem using POSIX semantics.
4656 unlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
4657 enum uio_seg segflg
)
4659 struct nameidata nd
;
4661 NDINIT(&nd
, DELETE
, OP_UNLINK
, AUDITVNPATH1
, segflg
,
4663 return (unlink1at(ctx
, &nd
, 0, fd
));
4667 unlink(__unused proc_t p
, struct unlink_args
*uap
, __unused
int32_t *retval
)
4669 return (unlinkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
4674 unlinkat(__unused proc_t p
, struct unlinkat_args
*uap
, __unused
int32_t *retval
)
4676 if (uap
->flag
& ~AT_REMOVEDIR
)
4679 if (uap
->flag
& AT_REMOVEDIR
)
4680 return (rmdirat_internal(vfs_context_current(), uap
->fd
,
4681 uap
->path
, UIO_USERSPACE
));
4683 return (unlinkat_internal(vfs_context_current(), uap
->fd
,
4684 uap
->path
, UIO_USERSPACE
));
4688 * Delete a name from the filesystem using Carbon semantics.
4691 delete(__unused proc_t p
, struct delete_args
*uap
, __unused
int32_t *retval
)
4693 struct nameidata nd
;
4694 vfs_context_t ctx
= vfs_context_current();
4696 NDINIT(&nd
, DELETE
, OP_UNLINK
, AUDITVNPATH1
, UIO_USERSPACE
,
4698 return unlink1(ctx
, &nd
, VNODE_REMOVE_NODELETEBUSY
);
4702 * Reposition read/write file offset.
4705 lseek(proc_t p
, struct lseek_args
*uap
, off_t
*retval
)
4707 struct fileproc
*fp
;
4709 struct vfs_context
*ctx
;
4710 off_t offset
= uap
->offset
, file_size
;
4713 if ( (error
= fp_getfvp(p
,uap
->fd
, &fp
, &vp
)) ) {
4714 if (error
== ENOTSUP
)
4718 if (vnode_isfifo(vp
)) {
4724 ctx
= vfs_context_current();
4726 if (uap
->whence
== L_INCR
&& uap
->offset
== 0)
4727 error
= mac_file_check_get_offset(vfs_context_ucred(ctx
),
4730 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
4737 if ( (error
= vnode_getwithref(vp
)) ) {
4742 switch (uap
->whence
) {
4744 offset
+= fp
->f_fglob
->fg_offset
;
4747 if ((error
= vnode_size(vp
, &file_size
, ctx
)) != 0)
4749 offset
+= file_size
;
4757 if (uap
->offset
> 0 && offset
< 0) {
4758 /* Incremented/relative move past max size */
4762 * Allow negative offsets on character devices, per
4763 * POSIX 1003.1-2001. Most likely for writing disk
4766 if (offset
< 0 && vp
->v_type
!= VCHR
) {
4767 /* Decremented/relative move before start */
4771 fp
->f_fglob
->fg_offset
= offset
;
4772 *retval
= fp
->f_fglob
->fg_offset
;
4778 * An lseek can affect whether data is "available to read." Use
4779 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4781 post_event_if_success(vp
, error
, NOTE_NONE
);
4782 (void)vnode_put(vp
);
4789 * Check access permissions.
4791 * Returns: 0 Success
4792 * vnode_authorize:???
4795 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
)
4797 kauth_action_t action
;
4801 * If just the regular access bits, convert them to something
4802 * that vnode_authorize will understand.
4804 if (!(uflags
& _ACCESS_EXTENDED_MASK
)) {
4807 action
|= KAUTH_VNODE_READ_DATA
; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4808 if (uflags
& W_OK
) {
4809 if (vnode_isdir(vp
)) {
4810 action
|= KAUTH_VNODE_ADD_FILE
|
4811 KAUTH_VNODE_ADD_SUBDIRECTORY
;
4812 /* might want delete rights here too */
4814 action
|= KAUTH_VNODE_WRITE_DATA
;
4817 if (uflags
& X_OK
) {
4818 if (vnode_isdir(vp
)) {
4819 action
|= KAUTH_VNODE_SEARCH
;
4821 action
|= KAUTH_VNODE_EXECUTE
;
4825 /* take advantage of definition of uflags */
4826 action
= uflags
>> 8;
4830 error
= mac_vnode_check_access(ctx
, vp
, uflags
);
4835 /* action == 0 means only check for existence */
4837 error
= vnode_authorize(vp
, dvp
, action
| KAUTH_VNODE_ACCESS
, ctx
);
4848 * access_extended: Check access permissions in bulk.
4850 * Description: uap->entries Pointer to an array of accessx
4851 * descriptor structs, plus one or
4852 * more NULL terminated strings (see
4853 * "Notes" section below).
4854 * uap->size Size of the area pointed to by
4856 * uap->results Pointer to the results array.
4858 * Returns: 0 Success
4859 * ENOMEM Insufficient memory
4860 * EINVAL Invalid arguments
4861 * namei:EFAULT Bad address
4862 * namei:ENAMETOOLONG Filename too long
4863 * namei:ENOENT No such file or directory
4864 * namei:ELOOP Too many levels of symbolic links
4865 * namei:EBADF Bad file descriptor
4866 * namei:ENOTDIR Not a directory
4871 * uap->results Array contents modified
4873 * Notes: The uap->entries are structured as an arbitrary length array
4874 * of accessx descriptors, followed by one or more NULL terminated
4877 * struct accessx_descriptor[0]
4879 * struct accessx_descriptor[n]
4880 * char name_data[0];
4882 * We determine the entry count by walking the buffer containing
4883 * the uap->entries argument descriptor. For each descriptor we
4884 * see, the valid values for the offset ad_name_offset will be
4885 * in the byte range:
4887 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4889 * [ uap->entries + uap->size - 2 ]
4891 * since we must have at least one string, and the string must
4892 * be at least one character plus the NULL terminator in length.
4894 * XXX: Need to support the check-as uid argument
4897 access_extended(__unused proc_t p
, struct access_extended_args
*uap
, __unused
int32_t *retval
)
4899 struct accessx_descriptor
*input
= NULL
;
4900 errno_t
*result
= NULL
;
4903 unsigned int desc_max
, desc_actual
, i
, j
;
4904 struct vfs_context context
;
4905 struct nameidata nd
;
4909 #define ACCESSX_MAX_DESCR_ON_STACK 10
4910 struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
];
4912 context
.vc_ucred
= NULL
;
4915 * Validate parameters; if valid, copy the descriptor array and string
4916 * arguments into local memory. Before proceeding, the following
4917 * conditions must have been met:
4919 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4920 * o There must be sufficient room in the request for at least one
4921 * descriptor and a one byte NUL terminated string.
4922 * o The allocation of local storage must not fail.
4924 if (uap
->size
> ACCESSX_MAX_TABLESIZE
)
4926 if (uap
->size
< (sizeof(struct accessx_descriptor
) + 2))
4928 if (uap
->size
<= sizeof (stack_input
)) {
4929 input
= stack_input
;
4931 MALLOC(input
, struct accessx_descriptor
*, uap
->size
, M_TEMP
, M_WAITOK
);
4932 if (input
== NULL
) {
4937 error
= copyin(uap
->entries
, input
, uap
->size
);
4941 AUDIT_ARG(opaque
, input
, uap
->size
);
4944 * Force NUL termination of the copyin buffer to avoid namei() running
4945 * off the end. If the caller passes us bogus data, they may get a
4948 ((char *)input
)[uap
->size
- 1] = 0;
4951 * Access is defined as checking against the process' real identity,
4952 * even if operations are checking the effective identity. This
4953 * requires that we use a local vfs context.
4955 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
4956 context
.vc_thread
= current_thread();
4959 * Find out how many entries we have, so we can allocate the result
4960 * array by walking the list and adjusting the count downward by the
4961 * earliest string offset we see.
4963 desc_max
= (uap
->size
- 2) / sizeof(struct accessx_descriptor
);
4964 desc_actual
= desc_max
;
4965 for (i
= 0; i
< desc_actual
; i
++) {
4967 * Take the offset to the name string for this entry and
4968 * convert to an input array index, which would be one off
4969 * the end of the array if this entry was the lowest-addressed
4972 j
= input
[i
].ad_name_offset
/ sizeof(struct accessx_descriptor
);
4975 * An offset greater than the max allowable offset is an error.
4976 * It is also an error for any valid entry to point
4977 * to a location prior to the end of the current entry, if
4978 * it's not a reference to the string of the previous entry.
4980 if (j
> desc_max
|| (j
!= 0 && j
<= i
)) {
4986 * An offset of 0 means use the previous descriptor's offset;
4987 * this is used to chain multiple requests for the same file
4988 * to avoid multiple lookups.
4991 /* This is not valid for the first entry */
5000 * If the offset of the string for this descriptor is before
5001 * what we believe is the current actual last descriptor,
5002 * then we need to adjust our estimate downward; this permits
5003 * the string table following the last descriptor to be out
5004 * of order relative to the descriptor list.
5006 if (j
< desc_actual
)
5011 * We limit the actual number of descriptors we are willing to process
5012 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5013 * requested does not exceed this limit,
5015 if (desc_actual
> ACCESSX_MAX_DESCRIPTORS
) {
5019 MALLOC(result
, errno_t
*, desc_actual
* sizeof(errno_t
), M_TEMP
, M_WAITOK
);
5020 if (result
== NULL
) {
5026 * Do the work by iterating over the descriptor entries we know to
5027 * at least appear to contain valid data.
5030 for (i
= 0; i
< desc_actual
; i
++) {
5032 * If the ad_name_offset is 0, then we use the previous
5033 * results to make the check; otherwise, we are looking up
5036 if (input
[i
].ad_name_offset
!= 0) {
5037 /* discard old vnodes */
5048 * Scan forward in the descriptor list to see if we
5049 * need the parent vnode. We will need it if we are
5050 * deleting, since we must have rights to remove
5051 * entries in the parent directory, as well as the
5052 * rights to delete the object itself.
5054 wantdelete
= input
[i
].ad_flags
& _DELETE_OK
;
5055 for (j
= i
+ 1; (j
< desc_actual
) && (input
[j
].ad_name_offset
== 0); j
++)
5056 if (input
[j
].ad_flags
& _DELETE_OK
)
5059 niopts
= FOLLOW
| AUDITVNPATH1
;
5061 /* need parent for vnode_authorize for deletion test */
5063 niopts
|= WANTPARENT
;
5066 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
,
5067 CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
),
5079 * Handle lookup errors.
5089 /* run this access check */
5090 result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
);
5093 /* fatal lookup error */
5099 AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
);
5101 /* copy out results */
5102 error
= copyout(result
, uap
->results
, desc_actual
* sizeof(errno_t
));
5105 if (input
&& input
!= stack_input
)
5106 FREE(input
, M_TEMP
);
5108 FREE(result
, M_TEMP
);
5113 if (IS_VALID_CRED(context
.vc_ucred
))
5114 kauth_cred_unref(&context
.vc_ucred
);
5120 * Returns: 0 Success
5121 * namei:EFAULT Bad address
5122 * namei:ENAMETOOLONG Filename too long
5123 * namei:ENOENT No such file or directory
5124 * namei:ELOOP Too many levels of symbolic links
5125 * namei:EBADF Bad file descriptor
5126 * namei:ENOTDIR Not a directory
5131 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
,
5132 int flag
, enum uio_seg segflg
)
5135 struct nameidata nd
;
5137 struct vfs_context context
;
5139 int is_namedstream
= 0;
5143 * Unless the AT_EACCESS option is used, Access is defined as checking
5144 * against the process' real identity, even if operations are checking
5145 * the effective identity. So we need to tweak the credential
5146 * in the context for that case.
5148 if (!(flag
& AT_EACCESS
))
5149 context
.vc_ucred
= kauth_cred_copy_real(kauth_cred_get());
5151 context
.vc_ucred
= ctx
->vc_ucred
;
5152 context
.vc_thread
= ctx
->vc_thread
;
5155 niopts
= FOLLOW
| AUDITVNPATH1
;
5156 /* need parent for vnode_authorize for deletion test */
5157 if (amode
& _DELETE_OK
)
5158 niopts
|= WANTPARENT
;
5159 NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
,
5163 /* access(F_OK) calls are allowed for resource forks. */
5165 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5167 error
= nameiat(&nd
, fd
);
5172 /* Grab reference on the shadow stream file vnode to
5173 * force an inactive on release which will mark it
5176 if (vnode_isnamedstream(nd
.ni_vp
) &&
5177 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5178 vnode_isshadow(nd
.ni_vp
)) {
5180 vnode_ref(nd
.ni_vp
);
5184 error
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
);
5187 if (is_namedstream
) {
5188 vnode_rele(nd
.ni_vp
);
5192 vnode_put(nd
.ni_vp
);
5193 if (amode
& _DELETE_OK
)
5194 vnode_put(nd
.ni_dvp
);
5198 if (!(flag
& AT_EACCESS
))
5199 kauth_cred_unref(&context
.vc_ucred
);
5204 access(__unused proc_t p
, struct access_args
*uap
, __unused
int32_t *retval
)
5206 return (faccessat_internal(vfs_context_current(), AT_FDCWD
,
5207 uap
->path
, uap
->flags
, 0, UIO_USERSPACE
));
5211 faccessat(__unused proc_t p
, struct faccessat_args
*uap
,
5212 __unused
int32_t *retval
)
5214 if (uap
->flag
& ~AT_EACCESS
)
5217 return (faccessat_internal(vfs_context_current(), uap
->fd
,
5218 uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
));
5222 * Returns: 0 Success
5229 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
,
5230 user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
,
5231 enum uio_seg segflg
, int fd
, int flag
)
5233 struct nameidata nd
;
5240 struct user64_stat user64_sb
;
5241 struct user32_stat user32_sb
;
5242 struct user64_stat64 user64_sb64
;
5243 struct user32_stat64 user32_sb64
;
5247 kauth_filesec_t fsec
;
5248 size_t xsecurity_bufsize
;
5251 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5252 NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow
| AUDITVNPATH1
,
5256 int is_namedstream
= 0;
5257 /* stat calls are allowed for resource forks. */
5258 nd
.ni_cnd
.cn_flags
|= CN_ALLOWRSRCFORK
;
5260 error
= nameiat(&nd
, fd
);
5263 fsec
= KAUTH_FILESEC_NONE
;
5265 statptr
= (void *)&source
;
5268 /* Grab reference on the shadow stream file vnode to
5269 * force an inactive on release which will mark it
5272 if (vnode_isnamedstream(nd
.ni_vp
) &&
5273 (nd
.ni_vp
->v_parent
!= NULLVP
) &&
5274 vnode_isshadow(nd
.ni_vp
)) {
5276 vnode_ref(nd
.ni_vp
);
5280 error
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity
!= USER_ADDR_NULL
? &fsec
: NULL
), isstat64
, ctx
);
5283 if (is_namedstream
) {
5284 vnode_rele(nd
.ni_vp
);
5287 vnode_put(nd
.ni_vp
);
5292 /* Zap spare fields */
5293 if (isstat64
!= 0) {
5294 source
.sb64
.st_lspare
= 0;
5295 source
.sb64
.st_qspare
[0] = 0LL;
5296 source
.sb64
.st_qspare
[1] = 0LL;
5297 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5298 munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
);
5299 my_size
= sizeof(dest
.user64_sb64
);
5300 sbp
= (caddr_t
)&dest
.user64_sb64
;
5302 munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
);
5303 my_size
= sizeof(dest
.user32_sb64
);
5304 sbp
= (caddr_t
)&dest
.user32_sb64
;
5307 * Check if we raced (post lookup) against the last unlink of a file.
5309 if ((source
.sb64
.st_nlink
== 0) && S_ISREG(source
.sb64
.st_mode
)) {
5310 source
.sb64
.st_nlink
= 1;
5313 source
.sb
.st_lspare
= 0;
5314 source
.sb
.st_qspare
[0] = 0LL;
5315 source
.sb
.st_qspare
[1] = 0LL;
5316 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) {
5317 munge_user64_stat(&source
.sb
, &dest
.user64_sb
);
5318 my_size
= sizeof(dest
.user64_sb
);
5319 sbp
= (caddr_t
)&dest
.user64_sb
;
5321 munge_user32_stat(&source
.sb
, &dest
.user32_sb
);
5322 my_size
= sizeof(dest
.user32_sb
);
5323 sbp
= (caddr_t
)&dest
.user32_sb
;
5327 * Check if we raced (post lookup) against the last unlink of a file.
5329 if ((source
.sb
.st_nlink
== 0) && S_ISREG(source
.sb
.st_mode
)) {
5330 source
.sb
.st_nlink
= 1;
5333 if ((error
= copyout(sbp
, ub
, my_size
)) != 0)
5336 /* caller wants extended security information? */
5337 if (xsecurity
!= USER_ADDR_NULL
) {
5339 /* did we get any? */
5340 if (fsec
== KAUTH_FILESEC_NONE
) {
5341 if (susize(xsecurity_size
, 0) != 0) {
5346 /* find the user buffer size */
5347 xsecurity_bufsize
= fusize(xsecurity_size
);
5349 /* copy out the actual data size */
5350 if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) {
5355 /* if the caller supplied enough room, copy out to it */
5356 if (xsecurity_bufsize
>= KAUTH_FILESEC_COPYSIZE(fsec
))
5357 error
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
));
5361 if (fsec
!= KAUTH_FILESEC_NONE
)
5362 kauth_filesec_free(fsec
);
5367 * stat_extended: Get file status; with extended security (ACL).
5369 * Parameters: p (ignored)
5370 * uap User argument descriptor (see below)
5373 * Indirect: uap->path Path of file to get status from
5374 * uap->ub User buffer (holds file status info)
5375 * uap->xsecurity ACL to get (extended security)
5376 * uap->xsecurity_size Size of ACL
5378 * Returns: 0 Success
5383 stat_extended(__unused proc_t p
, struct stat_extended_args
*uap
,
5384 __unused
int32_t *retval
)
5386 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5387 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5392 * Returns: 0 Success
5393 * fstatat_internal:??? [see fstatat_internal() in this file]
5396 stat(__unused proc_t p
, struct stat_args
*uap
, __unused
int32_t *retval
)
5398 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5399 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0));
5403 stat64(__unused proc_t p
, struct stat64_args
*uap
, __unused
int32_t *retval
)
5405 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5406 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0));
5410 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5412 * Parameters: p (ignored)
5413 * uap User argument descriptor (see below)
5416 * Indirect: uap->path Path of file to get status from
5417 * uap->ub User buffer (holds file status info)
5418 * uap->xsecurity ACL to get (extended security)
5419 * uap->xsecurity_size Size of ACL
5421 * Returns: 0 Success
5426 stat64_extended(__unused proc_t p
, struct stat64_extended_args
*uap
, __unused
int32_t *retval
)
5428 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5429 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5434 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5436 * Parameters: p (ignored)
5437 * uap User argument descriptor (see below)
5440 * Indirect: uap->path Path of file to get status from
5441 * uap->ub User buffer (holds file status info)
5442 * uap->xsecurity ACL to get (extended security)
5443 * uap->xsecurity_size Size of ACL
5445 * Returns: 0 Success
5450 lstat_extended(__unused proc_t p
, struct lstat_extended_args
*uap
, __unused
int32_t *retval
)
5452 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5453 uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
,
5454 AT_SYMLINK_NOFOLLOW
));
5458 * Get file status; this version does not follow links.
5461 lstat(__unused proc_t p
, struct lstat_args
*uap
, __unused
int32_t *retval
)
5463 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5464 0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5468 lstat64(__unused proc_t p
, struct lstat64_args
*uap
, __unused
int32_t *retval
)
5470 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5471 0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
));
5475 * lstat64_extended: Get file status; can handle large inode numbers; does not
5476 * follow links; with extended security (ACL).
5478 * Parameters: p (ignored)
5479 * uap User argument descriptor (see below)
5482 * Indirect: uap->path Path of file to get status from
5483 * uap->ub User buffer (holds file status info)
5484 * uap->xsecurity ACL to get (extended security)
5485 * uap->xsecurity_size Size of ACL
5487 * Returns: 0 Success
5492 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args
*uap
, __unused
int32_t *retval
)
5494 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5495 uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
,
5496 AT_SYMLINK_NOFOLLOW
));
5500 fstatat(__unused proc_t p
, struct fstatat_args
*uap
, __unused
int32_t *retval
)
5502 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5505 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5506 0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5510 fstatat64(__unused proc_t p
, struct fstatat64_args
*uap
,
5511 __unused
int32_t *retval
)
5513 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5516 return (fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
,
5517 0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
));
5521 * Get configurable pathname variables.
5523 * Returns: 0 Success
5527 * Notes: Global implementation constants are intended to be
5528 * implemented in this function directly; all other constants
5529 * are per-FS implementation, and therefore must be handled in
5530 * each respective FS, instead.
5532 * XXX We implement some things globally right now that should actually be
5533 * XXX per-FS; we will need to deal with this at some point.
5537 pathconf(__unused proc_t p
, struct pathconf_args
*uap
, int32_t *retval
)
5540 struct nameidata nd
;
5541 vfs_context_t ctx
= vfs_context_current();
5543 NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW
| AUDITVNPATH1
,
5544 UIO_USERSPACE
, uap
->path
, ctx
);
5549 error
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
);
5551 vnode_put(nd
.ni_vp
);
5557 * Return target name of a symbolic link.
5561 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
,
5562 enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
,
5568 struct nameidata nd
;
5569 char uio_buf
[ UIO_SIZEOF(1) ];
5571 NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW
| AUDITVNPATH1
,
5574 error
= nameiat(&nd
, fd
);
5581 auio
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
,
5582 &uio_buf
[0], sizeof(uio_buf
));
5583 uio_addiov(auio
, buf
, bufsize
);
5584 if (vp
->v_type
!= VLNK
) {
5588 error
= mac_vnode_check_readlink(ctx
, vp
);
5591 error
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
,
5594 error
= VNOP_READLINK(vp
, auio
, ctx
);
5598 *retval
= bufsize
- (int)uio_resid(auio
);
5603 readlink(proc_t p
, struct readlink_args
*uap
, int32_t *retval
)
5605 enum uio_seg procseg
;
5607 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5608 return (readlinkat_internal(vfs_context_current(), AT_FDCWD
,
5609 CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
),
5610 uap
->count
, procseg
, retval
));
5614 readlinkat(proc_t p
, struct readlinkat_args
*uap
, int32_t *retval
)
5616 enum uio_seg procseg
;
5618 procseg
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
5619 return (readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
5620 procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
));
5624 * Change file flags.
5627 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
)
5629 struct vnode_attr va
;
5630 kauth_action_t action
;
5634 VATTR_SET(&va
, va_flags
, flags
);
5637 error
= mac_vnode_check_setflags(ctx
, vp
, flags
);
5642 /* request authorisation, disregard immutability */
5643 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
5646 * Request that the auth layer disregard those file flags it's allowed to when
5647 * authorizing this operation; we need to do this in order to be able to
5648 * clear immutable flags.
5650 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0))
5652 error
= vnode_setattr(vp
, &va
, ctx
);
5654 if ((error
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) {
5663 * Change flags of a file given a path name.
5667 chflags(__unused proc_t p
, struct chflags_args
*uap
, __unused
int32_t *retval
)
5670 vfs_context_t ctx
= vfs_context_current();
5672 struct nameidata nd
;
5674 AUDIT_ARG(fflags
, uap
->flags
);
5675 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
5676 UIO_USERSPACE
, uap
->path
, ctx
);
5683 error
= chflags1(vp
, uap
->flags
, ctx
);
5689 * Change flags of a file given a file descriptor.
5693 fchflags(__unused proc_t p
, struct fchflags_args
*uap
, __unused
int32_t *retval
)
5698 AUDIT_ARG(fd
, uap
->fd
);
5699 AUDIT_ARG(fflags
, uap
->flags
);
5700 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
5703 if ((error
= vnode_getwithref(vp
))) {
5708 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5710 error
= chflags1(vp
, uap
->flags
, vfs_context_current());
5717 * Change security information on a filesystem object.
5719 * Returns: 0 Success
5720 * EPERM Operation not permitted
5721 * vnode_authattr:??? [anything vnode_authattr can return]
5722 * vnode_authorize:??? [anything vnode_authorize can return]
5723 * vnode_setattr:??? [anything vnode_setattr can return]
5725 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5726 * translated to EPERM before being returned.
5729 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr
*vap
)
5731 kauth_action_t action
;
5734 AUDIT_ARG(mode
, vap
->va_mode
);
5735 /* XXX audit new args */
5738 /* chmod calls are not allowed for resource forks. */
5739 if (vp
->v_flag
& VISNAMEDSTREAM
) {
5745 if (VATTR_IS_ACTIVE(vap
, va_mode
) &&
5746 (error
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0)
5750 /* make sure that the caller is allowed to set this security information */
5751 if (((error
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) ||
5752 ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
5753 if (error
== EACCES
)
5758 error
= vnode_setattr(vp
, vap
, ctx
);
5765 * Change mode of a file given a path name.
5767 * Returns: 0 Success
5768 * namei:??? [anything namei can return]
5769 * chmod_vnode:??? [anything chmod_vnode can return]
5772 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
,
5773 int fd
, int flag
, enum uio_seg segflg
)
5775 struct nameidata nd
;
5778 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
5779 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
,
5781 if ((error
= nameiat(&nd
, fd
)))
5783 error
= chmod_vnode(ctx
, nd
.ni_vp
, vap
);
5784 vnode_put(nd
.ni_vp
);
5790 * chmod_extended: Change the mode of a file given a path name; with extended
5791 * argument list (including extended security (ACL)).
5793 * Parameters: p Process requesting the open
5794 * uap User argument descriptor (see below)
5797 * Indirect: uap->path Path to object (same as 'chmod')
5798 * uap->uid UID to set
5799 * uap->gid GID to set
5800 * uap->mode File mode to set (same as 'chmod')
5801 * uap->xsecurity ACL to set (or delete)
5803 * Returns: 0 Success
5806 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5808 * XXX: We should enumerate the possible errno values here, and where
5809 * in the code they originated.
5812 chmod_extended(__unused proc_t p
, struct chmod_extended_args
*uap
, __unused
int32_t *retval
)
5815 struct vnode_attr va
;
5816 kauth_filesec_t xsecdst
;
5818 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5821 if (uap
->mode
!= -1)
5822 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5823 if (uap
->uid
!= KAUTH_UID_NONE
)
5824 VATTR_SET(&va
, va_uid
, uap
->uid
);
5825 if (uap
->gid
!= KAUTH_GID_NONE
)
5826 VATTR_SET(&va
, va_gid
, uap
->gid
);
5829 switch(uap
->xsecurity
) {
5830 /* explicit remove request */
5831 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5832 VATTR_SET(&va
, va_acl
, NULL
);
5835 case USER_ADDR_NULL
:
5838 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5840 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5841 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
);
5844 error
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0,
5847 if (xsecdst
!= NULL
)
5848 kauth_filesec_free(xsecdst
);
5853 * Returns: 0 Success
5854 * chmodat:??? [anything chmodat can return]
5857 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
,
5858 int flag
, enum uio_seg segflg
)
5860 struct vnode_attr va
;
5863 VATTR_SET(&va
, va_mode
, mode
& ALLPERMS
);
5865 return (chmodat(ctx
, path
, &va
, fd
, flag
, segflg
));
5869 chmod(__unused proc_t p
, struct chmod_args
*uap
, __unused
int32_t *retval
)
5871 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5872 AT_FDCWD
, 0, UIO_USERSPACE
));
5876 fchmodat(__unused proc_t p
, struct fchmodat_args
*uap
, __unused
int32_t *retval
)
5878 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
5881 return (fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
,
5882 uap
->fd
, uap
->flag
, UIO_USERSPACE
));
5886 * Change mode of a file given a file descriptor.
5889 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr
*vap
)
5896 if ((error
= file_vnode(fd
, &vp
)) != 0)
5898 if ((error
= vnode_getwithref(vp
)) != 0) {
5902 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
5904 error
= chmod_vnode(vfs_context_current(), vp
, vap
);
5905 (void)vnode_put(vp
);
5912 * fchmod_extended: Change mode of a file given a file descriptor; with
5913 * extended argument list (including extended security (ACL)).
5915 * Parameters: p Process requesting to change file mode
5916 * uap User argument descriptor (see below)
5919 * Indirect: uap->mode File mode to set (same as 'chmod')
5920 * uap->uid UID to set
5921 * uap->gid GID to set
5922 * uap->xsecurity ACL to set (or delete)
5923 * uap->fd File descriptor of file to change mode
5925 * Returns: 0 Success
5930 fchmod_extended(proc_t p
, struct fchmod_extended_args
*uap
, __unused
int32_t *retval
)
5933 struct vnode_attr va
;
5934 kauth_filesec_t xsecdst
;
5936 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
5939 if (uap
->mode
!= -1)
5940 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5941 if (uap
->uid
!= KAUTH_UID_NONE
)
5942 VATTR_SET(&va
, va_uid
, uap
->uid
);
5943 if (uap
->gid
!= KAUTH_GID_NONE
)
5944 VATTR_SET(&va
, va_gid
, uap
->gid
);
5947 switch(uap
->xsecurity
) {
5948 case USER_ADDR_NULL
:
5949 VATTR_SET(&va
, va_acl
, NULL
);
5951 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5952 VATTR_SET(&va
, va_acl
, NULL
);
5955 case CAST_USER_ADDR_T(-1):
5958 if ((error
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
5960 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
5963 error
= fchmod1(p
, uap
->fd
, &va
);
5966 switch(uap
->xsecurity
) {
5967 case USER_ADDR_NULL
:
5968 case CAST_USER_ADDR_T(-1):
5971 if (xsecdst
!= NULL
)
5972 kauth_filesec_free(xsecdst
);
5978 fchmod(proc_t p
, struct fchmod_args
*uap
, __unused
int32_t *retval
)
5980 struct vnode_attr va
;
5983 VATTR_SET(&va
, va_mode
, uap
->mode
& ALLPERMS
);
5985 return(fchmod1(p
, uap
->fd
, &va
));
5990 * Set ownership given a path name.
5994 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
,
5995 gid_t gid
, int flag
, enum uio_seg segflg
)
5998 struct vnode_attr va
;
6000 struct nameidata nd
;
6002 kauth_action_t action
;
6004 AUDIT_ARG(owner
, uid
, gid
);
6006 follow
= (flag
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW
: FOLLOW
;
6007 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow
| AUDITVNPATH1
, segflg
,
6009 error
= nameiat(&nd
, fd
);
6017 if (uid
!= (uid_t
)VNOVAL
)
6018 VATTR_SET(&va
, va_uid
, uid
);
6019 if (gid
!= (gid_t
)VNOVAL
)
6020 VATTR_SET(&va
, va_gid
, gid
);
6023 error
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
);
6028 /* preflight and authorize attribute changes */
6029 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6031 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6033 error
= vnode_setattr(vp
, &va
, ctx
);
6037 * EACCES is only allowed from namei(); permissions failure should
6038 * return EPERM, so we need to translate the error code.
6040 if (error
== EACCES
)
6048 chown(__unused proc_t p
, struct chown_args
*uap
, __unused
int32_t *retval
)
6050 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6051 uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
));
6055 lchown(__unused proc_t p
, struct lchown_args
*uap
, __unused
int32_t *retval
)
6057 return (fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
,
6058 uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
));
6062 fchownat(__unused proc_t p
, struct fchownat_args
*uap
, __unused
int32_t *retval
)
6064 if (uap
->flag
& ~AT_SYMLINK_NOFOLLOW
)
6067 return (fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
,
6068 uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
));
6072 * Set ownership given a file descriptor.
6076 fchown(__unused proc_t p
, struct fchown_args
*uap
, __unused
int32_t *retval
)
6078 struct vnode_attr va
;
6079 vfs_context_t ctx
= vfs_context_current();
6082 kauth_action_t action
;
6084 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
6085 AUDIT_ARG(fd
, uap
->fd
);
6087 if ( (error
= file_vnode(uap
->fd
, &vp
)) )
6090 if ( (error
= vnode_getwithref(vp
)) ) {
6094 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6097 if (uap
->uid
!= VNOVAL
)
6098 VATTR_SET(&va
, va_uid
, uap
->uid
);
6099 if (uap
->gid
!= VNOVAL
)
6100 VATTR_SET(&va
, va_gid
, uap
->gid
);
6103 /* chown calls are not allowed for resource forks. */
6104 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6111 error
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
);
6116 /* preflight and authorize attribute changes */
6117 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6119 if (action
&& ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6120 if (error
== EACCES
)
6124 error
= vnode_setattr(vp
, &va
, ctx
);
6127 (void)vnode_put(vp
);
6133 getutimes(user_addr_t usrtvp
, struct timespec
*tsp
)
6137 if (usrtvp
== USER_ADDR_NULL
) {
6138 struct timeval old_tv
;
6139 /* XXX Y2038 bug because of microtime argument */
6141 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]);
6144 if (IS_64BIT_PROCESS(current_proc())) {
6145 struct user64_timeval tv
[2];
6146 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6149 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6150 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6152 struct user32_timeval tv
[2];
6153 error
= copyin(usrtvp
, (void *)tv
, sizeof(tv
));
6156 TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]);
6157 TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]);
6164 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec
*ts
,
6168 struct vnode_attr va
;
6169 kauth_action_t action
;
6171 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6174 VATTR_SET(&va
, va_access_time
, ts
[0]);
6175 VATTR_SET(&va
, va_modify_time
, ts
[1]);
6177 va
.va_vaflags
|= VA_UTIMES_NULL
;
6180 /* utimes calls are not allowed for resource forks. */
6181 if (vp
->v_flag
& VISNAMEDSTREAM
) {
6188 error
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]);
6192 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) {
6193 if (!nullflag
&& error
== EACCES
)
6198 /* since we may not need to auth anything, check here */
6199 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) {
6200 if (!nullflag
&& error
== EACCES
)
6204 error
= vnode_setattr(vp
, &va
, ctx
);
6211 * Set the access and modification times of a file.
6215 utimes(__unused proc_t p
, struct utimes_args
*uap
, __unused
int32_t *retval
)
6217 struct timespec ts
[2];
6220 struct nameidata nd
;
6221 vfs_context_t ctx
= vfs_context_current();
6224 * AUDIT: Needed to change the order of operations to do the
6225 * name lookup first because auditing wants the path.
6227 NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW
| AUDITVNPATH1
,
6228 UIO_USERSPACE
, uap
->path
, ctx
);
6235 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6236 * the current time instead.
6239 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6242 error
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp
== USER_ADDR_NULL
);
6245 vnode_put(nd
.ni_vp
);
6250 * Set the access and modification times of a file.
6254 futimes(__unused proc_t p
, struct futimes_args
*uap
, __unused
int32_t *retval
)
6256 struct timespec ts
[2];
6261 AUDIT_ARG(fd
, uap
->fd
);
6263 if ((error
= getutimes(usrtvp
, ts
)) != 0)
6265 if ((error
= file_vnode(uap
->fd
, &vp
)) != 0)
6267 if((error
= vnode_getwithref(vp
))) {
6272 error
= setutimes(vfs_context_current(), vp
, ts
, usrtvp
== 0);
6279 * Truncate a file given its path name.
6283 truncate(__unused proc_t p
, struct truncate_args
*uap
, __unused
int32_t *retval
)
6286 struct vnode_attr va
;
6287 vfs_context_t ctx
= vfs_context_current();
6289 struct nameidata nd
;
6290 kauth_action_t action
;
6292 if (uap
->length
< 0)
6294 NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW
| AUDITVNPATH1
,
6295 UIO_USERSPACE
, uap
->path
, ctx
);
6296 if ((error
= namei(&nd
)))
6303 VATTR_SET(&va
, va_data_size
, uap
->length
);
6306 error
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
);
6311 if ((error
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0)
6313 if ((action
!= 0) && ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0))
6315 error
= vnode_setattr(vp
, &va
, ctx
);
6322 * Truncate a file given a file descriptor.
6326 ftruncate(proc_t p
, struct ftruncate_args
*uap
, int32_t *retval
)
6328 vfs_context_t ctx
= vfs_context_current();
6329 struct vnode_attr va
;
6331 struct fileproc
*fp
;
6335 AUDIT_ARG(fd
, uap
->fd
);
6336 if (uap
->length
< 0)
6339 if ( (error
= fp_lookup(p
,fd
,&fp
,0)) ) {
6343 switch (FILEGLOB_DTYPE(fp
->f_fglob
)) {
6345 error
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
);
6354 vp
= (vnode_t
)fp
->f_fglob
->fg_data
;
6356 if ((fp
->f_fglob
->fg_flag
& FWRITE
) == 0) {
6357 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
6362 if ((error
= vnode_getwithref(vp
)) != 0) {
6366 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6369 error
= mac_vnode_check_truncate(ctx
,
6370 fp
->f_fglob
->fg_cred
, vp
);
6372 (void)vnode_put(vp
);
6377 VATTR_SET(&va
, va_data_size
, uap
->length
);
6378 error
= vnode_setattr(vp
, &va
, ctx
);
6379 (void)vnode_put(vp
);
6387 * Sync an open file with synchronized I/O _file_ integrity completion
6391 fsync(proc_t p
, struct fsync_args
*uap
, __unused
int32_t *retval
)
6393 __pthread_testcancel(1);
6394 return(fsync_common(p
, uap
, MNT_WAIT
));
6399 * Sync an open file with synchronized I/O _file_ integrity completion
6401 * Notes: This is a legacy support function that does not test for
6402 * thread cancellation points.
6406 fsync_nocancel(proc_t p
, struct fsync_nocancel_args
*uap
, __unused
int32_t *retval
)
6408 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_WAIT
));
6413 * Sync an open file with synchronized I/O _data_ integrity completion
6417 fdatasync(proc_t p
, struct fdatasync_args
*uap
, __unused
int32_t *retval
)
6419 __pthread_testcancel(1);
6420 return(fsync_common(p
, (struct fsync_args
*)uap
, MNT_DWAIT
));
6427 * Common fsync code to support both synchronized I/O file integrity completion
6428 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6430 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6431 * will only guarantee that the file data contents are retrievable. If
6432 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6433 * includes additional metadata unnecessary for retrieving the file data
6434 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6437 * Parameters: p The process
6438 * uap->fd The descriptor to synchronize
6439 * flags The data integrity flags
6441 * Returns: int Success
6442 * fp_getfvp:EBADF Bad file descriptor
6443 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6444 * VNOP_FSYNC:??? unspecified
6446 * Notes: We use struct fsync_args because it is a short name, and all
6447 * caller argument structures are otherwise identical.
6450 fsync_common(proc_t p
, struct fsync_args
*uap
, int flags
)
6453 struct fileproc
*fp
;
6454 vfs_context_t ctx
= vfs_context_current();
6457 AUDIT_ARG(fd
, uap
->fd
);
6459 if ( (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
)) )
6461 if ( (error
= vnode_getwithref(vp
)) ) {
6466 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
6468 error
= VNOP_FSYNC(vp
, flags
, ctx
);
6471 /* Sync resource fork shadow file if necessary. */
6473 (vp
->v_flag
& VISNAMEDSTREAM
) &&
6474 (vp
->v_parent
!= NULLVP
) &&
6475 vnode_isshadow(vp
) &&
6476 (fp
->f_flags
& FP_WRITTEN
)) {
6477 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
);
6481 (void)vnode_put(vp
);
6487 * Duplicate files. Source must be a file, target must be a file or
6490 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6491 * perform inheritance correctly.
6495 copyfile(__unused proc_t p
, struct copyfile_args
*uap
, __unused
int32_t *retval
)
6497 vnode_t tvp
, fvp
, tdvp
, sdvp
;
6498 struct nameidata fromnd
, tond
;
6500 vfs_context_t ctx
= vfs_context_current();
6502 /* Check that the flags are valid. */
6504 if (uap
->flags
& ~CPF_MASK
) {
6508 NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, SAVESTART
| AUDITVNPATH1
,
6509 UIO_USERSPACE
, uap
->from
, ctx
);
6510 if ((error
= namei(&fromnd
)))
6514 NDINIT(&tond
, CREATE
, OP_LINK
,
6515 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6516 UIO_USERSPACE
, uap
->to
, ctx
);
6517 if ((error
= namei(&tond
))) {
6524 if (!(uap
->flags
& CPF_OVERWRITE
)) {
6529 if (fvp
->v_type
== VDIR
|| (tvp
&& tvp
->v_type
== VDIR
)) {
6534 if ((error
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0)
6540 * If source is the same as the destination (that is the
6541 * same inode number) then there is nothing to do.
6542 * (fixed to have POSIX semantics - CSM 3/2/98)
6547 error
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
);
6549 sdvp
= tond
.ni_startdir
;
6551 * nameidone has to happen before we vnode_put(tdvp)
6552 * since it may need to release the fs_nodelock on the tdvp
6563 if (fromnd
.ni_startdir
)
6564 vnode_put(fromnd
.ni_startdir
);
6574 * Rename files. Source and destination must either both be directories,
6575 * or both not be directories. If target is a directory, it must be empty.
6579 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
,
6580 int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
)
6584 struct nameidata
*fromnd
, *tond
;
6589 const char *oname
= NULL
;
6590 char *from_name
= NULL
, *to_name
= NULL
;
6591 int from_len
=0, to_len
=0;
6592 int holding_mntlock
;
6593 mount_t locked_mp
= NULL
;
6594 vnode_t oparent
= NULLVP
;
6596 fse_info from_finfo
, to_finfo
;
6598 int from_truncated
=0, to_truncated
;
6600 struct vnode_attr
*fvap
, *tvap
;
6602 /* carving out a chunk for structs that are too big to be on stack. */
6604 struct nameidata from_node
, to_node
;
6605 struct vnode_attr fv_attr
, tv_attr
;
6607 MALLOC(__rename_data
, void *, sizeof(*__rename_data
), M_TEMP
, M_WAITOK
);
6608 fromnd
= &__rename_data
->from_node
;
6609 tond
= &__rename_data
->to_node
;
6611 holding_mntlock
= 0;
6619 NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT
| AUDITVNPATH1
,
6621 fromnd
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6623 NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT
| AUDITVNPATH2
| CN_NBMOUNTLOOK
,
6625 tond
->ni_flag
= NAMEI_COMPOUNDRENAME
;
6628 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6629 if ( (error
= nameiat(fromnd
, fromfd
)) )
6631 fdvp
= fromnd
->ni_dvp
;
6632 fvp
= fromnd
->ni_vp
;
6634 if (fvp
&& fvp
->v_type
== VDIR
)
6635 tond
->ni_cnd
.cn_flags
|= WILLBEDIR
;
6638 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) {
6639 if ( (error
= nameiat(tond
, tofd
)) ) {
6641 * Translate error code for rename("dir1", "dir2/.").
6643 if (error
== EISDIR
&& fvp
->v_type
== VDIR
)
6647 tdvp
= tond
->ni_dvp
;
6651 batched
= vnode_compound_rename_available(fdvp
);
6654 * Claim: this check will never reject a valid rename.
6655 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6656 * Suppose fdvp and tdvp are not on the same mount.
6657 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6658 * then you can't move it to within another dir on the same mountpoint.
6659 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6661 * If this check passes, then we are safe to pass these vnodes to the same FS.
6663 if (fdvp
->v_mount
!= tdvp
->v_mount
) {
6667 goto skipped_lookup
;
6671 error
= vn_authorize_rename(fdvp
, fvp
, &fromnd
->ni_cnd
, tdvp
, tvp
, &tond
->ni_cnd
, ctx
, NULL
);
6673 if (error
== ENOENT
) {
6675 * We encountered a race where after doing the namei, tvp stops
6676 * being valid. If so, simply re-drive the rename call from the
6686 * If the source and destination are the same (i.e. they're
6687 * links to the same vnode) and the target file system is
6688 * case sensitive, then there is nothing to do.
6690 * XXX Come back to this.
6696 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6697 * then assume that this file system is case sensitive.
6699 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 ||
6700 pathconf_val
!= 0) {
6706 * Allow the renaming of mount points.
6707 * - target must not exist
6708 * - target must reside in the same directory as source
6709 * - union mounts cannot be renamed
6710 * - "/" cannot be renamed
6712 * XXX Handle this in VFS after a continued lookup (if we missed
6713 * in the cache to start off)
6715 if ((fvp
->v_flag
& VROOT
) &&
6716 (fvp
->v_type
== VDIR
) &&
6718 (fvp
->v_mountedhere
== NULL
) &&
6720 ((fvp
->v_mount
->mnt_flag
& (MNT_UNION
| MNT_ROOTFS
)) == 0) &&
6721 (fvp
->v_mount
->mnt_vnodecovered
!= NULLVP
)) {
6724 /* switch fvp to the covered vnode */
6725 coveredvp
= fvp
->v_mount
->mnt_vnodecovered
;
6726 if ( (vnode_getwithref(coveredvp
)) ) {
6736 * Check for cross-device rename.
6738 if ((fvp
->v_mount
!= tdvp
->v_mount
) ||
6739 (tvp
&& (fvp
->v_mount
!= tvp
->v_mount
))) {
6745 * If source is the same as the destination (that is the
6746 * same inode number) then there is nothing to do...
6747 * EXCEPT if the underlying file system supports case
6748 * insensitivity and is case preserving. In this case
6749 * the file system needs to handle the special case of
6750 * getting the same vnode as source (fvp) and target (tvp).
6752 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6753 * and _PC_CASE_PRESERVING can have this exception, and they need to
6754 * handle the special case of getting the same vnode as target and
6755 * source. NOTE: Then the target is unlocked going into vnop_rename,
6756 * so not to cause locking problems. There is a single reference on tvp.
6758 * NOTE - that fvp == tvp also occurs if they are hard linked and
6759 * that correct behaviour then is just to return success without doing
6762 * XXX filesystem should take care of this itself, perhaps...
6764 if (fvp
== tvp
&& fdvp
== tdvp
) {
6765 if (fromnd
->ni_cnd
.cn_namelen
== tond
->ni_cnd
.cn_namelen
&&
6766 !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
,
6767 fromnd
->ni_cnd
.cn_namelen
)) {
6772 if (holding_mntlock
&& fvp
->v_mount
!= locked_mp
) {
6774 * we're holding a reference and lock
6775 * on locked_mp, but it no longer matches
6776 * what we want to do... so drop our hold
6778 mount_unlock_renames(locked_mp
);
6779 mount_drop(locked_mp
, 0);
6780 holding_mntlock
= 0;
6782 if (tdvp
!= fdvp
&& fvp
->v_type
== VDIR
) {
6784 * serialize renames that re-shape
6785 * the tree... if holding_mntlock is
6786 * set, then we're ready to go...
6788 * first need to drop the iocounts
6789 * we picked up, second take the
6790 * lock to serialize the access,
6791 * then finally start the lookup
6792 * process over with the lock held
6794 if (!holding_mntlock
) {
6796 * need to grab a reference on
6797 * the mount point before we
6798 * drop all the iocounts... once
6799 * the iocounts are gone, the mount
6802 locked_mp
= fvp
->v_mount
;
6803 mount_ref(locked_mp
, 0);
6806 * nameidone has to happen before we vnode_put(tvp)
6807 * since it may need to release the fs_nodelock on the tvp
6816 * nameidone has to happen before we vnode_put(fdvp)
6817 * since it may need to release the fs_nodelock on the fvp
6824 mount_lock_renames(locked_mp
);
6825 holding_mntlock
= 1;
6831 * when we dropped the iocounts to take
6832 * the lock, we allowed the identity of
6833 * the various vnodes to change... if they did,
6834 * we may no longer be dealing with a rename
6835 * that reshapes the tree... once we're holding
6836 * the iocounts, the vnodes can't change type
6837 * so we're free to drop the lock at this point
6840 if (holding_mntlock
) {
6841 mount_unlock_renames(locked_mp
);
6842 mount_drop(locked_mp
, 0);
6843 holding_mntlock
= 0;
6847 // save these off so we can later verify that fvp is the same
6848 oname
= fvp
->v_name
;
6849 oparent
= fvp
->v_parent
;
6853 need_event
= need_fsevent(FSE_RENAME
, fdvp
);
6856 get_fse_info(fvp
, &from_finfo
, ctx
);
6858 error
= vfs_get_notify_attributes(&__rename_data
->fv_attr
);
6863 fvap
= &__rename_data
->fv_attr
;
6867 get_fse_info(tvp
, &to_finfo
, ctx
);
6868 } else if (batched
) {
6869 error
= vfs_get_notify_attributes(&__rename_data
->tv_attr
);
6874 tvap
= &__rename_data
->tv_attr
;
6879 #endif /* CONFIG_FSE */
6881 if (need_event
|| kauth_authorize_fileop_has_listeners()) {
6882 if (from_name
== NULL
) {
6883 GET_PATH(from_name
);
6884 if (from_name
== NULL
) {
6890 from_len
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
);
6892 if (to_name
== NULL
) {
6894 if (to_name
== NULL
) {
6900 to_len
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
);
6902 #if CONFIG_SECLUDED_RENAME
6903 if (flags
& VFS_SECLUDE_RENAME
) {
6904 fromnd
->ni_cnd
.cn_flags
|= CN_SECLUDE_RENAME
;
6907 #pragma unused(flags)
6909 error
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
,
6910 tdvp
, &tvp
, &tond
->ni_cnd
, tvap
,
6913 if (holding_mntlock
) {
6915 * we can drop our serialization
6918 mount_unlock_renames(locked_mp
);
6919 mount_drop(locked_mp
, 0);
6920 holding_mntlock
= 0;
6923 if (error
== EKEEPLOOKING
) {
6924 if ((fromnd
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6925 if ((tond
->ni_flag
& NAMEI_CONTLOOKUP
) == 0) {
6926 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6930 fromnd
->ni_vp
= fvp
;
6933 goto continue_lookup
;
6937 * We may encounter a race in the VNOP where the destination didn't
6938 * exist when we did the namei, but it does by the time we go and
6939 * try to create the entry. In this case, we should re-drive this rename
6940 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6941 * but other filesystems susceptible to this race could return it, too.
6943 if (error
== ERECYCLE
) {
6950 /* call out to allow 3rd party notification of rename.
6951 * Ignore result of kauth_authorize_fileop call.
6953 kauth_authorize_fileop(vfs_context_ucred(ctx
),
6954 KAUTH_FILEOP_RENAME
,
6955 (uintptr_t)from_name
, (uintptr_t)to_name
);
6958 if (from_name
!= NULL
&& to_name
!= NULL
) {
6959 if (from_truncated
|| to_truncated
) {
6960 // set it here since only the from_finfo gets reported up to user space
6961 from_finfo
.mode
|= FSE_TRUNCATED_PATH
;
6965 vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
);
6968 vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
);
6972 add_fsevent(FSE_RENAME
, ctx
,
6973 FSE_ARG_STRING
, from_len
, from_name
,
6974 FSE_ARG_FINFO
, &from_finfo
,
6975 FSE_ARG_STRING
, to_len
, to_name
,
6976 FSE_ARG_FINFO
, &to_finfo
,
6979 add_fsevent(FSE_RENAME
, ctx
,
6980 FSE_ARG_STRING
, from_len
, from_name
,
6981 FSE_ARG_FINFO
, &from_finfo
,
6982 FSE_ARG_STRING
, to_len
, to_name
,
6986 #endif /* CONFIG_FSE */
6989 * update filesystem's mount point data
6992 char *cp
, *pathend
, *mpname
;
6998 mp
= fvp
->v_mountedhere
;
7000 if (vfs_busy(mp
, LK_NOWAIT
)) {
7004 MALLOC_ZONE(tobuf
, char *, MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
7006 if (UIO_SEG_IS_USER_SPACE(segflg
))
7007 error
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
);
7009 error
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
);
7011 /* find current mount point prefix */
7012 pathend
= &mp
->mnt_vfsstat
.f_mntonname
[0];
7013 for (cp
= pathend
; *cp
!= '\0'; ++cp
) {
7017 /* find last component of target name */
7018 for (mpname
= cp
= tobuf
; *cp
!= '\0'; ++cp
) {
7022 /* append name to prefix */
7023 maxlen
= MAXPATHLEN
- (pathend
- mp
->mnt_vfsstat
.f_mntonname
);
7024 bzero(pathend
, maxlen
);
7025 strlcpy(pathend
, mpname
, maxlen
);
7027 FREE_ZONE(tobuf
, MAXPATHLEN
, M_NAMEI
);
7032 * fix up name & parent pointers. note that we first
7033 * check that fvp has the same name/parent pointers it
7034 * had before the rename call... this is a 'weak' check
7037 * XXX oparent and oname may not be set in the compound vnop case
7039 if (batched
|| (oname
== fvp
->v_name
&& oparent
== fvp
->v_parent
)) {
7042 update_flags
= VNODE_UPDATE_NAME
;
7045 update_flags
|= VNODE_UPDATE_PARENT
;
7047 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
);
7050 if (to_name
!= NULL
) {
7051 RELEASE_PATH(to_name
);
7054 if (from_name
!= NULL
) {
7055 RELEASE_PATH(from_name
);
7058 if (holding_mntlock
) {
7059 mount_unlock_renames(locked_mp
);
7060 mount_drop(locked_mp
, 0);
7061 holding_mntlock
= 0;
7065 * nameidone has to happen before we vnode_put(tdvp)
7066 * since it may need to release the fs_nodelock on the tdvp
7076 * nameidone has to happen before we vnode_put(fdvp)
7077 * since it may need to release the fs_nodelock on the fdvp
7087 * If things changed after we did the namei, then we will re-drive
7088 * this rename call from the top.
7095 FREE(__rename_data
, M_TEMP
);
7100 rename(__unused proc_t p
, struct rename_args
*uap
, __unused
int32_t *retval
)
7102 return (renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
,
7103 AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0));
7106 #if CONFIG_SECLUDED_RENAME
7107 int rename_ext(__unused proc_t p
, struct rename_ext_args
*uap
, __unused
int32_t *retval
)
7109 return renameat_internal(
7110 vfs_context_current(),
7111 AT_FDCWD
, uap
->from
,
7113 UIO_USERSPACE
, uap
->flags
);
7118 renameat(__unused proc_t p
, struct renameat_args
*uap
, __unused
int32_t *retval
)
7120 return (renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
,
7121 uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0));
7125 * Make a directory file.
7127 * Returns: 0 Success
7130 * vnode_authorize:???
7135 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr
*vap
, int fd
,
7136 enum uio_seg segflg
)
7140 int update_flags
= 0;
7142 struct nameidata nd
;
7144 AUDIT_ARG(mode
, vap
->va_mode
);
7145 NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT
| AUDITVNPATH1
, segflg
,
7147 nd
.ni_cnd
.cn_flags
|= WILLBEDIR
;
7148 nd
.ni_flag
= NAMEI_COMPOUNDMKDIR
;
7151 error
= nameiat(&nd
, fd
);
7162 batched
= vnode_compound_mkdir_available(dvp
);
7164 VATTR_SET(vap
, va_type
, VDIR
);
7168 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7169 * only get EXISTS or EISDIR for existing path components, and not that it could see
7170 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7171 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7173 if ((error
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) {
7174 if (error
== EACCES
|| error
== EPERM
) {
7182 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7183 * rather than EACCESS if the target exists.
7185 NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
,
7187 error2
= nameiat(&nd
, fd
);
7201 * make the directory
7203 if ((error
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) {
7204 if (error
== EKEEPLOOKING
) {
7206 goto continue_lookup
;
7212 // Make sure the name & parent pointers are hooked up
7213 if (vp
->v_name
== NULL
)
7214 update_flags
|= VNODE_UPDATE_NAME
;
7215 if (vp
->v_parent
== NULLVP
)
7216 update_flags
|= VNODE_UPDATE_PARENT
;
7219 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
);
7222 add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
7227 * nameidone has to happen before we vnode_put(dvp)
7228 * since it may need to release the fs_nodelock on the dvp
7241 * mkdir_extended: Create a directory; with extended security (ACL).
7243 * Parameters: p Process requesting to create the directory
7244 * uap User argument descriptor (see below)
7247 * Indirect: uap->path Path of directory to create
7248 * uap->mode Access permissions to set
7249 * uap->xsecurity ACL to set
7251 * Returns: 0 Success
7256 mkdir_extended(proc_t p
, struct mkdir_extended_args
*uap
, __unused
int32_t *retval
)
7259 kauth_filesec_t xsecdst
;
7260 struct vnode_attr va
;
7262 AUDIT_ARG(owner
, uap
->uid
, uap
->gid
);
7265 if ((uap
->xsecurity
!= USER_ADDR_NULL
) &&
7266 ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0))
7270 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7271 if (xsecdst
!= NULL
)
7272 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
);
7274 ciferror
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7276 if (xsecdst
!= NULL
)
7277 kauth_filesec_free(xsecdst
);
7282 mkdir(proc_t p
, struct mkdir_args
*uap
, __unused
int32_t *retval
)
7284 struct vnode_attr va
;
7287 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7289 return (mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
,
7294 mkdirat(proc_t p
, struct mkdirat_args
*uap
, __unused
int32_t *retval
)
7296 struct vnode_attr va
;
7299 VATTR_SET(&va
, va_mode
, (uap
->mode
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
);
7301 return(mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
,
7306 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
,
7307 enum uio_seg segflg
)
7311 struct nameidata nd
;
7314 int has_listeners
= 0;
7318 struct vnode_attr va
;
7319 #endif /* CONFIG_FSE */
7320 struct vnode_attr
*vap
= NULL
;
7326 * This loop exists to restart rmdir in the unlikely case that two
7327 * processes are simultaneously trying to remove the same directory
7328 * containing orphaned appleDouble files.
7331 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT
| AUDITVNPATH1
,
7332 segflg
, dirpath
, ctx
);
7333 nd
.ni_flag
= NAMEI_COMPOUNDRMDIR
;
7338 error
= nameiat(&nd
, fd
);
7346 batched
= vnode_compound_rmdir_available(vp
);
7348 if (vp
->v_flag
& VROOT
) {
7350 * The root of a mounted filesystem cannot be deleted.
7357 * Removed a check here; we used to abort if vp's vid
7358 * was not the same as what we'd seen the last time around.
7359 * I do not think that check was valid, because if we retry
7360 * and all dirents are gone, the directory could legitimately
7361 * be recycled but still be present in a situation where we would
7362 * have had permission to delete. Therefore, we won't make
7363 * an effort to preserve that check now that we may not have a
7368 error
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
);
7376 if (!vnode_compound_rmdir_available(dvp
)) {
7377 panic("No error, but no compound rmdir?");
7384 need_event
= need_fsevent(FSE_DELETE
, dvp
);
7387 get_fse_info(vp
, &finfo
, ctx
);
7389 error
= vfs_get_notify_attributes(&va
);
7398 has_listeners
= kauth_authorize_fileop_has_listeners();
7399 if (need_event
|| has_listeners
) {
7408 len
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated
);
7411 finfo
.mode
|= FSE_TRUNCATED_PATH
;
7416 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7419 /* Couldn't find a vnode */
7423 if (error
== EKEEPLOOKING
) {
7424 goto continue_lookup
;
7426 #if CONFIG_APPLEDOUBLE
7428 * Special case to remove orphaned AppleDouble
7429 * files. I don't like putting this in the kernel,
7430 * but carbon does not like putting this in carbon either,
7433 if (error
== ENOTEMPTY
) {
7434 error
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
);
7435 if (error
== EBUSY
) {
7441 * Assuming everything went well, we will try the RMDIR again
7444 error
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
);
7446 #endif /* CONFIG_APPLEDOUBLE */
7448 * Call out to allow 3rd party notification of delete.
7449 * Ignore result of kauth_authorize_fileop call.
7452 if (has_listeners
) {
7453 kauth_authorize_fileop(vfs_context_ucred(ctx
),
7454 KAUTH_FILEOP_DELETE
,
7459 if (vp
->v_flag
& VISHARDLINK
) {
7460 // see the comment in unlink1() about why we update
7461 // the parent of a hard link when it is removed
7462 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
);
7468 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
);
7470 add_fsevent(FSE_DELETE
, ctx
,
7471 FSE_ARG_STRING
, len
, path
,
7472 FSE_ARG_FINFO
, &finfo
,
7484 * nameidone has to happen before we vnode_put(dvp)
7485 * since it may need to release the fs_nodelock on the dvp
7493 if (restart_flag
== 0) {
7494 wakeup_one((caddr_t
)vp
);
7497 tsleep(vp
, PVFS
, "rm AD", 1);
7499 } while (restart_flag
!= 0);
7506 * Remove a directory file.
7510 rmdir(__unused proc_t p
, struct rmdir_args
*uap
, __unused
int32_t *retval
)
7512 return (rmdirat_internal(vfs_context_current(), AT_FDCWD
,
7513 CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
));
/*
 * DIRENT64_LEN(namlen): direntry length padded to 8 byte alignment.
 *
 * Yields the size in bytes of a struct direntry record carrying a name of
 * `namlen' characters.  (MAXPATHLEN - 1) is taken off sizeof(struct direntry)
 * and the actual name length added back -- presumably because the struct
 * embeds a fixed MAXPATHLEN-byte name array of which one byte remains for the
 * terminating NUL; TODO confirm against the struct direntry declaration.
 * Adding 7 and masking with ~7 rounds the result up to a multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7521 vnode_readdir64(struct vnode
*vp
, struct uio
*uio
, int flags
, int *eofflag
,
7522 int *numdirent
, vfs_context_t ctxp
)
7524 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
7525 if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags
& VFC_VFSREADDIR_EXTENDED
) &&
7526 ((vp
->v_mount
->mnt_kern_flag
& MNTK_DENY_READDIREXT
) == 0)) {
7527 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
);
7532 struct direntry
*entry64
;
7538 * Our kernel buffer needs to be smaller since re-packing
7539 * will expand each dirent. The worst case (when the name
7540 * length is 3) corresponds to a struct direntry size of 32
7541 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7542 * (4-byte aligned). So having a buffer that is 3/8 the size
7543 * will prevent us from reading more than we can pack.
7545 * Since this buffer is wired memory, we will limit the
7546 * buffer size to a maximum of 32K. We would really like to
7547 * use 32K in the MIN(), but we use magic number 87371 to
7548 * prevent uio_resid() * 3 / 8 from overflowing.
7550 bufsize
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8;
7551 MALLOC(bufptr
, void *, bufsize
, M_TEMP
, M_WAITOK
);
7552 if (bufptr
== NULL
) {
7556 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
7557 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
);
7558 auio
->uio_offset
= uio
->uio_offset
;
7560 error
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
);
7562 dep
= (struct dirent
*)bufptr
;
7563 bytesread
= bufsize
- uio_resid(auio
);
7565 MALLOC(entry64
, struct direntry
*, sizeof(struct direntry
),
7568 * Convert all the entries and copy them out to user's buffer.
7570 while (error
== 0 && (char *)dep
< ((char *)bufptr
+ bytesread
)) {
7571 size_t enbufsize
= DIRENT64_LEN(dep
->d_namlen
);
7573 bzero(entry64
, enbufsize
);
7574 /* Convert a dirent to a dirent64. */
7575 entry64
->d_ino
= dep
->d_ino
;
7576 entry64
->d_seekoff
= 0;
7577 entry64
->d_reclen
= enbufsize
;
7578 entry64
->d_namlen
= dep
->d_namlen
;
7579 entry64
->d_type
= dep
->d_type
;
7580 bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen
+ 1);
7582 /* Move to next entry. */
7583 dep
= (struct dirent
*)((char *)dep
+ dep
->d_reclen
);
7585 /* Copy entry64 to user's buffer. */
7586 error
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
);
7589 /* Update the real offset using the offset we got from VNOP_READDIR. */
7591 uio
->uio_offset
= auio
->uio_offset
;
7594 FREE(bufptr
, M_TEMP
);
7595 FREE(entry64
, M_TEMP
);
7600 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7603 * Read a block of directory entries in a file system independent format.
7606 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t
*bytesread
,
7607 off_t
*offset
, int flags
)
7610 struct vfs_context context
= *vfs_context_current(); /* local copy */
7611 struct fileproc
*fp
;
7613 int spacetype
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7615 int error
, eofflag
, numdirent
;
7616 char uio_buf
[ UIO_SIZEOF(1) ];
7618 error
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
);
7622 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7623 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7628 if (bufsize
> GETDIRENTRIES_MAXBUFSIZE
)
7629 bufsize
= GETDIRENTRIES_MAXBUFSIZE
;
7632 error
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->f_fglob
);
7636 if ( (error
= vnode_getwithref(vp
)) ) {
7639 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7642 if (vp
->v_type
!= VDIR
) {
7643 (void)vnode_put(vp
);
7649 error
= mac_vnode_check_readdir(&context
, vp
);
7651 (void)vnode_put(vp
);
7656 loff
= fp
->f_fglob
->fg_offset
;
7657 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7658 uio_addiov(auio
, bufp
, bufsize
);
7660 if (flags
& VNODE_READDIR_EXTENDED
) {
7661 error
= vnode_readdir64(vp
, auio
, flags
, &eofflag
, &numdirent
, &context
);
7662 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7664 error
= VNOP_READDIR(vp
, auio
, 0, &eofflag
, &numdirent
, &context
);
7665 fp
->f_fglob
->fg_offset
= uio_offset(auio
);
7668 (void)vnode_put(vp
);
7672 if ((user_ssize_t
)bufsize
== uio_resid(auio
)){
7673 if (union_dircheckp
) {
7674 error
= union_dircheckp(&vp
, fp
, &context
);
7681 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
)) {
7682 struct vnode
*tvp
= vp
;
7683 if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) {
7685 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7686 fp
->f_fglob
->fg_offset
= 0;
7700 *bytesread
= bufsize
- uio_resid(auio
);
7708 getdirentries(__unused
struct proc
*p
, struct getdirentries_args
*uap
, int32_t *retval
)
7714 AUDIT_ARG(fd
, uap
->fd
);
7715 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, &bytesread
, &offset
, 0);
7718 if (proc_is64bit(p
)) {
7719 user64_long_t base
= (user64_long_t
)offset
;
7720 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
));
7722 user32_long_t base
= (user32_long_t
)offset
;
7723 error
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
));
7725 *retval
= bytesread
;
7731 getdirentries64(__unused
struct proc
*p
, struct getdirentries64_args
*uap
, user_ssize_t
*retval
)
7737 AUDIT_ARG(fd
, uap
->fd
);
7738 error
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->bufsize
, &bytesread
, &offset
, VNODE_READDIR_EXTENDED
);
7741 *retval
= bytesread
;
7742 error
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
));
7749 * Set the mode mask for creation of filesystem nodes.
7750 * XXX implement xsecurity
7752 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7754 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
)
7756 struct filedesc
*fdp
;
7758 AUDIT_ARG(mask
, newmask
);
7761 *retval
= fdp
->fd_cmask
;
7762 fdp
->fd_cmask
= newmask
& ALLPERMS
;
7768 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7770 * Parameters: p Process requesting to set the umask
7771 * uap User argument descriptor (see below)
7772 * retval umask of the process (parameter p)
7774 * Indirect: uap->newmask umask to set
7775 * uap->xsecurity ACL to set
7777 * Returns: 0 Success
7782 umask_extended(proc_t p
, struct umask_extended_args
*uap
, int32_t *retval
)
7785 kauth_filesec_t xsecdst
;
7787 xsecdst
= KAUTH_FILESEC_NONE
;
7788 if (uap
->xsecurity
!= USER_ADDR_NULL
) {
7789 if ((ciferror
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)
7792 xsecdst
= KAUTH_FILESEC_NONE
;
7795 ciferror
= umask1(p
, uap
->newmask
, xsecdst
, retval
);
7797 if (xsecdst
!= KAUTH_FILESEC_NONE
)
7798 kauth_filesec_free(xsecdst
);
7803 umask(proc_t p
, struct umask_args
*uap
, int32_t *retval
)
7805 return(umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
));
7809 * Void all references to file by ripping underlying filesystem
7814 revoke(proc_t p
, struct revoke_args
*uap
, __unused
int32_t *retval
)
7817 struct vnode_attr va
;
7818 vfs_context_t ctx
= vfs_context_current();
7820 struct nameidata nd
;
7822 NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW
| AUDITVNPATH1
, UIO_USERSPACE
,
7831 if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) {
7836 if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) {
7842 error
= mac_vnode_check_revoke(ctx
, vp
);
7848 VATTR_WANTED(&va
, va_uid
);
7849 if ((error
= vnode_getattr(vp
, &va
, ctx
)))
7851 if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid
&&
7852 (error
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
)))
7854 if (vp
->v_usecount
> 0 || (vnode_isaliased(vp
)))
7855 VNOP_REVOKE(vp
, REVOKEALL
, ctx
);
7863 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7864 * The following system calls are designed to support features
7865 * which are specific to the HFS & HFS Plus volume formats
7870 * Obtain attribute information on objects in a directory while enumerating
7875 getdirentriesattr (proc_t p
, struct getdirentriesattr_args
*uap
, int32_t *retval
)
7878 struct fileproc
*fp
;
7880 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
7881 uint32_t count
, savecount
;
7885 struct attrlist attributelist
;
7886 vfs_context_t ctx
= vfs_context_current();
7888 char uio_buf
[ UIO_SIZEOF(1) ];
7889 kauth_action_t action
;
7893 /* Get the attributes into kernel space */
7894 if ((error
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) {
7897 if ((error
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) {
7901 if ( (error
= fp_getfvp(p
, fd
, &fp
, &vp
)) ) {
7904 if ((fp
->f_fglob
->fg_flag
& FREAD
) == 0) {
7905 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
);
7912 error
= mac_file_check_change_offset(vfs_context_ucred(ctx
),
7919 if ( (error
= vnode_getwithref(vp
)) )
7922 AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
);
7925 if (vp
->v_type
!= VDIR
) {
7926 (void)vnode_put(vp
);
7932 error
= mac_vnode_check_readdir(ctx
, vp
);
7934 (void)vnode_put(vp
);
7939 /* set up the uio structure which will contain the users return buffer */
7940 loff
= fp
->f_fglob
->fg_offset
;
7941 auio
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
7942 uio_addiov(auio
, uap
->buffer
, uap
->buffersize
);
7945 * If the only item requested is file names, we can let that past with
7946 * just LIST_DIRECTORY. If they want any other attributes, that means
7947 * they need SEARCH as well.
7949 action
= KAUTH_VNODE_LIST_DIRECTORY
;
7950 if ((attributelist
.commonattr
& ~ATTR_CMN_NAME
) ||
7951 attributelist
.fileattr
|| attributelist
.dirattr
)
7952 action
|= KAUTH_VNODE_SEARCH
;
7954 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) {
7956 /* Believe it or not, uap->options only has 32-bits of valid
7957 * info, so truncate before extending again */
7959 error
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
,
7960 (u_long
)(uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
);
7964 (void) vnode_put(vp
);
7969 * If we've got the last entry of a directory in a union mount
7970 * then reset the eofflag and pretend there's still more to come.
7971 * The next call will again set eofflag and the buffer will be empty,
7972 * so traverse to the underlying directory and do the directory
7975 if (eofflag
&& vp
->v_mount
->mnt_flag
& MNT_UNION
) {
7976 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries
7978 } else { // Empty buffer
7979 struct vnode
*tvp
= vp
;
7980 if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) {
7981 vnode_ref_ext(vp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0);
7982 fp
->f_fglob
->fg_data
= (caddr_t
) vp
;
7983 fp
->f_fglob
->fg_offset
= 0; // reset index for new dir
7985 vnode_rele_internal(tvp
, fp
->f_fglob
->fg_flag
& O_EVTONLY
, 0, 0);
7993 (void)vnode_put(vp
);
7997 fp
->f_fglob
->fg_offset
= uio_offset(auio
); /* should be multiple of dirent, not variable */
7999 if ((error
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
))))
8001 if ((error
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
))))
8003 if ((error
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
))))
8006 *retval
= eofflag
; /* similar to getdirentries */
8010 return (error
); /* return error earlier, an retval of 0 or 1 now */
8012 } /* end of getdirentriesattr system call */
8015 * Exchange data between two files
8020 exchangedata (__unused proc_t p
, struct exchangedata_args
*uap
, __unused
int32_t *retval
)
8023 struct nameidata fnd
, snd
;
8024 vfs_context_t ctx
= vfs_context_current();
8028 u_int32_t nameiflags
;
8032 int from_truncated
=0, to_truncated
=0;
8034 fse_info f_finfo
, s_finfo
;
8038 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8040 NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags
| AUDITVNPATH1
,
8041 UIO_USERSPACE
, uap
->path1
, ctx
);
8043 error
= namei(&fnd
);
8050 NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK
| nameiflags
| AUDITVNPATH2
,
8051 UIO_USERSPACE
, uap
->path2
, ctx
);
8053 error
= namei(&snd
);
8062 * if the files are the same, return an inval error
8070 * if the files are on different volumes, return an error
8072 if (svp
->v_mount
!= fvp
->v_mount
) {
8077 /* If they're not files, return an error */
8078 if ( (vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) {
8084 error
= mac_vnode_check_exchangedata(ctx
,
8089 if (((error
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) ||
8090 ((error
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0))
8095 need_fsevent(FSE_EXCHANGE
, fvp
) ||
8097 kauth_authorize_fileop_has_listeners()) {
8100 if (fpath
== NULL
|| spath
== NULL
) {
8105 flen
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
);
8106 slen
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
);
8109 get_fse_info(fvp
, &f_finfo
, ctx
);
8110 get_fse_info(svp
, &s_finfo
, ctx
);
8111 if (from_truncated
|| to_truncated
) {
8112 // set it here since only the f_finfo gets reported up to user space
8113 f_finfo
.mode
|= FSE_TRUNCATED_PATH
;
8117 /* Ok, make the call */
8118 error
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
);
8121 const char *tmpname
;
8123 if (fpath
!= NULL
&& spath
!= NULL
) {
8124 /* call out to allow 3rd party notification of exchangedata.
8125 * Ignore result of kauth_authorize_fileop call.
8127 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
,
8128 (uintptr_t)fpath
, (uintptr_t)spath
);
8132 tmpname
= fvp
->v_name
;
8133 fvp
->v_name
= svp
->v_name
;
8134 svp
->v_name
= tmpname
;
8136 if (fvp
->v_parent
!= svp
->v_parent
) {
8139 tmp
= fvp
->v_parent
;
8140 fvp
->v_parent
= svp
->v_parent
;
8141 svp
->v_parent
= tmp
;
8143 name_cache_unlock();
8146 if (fpath
!= NULL
&& spath
!= NULL
) {
8147 add_fsevent(FSE_EXCHANGE
, ctx
,
8148 FSE_ARG_STRING
, flen
, fpath
,
8149 FSE_ARG_FINFO
, &f_finfo
,
8150 FSE_ARG_STRING
, slen
, spath
,
8151 FSE_ARG_FINFO
, &s_finfo
,
8159 RELEASE_PATH(fpath
);
8161 RELEASE_PATH(spath
);
8169 * Return (in MB) the amount of freespace on the given vnode's volume.
8171 uint32_t freespace_mb(vnode_t vp
);
8174 freespace_mb(vnode_t vp
)
8176 vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
);
8177 return (((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail
*
8178 vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20);
8186 searchfs(proc_t p
, struct searchfs_args
*uap
, __unused
int32_t *retval
)
8191 struct nameidata nd
;
8192 struct user64_fssearchblock searchblock
;
8193 struct searchstate
*state
;
8194 struct attrlist
*returnattrs
;
8195 struct timeval timelimit
;
8196 void *searchparams1
,*searchparams2
;
8198 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
8199 uint32_t nummatches
;
8201 uint32_t nameiflags
;
8202 vfs_context_t ctx
= vfs_context_current();
8203 char uio_buf
[ UIO_SIZEOF(1) ];
8205 /* Start by copying in fsearchblock parameter list */
8206 if (IS_64BIT_PROCESS(p
)) {
8207 error
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
));
8208 timelimit
.tv_sec
= searchblock
.timelimit
.tv_sec
;
8209 timelimit
.tv_usec
= searchblock
.timelimit
.tv_usec
;
8212 struct user32_fssearchblock tmp_searchblock
;
8214 error
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
));
8215 // munge into 64-bit version
8216 searchblock
.returnattrs
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
);
8217 searchblock
.returnbuffer
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
);
8218 searchblock
.returnbuffersize
= tmp_searchblock
.returnbuffersize
;
8219 searchblock
.maxmatches
= tmp_searchblock
.maxmatches
;
8221 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8222 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8224 timelimit
.tv_sec
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
;
8225 timelimit
.tv_usec
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
;
8226 searchblock
.searchparams1
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
);
8227 searchblock
.sizeofsearchparams1
= tmp_searchblock
.sizeofsearchparams1
;
8228 searchblock
.searchparams2
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
);
8229 searchblock
.sizeofsearchparams2
= tmp_searchblock
.sizeofsearchparams2
;
8230 searchblock
.searchattrs
= tmp_searchblock
.searchattrs
;
8235 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8237 if (searchblock
.sizeofsearchparams1
> SEARCHFS_MAX_SEARCHPARMS
||
8238 searchblock
.sizeofsearchparams2
> SEARCHFS_MAX_SEARCHPARMS
)
8241 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8242 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
8243 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8246 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8247 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8248 /* assumes the size is still 556 bytes it will continue to work */
8250 mallocsize
= searchblock
.sizeofsearchparams1
+ searchblock
.sizeofsearchparams2
+
8251 sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2*sizeof(uint32_t));
8253 MALLOC(searchparams1
, void *, mallocsize
, M_TEMP
, M_WAITOK
);
8255 /* Now set up the various pointers to the correct place in our newly allocated memory */
8257 searchparams2
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
);
8258 returnattrs
= (struct attrlist
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
);
8259 state
= (struct searchstate
*) (((caddr_t
) returnattrs
) + sizeof (struct attrlist
));
8261 /* Now copy in the stuff given our local variables. */
8263 if ((error
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
)))
8266 if ((error
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
)))
8269 if ((error
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
))))
8272 if ((error
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
))))
8276 * When searching a union mount, need to set the
8277 * start flag at the first call on each layer to
8278 * reset state for the new volume.
8280 if (uap
->options
& SRCHFS_START
)
8281 state
->ss_union_layer
= 0;
8283 uap
->options
|= state
->ss_union_flags
;
8284 state
->ss_union_flags
= 0;
8287 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8288 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8289 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8290 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8291 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8294 if (searchblock
.searchattrs
.commonattr
& ATTR_CMN_NAME
) {
8295 attrreference_t
* string_ref
;
8296 u_int32_t
* start_length
;
8297 user64_size_t param_length
;
8299 /* validate searchparams1 */
8300 param_length
= searchblock
.sizeofsearchparams1
;
8301 /* skip the word that specifies length of the buffer */
8302 start_length
= (u_int32_t
*) searchparams1
;
8303 start_length
= start_length
+1;
8304 string_ref
= (attrreference_t
*) start_length
;
8306 /* ensure no negative offsets or too big offsets */
8307 if (string_ref
->attr_dataoffset
< 0 ) {
8311 if (string_ref
->attr_length
> MAXPATHLEN
) {
8316 /* Check for pointer overflow in the string ref */
8317 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) {
8322 if (((char*) string_ref
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1
+ param_length
)) {
8326 if (((char*)string_ref
+ string_ref
->attr_dataoffset
+ string_ref
->attr_length
) > ((char*)searchparams1
+ param_length
)) {
8332 /* set up the uio structure which will contain the users return buffer */
8333 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
8334 uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
);
8337 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
8338 NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags
| AUDITVNPATH1
,
8339 UIO_USERSPACE
, uap
->path
, ctx
);
8348 * Switch to the root vnode for the volume
8350 error
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
);
8357 * If it's a union mount, the path lookup takes
8358 * us to the top layer. But we may need to descend
8359 * to a lower layer. For non-union mounts the layer
8362 for (i
= 0; i
< (int) state
->ss_union_layer
; i
++) {
8363 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) == 0)
8366 vp
= vp
->v_mount
->mnt_vnodecovered
;
8372 vnode_getwithref(vp
);
8377 error
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
);
8386 * If searchblock.maxmatches == 0, then skip the search. This has happened
8387 * before and sometimes the underlying code doesn't deal with it well.
8389 if (searchblock
.maxmatches
== 0) {
8395 * All right, we have everything we need, so let's make that call.
8397 * We keep special track of the return value from the file system:
8398 * EAGAIN is an acceptable error condition that shouldn't keep us
8399 * from copying out any results...
8402 fserror
= VNOP_SEARCHFS(vp
,
8405 &searchblock
.searchattrs
,
8406 (u_long
)searchblock
.maxmatches
,
8410 (u_long
)uap
->scriptcode
,
8411 (u_long
)uap
->options
,
8413 (struct searchstate
*) &state
->ss_fsstate
,
8417 * If it's a union mount we need to be called again
8418 * to search the mounted-on filesystem.
8420 if ((vp
->v_mount
->mnt_flag
& MNT_UNION
) && fserror
== 0) {
8421 state
->ss_union_flags
= SRCHFS_START
;
8422 state
->ss_union_layer
++; // search next layer down
8430 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8431 search state. Everything was already put into the return buffer by the vop call. */
8433 if ((error
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0)
8436 if ((error
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0)
8443 FREE(searchparams1
,M_TEMP
);
8448 } /* end of searchfs system call */
8450 #else /* CONFIG_SEARCHFS */
8453 searchfs(__unused proc_t p
, __unused
struct searchfs_args
*uap
, __unused
int32_t *retval
)
8458 #endif /* CONFIG_SEARCHFS */
8461 lck_grp_attr_t
* nspace_group_attr
;
8462 lck_attr_t
* nspace_lock_attr
;
8463 lck_grp_t
* nspace_mutex_group
;
8465 lck_mtx_t nspace_handler_lock
;
8466 lck_mtx_t nspace_handler_exclusion_lock
;
8468 time_t snapshot_timestamp
=0;
8469 int nspace_allow_virtual_devs
=0;
8471 void nspace_handler_init(void);
8473 typedef struct nspace_item_info
{
8483 #define MAX_NSPACE_ITEMS 128
8484 nspace_item_info nspace_items
[MAX_NSPACE_ITEMS
];
8485 uint32_t nspace_item_idx
=0; // also used as the sleep/wakeup rendezvous address
8486 uint32_t nspace_token_id
=0;
8487 uint32_t nspace_handler_timeout
= 15; // seconds
/*
 * Bits kept in nspace_items[].flags.
 *
 * State bits:
 *   NEW          set by resolve_nspace_item_ext() when it fills a slot
 *   PROCESSING   checked together with NEW when a handler process exits
 *   CANCELLED    a waiter that sees this returns the item's token as its error
 *   DONE         a waiter that sees this stops waiting
 *   RESET_TIMER  cleared by a waiter when its sleep times out
 */
#define NSPACE_ITEM_NEW			0x0001
#define NSPACE_ITEM_PROCESSING		0x0002
#define NSPACE_ITEM_DEAD		0x0004
#define NSPACE_ITEM_CANCELLED		0x0008
#define NSPACE_ITEM_DONE		0x0010
#define NSPACE_ITEM_RESET_TIMER		0x0020

/* Event-type bits: which kind of handler the item is destined for. */
#define NSPACE_ITEM_NSPACE_EVENT	0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT	0x0080

/* Mask covering every event-type bit. */
#define NSPACE_ITEM_ALL_EVENT_TYPES	(NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
8501 //#pragma optimization_level 0
8504 NSPACE_HANDLER_NSPACE
= 0,
8505 NSPACE_HANDLER_SNAPSHOT
= 1,
8507 NSPACE_HANDLER_COUNT
,
8511 uint64_t handler_tid
;
8512 struct proc
*handler_proc
;
8516 nspace_handler_t nspace_handlers
[NSPACE_HANDLER_COUNT
];
8518 /* namespace fsctl functions */
8519 static int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
);
8520 static int nspace_item_flags_for_type(nspace_type_t nspace_type
);
8521 static int nspace_open_flags_for_type(nspace_type_t nspace_type
);
8522 static nspace_type_t
nspace_type_for_op(uint64_t op
);
8523 static int nspace_is_special_process(struct proc
*proc
);
8524 static int vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
);
8525 static int wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
);
8526 static int validate_namespace_args (int is64bit
, int size
);
8527 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
);
8530 static inline int nspace_flags_matches_handler(uint32_t event_flags
, nspace_type_t nspace_type
)
8532 switch(nspace_type
) {
8533 case NSPACE_HANDLER_NSPACE
:
8534 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_NSPACE_EVENT
;
8535 case NSPACE_HANDLER_SNAPSHOT
:
8536 return (event_flags
& NSPACE_ITEM_ALL_EVENT_TYPES
) == NSPACE_ITEM_SNAPSHOT_EVENT
;
8538 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type
);
8543 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type
)
8545 switch(nspace_type
) {
8546 case NSPACE_HANDLER_NSPACE
:
8547 return NSPACE_ITEM_NSPACE_EVENT
;
8548 case NSPACE_HANDLER_SNAPSHOT
:
8549 return NSPACE_ITEM_SNAPSHOT_EVENT
;
8551 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type
);
8556 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type
)
8558 switch(nspace_type
) {
8559 case NSPACE_HANDLER_NSPACE
:
8560 return FREAD
| FWRITE
| O_EVTONLY
;
8561 case NSPACE_HANDLER_SNAPSHOT
:
8562 return FREAD
| O_EVTONLY
;
8564 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type
);
8569 static inline nspace_type_t
nspace_type_for_op(uint64_t op
)
8571 switch(op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
) {
8572 case NAMESPACE_HANDLER_NSPACE_EVENT
:
8573 return NSPACE_HANDLER_NSPACE
;
8574 case NAMESPACE_HANDLER_SNAPSHOT_EVENT
:
8575 return NSPACE_HANDLER_SNAPSHOT
;
8577 printf("nspace_type_for_op: invalid op mask %llx\n", op
& NAMESPACE_HANDLER_EVENT_TYPE_MASK
);
8578 return NSPACE_HANDLER_NSPACE
;
8582 static inline int nspace_is_special_process(struct proc
*proc
)
8585 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8586 if (proc
== nspace_handlers
[i
].handler_proc
)
8593 nspace_handler_init(void)
8595 nspace_lock_attr
= lck_attr_alloc_init();
8596 nspace_group_attr
= lck_grp_attr_alloc_init();
8597 nspace_mutex_group
= lck_grp_alloc_init("nspace-mutex", nspace_group_attr
);
8598 lck_mtx_init(&nspace_handler_lock
, nspace_mutex_group
, nspace_lock_attr
);
8599 lck_mtx_init(&nspace_handler_exclusion_lock
, nspace_mutex_group
, nspace_lock_attr
);
8600 memset(&nspace_items
[0], 0, sizeof(nspace_items
));
8604 nspace_proc_exit(struct proc
*p
)
8606 int i
, event_mask
= 0;
8608 for (i
= 0; i
< NSPACE_HANDLER_COUNT
; i
++) {
8609 if (p
== nspace_handlers
[i
].handler_proc
) {
8610 event_mask
|= nspace_item_flags_for_type(i
);
8611 nspace_handlers
[i
].handler_tid
= 0;
8612 nspace_handlers
[i
].handler_proc
= NULL
;
8616 if (event_mask
== 0) {
8620 if (event_mask
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8621 // if this process was the snapshot handler, zero snapshot_timestamp
8622 snapshot_timestamp
= 0;
8626 // unblock anyone that's waiting for the handler that died
8628 lck_mtx_lock(&nspace_handler_lock
);
8629 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8630 if (nspace_items
[i
].flags
& (NSPACE_ITEM_NEW
| NSPACE_ITEM_PROCESSING
)) {
8632 if ( nspace_items
[i
].flags
& event_mask
) {
8634 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
8635 vnode_lock_spin(nspace_items
[i
].vp
);
8636 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
8637 vnode_unlock(nspace_items
[i
].vp
);
8639 nspace_items
[i
].vp
= NULL
;
8640 nspace_items
[i
].vid
= 0;
8641 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
8642 nspace_items
[i
].token
= 0;
8644 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
8649 wakeup((caddr_t
)&nspace_item_idx
);
8650 lck_mtx_unlock(&nspace_handler_lock
);
/*
 * Convenience wrapper: resolve a namespace item for `vp' and `op' with no
 * extra argument.  Calls resolve_nspace_item_ext() with arg == NULL and
 * returns its result.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
8661 resolve_nspace_item_ext(struct vnode
*vp
, uint64_t op
, void *arg
)
8663 int i
, error
, keep_waiting
;
8665 nspace_type_t nspace_type
= nspace_type_for_op(op
);
8667 // only allow namespace events on regular files, directories and symlinks.
8668 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&& vp
->v_type
!= VLNK
) {
8673 // if this is a snapshot event and the vnode is on a
8674 // disk image just pretend nothing happened since any
8675 // change to the disk image will cause the disk image
8676 // itself to get backed up and this avoids multi-way
8677 // deadlocks between the snapshot handler and the ever
8678 // popular diskimages-helper process. the variable
8679 // nspace_allow_virtual_devs allows this behavior to
8680 // be overridden (for use by the Mobile TimeMachine
8681 // testing infrastructure which uses disk images)
8683 if ( (op
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
)
8684 && (vp
->v_mount
!= NULL
)
8685 && (vp
->v_mount
->mnt_kern_flag
& MNTK_VIRTUALDEV
)
8686 && !nspace_allow_virtual_devs
) {
8691 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8692 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8696 if (nspace_is_special_process(current_proc())) {
8700 lck_mtx_lock(&nspace_handler_lock
);
8703 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8704 if (vp
== nspace_items
[i
].vp
&& op
== nspace_items
[i
].op
) {
8709 if (i
>= MAX_NSPACE_ITEMS
) {
8710 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8711 if (nspace_items
[i
].flags
== 0) {
8716 nspace_items
[i
].refcount
++;
8719 if (i
>= MAX_NSPACE_ITEMS
) {
8720 ts
.tv_sec
= nspace_handler_timeout
;
8723 error
= msleep((caddr_t
)&nspace_token_id
, &nspace_handler_lock
, PVFS
|PCATCH
, "nspace-no-space", &ts
);
8725 // an entry got free'd up, go see if we can get a slot
8728 lck_mtx_unlock(&nspace_handler_lock
);
8734 // if it didn't already exist, add it. if it did exist
8735 // we'll get woken up when someone does a wakeup() on
8736 // the slot in the nspace_items table.
8738 if (vp
!= nspace_items
[i
].vp
) {
8739 nspace_items
[i
].vp
= vp
;
8740 nspace_items
[i
].arg
= (arg
== NSPACE_REARM_NO_ARG
) ? NULL
: arg
; // arg is {NULL, true, uio *} - only pass uio thru to the user
8741 nspace_items
[i
].op
= op
;
8742 nspace_items
[i
].vid
= vnode_vid(vp
);
8743 nspace_items
[i
].flags
= NSPACE_ITEM_NEW
;
8744 nspace_items
[i
].flags
|= nspace_item_flags_for_type(nspace_type
);
8745 if (nspace_items
[i
].flags
& NSPACE_ITEM_SNAPSHOT_EVENT
) {
8747 vnode_lock_spin(vp
);
8748 vp
->v_flag
|= VNEEDSSNAPSHOT
;
8753 nspace_items
[i
].token
= 0;
8754 nspace_items
[i
].refcount
= 1;
8756 wakeup((caddr_t
)&nspace_item_idx
);
8760 // Now go to sleep until the handler does a wakeup on this
8761 // slot in the nspace_items table (or we timeout).
8764 while(keep_waiting
) {
8765 ts
.tv_sec
= nspace_handler_timeout
;
8767 error
= msleep((caddr_t
)&(nspace_items
[i
].vp
), &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-done", &ts
);
8769 if (nspace_items
[i
].flags
& NSPACE_ITEM_DONE
) {
8771 } else if (nspace_items
[i
].flags
& NSPACE_ITEM_CANCELLED
) {
8772 error
= nspace_items
[i
].token
;
8773 } else if (error
== EWOULDBLOCK
|| error
== ETIMEDOUT
) {
8774 if (nspace_items
[i
].flags
& NSPACE_ITEM_RESET_TIMER
) {
8775 nspace_items
[i
].flags
&= ~NSPACE_ITEM_RESET_TIMER
;
8780 } else if (error
== 0) {
8781 // hmmm, why did we get woken up?
8782 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8783 nspace_items
[i
].token
);
8786 if (--nspace_items
[i
].refcount
== 0) {
8787 nspace_items
[i
].vp
= NULL
; // clear this so that no one will match on it again
8788 nspace_items
[i
].arg
= NULL
;
8789 nspace_items
[i
].token
= 0; // clear this so that the handler will not find it anymore
8790 nspace_items
[i
].flags
= 0; // this clears it for re-use
8792 wakeup(&nspace_token_id
);
8796 lck_mtx_unlock(&nspace_handler_lock
);
8803 get_nspace_item_status(struct vnode
*vp
, int32_t *status
)
8807 lck_mtx_lock(&nspace_handler_lock
);
8808 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8809 if (nspace_items
[i
].vp
== vp
) {
8814 if (i
>= MAX_NSPACE_ITEMS
) {
8815 lck_mtx_unlock(&nspace_handler_lock
);
8819 *status
= nspace_items
[i
].flags
;
8820 lck_mtx_unlock(&nspace_handler_lock
);
8827 build_volfs_path(struct vnode
*vp
, char *path
, int *len
)
8829 struct vnode_attr va
;
8833 VATTR_WANTED(&va
, va_fsid
);
8834 VATTR_WANTED(&va
, va_fileid
);
8836 if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) {
8837 *len
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1;
8840 *len
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1;
8849 // Note: this function does NOT check permissions on all of the
8850 // parent directories leading to this vnode. It should only be
8851 // called on behalf of a root process. Otherwise a process may
8852 // get access to a file because the file itself is readable even
8853 // though its parent directories would prevent access.
8856 vn_open_with_vp(vnode_t vp
, int fmode
, vfs_context_t ctx
)
8860 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
8865 error
= mac_vnode_check_open(ctx
, vp
, fmode
);
8870 /* compute action to be authorized */
8872 if (fmode
& FREAD
) {
8873 action
|= KAUTH_VNODE_READ_DATA
;
8875 if (fmode
& (FWRITE
| O_TRUNC
)) {
8877 * If we are writing, appending, and not truncating,
8878 * indicate that we are appending so that if the
8879 * UF_APPEND or SF_APPEND bits are set, we do not deny
8882 if ((fmode
& O_APPEND
) && !(fmode
& O_TRUNC
)) {
8883 action
|= KAUTH_VNODE_APPEND_DATA
;
8885 action
|= KAUTH_VNODE_WRITE_DATA
;
8889 if ((error
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)
8894 // if the vnode is tagged VOPENEVT and the current process
8895 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8896 // flag to the open mode so that this open won't count against
8897 // the vnode when carbon delete() does a vnode_isinuse() to see
8898 // if a file is currently in use. this allows spotlight
8899 // importers to not interfere with carbon apps that depend on
8900 // the no-delete-if-busy semantics of carbon delete().
8902 if ((vp
->v_flag
& VOPENEVT
) && (current_proc()->p_flag
& P_CHECKOPENEVT
)) {
8906 if ( (error
= VNOP_OPEN(vp
, fmode
, ctx
)) ) {
8909 if ( (error
= vnode_ref_ext(vp
, fmode
, 0)) ) {
8910 VNOP_CLOSE(vp
, fmode
, ctx
);
8914 /* Call out to allow 3rd party notification of open.
8915 * Ignore result of kauth_authorize_fileop call.
8918 mac_vnode_notify_open(ctx
, vp
, fmode
);
8920 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_OPEN
,
8928 wait_for_namespace_event(namespace_handler_data
*nhd
, nspace_type_t nspace_type
)
8930 int i
, error
=0, unblock
=0;
8933 lck_mtx_lock(&nspace_handler_exclusion_lock
);
8934 if (nspace_handlers
[nspace_type
].handler_busy
) {
8935 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8938 nspace_handlers
[nspace_type
].handler_busy
= 1;
8939 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
8942 * Any process that gets here will be one of the namespace handlers.
8943 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8944 * as we can cause deadlocks to occur, because the namespace handler may prevent
8945 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8948 curtask
= current_task();
8949 bsd_set_dependency_capable (curtask
);
8951 lck_mtx_lock(&nspace_handler_lock
);
8952 if (nspace_handlers
[nspace_type
].handler_proc
== NULL
) {
8953 nspace_handlers
[nspace_type
].handler_tid
= thread_tid(current_thread());
8954 nspace_handlers
[nspace_type
].handler_proc
= current_proc();
8957 while (error
== 0) {
8959 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
8960 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
8961 if (!nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
8968 if (i
< MAX_NSPACE_ITEMS
) {
8969 nspace_items
[i
].flags
&= ~NSPACE_ITEM_NEW
;
8970 nspace_items
[i
].flags
|= NSPACE_ITEM_PROCESSING
;
8971 nspace_items
[i
].token
= ++nspace_token_id
;
8973 if (nspace_items
[i
].vp
) {
8974 struct fileproc
*fp
;
8975 int32_t indx
, fmode
;
8976 struct proc
*p
= current_proc();
8977 vfs_context_t ctx
= vfs_context_current();
8978 struct vnode_attr va
;
8982 * Use vnode pointer to acquire a file descriptor for
8983 * hand-off to userland
8985 fmode
= nspace_open_flags_for_type(nspace_type
);
8986 error
= vnode_getwithvid(nspace_items
[i
].vp
, nspace_items
[i
].vid
);
8991 error
= vn_open_with_vp(nspace_items
[i
].vp
, fmode
, ctx
);
8994 vnode_put(nspace_items
[i
].vp
);
8998 if ((error
= falloc(p
, &fp
, &indx
, ctx
))) {
8999 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9000 vnode_put(nspace_items
[i
].vp
);
9005 fp
->f_fglob
->fg_flag
= fmode
;
9006 fp
->f_fglob
->fg_ops
= &vnops
;
9007 fp
->f_fglob
->fg_data
= (caddr_t
)nspace_items
[i
].vp
;
9010 procfdtbl_releasefd(p
, indx
, NULL
);
9011 fp_drop(p
, indx
, fp
, 1);
9015 * All variants of the namespace handler struct support these three fields:
9016 * token, flags, and the FD pointer
9018 error
= copyout(&nspace_items
[i
].token
, nhd
->token
, sizeof(uint32_t));
9019 error
= copyout(&nspace_items
[i
].op
, nhd
->flags
, sizeof(uint64_t));
9020 error
= copyout(&indx
, nhd
->fdptr
, sizeof(uint32_t));
9023 * Handle optional fields:
9024 * extended version supports an info ptr (offset, length), and the
9026 * namedata version supports a unique per-link object ID
9030 uio_t uio
= (uio_t
)nspace_items
[i
].arg
;
9031 uint64_t u_offset
, u_length
;
9034 u_offset
= uio_offset(uio
);
9035 u_length
= uio_resid(uio
);
9040 error
= copyout(&u_offset
, nhd
->infoptr
, sizeof(uint64_t));
9041 error
= copyout(&u_length
, nhd
->infoptr
+sizeof(uint64_t), sizeof(uint64_t));
9046 VATTR_WANTED(&va
, va_linkid
);
9047 error
= vnode_getattr(nspace_items
[i
].vp
, &va
, ctx
);
9049 uint64_t linkid
= 0;
9050 if (VATTR_IS_SUPPORTED (&va
, va_linkid
)) {
9051 linkid
= (uint64_t)va
.va_linkid
;
9053 error
= copyout (&linkid
, nhd
->objid
, sizeof(uint64_t));
9058 vn_close(nspace_items
[i
].vp
, fmode
, ctx
);
9059 fp_free(p
, indx
, fp
);
9063 vnode_put(nspace_items
[i
].vp
);
9067 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9068 i
, nspace_items
[i
].vp
, error
, nspace_items
[i
].vp
->v_name
);
9072 error
= msleep((caddr_t
)&nspace_item_idx
, &nspace_handler_lock
, PVFS
|PCATCH
, "namespace-items", 0);
9073 if ((nspace_type
== NSPACE_HANDLER_SNAPSHOT
) && (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9082 if (nspace_items
[i
].vp
&& (nspace_items
[i
].vp
->v_flag
& VNEEDSSNAPSHOT
)) {
9083 vnode_lock_spin(nspace_items
[i
].vp
);
9084 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9085 vnode_unlock(nspace_items
[i
].vp
);
9087 nspace_items
[i
].vp
= NULL
;
9088 nspace_items
[i
].vid
= 0;
9089 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9090 nspace_items
[i
].token
= 0;
9092 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9095 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
) {
9096 // just go through every snapshot event and unblock it immediately.
9097 if (error
&& (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9098 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9099 if (nspace_items
[i
].flags
& NSPACE_ITEM_NEW
) {
9100 if (nspace_flags_matches_handler(nspace_items
[i
].flags
, nspace_type
)) {
9101 nspace_items
[i
].vp
= NULL
;
9102 nspace_items
[i
].vid
= 0;
9103 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9104 nspace_items
[i
].token
= 0;
9106 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9113 lck_mtx_unlock(&nspace_handler_lock
);
9115 lck_mtx_lock(&nspace_handler_exclusion_lock
);
9116 nspace_handlers
[nspace_type
].handler_busy
= 0;
9117 lck_mtx_unlock(&nspace_handler_exclusion_lock
);
9122 static inline int validate_namespace_args (int is64bit
, int size
) {
9125 /* Must be one of these */
9126 if (size
== sizeof(user64_namespace_handler_info
)) {
9129 if (size
== sizeof(user64_namespace_handler_info_ext
)) {
9132 if (size
== sizeof(user64_namespace_handler_data
)) {
9138 /* 32 bit -- must be one of these */
9139 if (size
== sizeof(user32_namespace_handler_info
)) {
9142 if (size
== sizeof(user32_namespace_handler_info_ext
)) {
9145 if (size
== sizeof(user32_namespace_handler_data
)) {
9157 static int process_namespace_fsctl(nspace_type_t nspace_type
, int is64bit
, u_int size
, caddr_t data
)
9160 namespace_handler_data nhd
;
9162 bzero (&nhd
, sizeof(namespace_handler_data
));
9164 if (nspace_type
== NSPACE_HANDLER_SNAPSHOT
&&
9165 (snapshot_timestamp
== 0 || snapshot_timestamp
== ~0)) {
9169 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9173 error
= validate_namespace_args (is64bit
, size
);
9178 /* Copy in the userland pointers into our kernel-only struct */
9181 /* 64 bit userland structures */
9182 nhd
.token
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->token
;
9183 nhd
.flags
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->flags
;
9184 nhd
.fdptr
= (user_addr_t
)((user64_namespace_handler_info
*)data
)->fdptr
;
9186 /* If the size is greater than the standard info struct, add in extra fields */
9187 if (size
> (sizeof(user64_namespace_handler_info
))) {
9188 if (size
>= (sizeof(user64_namespace_handler_info_ext
))) {
9189 nhd
.infoptr
= (user_addr_t
)((user64_namespace_handler_info_ext
*)data
)->infoptr
;
9191 if (size
== (sizeof(user64_namespace_handler_data
))) {
9192 nhd
.objid
= (user_addr_t
)((user64_namespace_handler_data
*)data
)->objid
;
9194 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9198 /* 32 bit userland structures */
9199 nhd
.token
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->token
);
9200 nhd
.flags
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->flags
);
9201 nhd
.fdptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info
*)data
)->fdptr
);
9203 if (size
> (sizeof(user32_namespace_handler_info
))) {
9204 if (size
>= (sizeof(user32_namespace_handler_info_ext
))) {
9205 nhd
.infoptr
= CAST_USER_ADDR_T(((user32_namespace_handler_info_ext
*)data
)->infoptr
);
9207 if (size
== (sizeof(user32_namespace_handler_data
))) {
9208 nhd
.objid
= (user_addr_t
)((user32_namespace_handler_data
*)data
)->objid
;
9210 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9214 return wait_for_namespace_event(&nhd
, nspace_type
);
9218 * Make a filesystem-specific control call:
9222 fsctl_internal(proc_t p
, vnode_t
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
)
9227 #define STK_PARAMS 128
9228 char stkbuf
[STK_PARAMS
];
9230 vnode_t vp
= *arg_vp
;
9232 size
= IOCPARM_LEN(cmd
);
9233 if (size
> IOCPARM_MAX
) return (EINVAL
);
9235 is64bit
= proc_is64bit(p
);
9238 if (size
> sizeof (stkbuf
)) {
9239 if ((memp
= (caddr_t
)kalloc(size
)) == 0) return ENOMEM
;
9247 error
= copyin(udata
, data
, size
);
9256 *(user_addr_t
*)data
= udata
;
9259 *(uint32_t *)data
= (uint32_t)udata
;
9262 } else if ((cmd
& IOC_OUT
) && size
) {
9264 * Zero the buffer so the user always
9265 * gets back something deterministic.
9268 } else if (cmd
& IOC_VOID
) {
9270 *(user_addr_t
*)data
= udata
;
9273 *(uint32_t *)data
= (uint32_t)udata
;
9277 /* Check to see if it's a generic command */
9278 switch (IOCBASECMD(cmd
)) {
9280 case FSCTL_SYNC_VOLUME
: {
9281 mount_t mp
= vp
->v_mount
;
9282 int arg
= *(uint32_t*)data
;
9284 /* record vid of vp so we can drop it below. */
9285 uint32_t vvid
= vp
->v_id
;
9288 * Then grab mount_iterref so that we can release the vnode.
9289 * Without this, a thread may call vnode_iterate_prepare then
9290 * get into a deadlock because we've never released the root vp
9292 error
= mount_iterref (mp
, 0);
9298 /* issue the sync for this volume */
9299 (void)sync_callback(mp
, (arg
& FSCTL_SYNC_WAIT
) ? &arg
: NULL
);
9302 * Then release the mount_iterref once we're done syncing; it's not
9303 * needed for the VNOP_IOCTL below
9307 if (arg
& FSCTL_SYNC_FULLSYNC
) {
9308 /* re-obtain vnode iocount on the root vp, if possible */
9309 error
= vnode_getwithvid (vp
, vvid
);
9311 error
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
);
9315 /* mark the argument VP as having been released */
9320 case FSCTL_SET_PACKAGE_EXTS
: {
9321 user_addr_t ext_strings
;
9322 uint32_t num_entries
;
9325 if ( (is64bit
&& size
!= sizeof(user64_package_ext_info
))
9326 || (is64bit
== 0 && size
!= sizeof(user32_package_ext_info
))) {
9328 // either you're 64-bit and passed a 64-bit struct or
9329 // you're 32-bit and passed a 32-bit struct. otherwise
9336 ext_strings
= ((user64_package_ext_info
*)data
)->strings
;
9337 num_entries
= ((user64_package_ext_info
*)data
)->num_entries
;
9338 max_width
= ((user64_package_ext_info
*)data
)->max_width
;
9340 ext_strings
= CAST_USER_ADDR_T(((user32_package_ext_info
*)data
)->strings
);
9341 num_entries
= ((user32_package_ext_info
*)data
)->num_entries
;
9342 max_width
= ((user32_package_ext_info
*)data
)->max_width
;
9344 error
= set_package_extensions_table(ext_strings
, num_entries
, max_width
);
9348 /* namespace handlers */
9349 case FSCTL_NAMESPACE_HANDLER_GET
: {
9350 error
= process_namespace_fsctl(NSPACE_HANDLER_NSPACE
, is64bit
, size
, data
);
9354 /* Snapshot handlers */
9355 case FSCTL_OLD_SNAPSHOT_HANDLER_GET
: {
9356 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
9360 case FSCTL_SNAPSHOT_HANDLER_GET_EXT
: {
9361 error
= process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT
, is64bit
, size
, data
);
9365 case FSCTL_NAMESPACE_HANDLER_UPDATE
: {
9366 uint32_t token
, val
;
9369 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9373 if (!nspace_is_special_process(p
)) {
9378 token
= ((uint32_t *)data
)[0];
9379 val
= ((uint32_t *)data
)[1];
9381 lck_mtx_lock(&nspace_handler_lock
);
9383 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9384 if (nspace_items
[i
].token
== token
) {
9385 break; /* exit for loop, not case stmt */
9389 if (i
>= MAX_NSPACE_ITEMS
) {
9393 // if this bit is set, when resolve_nspace_item() times out
9394 // it will loop and go back to sleep.
9396 nspace_items
[i
].flags
|= NSPACE_ITEM_RESET_TIMER
;
9399 lck_mtx_unlock(&nspace_handler_lock
);
9402 printf("nspace-handler-update: did not find token %u\n", token
);
9407 case FSCTL_NAMESPACE_HANDLER_UNBLOCK
: {
9408 uint32_t token
, val
;
9411 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9415 if (!nspace_is_special_process(p
)) {
9420 token
= ((uint32_t *)data
)[0];
9421 val
= ((uint32_t *)data
)[1];
9423 lck_mtx_lock(&nspace_handler_lock
);
9425 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9426 if (nspace_items
[i
].token
== token
) {
9427 break; /* exit for loop, not case statement */
9431 if (i
>= MAX_NSPACE_ITEMS
) {
9432 printf("nspace-handler-unblock: did not find token %u\n", token
);
9435 if (val
== 0 && nspace_items
[i
].vp
) {
9436 vnode_lock_spin(nspace_items
[i
].vp
);
9437 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9438 vnode_unlock(nspace_items
[i
].vp
);
9441 nspace_items
[i
].vp
= NULL
;
9442 nspace_items
[i
].arg
= NULL
;
9443 nspace_items
[i
].op
= 0;
9444 nspace_items
[i
].vid
= 0;
9445 nspace_items
[i
].flags
= NSPACE_ITEM_DONE
;
9446 nspace_items
[i
].token
= 0;
9448 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9451 lck_mtx_unlock(&nspace_handler_lock
);
9455 case FSCTL_NAMESPACE_HANDLER_CANCEL
: {
9456 uint32_t token
, val
;
9459 if ((error
= suser(kauth_cred_get(), &(p
->p_acflag
)))) {
9463 if (!nspace_is_special_process(p
)) {
9468 token
= ((uint32_t *)data
)[0];
9469 val
= ((uint32_t *)data
)[1];
9471 lck_mtx_lock(&nspace_handler_lock
);
9473 for(i
=0; i
< MAX_NSPACE_ITEMS
; i
++) {
9474 if (nspace_items
[i
].token
== token
) {
9475 break; /* exit for loop, not case stmt */
9479 if (i
>= MAX_NSPACE_ITEMS
) {
9480 printf("nspace-handler-cancel: did not find token %u\n", token
);
9483 if (nspace_items
[i
].vp
) {
9484 vnode_lock_spin(nspace_items
[i
].vp
);
9485 nspace_items
[i
].vp
->v_flag
&= ~VNEEDSSNAPSHOT
;
9486 vnode_unlock(nspace_items
[i
].vp
);
9489 nspace_items
[i
].vp
= NULL
;
9490 nspace_items
[i
].arg
= NULL
;
9491 nspace_items
[i
].vid
= 0;
9492 nspace_items
[i
].token
= val
;
9493 nspace_items
[i
].flags
&= ~NSPACE_ITEM_PROCESSING
;
9494 nspace_items
[i
].flags
|= NSPACE_ITEM_CANCELLED
;
9496 wakeup((caddr_t
)&(nspace_items
[i
].vp
));
9499 lck_mtx_unlock(&nspace_handler_lock
);
9503 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME
: {
9504 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9508 // we explicitly do not do the namespace_handler_proc check here
9510 lck_mtx_lock(&nspace_handler_lock
);
9511 snapshot_timestamp
= ((uint32_t *)data
)[0];
9512 wakeup(&nspace_item_idx
);
9513 lck_mtx_unlock(&nspace_handler_lock
);
9514 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp
);
9519 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS
:
9521 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9525 lck_mtx_lock(&nspace_handler_lock
);
9526 nspace_allow_virtual_devs
= ((uint32_t *)data
)[0];
9527 lck_mtx_unlock(&nspace_handler_lock
);
9528 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9529 nspace_allow_virtual_devs
? "" : " NOT");
9535 case FSCTL_SET_FSTYPENAME_OVERRIDE
:
9537 if ((error
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) {
9541 mount_lock(vp
->v_mount
);
9543 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
);
9544 vp
->v_mount
->mnt_kern_flag
|= MNTK_TYPENAME_OVERRIDE
;
9545 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9546 vp
->v_mount
->mnt_kern_flag
|= MNTK_EXTENDED_SECURITY
;
9547 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_AUTH_OPAQUE
;
9550 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) {
9551 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_EXTENDED_SECURITY
;
9553 vp
->v_mount
->mnt_kern_flag
&= ~MNTK_TYPENAME_OVERRIDE
;
9554 vp
->v_mount
->fstypename_override
[0] = '\0';
9556 mount_unlock(vp
->v_mount
);
9562 /* Invoke the filesystem-specific code */
9563 error
= VNOP_IOCTL(vp
, IOCBASECMD(cmd
), data
, options
, ctx
);
9566 } /* end switch stmt */
9569 * if no errors, copy any data to user. Size was
9570 * already set and checked above.
9572 if (error
== 0 && (cmd
& IOC_OUT
) && size
)
9573 error
= copyout(data
, udata
, size
);
9584 fsctl (proc_t p
, struct fsctl_args
*uap
, __unused
int32_t *retval
)
9587 struct nameidata nd
;
9590 vfs_context_t ctx
= vfs_context_current();
9592 AUDIT_ARG(cmd
, uap
->cmd
);
9593 AUDIT_ARG(value32
, uap
->options
);
9594 /* Get the vnode for the file we are getting info on: */
9596 if ((uap
->options
& FSOPT_NOFOLLOW
) == 0) nameiflags
|= FOLLOW
;
9597 NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags
| AUDITVNPATH1
,
9598 UIO_USERSPACE
, uap
->path
, ctx
);
9599 if ((error
= namei(&nd
))) goto done
;
9604 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
9610 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9619 ffsctl (proc_t p
, struct ffsctl_args
*uap
, __unused
int32_t *retval
)
9623 vfs_context_t ctx
= vfs_context_current();
9626 AUDIT_ARG(fd
, uap
->fd
);
9627 AUDIT_ARG(cmd
, uap
->cmd
);
9628 AUDIT_ARG(value32
, uap
->options
);
9630 /* Get the vnode for the file we are getting info on: */
9631 if ((error
= file_vnode(uap
->fd
, &vp
)))
9634 if ((error
= vnode_getwithref(vp
))) {
9639 error
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
);
9645 error
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
);
9655 /* end of fsctl system call */
9658 * Retrieve the data of an extended attribute.
9661 getxattr(proc_t p
, struct getxattr_args
*uap
, user_ssize_t
*retval
)
9664 struct nameidata nd
;
9665 char attrname
[XATTR_MAXNAMELEN
+1];
9666 vfs_context_t ctx
= vfs_context_current();
9668 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9669 size_t attrsize
= 0;
9671 u_int32_t nameiflags
;
9673 char uio_buf
[ UIO_SIZEOF(1) ];
9675 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9678 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9679 NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9680 if ((error
= namei(&nd
))) {
9686 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9689 if (xattr_protected(attrname
)) {
9690 if (!vfs_context_issuser(ctx
) || strcmp(attrname
, "com.apple.system.Security") != 0) {
9696 * the specific check for 0xffffffff is a hack to preserve
9697 * binary compatibility in K64 with applications that discovered
9698 * that passing in a buf pointer and a size of -1 resulted in
9699 * just the size of the indicated extended attribute being returned.
9700 * this isn't part of the documented behavior, but because of the
9701 * original implementation's check for "uap->size > 0", this behavior
9702 * was allowed. In K32 that check turned into a signed comparison
9703 * even though uap->size is unsigned... in K64, we blow by that
9704 * check because uap->size is unsigned and doesn't get sign smeared
9705 * in the munger for a 32 bit user app. we also need to add a
9706 * check to limit the maximum size of the buffer being passed in...
9707 * unfortunately, the underlying filesystems seem to just malloc
9708 * the requested size even if the actual extended attribute is tiny.
9709 * because that malloc is for kernel wired memory, we have to put a
9712 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9713 * U64 running on K64 will yield -1 (64 bits wide)
9714 * U32/U64 running on K32 will yield -1 (32 bits wide)
9716 if (uap
->size
== 0xffffffff || uap
->size
== (size_t)-1)
9720 if (uap
->size
> (size_t)XATTR_MAXSIZE
)
9721 uap
->size
= XATTR_MAXSIZE
;
9723 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9724 &uio_buf
[0], sizeof(uio_buf
));
9725 uio_addiov(auio
, uap
->value
, uap
->size
);
9728 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
);
9733 *retval
= uap
->size
- uio_resid(auio
);
9735 *retval
= (user_ssize_t
)attrsize
;
9742 * Retrieve the data of an extended attribute.
9745 fgetxattr(proc_t p
, struct fgetxattr_args
*uap
, user_ssize_t
*retval
)
9748 char attrname
[XATTR_MAXNAMELEN
+1];
9750 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9751 size_t attrsize
= 0;
9754 char uio_buf
[ UIO_SIZEOF(1) ];
9756 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9759 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9762 if ( (error
= vnode_getwithref(vp
)) ) {
9766 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9769 if (xattr_protected(attrname
)) {
9773 if (uap
->value
&& uap
->size
> 0) {
9774 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
,
9775 &uio_buf
[0], sizeof(uio_buf
));
9776 uio_addiov(auio
, uap
->value
, uap
->size
);
9779 error
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current());
9781 (void)vnode_put(vp
);
9785 *retval
= uap
->size
- uio_resid(auio
);
9787 *retval
= (user_ssize_t
)attrsize
;
9793 * Set the data of an extended attribute.
9796 setxattr(proc_t p
, struct setxattr_args
*uap
, int *retval
)
9799 struct nameidata nd
;
9800 char attrname
[XATTR_MAXNAMELEN
+1];
9801 vfs_context_t ctx
= vfs_context_current();
9803 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9805 u_int32_t nameiflags
;
9807 char uio_buf
[ UIO_SIZEOF(1) ];
9809 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9812 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9813 if (error
== EPERM
) {
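9814 /* NOTE(review): because "!= 0" sits inside the assignment above, error holds the comparison result (1, which equals EPERM) rather than copyinstr's own return value; this is treated as the string not fitting in attrname */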
9815 return (ENAMETOOLONG
);
9817 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9820 if (xattr_protected(attrname
))
9822 if (uap
->size
!= 0 && uap
->value
== 0) {
9826 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9827 NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9828 if ((error
= namei(&nd
))) {
9834 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9835 &uio_buf
[0], sizeof(uio_buf
));
9836 uio_addiov(auio
, uap
->value
, uap
->size
);
9838 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
);
9841 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9852 * Set the data of an extended attribute.
9855 fsetxattr(proc_t p
, struct fsetxattr_args
*uap
, int *retval
)
9858 char attrname
[XATTR_MAXNAMELEN
+1];
9860 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9863 char uio_buf
[ UIO_SIZEOF(1) ];
9865 vfs_context_t ctx
= vfs_context_current();
9868 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9871 if ((error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
) != 0)) {
9874 if (xattr_protected(attrname
))
9876 if (uap
->size
!= 0 && uap
->value
== 0) {
9879 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9882 if ( (error
= vnode_getwithref(vp
)) ) {
9886 auio
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
,
9887 &uio_buf
[0], sizeof(uio_buf
));
9888 uio_addiov(auio
, uap
->value
, uap
->size
);
9890 error
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current());
9893 add_fsevent(FSE_XATTR_MODIFIED
, ctx
,
9905 * Remove an extended attribute.
9906 * XXX Code duplication here.
9909 removexattr(proc_t p
, struct removexattr_args
*uap
, int *retval
)
9912 struct nameidata nd
;
9913 char attrname
[XATTR_MAXNAMELEN
+1];
9914 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
9915 vfs_context_t ctx
= vfs_context_current();
9917 u_int32_t nameiflags
;
9920 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9923 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9927 if (xattr_protected(attrname
))
9929 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
9930 NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
9931 if ((error
= namei(&nd
))) {
9937 error
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
);
9940 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
9951 * Remove an extended attribute.
9952 * XXX Code duplication here.
9955 fremovexattr(__unused proc_t p
, struct fremovexattr_args
*uap
, int *retval
)
9958 char attrname
[XATTR_MAXNAMELEN
+1];
9962 vfs_context_t ctx
= vfs_context_current();
9965 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
9968 error
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
);
9972 if (xattr_protected(attrname
))
9974 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
9977 if ( (error
= vnode_getwithref(vp
)) ) {
9982 error
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current());
9985 add_fsevent(FSE_XATTR_REMOVED
, ctx
,
9997 * Retrieve the list of extended attribute names.
9998 * XXX Code duplication here.
10001 listxattr(proc_t p
, struct listxattr_args
*uap
, user_ssize_t
*retval
)
10004 struct nameidata nd
;
10005 vfs_context_t ctx
= vfs_context_current();
10007 int spacetype
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10008 size_t attrsize
= 0;
10009 u_int32_t nameiflags
;
10011 char uio_buf
[ UIO_SIZEOF(1) ];
10013 if (uap
->options
& (XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10016 nameiflags
= (uap
->options
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
;
10017 NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
);
10018 if ((error
= namei(&nd
))) {
10023 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10024 auio
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
,
10025 &uio_buf
[0], sizeof(uio_buf
));
10026 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10029 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
);
10033 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10035 *retval
= (user_ssize_t
)attrsize
;
10041 * Retrieve the list of extended attribute names.
10042 * XXX Code duplication here.
10045 flistxattr(proc_t p
, struct flistxattr_args
*uap
, user_ssize_t
*retval
)
10049 int spacetype
= proc_is64bit(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
;
10050 size_t attrsize
= 0;
10052 char uio_buf
[ UIO_SIZEOF(1) ];
10054 if (uap
->options
& (XATTR_NOFOLLOW
| XATTR_NOSECURITY
| XATTR_NODEFAULT
))
10057 if ( (error
= file_vnode(uap
->fd
, &vp
)) ) {
10060 if ( (error
= vnode_getwithref(vp
)) ) {
10061 file_drop(uap
->fd
);
10064 if (uap
->namebuf
!= 0 && uap
->bufsize
> 0) {
10065 auio
= uio_createwithbuffer(1, 0, spacetype
,
10066 UIO_READ
, &uio_buf
[0], sizeof(uio_buf
));
10067 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
);
10070 error
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current());
10073 file_drop(uap
->fd
);
10075 *retval
= (user_ssize_t
)uap
->bufsize
- uio_resid(auio
);
10077 *retval
= (user_ssize_t
)attrsize
;
10082 static int fsgetpath_internal(
10083 vfs_context_t ctx
, int volfs_id
, uint64_t objid
,
10084 vm_size_t bufsize
, caddr_t buf
, int *pathlen
)
10087 struct mount
*mp
= NULL
;
10092 if (bufsize
> PAGE_SIZE
) {
10100 if ((mp
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) {
10101 error
= ENOTSUP
; /* unexpected failure */
10107 error
= VFS_ROOT(mp
, &vp
, ctx
);
10109 error
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
);
10112 if (error
== ENOENT
&& (mp
->mnt_flag
& MNT_UNION
)) {
10114 * If the fileid isn't found and we're in a union
10115 * mount volume, then see if the fileid is in the
10116 * mounted-on volume.
10118 struct mount
*tmp
= mp
;
10119 mp
= vnode_mount(tmp
->mnt_vnodecovered
);
10121 if (vfs_busy(mp
, LK_NOWAIT
) == 0)
10132 error
= mac_vnode_check_fsgetpath(ctx
, vp
);
10139 /* Obtain the absolute path to this vnode. */
10140 bpflags
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS
: 0;
10141 bpflags
|= BUILDPATH_CHECK_MOVED
;
10142 error
= build_path(vp
, buf
, bufsize
, &length
, bpflags
, ctx
);
10149 AUDIT_ARG(text
, buf
);
10151 if (kdebug_enable
) {
10152 long dbg_parms
[NUMPARMS
];
10155 dbg_namelen
= (int)sizeof(dbg_parms
);
10157 if (length
< dbg_namelen
) {
10158 memcpy((char *)dbg_parms
, buf
, length
);
10159 memset((char *)dbg_parms
+ length
, 0, dbg_namelen
- length
);
10161 dbg_namelen
= length
;
10163 memcpy((char *)dbg_parms
, buf
+ (length
- dbg_namelen
), dbg_namelen
);
10166 kdebug_lookup_gen_events(dbg_parms
, dbg_namelen
, (void *)vp
, TRUE
);
10169 *pathlen
= (user_ssize_t
)length
; /* may be superseded by error */
10176 * Obtain the full pathname of a file system object by id.
10178 * This is a private SPI used by the File Manager.
10182 fsgetpath(__unused proc_t p
, struct fsgetpath_args
*uap
, user_ssize_t
*retval
)
10184 vfs_context_t ctx
= vfs_context_current();
10190 if ((error
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) {
10193 AUDIT_ARG(value32
, fsid
.val
[0]);
10194 AUDIT_ARG(value64
, uap
->objid
);
10195 /* Restrict output buffer size for now. */
10197 if (uap
->bufsize
> PAGE_SIZE
) {
10200 MALLOC(realpath
, char *, uap
->bufsize
, M_TEMP
, M_WAITOK
);
10201 if (realpath
== NULL
) {
10205 error
= fsgetpath_internal(
10206 ctx
, fsid
.val
[0], uap
->objid
,
10207 uap
->bufsize
, realpath
, &length
);
10213 error
= copyout((caddr_t
)realpath
, uap
->buf
, length
);
10215 *retval
= (user_ssize_t
)length
; /* may be superseded by error */
10218 FREE(realpath
, M_TEMP
);
10224 * Common routine to handle various flavors of statfs data heading out
10227 * Returns: 0 Success
10231 munge_statfs(struct mount
*mp
, struct vfsstatfs
*sfsp
,
10232 user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
,
10233 boolean_t partial_copy
)
10236 int my_size
, copy_size
;
10239 struct user64_statfs sfs
;
10240 my_size
= copy_size
= sizeof(sfs
);
10241 bzero(&sfs
, my_size
);
10242 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10243 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10244 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10245 sfs
.f_bsize
= (user64_long_t
)sfsp
->f_bsize
;
10246 sfs
.f_iosize
= (user64_long_t
)sfsp
->f_iosize
;
10247 sfs
.f_blocks
= (user64_long_t
)sfsp
->f_blocks
;
10248 sfs
.f_bfree
= (user64_long_t
)sfsp
->f_bfree
;
10249 sfs
.f_bavail
= (user64_long_t
)sfsp
->f_bavail
;
10250 sfs
.f_files
= (user64_long_t
)sfsp
->f_files
;
10251 sfs
.f_ffree
= (user64_long_t
)sfsp
->f_ffree
;
10252 sfs
.f_fsid
= sfsp
->f_fsid
;
10253 sfs
.f_owner
= sfsp
->f_owner
;
10254 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10255 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10257 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10259 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10260 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10262 if (partial_copy
) {
10263 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10265 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10268 struct user32_statfs sfs
;
10270 my_size
= copy_size
= sizeof(sfs
);
10271 bzero(&sfs
, my_size
);
10273 sfs
.f_flags
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
10274 sfs
.f_type
= mp
->mnt_vtable
->vfc_typenum
;
10275 sfs
.f_reserved1
= (short)sfsp
->f_fssubtype
;
10278 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10279 * have to fudge the numbers here in that case. We inflate the blocksize in order
10280 * to reflect the filesystem size as best we can.
10282 if ((sfsp
->f_blocks
> INT_MAX
)
10283 /* Hack for 4061702 . I think the real fix is for Carbon to
10284 * look for some volume capability and not depend on hidden
10285 * semantics agreed between a FS and carbon.
10286 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10287 * for Carbon to set bNoVolumeSizes volume attribute.
10288 * Without this the webdavfs files cannot be copied onto
10289 * disk as they look huge. This change should not affect
10290 * XSAN as they should not setting these to -1..
10292 && (sfsp
->f_blocks
!= 0xffffffffffffffffULL
)
10293 && (sfsp
->f_bfree
!= 0xffffffffffffffffULL
)
10294 && (sfsp
->f_bavail
!= 0xffffffffffffffffULL
)) {
10298 * Work out how far we have to shift the block count down to make it fit.
10299 * Note that it's possible to have to shift so far that the resulting
10300 * blocksize would be unreportably large. At that point, we will clip
10301 * any values that don't fit.
10303 * For safety's sake, we also ensure that f_iosize is never reported as
10304 * being smaller than f_bsize.
10306 for (shift
= 0; shift
< 32; shift
++) {
10307 if ((sfsp
->f_blocks
>> shift
) <= INT_MAX
)
10309 if ((sfsp
->f_bsize
<< (shift
+ 1)) > INT_MAX
)
10312 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10313 sfs
.f_blocks
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
);
10314 sfs
.f_bfree
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
);
10315 sfs
.f_bavail
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
);
10316 #undef __SHIFT_OR_CLIP
10317 sfs
.f_bsize
= (user32_long_t
)(sfsp
->f_bsize
<< shift
);
10318 sfs
.f_iosize
= lmax(sfsp
->f_iosize
, sfsp
->f_bsize
);
10320 /* filesystem is small enough to be reported honestly */
10321 sfs
.f_bsize
= (user32_long_t
)sfsp
->f_bsize
;
10322 sfs
.f_iosize
= (user32_long_t
)sfsp
->f_iosize
;
10323 sfs
.f_blocks
= (user32_long_t
)sfsp
->f_blocks
;
10324 sfs
.f_bfree
= (user32_long_t
)sfsp
->f_bfree
;
10325 sfs
.f_bavail
= (user32_long_t
)sfsp
->f_bavail
;
10327 sfs
.f_files
= (user32_long_t
)sfsp
->f_files
;
10328 sfs
.f_ffree
= (user32_long_t
)sfsp
->f_ffree
;
10329 sfs
.f_fsid
= sfsp
->f_fsid
;
10330 sfs
.f_owner
= sfsp
->f_owner
;
10331 if (mp
->mnt_kern_flag
& MNTK_TYPENAME_OVERRIDE
) {
10332 strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
);
10334 strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
);
10336 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
);
10337 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
);
10339 if (partial_copy
) {
10340 copy_size
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
));
10342 error
= copyout((caddr_t
)&sfs
, bufp
, copy_size
);
10345 if (sizep
!= NULL
) {
10352 * copy stat structure into user_stat structure.
10354 void munge_user64_stat(struct stat
*sbp
, struct user64_stat
*usbp
)
10356 bzero(usbp
, sizeof(*usbp
));
10358 usbp
->st_dev
= sbp
->st_dev
;
10359 usbp
->st_ino
= sbp
->st_ino
;
10360 usbp
->st_mode
= sbp
->st_mode
;
10361 usbp
->st_nlink
= sbp
->st_nlink
;
10362 usbp
->st_uid
= sbp
->st_uid
;
10363 usbp
->st_gid
= sbp
->st_gid
;
10364 usbp
->st_rdev
= sbp
->st_rdev
;
10365 #ifndef _POSIX_C_SOURCE
10366 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10367 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10368 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10369 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10370 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10371 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10373 usbp
->st_atime
= sbp
->st_atime
;
10374 usbp
->st_atimensec
= sbp
->st_atimensec
;
10375 usbp
->st_mtime
= sbp
->st_mtime
;
10376 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10377 usbp
->st_ctime
= sbp
->st_ctime
;
10378 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10380 usbp
->st_size
= sbp
->st_size
;
10381 usbp
->st_blocks
= sbp
->st_blocks
;
10382 usbp
->st_blksize
= sbp
->st_blksize
;
10383 usbp
->st_flags
= sbp
->st_flags
;
10384 usbp
->st_gen
= sbp
->st_gen
;
10385 usbp
->st_lspare
= sbp
->st_lspare
;
10386 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10387 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10390 void munge_user32_stat(struct stat
*sbp
, struct user32_stat
*usbp
)
10392 bzero(usbp
, sizeof(*usbp
));
10394 usbp
->st_dev
= sbp
->st_dev
;
10395 usbp
->st_ino
= sbp
->st_ino
;
10396 usbp
->st_mode
= sbp
->st_mode
;
10397 usbp
->st_nlink
= sbp
->st_nlink
;
10398 usbp
->st_uid
= sbp
->st_uid
;
10399 usbp
->st_gid
= sbp
->st_gid
;
10400 usbp
->st_rdev
= sbp
->st_rdev
;
10401 #ifndef _POSIX_C_SOURCE
10402 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10403 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10404 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10405 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10406 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10407 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10409 usbp
->st_atime
= sbp
->st_atime
;
10410 usbp
->st_atimensec
= sbp
->st_atimensec
;
10411 usbp
->st_mtime
= sbp
->st_mtime
;
10412 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10413 usbp
->st_ctime
= sbp
->st_ctime
;
10414 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10416 usbp
->st_size
= sbp
->st_size
;
10417 usbp
->st_blocks
= sbp
->st_blocks
;
10418 usbp
->st_blksize
= sbp
->st_blksize
;
10419 usbp
->st_flags
= sbp
->st_flags
;
10420 usbp
->st_gen
= sbp
->st_gen
;
10421 usbp
->st_lspare
= sbp
->st_lspare
;
10422 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10423 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10427 * copy stat64 structure into user_stat64 structure.
10429 void munge_user64_stat64(struct stat64
*sbp
, struct user64_stat64
*usbp
)
10431 bzero(usbp
, sizeof(*usbp
));
10433 usbp
->st_dev
= sbp
->st_dev
;
10434 usbp
->st_ino
= sbp
->st_ino
;
10435 usbp
->st_mode
= sbp
->st_mode
;
10436 usbp
->st_nlink
= sbp
->st_nlink
;
10437 usbp
->st_uid
= sbp
->st_uid
;
10438 usbp
->st_gid
= sbp
->st_gid
;
10439 usbp
->st_rdev
= sbp
->st_rdev
;
10440 #ifndef _POSIX_C_SOURCE
10441 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10442 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10443 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10444 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10445 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10446 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10447 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10448 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10450 usbp
->st_atime
= sbp
->st_atime
;
10451 usbp
->st_atimensec
= sbp
->st_atimensec
;
10452 usbp
->st_mtime
= sbp
->st_mtime
;
10453 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10454 usbp
->st_ctime
= sbp
->st_ctime
;
10455 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10456 usbp
->st_birthtime
= sbp
->st_birthtime
;
10457 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10459 usbp
->st_size
= sbp
->st_size
;
10460 usbp
->st_blocks
= sbp
->st_blocks
;
10461 usbp
->st_blksize
= sbp
->st_blksize
;
10462 usbp
->st_flags
= sbp
->st_flags
;
10463 usbp
->st_gen
= sbp
->st_gen
;
10464 usbp
->st_lspare
= sbp
->st_lspare
;
10465 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10466 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10469 void munge_user32_stat64(struct stat64
*sbp
, struct user32_stat64
*usbp
)
10471 bzero(usbp
, sizeof(*usbp
));
10473 usbp
->st_dev
= sbp
->st_dev
;
10474 usbp
->st_ino
= sbp
->st_ino
;
10475 usbp
->st_mode
= sbp
->st_mode
;
10476 usbp
->st_nlink
= sbp
->st_nlink
;
10477 usbp
->st_uid
= sbp
->st_uid
;
10478 usbp
->st_gid
= sbp
->st_gid
;
10479 usbp
->st_rdev
= sbp
->st_rdev
;
10480 #ifndef _POSIX_C_SOURCE
10481 usbp
->st_atimespec
.tv_sec
= sbp
->st_atimespec
.tv_sec
;
10482 usbp
->st_atimespec
.tv_nsec
= sbp
->st_atimespec
.tv_nsec
;
10483 usbp
->st_mtimespec
.tv_sec
= sbp
->st_mtimespec
.tv_sec
;
10484 usbp
->st_mtimespec
.tv_nsec
= sbp
->st_mtimespec
.tv_nsec
;
10485 usbp
->st_ctimespec
.tv_sec
= sbp
->st_ctimespec
.tv_sec
;
10486 usbp
->st_ctimespec
.tv_nsec
= sbp
->st_ctimespec
.tv_nsec
;
10487 usbp
->st_birthtimespec
.tv_sec
= sbp
->st_birthtimespec
.tv_sec
;
10488 usbp
->st_birthtimespec
.tv_nsec
= sbp
->st_birthtimespec
.tv_nsec
;
10490 usbp
->st_atime
= sbp
->st_atime
;
10491 usbp
->st_atimensec
= sbp
->st_atimensec
;
10492 usbp
->st_mtime
= sbp
->st_mtime
;
10493 usbp
->st_mtimensec
= sbp
->st_mtimensec
;
10494 usbp
->st_ctime
= sbp
->st_ctime
;
10495 usbp
->st_ctimensec
= sbp
->st_ctimensec
;
10496 usbp
->st_birthtime
= sbp
->st_birthtime
;
10497 usbp
->st_birthtimensec
= sbp
->st_birthtimensec
;
10499 usbp
->st_size
= sbp
->st_size
;
10500 usbp
->st_blocks
= sbp
->st_blocks
;
10501 usbp
->st_blksize
= sbp
->st_blksize
;
10502 usbp
->st_flags
= sbp
->st_flags
;
10503 usbp
->st_gen
= sbp
->st_gen
;
10504 usbp
->st_lspare
= sbp
->st_lspare
;
10505 usbp
->st_qspare
[0] = sbp
->st_qspare
[0];
10506 usbp
->st_qspare
[1] = sbp
->st_qspare
[1];
10510 * Purge buffer cache for simulating cold starts
10512 static int vnode_purge_callback(struct vnode
*vp
, __unused
void *cargs
)
10514 ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL
/* off_t *resid_off */, UBC_PUSHALL
| UBC_INVALIDATE
);
10516 return VNODE_RETURNED
;
10519 static int vfs_purge_callback(mount_t mp
, __unused
void * arg
)
10521 vnode_iterate(mp
, VNODE_WAIT
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
);
10523 return VFS_RETURNED
;
10527 vfs_purge(__unused
struct proc
*p
, __unused
struct vfs_purge_args
*uap
, __unused
int32_t *retval
)
10529 if (!kauth_cred_issuser(kauth_cred_get()))
10532 vfs_iterate(0/* flags */, vfs_purge_callback
, NULL
);