]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
f427ee49 2 * Copyright (c) 1995-2020 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
f427ee49 86#include <kern/kalloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636 96#include <sys/sysproto.h>
cb323159 97#include <sys/sysctl.h>
91447636 98#include <sys/xattr.h>
b0d623f7
A
99#include <sys/fcntl.h>
100#include <sys/fsctl.h>
91447636 101#include <sys/ubc_internal.h>
593a1d5f 102#include <sys/disk.h>
3e170ce0 103#include <sys/content_protection.h>
39037602
A
104#include <sys/clonefile.h>
105#include <sys/snapshot.h>
490019cf 106#include <sys/priv.h>
cb323159 107#include <sys/fsgetpath.h>
91447636
A
108#include <machine/cons.h>
109#include <machine/limits.h>
110#include <miscfs/specfs/specdev.h>
e5568f75 111
5ba3f43e
A
112#include <vfs/vfs_disk_conditioner.h>
113
b0d623f7 114#include <security/audit/audit.h>
e5568f75
A
115#include <bsm/audit_kevents.h>
116
91447636
A
117#include <mach/mach_types.h>
118#include <kern/kern_types.h>
119#include <kern/kalloc.h>
6d2010ae 120#include <kern/task.h>
91447636
A
121
122#include <vm/vm_pageout.h>
39037602 123#include <vm/vm_protos.h>
1c79356b 124
91447636 125#include <libkern/OSAtomic.h>
f427ee49 126#include <os/atomic_private.h>
b0d623f7 127#include <pexpert/pexpert.h>
3e170ce0 128#include <IOKit/IOBSD.h>
55e303ae 129
cb323159
A
130// deps for MIG call
131#include <kern/host.h>
132#include <kern/ipc_misc.h>
133#include <mach/host_priv.h>
134#include <mach/vfs_nspace.h>
135#include <os/log.h>
136
ea3f0419
A
137#include <nfs/nfs_conf.h>
138
490019cf
A
139#if ROUTEFS
140#include <miscfs/routefs/routefs.h>
141#endif /* ROUTEFS */
142
2d21ac55
A
143#if CONFIG_MACF
144#include <security/mac.h>
145#include <security/mac_framework.h>
146#endif
1c79356b 147
39037602 148#if CONFIG_FSE
2d21ac55 149#define GET_PATH(x) \
f427ee49 150 ((x) = get_pathbuff())
2d21ac55 151#define RELEASE_PATH(x) \
f427ee49 152 release_pathbuff(x)
39037602 153#else
0a7de745 154#define GET_PATH(x) \
f427ee49 155 ((x) = zalloc(ZV_NAMEI))
2d21ac55 156#define RELEASE_PATH(x) \
f427ee49 157 zfree(ZV_NAMEI, x)
2d21ac55
A
158#endif /* CONFIG_FSE */
159
a39ff7e2
A
160#ifndef HFS_GET_BOOT_INFO
161#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
162#endif
163
164#ifndef HFS_SET_BOOT_INFO
165#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
166#endif
167
168#ifndef APFSIOC_REVERT_TO_SNAPSHOT
169#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
170#endif
171
f427ee49
A
172/*
173 * If you need accounting for KM_FD_VN_DATA consider using
174 * ZONE_VIEW_DEFINE to define a zone view.
175 */
176#define KM_FD_VN_DATA KHEAP_DEFAULT
177
5ba3f43e
A
178extern void disk_conditioner_unmount(mount_t mp);
179
2d21ac55
A
180/* struct for checkdirs iteration */
181struct cdirargs {
182 vnode_t olddp;
183 vnode_t newdp;
184};
185/* callback for checkdirs iteration */
186static int checkdirs_callback(proc_t p, void * arg);
1c79356b 187
91447636 188static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 189static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
190void enablequotas(struct mount *mp, vfs_context_t ctx);
191static int getfsstat_callback(mount_t mp, void * arg);
192static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 193static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 194static int sync_callback(mount_t, void *);
39037602 195static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
0a7de745
A
196 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
197 boolean_t partial_copy);
b0d623f7 198static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae 199static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
0a7de745
A
200 struct componentname *cnp, user_addr_t fsmountargs,
201 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
202 vfs_context_t ctx);
6d2010ae
A
203void vfs_notify_mount(vnode_t pdvp);
204
205int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 206
fe8ab488
A
207struct fd_vn_data * fg_vn_data_alloc(void);
208
c18c124e
A
209/*
210 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
211 * Concurrent lookups (or lookups by ids) on hard links can cause the
212 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
213 * does) to return ENOENT as the path cannot be returned from the name cache
214 * alone. We have no option but to retry and hope to get one namei->reverse path
215 * generation done without an intervening lookup, lookup by id on the hard link
216 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
217 * which currently are the MAC hooks for rename, unlink and rmdir.
218 */
219#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
220
bca245ac
A
221/* Max retry limit for rename due to vnode recycling. */
222#define MAX_RENAME_ERECYCLE_RETRIES 1024
223
cb323159
A
224static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg,
225 int unlink_flags);
fe8ab488 226
cb323159 227static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, uint32_t options, int *);
fe8ab488 228
b7266188 229#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
230static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
231static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
232static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
233static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
234static void mount_end_update(mount_t mp);
6d2010ae 235static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
236#endif /* CONFIG_IMGSRC_ACCESS */
237
cb323159
A
238#if CONFIG_LOCKERBOOT
239int mount_locker_protoboot(const char *fsname, const char *mntpoint,
240 const char *pbdevpath);
241#endif
242
d9a64523
A
243//snapshot functions
244#if CONFIG_MNT_ROOTSNAP
f427ee49 245static int __attribute__ ((noinline)) snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
d9a64523 246#else
f427ee49 247static int __attribute__ ((noinline)) snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
d9a64523
A
248#endif
249
2d21ac55
A
250__private_extern__
251int sync_internal(void);
252
2d21ac55 253__private_extern__
c18c124e 254int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 255
c3c9b80d
A
256static LCK_GRP_DECLARE(fd_vn_lck_grp, "fd_vnode_data");
257static LCK_ATTR_DECLARE(fd_vn_lck_attr, 0, 0);
fe8ab488 258
c3c9b80d
A
259/* vars for sync mutex */
260static LCK_GRP_DECLARE(sync_mtx_lck_grp, "sync thread");
261static LCK_MTX_DECLARE(sync_mtx_lck, &sync_mtx_lck_grp);
262
263extern lck_rw_t rootvnode_rw_lock;
f427ee49 264
2d21ac55
A
265/*
266 * incremented each time a mount or unmount operation occurs
267 * used to invalidate the cached value of the rootvp in the
268 * mount structure utilized by cache_lookup_path
269 */
b0d623f7 270uint32_t mount_generation = 0;
1c79356b
A
271
272/* counts number of mount and unmount operations */
0a7de745 273unsigned int vfs_nummntops = 0;
1c79356b 274
f427ee49
A
275/* system-wide, per-boot unique mount ID */
276static _Atomic uint64_t mount_unique_id = 1;
277
39236c6e
A
278extern const struct fileops vnops;
279#if CONFIG_APPLEDOUBLE
39037602 280extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 281#endif /* CONFIG_APPLEDOUBLE */
91447636 282
1c79356b
A
283/*
284 * Virtual File System System Calls
285 */
286
ea3f0419 287#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
6d2010ae
A
288/*
289 * Private in-kernel mounting spi (NFS only, not exported)
290 */
0a7de745 291__private_extern__
6d2010ae
A
292boolean_t
293vfs_iskernelmount(mount_t mp)
294{
0a7de745 295 return (mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE;
6d2010ae
A
296}
297
0a7de745 298__private_extern__
6d2010ae
A
299int
300kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
cb323159 301 void *data, __unused size_t datalen, int syscall_flags, uint32_t kern_flags, vfs_context_t ctx)
6d2010ae
A
302{
303 struct nameidata nd;
304 boolean_t did_namei;
305 int error;
306
39037602 307 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
0a7de745 308 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
6d2010ae
A
309
310 /*
311 * Get the vnode to be covered if it's not supplied
312 */
313 if (vp == NULLVP) {
314 error = namei(&nd);
0a7de745 315 if (error) {
f427ee49 316 if (kern_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VOLBYROLE_MASK)) {
cb323159
A
317 printf("failed to locate mount-on path: %s ", path);
318 }
0a7de745
A
319 return error;
320 }
6d2010ae
A
321 vp = nd.ni_vp;
322 pvp = nd.ni_dvp;
323 did_namei = TRUE;
324 } else {
325 char *pnbuf = CAST_DOWN(char *, path);
326
327 nd.ni_cnd.cn_pnbuf = pnbuf;
f427ee49 328 nd.ni_cnd.cn_pnlen = (int)(strlen(pnbuf) + 1);
6d2010ae
A
329 did_namei = FALSE;
330 }
331
332 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
0a7de745 333 syscall_flags, kern_flags, NULL, TRUE, ctx);
6d2010ae
A
334
335 if (did_namei) {
336 vnode_put(vp);
337 vnode_put(pvp);
338 nameidone(&nd);
339 }
340
0a7de745 341 return error;
6d2010ae 342}
ea3f0419 343#endif /* CONFIG_NFS_CLIENT || DEVFS || ROUTEFS */
6d2010ae 344
1c79356b
A
345/*
346 * Mount a file system.
347 */
1c79356b
A
348/* ARGSUSED */
349int
b0d623f7 350mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
351{
352 struct __mac_mount_args muap;
353
354 muap.type = uap->type;
355 muap.path = uap->path;
356 muap.flags = uap->flags;
357 muap.data = uap->data;
358 muap.mac_p = USER_ADDR_NULL;
0a7de745 359 return __mac_mount(p, &muap, retval);
2d21ac55
A
360}
361
5ba3f43e
A
362int
363fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
364{
0a7de745
A
365 struct componentname cn;
366 vfs_context_t ctx = vfs_context_current();
367 size_t dummy = 0;
368 int error;
369 int flags = uap->flags;
370 char fstypename[MFSNAMELEN];
371 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
372 vnode_t pvp;
373 vnode_t vp;
5ba3f43e
A
374
375 AUDIT_ARG(fd, uap->fd);
376 AUDIT_ARG(fflags, flags);
377 /* fstypename will get audited by mount_common */
378
379 /* Sanity check the flags */
0a7de745
A
380 if (flags & (MNT_IMGSRC_BY_INDEX | MNT_ROOTFS)) {
381 return ENOTSUP;
5ba3f43e
A
382 }
383
384 if (flags & MNT_UNION) {
0a7de745 385 return EPERM;
5ba3f43e
A
386 }
387
388 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
389 if (error) {
0a7de745 390 return error;
5ba3f43e
A
391 }
392
393 if ((error = file_vnode(uap->fd, &vp)) != 0) {
0a7de745 394 return error;
5ba3f43e
A
395 }
396
397 if ((error = vnode_getwithref(vp)) != 0) {
398 file_drop(uap->fd);
0a7de745 399 return error;
5ba3f43e
A
400 }
401
402 pvp = vnode_getparent(vp);
403 if (pvp == NULL) {
404 vnode_put(vp);
405 file_drop(uap->fd);
0a7de745 406 return EINVAL;
5ba3f43e
A
407 }
408
409 memset(&cn, 0, sizeof(struct componentname));
f427ee49 410 cn.cn_pnbuf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
5ba3f43e
A
411 cn.cn_pnlen = MAXPATHLEN;
412
0a7de745 413 if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
f427ee49 414 zfree(ZV_NAMEI, cn.cn_pnbuf);
5ba3f43e
A
415 vnode_put(pvp);
416 vnode_put(vp);
417 file_drop(uap->fd);
0a7de745 418 return error;
5ba3f43e
A
419 }
420
421 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
422
f427ee49 423 zfree(ZV_NAMEI, cn.cn_pnbuf);
5ba3f43e
A
424 vnode_put(pvp);
425 vnode_put(vp);
426 file_drop(uap->fd);
427
0a7de745 428 return error;
5ba3f43e
A
429}
430
6d2010ae 431void
39037602 432vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
433{
434 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
435 lock_vnode_and_post(pdvp, NOTE_WRITE);
436}
437
b0d623f7
A
438/*
439 * __mac_mount:
440 * Mount a file system taking into account MAC label behavior.
441 * See mount(2) man page for more information
442 *
443 * Parameters: p Process requesting the mount
444 * uap User argument descriptor (see below)
39037602 445 * retval (ignored)
b0d623f7
A
446 *
447 * Indirect: uap->type Filesystem type
448 * uap->path Path to mount
39037602
A
449 * uap->data Mount arguments
450 * uap->mac_p MAC info
b0d623f7 451 * uap->flags Mount flags
39037602 452 *
b0d623f7
A
453 *
454 * Returns: 0 Success
455 * !0 Not success
456 */
6d2010ae
A
457boolean_t root_fs_upgrade_try = FALSE;
458
2d21ac55 459int
b0d623f7 460__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 461{
39236c6e 462 vnode_t pvp = NULL;
0a7de745 463 vnode_t vp = NULL;
39236c6e 464 int need_nameidone = 0;
6d2010ae
A
465 vfs_context_t ctx = vfs_context_current();
466 char fstypename[MFSNAMELEN];
467 struct nameidata nd;
0a7de745 468 size_t dummy = 0;
6d2010ae 469 char *labelstr = NULL;
f427ee49 470 size_t labelsz = 0;
6d2010ae
A
471 int flags = uap->flags;
472 int error;
39037602 473#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 474 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
475#else
476#pragma unused(p)
477#endif
6d2010ae
A
478 /*
479 * Get the fs type name from user space
480 */
481 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
0a7de745
A
482 if (error) {
483 return error;
484 }
6d2010ae
A
485
486 /*
487 * Get the vnode to be covered
488 */
39037602 489 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
0a7de745 490 UIO_USERSPACE, uap->path, ctx);
6d2010ae 491 error = namei(&nd);
39236c6e
A
492 if (error) {
493 goto out;
494 }
495 need_nameidone = 1;
6d2010ae
A
496 vp = nd.ni_vp;
497 pvp = nd.ni_dvp;
39037602 498
6d2010ae
A
499#ifdef CONFIG_IMGSRC_ACCESS
500 /* Mounting image source cannot be batched with other operations */
501 if (flags == MNT_IMGSRC_BY_INDEX) {
502 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
0a7de745 503 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
6d2010ae
A
504 goto out;
505 }
506#endif /* CONFIG_IMGSRC_ACCESS */
507
508#if CONFIG_MACF
509 /*
510 * Get the label string (if any) from user space
511 */
512 if (uap->mac_p != USER_ADDR_NULL) {
513 struct user_mac mac;
514 size_t ulen = 0;
515
516 if (is_64bit) {
517 struct user64_mac mac64;
518 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
f427ee49
A
519 mac.m_buflen = (user_size_t)mac64.m_buflen;
520 mac.m_string = (user_addr_t)mac64.m_string;
6d2010ae
A
521 } else {
522 struct user32_mac mac32;
523 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
524 mac.m_buflen = mac32.m_buflen;
525 mac.m_string = mac32.m_string;
526 }
0a7de745 527 if (error) {
6d2010ae 528 goto out;
0a7de745 529 }
6d2010ae
A
530 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
531 (mac.m_buflen < 2)) {
532 error = EINVAL;
533 goto out;
534 }
f427ee49
A
535 labelsz = mac.m_buflen;
536 labelstr = kheap_alloc(KHEAP_TEMP, labelsz, Z_WAITOK);
6d2010ae
A
537 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
538 if (error) {
539 goto out;
540 }
541 AUDIT_ARG(mac_string, labelstr);
542 }
543#endif /* CONFIG_MACF */
544
545 AUDIT_ARG(fflags, flags);
546
4bd07ac2
A
547#if SECURE_KERNEL
548 if (flags & MNT_UNION) {
549 /* No union mounts on release kernels */
550 error = EPERM;
551 goto out;
552 }
553#endif
554
6d2010ae 555 if ((vp->v_flag & VROOT) &&
0a7de745 556 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
39236c6e 557 if (!(flags & MNT_UNION)) {
6d2010ae 558 flags |= MNT_UPDATE;
0a7de745 559 } else {
39037602 560 /*
39236c6e 561 * For a union mount on '/', treat it as fresh
39037602
A
562 * mount instead of update.
563 * Otherwise, union mouting on '/' used to panic the
564 * system before, since mnt_vnodecovered was found to
565 * be NULL for '/' which is required for unionlookup
39236c6e
A
566 * after it gets ENOENT on union mount.
567 */
568 flags = (flags & ~(MNT_UPDATE));
569 }
570
4bd07ac2 571#if SECURE_KERNEL
39236c6e
A
572 if ((flags & MNT_RDONLY) == 0) {
573 /* Release kernels are not allowed to mount "/" as rw */
574 error = EPERM;
39037602 575 goto out;
39236c6e 576 }
39236c6e
A
577#endif
578 /*
579 * See 7392553 for more details on why this check exists.
580 * Suffice to say: If this check is ON and something tries
581 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
582 * bitmap optimization.
583 */
6d2010ae 584#if CHECK_CS_VALIDATION_BITMAP
0a7de745 585 if ((flags & MNT_RDONLY) == 0) {
6d2010ae
A
586 root_fs_upgrade_try = TRUE;
587 }
588#endif
589 }
590
591 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
0a7de745 592 labelstr, FALSE, ctx);
39236c6e 593
6d2010ae 594out:
39236c6e 595
6d2010ae 596#if CONFIG_MACF
f427ee49 597 kheap_free(KHEAP_DEFAULT, labelstr, labelsz);
6d2010ae
A
598#endif /* CONFIG_MACF */
599
39236c6e
A
600 if (vp) {
601 vnode_put(vp);
602 }
603 if (pvp) {
604 vnode_put(pvp);
605 }
606 if (need_nameidone) {
607 nameidone(&nd);
608 }
6d2010ae 609
0a7de745 610 return error;
6d2010ae
A
611}
612
613/*
614 * common mount implementation (final stage of mounting)
0a7de745 615 *
6d2010ae
A
616 * Arguments:
617 * fstypename file system type (i.e. its vfs name)
618 * pvp parent of covered vnode
619 * vp covered vnode
620 * cnp component name (ie path) of covered vnode
621 * flags generic mount flags
622 * fsmountargs file system specific data
623 * labelstr optional MAC label
624 * kernelmount TRUE for mounts initiated from inside the kernel
625 * ctx caller's context
626 */
627static int
628mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
0a7de745
A
629 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
630 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
6d2010ae 631{
39236c6e
A
632#if !CONFIG_MACF
633#pragma unused(labelstr)
634#endif
91447636
A
635 struct vnode *devvp = NULLVP;
636 struct vnode *device_vnode = NULLVP;
2d21ac55
A
637#if CONFIG_MACF
638 struct vnode *rvp;
639#endif
1c79356b 640 struct mount *mp;
6601e61a 641 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 642 struct proc *p = vfs_context_proc(ctx);
91447636 643 int error, flag = 0;
f427ee49 644 bool flag_set = false;
91447636 645 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
646 int ronly = 0;
647 int mntalloc = 0;
b0d623f7 648 boolean_t vfsp_ref = FALSE;
743b1565 649 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
650 boolean_t did_rele = FALSE;
651 boolean_t have_usecount = FALSE;
f427ee49 652 boolean_t did_set_lmount = FALSE;
9bccf70c 653
f427ee49 654#if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM || CONFIG_BASESYSTEMROOT
cb323159 655 /* Check for mutually-exclusive flag bits */
f427ee49 656 uint32_t checkflags = (internal_flags & (KERNEL_MOUNT_VOLBYROLE_MASK | KERNEL_MOUNT_BASESYSTEMROOT));
cb323159
A
657 int bitcount = 0;
658 while (checkflags != 0) {
659 checkflags &= (checkflags - 1);
660 bitcount++;
661 }
662
663 if (bitcount > 1) {
664 //not allowed to request multiple mount-by-role flags
665 error = EINVAL;
666 goto out1;
667 }
668#endif
669
1c79356b 670 /*
6d2010ae 671 * Process an update for an existing mount
1c79356b 672 */
6d2010ae 673 if (flags & MNT_UPDATE) {
1c79356b 674 if ((vp->v_flag & VROOT) == 0) {
91447636
A
675 error = EINVAL;
676 goto out1;
1c79356b
A
677 }
678 mp = vp->v_mount;
d12e1678 679
f427ee49 680 /* if unmount or mount in progress, return error */
b0d623f7 681 mount_lock_spin(mp);
f427ee49 682 if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
91447636
A
683 mount_unlock(mp);
684 error = EBUSY;
685 goto out1;
d12e1678 686 }
f427ee49
A
687 mp->mnt_lflag |= MNT_LMOUNT;
688 did_set_lmount = TRUE;
91447636
A
689 mount_unlock(mp);
690 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 691 is_rwlock_locked = TRUE;
1c79356b
A
692 /*
693 * We only allow the filesystem to be reloaded if it
694 * is currently mounted read-only.
695 */
6d2010ae 696 if ((flags & MNT_RELOAD) &&
1c79356b 697 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
698 error = ENOTSUP;
699 goto out1;
1c79356b 700 }
b7266188 701
316670eb
A
702 /*
703 * If content protection is enabled, update mounts are not
704 * allowed to turn it off.
705 */
39037602 706 if ((mp->mnt_flag & MNT_CPROTECT) &&
0a7de745 707 ((flags & MNT_CPROTECT) == 0)) {
316670eb
A
708 error = EINVAL;
709 goto out1;
710 }
711
cb323159
A
712 /*
713 * can't turn off MNT_REMOVABLE either but it may be an unexpected
714 * failure to return an error for this so we'll just silently
715 * add it if it is not passed in.
716 */
717 if ((mp->mnt_flag & MNT_REMOVABLE) &&
718 ((flags & MNT_REMOVABLE) == 0)) {
719 flags |= MNT_REMOVABLE;
720 }
721
b7266188
A
722 /* Can't downgrade the backer of the root FS */
723 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
0a7de745 724 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
725 error = ENOTSUP;
726 goto out1;
727 }
b7266188 728
1c79356b
A
729 /*
730 * Only root, or the user that did the original mount is
731 * permitted to update it.
732 */
2d21ac55
A
733 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
734 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
735 goto out1;
736 }
737#if CONFIG_MACF
738 error = mac_mount_check_remount(ctx, mp);
739 if (error != 0) {
91447636 740 goto out1;
1c79356b 741 }
2d21ac55 742#endif
1c79356b 743 /*
91447636
A
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
745 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 746 */
6d2010ae
A
747 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
748 flags |= MNT_NOSUID | MNT_NODEV;
0a7de745 749 if (mp->mnt_flag & MNT_NOEXEC) {
6d2010ae 750 flags |= MNT_NOEXEC;
0a7de745 751 }
1c79356b 752 }
d12e1678 753 flag = mp->mnt_flag;
f427ee49 754 flag_set = true;
d12e1678 755
316670eb
A
756
757
6d2010ae 758 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 759
91447636 760 vfsp = mp->mnt_vtable;
1c79356b 761 goto update;
cb323159 762 } // MNT_UPDATE
5ba3f43e 763
1c79356b 764 /*
91447636 765 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
766 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
767 */
6d2010ae
A
768 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
769 flags |= MNT_NOSUID | MNT_NODEV;
0a7de745 770 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
6d2010ae 771 flags |= MNT_NOEXEC;
0a7de745 772 }
1c79356b 773 }
91447636 774
55e303ae
A
775 /* XXXAUDIT: Should we capture the type on the error path as well? */
776 AUDIT_ARG(text, fstypename);
91447636 777 mount_list_lock();
0a7de745 778 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
b0d623f7
A
779 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
780 vfsp->vfc_refcount++;
781 vfsp_ref = TRUE;
1c79356b 782 break;
b0d623f7 783 }
0a7de745 784 }
91447636 785 mount_list_unlock();
1c79356b 786 if (vfsp == NULL) {
91447636
A
787 error = ENODEV;
788 goto out1;
1c79356b 789 }
6d2010ae
A
790
791 /*
cb323159 792 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
f427ee49 793 * except in ROSV configs and for the initial BaseSystem root.
6d2010ae 794 */
cb323159 795 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) &&
f427ee49
A
796 ((internal_flags & KERNEL_MOUNT_VOLBYROLE_MASK) == 0) &&
797 ((internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) == 0)) {
6d2010ae 798 error = EINVAL; /* unsupported request */
2d21ac55 799 goto out1;
6d2010ae
A
800 }
801
802 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
803 if (error != 0) {
91447636 804 goto out1;
1c79356b 805 }
1c79356b
A
806
807 /*
6d2010ae 808 * Allocate and initialize the filesystem (mount_t)
1c79356b 809 */
f427ee49 810 mp = zalloc_flags(mount_zone, Z_WAITOK | Z_ZERO);
91447636 811 mntalloc = 1;
0b4e3aa0
A
812
813 /* Initialize the default IO constraints */
814 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
815 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
816 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
817 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
818 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 819 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
820 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
821 mp->mnt_ioscale = 1;
2d21ac55
A
822 mp->mnt_ioflags = 0;
823 mp->mnt_realrootvp = NULLVP;
824 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636 825
f427ee49
A
826 mp->mnt_lflag |= MNT_LMOUNT;
827 did_set_lmount = TRUE;
828
91447636
A
829 TAILQ_INIT(&mp->mnt_vnodelist);
830 TAILQ_INIT(&mp->mnt_workerqueue);
831 TAILQ_INIT(&mp->mnt_newvnodes);
832 mount_lock_init(mp);
833 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 834 is_rwlock_locked = TRUE;
1c79356b 835 mp->mnt_op = vfsp->vfc_vfsops;
91447636 836 mp->mnt_vtable = vfsp;
91447636 837 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 838 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488 839 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
cb323159
A
840 do {
841 int pathlen = MAXPATHLEN;
842
843 if (vn_getpath_ext(vp, pvp, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER)) {
844 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
845 }
846 } while (0);
1c79356b 847 mp->mnt_vnodecovered = vp;
2d21ac55 848 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
849 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
850 mp->mnt_devbsdunit = 0;
f427ee49 851 mp->mnt_mount_id = os_atomic_inc_orig(&mount_unique_id, relaxed);
1c79356b 852
91447636
A
853 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
854 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 855
ea3f0419 856#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
0a7de745 857 if (kernelmount) {
6d2010ae 858 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
0a7de745
A
859 }
860 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
6d2010ae 861 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
0a7de745 862 }
ea3f0419 863#endif /* CONFIG_NFS_CLIENT || DEVFS */
6d2010ae 864
c3c9b80d
A
865 if (KERNEL_MOUNT_DEVFS & internal_flags) {
866 // kernel mounted devfs
867 mp->mnt_kern_flag |= MNTK_SYSTEM;
868 }
869
1c79356b 870update:
5ba3f43e 871
1c79356b
A
872 /*
873 * Set the mount level flags.
874 */
0a7de745 875 if (flags & MNT_RDONLY) {
1c79356b 876 mp->mnt_flag |= MNT_RDONLY;
0a7de745 877 } else if (mp->mnt_flag & MNT_RDONLY) {
6d2010ae
A
878 // disallow read/write upgrades of file systems that
879 // had the TYPENAME_OVERRIDE feature set.
880 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
881 error = EPERM;
882 goto out1;
883 }
1c79356b 884 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 885 }
0b4e3aa0 886 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
0a7de745
A
887 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
888 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
cb323159 889 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
0a7de745 890 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
891
892#if SECURE_KERNEL
893#if !CONFIG_MNT_SUID
894 /*
5ba3f43e 895 * On release builds of iOS based platforms, always enforce NOSUID on
813fb2f6
A
896 * all mounts. We do this here because we can catch update mounts as well as
897 * non-update mounts in this case.
898 */
899 mp->mnt_flag |= (MNT_NOSUID);
900#endif
901#endif
902
6d2010ae 903 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
0a7de745
A
904 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
905 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
cb323159 906 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
0a7de745 907 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
908
909#if CONFIG_MACF
6d2010ae 910 if (flags & MNT_MULTILABEL) {
2d21ac55
A
911 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
912 error = EINVAL;
913 goto out1;
914 }
915 mp->mnt_flag |= MNT_MULTILABEL;
916 }
917#endif
6d2010ae 918 /*
f427ee49
A
919 * Process device path for local file systems if requested.
920 *
921 * Snapshot and mount-by-role mounts do not use this path; they are
922 * passing other opaque data in the device path field.
923 *
924 * Basesystemroot mounts pass a device path to be resolved here,
925 * but it's just a char * already inside the kernel, which
926 * kernel_mount() shoved into a user_addr_t to call us. So for such
927 * mounts we must skip copyin (both of the address and of the string
928 * in NDINIT).
6d2010ae 929 */
39037602 930 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
f427ee49
A
931 !(internal_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VOLBYROLE_MASK))) {
932 boolean_t do_copyin_devpath = true;
933#if CONFIG_BASESYSTEMROOT
934 if (internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) {
935 // KERNEL_MOUNT_BASESYSTEMROOT implies subtle behavior worth noting:
936 // We have been passed fsmountargs, which is typed as a user_addr_t,
937 // but is actually a char ** pointing to a (kernelspace) string.
938 // We manually unpack it with a series of casts and dereferences
939 // that reverses what was done just above us on the stack in
940 // imageboot_pivot_image().
941 // After retrieving the path to the dev node (which we will NDINIT
942 // in a moment), we pass NULL fsmountargs on to the filesystem.
943 _Static_assert(sizeof(char **) == sizeof(fsmountargs), "fsmountargs should fit a (kernel) address");
944 char **devnamepp = (char **)fsmountargs;
945 char *devnamep = *devnamepp;
946 devpath = CAST_USER_ADDR_T(devnamep);
947 do_copyin_devpath = false;
948 fsmountargs = USER_ADDR_NULL;
949
950 //Now that we have a mp, denote that this mount is for the basesystem.
951 mp->mnt_supl_kern_flag |= MNTK_SUPL_BASESYSTEM;
952 }
953#endif // CONFIG_BASESYSTEMROOT
954
955 if (do_copyin_devpath) {
956 if (vfs_context_is64bit(ctx)) {
957 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
958 goto out1;
959 }
960 fsmountargs += sizeof(devpath);
961 } else {
962 user32_addr_t tmp;
963 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
964 goto out1;
965 }
966 /* munge into LP64 addr */
967 devpath = CAST_USER_ADDR_T(tmp);
968 fsmountargs += sizeof(tmp);
0a7de745 969 }
91447636
A
970 }
971
6d2010ae 972 /* Lookup device and authorize access to it */
91447636 973 if ((devpath)) {
6d2010ae
A
974 struct nameidata nd;
975
f427ee49
A
976 enum uio_seg seg = UIO_USERSPACE;
977#if CONFIG_BASESYSTEMROOT
978 if (internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) {
979 seg = UIO_SYSSPACE;
980 }
981#endif // CONFIG_BASESYSTEMROOT
982
983 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, seg, devpath, ctx);
0a7de745 984 if ((error = namei(&nd))) {
91447636 985 goto out1;
0a7de745 986 }
91447636 987
3e170ce0 988 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 989 devvp = nd.ni_vp;
91447636 990
6d2010ae 991 nameidone(&nd);
91447636
A
992
993 if (devvp->v_type != VBLK) {
994 error = ENOTBLK;
995 goto out2;
996 }
997 if (major(devvp->v_rdev) >= nblkdev) {
998 error = ENXIO;
999 goto out2;
1000 }
1001 /*
0a7de745
A
1002 * If mount by non-root, then verify that user has necessary
1003 * permissions on the device.
1004 */
2d21ac55 1005 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
1006 mode_t accessmode = KAUTH_VNODE_READ_DATA;
1007
0a7de745 1008 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
91447636 1009 accessmode |= KAUTH_VNODE_WRITE_DATA;
0a7de745
A
1010 }
1011 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) {
91447636 1012 goto out2;
0a7de745 1013 }
91447636
A
1014 }
1015 }
6d2010ae
A
1016 /* On first mount, preflight and open device */
1017 if (devpath && ((flags & MNT_UPDATE) == 0)) {
0a7de745 1018 if ((error = vnode_ref(devvp))) {
91447636 1019 goto out2;
0a7de745 1020 }
91447636 1021 /*
0a7de745
A
1022 * Disallow multiple mounts of the same device.
1023 * Disallow mounting of a device that is currently in use
1024 * (except for root, which might share swap device for miniroot).
1025 * Flush out any old buffers remaining from a previous use.
1026 */
1027 if ((error = vfs_mountedon(devvp))) {
91447636 1028 goto out3;
0a7de745 1029 }
39037602 1030
91447636
A
1031 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
1032 error = EBUSY;
1033 goto out3;
1034 }
0a7de745 1035 if ((error = VNOP_FSYNC(devvp, MNT_WAIT, ctx))) {
91447636
A
1036 error = ENOTBLK;
1037 goto out3;
1038 }
0a7de745 1039 if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
91447636 1040 goto out3;
0a7de745 1041 }
91447636
A
1042
1043 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
1044#if CONFIG_MACF
1045 error = mac_vnode_check_open(ctx,
1046 devvp,
0a7de745
A
1047 ronly ? FREAD : FREAD | FWRITE);
1048 if (error) {
2d21ac55 1049 goto out3;
0a7de745 1050 }
2d21ac55 1051#endif /* MAC */
0a7de745 1052 if ((error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, ctx))) {
91447636 1053 goto out3;
0a7de745 1054 }
91447636
A
1055
1056 mp->mnt_devvp = devvp;
1057 device_vnode = devvp;
6d2010ae 1058 } else if ((mp->mnt_flag & MNT_RDONLY) &&
0a7de745
A
1059 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
1060 (device_vnode = mp->mnt_devvp)) {
6d2010ae
A
1061 dev_t dev;
1062 int maj;
1063 /*
1064 * If upgrade to read-write by non-root, then verify
1065 * that user has necessary permissions on the device.
1066 */
1067 vnode_getalways(device_vnode);
b0d623f7 1068
6d2010ae 1069 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 1070 (error = vnode_authorize(device_vnode, NULL,
0a7de745
A
1071 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
1072 ctx)) != 0) {
6d2010ae
A
1073 vnode_put(device_vnode);
1074 goto out2;
1075 }
b0d623f7 1076
6d2010ae
A
1077 /* Tell the device that we're upgrading */
1078 dev = (dev_t)device_vnode->v_rdev;
1079 maj = major(dev);
b0d623f7 1080
0a7de745 1081 if ((u_int)maj >= (u_int)nblkdev) {
6d2010ae 1082 panic("Volume mounted on a device with invalid major number.");
0a7de745 1083 }
b0d623f7 1084
6d2010ae
A
1085 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
1086 vnode_put(device_vnode);
91447636 1087 device_vnode = NULLVP;
6d2010ae
A
1088 if (error != 0) {
1089 goto out2;
1090 }
91447636 1091 }
cb323159
A
1092 } // localargs && !(snapshot | data | vm)
1093
2d21ac55 1094#if CONFIG_MACF
6d2010ae 1095 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
1096 mac_mount_label_init(mp);
1097 mac_mount_label_associate(ctx, mp);
1098 }
6d2010ae
A
1099 if (labelstr) {
1100 if ((flags & MNT_UPDATE) != 0) {
1101 error = mac_mount_check_label_update(ctx, mp);
0a7de745 1102 if (error != 0) {
2d21ac55 1103 goto out3;
0a7de745 1104 }
2d21ac55 1105 }
2d21ac55
A
1106 }
1107#endif
1c79356b 1108 /*
cb323159
A
1109 * Mount the filesystem. We already asserted that internal_flags
1110 * cannot have more than one mount-by-role bit set.
1c79356b 1111 */
39037602
A
1112 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
1113 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
1114 (caddr_t)fsmountargs, 0, ctx);
cb323159
A
1115 } else if (internal_flags & KERNEL_MOUNT_DATAVOL) {
1116#if CONFIG_ROSV_STARTUP
1117 struct mount *origin_mp = (struct mount*)fsmountargs;
1118 fs_role_mount_args_t frma = {origin_mp, VFS_DATA_ROLE};
1119 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1120 if (error) {
1121 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE, error);
1122 } else {
1123 /* Mark volume associated with system volume */
1124 mp->mnt_kern_flag |= MNTK_SYSTEM;
1125
1126 /* Attempt to acquire the mnt_devvp and set it up */
1127 struct vnode *mp_devvp = NULL;
1128 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1129 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1130 0, &mp_devvp, vfs_context_kernel());
1131 if (!lerr) {
1132 mp->mnt_devvp = mp_devvp;
1133 //vnode_lookup took an iocount, need to drop it.
1134 vnode_put(mp_devvp);
1135 // now set `device_vnode` to the devvp that was acquired.
1136 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1137 // note that though the iocount above was dropped, the mount acquires
1138 // an implicit reference against the device.
1139 device_vnode = mp_devvp;
1140 }
1141 }
1142 }
1143#else
1144 error = EINVAL;
1145#endif
1146 } else if (internal_flags & KERNEL_MOUNT_VMVOL) {
1147#if CONFIG_MOUNT_VM
1148 struct mount *origin_mp = (struct mount*)fsmountargs;
1149 fs_role_mount_args_t frma = {origin_mp, VFS_VM_ROLE};
1150 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1151 if (error) {
1152 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE, error);
1153 } else {
1154 /* Mark volume associated with system volume and a swap mount */
1155 mp->mnt_kern_flag |= (MNTK_SYSTEM | MNTK_SWAP_MOUNT);
1156 /* Attempt to acquire the mnt_devvp and set it up */
1157 struct vnode *mp_devvp = NULL;
1158 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1159 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1160 0, &mp_devvp, vfs_context_kernel());
1161 if (!lerr) {
1162 mp->mnt_devvp = mp_devvp;
1163 //vnode_lookup took an iocount, need to drop it.
1164 vnode_put(mp_devvp);
1165
1166 // now set `device_vnode` to the devvp that was acquired.
1167 // note that though the iocount above was dropped, the mount acquires
1168 // an implicit reference against the device.
1169 device_vnode = mp_devvp;
1170 }
1171 }
1172 }
1173#else
1174 error = EINVAL;
f427ee49
A
1175#endif
1176 } else if ((internal_flags & KERNEL_MOUNT_PREBOOTVOL) || (internal_flags & KERNEL_MOUNT_RECOVERYVOL)) {
1177#if CONFIG_MOUNT_PREBOOTRECOVERY
1178 struct mount *origin_mp = (struct mount*)fsmountargs;
1179 uint32_t mount_role = 0;
1180 if (internal_flags & KERNEL_MOUNT_PREBOOTVOL) {
1181 mount_role = VFS_PREBOOT_ROLE;
1182 } else if (internal_flags & KERNEL_MOUNT_RECOVERYVOL) {
1183 mount_role = VFS_RECOVERY_ROLE;
1184 }
1185
1186 if (mount_role != 0) {
1187 fs_role_mount_args_t frma = {origin_mp, mount_role};
1188 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1189 if (error) {
1190 printf("MOUNT-BY-ROLE (%d) failed! (%d)", mount_role, error);
1191 } else {
1192 // NOT YET - need to qualify how this interacts with shutdown, ERP/ERB, etc
1193 /* Mark volume associated with system volume */
1194 //mp->mnt_kern_flag |= MNTK_SYSTEM;
1195 /* Attempt to acquire the mnt_devvp and set it up */
1196 struct vnode *mp_devvp = NULL;
1197 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1198 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1199 0, &mp_devvp, vfs_context_kernel());
1200 if (!lerr) {
1201 mp->mnt_devvp = mp_devvp;
1202 //vnode_lookup took an iocount, need to drop it.
1203 vnode_put(mp_devvp);
1204
1205 // now set `device_vnode` to the devvp that was acquired.
1206 // note that though the iocount above was dropped, the mount acquires
1207 // an implicit reference against the device.
1208 device_vnode = mp_devvp;
1209 }
1210 }
1211 }
1212 } else {
1213 printf("MOUNT-BY-ROLE (%d) failed - ROLE UNRECOGNIZED! (%d)", mount_role, error);
1214 error = EINVAL;
1215 }
1216#else
1217 error = EINVAL;
cb323159 1218#endif
39037602
A
1219 } else {
1220 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
1221 }
d12e1678 1222
6d2010ae 1223 if (flags & MNT_UPDATE) {
0a7de745 1224 if (mp->mnt_kern_flag & MNTK_WANTRDWR) {
1c79356b 1225 mp->mnt_flag &= ~MNT_RDONLY;
0a7de745
A
1226 }
1227 mp->mnt_flag &= ~
1c79356b 1228 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
0a7de745
A
1229 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
1230 if (error) {
6d2010ae 1231 mp->mnt_flag = flag; /* restore flag value */
0a7de745 1232 }
91447636
A
1233 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
1234 lck_rw_done(&mp->mnt_rwlock);
743b1565 1235 is_rwlock_locked = FALSE;
0a7de745 1236 if (!error) {
2d21ac55 1237 enablequotas(mp, ctx);
0a7de745 1238 }
6d2010ae 1239 goto exit;
1c79356b 1240 }
6d2010ae 1241
1c79356b
A
1242 /*
1243 * Put the new filesystem on the mount list after root.
1244 */
6601e61a 1245 if (error == 0) {
0a7de745 1246 struct vfs_attr vfsattr;
2d21ac55 1247#if CONFIG_MACF
cb323159
A
1248 error = mac_mount_check_mount_late(ctx, mp);
1249 if (error != 0) {
f427ee49 1250 goto out4;
cb323159
A
1251 }
1252
2d21ac55
A
1253 if (vfs_flags(mp) & MNT_MULTILABEL) {
1254 error = VFS_ROOT(mp, &rvp, ctx);
1255 if (error) {
1256 printf("%s() VFS_ROOT returned %d\n", __func__, error);
f427ee49 1257 goto out4;
2d21ac55 1258 }
2d21ac55 1259 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
0a7de745 1260 /*
b0d623f7
A
1261 * drop reference provided by VFS_ROOT
1262 */
1263 vnode_put(rvp);
1264
0a7de745 1265 if (error) {
f427ee49 1266 goto out4;
0a7de745 1267 }
2d21ac55 1268 }
0a7de745 1269#endif /* MAC */
2d21ac55
A
1270
1271 vnode_lock_spin(vp);
1272 CLR(vp->v_flag, VMOUNT);
91447636
A
1273 vp->v_mountedhere = mp;
1274 vnode_unlock(vp);
1275
2d21ac55
A
1276 /*
1277 * taking the name_cache_lock exclusively will
1278 * insure that everyone is out of the fast path who
1279 * might be trying to use a now stale copy of
1280 * vp->v_mountedhere->mnt_realrootvp
1281 * bumping mount_generation causes the cached values
1282 * to be invalidated
1283 */
1284 name_cache_lock();
1285 mount_generation++;
1286 name_cache_unlock();
1287
b0d623f7
A
1288 error = vnode_ref(vp);
1289 if (error != 0) {
1290 goto out4;
1291 }
1292
1293 have_usecount = TRUE;
91447636 1294
2d21ac55 1295 error = checkdirs(vp, ctx);
0a7de745 1296 if (error != 0) {
6601e61a
A
1297 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1298 goto out4;
1299 }
39037602
A
1300 /*
1301 * there is no cleanup code here so I have made it void
91447636
A
1302 * we need to revisit this
1303 */
2d21ac55 1304 (void)VFS_START(mp, 0, ctx);
1c79356b 1305
6d2010ae
A
1306 if (mount_list_add(mp) != 0) {
1307 /*
1308 * The system is shutting down trying to umount
1309 * everything, so fail with a plausible errno.
1310 */
1311 error = EBUSY;
b0d623f7
A
1312 goto out4;
1313 }
6601e61a
A
1314 lck_rw_done(&mp->mnt_rwlock);
1315 is_rwlock_locked = FALSE;
1316
2d21ac55
A
1317 /* Check if this mounted file system supports EAs or named streams. */
1318 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1319 VFSATTR_INIT(&vfsattr);
1320 VFSATTR_WANTED(&vfsattr, f_capabilities);
1321 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 1322 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
1323 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1324 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1325 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1326 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1327 }
1328#if NAMEDSTREAMS
1329 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1330 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1331 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1332 }
1333#endif
1334 /* Check if this file system supports path from id lookups. */
1335 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1336 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1337 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1338 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1339 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1340 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1341 }
39037602
A
1342
1343 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
0a7de745 1344 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
39037602
A
1345 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1346 }
2d21ac55
A
1347 }
1348 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1349 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1350 }
1351 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1352 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1353 }
1c79356b 1354 /* increment the operations count */
b0d623f7 1355 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1356 enablequotas(mp, ctx);
91447636
A
1357
1358 if (device_vnode) {
1359 device_vnode->v_specflags |= SI_MOUNTEDON;
1360
1361 /*
1362 * cache the IO attributes for the underlying physical media...
1363 * an error return indicates the underlying driver doesn't
1364 * support all the queries necessary... however, reasonable
1365 * defaults will have been set, so no reason to bail or care
1366 */
1367 vfs_init_io_attributes(device_vnode, mp);
39037602 1368 }
6601e61a
A
1369
1370 /* Now that mount is setup, notify the listeners */
6d2010ae 1371 vfs_notify_mount(pvp);
3e170ce0 1372 IOBSDMountChange(mp, kIOMountChangeMount);
1c79356b 1373 } else {
6d2010ae
A
1374 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1375 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1376 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
0a7de745 1377 mp->mnt_vtable->vfc_name, error);
6d2010ae
A
1378 }
1379
2d21ac55 1380 vnode_lock_spin(vp);
1c79356b 1381 CLR(vp->v_flag, VMOUNT);
6601e61a 1382 vnode_unlock(vp);
91447636
A
1383 mount_list_lock();
1384 mp->mnt_vtable->vfc_refcount--;
1385 mount_list_unlock();
55e303ae 1386
0a7de745 1387 if (device_vnode) {
91447636 1388 vnode_rele(device_vnode);
0a7de745 1389 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD | FWRITE, ctx);
91447636
A
1390 }
1391 lck_rw_done(&mp->mnt_rwlock);
743b1565 1392 is_rwlock_locked = FALSE;
39037602 1393
6d2010ae
A
1394 /*
1395 * if we get here, we have a mount structure that needs to be freed,
1396 * but since the coveredvp hasn't yet been updated to point at it,
1397 * no need to worry about other threads holding a crossref on this mp
1398 * so it's ok to just free it
1399 */
91447636 1400 mount_lock_destroy(mp);
2d21ac55
A
1401#if CONFIG_MACF
1402 mac_mount_label_destroy(mp);
1403#endif
f427ee49
A
1404 zfree(mount_zone, mp);
1405 did_set_lmount = false;
1c79356b 1406 }
6d2010ae 1407exit:
91447636 1408 /*
6d2010ae 1409 * drop I/O count on the device vp if there was one
91447636 1410 */
0a7de745
A
1411 if (devpath && devvp) {
1412 vnode_put(devvp);
1413 }
b0d623f7 1414
f427ee49
A
1415 if (did_set_lmount) {
1416 mount_lock_spin(mp);
1417 mp->mnt_lflag &= ~MNT_LMOUNT;
1418 mount_unlock(mp);
1419 }
1420
0a7de745 1421 return error;
b0d623f7 1422
6d2010ae 1423/* Error condition exits */
6601e61a 1424out4:
2d21ac55 1425 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1426
1427 /*
6d2010ae
A
1428 * If the mount has been placed on the covered vp,
1429 * it may have been discovered by now, so we have
1430 * to treat this just like an unmount
1431 */
1432 mount_lock_spin(mp);
1433 mp->mnt_lflag |= MNT_LDEAD;
1434 mount_unlock(mp);
1435
6601e61a 1436 if (device_vnode != NULLVP) {
b0d623f7 1437 vnode_rele(device_vnode);
0a7de745
A
1438 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
1439 ctx);
b0d623f7 1440 did_rele = TRUE;
6601e61a 1441 }
6d2010ae 1442
2d21ac55 1443 vnode_lock_spin(vp);
6d2010ae
A
1444
1445 mp->mnt_crossref++;
6601e61a 1446 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1447
6601e61a 1448 vnode_unlock(vp);
6d2010ae 1449
b0d623f7
A
1450 if (have_usecount) {
1451 vnode_rele(vp);
1452 }
91447636 1453out3:
0a7de745 1454 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele)) {
2d21ac55 1455 vnode_rele(devvp);
0a7de745 1456 }
91447636 1457out2:
0a7de745
A
1458 if (devpath && devvp) {
1459 vnode_put(devvp);
1460 }
91447636 1461out1:
743b1565
A
1462 /* Release mnt_rwlock only when it was taken */
1463 if (is_rwlock_locked == TRUE) {
f427ee49
A
1464 if (flag_set) {
1465 mp->mnt_flag = flag; /* restore mnt_flag value */
1466 }
743b1565
A
1467 lck_rw_done(&mp->mnt_rwlock);
1468 }
39037602 1469
f427ee49
A
1470 if (did_set_lmount) {
1471 mount_lock_spin(mp);
1472 mp->mnt_lflag &= ~MNT_LMOUNT;
1473 mount_unlock(mp);
1474 }
1475
6601e61a 1476 if (mntalloc) {
0a7de745 1477 if (mp->mnt_crossref) {
6d2010ae 1478 mount_dropcrossref(mp, vp, 0);
0a7de745 1479 } else {
6d2010ae 1480 mount_lock_destroy(mp);
2d21ac55 1481#if CONFIG_MACF
6d2010ae 1482 mac_mount_label_destroy(mp);
2d21ac55 1483#endif
f427ee49 1484 zfree(mount_zone, mp);
6d2010ae 1485 }
b0d623f7 1486 }
b0d623f7 1487 if (vfsp_ref) {
6601e61a
A
1488 mount_list_lock();
1489 vfsp->vfc_refcount--;
1490 mount_list_unlock();
6601e61a 1491 }
91447636 1492
0a7de745 1493 return error;
1c79356b
A
1494}
1495
39037602 1496/*
b7266188
A
1497 * Flush in-core data, check for competing mount attempts,
1498 * and set VMOUNT
1499 */
6d2010ae
A
1500int
1501prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1502{
39236c6e
A
1503#if !CONFIG_MACF
1504#pragma unused(cnp,fsname)
1505#endif
b7266188
A
1506 struct vnode_attr va;
1507 int error;
1508
6d2010ae
A
1509 if (!skip_auth) {
1510 /*
1511 * If the user is not root, ensure that they own the directory
1512 * onto which we are attempting to mount.
1513 */
1514 VATTR_INIT(&va);
1515 VATTR_WANTED(&va, va_uid);
1516 if ((error = vnode_getattr(vp, &va, ctx)) ||
0a7de745
A
1517 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1518 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1519 error = EPERM;
1520 goto out;
1521 }
b7266188
A
1522 }
1523
0a7de745 1524 if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
b7266188 1525 goto out;
0a7de745 1526 }
b7266188 1527
0a7de745 1528 if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
b7266188 1529 goto out;
0a7de745 1530 }
b7266188
A
1531
1532 if (vp->v_type != VDIR) {
1533 error = ENOTDIR;
1534 goto out;
1535 }
1536
1537 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1538 error = EBUSY;
1539 goto out;
1540 }
1541
1542#if CONFIG_MACF
1543 error = mac_mount_check_mount(ctx, vp,
1544 cnp, fsname);
0a7de745 1545 if (error != 0) {
b7266188 1546 goto out;
0a7de745 1547 }
b7266188
A
1548#endif
1549
1550 vnode_lock_spin(vp);
1551 SET(vp->v_flag, VMOUNT);
1552 vnode_unlock(vp);
1553
1554out:
1555 return error;
1556}
1557
6d2010ae
A
1558#if CONFIG_IMGSRC_ACCESS
1559
cb323159
A
/* Build-time switch: set to 1 to compile in imgsrc debug tracing. */
#define DEBUG_IMGSRC 0

/*
 * IMGSRC_DEBUG(fmt, ...) prints with an "imgsrc: " prefix when tracing is
 * enabled; otherwise it expands to an empty statement and its arguments are
 * not evaluated.
 */
#if !DEBUG_IMGSRC
#define IMGSRC_DEBUG(args...) do { } while(0)
#else
#define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
#endif
6d2010ae 1567
b7266188
A
1568static int
1569authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1570{
1571 struct nameidata nd;
6d2010ae 1572 vnode_t vp, realdevvp;
b7266188
A
1573 mode_t accessmode;
1574 int error;
cb323159
A
1575 enum uio_seg uio = UIO_USERSPACE;
1576
1577 if (ctx == vfs_context_kernel()) {
1578 uio = UIO_SYSSPACE;
1579 }
b7266188 1580
cb323159 1581 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, uio, devpath, ctx);
0a7de745 1582 if ((error = namei(&nd))) {
6d2010ae 1583 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1584 return error;
6d2010ae 1585 }
b7266188 1586
b7266188 1587 vp = nd.ni_vp;
b7266188 1588
6d2010ae
A
1589 if (!vnode_isblk(vp)) {
1590 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1591 error = ENOTBLK;
1592 goto out;
1593 }
6d2010ae
A
1594
1595 realdevvp = mp->mnt_devvp;
1596 if (realdevvp == NULLVP) {
1597 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1598 error = ENXIO;
1599 goto out;
1600 }
6d2010ae
A
1601
1602 error = vnode_getwithref(realdevvp);
1603 if (error != 0) {
1604 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1605 goto out;
1606 }
1607
1608 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1609 IMGSRC_DEBUG("Wrong dev_t.\n");
1610 error = ENXIO;
1611 goto out1;
1612 }
1613
1614 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1615
b7266188
A
1616 /*
1617 * If mount by non-root, then verify that user has necessary
1618 * permissions on the device.
1619 */
1620 if (!vfs_context_issuser(ctx)) {
1621 accessmode = KAUTH_VNODE_READ_DATA;
0a7de745 1622 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
b7266188 1623 accessmode |= KAUTH_VNODE_WRITE_DATA;
0a7de745 1624 }
6d2010ae
A
1625 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1626 IMGSRC_DEBUG("Access denied.\n");
1627 goto out1;
1628 }
b7266188
A
1629 }
1630
1631 *devvpp = vp;
6d2010ae
A
1632
1633out1:
1634 vnode_put(realdevvp);
cb323159 1635
b7266188 1636out:
6d2010ae 1637 nameidone(&nd);
cb323159 1638
b7266188
A
1639 if (error) {
1640 vnode_put(vp);
1641 }
1642
1643 return error;
1644}
1645
1646/*
1647 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1648 * and call checkdirs()
1649 */
1650static int
1651place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1652{
1653 int error;
1654
1655 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1656
cb323159
A
1657 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1658 mp->mnt_vtable->vfc_name, vnode_getname(vp));
1659
b7266188
A
1660 vnode_lock_spin(vp);
1661 CLR(vp->v_flag, VMOUNT);
1662 vp->v_mountedhere = mp;
1663 vnode_unlock(vp);
1664
1665 /*
1666 * taking the name_cache_lock exclusively will
1667 * insure that everyone is out of the fast path who
1668 * might be trying to use a now stale copy of
1669 * vp->v_mountedhere->mnt_realrootvp
1670 * bumping mount_generation causes the cached values
1671 * to be invalidated
1672 */
1673 name_cache_lock();
1674 mount_generation++;
1675 name_cache_unlock();
1676
1677 error = vnode_ref(vp);
1678 if (error != 0) {
1679 goto out;
1680 }
1681
1682 error = checkdirs(vp, ctx);
0a7de745 1683 if (error != 0) {
b7266188
A
1684 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1685 vnode_rele(vp);
1686 goto out;
1687 }
1688
1689out:
1690 if (error != 0) {
1691 mp->mnt_vnodecovered = NULLVP;
1692 }
1693 return error;
1694}
1695
1696static void
1697undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1698{
1699 vnode_rele(vp);
1700 vnode_lock_spin(vp);
1701 vp->v_mountedhere = (mount_t)NULL;
1702 vnode_unlock(vp);
1703
1704 mp->mnt_vnodecovered = NULLVP;
1705}
1706
1707static int
1708mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1709{
1710 int error;
1711
1712 /* unmount in progress return error */
1713 mount_lock_spin(mp);
f427ee49 1714 if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
b7266188
A
1715 mount_unlock(mp);
1716 return EBUSY;
1717 }
1718 mount_unlock(mp);
1719 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1720
1721 /*
1722 * We only allow the filesystem to be reloaded if it
1723 * is currently mounted read-only.
1724 */
1725 if ((flags & MNT_RELOAD) &&
0a7de745 1726 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
b7266188
A
1727 error = ENOTSUP;
1728 goto out;
1729 }
1730
1731 /*
1732 * Only root, or the user that did the original mount is
1733 * permitted to update it.
1734 */
1735 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
0a7de745 1736 (!vfs_context_issuser(ctx))) {
b7266188
A
1737 error = EPERM;
1738 goto out;
1739 }
1740#if CONFIG_MACF
1741 error = mac_mount_check_remount(ctx, mp);
1742 if (error != 0) {
1743 goto out;
1744 }
1745#endif
1746
1747out:
1748 if (error) {
1749 lck_rw_done(&mp->mnt_rwlock);
1750 }
1751
1752 return error;
1753}
1754
39037602 1755static void
b7266188
A
1756mount_end_update(mount_t mp)
1757{
1758 lck_rw_done(&mp->mnt_rwlock);
1759}
1760
1761static int
6d2010ae
A
1762get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1763{
1764 vnode_t vp;
1765
1766 if (height >= MAX_IMAGEBOOT_NESTING) {
1767 return EINVAL;
1768 }
1769
1770 vp = imgsrc_rootvnodes[height];
1771 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1772 *rvpp = vp;
1773 return 0;
1774 } else {
1775 return ENOENT;
1776 }
1777}
1778
1779static int
cb323159
A
1780relocate_imageboot_source(vnode_t pvp, vnode_t vp,
1781 struct componentname *cnp, const char *fsname, vfs_context_t ctx,
0a7de745 1782 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1783{
1784 int error;
1785 mount_t mp;
1786 boolean_t placed = FALSE;
b7266188
A
1787 struct vfstable *vfsp;
1788 user_addr_t devpath;
1789 char *old_mntonname;
6d2010ae 1790 vnode_t rvp;
cb323159 1791 vnode_t devvp;
6d2010ae
A
1792 uint32_t height;
1793 uint32_t flags;
b7266188
A
1794
1795 /* If we didn't imageboot, nothing to move */
6d2010ae 1796 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1797 return EINVAL;
1798 }
1799
1800 /* Only root can do this */
1801 if (!vfs_context_issuser(ctx)) {
1802 return EPERM;
1803 }
1804
6d2010ae
A
1805 IMGSRC_DEBUG("looking for root vnode.\n");
1806
1807 /*
1808 * Get root vnode of filesystem we're moving.
1809 */
1810 if (by_index) {
1811 if (is64bit) {
1812 struct user64_mnt_imgsrc_args mia64;
1813 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1814 if (error != 0) {
1815 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1816 return error;
1817 }
1818
1819 height = mia64.mi_height;
1820 flags = mia64.mi_flags;
f427ee49 1821 devpath = (user_addr_t)mia64.mi_devpath;
6d2010ae
A
1822 } else {
1823 struct user32_mnt_imgsrc_args mia32;
1824 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1825 if (error != 0) {
1826 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1827 return error;
1828 }
1829
1830 height = mia32.mi_height;
1831 flags = mia32.mi_flags;
1832 devpath = mia32.mi_devpath;
1833 }
1834 } else {
1835 /*
1836 * For binary compatibility--assumes one level of nesting.
1837 */
1838 if (is64bit) {
0a7de745 1839 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
6d2010ae 1840 return error;
0a7de745 1841 }
6d2010ae
A
1842 } else {
1843 user32_addr_t tmp;
0a7de745 1844 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
6d2010ae 1845 return error;
0a7de745 1846 }
6d2010ae
A
1847
1848 /* munge into LP64 addr */
1849 devpath = CAST_USER_ADDR_T(tmp);
1850 }
1851
1852 height = 0;
1853 flags = 0;
1854 }
1855
1856 if (flags != 0) {
1857 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1858 return EINVAL;
1859 }
1860
1861 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1862 if (error != 0) {
cb323159 1863 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error);
b7266188
A
1864 return error;
1865 }
1866
cb323159 1867 IMGSRC_DEBUG("got old root vnode\n");
6d2010ae 1868
f427ee49 1869 old_mntonname = zalloc_flags(ZV_NAMEI, Z_WAITOK);
b7266188
A
1870
1871 /* Can only move once */
6d2010ae 1872 mp = vnode_mount(rvp);
b7266188 1873 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1874 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1875 error = EBUSY;
1876 goto out0;
1877 }
1878
cb323159 1879 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp->mnt_vtable->vfc_name);
6d2010ae
A
1880 IMGSRC_DEBUG("Starting updated.\n");
1881
b7266188 1882 /* Get exclusive rwlock on mount, authorize update on mp */
0a7de745 1883 error = mount_begin_update(mp, ctx, 0);
b7266188 1884 if (error != 0) {
6d2010ae 1885 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1886 goto out0;
1887 }
1888
39037602 1889 /*
b7266188
A
1890 * It can only be moved once. Flag is set under the rwlock,
1891 * so we're now safe to proceed.
1892 */
1893 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1894 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1895 goto out1;
1896 }
39037602 1897
6d2010ae 1898 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1899
1900 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1901 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1902 if (error != 0) {
6d2010ae 1903 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1904 goto out1;
1905 }
39037602 1906
6d2010ae
A
1907 IMGSRC_DEBUG("Covered vp OK.\n");
1908
b7266188
A
1909 /* Sanity check the name caller has provided */
1910 vfsp = mp->mnt_vtable;
1911 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
cb323159
A
1912 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1913 vfsp->vfc_name, fsname);
b7266188
A
1914 error = EINVAL;
1915 goto out2;
1916 }
1917
1918 /* Check the device vnode and update mount-from name, for local filesystems */
1919 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1920 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1921
1922 if (devpath != USER_ADDR_NULL) {
1923 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1924 if (error) {
6d2010ae 1925 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1926 goto out2;
1927 }
1928
1929 vnode_put(devvp);
1930 }
1931 }
1932
39037602 1933 /*
b7266188 1934 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1935 * and increment the name cache's mount generation
b7266188 1936 */
6d2010ae
A
1937
1938 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1939 error = place_mount_and_checkdirs(mp, vp, ctx);
1940 if (error != 0) {
1941 goto out2;
1942 }
1943
1944 placed = TRUE;
1945
3e170ce0
A
1946 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1947 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1948
1949 /* Forbid future moves */
1950 mount_lock(mp);
1951 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1952 mount_unlock(mp);
1953
1954 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1955 if (mount_list_add(mp) != 0) {
1956 /*
1957 * The system is shutting down trying to umount
1958 * everything, so fail with a plausible errno.
1959 */
1960 error = EBUSY;
b7266188
A
1961 goto out3;
1962 }
1963
1964 mount_end_update(mp);
6d2010ae 1965 vnode_put(rvp);
f427ee49 1966 zfree(ZV_NAMEI, old_mntonname);
b7266188 1967
6d2010ae
A
1968 vfs_notify_mount(pvp);
1969
b7266188
A
1970 return 0;
1971out3:
3e170ce0 1972 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1973
1974 mount_lock(mp);
1975 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1976 mount_unlock(mp);
1977
1978out2:
39037602 1979 /*
b7266188 1980 * Placing the mp on the vnode clears VMOUNT,
39037602 1981 * so cleanup is different after that point
b7266188
A
1982 */
1983 if (placed) {
1984 /* Rele the vp, clear VMOUNT and v_mountedhere */
1985 undo_place_on_covered_vp(mp, vp);
1986 } else {
1987 vnode_lock_spin(vp);
1988 CLR(vp->v_flag, VMOUNT);
1989 vnode_unlock(vp);
1990 }
1991out1:
1992 mount_end_update(mp);
1993
1994out0:
6d2010ae 1995 vnode_put(rvp);
f427ee49 1996 zfree(ZV_NAMEI, old_mntonname);
b7266188
A
1997 return error;
1998}
1999
cb323159
A
2000#if CONFIG_LOCKERBOOT
2001__private_extern__
2002int
2003mount_locker_protoboot(const char *fsname, const char *mntpoint,
2004 const char *pbdevpath)
2005{
2006 int error = -1;
2007 struct nameidata nd;
2008 boolean_t cleanup_nd = FALSE;
2009 vfs_context_t ctx = vfs_context_kernel();
2010 boolean_t is64 = TRUE;
2011 boolean_t by_index = TRUE;
2012 struct user64_mnt_imgsrc_args mia64 = {
2013 .mi_height = 0,
2014 .mi_flags = 0,
2015 .mi_devpath = CAST_USER_ADDR_T(pbdevpath),
2016 };
2017 user_addr_t mia64addr = CAST_USER_ADDR_T(&mia64);
2018
2019 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
2020 UIO_SYSSPACE, CAST_USER_ADDR_T(mntpoint), ctx);
2021 error = namei(&nd);
2022 if (error) {
2023 IMGSRC_DEBUG("namei: %d\n", error);
2024 goto out;
2025 }
2026
2027 cleanup_nd = TRUE;
2028 error = relocate_imageboot_source(nd.ni_dvp, nd.ni_vp,
2029 &nd.ni_cnd, fsname, ctx, is64, mia64addr, by_index);
2030
2031out:
2032 if (cleanup_nd) {
2033 int stashed = error;
2034
2035 error = vnode_put(nd.ni_vp);
2036 if (error) {
2037 panic("vnode_put() returned non-zero: %d", error);
2038 }
2039
2040 if (nd.ni_dvp) {
2041 error = vnode_put(nd.ni_dvp);
2042 if (error) {
2043 panic("vnode_put() returned non-zero: %d", error);
2044 }
2045 }
2046 nameidone(&nd);
2047
2048 error = stashed;
2049 }
2050 return error;
2051}
2052#endif /* CONFIG_LOCKERBOOT */
b7266188
A
2053#endif /* CONFIG_IMGSRC_ACCESS */
2054
91447636 2055void
2d21ac55 2056enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 2057{
9bccf70c
A
2058 struct nameidata qnd;
2059 int type;
2060 char qfpath[MAXPATHLEN];
91447636
A
2061 const char *qfname = QUOTAFILENAME;
2062 const char *qfopsname = QUOTAOPSNAME;
2063 const char *qfextension[] = INITQFNAMES;
9bccf70c 2064
2d21ac55 2065 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
0a7de745 2066 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0) {
b0d623f7
A
2067 return;
2068 }
39037602 2069 /*
9bccf70c
A
2070 * Enable filesystem disk quotas if necessary.
2071 * We ignore errors as this should not interfere with final mount
2072 */
0a7de745 2073 for (type = 0; type < MAXQUOTAS; type++) {
2d21ac55 2074 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae 2075 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
0a7de745
A
2076 CAST_USER_ADDR_T(qfpath), ctx);
2077 if (namei(&qnd) != 0) {
2078 continue; /* option file to trigger quotas is not present */
2079 }
91447636
A
2080 vnode_put(qnd.ni_vp);
2081 nameidone(&qnd);
0a7de745 2082 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 2083
2d21ac55 2084 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
2085 }
2086 return;
2087}
2088
2d21ac55
A
2089
2090static int
39037602 2091checkdirs_callback(proc_t p, void * arg)
2d21ac55 2092{
0a7de745 2093 struct cdirargs * cdrp = (struct cdirargs *)arg;
2d21ac55
A
2094 vnode_t olddp = cdrp->olddp;
2095 vnode_t newdp = cdrp->newdp;
2096 struct filedesc *fdp;
94ff46dc
A
2097 vnode_t new_cvp = newdp;
2098 vnode_t new_rvp = newdp;
2099 vnode_t old_cvp = NULL;
2100 vnode_t old_rvp = NULL;
2d21ac55
A
2101
2102 /*
2103 * XXX Also needs to iterate each thread in the process to see if it
2104 * XXX is using a per-thread current working directory, and, if so,
2105 * XXX update that as well.
2106 */
2107
94ff46dc
A
2108 /*
2109 * First, with the proc_fdlock held, check to see if we will need
2110 * to do any work. If not, we will get out fast.
2111 */
2d21ac55
A
2112 proc_fdlock(p);
2113 fdp = p->p_fd;
94ff46dc
A
2114 if (fdp == NULL ||
2115 (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
2d21ac55 2116 proc_fdunlock(p);
0a7de745 2117 return PROC_RETURNED;
2d21ac55 2118 }
2d21ac55
A
2119 proc_fdunlock(p);
2120
94ff46dc
A
2121 /*
2122 * Ok, we will have to do some work. Always take two refs
2123 * because we might need that many. We'll dispose of whatever
2124 * we ended up not using.
2125 */
2126 if (vnode_ref(newdp) != 0) {
2127 return PROC_RETURNED;
2d21ac55 2128 }
94ff46dc
A
2129 if (vnode_ref(newdp) != 0) {
2130 vnode_rele(newdp);
2131 return PROC_RETURNED;
2d21ac55 2132 }
94ff46dc 2133
bca245ac 2134 proc_dirs_lock_exclusive(p);
94ff46dc
A
2135 /*
2136 * Now do the work. Note: we dropped the proc_fdlock, so we
2137 * have to do all of the checks again.
2138 */
2139 proc_fdlock(p);
2140 fdp = p->p_fd;
2141 if (fdp != NULL) {
2142 if (fdp->fd_cdir == olddp) {
2143 old_cvp = olddp;
2144 fdp->fd_cdir = newdp;
2145 new_cvp = NULL;
2146 }
2147 if (fdp->fd_rdir == olddp) {
2148 old_rvp = olddp;
2149 fdp->fd_rdir = newdp;
2150 new_rvp = NULL;
2151 }
2152 }
2153 proc_fdunlock(p);
bca245ac 2154 proc_dirs_unlock_exclusive(p);
94ff46dc
A
2155
2156 /*
2157 * Dispose of any references that are no longer needed.
2158 */
2159 if (old_cvp != NULL) {
2160 vnode_rele(old_cvp);
2161 }
2162 if (old_rvp != NULL) {
2163 vnode_rele(old_rvp);
2d21ac55 2164 }
94ff46dc
A
2165 if (new_cvp != NULL) {
2166 vnode_rele(new_cvp);
2167 }
2168 if (new_rvp != NULL) {
2169 vnode_rele(new_rvp);
2170 }
2171
0a7de745 2172 return PROC_RETURNED;
2d21ac55
A
2173}
2174
2175
2176
1c79356b
A
2177/*
2178 * Scan all active processes to see if any of them have a current
2179 * or root directory onto which the new filesystem has just been
2180 * mounted. If so, replace them with the new mount point.
2181 */
6601e61a 2182static int
2d21ac55 2183checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 2184{
2d21ac55
A
2185 vnode_t newdp;
2186 vnode_t tvp;
6601e61a 2187 int err;
2d21ac55 2188 struct cdirargs cdr;
1c79356b 2189
0a7de745
A
2190 if (olddp->v_usecount == 1) {
2191 return 0;
2192 }
2d21ac55 2193 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
2194
2195 if (err != 0) {
6601e61a 2196#if DIAGNOSTIC
2d21ac55 2197 panic("mount: lost mount: error %d", err);
6601e61a 2198#endif
0a7de745 2199 return err;
6601e61a 2200 }
91447636 2201
2d21ac55
A
2202 cdr.olddp = olddp;
2203 cdr.newdp = newdp;
2204 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2205 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 2206
1c79356b 2207 if (rootvnode == olddp) {
91447636 2208 vnode_ref(newdp);
c3c9b80d 2209 lck_rw_lock_exclusive(&rootvnode_rw_lock);
fa4905b1 2210 tvp = rootvnode;
1c79356b 2211 rootvnode = newdp;
c3c9b80d 2212 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
91447636 2213 vnode_rele(tvp);
1c79356b 2214 }
91447636
A
2215
2216 vnode_put(newdp);
0a7de745 2217 return 0;
1c79356b
A
2218}
2219
2220/*
2221 * Unmount a file system.
2222 *
2223 * Note: unmount takes a path to the vnode mounted on as argument,
2224 * not special file (as before).
2225 */
1c79356b
A
2226/* ARGSUSED */
2227int
b0d623f7 2228unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 2229{
2d21ac55 2230 vnode_t vp;
1c79356b
A
2231 struct mount *mp;
2232 int error;
2233 struct nameidata nd;
2d21ac55 2234 vfs_context_t ctx = vfs_context_current();
91447636 2235
39037602 2236 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
0a7de745 2237 UIO_USERSPACE, uap->path, ctx);
55e303ae 2238 error = namei(&nd);
0a7de745
A
2239 if (error) {
2240 return error;
2241 }
1c79356b
A
2242 vp = nd.ni_vp;
2243 mp = vp->v_mount;
91447636 2244 nameidone(&nd);
1c79356b 2245
2d21ac55
A
2246#if CONFIG_MACF
2247 error = mac_mount_check_umount(ctx, mp);
2248 if (error != 0) {
2249 vnode_put(vp);
0a7de745 2250 return error;
2d21ac55
A
2251 }
2252#endif
55e303ae
A
2253 /*
2254 * Must be the root of the filesystem
2255 */
2256 if ((vp->v_flag & VROOT) == 0) {
91447636 2257 vnode_put(vp);
0a7de745 2258 return EINVAL;
55e303ae 2259 }
6601e61a 2260 mount_ref(mp, 0);
91447636 2261 vnode_put(vp);
6601e61a 2262 /* safedounmount consumes the mount ref */
0a7de745 2263 return safedounmount(mp, uap->flags, ctx);
2d21ac55
A
2264}
2265
2266int
39037602 2267vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
2268{
2269 mount_t mp;
2270
2271 mp = mount_list_lookupby_fsid(fsid, 0, 1);
2272 if (mp == (mount_t)0) {
0a7de745 2273 return ENOENT;
2d21ac55
A
2274 }
2275 mount_ref(mp, 0);
2276 mount_iterdrop(mp);
2277 /* safedounmount consumes the mount ref */
0a7de745 2278 return safedounmount(mp, flags, ctx);
55e303ae
A
2279}
2280
f427ee49
A
2281#define ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT \
2282 "com.apple.private.vfs.role-account-unmount"
2d21ac55 2283
55e303ae 2284/*
6601e61a 2285 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
2286 * Do the actual file system unmount, prevent some common foot shooting.
2287 */
2288int
2d21ac55 2289safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
2290{
2291 int error;
2d21ac55 2292 proc_t p = vfs_context_proc(ctx);
55e303ae 2293
316670eb
A
2294 /*
2295 * If the file system is not responding and MNT_NOBLOCK
2296 * is set and not a forced unmount then return EBUSY.
2297 */
2298 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
0a7de745 2299 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
316670eb
A
2300 error = EBUSY;
2301 goto out;
2302 }
2303
1c79356b 2304 /*
f427ee49
A
2305 * Skip authorization in two cases:
2306 * - If the process running the unmount has ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT.
2307 * This entitlement allows non-root processes unmount volumes mounted by
2308 * other processes.
2309 * - If the mount is tagged as permissive and this is not a forced-unmount
2310 * attempt.
1c79356b 2311 */
f427ee49
A
2312 if (!IOTaskHasEntitlement(current_task(), ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT) &&
2313 (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0)))) {
6d2010ae
A
2314 /*
2315 * Only root, or the user that did the original mount is
2316 * permitted to unmount this filesystem.
2317 */
2318 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
0a7de745 2319 (error = suser(kauth_cred_get(), &p->p_acflag))) {
6d2010ae 2320 goto out;
0a7de745 2321 }
6d2010ae 2322 }
1c79356b 2323 /*
f427ee49
A
2324 * Don't allow unmounting the root file system, or other volumes
2325 * associated with it (for example, the associated VM or DATA mounts) .
1c79356b 2326 */
cb323159 2327 if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
c3c9b80d
A
2328 if (!(mp->mnt_flag & MNT_ROOTFS)) {
2329 printf("attempt to unmount a system mount (%s), will return EBUSY\n",
2330 mp->mnt_vfsstat.f_mntonname);
2331 }
cb323159 2332 error = EBUSY; /* the root (or associated volumes) is always busy */
6601e61a
A
2333 goto out;
2334 }
1c79356b 2335
f427ee49
A
2336 /*
2337 * If the mount is providing the root filesystem's disk image
2338 * (i.e. imageboot), don't allow unmounting
2339 */
b7266188
A
2340 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
2341 error = EBUSY;
2342 goto out;
2343 }
b7266188 2344
0a7de745 2345 return dounmount(mp, flags, 1, ctx);
2d21ac55 2346
6601e61a
A
2347out:
2348 mount_drop(mp, 0);
0a7de745 2349 return error;
1c79356b
A
2350}
2351
2352/*
2353 * Do the actual file system unmount.
2354 */
2355int
2d21ac55 2356dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 2357{
2d21ac55 2358 vnode_t coveredvp = (vnode_t)0;
1c79356b 2359 int error;
91447636 2360 int needwakeup = 0;
91447636
A
2361 int forcedunmount = 0;
2362 int lflags = 0;
593a1d5f 2363 struct vnode *devvp = NULLVP;
6d2010ae 2364#if CONFIG_TRIGGERS
39236c6e 2365 proc_t p = vfs_context_proc(ctx);
6d2010ae 2366 int did_vflush = 0;
39236c6e 2367 int pflags_save = 0;
6d2010ae 2368#endif /* CONFIG_TRIGGERS */
91447636 2369
813fb2f6
A
2370#if CONFIG_FSE
2371 if (!(flags & MNT_FORCE)) {
2372 fsevent_unmount(mp, ctx); /* has to come first! */
2373 }
2374#endif
2375
91447636 2376 mount_lock(mp);
fe8ab488
A
2377
2378 /*
2379 * If already an unmount in progress just return EBUSY.
2380 * Even a forced unmount cannot override.
2381 */
f427ee49 2382 if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
0a7de745 2383 if (withref != 0) {
6601e61a 2384 mount_drop(mp, 1);
0a7de745 2385 }
fe8ab488 2386 mount_unlock(mp);
0a7de745 2387 return EBUSY;
9bccf70c 2388 }
39236c6e 2389
fe8ab488
A
2390 if (flags & MNT_FORCE) {
2391 forcedunmount = 1;
2392 mp->mnt_lflag |= MNT_LFORCE;
2393 }
2394
39236c6e 2395#if CONFIG_TRIGGERS
0a7de745 2396 if (flags & MNT_NOBLOCK && p != kernproc) {
39236c6e 2397 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
0a7de745 2398 }
39236c6e
A
2399#endif
2400
1c79356b 2401 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636 2402 mp->mnt_lflag |= MNT_LUNMOUNT;
0a7de745 2403 mp->mnt_flag &= ~MNT_ASYNC;
2d21ac55
A
2404 /*
2405 * anyone currently in the fast path that
2406 * trips over the cached rootvp will be
2407 * dumped out and forced into the slow path
2408 * to regenerate a new cached value
2409 */
2410 mp->mnt_realrootvp = NULLVP;
91447636 2411 mount_unlock(mp);
39037602 2412
fe8ab488
A
2413 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2414 /*
2415 * Force unmount any mounts in this filesystem.
2416 * If any unmounts fail - just leave them dangling.
2417 * Avoids recursion.
2418 */
2419 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2420 }
2421
2d21ac55
A
2422 /*
2423 * taking the name_cache_lock exclusively will
2424 * insure that everyone is out of the fast path who
2425 * might be trying to use a now stale copy of
2426 * vp->v_mountedhere->mnt_realrootvp
2427 * bumping mount_generation causes the cached values
2428 * to be invalidated
2429 */
2430 name_cache_lock();
2431 mount_generation++;
2432 name_cache_unlock();
2433
2434
91447636 2435 lck_rw_lock_exclusive(&mp->mnt_rwlock);
0a7de745 2436 if (withref != 0) {
6601e61a 2437 mount_drop(mp, 0);
0a7de745 2438 }
91447636
A
2439 error = 0;
2440 if (forcedunmount == 0) {
0a7de745 2441 ubc_umount(mp); /* release cached vnodes */
91447636 2442 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 2443 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
2444 if (error) {
2445 mount_lock(mp);
2446 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2447 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2448 mp->mnt_lflag &= ~MNT_LFORCE;
2449 goto out;
2450 }
2451 }
2452 }
6d2010ae 2453
3e170ce0
A
2454 IOBSDMountChange(mp, kIOMountChangeUnmount);
2455
6d2010ae
A
2456#if CONFIG_TRIGGERS
2457 vfs_nested_trigger_unmounts(mp, flags, ctx);
2458 did_vflush = 1;
39037602 2459#endif
0a7de745 2460 if (forcedunmount) {
91447636 2461 lflags |= FORCECLOSE;
0a7de745 2462 }
91447636
A
2463 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2464 if ((forcedunmount == 0) && error) {
2465 mount_lock(mp);
9bccf70c 2466 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2467 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2468 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
2469 goto out;
2470 }
91447636
A
2471
2472 /* make sure there are no one in the mount iterations or lookup */
2473 mount_iterdrain(mp);
2474
2d21ac55 2475 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 2476 if (error) {
91447636
A
2477 mount_iterreset(mp);
2478 mount_lock(mp);
1c79356b 2479 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2480 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2481 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
2482 goto out;
2483 }
2484
2485 /* increment the operations count */
0a7de745 2486 if (!error) {
b0d623f7 2487 OSAddAtomic(1, &vfs_nummntops);
0a7de745 2488 }
91447636 2489
0a7de745 2490 if (mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
2491 /* hold an io reference and drop the usecount before close */
2492 devvp = mp->mnt_devvp;
593a1d5f
A
2493 vnode_getalways(devvp);
2494 vnode_rele(devvp);
0a7de745
A
2495 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
2496 ctx);
b0d623f7 2497 vnode_clearmountedon(devvp);
593a1d5f 2498 vnode_put(devvp);
91447636
A
2499 }
2500 lck_rw_done(&mp->mnt_rwlock);
2501 mount_list_remove(mp);
2502 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2503
91447636 2504 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2505 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2506 /*
2507 * The covered vnode needs special handling. Trying to get an
2508 * iocount must not block here as this may lead to deadlocks
2509 * if the Filesystem to which the covered vnode belongs is
2510 * undergoing forced unmounts. Since we hold a usecount, the
2511 * vnode cannot be reused (it can, however, still be terminated)
2512 */
2513 vnode_getalways(coveredvp);
6d2010ae
A
2514 vnode_lock_spin(coveredvp);
2515
2516 mp->mnt_crossref++;
2517 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2518 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2519
2520 vnode_unlock(coveredvp);
2521 vnode_put(coveredvp);
1c79356b 2522 }
91447636
A
2523
2524 mount_list_lock();
2525 mp->mnt_vtable->vfc_refcount--;
2526 mount_list_unlock();
2527
0a7de745 2528 cache_purgevfs(mp); /* remove cache entries for this file sys */
91447636
A
2529 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2530 mount_lock(mp);
2531 mp->mnt_lflag |= MNT_LDEAD;
2532
2533 if (mp->mnt_lflag & MNT_LWAIT) {
0a7de745 2534 /*
91447636
A
2535 * do the wakeup here
2536 * in case we block in mount_refdrain
2537 * which will drop the mount lock
2538 * and allow anyone blocked in vfs_busy
2539 * to wakeup and see the LDEAD state
2540 */
2541 mp->mnt_lflag &= ~MNT_LWAIT;
2542 wakeup((caddr_t)mp);
1c79356b 2543 }
91447636 2544 mount_refdrain(mp);
cb323159
A
2545
2546 /* free disk_conditioner_info structure for this mount */
2547 disk_conditioner_unmount(mp);
2548
1c79356b 2549out:
91447636
A
2550 if (mp->mnt_lflag & MNT_LWAIT) {
2551 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2552 needwakeup = 1;
91447636 2553 }
6d2010ae 2554
6d2010ae 2555#if CONFIG_TRIGGERS
39236c6e 2556 if (flags & MNT_NOBLOCK && p != kernproc) {
0a7de745
A
2557 // Restore P_NOREMOTEHANG bit to its previous value
2558 if ((pflags_save & P_NOREMOTEHANG) == 0) {
39236c6e 2559 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
0a7de745 2560 }
39236c6e
A
2561 }
2562
39037602 2563 /*
6d2010ae 2564 * Callback and context are set together under the mount lock, and
39037602 2565 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2566 * and call out.
2567 */
2568 if (mp->mnt_triggercallback != NULL) {
2569 mount_unlock(mp);
2570 if (error == 0) {
2571 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2572 } else if (did_vflush) {
2573 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2574 }
2575 } else {
2576 mount_unlock(mp);
2577 }
39037602 2578#else
91447636 2579 mount_unlock(mp);
6d2010ae
A
2580#endif /* CONFIG_TRIGGERS */
2581
91447636
A
2582 lck_rw_done(&mp->mnt_rwlock);
2583
0a7de745 2584 if (needwakeup) {
1c79356b 2585 wakeup((caddr_t)mp);
0a7de745 2586 }
6d2010ae 2587
55e303ae 2588 if (!error) {
91447636 2589 if ((coveredvp != NULLVP)) {
fe8ab488 2590 vnode_t pvp = NULLVP;
b0d623f7 2591
fe8ab488
A
2592 /*
2593 * The covered vnode needs special handling. Trying to
2594 * get an iocount must not block here as this may lead
2595 * to deadlocks if the Filesystem to which the covered
2596 * vnode belongs is undergoing forced unmounts. Since we
2597 * hold a usecount, the vnode cannot be reused
2598 * (it can, however, still be terminated).
2599 */
2600 vnode_getalways(coveredvp);
6d2010ae
A
2601
2602 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2603 /*
2604 * We'll _try_ to detect if this really needs to be
2605 * done. The coveredvp can only be in termination (or
2606 * terminated) if the coveredvp's mount point is in a
2607 * forced unmount (or has been) since we still hold the
2608 * ref.
2609 */
2610 if (!vnode_isrecycled(coveredvp)) {
2611 pvp = vnode_getparent(coveredvp);
6d2010ae 2612#if CONFIG_TRIGGERS
fe8ab488
A
2613 if (coveredvp->v_resolve) {
2614 vnode_trigger_rearm(coveredvp, ctx);
2615 }
2616#endif
2617 }
2618
2619 vnode_rele(coveredvp);
91447636 2620 vnode_put(coveredvp);
fe8ab488 2621 coveredvp = NULLVP;
b0d623f7
A
2622
2623 if (pvp) {
2624 lock_vnode_and_post(pvp, NOTE_WRITE);
2625 vnode_put(pvp);
2626 }
91447636 2627 } else if (mp->mnt_flag & MNT_ROOTFS) {
0a7de745 2628 mount_lock_destroy(mp);
2d21ac55 2629#if CONFIG_MACF
0a7de745 2630 mac_mount_label_destroy(mp);
2d21ac55 2631#endif
f427ee49 2632 zfree(mount_zone, mp);
0a7de745 2633 } else {
91447636 2634 panic("dounmount: no coveredvp");
0a7de745 2635 }
55e303ae 2636 }
0a7de745 2637 return error;
1c79356b
A
2638}
2639
fe8ab488
A
2640/*
2641 * Unmount any mounts in this filesystem.
2642 */
2643void
2644dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2645{
0a7de745 2646 mount_t smp;
fe8ab488
A
2647 fsid_t *fsids, fsid;
2648 int fsids_sz;
2649 int count = 0, i, m = 0;
2650 vnode_t vp;
2651
2652 mount_list_lock();
2653
2654 // Get an array to hold the submounts fsids.
2655 TAILQ_FOREACH(smp, &mountlist, mnt_list)
0a7de745 2656 count++;
fe8ab488 2657 fsids_sz = count * sizeof(fsid_t);
f427ee49 2658 fsids = kheap_alloc(KHEAP_TEMP, fsids_sz, Z_NOWAIT);
fe8ab488
A
2659 if (fsids == NULL) {
2660 mount_list_unlock();
2661 goto out;
2662 }
0a7de745 2663 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
fe8ab488
A
2664
2665 /*
2666 * Fill the array with submount fsids.
2667 * Since mounts are always added to the tail of the mount list, the
39037602 2668 * list is always in mount order.
fe8ab488
A
2669 * For each mount check if the mounted-on vnode belongs to a
2670 * mount that's already added to our array of mounts to be unmounted.
2671 */
2672 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2673 vp = smp->mnt_vnodecovered;
0a7de745 2674 if (vp == NULL) {
fe8ab488 2675 continue;
0a7de745
A
2676 }
2677 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
fe8ab488
A
2678 for (i = 0; i <= m; i++) {
2679 if (fsids[i].val[0] == fsid.val[0] &&
2680 fsids[i].val[1] == fsid.val[1]) {
2681 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2682 break;
2683 }
2684 }
2685 }
2686 mount_list_unlock();
2687
2688 // Unmount the submounts in reverse order. Ignore errors.
2689 for (i = m; i > 0; i--) {
2690 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2691 if (smp) {
2692 mount_ref(smp, 0);
2693 mount_iterdrop(smp);
2694 (void) dounmount(smp, flags, 1, ctx);
2695 }
2696 }
2697out:
f427ee49 2698 kheap_free(KHEAP_TEMP, fsids, fsids_sz);
fe8ab488
A
2699}
2700
91447636
A
2701void
2702mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2703{
6d2010ae
A
2704 vnode_lock(dp);
2705 mp->mnt_crossref--;
2706
0a7de745 2707 if (mp->mnt_crossref < 0) {
6d2010ae 2708 panic("mount cross refs -ve");
0a7de745 2709 }
6d2010ae
A
2710
2711 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
0a7de745 2712 if (need_put) {
6d2010ae 2713 vnode_put_locked(dp);
0a7de745 2714 }
91447636 2715 vnode_unlock(dp);
6d2010ae
A
2716
2717 mount_lock_destroy(mp);
2718#if CONFIG_MACF
2719 mac_mount_label_destroy(mp);
2720#endif
f427ee49 2721 zfree(mount_zone, mp);
6d2010ae
A
2722 return;
2723 }
0a7de745 2724 if (need_put) {
6d2010ae 2725 vnode_put_locked(dp);
0a7de745 2726 }
6d2010ae 2727 vnode_unlock(dp);
91447636
A
2728}
2729
2730
1c79356b
A
/*
 * Sync each mounted filesystem.
 */
#if DIAGNOSTIC
/* When non-zero, the sync paths call vfs_bufstats() after syncing. */
int syncprt = 0;
#endif

/* When non-zero, the sync paths call vm_countdirtypages() after syncing. */
int print_vmpage_stat = 0;
a39ff7e2 2739
cb323159
A
2740/*
2741 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2742 * mounted read-write with the passed waitfor value.
2743 *
2744 * Parameters: mp mount-point descriptor per mounted file-system instance.
2745 * arg user argument (please see below)
2746 *
2747 * User argument is a pointer to 32 bit unsigned integer which describes the
2748 * type of waitfor value to set for calling VFS_SYNC(). If user argument is
2749 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2750 * waitfor value.
2751 *
2752 * Returns: VFS_RETURNED
2753 */
39037602 2754static int
cb323159 2755sync_callback(mount_t mp, void *arg)
1c79356b 2756{
91447636 2757 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488 2758 int asyncflag = mp->mnt_flag & MNT_ASYNC;
cb323159
A
2759 unsigned waitfor = MNT_NOWAIT;
2760
2761 if (arg) {
2762 waitfor = *(uint32_t*)arg;
2763 }
2764
2765 /* Sanity check for flags - these are the only valid combinations for the flag bits*/
2766 if (waitfor != MNT_WAIT &&
2767 waitfor != (MNT_WAIT | MNT_VOLUME) &&
2768 waitfor != MNT_NOWAIT &&
2769 waitfor != (MNT_NOWAIT | MNT_VOLUME) &&
2770 waitfor != MNT_DWAIT &&
2771 waitfor != (MNT_DWAIT | MNT_VOLUME)) {
2772 panic("Passed inappropriate waitfor %u to "
2773 "sync_callback()", waitfor);
2774 }
fe8ab488
A
2775
2776 mp->mnt_flag &= ~MNT_ASYNC;
cb323159 2777 (void)VFS_SYNC(mp, waitfor, vfs_context_kernel());
0a7de745 2778 if (asyncflag) {
fe8ab488 2779 mp->mnt_flag |= MNT_ASYNC;
0a7de745 2780 }
1c79356b 2781 }
1c79356b 2782
0a7de745 2783 return VFS_RETURNED;
fe8ab488 2784}
91447636 2785
91447636
A
2786/* ARGSUSED */
2787int
b0d623f7 2788sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2789{
fe8ab488 2790 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2791
fe8ab488
A
2792 if (print_vmpage_stat) {
2793 vm_countdirtypages();
2794 }
2795
2796#if DIAGNOSTIC
0a7de745 2797 if (syncprt) {
fe8ab488 2798 vfs_bufstats();
0a7de745 2799 }
fe8ab488
A
2800#endif /* DIAGNOSTIC */
2801 return 0;
2802}
2803
d9a64523
A
/*
 * Selects which mounts sync_internal_callback() acts on.
 */
typedef enum {
	SYNC_ALL                        = 0,    /* no filtering by media type */
	SYNC_ONLY_RELIABLE_MEDIA        = 1,    /* skip mounts not deemed reliable */
	SYNC_ONLY_UNRELIABLE_MEDIA      = 2     /* skip mounts deemed reliable */
} sync_type_t;
2809
2810static int
2811sync_internal_callback(mount_t mp, void *arg)
2812{
2813 if (arg) {
2814 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
0a7de745 2815 (mp->mnt_flag & MNT_LOCAL);
d9a64523
A
2816 sync_type_t sync_type = *((sync_type_t *)arg);
2817
0a7de745
A
2818 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
2819 return VFS_RETURNED;
cb323159 2820 } else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
0a7de745
A
2821 return VFS_RETURNED;
2822 }
d9a64523
A
2823 }
2824
2825 (void)sync_callback(mp, NULL);
2826
0a7de745 2827 return VFS_RETURNED;
d9a64523
A
2828}
2829
/* SYNC_THREAD_* bits; read and written under sync_mtx_lck. */
int sync_thread_state = 0;
/* How long sync_internal() waits for the sync thread, in seconds. */
int sync_timeout_seconds = 5;

#define SYNC_THREAD_RUN       0x0001    /* a sync pass has been requested */
#define SYNC_THREAD_RUNNING   0x0002    /* a sync thread currently exists */

#if CONFIG_PHYS_WRITE_ACCT
/* The running sync thread; set and cleared by sync_thread(). */
thread_t pm_sync_thread;
#endif /* CONFIG_PHYS_WRITE_ACCT */
2839
fe8ab488 2840static void
d9a64523 2841sync_thread(__unused void *arg, __unused wait_result_t wr)
fe8ab488 2842{
d9a64523 2843 sync_type_t sync_type;
f427ee49
A
2844#if CONFIG_PHYS_WRITE_ACCT
2845 pm_sync_thread = current_thread();
2846#endif /* CONFIG_PHYS_WRITE_ACCT */
fe8ab488 2847
c3c9b80d 2848 lck_mtx_lock(&sync_mtx_lck);
d9a64523
A
2849 while (sync_thread_state & SYNC_THREAD_RUN) {
2850 sync_thread_state &= ~SYNC_THREAD_RUN;
c3c9b80d 2851 lck_mtx_unlock(&sync_mtx_lck);
d9a64523
A
2852
2853 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2854 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2855 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2856 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2857
c3c9b80d 2858 lck_mtx_lock(&sync_mtx_lck);
d9a64523
A
2859 }
2860 /*
2861 * This wakeup _has_ to be issued before the lock is released otherwise
2862 * we may end up waking up a thread in sync_internal which is
2863 * expecting a wakeup from a thread it just created and not from this
2864 * thread which is about to exit.
2865 */
2866 wakeup(&sync_thread_state);
2867 sync_thread_state &= ~SYNC_THREAD_RUNNING;
f427ee49
A
2868#if CONFIG_PHYS_WRITE_ACCT
2869 pm_sync_thread = NULL;
2870#endif /* CONFIG_PHYS_WRITE_ACCT */
c3c9b80d 2871 lck_mtx_unlock(&sync_mtx_lck);
fe8ab488 2872
fe8ab488 2873 if (print_vmpage_stat) {
1c79356b 2874 vm_countdirtypages();
1c79356b 2875 }
39236c6e 2876
1c79356b 2877#if DIAGNOSTIC
0a7de745 2878 if (syncprt) {
1c79356b 2879 vfs_bufstats();
0a7de745 2880 }
1c79356b 2881#endif /* DIAGNOSTIC */
1c79356b
A
2882}
2883
cb323159 2884struct timeval sync_timeout_last_print = {.tv_sec = 0, .tv_usec = 0};
d9a64523 2885
1c79356b 2886/*
d9a64523
A
2887 * An in-kernel sync for power management to call.
2888 * This function always returns within sync_timeout seconds.
1c79356b 2889 */
d9a64523
A
2890__private_extern__ int
2891sync_internal(void)
2d21ac55 2892{
fe8ab488 2893 thread_t thd;
2d21ac55 2894 int error;
d9a64523 2895 int thread_created = FALSE;
cb323159 2896 struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
fe8ab488 2897
c3c9b80d 2898 lck_mtx_lock(&sync_mtx_lck);
d9a64523
A
2899 sync_thread_state |= SYNC_THREAD_RUN;
2900 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2901 int kr;
2902
2903 sync_thread_state |= SYNC_THREAD_RUNNING;
2904 kr = kernel_thread_start(sync_thread, NULL, &thd);
2905 if (kr != KERN_SUCCESS) {
2906 sync_thread_state &= ~SYNC_THREAD_RUNNING;
c3c9b80d 2907 lck_mtx_unlock(&sync_mtx_lck);
d9a64523 2908 printf("sync_thread failed\n");
0a7de745 2909 return 0;
d9a64523
A
2910 }
2911 thread_created = TRUE;
fe8ab488
A
2912 }
2913
c3c9b80d 2914 error = msleep((caddr_t)&sync_thread_state, &sync_mtx_lck,
d9a64523 2915 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
fe8ab488 2916 if (error) {
d9a64523
A
2917 struct timeval now;
2918
2919 microtime(&now);
2920 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2921 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2922 sync_timeout_last_print.tv_sec = now.tv_sec;
2923 }
fe8ab488 2924 }
fe8ab488 2925
0a7de745 2926 if (thread_created) {
d9a64523 2927 thread_deallocate(thd);
0a7de745 2928 }
2d21ac55 2929
0a7de745 2930 return 0;
fe8ab488
A
2931} /* end of sync_internal call */
2932
2933/*
2934 * Change filesystem quotas.
2935 */
2936#if QUOTA
2937int
2938quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2939{
2d21ac55 2940 struct mount *mp;
d9a64523 2941 int error, quota_cmd, quota_status = 0;
91447636
A
2942 caddr_t datap;
2943 size_t fnamelen;
1c79356b 2944 struct nameidata nd;
2d21ac55 2945 vfs_context_t ctx = vfs_context_current();
d9a64523 2946 struct dqblk my_dqblk = {};
91447636 2947
b0d623f7 2948 AUDIT_ARG(uid, uap->uid);
55e303ae 2949 AUDIT_ARG(cmd, uap->cmd);
6d2010ae 2950 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 2951 uap->path, ctx);
55e303ae 2952 error = namei(&nd);
0a7de745
A
2953 if (error) {
2954 return error;
2955 }
1c79356b 2956 mp = nd.ni_vp->v_mount;
c6bf4f31 2957 mount_ref(mp, 0);
91447636
A
2958 vnode_put(nd.ni_vp);
2959 nameidone(&nd);
2960
2961 /* copyin any data we will need for downstream code */
2962 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2963
2964 switch (quota_cmd) {
2965 case Q_QUOTAON:
2966 /* uap->arg specifies a file from which to take the quotas */
2967 fnamelen = MAXPATHLEN;
f427ee49 2968 datap = zalloc(ZV_NAMEI);
91447636
A
2969 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2970 break;
2971 case Q_GETQUOTA:
2972 /* uap->arg is a pointer to a dqblk structure. */
2973 datap = (caddr_t) &my_dqblk;
2974 break;
2975 case Q_SETQUOTA:
2976 case Q_SETUSE:
2977 /* uap->arg is a pointer to a dqblk structure. */
2978 datap = (caddr_t) &my_dqblk;
2979 if (proc_is64bit(p)) {
0a7de745
A
2980 struct user_dqblk my_dqblk64;
2981 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof(my_dqblk64));
91447636
A
2982 if (error == 0) {
2983 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2984 }
0a7de745
A
2985 } else {
2986 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof(my_dqblk));
91447636
A
2987 }
2988 break;
2989 case Q_QUOTASTAT:
2990 /* uap->arg is a pointer to an integer */
2991 datap = (caddr_t) &quota_status;
2992 break;
2993 default:
2994 datap = NULL;
2995 break;
2996 } /* switch */
2997
2998 if (error == 0) {
2d21ac55 2999 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
3000 }
3001
3002 switch (quota_cmd) {
3003 case Q_QUOTAON:
0a7de745 3004 if (datap != NULL) {
f427ee49 3005 zfree(ZV_NAMEI, datap);
0a7de745 3006 }
91447636
A
3007 break;
3008 case Q_GETQUOTA:
3009 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
3010 if (error == 0) {
3011 if (proc_is64bit(p)) {
0a7de745 3012 struct user_dqblk my_dqblk64;
5ba3f43e
A
3013
3014 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
91447636 3015 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
0a7de745
A
3016 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof(my_dqblk64));
3017 } else {
3018 error = copyout(datap, uap->arg, sizeof(struct dqblk));
91447636
A
3019 }
3020 }
3021 break;
3022 case Q_QUOTASTAT:
3023 /* uap->arg is a pointer to an integer */
3024 if (error == 0) {
3025 error = copyout(datap, uap->arg, sizeof(quota_status));
3026 }
3027 break;
3028 default:
3029 break;
3030 } /* switch */
3031
c6bf4f31 3032 mount_drop(mp, 0);
0a7de745 3033 return error;
1c79356b 3034}
2d21ac55
A
3035#else
3036int
b0d623f7 3037quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55 3038{
0a7de745 3039 return EOPNOTSUPP;
2d21ac55
A
3040}
3041#endif /* QUOTA */
1c79356b
A
3042
3043/*
3044 * Get filesystem statistics.
2d21ac55
A
3045 *
3046 * Returns: 0 Success
3047 * namei:???
3048 * vfs_update_vfsstat:???
3049 * munge_statfs:EFAULT
1c79356b 3050 */
1c79356b
A
3051/* ARGSUSED */
3052int
b0d623f7 3053statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 3054{
91447636
A
3055 struct mount *mp;
3056 struct vfsstatfs *sp;
1c79356b
A
3057 int error;
3058 struct nameidata nd;
2d21ac55 3059 vfs_context_t ctx = vfs_context_current();
91447636 3060 vnode_t vp;
1c79356b 3061
39037602 3062 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
0a7de745 3063 UIO_USERSPACE, uap->path, ctx);
55e303ae 3064 error = namei(&nd);
0a7de745
A
3065 if (error != 0) {
3066 return error;
3067 }
91447636
A
3068 vp = nd.ni_vp;
3069 mp = vp->v_mount;
3070 sp = &mp->mnt_vfsstat;
3071 nameidone(&nd);
3072
39037602
A
3073#if CONFIG_MACF
3074 error = mac_mount_check_stat(ctx, mp);
0a7de745 3075 if (error != 0) {
cb323159 3076 vnode_put(vp);
0a7de745
A
3077 return error;
3078 }
39037602
A
3079#endif
3080
2d21ac55 3081 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 3082 if (error != 0) {
39236c6e 3083 vnode_put(vp);
0a7de745 3084 return error;
39236c6e 3085 }
91447636
A
3086
3087 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 3088 vnode_put(vp);
0a7de745 3089 return error;
1c79356b
A
3090}
3091
3092/*
3093 * Get filesystem statistics.
3094 */
1c79356b
A
3095/* ARGSUSED */
3096int
b0d623f7 3097fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 3098{
2d21ac55 3099 vnode_t vp;
1c79356b 3100 struct mount *mp;
91447636 3101 struct vfsstatfs *sp;
1c79356b
A
3102 int error;
3103
55e303ae
A
3104 AUDIT_ARG(fd, uap->fd);
3105
0a7de745
A
3106 if ((error = file_vnode(uap->fd, &vp))) {
3107 return error;
3108 }
55e303ae 3109
d1ecb069
A
3110 error = vnode_getwithref(vp);
3111 if (error) {
3112 file_drop(uap->fd);
0a7de745 3113 return error;
d1ecb069
A
3114 }
3115
91447636 3116 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 3117
91447636
A
3118 mp = vp->v_mount;
3119 if (!mp) {
d1ecb069
A
3120 error = EBADF;
3121 goto out;
91447636 3122 }
39037602
A
3123
3124#if CONFIG_MACF
3125 error = mac_mount_check_stat(vfs_context_current(), mp);
0a7de745 3126 if (error != 0) {
39037602 3127 goto out;
0a7de745 3128 }
39037602
A
3129#endif
3130
91447636 3131 sp = &mp->mnt_vfsstat;
39037602 3132 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 3133 goto out;
91447636 3134 }
91447636
A
3135
3136 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
3137
d1ecb069
A
3138out:
3139 file_drop(uap->fd);
3140 vnode_put(vp);
3141
0a7de745 3142 return error;
1c79356b
A
3143}
3144
cb323159
A
3145void
3146vfs_get_statfs64(struct mount *mp, struct statfs64 *sfs)
3147{
3148 struct vfsstatfs *vsfs = &mp->mnt_vfsstat;
3149
3150 bzero(sfs, sizeof(*sfs));
3151
3152 sfs->f_bsize = vsfs->f_bsize;
3153 sfs->f_iosize = (int32_t)vsfs->f_iosize;
3154 sfs->f_blocks = vsfs->f_blocks;
3155 sfs->f_bfree = vsfs->f_bfree;
3156 sfs->f_bavail = vsfs->f_bavail;
3157 sfs->f_files = vsfs->f_files;
3158 sfs->f_ffree = vsfs->f_ffree;
3159 sfs->f_fsid = vsfs->f_fsid;
3160 sfs->f_owner = vsfs->f_owner;
3161 sfs->f_type = mp->mnt_vtable->vfc_typenum;
3162 sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3163 sfs->f_fssubtype = vsfs->f_fssubtype;
f427ee49 3164 sfs->f_flags_ext = (mp->mnt_kern_flag & MNTK_SYSTEMDATA) ? MNT_EXT_ROOT_DATA_VOL : 0;
6d2010ae 3165 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
cb323159 3166 strlcpy(&sfs->f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
6d2010ae 3167 } else {
cb323159 3168 strlcpy(&sfs->f_fstypename[0], &vsfs->f_fstypename[0], MFSTYPENAMELEN);
6d2010ae 3169 }
cb323159
A
3170 strlcpy(&sfs->f_mntonname[0], &vsfs->f_mntonname[0], MAXPATHLEN);
3171 strlcpy(&sfs->f_mntfromname[0], &vsfs->f_mntfromname[0], MAXPATHLEN);
2d21ac55
A
3172}
3173
39037602
A
3174/*
3175 * Get file system statistics in 64-bit mode
2d21ac55
A
3176 */
3177int
b0d623f7 3178statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
3179{
3180 struct mount *mp;
2d21ac55 3181 int error;
f427ee49
A
3182 struct nameidata *ndp;
3183 struct statfs64 *sfsp;
2d21ac55
A
3184 vfs_context_t ctxp = vfs_context_current();
3185 vnode_t vp;
f427ee49
A
3186 union {
3187 struct nameidata nd;
3188 struct statfs64 sfs;
3189 } *__nameidata_statfs64;
2d21ac55 3190
f427ee49
A
3191 __nameidata_statfs64 = kheap_alloc(KHEAP_TEMP, sizeof(*__nameidata_statfs64),
3192 Z_WAITOK);
3193 ndp = &__nameidata_statfs64->nd;
3194
3195 NDINIT(ndp, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
0a7de745 3196 UIO_USERSPACE, uap->path, ctxp);
f427ee49 3197 error = namei(ndp);
0a7de745 3198 if (error != 0) {
f427ee49 3199 goto out;
0a7de745 3200 }
f427ee49 3201 vp = ndp->ni_vp;
2d21ac55 3202 mp = vp->v_mount;
f427ee49 3203 nameidone(ndp);
2d21ac55 3204
39037602
A
3205#if CONFIG_MACF
3206 error = mac_mount_check_stat(ctxp, mp);
0a7de745 3207 if (error != 0) {
cb323159 3208 vnode_put(vp);
f427ee49 3209 goto out;
0a7de745 3210 }
39037602
A
3211#endif
3212
2d21ac55 3213 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 3214 if (error != 0) {
39236c6e 3215 vnode_put(vp);
f427ee49 3216 goto out;
39236c6e 3217 }
2d21ac55 3218
f427ee49
A
3219 sfsp = &__nameidata_statfs64->sfs;
3220 vfs_get_statfs64(mp, sfsp);
3221 if ((mp->mnt_kern_flag & MNTK_SYSTEMDATA) &&
cb323159
A
3222 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3223 /* This process does not want to see a seperate data volume mountpoint */
f427ee49 3224 strlcpy(&sfsp->f_mntonname[0], "/", sizeof("/"));
cb323159 3225 }
f427ee49 3226 error = copyout(sfsp, uap->buf, sizeof(*sfsp));
39236c6e 3227 vnode_put(vp);
2d21ac55 3228
f427ee49
A
3229out:
3230 kheap_free(KHEAP_TEMP, __nameidata_statfs64, sizeof(*__nameidata_statfs64));
3231
0a7de745 3232 return error;
2d21ac55
A
3233}
3234
39037602
A
3235/*
3236 * Get file system statistics in 64-bit mode
2d21ac55
A
3237 */
3238int
b0d623f7 3239fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
3240{
3241 struct vnode *vp;
3242 struct mount *mp;
cb323159 3243 struct statfs64 sfs;
2d21ac55
A
3244 int error;
3245
3246 AUDIT_ARG(fd, uap->fd);
3247
0a7de745
A
3248 if ((error = file_vnode(uap->fd, &vp))) {
3249 return error;
3250 }
2d21ac55 3251
d1ecb069
A
3252 error = vnode_getwithref(vp);
3253 if (error) {
3254 file_drop(uap->fd);
0a7de745 3255 return error;
d1ecb069
A
3256 }
3257
2d21ac55
A
3258 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3259
3260 mp = vp->v_mount;
3261 if (!mp) {
316670eb 3262 error = EBADF;
d1ecb069 3263 goto out;
2d21ac55 3264 }
39037602
A
3265
3266#if CONFIG_MACF
3267 error = mac_mount_check_stat(vfs_context_current(), mp);
0a7de745 3268 if (error != 0) {
39037602 3269 goto out;
0a7de745 3270 }
39037602
A
3271#endif
3272
2d21ac55 3273 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 3274 goto out;
2d21ac55 3275 }
2d21ac55 3276
cb323159 3277 vfs_get_statfs64(mp, &sfs);
f427ee49 3278 if ((mp->mnt_kern_flag & MNTK_SYSTEMDATA) &&
cb323159
A
3279 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3280 /* This process does not want to see a seperate data volume mountpoint */
3281 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3282 }
3283 error = copyout(&sfs, uap->buf, sizeof(sfs));
2d21ac55 3284
d1ecb069
A
3285out:
3286 file_drop(uap->fd);
3287 vnode_put(vp);
3288
0a7de745 3289 return error;
2d21ac55 3290}
91447636
A
3291
/*
 * State handed to the getfsstat callbacks through vfs_iterate().
 */
3292struct getfsstat_struct {
0a7de745
A
/* User address of the next record to write; the callbacks advance it after each record and write nothing when it is zero. */
3293 user_addr_t sfsp;
/* Optional array of user addresses passed to mac_mount_label_get(), one per written record; NULL when unused. */
3294 user_addr_t *mp;
/* Running tally of mounts, incremented by the callbacks. */
3295 int count;
/* Records are only written while count is below this. */
3296 int maxcount;
/* Caller's flags; the callbacks test MNT_NOWAIT, MNT_WAIT and MNT_DWAIT. */
3297 int flags;
/* Set by a callback when it stops the iteration with an error; otherwise left at the caller's initial value. */
3298 int error;
1c79356b 3299};
1c79356b 3300
91447636
A
3301
3302static int
3303getfsstat_callback(mount_t mp, void * arg)
3304{
91447636
A
3305 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3306 struct vfsstatfs *sp;
91447636 3307 int error, my_size;
2d21ac55 3308 vfs_context_t ctx = vfs_context_current();
91447636
A
3309
3310 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
3311#if CONFIG_MACF
3312 error = mac_mount_check_stat(ctx, mp);
3313 if (error != 0) {
3314 fstp->error = error;
0a7de745 3315 return VFS_RETURNED_DONE;
39037602
A
3316 }
3317#endif
91447636
A
3318 sp = &mp->mnt_vfsstat;
3319 /*
3320 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 3321 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 3322 */
cb323159
A
3323 if ((mp->mnt_lflag & MNT_LDEAD) ||
3324 (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3325 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3326 (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT)))) {
91447636 3327 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
0a7de745 3328 return VFS_RETURNED;
1c79356b 3329 }
91447636
A
3330
3331 /*
3332 * Need to handle LP64 version of struct statfs
3333 */
2d21ac55 3334 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
3335 if (error) {
3336 fstp->error = error;
0a7de745 3337 return VFS_RETURNED_DONE;
1c79356b 3338 }
91447636 3339 fstp->sfsp += my_size;
2d21ac55
A
3340
3341 if (fstp->mp) {
39236c6e 3342#if CONFIG_MACF
2d21ac55
A
3343 error = mac_mount_label_get(mp, *fstp->mp);
3344 if (error) {
3345 fstp->error = error;
0a7de745 3346 return VFS_RETURNED_DONE;
2d21ac55 3347 }
39236c6e 3348#endif
2d21ac55
A
3349 fstp->mp++;
3350 }
3351 }
91447636 3352 fstp->count++;
0a7de745 3353 return VFS_RETURNED;
91447636
A
3354}
3355
3356/*
3357 * Get statistics on all filesystems.
3358 */
3359int
3360getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
3361{
3362 struct __mac_getfsstat_args muap;
3363
3364 muap.buf = uap->buf;
3365 muap.bufsize = uap->bufsize;
3366 muap.mac = USER_ADDR_NULL;
3367 muap.macsize = 0;
3368 muap.flags = uap->flags;
3369
0a7de745 3370 return __mac_getfsstat(p, &muap, retval);
2d21ac55
A
3371}
3372
b0d623f7
A
3373/*
3374 * __mac_getfsstat: Get MAC-related file system statistics
3375 *
3376 * Parameters: p (ignored)
3377 * uap User argument descriptor (see below)
39037602 3378 * retval Count of file system statistics (N stats)
b0d623f7
A
3379 *
3380 * Indirect: uap->bufsize Buffer size
3381 * uap->macsize MAC info size
3382 * uap->buf Buffer where information will be returned
3383 * uap->mac MAC info
3384 * uap->flags File system flags
39037602 3385 *
b0d623f7
A
3386 *
3387 * Returns: 0 Success
3388 * !0 Not success
3389 *
3390 */
2d21ac55
A
3391int
3392__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
3393{
3394 user_addr_t sfsp;
2d21ac55 3395 user_addr_t *mp;
b0d623f7 3396 size_t count, maxcount, bufsize, macsize;
91447636
A
3397 struct getfsstat_struct fst;
3398
cb323159
A
3399 if ((unsigned)uap->bufsize > INT_MAX || (unsigned)uap->macsize > INT_MAX) {
3400 return EINVAL;
3401 }
3402
b0d623f7
A
3403 bufsize = (size_t) uap->bufsize;
3404 macsize = (size_t) uap->macsize;
3405
91447636 3406 if (IS_64BIT_PROCESS(p)) {
b0d623f7 3407 maxcount = bufsize / sizeof(struct user64_statfs);
0a7de745 3408 } else {
b0d623f7 3409 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
3410 }
3411 sfsp = uap->buf;
3412 count = 0;
3413
2d21ac55
A
3414 mp = NULL;
3415
3416#if CONFIG_MACF
3417 if (uap->mac != USER_ADDR_NULL) {
3418 u_int32_t *mp0;
3419 int error;
b0d623f7 3420 unsigned int i;
2d21ac55 3421
b0d623f7 3422 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
0a7de745
A
3423 if (count != maxcount) {
3424 return EINVAL;
3425 }
2d21ac55
A
3426
3427 /* Copy in the array */
f427ee49 3428 mp0 = kheap_alloc(KHEAP_TEMP, macsize, Z_WAITOK);
b0d623f7 3429 if (mp0 == NULL) {
0a7de745 3430 return ENOMEM;
b0d623f7
A
3431 }
3432
3433 error = copyin(uap->mac, mp0, macsize);
3434 if (error) {
f427ee49 3435 kheap_free(KHEAP_TEMP, mp0, macsize);
0a7de745 3436 return error;
b0d623f7 3437 }
2d21ac55
A
3438
3439 /* Normalize to an array of user_addr_t */
f427ee49 3440 mp = kheap_alloc(KHEAP_TEMP, count * sizeof(user_addr_t), Z_WAITOK);
b0d623f7 3441 if (mp == NULL) {
f427ee49 3442 kheap_free(KHEAP_TEMP, mp0, macsize);
0a7de745 3443 return ENOMEM;
b0d623f7
A
3444 }
3445
2d21ac55 3446 for (i = 0; i < count; i++) {
0a7de745 3447 if (IS_64BIT_PROCESS(p)) {
2d21ac55 3448 mp[i] = ((user_addr_t *)mp0)[i];
0a7de745 3449 } else {
2d21ac55 3450 mp[i] = (user_addr_t)mp0[i];
0a7de745 3451 }
2d21ac55 3452 }
f427ee49 3453 kheap_free(KHEAP_TEMP, mp0, macsize);
2d21ac55
A
3454 }
3455#endif
3456
3457
91447636 3458 fst.sfsp = sfsp;
2d21ac55 3459 fst.mp = mp;
91447636
A
3460 fst.flags = uap->flags;
3461 fst.count = 0;
3462 fst.error = 0;
f427ee49 3463 fst.maxcount = (int)maxcount;
91447636 3464
39037602 3465
cb323159 3466 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat_callback, &fst);
91447636 3467
0a7de745 3468 if (mp) {
f427ee49 3469 kheap_free(KHEAP_TEMP, mp, count * sizeof(user_addr_t));
0a7de745 3470 }
2d21ac55 3471
0a7de745 3472 if (fst.error) {
91447636 3473 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
0a7de745 3474 return fst.error;
91447636
A
3475 }
3476
0a7de745 3477 if (fst.sfsp && fst.count > fst.maxcount) {
91447636 3478 *retval = fst.maxcount;
0a7de745 3479 } else {
91447636 3480 *retval = fst.count;
0a7de745
A
3481 }
3482 return 0;
1c79356b
A
3483}
3484
2d21ac55
A
3485static int
3486getfsstat64_callback(mount_t mp, void * arg)
3487{
3488 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3489 struct vfsstatfs *sp;
cb323159 3490 struct statfs64 sfs;
2d21ac55
A
3491 int error;
3492
3493 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
3494#if CONFIG_MACF
3495 error = mac_mount_check_stat(vfs_context_current(), mp);
3496 if (error != 0) {
3497 fstp->error = error;
0a7de745 3498 return VFS_RETURNED_DONE;
39037602
A
3499 }
3500#endif
2d21ac55
A
3501 sp = &mp->mnt_vfsstat;
3502 /*
b0d623f7
A
3503 * If MNT_NOWAIT is specified, do not refresh the fsstat
3504 * cache. MNT_WAIT overrides MNT_NOWAIT.
3505 *
3506 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3507 * getfsstat, since the constants are out of the same
3508 * namespace.
2d21ac55 3509 */
cb323159
A
3510 if ((mp->mnt_lflag & MNT_LDEAD) ||
3511 ((((fstp->flags & MNT_NOWAIT) == 0) || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3512 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3513 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)))) {
2d21ac55 3514 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
0a7de745 3515 return VFS_RETURNED;
2d21ac55
A
3516 }
3517
cb323159
A
3518 vfs_get_statfs64(mp, &sfs);
3519 error = copyout(&sfs, fstp->sfsp, sizeof(sfs));
2d21ac55
A
3520 if (error) {
3521 fstp->error = error;
0a7de745 3522 return VFS_RETURNED_DONE;
2d21ac55 3523 }
cb323159 3524 fstp->sfsp += sizeof(sfs);
2d21ac55
A
3525 }
3526 fstp->count++;
0a7de745 3527 return VFS_RETURNED;
2d21ac55
A
3528}
3529
3530/*
3531 * Get statistics on all file systems in 64 bit mode.
3532 */
3533int
3534getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3535{
3536 user_addr_t sfsp;
3537 int count, maxcount;
3538 struct getfsstat_struct fst;
3539
3540 maxcount = uap->bufsize / sizeof(struct statfs64);
3541
3542 sfsp = uap->buf;
3543 count = 0;
3544
3545 fst.sfsp = sfsp;
3546 fst.flags = uap->flags;
3547 fst.count = 0;
3548 fst.error = 0;
3549 fst.maxcount = maxcount;
3550
cb323159 3551 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat64_callback, &fst);
2d21ac55 3552
0a7de745 3553 if (fst.error) {
2d21ac55 3554 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
0a7de745 3555 return fst.error;
2d21ac55
A
3556 }
3557
0a7de745 3558 if (fst.sfsp && fst.count > fst.maxcount) {
2d21ac55 3559 *retval = fst.maxcount;
0a7de745 3560 } else {
2d21ac55 3561 *retval = fst.count;
0a7de745 3562 }
2d21ac55 3563
0a7de745 3564 return 0;
2d21ac55
A
3565}
3566
fe8ab488
A
3567/*
3568 * gets the associated vnode with the file descriptor passed.
3569 * as input
3570 *
3571 * INPUT
3572 * ctx - vfs context of caller
3573 * fd - file descriptor for which vnode is required.
3574 * vpp - Pointer to pointer to vnode to be returned.
3575 *
3576 * The vnode is returned with an iocount so any vnode obtained
3577 * by this call needs a vnode_put
3578 *
3579 */
39037602 3580int
fe8ab488
A
3581vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3582{
3583 int error;
3584 vnode_t vp;
3585 struct fileproc *fp;
3586 proc_t p = vfs_context_proc(ctx);
3587
3588 *vpp = NULLVP;
3589
3590 error = fp_getfvp(p, fd, &fp, &vp);
0a7de745
A
3591 if (error) {
3592 return error;
3593 }
fe8ab488
A
3594
3595 error = vnode_getwithref(vp);
3596 if (error) {
3597 (void)fp_drop(p, fd, fp, 0);
0a7de745 3598 return error;
fe8ab488
A
3599 }
3600
3601 (void)fp_drop(p, fd, fp, 0);
3602 *vpp = vp;
0a7de745 3603 return error;
fe8ab488
A
3604}
3605
3606/*
3607 * Wrapper function around namei to start lookup from a directory
3608 * specified by a file descriptor ni_dirfd.
3609 *
3610 * In addition to all the errors returned by namei, this call can
3611 * return ENOTDIR if the file descriptor does not refer to a directory.
3612 * and EBADF if the file descriptor is not valid.
3613 */
3614int
3615nameiat(struct nameidata *ndp, int dirfd)
3616{
3617 if ((dirfd != AT_FDCWD) &&
3618 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3619 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3620 int error = 0;
3621 char c;
3622
3623 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3624 error = copyin(ndp->ni_dirp, &c, sizeof(char));
0a7de745
A
3625 if (error) {
3626 return error;
3627 }
fe8ab488
A
3628 } else {
3629 c = *((char *)(ndp->ni_dirp));
3630 }
3631
3632 if (c != '/') {
3633 vnode_t dvp_at;
3634
3635 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3636 &dvp_at);
0a7de745
A
3637 if (error) {
3638 return error;
3639 }
fe8ab488
A
3640
3641 if (vnode_vtype(dvp_at) != VDIR) {
3642 vnode_put(dvp_at);
0a7de745 3643 return ENOTDIR;
fe8ab488
A
3644 }
3645
3646 ndp->ni_dvp = dvp_at;
3647 ndp->ni_cnd.cn_flags |= USEDVP;
3648 error = namei(ndp);
3649 ndp->ni_cnd.cn_flags &= ~USEDVP;
3650 vnode_put(dvp_at);
0a7de745 3651 return error;
fe8ab488
A
3652 }
3653 }
3654
0a7de745 3655 return namei(ndp);
fe8ab488
A
3656}
3657
1c79356b
A
3658/*
3659 * Change current working directory to a given file descriptor.
3660 */
1c79356b 3661/* ARGSUSED */
2d21ac55
A
3662static int
3663common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3664{
2d21ac55
A
3665 struct filedesc *fdp = p->p_fd;
3666 vnode_t vp;
3667 vnode_t tdp;
3668 vnode_t tvp;
1c79356b 3669 struct mount *mp;
f427ee49 3670 int error, should_put = 1;
2d21ac55 3671 vfs_context_t ctx = vfs_context_current();
1c79356b 3672
b0d623f7 3673 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3674 if (per_thread && uap->fd == -1) {
3675 /*
3676 * Switching back from per-thread to per process CWD; verify we
3677 * in fact have one before proceeding. The only success case
3678 * for this code path is to return 0 preemptively after zapping
3679 * the thread structure contents.
3680 */
3681 thread_t th = vfs_context_thread(ctx);
3682 if (th) {
3683 uthread_t uth = get_bsdthread_info(th);
3684 tvp = uth->uu_cdir;
3685 uth->uu_cdir = NULLVP;
3686 if (tvp != NULLVP) {
3687 vnode_rele(tvp);
0a7de745 3688 return 0;
2d21ac55
A
3689 }
3690 }
0a7de745 3691 return EBADF;
2d21ac55 3692 }
91447636 3693
0a7de745
A
3694 if ((error = file_vnode(uap->fd, &vp))) {
3695 return error;
3696 }
3697 if ((error = vnode_getwithref(vp))) {
3698 file_drop(uap->fd);
3699 return error;
91447636 3700 }
55e303ae
A
3701
3702 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3703
2d21ac55 3704 if (vp->v_type != VDIR) {
1c79356b 3705 error = ENOTDIR;
2d21ac55
A
3706 goto out;
3707 }
3708
3709#if CONFIG_MACF
3710 error = mac_vnode_check_chdir(ctx, vp);
0a7de745 3711 if (error) {
2d21ac55 3712 goto out;
0a7de745 3713 }
2d21ac55
A
3714#endif
3715 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
0a7de745 3716 if (error) {
2d21ac55 3717 goto out;
0a7de745 3718 }
2d21ac55 3719
1c79356b 3720 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3721 if (vfs_busy(mp, LK_NOWAIT)) {
3722 error = EACCES;
3723 goto out;
55e303ae 3724 }
2d21ac55 3725 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3726 vfs_unbusy(mp);
0a7de745 3727 if (error) {
1c79356b 3728 break;
0a7de745 3729 }
91447636 3730 vnode_put(vp);
1c79356b
A
3731 vp = tdp;
3732 }
0a7de745
A
3733 if (error) {
3734 goto out;
3735 }
3736 if ((error = vnode_ref(vp))) {
91447636 3737 goto out;
0a7de745 3738 }
91447636 3739 vnode_put(vp);
f427ee49 3740 should_put = 0;
91447636 3741
2d21ac55
A
3742 if (per_thread) {
3743 thread_t th = vfs_context_thread(ctx);
3744 if (th) {
3745 uthread_t uth = get_bsdthread_info(th);
3746 tvp = uth->uu_cdir;
3747 uth->uu_cdir = vp;
b0d623f7 3748 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3749 } else {
3750 vnode_rele(vp);
f427ee49
A
3751 error = ENOENT;
3752 goto out;
2d21ac55
A
3753 }
3754 } else {
bca245ac 3755 proc_dirs_lock_exclusive(p);
2d21ac55
A
3756 proc_fdlock(p);
3757 tvp = fdp->fd_cdir;
3758 fdp->fd_cdir = vp;
3759 proc_fdunlock(p);
bca245ac 3760 proc_dirs_unlock_exclusive(p);
2d21ac55 3761 }
91447636 3762
0a7de745
A
3763 if (tvp) {
3764 vnode_rele(tvp);
3765 }
91447636 3766
91447636 3767out:
f427ee49
A
3768 if (should_put) {
3769 vnode_put(vp);
3770 }
91447636
A
3771 file_drop(uap->fd);
3772
0a7de745 3773 return error;
1c79356b
A
3774}
3775
2d21ac55 3776int
b0d623f7 3777fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3778{
3779 return common_fchdir(p, uap, 0);
3780}
3781
3782int
b0d623f7 3783__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3784{
3785 return common_fchdir(p, (void *)uap, 1);
3786}
3787
cb323159 3788
1c79356b 3789/*
b0d623f7 3790 * Change current working directory (".").
2d21ac55
A
3791 *
3792 * Returns: 0 Success
3793 * change_dir:ENOTDIR
3794 * change_dir:???
3795 * vnode_ref:ENOENT No such file or directory
1c79356b 3796 */
1c79356b 3797/* ARGSUSED */
cb323159
A
3798int
3799chdir_internal(proc_t p, vfs_context_t ctx, struct nameidata *ndp, int per_thread)
1c79356b 3800{
2d21ac55 3801 struct filedesc *fdp = p->p_fd;
1c79356b 3802 int error;
2d21ac55 3803 vnode_t tvp;
91447636 3804
cb323159 3805 error = change_dir(ndp, ctx);
0a7de745
A
3806 if (error) {
3807 return error;
3808 }
cb323159
A
3809 if ((error = vnode_ref(ndp->ni_vp))) {
3810 vnode_put(ndp->ni_vp);
0a7de745 3811 return error;
91447636
A
3812 }
3813 /*
3814 * drop the iocount we picked up in change_dir
3815 */
cb323159 3816 vnode_put(ndp->ni_vp);
91447636 3817
2d21ac55
A
3818 if (per_thread) {
3819 thread_t th = vfs_context_thread(ctx);
3820 if (th) {
3821 uthread_t uth = get_bsdthread_info(th);
3822 tvp = uth->uu_cdir;
cb323159 3823 uth->uu_cdir = ndp->ni_vp;
b0d623f7 3824 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55 3825 } else {
cb323159 3826 vnode_rele(ndp->ni_vp);
0a7de745 3827 return ENOENT;
2d21ac55
A
3828 }
3829 } else {
bca245ac 3830 proc_dirs_lock_exclusive(p);
2d21ac55
A
3831 proc_fdlock(p);
3832 tvp = fdp->fd_cdir;
cb323159 3833 fdp->fd_cdir = ndp->ni_vp;
2d21ac55 3834 proc_fdunlock(p);
bca245ac 3835 proc_dirs_unlock_exclusive(p);
2d21ac55 3836 }
91447636 3837
0a7de745
A
3838 if (tvp) {
3839 vnode_rele(tvp);
3840 }
91447636 3841
0a7de745 3842 return 0;
1c79356b
A
3843}
3844
b0d623f7 3845
cb323159
A
3846/*
3847 * Change current working directory (".").
3848 *
3849 * Returns: 0 Success
3850 * chdir_internal:ENOTDIR
3851 * chdir_internal:ENOENT No such file or directory
3852 * chdir_internal:???
3853 */
3854/* ARGSUSED */
3855static int
3856common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3857{
3858 struct nameidata nd;
3859 vfs_context_t ctx = vfs_context_current();
3860
3861 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3862 UIO_USERSPACE, uap->path, ctx);
3863
3864 return chdir_internal(p, ctx, &nd, per_thread);
3865}
3866
3867
b0d623f7
A
3868/*
3869 * chdir
3870 *
3871 * Change current working directory (".") for the entire process
3872 *
3873 * Parameters: p Process requesting the call
0a7de745
A
3874 * uap User argument descriptor (see below)
3875 * retval (ignored)
b0d623f7
A
3876 *
3877 * Indirect parameters: uap->path Directory path
3878 *
3879 * Returns: 0 Success
0a7de745
A
3880 * common_chdir: ENOTDIR
3881 * common_chdir: ENOENT No such file or directory
3882 * common_chdir: ???
b0d623f7
A
3883 *
3884 */
2d21ac55 3885int
b0d623f7 3886chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3887{
3888 return common_chdir(p, (void *)uap, 0);
3889}
3890
b0d623f7
A
3891/*
3892 * __pthread_chdir
3893 *
3894 * Change current working directory (".") for a single thread
3895 *
3896 * Parameters: p Process requesting the call
0a7de745
A
3897 * uap User argument descriptor (see below)
3898 * retval (ignored)
b0d623f7
A
3899 *
3900 * Indirect parameters: uap->path Directory path
3901 *
3902 * Returns: 0 Success
0a7de745 3903 * common_chdir: ENOTDIR
b0d623f7
A
3904 * common_chdir: ENOENT No such file or directory
3905 * common_chdir: ???
3906 *
3907 */
2d21ac55 3908int
b0d623f7 3909__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3910{
3911 return common_chdir(p, (void *)uap, 1);
3912}
3913
3914
1c79356b
A
3915/*
3916 * Change notion of root (``/'') directory.
3917 */
1c79356b
A
3918/* ARGSUSED */
3919int
b0d623f7 3920chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3921{
2d21ac55 3922 struct filedesc *fdp = p->p_fd;
1c79356b
A
3923 int error;
3924 struct nameidata nd;
2d21ac55
A
3925 vnode_t tvp;
3926 vfs_context_t ctx = vfs_context_current();
1c79356b 3927
0a7de745
A
3928 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
3929 return error;
3930 }
1c79356b 3931
39037602 3932 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
0a7de745 3933 UIO_USERSPACE, uap->path, ctx);
2d21ac55 3934 error = change_dir(&nd, ctx);
0a7de745
A
3935 if (error) {
3936 return error;
3937 }
1c79356b 3938
2d21ac55
A
3939#if CONFIG_MACF
3940 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3941 &nd.ni_cnd);
3942 if (error) {
91447636 3943 vnode_put(nd.ni_vp);
0a7de745 3944 return error;
91447636 3945 }
2d21ac55
A
3946#endif
3947
0a7de745
A
3948 if ((error = vnode_ref(nd.ni_vp))) {
3949 vnode_put(nd.ni_vp);
3950 return error;
1c79356b 3951 }
91447636 3952 vnode_put(nd.ni_vp);
1c79356b 3953
bca245ac
A
3954 /*
3955 * This lock provides the guarantee that as long as you hold the lock
3956 * fdp->fd_rdir has a usecount on it. This is used to take an iocount
3957 * on a referenced vnode in namei when determining the rootvnode for
3958 * a process.
3959 */
3960 /* needed for synchronization with lookup */
3961 proc_dirs_lock_exclusive(p);
3962 /* needed for setting the flag and other activities on the fd itself */
91447636 3963 proc_fdlock(p);
fa4905b1 3964 tvp = fdp->fd_rdir;
1c79356b 3965 fdp->fd_rdir = nd.ni_vp;
91447636
A
3966 fdp->fd_flags |= FD_CHROOT;
3967 proc_fdunlock(p);
bca245ac 3968 proc_dirs_unlock_exclusive(p);
91447636 3969
0a7de745 3970 if (tvp != NULL) {
91447636 3971 vnode_rele(tvp);
0a7de745 3972 }
91447636 3973
0a7de745 3974 return 0;
1c79356b
A
3975}
3976
f427ee49
A
3977#define PATHSTATICBUFLEN 256
3978#define PIVOT_ROOT_ENTITLEMENT \
3979 "com.apple.private.vfs.pivot-root"
3980
3981#if defined(XNU_TARGET_OS_OSX)
3982int
3983pivot_root(proc_t p, struct pivot_root_args *uap, __unused int *retval)
3984{
3985 int error;
3986 char new_rootfs_path_before[PATHSTATICBUFLEN] = {0};
3987 char old_rootfs_path_after[PATHSTATICBUFLEN] = {0};
3988 char *new_rootfs_path_before_buf = NULL;
3989 char *old_rootfs_path_after_buf = NULL;
3990 char *incoming = NULL;
3991 char *outgoing = NULL;
3992 vnode_t incoming_rootvp = NULLVP;
3993 size_t bytes_copied;
3994
3995 /*
3996 * XXX : Additional restrictions needed
3997 * - perhaps callable only once.
3998 */
3999 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
4000 return error;
4001 }
4002
4003 /*
4004 * pivot_root can be executed by launchd only.
4005 * Enforce entitlement.
4006 */
4007 if ((p->p_pid != 1) || !IOTaskHasEntitlement(current_task(), PIVOT_ROOT_ENTITLEMENT)) {
4008 return EPERM;
4009 }
4010
4011 error = copyinstr(uap->new_rootfs_path_before, &new_rootfs_path_before[0], PATHSTATICBUFLEN, &bytes_copied);
4012 if (error == ENAMETOOLONG) {
4013 new_rootfs_path_before_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
4014 error = copyinstr(uap->new_rootfs_path_before, new_rootfs_path_before_buf, MAXPATHLEN, &bytes_copied);
4015 }
4016
4017 if (error) {
4018 goto out;
4019 }
4020
4021 error = copyinstr(uap->old_rootfs_path_after, &old_rootfs_path_after[0], PATHSTATICBUFLEN, &bytes_copied);
4022 if (error == ENAMETOOLONG) {
4023 old_rootfs_path_after_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
4024 error = copyinstr(uap->old_rootfs_path_after, old_rootfs_path_after_buf, MAXPATHLEN, &bytes_copied);
4025 }
4026 if (error) {
4027 goto out;
4028 }
4029
4030 if (new_rootfs_path_before_buf) {
4031 incoming = new_rootfs_path_before_buf;
4032 } else {
4033 incoming = &new_rootfs_path_before[0];
4034 }
4035
4036 if (old_rootfs_path_after_buf) {
4037 outgoing = old_rootfs_path_after_buf;
4038 } else {
4039 outgoing = &old_rootfs_path_after[0];
4040 }
4041
4042 /*
4043 * The proposed incoming FS MUST be authenticated (i.e. not a chunklist DMG).
4044 * Userland is not allowed to pivot to an image.
4045 */
4046 error = vnode_lookup(incoming, 0, &incoming_rootvp, vfs_context_kernel());
4047 if (error) {
4048 goto out;
4049 }
4050 error = VNOP_IOCTL(incoming_rootvp, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
4051 if (error) {
4052 goto out;
4053 }
4054
4055 error = vfs_switch_root(incoming, outgoing, VFSSR_VIRTUALDEV_PROHIBITED);
4056
4057out:
4058 if (incoming_rootvp != NULLVP) {
4059 vnode_put(incoming_rootvp);
4060 incoming_rootvp = NULLVP;
4061 }
4062
4063 if (old_rootfs_path_after_buf) {
4064 zfree(ZV_NAMEI, old_rootfs_path_after_buf);
4065 }
4066
4067 if (new_rootfs_path_before_buf) {
4068 zfree(ZV_NAMEI, new_rootfs_path_before_buf);
4069 }
4070
4071 return error;
4072}
4073#else
4074int
4075pivot_root(proc_t p, __unused struct pivot_root_args *uap, int *retval)
4076{
4077 return nosys(p, NULL, retval);
4078}
4079#endif /* XNU_TARGET_OS_OSX */
4080
1c79356b
A
4081/*
4082 * Common routine for chroot and chdir.
2d21ac55
A
4083 *
4084 * Returns: 0 Success
4085 * ENOTDIR Not a directory
4086 * namei:??? [anything namei can return]
4087 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
4088 */
4089static int
91447636 4090change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 4091{
2d21ac55 4092 vnode_t vp;
1c79356b
A
4093 int error;
4094
0a7de745
A
4095 if ((error = namei(ndp))) {
4096 return error;
4097 }
91447636 4098 nameidone(ndp);
1c79356b 4099 vp = ndp->ni_vp;
2d21ac55
A
4100
4101 if (vp->v_type != VDIR) {
91447636 4102 vnode_put(vp);
0a7de745 4103 return ENOTDIR;
2d21ac55
A
4104 }
4105
4106#if CONFIG_MACF
4107 error = mac_vnode_check_chdir(ctx, vp);
4108 if (error) {
4109 vnode_put(vp);
0a7de745 4110 return error;
2d21ac55
A
4111 }
4112#endif
4113
4114 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
4115 if (error) {
4116 vnode_put(vp);
0a7de745 4117 return error;
2d21ac55 4118 }
91447636 4119
0a7de745 4120 return error;
1c79356b
A
4121}
4122
fe8ab488
A
4123/*
4124 * Free the vnode data (for directories) associated with the file glob.
4125 */
4126struct fd_vn_data *
4127fg_vn_data_alloc(void)
4128{
4129 struct fd_vn_data *fvdata;
4130
4131 /* Allocate per fd vnode data */
f427ee49
A
4132 fvdata = kheap_alloc(KM_FD_VN_DATA, sizeof(struct fd_vn_data),
4133 Z_WAITOK | Z_ZERO);
c3c9b80d 4134 lck_mtx_init(&fvdata->fv_lock, &fd_vn_lck_grp, &fd_vn_lck_attr);
fe8ab488
A
4135 return fvdata;
4136}
4137
4138/*
4139 * Free the vnode data (for directories) associated with the file glob.
4140 */
4141void
4142fg_vn_data_free(void *fgvndata)
4143{
4144 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
4145
f427ee49 4146 kheap_free(KHEAP_DATA_BUFFERS, fvdata->fv_buf, fvdata->fv_bufallocsiz);
c3c9b80d 4147 lck_mtx_destroy(&fvdata->fv_lock, &fd_vn_lck_grp);
f427ee49 4148 kheap_free(KM_FD_VN_DATA, fvdata, sizeof(struct fd_vn_data));
fe8ab488
A
4149}
4150
1c79356b
A
4151/*
4152 * Check permissions, allocate an open file structure,
4153 * and call the device open routine if any.
2d21ac55
A
4154 *
4155 * Returns: 0 Success
4156 * EINVAL
4157 * EINTR
4158 * falloc:ENFILE
4159 * falloc:EMFILE
4160 * falloc:ENOMEM
4161 * vn_open_auth:???
4162 * dupfdopen:???
4163 * VNOP_ADVLOCK:???
4164 * vnode_setsize:???
b0d623f7
A
4165 *
4166 * XXX Need to implement uid, gid
1c79356b 4167 */
2d21ac55 4168int
39236c6e
A
4169open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
4170 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
4171 int32_t *retval)
1c79356b 4172{
2d21ac55
A
4173 proc_t p = vfs_context_proc(ctx);
4174 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
4175 struct fileproc *fp;
4176 vnode_t vp;
91447636 4177 int flags, oflags;
1c79356b 4178 int type, indx, error;
3e170ce0 4179 struct vfs_context context;
ccc36f2f 4180
91447636 4181 oflags = uflags;
ccc36f2f 4182
0a7de745
A
4183 if ((oflags & O_ACCMODE) == O_ACCMODE) {
4184 return EINVAL;
4185 }
3e170ce0 4186
91447636 4187 flags = FFLAGS(uflags);
3e170ce0
A
4188 CLR(flags, FENCRYPTED);
4189 CLR(flags, FUNENCRYPTED);
91447636
A
4190
4191 AUDIT_ARG(fflags, oflags);
4192 AUDIT_ARG(mode, vap->va_mode);
4193
39236c6e
A
4194 if ((error = falloc_withalloc(p,
4195 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
0a7de745 4196 return error;
91447636 4197 }
2d21ac55 4198 uu->uu_dupfd = -indx - 1;
91447636 4199
2d21ac55 4200 if ((error = vn_open_auth(ndp, &flags, vap))) {
0a7de745 4201 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)) { /* XXX from fdopen */
39236c6e 4202 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 4203 fp_drop(p, indx, NULL, 0);
0a7de745
A
4204 *retval = indx;
4205 return 0;
91447636 4206 }
1c79356b 4207 }
0a7de745
A
4208 if (error == ERESTART) {
4209 error = EINTR;
4210 }
91447636 4211 fp_free(p, indx, fp);
0a7de745 4212 return error;
1c79356b 4213 }
2d21ac55
A
4214 uu->uu_dupfd = 0;
4215 vp = ndp->ni_vp;
55e303ae 4216
f427ee49
A
4217 fp->fp_glob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
4218 fp->fp_glob->fg_ops = &vnops;
4219 fp->fp_glob->fg_data = (caddr_t)vp;
91447636 4220
1c79356b 4221 if (flags & (O_EXLOCK | O_SHLOCK)) {
f427ee49
A
4222 struct flock lf = {
4223 .l_whence = SEEK_SET,
4224 };
4225
0a7de745 4226 if (flags & O_EXLOCK) {
1c79356b 4227 lf.l_type = F_WRLCK;
0a7de745 4228 } else {
1c79356b 4229 lf.l_type = F_RDLCK;
0a7de745 4230 }
1c79356b 4231 type = F_FLOCK;
0a7de745 4232 if ((flags & FNONBLOCK) == 0) {
1c79356b 4233 type |= F_WAIT;
0a7de745 4234 }
2d21ac55 4235#if CONFIG_MACF
f427ee49 4236 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->fp_glob,
2d21ac55 4237 F_SETLK, &lf);
0a7de745 4238 if (error) {
2d21ac55 4239 goto bad;
0a7de745 4240 }
2d21ac55 4241#endif
f427ee49 4242 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
55e303ae 4243 goto bad;
0a7de745 4244 }
f427ee49 4245 fp->fp_glob->fg_flag |= FWASLOCKED;
1c79356b 4246 }
55e303ae 4247
91447636 4248 /* try to truncate by setting the size attribute */
0a7de745 4249 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
91447636 4250 goto bad;
0a7de745 4251 }
55e303ae 4252
fe8ab488
A
4253 /*
4254 * For directories we hold some additional information in the fd.
4255 */
4256 if (vnode_vtype(vp) == VDIR) {
f427ee49 4257 fp->fp_glob->fg_vn_data = fg_vn_data_alloc();
fe8ab488 4258 } else {
f427ee49 4259 fp->fp_glob->fg_vn_data = NULL;
2d21ac55
A
4260 }
4261
91447636 4262 vnode_put(vp);
55e303ae 4263
3e170ce0
A
4264 /*
4265 * The first terminal open (without a O_NOCTTY) by a session leader
4266 * results in it being set as the controlling terminal.
4267 */
4268 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
4269 !(flags & O_NOCTTY)) {
4270 int tmp = 0;
4271
f427ee49 4272 (void)(*fp->fp_glob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3e170ce0
A
4273 (caddr_t)&tmp, ctx);
4274 }
4275
91447636 4276 proc_fdlock(p);
0a7de745 4277 if (flags & O_CLOEXEC) {
6d2010ae 4278 *fdflags(p, indx) |= UF_EXCLOSE;
0a7de745
A
4279 }
4280 if (flags & O_CLOFORK) {
39236c6e 4281 *fdflags(p, indx) |= UF_FORKCLOSE;
0a7de745 4282 }
6601e61a 4283 procfdtbl_releasefd(p, indx, NULL);
39037602
A
4284
4285#if CONFIG_SECLUDED_MEMORY
4286 if (secluded_for_filecache &&
f427ee49 4287 FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_VNODE &&
39037602
A
4288 vnode_vtype(vp) == VREG) {
4289 memory_object_control_t moc;
4290
4291 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
4292
4293 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
4294 /* nothing to do... */
f427ee49 4295 } else if (fp->fp_glob->fg_flag & FWRITE) {
39037602
A
4296 /* writable -> no longer eligible for secluded pages */
4297 memory_object_mark_eligible_for_secluded(moc,
0a7de745 4298 FALSE);
39037602
A
4299 } else if (secluded_for_filecache == 1) {
4300 char pathname[32] = { 0, };
4301 size_t copied;
4302 /* XXX FBDP: better way to detect /Applications/ ? */
4303 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
cb323159 4304 (void)copyinstr(ndp->ni_dirp,
0a7de745
A
4305 pathname,
4306 sizeof(pathname),
4307 &copied);
39037602
A
4308 } else {
4309 copystr(CAST_DOWN(void *, ndp->ni_dirp),
0a7de745
A
4310 pathname,
4311 sizeof(pathname),
4312 &copied);
39037602 4313 }
0a7de745 4314 pathname[sizeof(pathname) - 1] = '\0';
39037602 4315 if (strncmp(pathname,
0a7de745
A
4316 "/Applications/",
4317 strlen("/Applications/")) == 0 &&
39037602 4318 strncmp(pathname,
0a7de745
A
4319 "/Applications/Camera.app/",
4320 strlen("/Applications/Camera.app/")) != 0) {
39037602
A
4321 /*
4322 * not writable
4323 * AND from "/Applications/"
4324 * AND not from "/Applications/Camera.app/"
4325 * ==> eligible for secluded
4326 */
4327 memory_object_mark_eligible_for_secluded(moc,
0a7de745 4328 TRUE);
39037602
A
4329 }
4330 } else if (secluded_for_filecache == 2) {
5ba3f43e
A
4331#if __arm64__
4332#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4333#elif __arm__
4334#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4335#else
39037602 4336/* not implemented... */
5ba3f43e 4337#endif
cb323159
A
4338 size_t len = strlen(vp->v_name);
4339 if (!strncmp(vp->v_name, DYLD_SHARED_CACHE_NAME, len) ||
4340 !strncmp(vp->v_name, "dyld", len) ||
4341 !strncmp(vp->v_name, "launchd", len) ||
4342 !strncmp(vp->v_name, "Camera", len) ||
4343 !strncmp(vp->v_name, "mediaserverd", len) ||
4344 !strncmp(vp->v_name, "SpringBoard", len) ||
4345 !strncmp(vp->v_name, "backboardd", len)) {
39037602
A
4346 /*
4347 * This file matters when launching Camera:
4348 * do not store its contents in the secluded
4349 * pool that will be drained on Camera launch.
4350 */
4351 memory_object_mark_eligible_for_secluded(moc,
0a7de745 4352 FALSE);
39037602
A
4353 }
4354 }
4355 }
4356#endif /* CONFIG_SECLUDED_MEMORY */
4357
91447636
A
4358 fp_drop(p, indx, fp, 1);
4359 proc_fdunlock(p);
4360
1c79356b 4361 *retval = indx;
91447636 4362
0a7de745 4363 return 0;
55e303ae 4364bad:
3e170ce0 4365 context = *vfs_context_current();
f427ee49 4366 context.vc_ucred = fp->fp_glob->fg_cred;
39037602 4367
f427ee49
A
4368 if ((fp->fp_glob->fg_flag & FWASLOCKED) &&
4369 (FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_VNODE)) {
4370 struct flock lf = {
4371 .l_whence = SEEK_SET,
4372 .l_type = F_UNLCK,
4373 };
39037602 4374
0a7de745 4375 (void)VNOP_ADVLOCK(
f427ee49 4376 vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
fe8ab488 4377 }
2d21ac55 4378
f427ee49 4379 vn_close(vp, fp->fp_glob->fg_flag, &context);
91447636
A
4380 vnode_put(vp);
4381 fp_free(p, indx, fp);
4382
0a7de745 4383 return error;
1c79356b
A
4384}
4385
fe8ab488
A
4386/*
4387 * While most of the *at syscall handlers can call nameiat() which
4388 * is a wrapper around namei, the use of namei and initialisation
4389 * of nameidata are far removed and in different functions - namei
4390 * gets called in vn_open_auth for open1. So we'll just do here what
4391 * nameiat() does.
4392 */
4393static int
4394open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
4395 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
4396 int dirfd)
4397{
4398 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
4399 int error;
4400 char c;
4401
4402 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4403 error = copyin(ndp->ni_dirp, &c, sizeof(char));
0a7de745
A
4404 if (error) {
4405 return error;
4406 }
fe8ab488
A
4407 } else {
4408 c = *((char *)(ndp->ni_dirp));
4409 }
4410
4411 if (c != '/') {
4412 vnode_t dvp_at;
4413
4414 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
4415 &dvp_at);
0a7de745
A
4416 if (error) {
4417 return error;
4418 }
fe8ab488
A
4419
4420 if (vnode_vtype(dvp_at) != VDIR) {
4421 vnode_put(dvp_at);
0a7de745 4422 return ENOTDIR;
fe8ab488
A
4423 }
4424
4425 ndp->ni_dvp = dvp_at;
4426 ndp->ni_cnd.cn_flags |= USEDVP;
4427 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
4428 retval);
4429 vnode_put(dvp_at);
0a7de745 4430 return error;
fe8ab488
A
4431 }
4432 }
4433
0a7de745 4434 return open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval);
fe8ab488
A
4435}
4436
0c530ab8 4437/*
b0d623f7 4438 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
4439 *
4440 * Parameters: p Process requesting the open
4441 * uap User argument descriptor (see below)
4442 * retval Pointer to an area to receive the
4443 * return calue from the system call
4444 *
4445 * Indirect: uap->path Path to open (same as 'open')
4446 * uap->flags Flags to open (same as 'open'
4447 * uap->uid UID to set, if creating
4448 * uap->gid GID to set, if creating
4449 * uap->mode File mode, if creating (same as 'open')
4450 * uap->xsecurity ACL to set, if creating
4451 *
4452 * Returns: 0 Success
4453 * !0 errno value
4454 *
4455 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4456 *
4457 * XXX: We should enummerate the possible errno values here, and where
4458 * in the code they originated.
4459 */
1c79356b 4460int
b0d623f7 4461open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 4462{
2d21ac55 4463 struct filedesc *fdp = p->p_fd;
91447636
A
4464 int ciferror;
4465 kauth_filesec_t xsecdst;
4466 struct vnode_attr va;
2d21ac55 4467 struct nameidata nd;
91447636
A
4468 int cmode;
4469
b0d623f7
A
4470 AUDIT_ARG(owner, uap->uid, uap->gid);
4471
91447636
A
4472 xsecdst = NULL;
4473 if ((uap->xsecurity != USER_ADDR_NULL) &&
0a7de745 4474 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
91447636 4475 return ciferror;
0a7de745 4476 }
91447636 4477
91447636 4478 VATTR_INIT(&va);
0a7de745 4479 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4ba76501 4480 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
0a7de745 4481 if (uap->uid != KAUTH_UID_NONE) {
91447636 4482 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
4483 }
4484 if (uap->gid != KAUTH_GID_NONE) {
91447636 4485 VATTR_SET(&va, va_gid, uap->gid);
0a7de745
A
4486 }
4487 if (xsecdst != NULL) {
91447636 4488 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 4489 }
91447636 4490
6d2010ae 4491 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 4492 uap->path, vfs_context_current());
2d21ac55 4493
39236c6e 4494 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
0a7de745
A
4495 fileproc_alloc_init, NULL, retval);
4496 if (xsecdst != NULL) {
91447636 4497 kauth_filesec_free(xsecdst);
0a7de745 4498 }
91447636
A
4499
4500 return ciferror;
4501}
4502
39037602 4503/*
316670eb 4504 * Go through the data-protected atomically controlled open (2)
39037602 4505 *
316670eb
A
4506 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4507 */
0a7de745
A
4508int
4509open_dprotected_np(__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval)
4510{
316670eb
A
4511 int flags = uap->flags;
4512 int class = uap->class;
4513 int dpflags = uap->dpflags;
4514
39037602 4515 /*
316670eb
A
4516 * Follow the same path as normal open(2)
4517 * Look up the item if it exists, and acquire the vnode.
4518 */
4519 struct filedesc *fdp = p->p_fd;
4520 struct vnode_attr va;
4521 struct nameidata nd;
4522 int cmode;
4523 int error;
39037602 4524
316670eb
A
4525 VATTR_INIT(&va);
4526 /* Mask off all but regular access permissions */
0a7de745 4527 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
316670eb
A
4528 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4529
4530 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 4531 uap->path, vfs_context_current());
316670eb 4532
39037602
A
4533 /*
4534 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
4535 * extra fields.
4536 * 1. target cprotect class.
39037602
A
4537 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4538 */
4539 if (flags & O_CREAT) {
0a7de745
A
4540 /* lower level kernel code validates that the class is valid before applying it. */
4541 if (class != PROTECTION_CLASS_DEFAULT) {
4542 /*
4543 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4544 * file behave the same as open (2)
4545 */
4546 VATTR_SET(&va, va_dataprotect_class, class);
4547 }
4548 }
4549
4550 if (dpflags & (O_DP_GETRAWENCRYPTED | O_DP_GETRAWUNENCRYPTED)) {
4551 if (flags & (O_RDWR | O_WRONLY)) {
316670eb 4552 /* Not allowed to write raw encrypted bytes */
39037602
A
4553 return EINVAL;
4554 }
3e170ce0 4555 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
0a7de745 4556 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3e170ce0
A
4557 }
4558 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
0a7de745 4559 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3e170ce0 4560 }
316670eb
A
4561 }
4562
39236c6e 4563 error = open1(vfs_context_current(), &nd, uap->flags, &va,
0a7de745 4564 fileproc_alloc_init, NULL, retval);
316670eb
A
4565
4566 return error;
4567}
4568
fe8ab488
A
4569static int
4570openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
4571 int fd, enum uio_seg segflg, int *retval)
2d21ac55 4572{
fe8ab488 4573 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
f427ee49
A
4574 struct {
4575 struct vnode_attr va;
4576 struct nameidata nd;
4577 } *__open_data;
4578 struct vnode_attr *vap;
4579 struct nameidata *ndp;
91447636 4580 int cmode;
f427ee49 4581 int error;
1c79356b 4582
f427ee49
A
4583 __open_data = kheap_alloc(KHEAP_TEMP, sizeof(*__open_data), Z_WAITOK);
4584 vap = &__open_data->va;
4585 ndp = &__open_data->nd;
4586
4587 VATTR_INIT(vap);
91447636 4588 /* Mask off all but regular access permissions */
0a7de745 4589 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
f427ee49 4590 VATTR_SET(vap, va_mode, cmode & ACCESSPERMS);
91447636 4591
f427ee49 4592 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
fe8ab488 4593 segflg, path, ctx);
2d21ac55 4594
f427ee49
A
4595 error = open1at(ctx, ndp, flags, vap, fileproc_alloc_init, NULL,
4596 retval, fd);
4597
4598 kheap_free(KHEAP_TEMP, __open_data, sizeof(*__open_data));
4599
4600 return error;
1c79356b 4601}
91447636 4602
fe8ab488
A
4603int
4604open(proc_t p, struct open_args *uap, int32_t *retval)
4605{
4606 __pthread_testcancel(1);
0a7de745 4607 return open_nocancel(p, (struct open_nocancel_args *)uap, retval);
fe8ab488 4608}
1c79356b 4609
fe8ab488
A
4610int
4611open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
4612 int32_t *retval)
4613{
0a7de745
A
4614 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4615 uap->mode, AT_FDCWD, UIO_USERSPACE, retval);
fe8ab488 4616}
91447636 4617
1c79356b 4618int
fe8ab488 4619openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
0a7de745 4620 int32_t *retval)
1c79356b 4621{
0a7de745
A
4622 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4623 uap->mode, uap->fd, UIO_USERSPACE, retval);
fe8ab488 4624}
91447636 4625
fe8ab488
A
4626int
4627openat(proc_t p, struct openat_args *uap, int32_t *retval)
4628{
4629 __pthread_testcancel(1);
0a7de745 4630 return openat_nocancel(p, (struct openat_nocancel_args *)uap, retval);
fe8ab488
A
4631}
4632
4633/*
4634 * openbyid_np: open a file given a file system id and a file system object id
4635 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4636 * file systems that don't support object ids it is a node id (uint64_t).
4637 *
4638 * Parameters: p Process requesting the open
4639 * uap User argument descriptor (see below)
4640 * retval Pointer to an area to receive the
4641 * return calue from the system call
4642 *
4643 * Indirect: uap->path Path to open (same as 'open')
4644 *
4645 * uap->fsid id of target file system
4646 * uap->objid id of target file system object
4647 * uap->flags Flags to open (same as 'open')
4648 *
4649 * Returns: 0 Success
4650 * !0 errno value
4651 *
4652 *
4653 * XXX: We should enummerate the possible errno values here, and where
4654 * in the code they originated.
4655 */
4656int
4657openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4658{
4659 fsid_t fsid;
4660 uint64_t objid;
4661 int error;
4662 char *buf = NULL;
4663 int buflen = MAXPATHLEN;
4664 int pathlen = 0;
4665 vfs_context_t ctx = vfs_context_current();
4666
490019cf 4667 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
0a7de745 4668 return error;
490019cf
A
4669 }
4670
fe8ab488 4671 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
0a7de745 4672 return error;
fe8ab488
A
4673 }
4674
4675 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4676 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
0a7de745 4677 return error;
fe8ab488
A
4678 }
4679
4680 AUDIT_ARG(value32, fsid.val[0]);
4681 AUDIT_ARG(value64, objid);
4682
4683 /*resolve path from fsis, objid*/
4684 do {
f427ee49 4685 buf = kheap_alloc(KHEAP_TEMP, buflen + 1, Z_WAITOK);
fe8ab488 4686 if (buf == NULL) {
0a7de745 4687 return ENOMEM;
fe8ab488
A
4688 }
4689
cb323159
A
4690 error = fsgetpath_internal( ctx, fsid.val[0], objid, buflen,
4691 buf, FSOPT_ISREALFSID, &pathlen);
fe8ab488
A
4692
4693 if (error) {
f427ee49 4694 kheap_free(KHEAP_TEMP, buf, buflen + 1);
fe8ab488
A
4695 buf = NULL;
4696 }
4697 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4698
4699 if (error) {
4700 return error;
4701 }
4702
4703 buf[pathlen] = 0;
4704
4705 error = openat_internal(
4706 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4707
f427ee49 4708 kheap_free(KHEAP_TEMP, buf, buflen + 1);
fe8ab488
A
4709
4710 return error;
4711}
4712
4713
4714/*
4715 * Create a special file.
4716 */
4717static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4718
4719int
4720mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4721{
4722 struct vnode_attr va;
4723 vfs_context_t ctx = vfs_context_current();
4724 int error;
4725 struct nameidata nd;
0a7de745 4726 vnode_t vp, dvp;
fe8ab488 4727
0a7de745
A
4728 VATTR_INIT(&va);
4729 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4730 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
4731
4732 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
0a7de745
A
4733 if ((uap->mode & S_IFMT) == S_IFIFO) {
4734 return mkfifo1(ctx, uap->path, &va);
4735 }
1c79356b 4736
f427ee49 4737 AUDIT_ARG(mode, (mode_t)uap->mode);
b0d623f7 4738 AUDIT_ARG(value32, uap->dev);
91447636 4739
0a7de745
A
4740 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
4741 return error;
4742 }
39037602 4743 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
0a7de745 4744 UIO_USERSPACE, uap->path, ctx);
55e303ae 4745 error = namei(&nd);
0a7de745
A
4746 if (error) {
4747 return error;
4748 }
91447636 4749 dvp = nd.ni_dvp;
1c79356b 4750 vp = nd.ni_vp;
91447636
A
4751
4752 if (vp != NULL) {
1c79356b 4753 error = EEXIST;
91447636 4754 goto out;
1c79356b 4755 }
55e303ae 4756
91447636 4757 switch (uap->mode & S_IFMT) {
91447636
A
4758 case S_IFCHR:
4759 VATTR_SET(&va, va_type, VCHR);
4760 break;
4761 case S_IFBLK:
4762 VATTR_SET(&va, va_type, VBLK);
4763 break;
91447636
A
4764 default:
4765 error = EINVAL;
4766 goto out;
4767 }
2d21ac55
A
4768
4769#if CONFIG_MACF
6d2010ae
A
4770 error = mac_vnode_check_create(ctx,
4771 nd.ni_dvp, &nd.ni_cnd, &va);
0a7de745 4772 if (error) {
6d2010ae 4773 goto out;
0a7de745 4774 }
2d21ac55
A
4775#endif
4776
0a7de745
A
4777 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4778 goto out;
4779 }
2d21ac55 4780
0a7de745 4781 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0) {
91447636 4782 goto out;
0a7de745 4783 }
91447636
A
4784
4785 if (vp) {
0a7de745 4786 int update_flags = 0;
91447636 4787
0a7de745
A
4788 // Make sure the name & parent pointers are hooked up
4789 if (vp->v_name == NULL) {
91447636 4790 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
4791 }
4792 if (vp->v_parent == NULLVP) {
4793 update_flags |= VNODE_UPDATE_PARENT;
4794 }
91447636 4795
0a7de745
A
4796 if (update_flags) {
4797 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4798 }
91447636 4799
2d21ac55
A
4800#if CONFIG_FSE
4801 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4802 FSE_ARG_VNODE, vp,
4803 FSE_ARG_DONE);
2d21ac55 4804#endif
1c79356b 4805 }
91447636
A
4806
4807out:
4808 /*
4809 * nameidone has to happen before we vnode_put(dvp)
4810 * since it may need to release the fs_nodelock on the dvp
4811 */
4812 nameidone(&nd);
4813
0a7de745
A
4814 if (vp) {
4815 vnode_put(vp);
4816 }
91447636
A
4817 vnode_put(dvp);
4818
0a7de745 4819 return error;
1c79356b
A
4820}
4821
4822/*
4823 * Create a named pipe.
2d21ac55
A
4824 *
4825 * Returns: 0 Success
4826 * EEXIST
4827 * namei:???
4828 * vnode_authorize:???
4829 * vn_create:???
1c79356b 4830 */
91447636
A
4831static int
4832mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4833{
0a7de745 4834 vnode_t vp, dvp;
1c79356b
A
4835 int error;
4836 struct nameidata nd;
55e303ae 4837
39037602 4838 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
0a7de745 4839 UIO_USERSPACE, upath, ctx);
55e303ae 4840 error = namei(&nd);
0a7de745
A
4841 if (error) {
4842 return error;
4843 }
91447636
A
4844 dvp = nd.ni_dvp;
4845 vp = nd.ni_vp;
4846
0a7de745
A
4847 /* check that this is a new file and authorize addition */
4848 if (vp != NULL) {
4849 error = EEXIST;
4850 goto out;
4851 }
4852 VATTR_SET(vap, va_type, VFIFO);
2d21ac55 4853
0a7de745 4854 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
2d21ac55 4855 goto out;
0a7de745 4856 }
2d21ac55 4857
0a7de745 4858 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4859out:
4860 /*
4861 * nameidone has to happen before we vnode_put(dvp)
4862 * since it may need to release the fs_nodelock on the dvp
4863 */
4864 nameidone(&nd);
4865
0a7de745
A
4866 if (vp) {
4867 vnode_put(vp);
4868 }
91447636
A
4869 vnode_put(dvp);
4870
55e303ae 4871 return error;
91447636
A
4872}
4873
0c530ab8
A
4874
4875/*
b0d623f7 4876 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4877 *
4878 * Parameters: p Process requesting the open
4879 * uap User argument descriptor (see below)
4880 * retval (Ignored)
4881 *
4882 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4883 * uap->uid UID to set
4884 * uap->gid GID to set
4885 * uap->mode File mode to set (same as 'mkfifo')
4886 * uap->xsecurity ACL to set, if creating
4887 *
4888 * Returns: 0 Success
4889 * !0 errno value
4890 *
4891 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4892 *
4893 * XXX: We should enummerate the possible errno values here, and where
4894 * in the code they originated.
4895 */
91447636 4896int
b0d623f7 4897mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4898{
4899 int ciferror;
4900 kauth_filesec_t xsecdst;
91447636
A
4901 struct vnode_attr va;
4902
b0d623f7
A
4903 AUDIT_ARG(owner, uap->uid, uap->gid);
4904
91447636
A
4905 xsecdst = KAUTH_FILESEC_NONE;
4906 if (uap->xsecurity != USER_ADDR_NULL) {
0a7de745 4907 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
91447636 4908 return ciferror;
0a7de745 4909 }
91447636
A
4910 }
4911
91447636 4912 VATTR_INIT(&va);
0a7de745
A
4913 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4914 if (uap->uid != KAUTH_UID_NONE) {
91447636 4915 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
4916 }
4917 if (uap->gid != KAUTH_GID_NONE) {
91447636 4918 VATTR_SET(&va, va_gid, uap->gid);
0a7de745
A
4919 }
4920 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 4921 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 4922 }
91447636 4923
2d21ac55 4924 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636 4925
0a7de745 4926 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 4927 kauth_filesec_free(xsecdst);
0a7de745 4928 }
91447636
A
4929 return ciferror;
4930}
4931
4932/* ARGSUSED */
4933int
b0d623f7 4934mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4935{
91447636
A
4936 struct vnode_attr va;
4937
0a7de745
A
4938 VATTR_INIT(&va);
4939 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
91447636 4940
0a7de745 4941 return mkfifo1(vfs_context_current(), uap->path, &va);
1c79356b
A
4942}
4943
cb323159 4944extern int safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink);
b0d623f7 4945extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
cb323159 4946extern int safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
b0d623f7
A
4947
4948int
cb323159 4949safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink)
b0d623f7
A
4950{
4951 int ret, len = _len;
4952
4953 *truncated_path = 0;
cb323159
A
4954
4955 if (firmlink) {
4956 ret = vn_getpath(dvp, path, &len);
4957 } else {
4958 ret = vn_getpath_no_firmlink(dvp, path, &len);
4959 }
b0d623f7
A
4960 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4961 if (leafname) {
0a7de745
A
4962 path[len - 1] = '/';
4963 len += strlcpy(&path[len], leafname, MAXPATHLEN - len) + 1;
b0d623f7
A
4964 if (len > MAXPATHLEN) {
4965 char *ptr;
39037602 4966
b0d623f7
A
4967 // the string got truncated!
4968 *truncated_path = 1;
f427ee49 4969 ptr = strrchr(path, '/');
b0d623f7
A
4970 if (ptr) {
4971 *ptr = '\0'; // chop off the string at the last directory component
4972 }
f427ee49 4973 len = (int)strlen(path) + 1;
b0d623f7
A
4974 }
4975 }
4976 } else if (ret == 0) {
4977 *truncated_path = 1;
4978 } else if (ret != 0) {
0a7de745 4979 struct vnode *mydvp = dvp;
b0d623f7
A
4980
4981 if (ret != ENOSPC) {
4982 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
0a7de745 4983 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4984 }
b0d623f7 4985 *truncated_path = 1;
39037602 4986
b0d623f7
A
4987 do {
4988 if (mydvp->v_parent != NULL) {
4989 mydvp = mydvp->v_parent;
4990 } else if (mydvp->v_mount) {
4991 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4992 break;
4993 } else {
4994 // no parent and no mount point? only thing is to punt and say "/" changed
4995 strlcpy(path, "/", _len);
4996 len = 2;
4997 mydvp = NULL;
4998 }
39037602 4999
b0d623f7
A
5000 if (mydvp == NULL) {
5001 break;
5002 }
5003
5004 len = _len;
cb323159
A
5005 if (firmlink) {
5006 ret = vn_getpath(mydvp, path, &len);
5007 } else {
5008 ret = vn_getpath_no_firmlink(mydvp, path, &len);
5009 }
b0d623f7
A
5010 } while (ret == ENOSPC);
5011 }
5012
5013 return len;
5014}
5015
cb323159
A
/*
 * safe_getpath: safe_getpath_new() with firmlink set to 1.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	const int use_firmlink = 1;

	return safe_getpath_new(dvp, leafname, path, _len, truncated_path, use_firmlink);
}
5021
/*
 * safe_getpath_no_firmlink: front end to safe_getpath_new() that does not
 * go through firmlinks.
 *
 * Every argument is forwarded unchanged; the firmlink selector is passed
 * as 0, which makes safe_getpath_new() resolve the directory with
 * vn_getpath_no_firmlink().
 *
 * Returns:	whatever safe_getpath_new() returns for the same arguments.
 */
int
safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int path_len;

	path_len = safe_getpath_new(dvp, leafname, path, _len, truncated_path, 0);

	return path_len;
}
b0d623f7 5027
1c79356b
A
5028/*
5029 * Make a hard file link.
2d21ac55
A
5030 *
5031 * Returns: 0 Success
5032 * EPERM
5033 * EEXIST
5034 * EXDEV
5035 * namei:???
5036 * vnode_authorize:???
5037 * VNOP_LINK:???
1c79356b 5038 */
1c79356b 5039/* ARGSUSED */
fe8ab488
A
5040static int
5041linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
5042 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 5043{
cb323159 5044 vnode_t vp, pvp, dvp, lvp;
1c79356b 5045 struct nameidata nd;
fe8ab488 5046 int follow;
1c79356b 5047 int error;
b0d623f7 5048#if CONFIG_FSE
91447636 5049 fse_info finfo;
b0d623f7 5050#endif
b226f5e5 5051 int need_event, has_listeners, need_kpath2;
2d21ac55 5052 char *target_path = NULL;
0a7de745 5053 int truncated = 0;
1c79356b 5054
91447636
A
5055 vp = dvp = lvp = NULLVP;
5056
5057 /* look up the object we are linking to */
fe8ab488
A
5058 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
5059 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
5060 segflg, path, ctx);
5061
5062 error = nameiat(&nd, fd1);
0a7de745
A
5063 if (error) {
5064 return error;
5065 }
1c79356b 5066 vp = nd.ni_vp;
91447636
A
5067
5068 nameidone(&nd);
5069
2d21ac55
A
5070 /*
5071 * Normally, linking to directories is not supported.
5072 * However, some file systems may have limited support.
5073 */
91447636 5074 if (vp->v_type == VDIR) {
39037602 5075 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
5076 error = EPERM; /* POSIX */
5077 goto out;
5078 }
39037602 5079
2d21ac55
A
5080 /* Linking to a directory requires ownership. */
5081 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
5082 struct vnode_attr dva;
5083
5084 VATTR_INIT(&dva);
5085 VATTR_WANTED(&dva, va_uid);
5086 if (vnode_getattr(vp, &dva, ctx) != 0 ||
5087 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
5088 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
5089 error = EACCES;
5090 goto out;
5091 }
5092 }
91447636
A
5093 }
5094
91447636 5095 /* lookup the target node */
6d2010ae
A
5096#if CONFIG_TRIGGERS
5097 nd.ni_op = OP_LINK;
5098#endif
91447636 5099 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 5100 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
5101 nd.ni_dirp = link;
5102 error = nameiat(&nd, fd2);
0a7de745 5103 if (error != 0) {
91447636 5104 goto out;
0a7de745 5105 }
91447636
A
5106 dvp = nd.ni_dvp;
5107 lvp = nd.ni_vp;
2d21ac55
A
5108
5109#if CONFIG_MACF
0a7de745 5110 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
2d21ac55 5111 goto out2;
0a7de745 5112 }
2d21ac55
A
5113#endif
5114
0a7de745
A
5115 /* or to anything that kauth doesn't want us to (eg. immutable items) */
5116 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
5117 goto out2;
5118 }
2d21ac55 5119
91447636
A
5120 /* target node must not exist */
5121 if (lvp != NULLVP) {
5122 error = EEXIST;
5123 goto out2;
5124 }
0a7de745
A
5125 /* cannot link across mountpoints */
5126 if (vnode_mount(vp) != vnode_mount(dvp)) {
5127 error = EXDEV;
5128 goto out2;
5129 }
39037602 5130
0a7de745
A
5131 /* authorize creation of the target note */
5132 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
5133 goto out2;
5134 }
91447636
A
5135
5136 /* and finally make the link */
2d21ac55 5137 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
0a7de745 5138 if (error) {
91447636 5139 goto out2;
0a7de745 5140 }
91447636 5141
39236c6e
A
5142#if CONFIG_MACF
5143 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
5144#endif
5145
2d21ac55 5146#if CONFIG_FSE
91447636 5147 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
5148#else
5149 need_event = 0;
5150#endif
91447636
A
5151 has_listeners = kauth_authorize_fileop_has_listeners();
5152
b226f5e5
A
5153 need_kpath2 = 0;
5154#if CONFIG_AUDIT
5155 if (AUDIT_RECORD_EXISTS()) {
5156 need_kpath2 = 1;
5157 }
5158#endif
5159
5160 if (need_event || has_listeners || need_kpath2) {
91447636
A
5161 char *link_to_path = NULL;
5162 int len, link_name_len;
5163
5164 /* build the path to the new link file */
2d21ac55 5165 GET_PATH(target_path);
2d21ac55 5166
b0d623f7 5167 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636 5168
b226f5e5
A
5169 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
5170
91447636 5171 if (has_listeners) {
0a7de745 5172 /* build the path to file we are linking to */
2d21ac55 5173 GET_PATH(link_to_path);
2d21ac55 5174
91447636 5175 link_name_len = MAXPATHLEN;
fe8ab488
A
5176 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
5177 /*
39037602 5178 * Call out to allow 3rd party notification of rename.
fe8ab488
A
5179 * Ignore result of kauth_authorize_fileop call.
5180 */
39037602 5181 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
0a7de745
A
5182 (uintptr_t)link_to_path,
5183 (uintptr_t)target_path);
fe8ab488 5184 }
2d21ac55
A
5185 if (link_to_path != NULL) {
5186 RELEASE_PATH(link_to_path);
5187 }
91447636 5188 }
2d21ac55 5189#if CONFIG_FSE
91447636 5190 if (need_event) {
0a7de745
A
5191 /* construct fsevent */
5192 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
5193 if (truncated) {
5194 finfo.mode |= FSE_TRUNCATED_PATH;
5195 }
5196
0a7de745
A
5197 // build the path to the destination of the link
5198 add_fsevent(FSE_CREATE_FILE, ctx,
5199 FSE_ARG_STRING, len, target_path,
5200 FSE_ARG_FINFO, &finfo,
5201 FSE_ARG_DONE);
1c79356b 5202 }
cb323159
A
5203
5204 pvp = vp->v_parent;
5205 // need an iocount on pvp in this case
5206 if (pvp && pvp != dvp) {
5207 error = vnode_get(pvp);
5208 if (error) {
5209 pvp = NULLVP;
5210 error = 0;
5211 }
5212 }
5213 if (pvp) {
0a7de745 5214 add_fsevent(FSE_STAT_CHANGED, ctx,
cb323159
A
5215 FSE_ARG_VNODE, pvp, FSE_ARG_DONE);
5216 }
5217 if (pvp && pvp != dvp) {
5218 vnode_put(pvp);
b0d623f7 5219 }
1c79356b 5220 }
2d21ac55 5221#endif
1c79356b 5222 }
91447636
A
5223out2:
5224 /*
5225 * nameidone has to happen before we vnode_put(dvp)
5226 * since it may need to release the fs_nodelock on the dvp
5227 */
5228 nameidone(&nd);
2d21ac55
A
5229 if (target_path != NULL) {
5230 RELEASE_PATH(target_path);
5231 }
91447636 5232out:
0a7de745 5233 if (lvp) {
91447636 5234 vnode_put(lvp);
0a7de745
A
5235 }
5236 if (dvp) {
91447636 5237 vnode_put(dvp);
0a7de745 5238 }
91447636 5239 vnode_put(vp);
0a7de745 5240 return error;
91447636 5241}
1c79356b 5242
fe8ab488
A
5243int
5244link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
5245{
0a7de745
A
5246 return linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
5247 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
fe8ab488
A
5248}
5249
5250int
5251linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
5252{
0a7de745
A
5253 if (uap->flag & ~AT_SYMLINK_FOLLOW) {
5254 return EINVAL;
5255 }
fe8ab488 5256
0a7de745
A
5257 return linkat_internal(vfs_context_current(), uap->fd1, uap->path,
5258 uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
fe8ab488
A
5259}
5260
1c79356b
A
5261/*
5262 * Make a symbolic link.
91447636
A
5263 *
5264 * We could add support for ACLs here too...
1c79356b 5265 */
1c79356b 5266/* ARGSUSED */
fe8ab488
A
5267static int
5268symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
5269 user_addr_t link, enum uio_seg segflg)
1c79356b 5270{
91447636
A
5271 struct vnode_attr va;
5272 char *path;
1c79356b
A
5273 int error;
5274 struct nameidata nd;
0a7de745
A
5275 vnode_t vp, dvp;
5276 size_t dummy = 0;
fe8ab488
A
5277 proc_t p;
5278
5279 error = 0;
5280 if (UIO_SEG_IS_USER_SPACE(segflg)) {
f427ee49 5281 path = zalloc(ZV_NAMEI);
fe8ab488
A
5282 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
5283 } else {
5284 path = (char *)path_data;
5285 }
0a7de745 5286 if (error) {
1c79356b 5287 goto out;
0a7de745
A
5288 }
5289 AUDIT_ARG(text, path); /* This is the link string */
91447636 5290
fe8ab488 5291 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
0a7de745 5292 segflg, link, ctx);
fe8ab488
A
5293
5294 error = nameiat(&nd, fd);
0a7de745 5295 if (error) {
1c79356b 5296 goto out;
0a7de745 5297 }
91447636
A
5298 dvp = nd.ni_dvp;
5299 vp = nd.ni_vp;
55e303ae 5300
fe8ab488 5301 p = vfs_context_proc(ctx);
2d21ac55
A
5302 VATTR_INIT(&va);
5303 VATTR_SET(&va, va_type, VLNK);
5304 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 5305
2d21ac55
A
5306#if CONFIG_MACF
5307 error = mac_vnode_check_create(ctx,
0a7de745 5308 dvp, &nd.ni_cnd, &va);
2d21ac55
A
5309#endif
5310 if (error != 0) {
0a7de745 5311 goto skipit;
2d21ac55 5312 }
91447636 5313
2d21ac55 5314 if (vp != NULL) {
0a7de745
A
5315 error = EEXIST;
5316 goto skipit;
2d21ac55
A
5317 }
5318
5319 /* authorize */
0a7de745 5320 if (error == 0) {
2d21ac55 5321 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
0a7de745 5322 }
2d21ac55 5323 /* get default ownership, etc. */
0a7de745 5324 if (error == 0) {
2d21ac55 5325 error = vnode_authattr_new(dvp, &va, 0, ctx);
0a7de745
A
5326 }
5327 if (error == 0) {
2d21ac55 5328 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
0a7de745 5329 }
2d21ac55 5330
ea3f0419 5331 /* do fallback attribute handling */
0a7de745 5332 if (error == 0 && vp) {
ea3f0419 5333 error = vnode_setattr_fallback(vp, &va, ctx);
0a7de745 5334 }
39236c6e 5335
ea3f0419 5336#if CONFIG_MACF
0a7de745 5337 if (error == 0 && vp) {
ea3f0419 5338 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
0a7de745 5339 }
ea3f0419 5340#endif
39236c6e 5341
2d21ac55 5342 if (error == 0) {
0a7de745 5343 int update_flags = 0;
55e303ae 5344
3e170ce0 5345 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
5346 if (vp == NULL) {
5347 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
5348#if CONFIG_TRIGGERS
5349 nd.ni_op = OP_LOOKUP;
5350#endif
2d21ac55 5351 nd.ni_cnd.cn_flags = 0;
fe8ab488 5352 error = nameiat(&nd, fd);
2d21ac55 5353 vp = nd.ni_vp;
55e303ae 5354
0a7de745 5355 if (vp == NULL) {
2d21ac55 5356 goto skipit;
0a7de745 5357 }
2d21ac55 5358 }
fe8ab488 5359
91447636 5360#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 5361 /* call out to allow 3rd party notification of rename.
2d21ac55
A
5362 * Ignore result of kauth_authorize_fileop call.
5363 */
5364 if (kauth_authorize_fileop_has_listeners() &&
5365 namei(&nd) == 0) {
5366 char *new_link_path = NULL;
0a7de745 5367 int len;
fe8ab488 5368
2d21ac55
A
5369 /* build the path to the new link file */
5370 new_link_path = get_pathbuff();
5371 len = MAXPATHLEN;
5372 vn_getpath(dvp, new_link_path, &len);
5373 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 5374 new_link_path[len - 1] = '/';
0a7de745 5375 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN - len);
91447636 5376 }
fe8ab488
A
5377
5378 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
0a7de745
A
5379 (uintptr_t)path, (uintptr_t)new_link_path);
5380 if (new_link_path != NULL) {
2d21ac55 5381 release_pathbuff(new_link_path);
0a7de745 5382 }
2d21ac55 5383 }
fe8ab488 5384#endif
2d21ac55 5385 // Make sure the name & parent pointers are hooked up
0a7de745 5386 if (vp->v_name == NULL) {
2d21ac55 5387 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
5388 }
5389 if (vp->v_parent == NULLVP) {
2d21ac55 5390 update_flags |= VNODE_UPDATE_PARENT;
0a7de745 5391 }
fe8ab488 5392
0a7de745 5393 if (update_flags) {
2d21ac55 5394 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
0a7de745 5395 }
91447636 5396
2d21ac55
A
5397#if CONFIG_FSE
5398 add_fsevent(FSE_CREATE_FILE, ctx,
0a7de745
A
5399 FSE_ARG_VNODE, vp,
5400 FSE_ARG_DONE);
2d21ac55
A
5401#endif
5402 }
91447636
A
5403
5404skipit:
5405 /*
5406 * nameidone has to happen before we vnode_put(dvp)
5407 * since it may need to release the fs_nodelock on the dvp
5408 */
5409 nameidone(&nd);
5410
0a7de745
A
5411 if (vp) {
5412 vnode_put(vp);
5413 }
91447636 5414 vnode_put(dvp);
1c79356b 5415out:
0a7de745 5416 if (path && (path != (char *)path_data)) {
f427ee49 5417 zfree(ZV_NAMEI, path);
0a7de745 5418 }
91447636 5419
0a7de745 5420 return error;
1c79356b
A
5421}
5422
fe8ab488
A
5423int
5424symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
5425{
0a7de745
A
5426 return symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
5427 uap->link, UIO_USERSPACE);
fe8ab488
A
5428}
5429
5430int
5431symlinkat(__unused proc_t p, struct symlinkat_args *uap,
5432 __unused int32_t *retval)
5433{
0a7de745
A
5434 return symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
5435 uap->path2, UIO_USERSPACE);
fe8ab488
A
5436}
5437
1c79356b
A
5438/*
5439 * Delete a whiteout from the filesystem.
fe8ab488 5440 * No longer supported.
1c79356b 5441 */
1c79356b 5442int
fe8ab488 5443undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 5444{
0a7de745 5445 return ENOTSUP;
1c79356b
A
5446}
5447
5448/*
5449 * Delete a name from the filesystem.
5450 */
1c79356b 5451/* ARGSUSED */
fe8ab488 5452static int
c18c124e
A
5453unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
5454 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 5455{
c18c124e 5456 struct nameidata nd;
0a7de745 5457 vnode_t vp, dvp;
1c79356b 5458 int error;
91447636 5459 struct componentname *cnp;
2d21ac55 5460 char *path = NULL;
cb323159
A
5461 char *no_firmlink_path = NULL;
5462 int len_path = 0;
5463 int len_no_firmlink_path = 0;
b0d623f7 5464#if CONFIG_FSE
2d21ac55 5465 fse_info finfo;
6d2010ae 5466 struct vnode_attr va;
b0d623f7 5467#endif
c18c124e
A
5468 int flags;
5469 int need_event;
5470 int has_listeners;
5471 int truncated_path;
cb323159 5472 int truncated_no_firmlink_path;
6d2010ae 5473 int batched;
c18c124e
A
5474 struct vnode_attr *vap;
5475 int do_retry;
5476 int retry_count = 0;
5477 int cn_flags;
5478
5479 cn_flags = LOCKPARENT;
0a7de745 5480 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH)) {
c18c124e 5481 cn_flags |= AUDITVNPATH1;
0a7de745 5482 }
c18c124e 5483 /* If a starting dvp is passed, it trumps any fd passed. */
0a7de745 5484 if (start_dvp) {
c18c124e 5485 cn_flags |= USEDVP;
0a7de745 5486 }
6d2010ae 5487
c910b4d9
A
5488#if NAMEDRSRCFORK
5489 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 5490 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
5491#endif
5492
c18c124e
A
5493retry:
5494 do_retry = 0;
5495 flags = 0;
5496 need_event = 0;
5497 has_listeners = 0;
5498 truncated_path = 0;
cb323159 5499 truncated_no_firmlink_path = 0;
c18c124e
A
5500 vap = NULL;
5501
5502 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
5503
5504 nd.ni_dvp = start_dvp;
5505 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
5506 cnp = &nd.ni_cnd;
91447636 5507
813fb2f6 5508continue_lookup:
c18c124e 5509 error = nameiat(&nd, fd);
0a7de745
A
5510 if (error) {
5511 return error;
5512 }
b0d623f7 5513
c18c124e
A
5514 dvp = nd.ni_dvp;
5515 vp = nd.ni_vp;
91447636 5516
6d2010ae 5517
91447636 5518 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 5519 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 5520 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 5521 }
39037602 5522
39236c6e 5523 /* Skip any potential upcalls if told to. */
316670eb
A
5524 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
5525 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
5526 }
5527
6d2010ae
A
5528 if (vp) {
5529 batched = vnode_compound_remove_available(vp);
5530 /*
5531 * The root of a mounted filesystem cannot be deleted.
5532 */
cb323159 5533 if ((vp->v_flag & VROOT) || (dvp->v_mount != vp->v_mount)) {
6d2010ae 5534 error = EBUSY;
cb323159 5535 goto out;
6d2010ae 5536 }
2d21ac55 5537
00867663 5538#if DEVELOPMENT || DEBUG
0a7de745
A
5539 /*
5540 * XXX VSWAP: Check for entitlements or special flag here
5541 * so we can restrict access appropriately.
5542 */
00867663
A
5543#else /* DEVELOPMENT || DEBUG */
5544
5545 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
5546 error = EPERM;
5547 goto out;
5548 }
5549#endif /* DEVELOPMENT || DEBUG */
5550
6d2010ae
A
5551 if (!batched) {
5552 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
5553 if (error) {
3e170ce0 5554 if (error == ENOENT) {
3e170ce0
A
5555 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5556 do_retry = 1;
5557 retry_count++;
5558 }
c18c124e 5559 }
6d2010ae
A
5560 goto out;
5561 }
5562 }
5563 } else {
5564 batched = 1;
2d21ac55 5565
6d2010ae
A
5566 if (!vnode_compound_remove_available(dvp)) {
5567 panic("No vp, but no compound remove?");
5568 }
5569 }
2d21ac55 5570
2d21ac55
A
5571#if CONFIG_FSE
5572 need_event = need_fsevent(FSE_DELETE, dvp);
5573 if (need_event) {
6d2010ae
A
5574 if (!batched) {
5575 if ((vp->v_flag & VISHARDLINK) == 0) {
5576 /* XXX need to get these data in batched VNOP */
5577 get_fse_info(vp, &finfo, ctx);
5578 }
5579 } else {
5580 error = vfs_get_notify_attributes(&va);
5581 if (error) {
5582 goto out;
5583 }
5584
5585 vap = &va;
2d21ac55
A
5586 }
5587 }
5588#endif
5589 has_listeners = kauth_authorize_fileop_has_listeners();
5590 if (need_event || has_listeners) {
2d21ac55 5591 if (path == NULL) {
6d2010ae 5592 GET_PATH(path);
2d21ac55 5593 }
cb323159
A
5594 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
5595 if (no_firmlink_path == NULL) {
5596 GET_PATH(no_firmlink_path);
cb323159
A
5597 }
5598 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
2d21ac55
A
5599 }
5600
5601#if NAMEDRSRCFORK
0a7de745 5602 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
2d21ac55 5603 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
0a7de745 5604 } else
2d21ac55 5605#endif
6d2010ae 5606 {
c18c124e
A
5607 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
5608 vp = nd.ni_vp;
6d2010ae
A
5609 if (error == EKEEPLOOKING) {
5610 if (!batched) {
5611 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5612 }
5613
c18c124e 5614 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
5615 panic("EKEEPLOOKING, but continue flag not set?");
5616 }
5617
5618 if (vnode_isdir(vp)) {
5619 error = EISDIR;
5620 goto out;
5621 }
813fb2f6 5622 goto continue_lookup;
3e170ce0 5623 } else if (error == ENOENT && batched) {
3e170ce0
A
5624 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5625 /*
5626 * For compound VNOPs, the authorization callback may
5627 * return ENOENT in case of racing hardlink lookups
5628 * hitting the name cache, redrive the lookup.
5629 */
5630 do_retry = 1;
5631 retry_count += 1;
5632 goto out;
5633 }
6d2010ae
A
5634 }
5635 }
2d21ac55
A
5636
5637 /*
39037602 5638 * Call out to allow 3rd party notification of delete.
2d21ac55
A
5639 * Ignore result of kauth_authorize_fileop call.
5640 */
1c79356b 5641 if (!error) {
2d21ac55 5642 if (has_listeners) {
39037602 5643 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
5644 KAUTH_FILEOP_DELETE,
5645 (uintptr_t)vp,
5646 (uintptr_t)path);
2d21ac55 5647 }
91447636 5648
2d21ac55 5649 if (vp->v_flag & VISHARDLINK) {
0a7de745
A
5650 //
5651 // if a hardlink gets deleted we want to blow away the
5652 // v_parent link because the path that got us to this
5653 // instance of the link is no longer valid. this will
5654 // force the next call to get the path to ask the file
5655 // system instead of just following the v_parent link.
5656 //
5657 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 5658 }
91447636 5659
2d21ac55
A
5660#if CONFIG_FSE
5661 if (need_event) {
5662 if (vp->v_flag & VISHARDLINK) {
5663 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
5664 } else if (vap) {
5665 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 5666 }
b0d623f7
A
5667 if (truncated_path) {
5668 finfo.mode |= FSE_TRUNCATED_PATH;
5669 }
2d21ac55 5670 add_fsevent(FSE_DELETE, ctx,
cb323159 5671 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
0a7de745
A
5672 FSE_ARG_FINFO, &finfo,
5673 FSE_ARG_DONE);
2d21ac55
A
5674 }
5675#endif
1c79356b 5676 }
6d2010ae
A
5677
5678out:
0a7de745 5679 if (path != NULL) {
2d21ac55 5680 RELEASE_PATH(path);
cb323159 5681 path = NULL;
0a7de745 5682 }
2d21ac55 5683
cb323159
A
5684 if (no_firmlink_path != NULL) {
5685 RELEASE_PATH(no_firmlink_path);
5686 no_firmlink_path = NULL;
5687 }
c910b4d9 5688#if NAMEDRSRCFORK
39037602 5689 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
5690 * will cause its shadow file to go away if necessary.
5691 */
0a7de745
A
5692 if (vp && (vnode_isnamedstream(vp)) &&
5693 (vp->v_parent != NULLVP) &&
5694 vnode_isshadow(vp)) {
5695 vnode_recycle(vp);
5696 }
c910b4d9 5697#endif
6d2010ae
A
5698 /*
5699 * nameidone has to happen before we vnode_put(dvp)
5700 * since it may need to release the fs_nodelock on the dvp
5701 */
c18c124e 5702 nameidone(&nd);
91447636 5703 vnode_put(dvp);
6d2010ae
A
5704 if (vp) {
5705 vnode_put(vp);
5706 }
c18c124e
A
5707
5708 if (do_retry) {
5709 goto retry;
5710 }
5711
0a7de745 5712 return error;
1c79356b
A
5713}
5714
fe8ab488 5715int
c18c124e
A
5716unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5717 enum uio_seg segflg, int unlink_flags)
fe8ab488 5718{
0a7de745
A
5719 return unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5720 unlink_flags);
fe8ab488
A
5721}
5722
1c79356b 5723/*
c18c124e 5724 * Delete a name from the filesystem using Carbon semantics.
1c79356b 5725 */
c18c124e
A
5726int
5727delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 5728{
0a7de745
A
5729 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5730 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY);
fe8ab488
A
5731}
5732
c18c124e
A
5733/*
5734 * Delete a name from the filesystem using POSIX semantics.
5735 */
1c79356b 5736int
b0d623f7 5737unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 5738{
0a7de745
A
5739 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5740 uap->path, UIO_USERSPACE, 0);
fe8ab488 5741}
2d21ac55 5742
fe8ab488
A
5743int
5744unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5745{
cb323159 5746 if (uap->flag & ~(AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
0a7de745
A
5747 return EINVAL;
5748 }
fe8ab488 5749
cb323159
A
5750 if (uap->flag & (AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5751 int unlink_flags = 0;
5752
5753 if (uap->flag & AT_REMOVEDIR_DATALESS) {
5754 unlink_flags |= VNODE_REMOVE_DATALESS_DIR;
5755 }
0a7de745 5756 return rmdirat_internal(vfs_context_current(), uap->fd,
cb323159 5757 uap->path, UIO_USERSPACE, unlink_flags);
0a7de745
A
5758 } else {
5759 return unlinkat_internal(vfs_context_current(), uap->fd,
5760 NULLVP, uap->path, UIO_USERSPACE, 0);
5761 }
1c79356b
A
5762}
5763
5764/*
5765 * Reposition read/write file offset.
5766 */
1c79356b 5767int
2d21ac55 5768lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 5769{
91447636 5770 struct fileproc *fp;
2d21ac55
A
5771 vnode_t vp;
5772 struct vfs_context *ctx;
91447636 5773 off_t offset = uap->offset, file_size;
1c79356b
A
5774 int error;
5775
0a7de745
A
5776 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
5777 if (error == ENOTSUP) {
5778 return ESPIPE;
5779 }
5780 return error;
55e303ae 5781 }
91447636
A
5782 if (vnode_isfifo(vp)) {
5783 file_drop(uap->fd);
0a7de745 5784 return ESPIPE;
91447636 5785 }
2d21ac55
A
5786
5787
5788 ctx = vfs_context_current();
5789#if CONFIG_MACF
0a7de745 5790 if (uap->whence == L_INCR && uap->offset == 0) {
2d21ac55 5791 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
f427ee49 5792 fp->fp_glob);
0a7de745 5793 } else {
2d21ac55 5794 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
f427ee49 5795 fp->fp_glob);
0a7de745 5796 }
2d21ac55
A
5797 if (error) {
5798 file_drop(uap->fd);
0a7de745 5799 return error;
2d21ac55
A
5800 }
5801#endif
0a7de745 5802 if ((error = vnode_getwithref(vp))) {
91447636 5803 file_drop(uap->fd);
0a7de745 5804 return error;
91447636
A
5805 }
5806
1c79356b
A
5807 switch (uap->whence) {
5808 case L_INCR:
f427ee49 5809 offset += fp->fp_glob->fg_offset;
1c79356b
A
5810 break;
5811 case L_XTND:
0a7de745 5812 if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
55e303ae 5813 break;
0a7de745 5814 }
91447636 5815 offset += file_size;
1c79356b
A
5816 break;
5817 case L_SET:
1c79356b 5818 break;
813fb2f6 5819 case SEEK_HOLE:
0a7de745 5820 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
813fb2f6
A
5821 break;
5822 case SEEK_DATA:
0a7de745 5823 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
813fb2f6 5824 break;
1c79356b 5825 default:
55e303ae 5826 error = EINVAL;
1c79356b 5827 }
55e303ae
A
5828 if (error == 0) {
5829 if (uap->offset > 0 && offset < 0) {
5830 /* Incremented/relative move past max size */
5831 error = EOVERFLOW;
5832 } else {
5833 /*
5834 * Allow negative offsets on character devices, per
5835 * POSIX 1003.1-2001. Most likely for writing disk
5836 * labels.
5837 */
5838 if (offset < 0 && vp->v_type != VCHR) {
5839 /* Decremented/relative move before start */
5840 error = EINVAL;
5841 } else {
5842 /* Success */
f427ee49
A
5843 fp->fp_glob->fg_offset = offset;
5844 *retval = fp->fp_glob->fg_offset;
55e303ae
A
5845 }
5846 }
5847 }
b0d623f7 5848
39037602 5849 /*
b0d623f7
A
5850 * An lseek can affect whether data is "available to read." Use
5851 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5852 */
5853 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
5854 (void)vnode_put(vp);
5855 file_drop(uap->fd);
0a7de745 5856 return error;
1c79356b
A
5857}
5858
91447636 5859
1c79356b 5860/*
91447636 5861 * Check access permissions.
2d21ac55
A
5862 *
5863 * Returns: 0 Success
5864 * vnode_authorize:???
1c79356b 5865 */
91447636
A
5866static int
5867access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 5868{
0a7de745 5869 kauth_action_t action;
1c79356b
A
5870 int error;
5871
0a7de745
A
5872 /*
5873 * If just the regular access bits, convert them to something
91447636 5874 * that vnode_authorize will understand.
0a7de745
A
5875 */
5876 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5877 action = 0;
5878 if (uflags & R_OK) {
5879 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5880 }
5881 if (uflags & W_OK) {
91447636
A
5882 if (vnode_isdir(vp)) {
5883 action |= KAUTH_VNODE_ADD_FILE |
5884 KAUTH_VNODE_ADD_SUBDIRECTORY;
5885 /* might want delete rights here too */
5886 } else {
5887 action |= KAUTH_VNODE_WRITE_DATA;
5888 }
5889 }
0a7de745 5890 if (uflags & X_OK) {
91447636
A
5891 if (vnode_isdir(vp)) {
5892 action |= KAUTH_VNODE_SEARCH;
5893 } else {
5894 action |= KAUTH_VNODE_EXECUTE;
5895 }
5896 }
0a7de745 5897 } else {
91447636
A
5898 /* take advantage of definition of uflags */
5899 action = uflags >> 8;
5900 }
39037602 5901
2d21ac55
A
5902#if CONFIG_MACF
5903 error = mac_vnode_check_access(ctx, vp, uflags);
0a7de745
A
5904 if (error) {
5905 return error;
5906 }
2d21ac55
A
5907#endif /* MAC */
5908
0a7de745
A
5909 /* action == 0 means only check for existence */
5910 if (action != 0) {
5911 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
91447636
A
5912 } else {
5913 error = 0;
5914 }
5915
0a7de745 5916 return error;
1c79356b 5917}
1c79356b 5918
91447636
A
5919
5920
2d21ac55 5921/*
b0d623f7 5922 * access_extended: Check access permissions in bulk.
2d21ac55 5923 *
b0d623f7 5924 * Description: uap->entries Pointer to an array of accessx
0a7de745
A
5925 * descriptor structs, plus one or
5926 * more NULL terminated strings (see
5927 * "Notes" section below).
b0d623f7
A
5928 * uap->size Size of the area pointed to by
5929 * uap->entries.
5930 * uap->results Pointer to the results array.
2d21ac55
A
5931 *
5932 * Returns: 0 Success
5933 * ENOMEM Insufficient memory
5934 * EINVAL Invalid arguments
5935 * namei:EFAULT Bad address
5936 * namei:ENAMETOOLONG Filename too long
5937 * namei:ENOENT No such file or directory
5938 * namei:ELOOP Too many levels of symbolic links
5939 * namei:EBADF Bad file descriptor
5940 * namei:ENOTDIR Not a directory
5941 * namei:???
5942 * access1:
5943 *
5944 * Implicit returns:
5945 * uap->results Array contents modified
5946 *
5947 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5948 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5949 * strings
5950 *
5951 * struct accessx_descriptor[0]
5952 * ...
5953 * struct accessx_descriptor[n]
5954 * char name_data[0];
5955 *
5956 * We determine the entry count by walking the buffer containing
b0d623f7 5957 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5958 * see, the valid values for the offset ad_name_offset will be
5959 * in the byte range:
5960 *
5961 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5962 * to
5963 * [ uap->entries + uap->size - 2 ]
5964 *
5965 * since we must have at least one string, and the string must
b0d623f7 5966 * be at least one character plus the NULL terminator in length.
39037602 5967 *
2d21ac55
A
5968 * XXX: Need to support the check-as uid argument
5969 */
1c79356b 5970int
b0d623f7 5971access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5972{
2d21ac55
A
5973 struct accessx_descriptor *input = NULL;
5974 errno_t *result = NULL;
5975 errno_t error = 0;
5976 int wantdelete = 0;
f427ee49
A
5977 size_t desc_max, desc_actual;
5978 unsigned int i, j;
91447636 5979 struct vfs_context context;
1c79356b 5980 struct nameidata nd;
0a7de745 5981 int niopts;
2d21ac55
A
5982 vnode_t vp = NULL;
5983 vnode_t dvp = NULL;
5984#define ACCESSX_MAX_DESCR_ON_STACK 10
5985 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5986
91447636
A
5987 context.vc_ucred = NULL;
5988
2d21ac55
A
5989 /*
5990 * Validate parameters; if valid, copy the descriptor array and string
5991 * arguments into local memory. Before proceeding, the following
5992 * conditions must have been met:
5993 *
5994 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5995 * o There must be sufficient room in the request for at least one
5996 * descriptor and a one byte NUL terminated string.
5997 * o The allocation of local storage must not fail.
5998 */
0a7de745
A
5999 if (uap->size > ACCESSX_MAX_TABLESIZE) {
6000 return ENOMEM;
6001 }
6002 if (uap->size < (sizeof(struct accessx_descriptor) + 2)) {
6003 return EINVAL;
6004 }
6005 if (uap->size <= sizeof(stack_input)) {
2d21ac55
A
6006 input = stack_input;
6007 } else {
f427ee49 6008 input = kheap_alloc(KHEAP_DATA_BUFFERS, uap->size, Z_WAITOK);
0a7de745
A
6009 if (input == NULL) {
6010 error = ENOMEM;
6011 goto out;
6012 }
2d21ac55 6013 }
91447636 6014 error = copyin(uap->entries, input, uap->size);
0a7de745 6015 if (error) {
91447636 6016 goto out;
0a7de745 6017 }
1c79356b 6018
b0d623f7
A
6019 AUDIT_ARG(opaque, input, uap->size);
6020
91447636 6021 /*
2d21ac55
A
6022 * Force NUL termination of the copyin buffer to avoid namei() running
6023 * off the end. If the caller passes us bogus data, they may get a
6024 * bogus result.
6025 */
6026 ((char *)input)[uap->size - 1] = 0;
6027
6028 /*
6029 * Access is defined as checking against the process' real identity,
0a7de745 6030 * even if operations are checking the effective identity. This
2d21ac55 6031 * requires that we use a local vfs context.
0a7de745 6032 */
91447636 6033 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 6034 context.vc_thread = current_thread();
91447636
A
6035
6036 /*
2d21ac55
A
6037 * Find out how many entries we have, so we can allocate the result
6038 * array by walking the list and adjusting the count downward by the
6039 * earliest string offset we see.
91447636 6040 */
2d21ac55
A
6041 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
6042 desc_actual = desc_max;
6043 for (i = 0; i < desc_actual; i++) {
91447636 6044 /*
2d21ac55
A
6045 * Take the offset to the name string for this entry and
6046 * convert to an input array index, which would be one off
6047 * the end of the array if this entry was the lowest-addressed
6048 * name string.
91447636
A
6049 */
6050 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
6051
6052 /*
6053 * An offset greater than the max allowable offset is an error.
6054 * It is also an error for any valid entry to point
6055 * to a location prior to the end of the current entry, if
6056 * it's not a reference to the string of the previous entry.
6057 */
6058 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
6059 error = EINVAL;
6060 goto out;
6061 }
2d21ac55 6062
39037602
A
6063 /* Also do not let ad_name_offset point to something beyond the size of the input */
6064 if (input[i].ad_name_offset >= uap->size) {
6065 error = EINVAL;
6066 goto out;
6067 }
6068
2d21ac55
A
6069 /*
6070 * An offset of 0 means use the previous descriptor's offset;
6071 * this is used to chain multiple requests for the same file
6072 * to avoid multiple lookups.
6073 */
91447636 6074 if (j == 0) {
2d21ac55 6075 /* This is not valid for the first entry */
91447636
A
6076 if (i == 0) {
6077 error = EINVAL;
6078 goto out;
6079 }
6080 continue;
6081 }
2d21ac55
A
6082
6083 /*
6084 * If the offset of the string for this descriptor is before
6085 * what we believe is the current actual last descriptor,
6086 * then we need to adjust our estimate downward; this permits
6087 * the string table following the last descriptor to be out
6088 * of order relative to the descriptor list.
6089 */
0a7de745 6090 if (j < desc_actual) {
2d21ac55 6091 desc_actual = j;
0a7de745 6092 }
91447636 6093 }
2d21ac55
A
6094
6095 /*
6096 * We limit the actual number of descriptors we are willing to process
6097 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
6098 * requested exceeds this limit, the request fails with ENOMEM.
6099 */
6100 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
6101 error = ENOMEM;
6102 goto out;
6103 }
f427ee49
A
6104 result = kheap_alloc(KHEAP_DATA_BUFFERS, desc_actual * sizeof(errno_t),
6105 Z_WAITOK | Z_ZERO);
91447636
A
6106 if (result == NULL) {
6107 error = ENOMEM;
6108 goto out;
6109 }
6110
6111 /*
2d21ac55
A
6112 * Do the work by iterating over the descriptor entries we know to
6113 * at least appear to contain valid data.
91447636
A
6114 */
6115 error = 0;
2d21ac55 6116 for (i = 0; i < desc_actual; i++) {
91447636 6117 /*
2d21ac55
A
6118 * If the ad_name_offset is 0, then we use the previous
6119 * results to make the check; otherwise, we are looking up
6120 * a new file name.
91447636
A
6121 */
6122 if (input[i].ad_name_offset != 0) {
6123 /* discard old vnodes */
6124 if (vp) {
6125 vnode_put(vp);
6126 vp = NULL;
6127 }
6128 if (dvp) {
6129 vnode_put(dvp);
6130 dvp = NULL;
6131 }
39037602 6132
2d21ac55
A
6133 /*
6134 * Scan forward in the descriptor list to see if we
6135 * need the parent vnode. We will need it if we are
6136 * deleting, since we must have rights to remove
6137 * entries in the parent directory, as well as the
6138 * rights to delete the object itself.
6139 */
91447636 6140 wantdelete = input[i].ad_flags & _DELETE_OK;
0a7de745
A
6141 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) {
6142 if (input[j].ad_flags & _DELETE_OK) {
91447636 6143 wantdelete = 1;
0a7de745
A
6144 }
6145 }
39037602 6146
91447636 6147 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 6148
91447636 6149 /* need parent for vnode_authorize for deletion test */
0a7de745 6150 if (wantdelete) {
91447636 6151 niopts |= WANTPARENT;
0a7de745 6152 }
91447636
A
6153
6154 /* do the lookup */
6d2010ae 6155 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
0a7de745
A
6156 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
6157 &context);
91447636
A
6158 error = namei(&nd);
6159 if (!error) {
6160 vp = nd.ni_vp;
0a7de745 6161 if (wantdelete) {
91447636 6162 dvp = nd.ni_dvp;
0a7de745 6163 }
91447636
A
6164 }
6165 nameidone(&nd);
6166 }
6167
6168 /*
6169 * Handle lookup errors.
6170 */
0a7de745 6171 switch (error) {
91447636
A
6172 case ENOENT:
6173 case EACCES:
6174 case EPERM:
6175 case ENOTDIR:
6176 result[i] = error;
6177 break;
6178 case 0:
6179 /* run this access check */
6180 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
6181 break;
6182 default:
6183 /* fatal lookup error */
6184
6185 goto out;
6186 }
6187 }
6188
b0d623f7
A
6189 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
6190
91447636 6191 /* copy out results */
2d21ac55 6192 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 6193
91447636 6194out:
0a7de745 6195 if (input && input != stack_input) {
f427ee49 6196 kheap_free(KHEAP_DATA_BUFFERS, input, uap->size);
0a7de745
A
6197 }
6198 if (result) {
f427ee49 6199 kheap_free(KHEAP_DATA_BUFFERS, result, desc_actual * sizeof(errno_t));
0a7de745
A
6200 }
6201 if (vp) {
91447636 6202 vnode_put(vp);
0a7de745
A
6203 }
6204 if (dvp) {
91447636 6205 vnode_put(dvp);
0a7de745
A
6206 }
6207 if (IS_VALID_CRED(context.vc_ucred)) {
6208 kauth_cred_unref(&context.vc_ucred);
6209 }
6210 return error;
1c79356b
A
6211}
6212
2d21ac55
A
6213
6214/*
6215 * Returns: 0 Success
6216 * namei:EFAULT Bad address
6217 * namei:ENAMETOOLONG Filename too long
6218 * namei:ENOENT No such file or directory
6219 * namei:ELOOP Too many levels of symbolic links
6220 * namei:EBADF Bad file descriptor
6221 * namei:ENOTDIR Not a directory
6222 * namei:???
6223 * access1:
6224 */
fe8ab488
A
6225static int
6226faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
6227 int flag, enum uio_seg segflg)
1c79356b 6228{
1c79356b
A
6229 int error;
6230 struct nameidata nd;
0a7de745 6231 int niopts;
91447636 6232 struct vfs_context context;
cf7d32b8
A
6233#if NAMEDRSRCFORK
6234 int is_namedstream = 0;
6235#endif
6236
0a7de745 6237 /*
fe8ab488
A
6238 * Unless the AT_EACCESS option is used, Access is defined as checking
6239 * against the process' real identity, even if operations are checking
6240 * the effective identity. So we need to tweak the credential
0a7de745
A
6241 * in the context for that case.
6242 */
6243 if (!(flag & AT_EACCESS)) {
fe8ab488 6244 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
0a7de745 6245 } else {
fe8ab488 6246 context.vc_ucred = ctx->vc_ucred;
0a7de745 6247 }
fe8ab488
A
6248 context.vc_thread = ctx->vc_thread;
6249
91447636 6250
cb323159 6251 niopts = (flag & AT_SYMLINK_NOFOLLOW ? NOFOLLOW : FOLLOW) | AUDITVNPATH1;
0a7de745
A
6252 /* need parent for vnode_authorize for deletion test */
6253 if (amode & _DELETE_OK) {
6254 niopts |= WANTPARENT;
6255 }
6256 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
6257 path, &context);
2d21ac55
A
6258
6259#if NAMEDRSRCFORK
6260 /* access(F_OK) calls are allowed for resource forks. */
0a7de745 6261 if (amode == F_OK) {
2d21ac55 6262 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
0a7de745 6263 }
2d21ac55 6264#endif
0a7de745
A
6265 error = nameiat(&nd, fd);
6266 if (error) {
6267 goto out;
6268 }
91447636 6269
cf7d32b8 6270#if NAMEDRSRCFORK
39037602 6271 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
6272 * force an inactive on release which will mark it
6273 * for recycle.
cf7d32b8
A
6274 */
6275 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
6276 (nd.ni_vp->v_parent != NULLVP) &&
6277 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
6278 is_namedstream = 1;
6279 vnode_ref(nd.ni_vp);
6280 }
6281#endif
6282
fe8ab488 6283 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 6284
cf7d32b8
A
6285#if NAMEDRSRCFORK
6286 if (is_namedstream) {
6287 vnode_rele(nd.ni_vp);
6288 }
6289#endif
6290
0a7de745
A
6291 vnode_put(nd.ni_vp);
6292 if (amode & _DELETE_OK) {
6293 vnode_put(nd.ni_dvp);
6294 }
6295 nameidone(&nd);
39037602 6296
91447636 6297out:
0a7de745 6298 if (!(flag & AT_EACCESS)) {
fe8ab488 6299 kauth_cred_unref(&context.vc_ucred);
0a7de745
A
6300 }
6301 return error;
fe8ab488
A
6302}
6303
6304int
6305access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
6306{
0a7de745
A
6307 return faccessat_internal(vfs_context_current(), AT_FDCWD,
6308 uap->path, uap->flags, 0, UIO_USERSPACE);
91447636
A
6309}
6310
fe8ab488
A
6311int
6312faccessat(__unused proc_t p, struct faccessat_args *uap,
0a7de745 6313 __unused int32_t *retval)
fe8ab488 6314{
cb323159 6315 if (uap->flag & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW)) {
0a7de745
A
6316 return EINVAL;
6317 }
fe8ab488 6318
0a7de745
A
6319 return faccessat_internal(vfs_context_current(), uap->fd,
6320 uap->path, uap->amode, uap->flag, UIO_USERSPACE);
fe8ab488 6321}
91447636 6322
2d21ac55
A
6323/*
6324 * Returns: 0 Success
6325 * EFAULT
6326 * copyout:EFAULT
6327 * namei:???
6328 * vn_stat:???
6329 */
91447636 6330static int
fe8ab488
A
6331fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
6332 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
6333 enum uio_seg segflg, int fd, int flag)
91447636 6334{
fe8ab488
A
6335 struct nameidata nd;
6336 int follow;
b0d623f7
A
6337 union {
6338 struct stat sb;
6339 struct stat64 sb64;
527f9951 6340 } source = {};
b0d623f7
A
6341 union {
6342 struct user64_stat user64_sb;
6343 struct user32_stat user32_sb;
6344 struct user64_stat64 user64_sb64;
6345 struct user32_stat64 user32_sb64;
527f9951 6346 } dest = {};
91447636
A
6347 caddr_t sbp;
6348 int error, my_size;
6349 kauth_filesec_t fsec;
6350 size_t xsecurity_bufsize;
2d21ac55 6351 void * statptr;
cb323159
A
6352 struct fileproc *fp = NULL;
6353 int needsrealdev = 0;
1c79356b 6354
fe8ab488
A
6355 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6356 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
6357 segflg, path, ctx);
6358
2d21ac55 6359#if NAMEDRSRCFORK
cf7d32b8 6360 int is_namedstream = 0;
2d21ac55 6361 /* stat calls are allowed for resource forks. */
fe8ab488 6362 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 6363#endif
cb323159
A
6364
6365 if (flag & AT_FDONLY) {
6366 vnode_t fvp;
6367
6368 error = fp_getfvp(vfs_context_proc(ctx), fd, &fp, &fvp);
6369 if (error) {
6370 return error;
6371 }
6372 if ((error = vnode_getwithref(fvp))) {
6373 file_drop(fd);
6374 return error;
6375 }
6376 nd.ni_vp = fvp;
6377 } else {
6378 error = nameiat(&nd, fd);
6379 if (error) {
6380 return error;
6381 }
0a7de745 6382 }
91447636 6383 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
6384
6385 statptr = (void *)&source;
cf7d32b8
A
6386
6387#if NAMEDRSRCFORK
39037602
A
6388 /* Grab reference on the shadow stream file vnode to
6389 * force an inactive on release which will mark it
b0d623f7 6390 * for recycle.
cf7d32b8 6391 */
fe8ab488
A
6392 if (vnode_isnamedstream(nd.ni_vp) &&
6393 (nd.ni_vp->v_parent != NULLVP) &&
6394 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 6395 is_namedstream = 1;
fe8ab488 6396 vnode_ref(nd.ni_vp);
cf7d32b8
A
6397 }
6398#endif
6399
cb323159
A
6400 needsrealdev = flag & AT_REALDEV ? 1 : 0;
6401 if (fp && (xsecurity == USER_ADDR_NULL)) {
6402 /*
6403 * If the caller has the file open, and is not
6404 * requesting extended security information, we are
6405 * going to let them get the basic stat information.
6406 */
6407 error = vn_stat_noauth(nd.ni_vp, statptr, NULL, isstat64, needsrealdev, ctx,
f427ee49 6408 fp->fp_glob->fg_cred);
cb323159
A
6409 } else {
6410 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL),
6411 isstat64, needsrealdev, ctx);
6412 }
2d21ac55
A
6413
6414#if NAMEDRSRCFORK
cf7d32b8 6415 if (is_namedstream) {
fe8ab488 6416 vnode_rele(nd.ni_vp);
2d21ac55
A
6417 }
6418#endif
fe8ab488
A
6419 vnode_put(nd.ni_vp);
6420 nameidone(&nd);
cb323159
A
6421 if (fp) {
6422 file_drop(fd);
6423 fp = NULL;
6424 }
91447636 6425
0a7de745
A
6426 if (error) {
6427 return error;
6428 }
91447636 6429 /* Zap spare fields */
2d21ac55 6430 if (isstat64 != 0) {
b0d623f7
A
6431 source.sb64.st_lspare = 0;
6432 source.sb64.st_qspare[0] = 0LL;
6433 source.sb64.st_qspare[1] = 0LL;
2d21ac55 6434 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 6435 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
6436 my_size = sizeof(dest.user64_sb64);
6437 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 6438 } else {
39037602 6439 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
6440 my_size = sizeof(dest.user32_sb64);
6441 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
6442 }
6443 /*
6444 * Check if we raced (post lookup) against the last unlink of a file.
6445 */
b0d623f7
A
6446 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
6447 source.sb64.st_nlink = 1;
2d21ac55
A
6448 }
6449 } else {
b0d623f7
A
6450 source.sb.st_lspare = 0;
6451 source.sb.st_qspare[0] = 0LL;
6452 source.sb.st_qspare[1] = 0LL;
2d21ac55 6453 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 6454 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
6455 my_size = sizeof(dest.user64_sb);
6456 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 6457 } else {
39037602 6458 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
6459 my_size = sizeof(dest.user32_sb);
6460 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
6461 }
6462
6463 /*
6464 * Check if we raced (post lookup) against the last unlink of a file.
6465 */
b0d623f7
A
6466 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
6467 source.sb.st_nlink = 1;
2d21ac55 6468 }
91447636 6469 }
0a7de745 6470 if ((error = copyout(sbp, ub, my_size)) != 0) {
91447636 6471 goto out;
0a7de745 6472 }
91447636
A
6473
6474 /* caller wants extended security information? */
6475 if (xsecurity != USER_ADDR_NULL) {
91447636
A
6476 /* did we get any? */
6477 if (fsec == KAUTH_FILESEC_NONE) {
6478 if (susize(xsecurity_size, 0) != 0) {
6479 error = EFAULT;
6480 goto out;
6481 }
6482 } else {
6483 /* find the user buffer size */
6484 xsecurity_bufsize = fusize(xsecurity_size);
6485
6486 /* copy out the actual data size */
6487 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
6488 error = EFAULT;
6489 goto out;
6490 }
6491
6492 /* if the caller supplied enough room, copy out to it */
0a7de745 6493 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
91447636 6494 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
0a7de745 6495 }
91447636
A
6496 }
6497 }
6498out:
0a7de745 6499 if (fsec != KAUTH_FILESEC_NONE) {
91447636 6500 kauth_filesec_free(fsec);
0a7de745
A
6501 }
6502 return error;
1c79356b
A
6503}
6504
b0d623f7
A
6505/*
6506 * stat_extended: Get file status; with extended security (ACL).
6507 *
6508 * Parameters: p (ignored)
6509 * uap User argument descriptor (see below)
39037602 6510 * retval (ignored)
b0d623f7
A
6511 *
6512 * Indirect: uap->path Path of file to get status from
6513 * uap->ub User buffer (holds file status info)
6514 * uap->xsecurity ACL to get (extended security)
6515 * uap->xsecurity_size Size of ACL
39037602 6516 *
b0d623f7
A
6517 * Returns: 0 Success
6518 * !0 errno value
6519 *
6520 */
2d21ac55 6521int
fe8ab488
A
6522stat_extended(__unused proc_t p, struct stat_extended_args *uap,
6523 __unused int32_t *retval)
2d21ac55 6524{
0a7de745
A
6525 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6526 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6527 0);
1c79356b
A
6528}
6529
2d21ac55
A
6530/*
6531 * Returns: 0 Success
fe8ab488 6532 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 6533 */
91447636 6534int
b0d623f7 6535stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 6536{
0a7de745
A
6537 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6538 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0);
91447636 6539}
1c79356b 6540
91447636 6541int
b0d623f7 6542stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 6543{
0a7de745
A
6544 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6545 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0);
1c79356b 6546}
1c79356b 6547
b0d623f7
A
6548/*
6549 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6550 *
6551 * Parameters: p (ignored)
6552 * uap User argument descriptor (see below)
39037602 6553 * retval (ignored)
b0d623f7
A
6554 *
6555 * Indirect: uap->path Path of file to get status from
6556 * uap->ub User buffer (holds file status info)
6557 * uap->xsecurity ACL to get (extended security)
6558 * uap->xsecurity_size Size of ACL
39037602 6559 *
b0d623f7
A
6560 * Returns: 0 Success
6561 * !0 errno value
6562 *
6563 */
2d21ac55 6564int
b0d623f7 6565stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 6566{
0a7de745
A
6567 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6568 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6569 0);
2d21ac55 6570}
91447636 6571
b0d623f7
A
6572/*
6573 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6574 *
6575 * Parameters: p (ignored)
6576 * uap User argument descriptor (see below)
39037602 6577 * retval (ignored)
b0d623f7
A
6578 *
6579 * Indirect: uap->path Path of file to get status from
6580 * uap->ub User buffer (holds file status info)
6581 * uap->xsecurity ACL to get (extended security)
6582 * uap->xsecurity_size Size of ACL
39037602 6583 *
b0d623f7
A
6584 * Returns: 0 Success
6585 * !0 errno value
6586 *
6587 */
2d21ac55 6588int
b0d623f7 6589lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 6590{
0a7de745
A
6591 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6592 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6593 AT_SYMLINK_NOFOLLOW);
91447636
A
6594}
6595
fe8ab488
A
6596/*
6597 * Get file status; this version does not follow links.
6598 */
91447636 6599int
b0d623f7 6600lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 6601{
0a7de745
A
6602 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6603 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
2d21ac55 6604}
b0d623f7 6605
2d21ac55 6606int
b0d623f7 6607lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 6608{
0a7de745
A
6609 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6610 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
91447636
A
6611}
6612
b0d623f7
A
6613/*
6614 * lstat64_extended: Get file status; can handle large inode numbers; does not
6615 * follow links; with extended security (ACL).
6616 *
6617 * Parameters: p (ignored)
6618 * uap User argument descriptor (see below)
39037602 6619 * retval (ignored)
b0d623f7
A
6620 *
6621 * Indirect: uap->path Path of file to get status from
6622 * uap->ub User buffer (holds file status info)
6623 * uap->xsecurity ACL to get (extended security)
6624 * uap->xsecurity_size Size of ACL
39037602 6625 *
b0d623f7
A
6626 * Returns: 0 Success
6627 * !0 errno value
6628 *
6629 */
91447636 6630int
b0d623f7 6631lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 6632{
0a7de745
A
6633 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6634 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6635 AT_SYMLINK_NOFOLLOW);
fe8ab488
A
6636}
6637
6638int
6639fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
6640{
cb323159 6641 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
0a7de745
A
6642 return EINVAL;
6643 }
fe8ab488 6644
0a7de745
A
6645 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6646 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag);
fe8ab488
A
6647}
6648
6649int
6650fstatat64(__unused proc_t p, struct fstatat64_args *uap,
6651 __unused int32_t *retval)
6652{
cb323159 6653 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
0a7de745
A
6654 return EINVAL;
6655 }
fe8ab488 6656
0a7de745
A
6657 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6658 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag);
91447636
A
6659}
6660
1c79356b 6661/*
91447636 6662 * Get configurable pathname variables.
2d21ac55
A
6663 *
6664 * Returns: 0 Success
6665 * namei:???
6666 * vn_pathconf:???
6667 *
6668 * Notes: Global implementation constants are intended to be
6669 * implemented in this function directly; all other constants
6670 * are per-FS implementation, and therefore must be handled in
6671 * each respective FS, instead.
6672 *
6673 * XXX We implement some things globally right now that should actually be
6674 * XXX per-FS; we will need to deal with this at some point.
1c79356b 6675 */
1c79356b
A
6676/* ARGSUSED */
6677int
b0d623f7 6678pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 6679{
1c79356b
A
6680 int error;
6681 struct nameidata nd;
2d21ac55 6682 vfs_context_t ctx = vfs_context_current();
91447636 6683
39037602 6684 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
0a7de745 6685 UIO_USERSPACE, uap->path, ctx);
55e303ae 6686 error = namei(&nd);
0a7de745
A
6687 if (error) {
6688 return error;
6689 }
1c79356b 6690
2d21ac55 6691 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 6692
91447636
A
6693 vnode_put(nd.ni_vp);
6694 nameidone(&nd);
0a7de745 6695 return error;
1c79356b
A
6696}
6697
6698/*
6699 * Return target name of a symbolic link.
6700 */
1c79356b 6701/* ARGSUSED */
fe8ab488
A
6702static int
6703readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
6704 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
6705 int *retval)
1c79356b 6706{
2d21ac55 6707 vnode_t vp;
91447636 6708 uio_t auio;
1c79356b
A
6709 int error;
6710 struct nameidata nd;
0a7de745 6711 char uio_buf[UIO_SIZEOF(1)];
91447636 6712
f427ee49
A
6713 if (bufsize > INT32_MAX) {
6714 return EINVAL;
6715 }
6716
fe8ab488
A
6717 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
6718 seg, path, ctx);
6719
6720 error = nameiat(&nd, fd);
0a7de745
A
6721 if (error) {
6722 return error;
6723 }
1c79356b 6724 vp = nd.ni_vp;
91447636
A
6725
6726 nameidone(&nd);
6727
fe8ab488 6728 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
0a7de745 6729 &uio_buf[0], sizeof(uio_buf));
fe8ab488
A
6730 uio_addiov(auio, buf, bufsize);
6731 if (vp->v_type != VLNK) {
1c79356b 6732 error = EINVAL;
fe8ab488 6733 } else {
2d21ac55 6734#if CONFIG_MACF
fe8ab488 6735 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55 6736#endif
0a7de745 6737 if (error == 0) {
fe8ab488 6738 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
0a7de745
A
6739 ctx);
6740 }
6741 if (error == 0) {
2d21ac55 6742 error = VNOP_READLINK(vp, auio, ctx);
0a7de745 6743 }
91447636
A
6744 }
6745 vnode_put(vp);
b0d623f7 6746
f427ee49 6747 *retval = (int)(bufsize - uio_resid(auio));
0a7de745 6748 return error;
1c79356b
A
6749}
6750
fe8ab488
A
6751int
6752readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
6753{
6754 enum uio_seg procseg;
6755
6756 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
0a7de745
A
6757 return readlinkat_internal(vfs_context_current(), AT_FDCWD,
6758 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
6759 uap->count, procseg, retval);
fe8ab488
A
6760}
6761
6762int
6763readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
6764{
6765 enum uio_seg procseg;
6766
6767 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
0a7de745
A
6768 return readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
6769 procseg, uap->buf, uap->bufsize, procseg, retval);
fe8ab488
A
6770}
6771
6772/*
cb323159 6773 * Change file flags, the deep inner layer.
91447636
A
6774 */
6775static int
cb323159
A
6776chflags0(vnode_t vp, struct vnode_attr *va,
6777 int (*setattr)(vnode_t, void *, vfs_context_t),
6778 void *arg, vfs_context_t ctx)
91447636 6779{
cb323159 6780 kauth_action_t action = 0;
91447636
A
6781 int error;
6782
2d21ac55 6783#if CONFIG_MACF
cb323159 6784 error = mac_vnode_check_setflags(ctx, vp, va->va_flags);
0a7de745 6785 if (error) {
2d21ac55 6786 goto out;
0a7de745 6787 }
2d21ac55
A
6788#endif
6789
91447636 6790 /* request authorisation, disregard immutability */
cb323159 6791 if ((error = vnode_authattr(vp, va, &action, ctx)) != 0) {
91447636 6792 goto out;
0a7de745 6793 }
91447636
A
6794 /*
6795 * Request that the auth layer disregard those file flags it's allowed to when
6796 * authorizing this operation; we need to do this in order to be able to
6797 * clear immutable flags.
6798 */
0a7de745 6799 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
91447636 6800 goto out;
0a7de745 6801 }
cb323159 6802 error = (*setattr)(vp, arg, ctx);
91447636 6803
39037602 6804#if CONFIG_MACF
0a7de745 6805 if (error == 0) {
cb323159 6806 mac_vnode_notify_setflags(ctx, vp, va->va_flags);
0a7de745 6807 }
39037602
A
6808#endif
6809
cb323159
A
6810out:
6811 return error;
6812}
6813
6814/*
6815 * Change file flags.
6816 *
6817 * NOTE: this will vnode_put() `vp'
6818 */
6819static int
6820chflags1(vnode_t vp, int flags, vfs_context_t ctx)
6821{
6822 struct vnode_attr va;
6823 int error;
6824
6825 VATTR_INIT(&va);
6826 VATTR_SET(&va, va_flags, flags);
6827
6828 error = chflags0(vp, &va, (void *)vnode_setattr, &va, ctx);
6829 vnode_put(vp);
6830
2d21ac55
A
6831 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6832 error = ENOTSUP;
6833 }
cb323159 6834
0a7de745 6835 return error;
91447636
A
6836}
6837
1c79356b
A
6838/*
6839 * Change flags of a file given a path name.
6840 */
1c79356b
A
6841/* ARGSUSED */
6842int
b0d623f7 6843chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 6844{
2d21ac55
A
6845 vnode_t vp;
6846 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6847 int error;
6848 struct nameidata nd;
6849
55e303ae 6850 AUDIT_ARG(fflags, uap->flags);
39037602 6851 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
0a7de745 6852 UIO_USERSPACE, uap->path, ctx);
55e303ae 6853 error = namei(&nd);
0a7de745
A
6854 if (error) {
6855 return error;
6856 }
1c79356b 6857 vp = nd.ni_vp;
91447636
A
6858 nameidone(&nd);
6859
813fb2f6 6860 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6861 error = chflags1(vp, uap->flags, ctx);
91447636 6862
0a7de745 6863 return error;
1c79356b
A
6864}
6865
6866/*
6867 * Change flags of a file given a file descriptor.
6868 */
1c79356b
A
6869/* ARGSUSED */
6870int
b0d623f7 6871fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 6872{
2d21ac55 6873 vnode_t vp;
1c79356b
A
6874 int error;
6875
55e303ae
A
6876 AUDIT_ARG(fd, uap->fd);
6877 AUDIT_ARG(fflags, uap->flags);
0a7de745
A
6878 if ((error = file_vnode(uap->fd, &vp))) {
6879 return error;
6880 }
55e303ae 6881
91447636
A
6882 if ((error = vnode_getwithref(vp))) {
6883 file_drop(uap->fd);
0a7de745 6884 return error;
91447636 6885 }
e5568f75
A
6886
6887 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6888
813fb2f6 6889 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6890 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
6891
6892 file_drop(uap->fd);
0a7de745 6893 return error;
91447636
A
6894}
6895
6896/*
6897 * Change security information on a filesystem object.
2d21ac55
A
6898 *
6899 * Returns: 0 Success
6900 * EPERM Operation not permitted
6901 * vnode_authattr:??? [anything vnode_authattr can return]
6902 * vnode_authorize:??? [anything vnode_authorize can return]
6903 * vnode_setattr:??? [anything vnode_setattr can return]
6904 *
6905 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6906 * translated to EPERM before being returned.
91447636
A
6907 */
6908static int
fe8ab488 6909chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
6910{
6911 kauth_action_t action;
6912 int error;
39037602 6913
b0d623f7
A
6914 AUDIT_ARG(mode, vap->va_mode);
6915 /* XXX audit new args */
91447636 6916
2d21ac55
A
6917#if NAMEDSTREAMS
6918 /* chmod calls are not allowed for resource forks. */
6919 if (vp->v_flag & VISNAMEDSTREAM) {
0a7de745 6920 return EPERM;
2d21ac55
A
6921 }
6922#endif
6923
6924#if CONFIG_MACF
316670eb 6925 if (VATTR_IS_ACTIVE(vap, va_mode) &&
0a7de745
A
6926 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) {
6927 return error;
6928 }
39037602
A
6929
6930 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6931 if ((error = mac_vnode_check_setowner(ctx, vp,
6932 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
0a7de745
A
6933 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) {
6934 return error;
6935 }
39037602
A
6936 }
6937
6938 if (VATTR_IS_ACTIVE(vap, va_acl) &&
0a7de745
A
6939 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) {
6940 return error;
6941 }
2d21ac55
A
6942#endif
6943
0a7de745 6944 /* make sure that the caller is allowed to set this security information */
91447636
A
6945 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6946 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 6947 if (error == EACCES) {
91447636 6948 error = EPERM;
0a7de745
A
6949 }
6950 return error;
6951 }
6952
6953 if ((error = vnode_setattr(vp, vap, ctx)) != 0) {
6954 return error;
91447636 6955 }
39037602 6956
39037602 6957#if CONFIG_MACF
0a7de745 6958 if (VATTR_IS_ACTIVE(vap, va_mode)) {
39037602 6959 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
0a7de745 6960 }
39037602 6961
0a7de745 6962 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
39037602 6963 mac_vnode_notify_setowner(ctx, vp,
0a7de745
A
6964 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6965 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6966 }
39037602 6967
0a7de745 6968 if (VATTR_IS_ACTIVE(vap, va_acl)) {
39037602 6969 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
0a7de745 6970 }
39037602 6971#endif
91447636 6972
0a7de745 6973 return error;
1c79356b
A
6974}
6975
91447636 6976
1c79356b 6977/*
b0d623f7 6978 * Change mode of a file given a path name.
2d21ac55
A
6979 *
6980 * Returns: 0 Success
6981 * namei:??? [anything namei can return]
fe8ab488 6982 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 6983 */
91447636 6984static int
fe8ab488
A
6985chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6986 int fd, int flag, enum uio_seg segflg)
91447636
A
6987{
6988 struct nameidata nd;
fe8ab488 6989 int follow, error;
91447636 6990
fe8ab488
A
6991 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6992 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6993 segflg, path, ctx);
0a7de745
A
6994 if ((error = nameiat(&nd, fd))) {
6995 return error;
6996 }
fe8ab488 6997 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
6998 vnode_put(nd.ni_vp);
6999 nameidone(&nd);
0a7de745 7000 return error;
91447636
A
7001}
7002
0c530ab8 7003/*
39037602 7004 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 7005 * argument list (including extended security (ACL)).
0c530ab8
A
7006 *
7007 * Parameters: p Process requesting the open
7008 * uap User argument descriptor (see below)
7009 * retval (ignored)
7010 *
7011 * Indirect: uap->path Path to object (same as 'chmod')
7012 * uap->uid UID to set
7013 * uap->gid GID to set
7014 * uap->mode File mode to set (same as 'chmod')
7015 * uap->xsecurity ACL to set (or delete)
7016 *
7017 * Returns: 0 Success
7018 * !0 errno value
7019 *
7020 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
7021 *
7022 * XXX: We should enummerate the possible errno values here, and where
7023 * in the code they originated.
7024 */
1c79356b 7025int
b0d623f7 7026chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 7027{
1c79356b 7028 int error;
91447636
A
7029 struct vnode_attr va;
7030 kauth_filesec_t xsecdst;
7031
b0d623f7
A
7032 AUDIT_ARG(owner, uap->uid, uap->gid);
7033
91447636 7034 VATTR_INIT(&va);
0a7de745 7035 if (uap->mode != -1) {
91447636 7036 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
0a7de745
A
7037 }
7038 if (uap->uid != KAUTH_UID_NONE) {
91447636 7039 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
7040 }
7041 if (uap->gid != KAUTH_GID_NONE) {
91447636 7042 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 7043 }
91447636
A
7044
7045 xsecdst = NULL;
0a7de745
A
7046 switch (uap->xsecurity) {
7047 /* explicit remove request */
7048 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
91447636
A
7049 VATTR_SET(&va, va_acl, NULL);
7050 break;
0a7de745 7051 /* not being set */
91447636
A
7052 case USER_ADDR_NULL:
7053 break;
7054 default:
0a7de745
A
7055 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
7056 return error;
7057 }
91447636
A
7058 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7059 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
7060 }
1c79356b 7061
fe8ab488
A
7062 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
7063 UIO_USERSPACE);
55e303ae 7064
0a7de745 7065 if (xsecdst != NULL) {
91447636 7066 kauth_filesec_free(xsecdst);
0a7de745
A
7067 }
7068 return error;
91447636 7069}
4a249263 7070
2d21ac55
A
7071/*
7072 * Returns: 0 Success
fe8ab488 7073 * chmodat:??? [anything chmodat can return]
2d21ac55 7074 */
fe8ab488
A
7075static int
7076fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
7077 int flag, enum uio_seg segflg)
91447636 7078{
91447636
A
7079 struct vnode_attr va;
7080
7081 VATTR_INIT(&va);
fe8ab488
A
7082 VATTR_SET(&va, va_mode, mode & ALLPERMS);
7083
0a7de745 7084 return chmodat(ctx, path, &va, fd, flag, segflg);
fe8ab488
A
7085}
7086
7087int
7088chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
7089{
0a7de745
A
7090 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
7091 AT_FDCWD, 0, UIO_USERSPACE);
fe8ab488 7092}
91447636 7093
fe8ab488
A
7094int
7095fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
7096{
0a7de745
A
7097 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
7098 return EINVAL;
7099 }
fe8ab488 7100
0a7de745
A
7101 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
7102 uap->fd, uap->flag, UIO_USERSPACE);
1c79356b
A
7103}
7104
7105/*
7106 * Change mode of a file given a file descriptor.
7107 */
91447636 7108static int
2d21ac55 7109fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 7110{
2d21ac55 7111 vnode_t vp;
1c79356b 7112 int error;
55e303ae 7113
91447636 7114 AUDIT_ARG(fd, fd);
55e303ae 7115
0a7de745
A
7116 if ((error = file_vnode(fd, &vp)) != 0) {
7117 return error;
7118 }
91447636
A
7119 if ((error = vnode_getwithref(vp)) != 0) {
7120 file_drop(fd);
0a7de745 7121 return error;
91447636 7122 }
55e303ae
A
7123 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7124
fe8ab488 7125 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
7126 (void)vnode_put(vp);
7127 file_drop(fd);
55e303ae 7128
0a7de745 7129 return error;
1c79356b
A
7130}
7131
b0d623f7
A
7132/*
7133 * fchmod_extended: Change mode of a file given a file descriptor; with
7134 * extended argument list (including extended security (ACL)).
7135 *
7136 * Parameters: p Process requesting to change file mode
7137 * uap User argument descriptor (see below)
39037602 7138 * retval (ignored)
b0d623f7
A
7139 *
7140 * Indirect: uap->mode File mode to set (same as 'chmod')
7141 * uap->uid UID to set
7142 * uap->gid GID to set
7143 * uap->xsecurity ACL to set (or delete)
7144 * uap->fd File descriptor of file to change mode
39037602 7145 *
b0d623f7
A
7146 * Returns: 0 Success
7147 * !0 errno value
7148 *
7149 */
91447636 7150int
b0d623f7 7151fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
7152{
7153 int error;
7154 struct vnode_attr va;
7155 kauth_filesec_t xsecdst;
7156
b0d623f7
A
7157 AUDIT_ARG(owner, uap->uid, uap->gid);
7158
91447636 7159 VATTR_INIT(&va);
0a7de745 7160 if (uap->mode != -1) {
91447636 7161 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
0a7de745
A
7162 }
7163 if (uap->uid != KAUTH_UID_NONE) {
91447636 7164 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
7165 }
7166 if (uap->gid != KAUTH_GID_NONE) {
91447636 7167 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 7168 }
91447636
A
7169
7170 xsecdst = NULL;
0a7de745 7171 switch (uap->xsecurity) {
91447636
A
7172 case USER_ADDR_NULL:
7173 VATTR_SET(&va, va_acl, NULL);
7174 break;
0a7de745 7175 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
39236c6e
A
7176 VATTR_SET(&va, va_acl, NULL);
7177 break;
0a7de745 7178 /* not being set */
91447636
A
7179 case CAST_USER_ADDR_T(-1):
7180 break;
7181 default:
0a7de745
A
7182 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
7183 return error;
7184 }
91447636
A
7185 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7186 }
7187
7188 error = fchmod1(p, uap->fd, &va);
7189
39037602 7190
0a7de745 7191 switch (uap->xsecurity) {
91447636
A
7192 case USER_ADDR_NULL:
7193 case CAST_USER_ADDR_T(-1):
7194 break;
7195 default:
0a7de745 7196 if (xsecdst != NULL) {
91447636 7197 kauth_filesec_free(xsecdst);
0a7de745 7198 }
91447636 7199 }
0a7de745 7200 return error;
91447636
A
7201}
7202
7203int
b0d623f7 7204fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
7205{
7206 struct vnode_attr va;
7207
7208 VATTR_INIT(&va);
7209 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
7210
0a7de745 7211 return fchmod1(p, uap->fd, &va);
91447636
A
7212}
7213
7214
1c79356b
A
7215/*
7216 * Set ownership given a path name.
7217 */
1c79356b 7218/* ARGSUSED */
91447636 7219static int
fe8ab488 7220fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
0a7de745 7221 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 7222{
2d21ac55 7223 vnode_t vp;
91447636 7224 struct vnode_attr va;
1c79356b
A
7225 int error;
7226 struct nameidata nd;
fe8ab488 7227 int follow;
91447636 7228 kauth_action_t action;
1c79356b 7229
fe8ab488 7230 AUDIT_ARG(owner, uid, gid);
55e303ae 7231
fe8ab488
A
7232 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7233 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
7234 path, ctx);
7235 error = nameiat(&nd, fd);
0a7de745
A
7236 if (error) {
7237 return error;
7238 }
1c79356b
A
7239 vp = nd.ni_vp;
7240
91447636
A
7241 nameidone(&nd);
7242
91447636 7243 VATTR_INIT(&va);
0a7de745 7244 if (uid != (uid_t)VNOVAL) {
fe8ab488 7245 VATTR_SET(&va, va_uid, uid);
0a7de745
A
7246 }
7247 if (gid != (gid_t)VNOVAL) {
fe8ab488 7248 VATTR_SET(&va, va_gid, gid);
0a7de745 7249 }
91447636 7250
2d21ac55 7251#if CONFIG_MACF
fe8ab488 7252 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
0a7de745 7253 if (error) {
2d21ac55 7254 goto out;
0a7de745 7255 }
2d21ac55
A
7256#endif
7257
91447636 7258 /* preflight and authorize attribute changes */
0a7de745 7259 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 7260 goto out;
0a7de745
A
7261 }
7262 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636 7263 goto out;
0a7de745 7264 }
91447636 7265 error = vnode_setattr(vp, &va, ctx);
39037602
A
7266
7267#if CONFIG_MACF
0a7de745 7268 if (error == 0) {
39037602 7269 mac_vnode_notify_setowner(ctx, vp, uid, gid);
0a7de745 7270 }
39037602
A
7271#endif
7272
91447636
A
7273out:
7274 /*
7275 * EACCES is only allowed from namei(); permissions failure should
7276 * return EPERM, so we need to translate the error code.
7277 */
0a7de745 7278 if (error == EACCES) {
91447636 7279 error = EPERM;
0a7de745 7280 }
fe8ab488 7281
91447636 7282 vnode_put(vp);
0a7de745 7283 return error;
1c79356b
A
7284}
7285
91447636 7286int
fe8ab488 7287chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 7288{
0a7de745
A
7289 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7290 uap->uid, uap->gid, 0, UIO_USERSPACE);
91447636
A
7291}
7292
7293int
fe8ab488 7294lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 7295{
0a7de745
A
7296 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7297 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
fe8ab488
A
7298}
7299
7300int
7301fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
7302{
0a7de745
A
7303 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
7304 return EINVAL;
7305 }
fe8ab488 7306
0a7de745
A
7307 return fchownat_internal(vfs_context_current(), uap->fd, uap->path,
7308 uap->uid, uap->gid, uap->flag, UIO_USERSPACE);
91447636
A
7309}
7310
1c79356b
A
7311/*
7312 * Set ownership given a file descriptor.
7313 */
1c79356b
A
7314/* ARGSUSED */
7315int
b0d623f7 7316fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 7317{
91447636 7318 struct vnode_attr va;
2d21ac55
A
7319 vfs_context_t ctx = vfs_context_current();
7320 vnode_t vp;
1c79356b 7321 int error;
91447636 7322 kauth_action_t action;
1c79356b 7323
55e303ae
A
7324 AUDIT_ARG(owner, uap->uid, uap->gid);
7325 AUDIT_ARG(fd, uap->fd);
7326
0a7de745
A
7327 if ((error = file_vnode(uap->fd, &vp))) {
7328 return error;
7329 }
55e303ae 7330
0a7de745 7331 if ((error = vnode_getwithref(vp))) {
91447636 7332 file_drop(uap->fd);
0a7de745 7333 return error;
91447636 7334 }
55e303ae
A
7335 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7336
91447636 7337 VATTR_INIT(&va);
0a7de745 7338 if (uap->uid != VNOVAL) {
91447636 7339 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
7340 }
7341 if (uap->gid != VNOVAL) {
91447636 7342 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 7343 }
91447636 7344
2d21ac55
A
7345#if NAMEDSTREAMS
7346 /* chown calls are not allowed for resource forks. */
7347 if (vp->v_flag & VISNAMEDSTREAM) {
7348 error = EPERM;
7349 goto out;
7350 }
7351#endif
7352
7353#if CONFIG_MACF
7354 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
0a7de745 7355 if (error) {
2d21ac55 7356 goto out;
0a7de745 7357 }
2d21ac55 7358#endif
91447636 7359
0a7de745
A
7360 /* preflight and authorize attribute changes */
7361 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 7362 goto out;
0a7de745 7363 }
2d21ac55 7364 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 7365 if (error == EACCES) {
91447636 7366 error = EPERM;
0a7de745 7367 }
91447636
A
7368 goto out;
7369 }
2d21ac55 7370 error = vnode_setattr(vp, &va, ctx);
4a249263 7371
39037602 7372#if CONFIG_MACF
0a7de745 7373 if (error == 0) {
39037602 7374 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
0a7de745 7375 }
39037602
A
7376#endif
7377
91447636
A
7378out:
7379 (void)vnode_put(vp);
7380 file_drop(uap->fd);
0a7de745 7381 return error;
1c79356b
A
7382}
7383
9bccf70c 7384static int
2d21ac55 7385getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 7386{
9bccf70c
A
7387 int error;
7388
91447636
A
7389 if (usrtvp == USER_ADDR_NULL) {
7390 struct timeval old_tv;
7391 /* XXX Y2038 bug because of microtime argument */
7392 microtime(&old_tv);
7393 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
7394 tsp[1] = tsp[0];
7395 } else {
91447636 7396 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 7397 struct user64_timeval tv[2];
91447636 7398 error = copyin(usrtvp, (void *)tv, sizeof(tv));
0a7de745
A
7399 if (error) {
7400 return error;
7401 }
f427ee49
A
7402 TIMEVAL64_TO_TIMESPEC(&tv[0], &tsp[0]);
7403 TIMEVAL64_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 7404 } else {
b0d623f7
A
7405 struct user32_timeval tv[2];
7406 error = copyin(usrtvp, (void *)tv, sizeof(tv));
0a7de745
A
7407 if (error) {
7408 return error;
7409 }
b0d623f7
A
7410 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7411 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 7412 }
9bccf70c
A
7413 }
7414 return 0;
7415}
7416
7417static int
2d21ac55 7418setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
0a7de745 7419 int nullflag)
9bccf70c
A
7420{
7421 int error;
91447636
A
7422 struct vnode_attr va;
7423 kauth_action_t action;
e5568f75
A
7424
7425 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7426
91447636
A
7427 VATTR_INIT(&va);
7428 VATTR_SET(&va, va_access_time, ts[0]);
7429 VATTR_SET(&va, va_modify_time, ts[1]);
0a7de745 7430 if (nullflag) {
91447636 7431 va.va_vaflags |= VA_UTIMES_NULL;
0a7de745 7432 }
91447636 7433
2d21ac55
A
7434#if NAMEDSTREAMS
7435 /* utimes calls are not allowed for resource forks. */
7436 if (vp->v_flag & VISNAMEDSTREAM) {
7437 error = EPERM;
7438 goto out;
7439 }
7440#endif
7441
7442#if CONFIG_MACF
7443 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
0a7de745 7444 if (error) {
2d21ac55 7445 goto out;
0a7de745 7446 }
2d21ac55
A
7447#endif
7448 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
0a7de745 7449 if (!nullflag && error == EACCES) {
2d21ac55 7450 error = EPERM;
0a7de745 7451 }
91447636 7452 goto out;
2d21ac55
A
7453 }
7454
91447636 7455 /* since we may not need to auth anything, check here */
2d21ac55 7456 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 7457 if (!nullflag && error == EACCES) {
2d21ac55 7458 error = EPERM;
0a7de745 7459 }
91447636 7460 goto out;
2d21ac55 7461 }
91447636 7462 error = vnode_setattr(vp, &va, ctx);
4a249263 7463
39037602 7464#if CONFIG_MACF
0a7de745 7465 if (error == 0) {
39037602 7466 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
0a7de745 7467 }
39037602
A
7468#endif
7469
9bccf70c
A
7470out:
7471 return error;
7472}
7473
1c79356b
A
7474/*
7475 * Set the access and modification times of a file.
7476 */
1c79356b
A
7477/* ARGSUSED */
7478int
b0d623f7 7479utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 7480{
9bccf70c 7481 struct timespec ts[2];
91447636 7482 user_addr_t usrtvp;
1c79356b
A
7483 int error;
7484 struct nameidata nd;
2d21ac55 7485 vfs_context_t ctx = vfs_context_current();
1c79356b 7486
2d21ac55 7487 /*
39037602 7488 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
7489 * name lookup first because auditing wants the path.
7490 */
39037602 7491 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
0a7de745 7492 UIO_USERSPACE, uap->path, ctx);
55e303ae 7493 error = namei(&nd);
0a7de745
A
7494 if (error) {
7495 return error;
7496 }
91447636 7497 nameidone(&nd);
55e303ae 7498
91447636
A
7499 /*
7500 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7501 * the current time instead.
7502 */
55e303ae 7503 usrtvp = uap->tptr;
0a7de745 7504 if ((error = getutimes(usrtvp, ts)) != 0) {
91447636 7505 goto out;
0a7de745 7506 }
91447636 7507
2d21ac55 7508 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
7509
7510out:
7511 vnode_put(nd.ni_vp);
0a7de745 7512 return error;
1c79356b
A
7513}
7514
9bccf70c
A
7515/*
7516 * Set the access and modification times of a file.
7517 */
9bccf70c
A
7518/* ARGSUSED */
7519int
b0d623f7 7520futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
7521{
7522 struct timespec ts[2];
2d21ac55 7523 vnode_t vp;
91447636 7524 user_addr_t usrtvp;
9bccf70c
A
7525 int error;
7526
55e303ae 7527 AUDIT_ARG(fd, uap->fd);
9bccf70c 7528 usrtvp = uap->tptr;
0a7de745
A
7529 if ((error = getutimes(usrtvp, ts)) != 0) {
7530 return error;
7531 }
7532 if ((error = file_vnode(uap->fd, &vp)) != 0) {
7533 return error;
7534 }
7535 if ((error = vnode_getwithref(vp))) {
91447636 7536 file_drop(uap->fd);
0a7de745 7537 return error;
91447636 7538 }
55e303ae 7539
2d21ac55 7540 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
7541 vnode_put(vp);
7542 file_drop(uap->fd);
0a7de745 7543 return error;
9bccf70c
A
7544}
7545
1c79356b
A
7546/*
7547 * Truncate a file given its path name.
7548 */
1c79356b
A
7549/* ARGSUSED */
7550int
f427ee49 7551truncate(proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 7552{
2d21ac55 7553 vnode_t vp;
91447636 7554 struct vnode_attr va;
2d21ac55 7555 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7556 int error;
7557 struct nameidata nd;
91447636 7558 kauth_action_t action;
f427ee49 7559 rlim_t fsize_limit;
91447636 7560
0a7de745
A
7561 if (uap->length < 0) {
7562 return EINVAL;
7563 }
f427ee49
A
7564
7565 fsize_limit = proc_limitgetcur(p, RLIMIT_FSIZE, TRUE);
7566 if ((rlim_t)uap->length > fsize_limit) {
7567 psignal(p, SIGXFSZ);
7568 return EFBIG;
7569 }
7570
39037602 7571 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
0a7de745
A
7572 UIO_USERSPACE, uap->path, ctx);
7573 if ((error = namei(&nd))) {
7574 return error;
7575 }
1c79356b 7576 vp = nd.ni_vp;
91447636
A
7577
7578 nameidone(&nd);
7579
7580 VATTR_INIT(&va);
7581 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
7582
7583#if CONFIG_MACF
7584 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
0a7de745 7585 if (error) {
2d21ac55 7586 goto out;
0a7de745 7587 }
2d21ac55
A
7588#endif
7589
0a7de745 7590 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 7591 goto out;
0a7de745
A
7592 }
7593 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636 7594 goto out;
0a7de745 7595 }
2d21ac55 7596 error = vnode_setattr(vp, &va, ctx);
39037602
A
7597
7598#if CONFIG_MACF
0a7de745 7599 if (error == 0) {
39037602 7600 mac_vnode_notify_truncate(ctx, NOCRED, vp);
0a7de745 7601 }
39037602
A
7602#endif
7603
91447636
A
7604out:
7605 vnode_put(vp);
0a7de745 7606 return error;
1c79356b
A
7607}
7608
7609/*
7610 * Truncate a file given a file descriptor.
7611 */
1c79356b
A
7612/* ARGSUSED */
7613int
b0d623f7 7614ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 7615{
2d21ac55 7616 vfs_context_t ctx = vfs_context_current();
91447636 7617 struct vnode_attr va;
2d21ac55 7618 vnode_t vp;
91447636 7619 struct fileproc *fp;
0a7de745 7620 int error;
91447636 7621 int fd = uap->fd;
f427ee49 7622 rlim_t fsize_limit;
1c79356b 7623
55e303ae 7624 AUDIT_ARG(fd, uap->fd);
0a7de745
A
7625 if (uap->length < 0) {
7626 return EINVAL;
7627 }
39037602 7628
f427ee49
A
7629 fsize_limit = proc_limitgetcur(p, RLIMIT_FSIZE, TRUE);
7630 if ((rlim_t)uap->length > fsize_limit) {
7631 psignal(p, SIGXFSZ);
7632 return EFBIG;
7633 }
7634
0a7de745
A
7635 if ((error = fp_lookup(p, fd, &fp, 0))) {
7636 return error;
91447636 7637 }
1c79356b 7638
f427ee49 7639 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
39236c6e 7640 case DTYPE_PSXSHM:
91447636
A
7641 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
7642 goto out;
39236c6e
A
7643 case DTYPE_VNODE:
7644 break;
7645 default:
91447636
A
7646 error = EINVAL;
7647 goto out;
1c79356b 7648 }
1c79356b 7649
f427ee49 7650 vp = (vnode_t)fp->fp_glob->fg_data;
e5568f75 7651
f427ee49 7652 if ((fp->fp_glob->fg_flag & FWRITE) == 0) {
91447636
A
7653 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7654 error = EINVAL;
7655 goto out;
1c79356b 7656 }
1c79356b 7657
91447636
A
7658 if ((error = vnode_getwithref(vp)) != 0) {
7659 goto out;
7660 }
1c79356b 7661
91447636 7662 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 7663
2d21ac55
A
7664#if CONFIG_MACF
7665 error = mac_vnode_check_truncate(ctx,
f427ee49 7666 fp->fp_glob->fg_cred, vp);
2d21ac55
A
7667 if (error) {
7668 (void)vnode_put(vp);
7669 goto out;
7670 }
7671#endif
91447636
A
7672 VATTR_INIT(&va);
7673 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 7674 error = vnode_setattr(vp, &va, ctx);
39037602
A
7675
7676#if CONFIG_MACF
0a7de745 7677 if (error == 0) {
f427ee49 7678 mac_vnode_notify_truncate(ctx, fp->fp_glob->fg_cred, vp);
0a7de745 7679 }
39037602
A
7680#endif
7681
91447636
A
7682 (void)vnode_put(vp);
7683out:
7684 file_drop(fd);
0a7de745 7685 return error;
1c79356b 7686}
91447636 7687
1c79356b
A
7688
7689/*
b0d623f7 7690 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 7691 */
1c79356b
A
7692/* ARGSUSED */
7693int
b0d623f7 7694fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 7695{
2d21ac55 7696 __pthread_testcancel(1);
0a7de745 7697 return fsync_common(p, uap, MNT_WAIT);
b0d623f7
A
7698}
7699
7700
7701/*
7702 * Sync an open file with synchronized I/O _file_ integrity completion
7703 *
7704 * Notes: This is a legacy support function that does not test for
7705 * thread cancellation points.
7706 */
7707/* ARGSUSED */
39037602 7708int
b0d623f7
A
7709fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
7710{
0a7de745 7711 return fsync_common(p, (struct fsync_args *)uap, MNT_WAIT);
2d21ac55
A
7712}
7713
b0d623f7
A
7714
7715/*
7716 * Sync an open file with synchronized I/O _data_ integrity completion
7717 */
7718/* ARGSUSED */
2d21ac55 7719int
b0d623f7
A
7720fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
7721{
7722 __pthread_testcancel(1);
0a7de745 7723 return fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT);
b0d623f7
A
7724}
7725
7726
7727/*
7728 * fsync_common
7729 *
7730 * Common fsync code to support both synchronized I/O file integrity completion
7731 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7732 *
7733 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7734 * will only guarantee that the file data contents are retrievable. If
7735 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
7736 * includes additional metadata unnecessary for retrieving the file data
7737 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7738 * storage.
7739 *
7740 * Parameters: p The process
7741 * uap->fd The descriptor to synchronize
7742 * flags The data integrity flags
7743 *
7744 * Returns: int Success
7745 * fp_getfvp:EBADF Bad file descriptor
7746 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7747 * VNOP_FSYNC:??? unspecified
7748 *
7749 * Notes: We use struct fsync_args because it is a short name, and all
7750 * caller argument structures are otherwise identical.
7751 */
7752static int
7753fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
7754{
7755 vnode_t vp;
91447636 7756 struct fileproc *fp;
2d21ac55 7757 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7758 int error;
7759
b0d623f7
A
7760 AUDIT_ARG(fd, uap->fd);
7761
0a7de745
A
7762 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
7763 return error;
7764 }
7765 if ((error = vnode_getwithref(vp))) {
91447636 7766 file_drop(uap->fd);
0a7de745 7767 return error;
91447636 7768 }
91447636 7769
b0d623f7
A
7770 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7771
7772 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
7773
7774#if NAMEDRSRCFORK
7775 /* Sync resource fork shadow file if necessary. */
7776 if ((error == 0) &&
39037602 7777 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 7778 (vp->v_parent != NULLVP) &&
b0d623f7 7779 vnode_isshadow(vp) &&
f427ee49 7780 (fp->fp_glob->fg_flag & FWASWRITTEN)) {
2d21ac55
A
7781 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
7782 }
7783#endif
91447636
A
7784
7785 (void)vnode_put(vp);
7786 file_drop(uap->fd);
0a7de745 7787 return error;
1c79356b
A
7788}
7789
7790/*
39037602 7791 * Duplicate files. Source must be a file, target must be a file or
1c79356b 7792 * must not exist.
91447636
A
7793 *
7794 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7795 * perform inheritance correctly.
1c79356b 7796 */
1c79356b
A
7797/* ARGSUSED */
7798int
b0d623f7 7799copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 7800{
91447636 7801 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
7802 struct nameidata fromnd, tond;
7803 int error;
2d21ac55 7804 vfs_context_t ctx = vfs_context_current();
39037602
A
7805#if CONFIG_MACF
7806 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
7807 struct vnode_attr va;
7808#endif
55e303ae
A
7809
7810 /* Check that the flags are valid. */
1c79356b
A
7811
7812 if (uap->flags & ~CPF_MASK) {
0a7de745 7813 return EINVAL;
55e303ae 7814 }
1c79356b 7815
4bd07ac2 7816 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
0a7de745
A
7817 UIO_USERSPACE, uap->from, ctx);
7818 if ((error = namei(&fromnd))) {
7819 return error;
7820 }
1c79356b
A
7821 fvp = fromnd.ni_vp;
7822
6d2010ae 7823 NDINIT(&tond, CREATE, OP_LINK,
0a7de745
A
7824 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7825 UIO_USERSPACE, uap->to, ctx);
91447636 7826 if ((error = namei(&tond))) {
1c79356b
A
7827 goto out1;
7828 }
7829 tdvp = tond.ni_dvp;
7830 tvp = tond.ni_vp;
91447636 7831
1c79356b
A
7832 if (tvp != NULL) {
7833 if (!(uap->flags & CPF_OVERWRITE)) {
7834 error = EEXIST;
7835 goto out;
7836 }
7837 }
39037602 7838
1c79356b
A
7839 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
7840 error = EISDIR;
7841 goto out;
7842 }
7843
39037602
A
7844 /* This calls existing MAC hooks for open */
7845 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
7846 NULL))) {
7847 goto out;
7848 }
7849
7850 if (tvp) {
7851 /*
7852 * See unlinkat_internal for an explanation of the potential
7853 * ENOENT from the MAC hook but the gist is that the MAC hook
7854 * can fail because vn_getpath isn't able to return the full
7855 * path. We choose to ignore this failure.
7856 */
7857 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
0a7de745 7858 if (error && error != ENOENT) {
39037602 7859 goto out;
0a7de745 7860 }
39037602
A
7861 error = 0;
7862 }
7863
7864#if CONFIG_MACF
7865 VATTR_INIT(&va);
7866 VATTR_SET(&va, va_type, fvp->v_type);
7867 /* Mask off all but regular access permissions */
7868 VATTR_SET(&va, va_mode,
0a7de745 7869 ((((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
39037602 7870 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
0a7de745 7871 if (error) {
39037602 7872 goto out;
0a7de745 7873 }
39037602
A
7874#endif /* CONFIG_MACF */
7875
0a7de745 7876 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
1c79356b 7877 goto out;
0a7de745 7878 }
1c79356b 7879
0a7de745 7880 if (fvp == tdvp) {
1c79356b 7881 error = EINVAL;
0a7de745 7882 }
1c79356b
A
7883 /*
7884 * If source is the same as the destination (that is the
7885 * same inode number) then there is nothing to do.
7886 * (fixed to have POSIX semantics - CSM 3/2/98)
7887 */
0a7de745 7888 if (fvp == tvp) {
1c79356b 7889 error = -1;
0a7de745
A
7890 }
7891 if (!error) {
7892 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
7893 }
1c79356b 7894out:
91447636
A
7895 sdvp = tond.ni_startdir;
7896 /*
7897 * nameidone has to happen before we vnode_put(tdvp)
7898 * since it may need to release the fs_nodelock on the tdvp
7899 */
7900 nameidone(&tond);
7901
0a7de745 7902 if (tvp) {
91447636 7903 vnode_put(tvp);
0a7de745 7904 }
91447636
A
7905 vnode_put(tdvp);
7906 vnode_put(sdvp);
1c79356b 7907out1:
91447636
A
7908 vnode_put(fvp);
7909
91447636
A
7910 nameidone(&fromnd);
7911
0a7de745
A
7912 if (error == -1) {
7913 return 0;
7914 }
7915 return error;
1c79356b
A
7916}
7917
39037602 7918#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 7919
1c79356b 7920/*
39037602
A
7921 * Helper function for doing clones. The caller is expected to provide an
7922 * iocounted source vnode and release it.
1c79356b 7923 */
fe8ab488 7924static int
39037602
A
7925clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7926 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 7927{
91447636 7928 vnode_t tvp, tdvp;
39037602 7929 struct nameidata tond;
1c79356b 7930 int error;
39037602 7931 int follow;
813fb2f6 7932 boolean_t free_src_acl;
39037602
A
7933 boolean_t attr_cleanup;
7934 enum vtype v_type;
7935 kauth_action_t action;
7936 struct componentname *cnp;
7937 uint32_t defaulted;
7938 struct vnode_attr va;
813fb2f6 7939 struct vnode_attr nva;
5ba3f43e 7940 uint32_t vnop_flags;
316670eb 7941
39037602
A
7942 v_type = vnode_vtype(fvp);
7943 switch (v_type) {
7944 case VLNK:
0a7de745 7945 /* FALLTHRU */
39037602
A
7946 case VREG:
7947 action = KAUTH_VNODE_ADD_FILE;
7948 break;
7949 case VDIR:
7950 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7951 fvp->v_mountedhere) {
0a7de745 7952 return EINVAL;
39037602
A
7953 }
7954 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7955 break;
7956 default:
0a7de745 7957 return EINVAL;
39037602
A
7958 }
7959
7960 AUDIT_ARG(fd2, dst_dirfd);
7961 AUDIT_ARG(value32, flags);
7962
7963 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7964 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7965 UIO_USERSPACE, dst, ctx);
0a7de745
A
7966 if ((error = nameiat(&tond, dst_dirfd))) {
7967 return error;
7968 }
39037602
A
7969 cnp = &tond.ni_cnd;
7970 tdvp = tond.ni_dvp;
7971 tvp = tond.ni_vp;
7972
813fb2f6 7973 free_src_acl = FALSE;
39037602
A
7974 attr_cleanup = FALSE;
7975
7976 if (tvp != NULL) {
7977 error = EEXIST;
7978 goto out;
7979 }
7980
7981 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7982 error = EXDEV;
7983 goto out;
7984 }
7985
7986#if CONFIG_MACF
0a7de745 7987 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) {
39037602 7988 goto out;
0a7de745 7989 }
39037602 7990#endif
0a7de745 7991 if ((error = vnode_authorize(tdvp, NULL, action, ctx))) {
39037602 7992 goto out;
0a7de745 7993 }
39037602
A
7994
7995 action = KAUTH_VNODE_GENERIC_READ_BITS;
0a7de745 7996 if (data_read_authorised) {
39037602 7997 action &= ~KAUTH_VNODE_READ_DATA;
0a7de745
A
7998 }
7999 if ((error = vnode_authorize(fvp, NULL, action, ctx))) {
39037602 8000 goto out;
0a7de745 8001 }
39037602
A
8002
8003 /*
8004 * certain attributes may need to be changed from the source, we ask for
c3c9b80d
A
8005 * those here with the exception of source file's ACL. The clone file
8006 * will inherit the target directory's ACL.
39037602
A
8007 */
8008 VATTR_INIT(&va);
813fb2f6
A
8009 VATTR_WANTED(&va, va_uid);
8010 VATTR_WANTED(&va, va_gid);
39037602
A
8011 VATTR_WANTED(&va, va_mode);
8012 VATTR_WANTED(&va, va_flags);
39037602 8013
0a7de745 8014 if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
39037602 8015 goto out;
0a7de745 8016 }
39037602 8017
813fb2f6
A
8018 VATTR_INIT(&nva);
8019 VATTR_SET(&nva, va_type, v_type);
8020 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
8021 VATTR_SET(&nva, va_acl, va.va_acl);
8022 free_src_acl = TRUE;
39037602
A
8023 }
8024
8025 /* Handle ACL inheritance, initialize vap. */
8026 if (v_type == VLNK) {
813fb2f6 8027 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 8028 } else {
813fb2f6 8029 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
0a7de745 8030 if (error) {
813fb2f6 8031 goto out;
0a7de745 8032 }
39037602
A
8033 attr_cleanup = TRUE;
8034 }
8035
5ba3f43e 8036 vnop_flags = VNODE_CLONEFILE_DEFAULT;
813fb2f6
A
8037 /*
8038 * We've got initial values for all security parameters,
8039 * If we are superuser, then we can change owners to be the
8040 * same as the source. Both superuser and the owner have default
8041 * WRITE_SECURITY privileges so all other fields can be taken
8042 * from source as well.
8043 */
5ba3f43e 8044 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
0a7de745 8045 if (VATTR_IS_SUPPORTED(&va, va_uid)) {
813fb2f6 8046 VATTR_SET(&nva, va_uid, va.va_uid);
0a7de745
A
8047 }
8048 if (VATTR_IS_SUPPORTED(&va, va_gid)) {
813fb2f6 8049 VATTR_SET(&nva, va_gid, va.va_gid);
0a7de745 8050 }
5ba3f43e
A
8051 } else {
8052 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
813fb2f6 8053 }
5ba3f43e 8054
0a7de745 8055 if (VATTR_IS_SUPPORTED(&va, va_mode)) {
813fb2f6 8056 VATTR_SET(&nva, va_mode, va.va_mode);
0a7de745 8057 }
813fb2f6
A
8058 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
8059 VATTR_SET(&nva, va_flags,
5ba3f43e
A
8060 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
8061 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
39037602
A
8062 }
8063
5ba3f43e 8064 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
39037602
A
8065
8066 if (!error && tvp) {
0a7de745 8067 int update_flags = 0;
39037602
A
8068#if CONFIG_FSE
8069 int fsevent;
8070#endif /* CONFIG_FSE */
8071
39037602
A
8072 /*
8073 * If some of the requested attributes weren't handled by the
8074 * VNOP, use our fallback code.
8075 */
c3c9b80d 8076 if (!VATTR_ALL_SUPPORTED(&nva)) {
813fb2f6 8077 (void)vnode_setattr_fallback(tvp, &nva, ctx);
0a7de745 8078 }
39037602 8079
ea3f0419
A
8080#if CONFIG_MACF
8081 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
8082 VNODE_LABEL_CREATE, ctx);
8083#endif
8084
39037602 8085 // Make sure the name & parent pointers are hooked up
0a7de745 8086 if (tvp->v_name == NULL) {
39037602 8087 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
8088 }
8089 if (tvp->v_parent == NULLVP) {
39037602 8090 update_flags |= VNODE_UPDATE_PARENT;
0a7de745 8091 }
39037602
A
8092
8093 if (update_flags) {
8094 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
8095 cnp->cn_namelen, cnp->cn_hash, update_flags);
8096 }
8097
8098#if CONFIG_FSE
8099 switch (vnode_vtype(tvp)) {
8100 case VLNK:
0a7de745 8101 /* FALLTHRU */
39037602
A
8102 case VREG:
8103 fsevent = FSE_CREATE_FILE;
8104 break;
8105 case VDIR:
8106 fsevent = FSE_CREATE_DIR;
8107 break;
8108 default:
8109 goto out;
8110 }
8111
8112 if (need_fsevent(fsevent, tvp)) {
5ba3f43e
A
8113 /*
8114 * The following is a sequence of three explicit events.
8115 * A pair of FSE_CLONE events representing the source and destination
8116 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
8117 * fseventsd may coalesce the destination clone and create events
8118 * into a single event resulting in the following sequence for a client
8119 * FSE_CLONE (src)
8120 * FSE_CLONE | FSE_CREATE (dst)
8121 */
8122 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
8123 FSE_ARG_DONE);
39037602
A
8124 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
8125 FSE_ARG_DONE);
8126 }
8127#endif /* CONFIG_FSE */
8128 }
39037602
A
8129
8130out:
0a7de745 8131 if (attr_cleanup) {
813fb2f6 8132 vn_attribute_cleanup(&nva, defaulted);
0a7de745
A
8133 }
8134 if (free_src_acl && va.va_acl) {
39037602 8135 kauth_acl_free(va.va_acl);
0a7de745 8136 }
39037602 8137 nameidone(&tond);
0a7de745 8138 if (tvp) {
39037602 8139 vnode_put(tvp);
0a7de745 8140 }
39037602 8141 vnode_put(tdvp);
0a7de745 8142 return error;
39037602
A
8143}
8144
8145/*
8146 * clone files or directories, target must not exist.
8147 */
8148/* ARGSUSED */
8149int
8150clonefileat(__unused proc_t p, struct clonefileat_args *uap,
8151 __unused int32_t *retval)
8152{
8153 vnode_t fvp;
8154 struct nameidata fromnd;
8155 int follow;
8156 int error;
8157 vfs_context_t ctx = vfs_context_current();
8158
8159 /* Check that the flags are valid. */
0a7de745
A
8160 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
8161 return EINVAL;
8162 }
39037602
A
8163
8164 AUDIT_ARG(fd, uap->src_dirfd);
8165
8166 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
8167 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
8168 UIO_USERSPACE, uap->src, ctx);
0a7de745
A
8169 if ((error = nameiat(&fromnd, uap->src_dirfd))) {
8170 return error;
8171 }
39037602
A
8172
8173 fvp = fromnd.ni_vp;
8174 nameidone(&fromnd);
8175
8176 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
8177 uap->flags, ctx);
8178
8179 vnode_put(fvp);
0a7de745 8180 return error;
39037602
A
8181}
8182
8183int
8184fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
8185 __unused int32_t *retval)
8186{
8187 vnode_t fvp;
8188 struct fileproc *fp;
8189 int error;
8190 vfs_context_t ctx = vfs_context_current();
8191
5ba3f43e 8192 /* Check that the flags are valid. */
0a7de745
A
8193 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
8194 return EINVAL;
8195 }
5ba3f43e 8196
39037602
A
8197 AUDIT_ARG(fd, uap->src_fd);
8198 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
0a7de745
A
8199 if (error) {
8200 return error;
8201 }
39037602 8202
f427ee49 8203 if ((fp->fp_glob->fg_flag & FREAD) == 0) {
39037602
A
8204 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
8205 error = EBADF;
8206 goto out;
8207 }
8208
0a7de745 8209 if ((error = vnode_getwithref(fvp))) {
39037602 8210 goto out;
0a7de745 8211 }
39037602
A
8212
8213 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
8214
8215 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
8216 uap->flags, ctx);
8217
8218 vnode_put(fvp);
8219out:
8220 file_drop(uap->src_fd);
0a7de745 8221 return error;
39037602
A
8222}
8223
39037602 8224static int
cb323159 8225rename_submounts_callback(mount_t mp, void *arg)
39037602 8226{
cb323159
A
8227 int error = 0;
8228 mount_t pmp = (mount_t)arg;
f427ee49 8229 int prefix_len = (int)strlen(pmp->mnt_vfsstat.f_mntonname);
cb323159
A
8230
8231 if (strncmp(mp->mnt_vfsstat.f_mntonname, pmp->mnt_vfsstat.f_mntonname, prefix_len) != 0) {
8232 return 0;
8233 }
8234
8235 if (mp->mnt_vfsstat.f_mntonname[prefix_len] != '/') {
8236 return 0;
8237 }
8238
8239 if ((error = vfs_busy(mp, LK_NOWAIT))) {
8240 printf("vfs_busy failed with %d for %s\n", error, mp->mnt_vfsstat.f_mntonname);
8241 return -1;
8242 }
8243
8244 int pathlen = MAXPATHLEN;
8245 if ((error = vn_getpath_ext(mp->mnt_vnodecovered, NULL, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER))) {
8246 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error, mp->mnt_vfsstat.f_mntonname);
8247 }
8248
8249 vfs_unbusy(mp);
8250
8251 return error;
8252}
8253
8254/*
8255 * Rename files. Source and destination must either both be directories,
8256 * or both not be directories. If target is a directory, it must be empty.
8257 */
8258/* ARGSUSED */
8259static int
8260renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
8261 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
8262{
8263 if (flags & ~VFS_RENAME_FLAGS_MASK) {
8264 return EINVAL;
8265 }
39037602 8266
0a7de745 8267 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
39037602 8268 return EINVAL;
0a7de745 8269 }
39037602
A
8270
8271 vnode_t tvp, tdvp;
8272 vnode_t fvp, fdvp;
f427ee49 8273 vnode_t mnt_fvp;
39037602
A
8274 struct nameidata *fromnd, *tond;
8275 int error;
8276 int do_retry;
8277 int retry_count;
8278 int mntrename;
8279 int need_event;
b226f5e5
A
8280 int need_kpath2;
8281 int has_listeners;
39037602
A
8282 const char *oname = NULL;
8283 char *from_name = NULL, *to_name = NULL;
cb323159 8284 char *from_name_no_firmlink = NULL, *to_name_no_firmlink = NULL;
0a7de745 8285 int from_len = 0, to_len = 0;
cb323159 8286 int from_len_no_firmlink = 0, to_len_no_firmlink = 0;
39037602 8287 int holding_mntlock;
f427ee49 8288 int vn_authorize_skipped;
39037602
A
8289 mount_t locked_mp = NULL;
8290 vnode_t oparent = NULLVP;
8291#if CONFIG_FSE
8292 fse_info from_finfo, to_finfo;
8293#endif
cb323159
A
8294 int from_truncated = 0, to_truncated = 0;
8295 int from_truncated_no_firmlink = 0, to_truncated_no_firmlink = 0;
39037602
A
8296 int batched = 0;
8297 struct vnode_attr *fvap, *tvap;
8298 int continuing = 0;
8299 /* carving out a chunk for structs that are too big to be on stack. */
8300 struct {
8301 struct nameidata from_node, to_node;
8302 struct vnode_attr fv_attr, tv_attr;
8303 } * __rename_data;
f427ee49 8304 __rename_data = kheap_alloc(KHEAP_TEMP, sizeof(*__rename_data), Z_WAITOK);
39037602
A
8305 fromnd = &__rename_data->from_node;
8306 tond = &__rename_data->to_node;
8307
8308 holding_mntlock = 0;
8309 do_retry = 0;
8310 retry_count = 0;
91447636
A
8311retry:
8312 fvp = tvp = NULL;
8313 fdvp = tdvp = NULL;
6d2010ae 8314 fvap = tvap = NULL;
f427ee49 8315 mnt_fvp = NULLVP;
1c79356b 8316 mntrename = FALSE;
f427ee49 8317 vn_authorize_skipped = FALSE;
1c79356b 8318
316670eb 8319 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 8320 segflg, from, ctx);
316670eb 8321 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 8322
316670eb 8323 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 8324 segflg, to, ctx);
316670eb 8325 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 8326
6d2010ae 8327continue_lookup:
316670eb 8328 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
0a7de745 8329 if ((error = nameiat(fromnd, fromfd))) {
6d2010ae 8330 goto out1;
0a7de745 8331 }
316670eb
A
8332 fdvp = fromnd->ni_dvp;
8333 fvp = fromnd->ni_vp;
1c79356b 8334
0a7de745 8335 if (fvp && fvp->v_type == VDIR) {
316670eb 8336 tond->ni_cnd.cn_flags |= WILLBEDIR;
0a7de745 8337 }
6d2010ae 8338 }
2d21ac55 8339
316670eb 8340 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
0a7de745 8341 if ((error = nameiat(tond, tofd))) {
6d2010ae
A
8342 /*
8343 * Translate error code for rename("dir1", "dir2/.").
8344 */
0a7de745 8345 if (error == EISDIR && fvp->v_type == VDIR) {
6d2010ae 8346 error = EINVAL;
0a7de745 8347 }
6d2010ae
A
8348 goto out1;
8349 }
316670eb
A
8350 tdvp = tond->ni_dvp;
8351 tvp = tond->ni_vp;
fe8ab488 8352 }
91447636 8353
00867663
A
8354#if DEVELOPMENT || DEBUG
8355 /*
8356 * XXX VSWAP: Check for entitlements or special flag here
8357 * so we can restrict access appropriately.
8358 */
8359#else /* DEVELOPMENT || DEBUG */
8360
8361 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
8362 error = EPERM;
8363 goto out1;
8364 }
8365
8366 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
8367 error = EPERM;
8368 goto out1;
8369 }
8370#endif /* DEVELOPMENT || DEBUG */
8371
39037602
A
8372 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
8373 error = ENOENT;
8374 goto out1;
8375 }
8376
8377 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
f427ee49
A
8378 int32_t pval = 0;
8379 int err = 0;
8380
8381 /*
8382 * We allow rename with VFS_RENAME_EXCL flag for an existing file which
8383 * has the same name as target iff the following conditions are met:
8384 * 1. the target file system is case insensitive
8385 * 2. source and target directories are the same
8386 * 3. source and target files are the same
8387 * 4. name only differs in case (determined by underlying filesystem)
8388 */
8389 if (fvp != tvp || fdvp != tdvp) {
8390 error = EEXIST;
8391 goto out1;
8392 }
8393
8394 /*
8395 * Assume that the target file system is case sensitive if
8396 * _PC_CASE_SENSITIVE selector isn't supported.
8397 */
8398 err = VNOP_PATHCONF(tvp, _PC_CASE_SENSITIVE, &pval, ctx);
8399 if (err != 0 || pval != 0) {
8400 error = EEXIST;
8401 goto out1;
8402 }
39037602
A
8403 }
8404
6d2010ae 8405 batched = vnode_compound_rename_available(fdvp);
d9a64523
A
8406
8407#if CONFIG_FSE
8408 need_event = need_fsevent(FSE_RENAME, fdvp);
8409 if (need_event) {
8410 if (fvp) {
8411 get_fse_info(fvp, &from_finfo, ctx);
8412 } else {
8413 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
8414 if (error) {
8415 goto out1;
8416 }
8417
8418 fvap = &__rename_data->fv_attr;
8419 }
8420
8421 if (tvp) {
0a7de745 8422 get_fse_info(tvp, &to_finfo, ctx);
d9a64523
A
8423 } else if (batched) {
8424 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
8425 if (error) {
8426 goto out1;
8427 }
8428
8429 tvap = &__rename_data->tv_attr;
8430 }
8431 }
8432#else
8433 need_event = 0;
8434#endif /* CONFIG_FSE */
8435
b226f5e5
A
8436 has_listeners = kauth_authorize_fileop_has_listeners();
8437
8438 need_kpath2 = 0;
8439#if CONFIG_AUDIT
8440 if (AUDIT_RECORD_EXISTS()) {
8441 need_kpath2 = 1;
8442 }
8443#endif
8444
8445 if (need_event || has_listeners) {
d9a64523
A
8446 if (from_name == NULL) {
8447 GET_PATH(from_name);
d9a64523
A
8448 }
8449
8450 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
cb323159
A
8451
8452 if (from_name_no_firmlink == NULL) {
8453 GET_PATH(from_name_no_firmlink);
cb323159
A
8454 }
8455
8456 from_len_no_firmlink = safe_getpath_no_firmlink(fdvp, fromnd->ni_cnd.cn_nameptr, from_name_no_firmlink, MAXPATHLEN, &from_truncated_no_firmlink);
b226f5e5 8457 }
d9a64523 8458
b226f5e5 8459 if (need_event || need_kpath2 || has_listeners) {
d9a64523
A
8460 if (to_name == NULL) {
8461 GET_PATH(to_name);
d9a64523
A
8462 }
8463
8464 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
cb323159
A
8465
8466 if (to_name_no_firmlink == NULL) {
8467 GET_PATH(to_name_no_firmlink);
cb323159
A
8468 }
8469
8470 to_len_no_firmlink = safe_getpath_no_firmlink(tdvp, tond->ni_cnd.cn_nameptr, to_name_no_firmlink, MAXPATHLEN, &to_truncated_no_firmlink);
b226f5e5
A
8471 if (to_name && need_kpath2) {
8472 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
8473 }
d9a64523 8474 }
6d2010ae 8475 if (!fvp) {
fe8ab488 8476 /*
6d2010ae
A
8477 * Claim: this check will never reject a valid rename.
8478 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8479 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 8480 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
0a7de745 8481 * then you can't move it to within another dir on the same mountpoint.
6d2010ae
A
8482 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8483 *
8484 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 8485 */
6d2010ae
A
8486 if (fdvp->v_mount != tdvp->v_mount) {
8487 error = EXDEV;
8488 goto out1;
8489 }
8490 goto skipped_lookup;
1c79356b 8491 }
2d21ac55 8492
0a7de745
A
8493 /*
8494 * If the source and destination are the same (i.e. they're
8495 * links to the same vnode) and the target file system is
8496 * case sensitive, then there is nothing to do.
6d2010ae
A
8497 *
8498 * XXX Come back to this.
0a7de745 8499 */
2d21ac55
A
8500 if (fvp == tvp) {
8501 int pathconf_val;
fe8ab488 8502
2d21ac55
A
8503 /*
8504 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8505 * then assume that this file system is case sensitive.
8506 */
8507 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
8508 pathconf_val != 0) {
f427ee49 8509 vn_authorize_skipped = TRUE;
2d21ac55 8510 goto out1;
fe8ab488 8511 }
2d21ac55 8512 }
91447636 8513
1c79356b
A
8514 /*
8515 * Allow the renaming of mount points.
8516 * - target must not exist
8517 * - target must reside in the same directory as source
8518 * - union mounts cannot be renamed
f427ee49 8519 * - the root fs, and tightly-linked system volumes, cannot be renamed
6d2010ae
A
8520 *
8521 * XXX Handle this in VFS after a continued lookup (if we missed
8522 * in the cache to start off)
39037602
A
8523 *
8524 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8525 * we'll skip past here. The file system is responsible for
8526 * checking that @tvp is not a descendent of @fvp and vice versa
8527 * so it should always return EINVAL if either @tvp or @fvp is the
8528 * root of a volume.
1c79356b 8529 */
91447636 8530 if ((fvp->v_flag & VROOT) &&
1c79356b 8531 (fvp->v_type == VDIR) &&
0a7de745
A
8532 (tvp == NULL) &&
8533 (fvp->v_mountedhere == NULL) &&
8534 (fdvp == tdvp) &&
8535 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
cb323159 8536 ((fvp->v_mount->mnt_kern_flag & MNTK_SYSTEM) == 0) &&
1c79356b 8537 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 8538 vnode_t coveredvp;
fe8ab488 8539
1c79356b 8540 /* switch fvp to the covered vnode */
91447636 8541 coveredvp = fvp->v_mount->mnt_vnodecovered;
0a7de745
A
8542 if ((vnode_getwithref(coveredvp))) {
8543 error = ENOENT;
91447636
A
8544 goto out1;
8545 }
f427ee49
A
8546 /*
8547 * Save the 'fvp' as it is needed for vn_authorize_renamex_with_paths()
8548 * later.
8549 */
8550 mnt_fvp = fvp;
91447636
A
8551
8552 fvp = coveredvp;
1c79356b
A
8553 mntrename = TRUE;
8554 }
91447636
A
8555 /*
8556 * Check for cross-device rename.
8557 */
8558 if ((fvp->v_mount != tdvp->v_mount) ||
8559 (tvp && (fvp->v_mount != tvp->v_mount))) {
8560 error = EXDEV;
8561 goto out1;
8562 }
55e303ae 8563
91447636
A
8564 /*
8565 * If source is the same as the destination (that is the
8566 * same inode number) then there is nothing to do...
8567 * EXCEPT if the underlying file system supports case
8568 * insensitivity and is case preserving. In this case
8569 * the file system needs to handle the special case of
8570 * getting the same vnode as target (fvp) and source (tvp).
8571 *
8572 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8573 * and _PC_CASE_PRESERVING can have this exception, and they need to
8574 * handle the special case of getting the same vnode as target and
8575 * source. NOTE: Then the target is unlocked going into vnop_rename,
8576 * so not to cause locking problems. There is a single reference on tvp.
8577 *
fe8ab488 8578 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
8579 * that correct behaviour then is just to return success without doing
8580 * anything.
6d2010ae
A
8581 *
8582 * XXX filesystem should take care of this itself, perhaps...
91447636
A
8583 */
8584 if (fvp == tvp && fdvp == tdvp) {
316670eb 8585 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
0a7de745
A
8586 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
8587 fromnd->ni_cnd.cn_namelen)) {
f427ee49 8588 vn_authorize_skipped = TRUE;
91447636 8589 goto out1;
55e303ae 8590 }
91447636 8591 }
55e303ae 8592
91447636 8593 if (holding_mntlock && fvp->v_mount != locked_mp) {
0a7de745 8594 /*
91447636
A
8595 * we're holding a reference and lock
8596 * on locked_mp, but it no longer matches
8597 * what we want to do... so drop our hold
8598 */
8599 mount_unlock_renames(locked_mp);
8600 mount_drop(locked_mp, 0);
0a7de745 8601 holding_mntlock = 0;
91447636
A
8602 }
8603 if (tdvp != fdvp && fvp->v_type == VDIR) {
0a7de745 8604 /*
91447636
A
8605 * serialize renames that re-shape
8606 * the tree... if holding_mntlock is
8607 * set, then we're ready to go...
8608 * otherwise we
8609 * first need to drop the iocounts
8610 * we picked up, second take the
8611 * lock to serialize the access,
8612 * then finally start the lookup
8613 * process over with the lock held
8614 */
0a7de745
A
8615 if (!holding_mntlock) {
8616 /*
91447636
A
8617 * need to grab a reference on
8618 * the mount point before we
8619 * drop all the iocounts... once
8620 * the iocounts are gone, the mount
8621 * could follow
8622 */
8623 locked_mp = fvp->v_mount;
8624 mount_ref(locked_mp, 0);
55e303ae 8625
91447636
A
8626 /*
8627 * nameidone has to happen before we vnode_put(tvp)
8628 * since it may need to release the fs_nodelock on the tvp
8629 */
316670eb 8630 nameidone(tond);
55e303ae 8631
0a7de745
A
8632 if (tvp) {
8633 vnode_put(tvp);
8634 }
91447636
A
8635 vnode_put(tdvp);
8636
8637 /*
8638 * nameidone has to happen before we vnode_put(fdvp)
8639 * since it may need to release the fs_nodelock on the fvp
8640 */
316670eb 8641 nameidone(fromnd);
55e303ae 8642
91447636
A
8643 vnode_put(fvp);
8644 vnode_put(fdvp);
8645
f427ee49
A
8646 if (mnt_fvp != NULLVP) {
8647 vnode_put(mnt_fvp);
8648 }
8649
91447636
A
8650 mount_lock_renames(locked_mp);
8651 holding_mntlock = 1;
8652
8653 goto retry;
55e303ae 8654 }
91447636 8655 } else {
0a7de745 8656 /*
91447636 8657 * when we dropped the iocounts to take
fe8ab488 8658 * the lock, we allowed the identity of
91447636
A
8659 * the various vnodes to change... if they did,
8660 * we may no longer be dealing with a rename
8661 * that reshapes the tree... once we're holding
8662 * the iocounts, the vnodes can't change type
8663 * so we're free to drop the lock at this point
8664 * and continue on
1c79356b 8665 */
0a7de745 8666 if (holding_mntlock) {
91447636
A
8667 mount_unlock_renames(locked_mp);
8668 mount_drop(locked_mp, 0);
0a7de745 8669 holding_mntlock = 0;
1c79356b 8670 }
91447636 8671 }
6d2010ae 8672
f427ee49
A
8673 if (!batched) {
8674 error = vn_authorize_renamex_with_paths(fdvp, mntrename ? mnt_fvp : fvp,
8675 &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx,
8676 flags, NULL);
8677 if (error) {
8678 if (error == ENOENT) {
8679 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8680 /*
8681 * We encountered a race where after doing the namei,
8682 * tvp stops being valid. If so, simply re-drive the rename
8683 * call from the top.
8684 */
8685 do_retry = 1;
8686 retry_count += 1;
8687 }
8688 }
8689 goto out1;
8690 }
8691 }
8692
8693 /* Release the 'mnt_fvp' now that it is no longer needed. */
8694 if (mnt_fvp != NULLVP) {
8695 vnode_put(mnt_fvp);
8696 mnt_fvp = NULLVP;
8697 }
8698
8699 // save these off so we can later verify that fvp is the same
8700 oname = fvp->v_name;
8701 oparent = fvp->v_parent;
8702
8703skipped_lookup:
8704 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
8705 tdvp, &tvp, &tond->ni_cnd, tvap,
8706 flags, ctx);
8707
8708 if (holding_mntlock) {
91447636
A
8709 /*
8710 * we can drop our serialization
8711 * lock now
8712 */
8713 mount_unlock_renames(locked_mp);
8714 mount_drop(locked_mp, 0);
8715 holding_mntlock = 0;
8716 }
8717 if (error) {
cb323159
A
8718 if (error == EDATALESS) {
8719 /*
8720 * If we've been here before, something has gone
8721 * horribly wrong and we should just get out lest
8722 * we spiral around the drain forever.
8723 */
8724 if (flags & VFS_RENAME_DATALESS) {
8725 error = EIO;
8726 goto out1;
8727 }
8728
8729 /*
8730 * The object we're renaming is dataless (or has a
8731 * dataless descendent) and requires materialization
8732 * before the rename occurs. But we're holding the
8733 * mount point's rename lock, so it's not safe to
8734 * make the upcall.
8735 *
8736 * In this case, we release the lock, perform the
8737 * materialization, and start the whole thing over.
8738 */
8739 error = vnode_materialize_dataless_file(fvp,
8740 NAMESPACE_HANDLER_RENAME_OP);
8741
8742 if (error == 0) {
8743 /*
8744 * The next time around we need to tell the
8745 * file system that the materializtaion has
8746 * been performed.
8747 */
8748 flags |= VFS_RENAME_DATALESS;
8749 do_retry = 1;
8750 }
8751 goto out1;
8752 }
6d2010ae 8753 if (error == EKEEPLOOKING) {
316670eb
A
8754 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8755 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
8756 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8757 }
8758 }
8759
316670eb
A
8760 fromnd->ni_vp = fvp;
8761 tond->ni_vp = tvp;
fe8ab488 8762
6d2010ae
A
8763 goto continue_lookup;
8764 }
8765
8766 /*
fe8ab488
A
8767 * We may encounter a race in the VNOP where the destination didn't
8768 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
8769 * try to create the entry. In this case, we should re-drive this rename
8770 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 8771 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
8772 */
8773 if (error == ERECYCLE) {
bca245ac
A
8774 if (retry_count < MAX_RENAME_ERECYCLE_RETRIES) {
8775 do_retry = 1;
8776 retry_count += 1;
8777 } else {
8778 printf("rename retry limit due to ERECYCLE reached\n");
8779 error = ENOENT;
8780 }
6d2010ae 8781 }
55e303ae 8782
c18c124e
A
8783 /*
8784 * For compound VNOPs, the authorization callback may return
8785 * ENOENT in case of racing hardlink lookups hitting the name
8786 * cache, redrive the lookup.
8787 */
3e170ce0 8788 if (batched && error == ENOENT) {
3e170ce0
A
8789 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8790 do_retry = 1;
8791 retry_count += 1;
8792 }
c18c124e
A
8793 }
8794
91447636 8795 goto out1;
fe8ab488
A
8796 }
8797
8798 /* call out to allow 3rd party notification of rename.
91447636
A
8799 * Ignore result of kauth_authorize_fileop call.
8800 */
fe8ab488 8801 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
8802 KAUTH_FILEOP_RENAME,
8803 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
8804 if (flags & VFS_RENAME_SWAP) {
8805 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
8806 KAUTH_FILEOP_RENAME,
8807 (uintptr_t)to_name, (uintptr_t)from_name);
39037602 8808 }
91447636 8809
2d21ac55 8810#if CONFIG_FSE
91447636 8811 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
8812 if (from_truncated || to_truncated) {
8813 // set it here since only the from_finfo gets reported up to user space
8814 from_finfo.mode |= FSE_TRUNCATED_PATH;
8815 }
6d2010ae
A
8816
8817 if (tvap && tvp) {
8818 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
8819 }
8820 if (fvap) {
8821 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
8822 }
8823
39037602
A
8824 if (tvp) {
8825 add_fsevent(FSE_RENAME, ctx,
cb323159 8826 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
0a7de745 8827 FSE_ARG_FINFO, &from_finfo,
cb323159 8828 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
0a7de745
A
8829 FSE_ARG_FINFO, &to_finfo,
8830 FSE_ARG_DONE);
39037602
A
8831 if (flags & VFS_RENAME_SWAP) {
8832 /*
8833 * Strictly speaking, swap is the equivalent of
8834 * *three* renames. FSEvents clients should only take
8835 * the events as a hint, so we only bother reporting
8836 * two.
8837 */
8838 add_fsevent(FSE_RENAME, ctx,
cb323159 8839 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
0a7de745 8840 FSE_ARG_FINFO, &to_finfo,
cb323159 8841 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
91447636 8842 FSE_ARG_FINFO, &from_finfo,
91447636 8843 FSE_ARG_DONE);
0a7de745
A
8844 }
8845 } else {
8846 add_fsevent(FSE_RENAME, ctx,
cb323159 8847 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
0a7de745 8848 FSE_ARG_FINFO, &from_finfo,
cb323159 8849 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
0a7de745 8850 FSE_ARG_DONE);
91447636
A
8851 }
8852 }
2d21ac55 8853#endif /* CONFIG_FSE */
fe8ab488 8854
91447636
A
8855 /*
8856 * update filesystem's mount point data
8857 */
8858 if (mntrename) {
0a7de745 8859 char *cp, *pathend, *mpname;
91447636
A
8860 char * tobuf;
8861 struct mount *mp;
8862 int maxlen;
8863 size_t len = 0;
8864
8865 mp = fvp->v_mountedhere;
8866
8867 if (vfs_busy(mp, LK_NOWAIT)) {
0a7de745 8868 error = EBUSY;
91447636 8869 goto out1;
55e303ae 8870 }
f427ee49 8871 tobuf = zalloc(ZV_NAMEI);
55e303ae 8872
0a7de745 8873 if (UIO_SEG_IS_USER_SPACE(segflg)) {
fe8ab488 8874 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
0a7de745 8875 } else {
fe8ab488 8876 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
0a7de745 8877 }
91447636 8878 if (!error) {
0a7de745
A
8879 /* find current mount point prefix */
8880 pathend = &mp->mnt_vfsstat.f_mntonname[0];
91447636 8881 for (cp = pathend; *cp != '\0'; ++cp) {
0a7de745
A
8882 if (*cp == '/') {
8883 pathend = cp + 1;
8884 }
91447636
A
8885 }
8886 /* find last component of target name */
8887 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
0a7de745
A
8888 if (*cp == '/') {
8889 mpname = cp + 1;
8890 }
91447636 8891 }
cb323159
A
8892
8893 /* Update f_mntonname of sub mounts */
8894 vfs_iterate(0, rename_submounts_callback, (void *)mp);
8895
91447636 8896 /* append name to prefix */
f427ee49 8897 maxlen = MAXPATHLEN - (int)(pathend - mp->mnt_vfsstat.f_mntonname);
91447636 8898 bzero(pathend, maxlen);
cb323159 8899
2d21ac55 8900 strlcpy(pathend, mpname, maxlen);
91447636 8901 }
f427ee49 8902 zfree(ZV_NAMEI, tobuf);
91447636
A
8903
8904 vfs_unbusy(mp);
cb323159
A
8905
8906 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
91447636
A
8907 }
8908 /*
fe8ab488 8909 * fix up name & parent pointers. note that we first
91447636
A
8910 * check that fvp has the same name/parent pointers it
8911 * had before the rename call... this is a 'weak' check
8912 * at best...
6d2010ae
A
8913 *
8914 * XXX oparent and oname may not be set in the compound vnop case
91447636 8915 */
6d2010ae 8916 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
0a7de745 8917 int update_flags;
91447636 8918
0a7de745 8919 update_flags = VNODE_UPDATE_NAME;
91447636 8920
0a7de745
A
8921 if (fdvp != tdvp) {
8922 update_flags |= VNODE_UPDATE_PARENT;
8923 }
91447636 8924
0a7de745 8925 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
8926 }
8927out1:
f427ee49
A
8928 /*
8929 * There are some cases (for e.g. 'fvp == tvp') when vn_authorize was
8930 * skipped earlier as no actual rename was performed.
8931 */
8932 if (vn_authorize_skipped && error == 0) {
8933 error = vn_authorize_renamex_with_paths(fdvp, fvp,
8934 &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx,
8935 flags, NULL);
8936 if (error && error == ENOENT) {
8937 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8938 do_retry = 1;
8939 retry_count += 1;
8940 }
8941 }
8942 }
593a1d5f
A
8943 if (to_name != NULL) {
8944 RELEASE_PATH(to_name);
8945 to_name = NULL;
8946 }
cb323159
A
8947 if (to_name_no_firmlink != NULL) {
8948 RELEASE_PATH(to_name_no_firmlink);
8949 to_name_no_firmlink = NULL;
8950 }
593a1d5f
A
8951 if (from_name != NULL) {
8952 RELEASE_PATH(from_name);
8953 from_name = NULL;
8954 }
cb323159
A
8955 if (from_name_no_firmlink != NULL) {
8956 RELEASE_PATH(from_name_no_firmlink);
8957 from_name_no_firmlink = NULL;
8958 }
91447636 8959 if (holding_mntlock) {
0a7de745 8960 mount_unlock_renames(locked_mp);
91447636 8961 mount_drop(locked_mp, 0);
593a1d5f 8962 holding_mntlock = 0;
91447636
A
8963 }
8964 if (tdvp) {
8965 /*
8966 * nameidone has to happen before we vnode_put(tdvp)
8967 * since it may need to release the fs_nodelock on the tdvp
8968 */
316670eb 8969 nameidone(tond);
91447636 8970
0a7de745
A
8971 if (tvp) {
8972 vnode_put(tvp);
8973 }
8974 vnode_put(tdvp);
91447636
A
8975 }
8976 if (fdvp) {
8977 /*
8978 * nameidone has to happen before we vnode_put(fdvp)
8979 * since it may need to release the fs_nodelock on the fdvp
8980 */
316670eb 8981 nameidone(fromnd);
91447636 8982
0a7de745
A
8983 if (fvp) {
8984 vnode_put(fvp);
8985 }
8986 vnode_put(fdvp);
91447636 8987 }
f427ee49
A
8988 if (mnt_fvp != NULLVP) {
8989 vnode_put(mnt_fvp);
8990 }
6d2010ae
A
8991 /*
8992 * If things changed after we did the namei, then we will re-drive
8993 * this rename call from the top.
8994 */
316670eb 8995 if (do_retry) {
6d2010ae 8996 do_retry = 0;
593a1d5f
A
8997 goto retry;
8998 }
316670eb 8999
f427ee49 9000 kheap_free(KHEAP_TEMP, __rename_data, sizeof(*__rename_data));
0a7de745 9001 return error;
1c79356b
A
9002}
9003
fe8ab488
A
9004int
9005rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
9006{
0a7de745
A
9007 return renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
9008 AT_FDCWD, uap->to, UIO_USERSPACE, 0);
fe8ab488
A
9009}
9010
0a7de745
A
9011int
9012renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
9013{
9014 return renameat_internal(
39037602
A
9015 vfs_context_current(),
9016 uap->fromfd, uap->from,
9017 uap->tofd, uap->to,
fe8ab488
A
9018 UIO_USERSPACE, uap->flags);
9019}
39037602 9020
fe8ab488
A
9021int
9022renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
9023{
0a7de745
A
9024 return renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
9025 uap->tofd, uap->to, UIO_USERSPACE, 0);
fe8ab488
A
9026}
9027
1c79356b
A
9028/*
9029 * Make a directory file.
2d21ac55
A
9030 *
9031 * Returns: 0 Success
9032 * EEXIST
9033 * namei:???
9034 * vnode_authorize:???
9035 * vn_create:???
1c79356b 9036 */
1c79356b 9037/* ARGSUSED */
91447636 9038static int
fe8ab488
A
9039mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
9040 enum uio_seg segflg)
1c79356b 9041{
0a7de745 9042 vnode_t vp, dvp;
1c79356b 9043 int error;
91447636 9044 int update_flags = 0;
6d2010ae 9045 int batched;
1c79356b
A
9046 struct nameidata nd;
9047
91447636 9048 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 9049 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
0a7de745 9050 path, ctx);
9bccf70c 9051 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
9052 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
9053
9054continue_lookup:
fe8ab488 9055 error = nameiat(&nd, fd);
0a7de745
A
9056 if (error) {
9057 return error;
9058 }
91447636 9059 dvp = nd.ni_dvp;
1c79356b 9060 vp = nd.ni_vp;
55e303ae 9061
fe8ab488
A
9062 if (vp != NULL) {
9063 error = EEXIST;
9064 goto out;
9065 }
9066
6d2010ae 9067 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
9068
9069 VATTR_SET(vap, va_type, VDIR);
fe8ab488 9070
6d2010ae
A
9071 /*
9072 * XXX
9073 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
9074 * only get EXISTS or EISDIR for existing path components, and not that it could see
9075 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
9076 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
9077 */
fe8ab488 9078 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
9079 if (error == EACCES || error == EPERM) {
9080 int error2;
9081
9082 nameidone(&nd);
9083 vnode_put(dvp);
9084 dvp = NULLVP;
9085
fe8ab488
A
9086 /*
9087 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
9088 * rather than EACCESS if the target exists.
9089 */
fe8ab488 9090 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
0a7de745 9091 path, ctx);
fe8ab488 9092 error2 = nameiat(&nd, fd);
6d2010ae
A
9093 if (error2) {
9094 goto out;
9095 } else {
9096 vp = nd.ni_vp;
9097 error = EEXIST;
9098 goto out;
9099 }
9100 }
9101
2d21ac55 9102 goto out;
6d2010ae
A
9103 }
9104
9105 /*
fe8ab488 9106 * make the directory
6d2010ae 9107 */
fe8ab488 9108 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
9109 if (error == EKEEPLOOKING) {
9110 nd.ni_vp = vp;
9111 goto continue_lookup;
9112 }
2d21ac55 9113
fe8ab488 9114 goto out;
6d2010ae 9115 }
fe8ab488 9116
91447636 9117 // Make sure the name & parent pointers are hooked up
0a7de745
A
9118 if (vp->v_name == NULL) {
9119 update_flags |= VNODE_UPDATE_NAME;
9120 }
9121 if (vp->v_parent == NULLVP) {
9122 update_flags |= VNODE_UPDATE_PARENT;
9123 }
91447636 9124
0a7de745
A
9125 if (update_flags) {
9126 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
9127 }
55e303ae 9128
2d21ac55 9129#if CONFIG_FSE
91447636 9130 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 9131#endif
91447636
A
9132
9133out:
9134 /*
9135 * nameidone has to happen before we vnode_put(dvp)
9136 * since it may need to release the fs_nodelock on the dvp
9137 */
9138 nameidone(&nd);
9139
0a7de745 9140 if (vp) {
6d2010ae 9141 vnode_put(vp);
0a7de745
A
9142 }
9143 if (dvp) {
6d2010ae 9144 vnode_put(dvp);
0a7de745 9145 }
55e303ae 9146
0a7de745 9147 return error;
1c79356b
A
9148}
9149
b0d623f7
A
9150/*
9151 * mkdir_extended: Create a directory; with extended security (ACL).
9152 *
9153 * Parameters: p Process requesting to create the directory
9154 * uap User argument descriptor (see below)
fe8ab488 9155 * retval (ignored)
b0d623f7
A
9156 *
9157 * Indirect: uap->path Path of directory to create
9158 * uap->mode Access permissions to set
9159 * uap->xsecurity ACL to set
fe8ab488 9160 *
b0d623f7
A
9161 * Returns: 0 Success
9162 * !0 Not success
9163 *
9164 */
1c79356b 9165int
b0d623f7 9166mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 9167{
91447636
A
9168 int ciferror;
9169 kauth_filesec_t xsecdst;
9170 struct vnode_attr va;
9171
b0d623f7
A
9172 AUDIT_ARG(owner, uap->uid, uap->gid);
9173
91447636
A
9174 xsecdst = NULL;
9175 if ((uap->xsecurity != USER_ADDR_NULL) &&
0a7de745 9176 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
91447636 9177 return ciferror;
0a7de745 9178 }
91447636 9179
91447636 9180 VATTR_INIT(&va);
fe8ab488 9181 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
0a7de745 9182 if (xsecdst != NULL) {
91447636 9183 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 9184 }
91447636 9185
fe8ab488
A
9186 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
9187 UIO_USERSPACE);
0a7de745 9188 if (xsecdst != NULL) {
91447636 9189 kauth_filesec_free(xsecdst);
0a7de745 9190 }
91447636 9191 return ciferror;
1c79356b
A
9192}
9193
1c79356b 9194int
b0d623f7 9195mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 9196{
91447636 9197 struct vnode_attr va;
1c79356b 9198
91447636 9199 VATTR_INIT(&va);
fe8ab488 9200 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 9201
0a7de745
A
9202 return mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
9203 UIO_USERSPACE);
91447636 9204}
1c79356b 9205
91447636 9206int
fe8ab488
A
9207mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
9208{
9209 struct vnode_attr va;
9210
9211 VATTR_INIT(&va);
9212 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
9213
0a7de745
A
9214 return mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
9215 UIO_USERSPACE);
fe8ab488
A
9216}
9217
9218static int
9219rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
cb323159 9220 enum uio_seg segflg, int unlink_flags)
1c79356b 9221{
2d21ac55 9222 vnode_t vp, dvp;
91447636
A
9223 int error;
9224 struct nameidata nd;
6d2010ae 9225 char *path = NULL;
cb323159
A
9226 char *no_firmlink_path = NULL;
9227 int len_path = 0;
9228 int len_no_firmlink_path = 0;
6d2010ae
A
9229 int has_listeners = 0;
9230 int need_event = 0;
cb323159
A
9231 int truncated_path = 0;
9232 int truncated_no_firmlink_path = 0;
6d2010ae
A
9233#if CONFIG_FSE
9234 struct vnode_attr va;
9235#endif /* CONFIG_FSE */
9236 struct vnode_attr *vap = NULL;
c18c124e 9237 int restart_count = 0;
6d2010ae 9238 int batched;
91447636 9239
b0d623f7 9240 int restart_flag;
91447636 9241
fe8ab488 9242 /*
2d21ac55
A
9243 * This loop exists to restart rmdir in the unlikely case that two
9244 * processes are simultaneously trying to remove the same directory
9245 * containing orphaned appleDouble files.
9246 */
9247 do {
6d2010ae 9248 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 9249 segflg, dirpath, ctx);
6d2010ae
A
9250 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
9251continue_lookup:
2d21ac55 9252 restart_flag = 0;
6d2010ae 9253 vap = NULL;
2d21ac55 9254
fe8ab488 9255 error = nameiat(&nd, fd);
0a7de745
A
9256 if (error) {
9257 return error;
9258 }
2d21ac55
A
9259
9260 dvp = nd.ni_dvp;
9261 vp = nd.ni_vp;
9262
6d2010ae
A
9263 if (vp) {
9264 batched = vnode_compound_rmdir_available(vp);
2d21ac55 9265
6d2010ae
A
9266 if (vp->v_flag & VROOT) {
9267 /*
9268 * The root of a mounted filesystem cannot be deleted.
9269 */
9270 error = EBUSY;
9271 goto out;
9272 }
1c79356b 9273
00867663
A
9274#if DEVELOPMENT || DEBUG
9275 /*
0a7de745
A
9276 * XXX VSWAP: Check for entitlements or special flag here
9277 * so we can restrict access appropriately.
9278 */
00867663
A
9279#else /* DEVELOPMENT || DEBUG */
9280
9281 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
9282 error = EPERM;
9283 goto out;
9284 }
9285#endif /* DEVELOPMENT || DEBUG */
9286
2d21ac55 9287 /*
6d2010ae
A
9288 * Removed a check here; we used to abort if vp's vid
9289 * was not the same as what we'd seen the last time around.
9290 * I do not think that check was valid, because if we retry
9291 * and all dirents are gone, the directory could legitimately
9292 * be recycled but still be present in a situation where we would
fe8ab488 9293 * have had permission to delete. Therefore, we won't make
6d2010ae
A
9294 * an effort to preserve that check now that we may not have a
9295 * vp here.
2d21ac55 9296 */
6d2010ae
A
9297
9298 if (!batched) {
9299 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
9300 if (error) {
3e170ce0 9301 if (error == ENOENT) {
3e170ce0
A
9302 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
9303 restart_flag = 1;
9304 restart_count += 1;
9305 }
c18c124e 9306 }
6d2010ae
A
9307 goto out;
9308 }
9309 }
2d21ac55 9310 } else {
6d2010ae
A
9311 batched = 1;
9312
9313 if (!vnode_compound_rmdir_available(dvp)) {
9314 panic("No error, but no compound rmdir?");
9315 }
91447636 9316 }
6d2010ae 9317
2d21ac55 9318#if CONFIG_FSE
f427ee49 9319 fse_info finfo = {0};
b0d623f7 9320
6d2010ae
A
9321 need_event = need_fsevent(FSE_DELETE, dvp);
9322 if (need_event) {
9323 if (!batched) {
2d21ac55 9324 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
9325 } else {
9326 error = vfs_get_notify_attributes(&va);
9327 if (error) {
9328 goto out;
9329 }
9330
9331 vap = &va;
2d21ac55 9332 }
6d2010ae 9333 }
2d21ac55 9334#endif
6d2010ae
A
9335 has_listeners = kauth_authorize_fileop_has_listeners();
9336 if (need_event || has_listeners) {
9337 if (path == NULL) {
2d21ac55 9338 GET_PATH(path);
6d2010ae 9339 }
b0d623f7 9340
cb323159
A
9341 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
9342
9343 if (no_firmlink_path == NULL) {
9344 GET_PATH(no_firmlink_path);
cb323159
A
9345 }
9346
9347 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
b0d623f7 9348#if CONFIG_FSE
cb323159 9349 if (truncated_no_firmlink_path) {
6d2010ae 9350 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 9351 }
6d2010ae
A
9352#endif
9353 }
91447636 9354
6d2010ae
A
9355 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9356 nd.ni_vp = vp;
9357 if (vp == NULLVP) {
9358 /* Couldn't find a vnode */
9359 goto out;
9360 }
2d21ac55 9361
6d2010ae
A
9362 if (error == EKEEPLOOKING) {
9363 goto continue_lookup;
3e170ce0 9364 } else if (batched && error == ENOENT) {
3e170ce0
A
9365 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
9366 /*
9367 * For compound VNOPs, the authorization callback
9368 * may return ENOENT in case of racing hard link lookups
9369 * redrive the lookup.
9370 */
9371 restart_flag = 1;
9372 restart_count += 1;
9373 goto out;
9374 }
6d2010ae 9375 }
cb323159
A
9376
9377 /*
9378 * XXX There's no provision for passing flags
9379 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9380 * because it's not empty, then we try again
9381 * with VNOP_REMOVE(), passing in a special
9382 * flag that clever file systems will know
9383 * how to handle.
9384 */
9385 if (error == ENOTEMPTY &&
9386 (unlink_flags & VNODE_REMOVE_DATALESS_DIR) != 0) {
9387 /*
9388 * If this fails, we want to keep the original
9389 * error.
9390 */
9391 if (vn_remove(dvp, &vp, &nd,
9392 VNODE_REMOVE_DATALESS_DIR, vap, ctx) == 0) {
9393 error = 0;
9394 }
9395 }
9396
39236c6e 9397#if CONFIG_APPLEDOUBLE
6d2010ae
A
9398 /*
9399 * Special case to remove orphaned AppleDouble
9400 * files. I don't like putting this in the kernel,
9401 * but carbon does not like putting this in carbon either,
9402 * so here we are.
9403 */
9404 if (error == ENOTEMPTY) {
cb323159
A
9405 int ad_error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
9406 if (ad_error == EBUSY) {
9407 error = ad_error;
6d2010ae 9408 goto out;
2d21ac55
A
9409 }
9410
6d2010ae 9411
2d21ac55 9412 /*
fe8ab488 9413 * Assuming everything went well, we will try the RMDIR again
2d21ac55 9414 */
cb323159 9415 if (!ad_error) {
6d2010ae 9416 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
0a7de745 9417 }
6d2010ae 9418 }
39236c6e 9419#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 9420 /*
fe8ab488 9421 * Call out to allow 3rd party notification of delete.
6d2010ae
A
9422 * Ignore result of kauth_authorize_fileop call.
9423 */
9424 if (!error) {
9425 if (has_listeners) {
fe8ab488 9426 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
9427 KAUTH_FILEOP_DELETE,
9428 (uintptr_t)vp,
9429 (uintptr_t)path);
6d2010ae
A
9430 }
9431
9432 if (vp->v_flag & VISHARDLINK) {
9433 // see the comment in unlink1() about why we update
9434 // the parent of a hard link when it is removed
9435 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
9436 }
2d21ac55
A
9437
9438#if CONFIG_FSE
6d2010ae
A
9439 if (need_event) {
9440 if (vap) {
9441 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 9442 }
6d2010ae 9443 add_fsevent(FSE_DELETE, ctx,
cb323159 9444 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
0a7de745
A
9445 FSE_ARG_FINFO, &finfo,
9446 FSE_ARG_DONE);
2d21ac55 9447 }
6d2010ae 9448#endif
2d21ac55
A
9449 }
9450
9451out:
6d2010ae
A
9452 if (path != NULL) {
9453 RELEASE_PATH(path);
9454 path = NULL;
9455 }
cb323159
A
9456
9457 if (no_firmlink_path != NULL) {
9458 RELEASE_PATH(no_firmlink_path);
9459 no_firmlink_path = NULL;
9460 }
9461
2d21ac55
A
9462 /*
9463 * nameidone has to happen before we vnode_put(dvp)
9464 * since it may need to release the fs_nodelock on the dvp
9465 */
9466 nameidone(&nd);
2d21ac55 9467 vnode_put(dvp);
6d2010ae 9468
0a7de745 9469 if (vp) {
6d2010ae 9470 vnode_put(vp);
0a7de745 9471 }
2d21ac55
A
9472
9473 if (restart_flag == 0) {
9474 wakeup_one((caddr_t)vp);
0a7de745 9475 return error;
2d21ac55
A
9476 }
9477 tsleep(vp, PVFS, "rm AD", 1);
2d21ac55 9478 } while (restart_flag != 0);
91447636 9479
0a7de745 9480 return error;
1c79356b 9481}
91447636 9482
fe8ab488
A
9483/*
9484 * Remove a directory file.
9485 */
9486/* ARGSUSED */
9487int
9488rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
9489{
0a7de745 9490 return rmdirat_internal(vfs_context_current(), AT_FDCWD,
cb323159 9491 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE, 0);
fe8ab488
A
9492}
9493
/*
 * Get direntry length padded to 8 byte alignment: the size of a
 * struct direntry whose d_name holds `namlen' bytes plus the terminating
 * NUL, instead of the full MAXPATHLEN-byte array.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment: the size of a
 * struct dirent whose d_name holds `namelen' bytes plus the terminating
 * NUL, instead of the full (__DARWIN_MAXNAMLEN + 1)-byte array.
 *
 * The argument is parenthesized so that any expression may be passed.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent: the address of the last byte of the record. */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
9505
fe8ab488 9506errno_t
2d21ac55 9507vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
0a7de745 9508 int *numdirent, vfs_context_t ctxp)
2d21ac55
A
9509{
9510 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 9511 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
0a7de745 9512 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
9513 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
9514 } else {
9515 size_t bufsize;
9516 void * bufptr;
9517 uio_t auio;
15129b1c 9518 struct direntry *entry64;
2d21ac55 9519 struct dirent *dep;
f427ee49 9520 size_t bytesread;
2d21ac55
A
9521 int error;
9522
9523 /*
5ba3f43e
A
9524 * We're here because the underlying file system does not
9525 * support direnties or we mounted denying support so we must
9526 * fall back to dirents and convert them to direntries.
9527 *
9528 * Our kernel buffer needs to be smaller since re-packing will
9529 * expand each dirent. The worse case (when the name length
9530 * is 3 or less) corresponds to a struct direntry size of 32
2d21ac55
A
9531 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9532 * (4-byte aligned). So having a buffer that is 3/8 the size
9533 * will prevent us from reading more than we can pack.
0a7de745 9534 *
2d21ac55 9535 * Since this buffer is wired memory, we will limit the
39037602 9536 * buffer size to a maximum of 32K. We would really like to
2d21ac55 9537 * use 32K in the MIN(), but we use magic number 87371 to
39037602 9538 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 9539 */
316670eb 9540 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
f427ee49 9541 bufptr = kheap_alloc(KHEAP_DATA_BUFFERS, bufsize, Z_WAITOK);
b0d623f7
A
9542 if (bufptr == NULL) {
9543 return ENOMEM;
9544 }
2d21ac55 9545
b0d623f7 9546 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
9547 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
9548 auio->uio_offset = uio->uio_offset;
9549
9550 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
9551
9552 dep = (struct dirent *)bufptr;
9553 bytesread = bufsize - uio_resid(auio);
9554
f427ee49 9555 entry64 = kheap_alloc(KHEAP_TEMP, sizeof(struct direntry), Z_WAITOK);
2d21ac55
A
9556 /*
9557 * Convert all the entries and copy them out to user's buffer.
9558 */
9559 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
f427ee49
A
9560 /* First check that the dirent struct up to d_name is within the buffer */
9561 if ((char*)dep + offsetof(struct dirent, d_name) > ((char *)bufptr + bytesread) ||
9562 /* Check that the length of the entire dirent is within the buffer */
9563 DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
9564 /* Check that the actual length including the name doesn't exceed d_reclen */
5ba3f43e
A
9565 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
9566 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
0a7de745
A
9567 vp->v_mount->mnt_vfsstat.f_mntonname,
9568 vp->v_name ? vp->v_name : "<unknown>");
5ba3f43e
A
9569 error = EIO;
9570 break;
9571 }
9572
f427ee49
A
9573 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
9574
15129b1c 9575 bzero(entry64, enbufsize);
2d21ac55 9576 /* Convert a dirent to a dirent64. */
15129b1c
A
9577 entry64->d_ino = dep->d_ino;
9578 entry64->d_seekoff = 0;
f427ee49 9579 entry64->d_reclen = (uint16_t)enbufsize;
15129b1c
A
9580 entry64->d_namlen = dep->d_namlen;
9581 entry64->d_type = dep->d_type;
9582 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
9583
9584 /* Move to next entry. */
9585 dep = (struct dirent *)((char *)dep + dep->d_reclen);
9586
9587 /* Copy entry64 to user's buffer. */
15129b1c 9588 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
9589 }
9590
9591 /* Update the real offset using the offset we got from VNOP_READDIR. */
9592 if (error == 0) {
9593 uio->uio_offset = auio->uio_offset;
9594 }
9595 uio_free(auio);
f427ee49
A
9596 kheap_free(KHEAP_DATA_BUFFERS, bufptr, bufsize);
9597 kheap_free(KHEAP_TEMP, entry64, sizeof(struct direntry));
0a7de745 9598 return error;
2d21ac55
A
9599 }
9600}
1c79356b 9601
0a7de745 9602#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
39236c6e 9603
1c79356b
A
9604/*
9605 * Read a block of directory entries in a file system independent format.
9606 */
2d21ac55
A
9607static int
9608getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
cb323159 9609 off_t *offset, int *eofflag, int flags)
1c79356b 9610{
2d21ac55 9611 vnode_t vp;
0a7de745 9612 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
9613 struct fileproc *fp;
9614 uio_t auio;
2d21ac55
A
9615 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9616 off_t loff;
cb323159 9617 int error, numdirent;
0a7de745 9618 char uio_buf[UIO_SIZEOF(1)];
1c79356b 9619
2d21ac55
A
9620 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
9621 if (error) {
0a7de745 9622 return error;
2d21ac55 9623 }
f427ee49 9624 if ((fp->fp_glob->fg_flag & FREAD) == 0) {
91447636
A
9625 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9626 error = EBADF;
9627 goto out;
9628 }
2d21ac55 9629
0a7de745 9630 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) {
39236c6e 9631 bufsize = GETDIRENTRIES_MAXBUFSIZE;
0a7de745 9632 }
39236c6e 9633
2d21ac55 9634#if CONFIG_MACF
f427ee49 9635 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->fp_glob);
0a7de745 9636 if (error) {
2d21ac55 9637 goto out;
0a7de745 9638 }
2d21ac55 9639#endif
0a7de745 9640 if ((error = vnode_getwithref(vp))) {
91447636
A
9641 goto out;
9642 }
91447636 9643 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 9644
1c79356b 9645unionread:
91447636
A
9646 if (vp->v_type != VDIR) {
9647 (void)vnode_put(vp);
9648 error = EINVAL;
9649 goto out;
9650 }
2d21ac55
A
9651
9652#if CONFIG_MACF
9653 error = mac_vnode_check_readdir(&context, vp);
9654 if (error != 0) {
9655 (void)vnode_put(vp);
9656 goto out;
9657 }
9658#endif /* MAC */
91447636 9659
f427ee49 9660 loff = fp->fp_glob->fg_offset;
2d21ac55
A
9661 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9662 uio_addiov(auio, bufp, bufsize);
91447636 9663
2d21ac55 9664 if (flags & VNODE_READDIR_EXTENDED) {
cb323159 9665 error = vnode_readdir64(vp, auio, flags, eofflag, &numdirent, &context);
f427ee49 9666 fp->fp_glob->fg_offset = uio_offset(auio);
2d21ac55 9667 } else {
cb323159 9668 error = VNOP_READDIR(vp, auio, 0, eofflag, &numdirent, &context);
f427ee49 9669 fp->fp_glob->fg_offset = uio_offset(auio);
2d21ac55 9670 }
91447636
A
9671 if (error) {
9672 (void)vnode_put(vp);
9673 goto out;
9674 }
1c79356b 9675
0a7de745 9676 if ((user_ssize_t)bufsize == uio_resid(auio)) {
39236c6e 9677 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 9678 struct vnode *tvp = vp;
39236c6e
A
9679 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
9680 vnode_ref(vp);
f427ee49
A
9681 fp->fp_glob->fg_data = (caddr_t) vp;
9682 fp->fp_glob->fg_offset = 0;
39236c6e
A
9683 vnode_rele(tvp);
9684 vnode_put(tvp);
9685 goto unionread;
9686 }
9687 vp = tvp;
1c79356b
A
9688 }
9689 }
2d21ac55 9690
91447636 9691 vnode_put(vp);
2d21ac55
A
9692 if (offset) {
9693 *offset = loff;
9694 }
39037602 9695
2d21ac55 9696 *bytesread = bufsize - uio_resid(auio);
91447636
A
9697out:
9698 file_drop(fd);
0a7de745 9699 return error;
1c79356b
A
9700}
9701
2d21ac55
A
9702
9703int
b0d623f7 9704getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
9705{
9706 off_t offset;
2d21ac55 9707 ssize_t bytesread;
cb323159 9708 int error, eofflag;
2d21ac55
A
9709
9710 AUDIT_ARG(fd, uap->fd);
cb323159
A
9711 error = getdirentries_common(uap->fd, uap->buf, uap->count,
9712 &bytesread, &offset, &eofflag, 0);
2d21ac55
A
9713
9714 if (error == 0) {
b0d623f7
A
9715 if (proc_is64bit(p)) {
9716 user64_long_t base = (user64_long_t)offset;
9717 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
9718 } else {
9719 user32_long_t base = (user32_long_t)offset;
9720 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
9721 }
f427ee49 9722 *retval = (int)bytesread;
2d21ac55 9723 }
0a7de745 9724 return error;
2d21ac55
A
9725}
9726
9727int
9728getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
9729{
9730 off_t offset;
9731 ssize_t bytesread;
cb323159
A
9732 int error, eofflag;
9733 user_size_t bufsize;
2d21ac55
A
9734
9735 AUDIT_ARG(fd, uap->fd);
cb323159
A
9736
9737 /*
9738 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9739 * then the kernel carves out the last 4 bytes to return extended
9740 * information to userspace (namely whether we reached EOF with this call).
9741 */
9742 if (uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9743 bufsize = uap->bufsize - sizeof(getdirentries64_flags_t);
9744 } else {
9745 bufsize = uap->bufsize;
9746 }
9747
9748 error = getdirentries_common(uap->fd, uap->buf, bufsize,
9749 &bytesread, &offset, &eofflag, VNODE_READDIR_EXTENDED);
2d21ac55
A
9750
9751 if (error == 0) {
9752 *retval = bytesread;
9753 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
cb323159
A
9754
9755 if (error == 0 && uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9756 getdirentries64_flags_t flags = 0;
9757 if (eofflag) {
9758 flags |= GETDIRENTRIES64_EOF;
9759 }
9760 error = copyout(&flags, (user_addr_t)uap->buf + bufsize,
9761 sizeof(flags));
9762 }
2d21ac55 9763 }
0a7de745 9764 return error;
2d21ac55
A
9765}
9766
9767
1c79356b
A
9768/*
9769 * Set the mode mask for creation of filesystem nodes.
b0d623f7 9770 * XXX implement xsecurity
1c79356b 9771 */
0a7de745 9772#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
91447636 9773static int
b0d623f7 9774umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 9775{
2d21ac55 9776 struct filedesc *fdp;
1c79356b 9777
91447636 9778 AUDIT_ARG(mask, newmask);
2d21ac55 9779 proc_fdlock(p);
1c79356b
A
9780 fdp = p->p_fd;
9781 *retval = fdp->fd_cmask;
91447636 9782 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 9783 proc_fdunlock(p);
0a7de745 9784 return 0;
1c79356b
A
9785}
9786
b0d623f7
A
9787/*
9788 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9789 *
9790 * Parameters: p Process requesting to set the umask
9791 * uap User argument descriptor (see below)
9792 * retval umask of the process (parameter p)
9793 *
9794 * Indirect: uap->newmask umask to set
9795 * uap->xsecurity ACL to set
39037602 9796 *
b0d623f7
A
9797 * Returns: 0 Success
9798 * !0 Not success
9799 *
9800 */
9801int
9802umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
9803{
9804 int ciferror;
9805 kauth_filesec_t xsecdst;
9806
9807 xsecdst = KAUTH_FILESEC_NONE;
9808 if (uap->xsecurity != USER_ADDR_NULL) {
0a7de745 9809 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
91447636 9810 return ciferror;
0a7de745 9811 }
91447636
A
9812 } else {
9813 xsecdst = KAUTH_FILESEC_NONE;
9814 }
9815
9816 ciferror = umask1(p, uap->newmask, xsecdst, retval);
9817
0a7de745 9818 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 9819 kauth_filesec_free(xsecdst);
0a7de745 9820 }
91447636
A
9821 return ciferror;
9822}
9823
9824int
b0d623f7 9825umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636 9826{
0a7de745 9827 return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
91447636
A
9828}
9829
1c79356b
A
9830/*
9831 * Void all references to file by ripping underlying filesystem
9832 * away from vnode.
9833 */
1c79356b
A
9834/* ARGSUSED */
9835int
b0d623f7 9836revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 9837{
2d21ac55 9838 vnode_t vp;
91447636 9839 struct vnode_attr va;
2d21ac55 9840 vfs_context_t ctx = vfs_context_current();
1c79356b
A
9841 int error;
9842 struct nameidata nd;
9843
6d2010ae 9844 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 9845 uap->path, ctx);
55e303ae 9846 error = namei(&nd);
0a7de745
A
9847 if (error) {
9848 return error;
9849 }
1c79356b 9850 vp = nd.ni_vp;
91447636
A
9851
9852 nameidone(&nd);
9853
b0d623f7
A
9854 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
9855 error = ENOTSUP;
9856 goto out;
9857 }
9858
9859 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
9860 error = EBUSY;
9861 goto out;
9862 }
9863
2d21ac55
A
9864#if CONFIG_MACF
9865 error = mac_vnode_check_revoke(ctx, vp);
0a7de745 9866 if (error) {
2d21ac55 9867 goto out;
0a7de745 9868 }
2d21ac55
A
9869#endif
9870
91447636
A
9871 VATTR_INIT(&va);
9872 VATTR_WANTED(&va, va_uid);
0a7de745 9873 if ((error = vnode_getattr(vp, &va, ctx))) {
1c79356b 9874 goto out;
0a7de745 9875 }
2d21ac55 9876 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
0a7de745 9877 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
1c79356b 9878 goto out;
0a7de745
A
9879 }
9880 if (vp->v_usecount > 0 || (vnode_isaliased(vp))) {
2d21ac55 9881 VNOP_REVOKE(vp, REVOKEALL, ctx);
0a7de745 9882 }
1c79356b 9883out:
91447636 9884 vnode_put(vp);
0a7de745 9885 return error;
1c79356b
A
9886}
9887
0b4e3aa0 9888
/*
 *  HFS/HFS Plus SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats
 */
9894
9bccf70c 9895
1c79356b 9896/*
39236c6e
A
9897 * Obtain attribute information on objects in a directory while enumerating
9898 * the directory.
9899 */
1c79356b
A
9900/* ARGSUSED */
9901int
0a7de745 9902getdirentriesattr(proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 9903{
2d21ac55 9904 vnode_t vp;
91447636
A
9905 struct fileproc *fp;
9906 uio_t auio = NULL;
9907 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
d9a64523
A
9908 uint32_t count = 0, savecount = 0;
9909 uint32_t newstate = 0;
91447636 9910 int error, eofflag;
f427ee49 9911 off_t loff = 0;
39037602 9912 struct attrlist attributelist;
2d21ac55 9913 vfs_context_t ctx = vfs_context_current();
91447636 9914 int fd = uap->fd;
0a7de745 9915 char uio_buf[UIO_SIZEOF(1)];
91447636
A
9916 kauth_action_t action;
9917
9918 AUDIT_ARG(fd, fd);
39037602 9919
91447636 9920 /* Get the attributes into kernel space */
2d21ac55 9921 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
0a7de745 9922 return error;
2d21ac55
A
9923 }
9924 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
0a7de745 9925 return error;
2d21ac55 9926 }
39236c6e 9927 savecount = count;
0a7de745
A
9928 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
9929 return error;
2d21ac55 9930 }
f427ee49 9931 if ((fp->fp_glob->fg_flag & FREAD) == 0) {
91447636
A
9932 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9933 error = EBADF;
9934 goto out;
9935 }
2d21ac55
A
9936
9937
9938#if CONFIG_MACF
9939 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
f427ee49 9940 fp->fp_glob);
0a7de745 9941 if (error) {
2d21ac55 9942 goto out;
0a7de745 9943 }
2d21ac55
A
9944#endif
9945
9946
0a7de745 9947 if ((error = vnode_getwithref(vp))) {
91447636 9948 goto out;
0a7de745 9949 }
55e303ae 9950
91447636 9951 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 9952
39236c6e 9953unionread:
91447636
A
9954 if (vp->v_type != VDIR) {
9955 (void)vnode_put(vp);
9956 error = EINVAL;
9957 goto out;
9958 }
55e303ae 9959
2d21ac55
A
9960#if CONFIG_MACF
9961 error = mac_vnode_check_readdir(ctx, vp);
9962 if (error != 0) {
9963 (void)vnode_put(vp);
9964 goto out;
9965 }
9966#endif /* MAC */
9967
91447636 9968 /* set up the uio structure which will contain the users return buffer */
f427ee49 9969 loff = fp->fp_glob->fg_offset;
39236c6e 9970 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 9971 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 9972
91447636
A
9973 /*
9974 * If the only item requested is file names, we can let that past with
9975 * just LIST_DIRECTORY. If they want any other attributes, that means
9976 * they need SEARCH as well.
9977 */
9978 action = KAUTH_VNODE_LIST_DIRECTORY;
9979 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
0a7de745 9980 attributelist.fileattr || attributelist.dirattr) {
91447636 9981 action |= KAUTH_VNODE_SEARCH;
0a7de745 9982 }
39037602 9983
2d21ac55 9984 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
b0d623f7
A
9985 /* Believe it or not, uap->options only has 32-bits of valid
9986 * info, so truncate before extending again */
39236c6e
A
9987
9988 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
f427ee49 9989 (uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
39236c6e
A
9990 }
9991
9992 if (error) {
9993 (void) vnode_put(vp);
9994 goto out;
9995 }
9996
9997 /*
9998 * If we've got the last entry of a directory in a union mount
9999 * then reset the eofflag and pretend there's still more to come.
10000 * The next call will again set eofflag and the buffer will be empty,
10001 * so traverse to the underlying directory and do the directory
10002 * read there.
10003 */
10004 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
10005 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
10006 eofflag = 0;
0a7de745 10007 } else { // Empty buffer
39236c6e
A
10008 struct vnode *tvp = vp;
10009 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
f427ee49
A
10010 vnode_ref_ext(vp, fp->fp_glob->fg_flag & O_EVTONLY, 0);
10011 fp->fp_glob->fg_data = (caddr_t) vp;
10012 fp->fp_glob->fg_offset = 0; // reset index for new dir
39236c6e 10013 count = savecount;
f427ee49 10014 vnode_rele_internal(tvp, fp->fp_glob->fg_flag & O_EVTONLY, 0, 0);
39236c6e
A
10015 vnode_put(tvp);
10016 goto unionread;
10017 }
10018 vp = tvp;
10019 }
2d21ac55 10020 }
39236c6e 10021
91447636 10022 (void)vnode_put(vp);
1c79356b 10023
0a7de745 10024 if (error) {
91447636 10025 goto out;
0a7de745 10026 }
f427ee49 10027 fp->fp_glob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 10028
0a7de745 10029 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
91447636 10030 goto out;
0a7de745
A
10031 }
10032 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) {
91447636 10033 goto out;
0a7de745
A
10034 }
10035 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) {
91447636 10036 goto out;
0a7de745 10037 }
1c79356b
A
10038
10039 *retval = eofflag; /* similar to getdirentries */
91447636 10040 error = 0;
2d21ac55 10041out:
91447636 10042 file_drop(fd);
0a7de745 10043 return error; /* return error earlier, an retval of 0 or 1 now */
39236c6e 10044} /* end of getdirentriesattr system call */
1c79356b
A
10045
10046/*
0a7de745
A
10047 * Exchange data between two files
10048 */
1c79356b 10049
1c79356b
A
10050/* ARGSUSED */
10051int
0a7de745 10052exchangedata(__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b 10053{
1c79356b 10054 struct nameidata fnd, snd;
2d21ac55
A
10055 vfs_context_t ctx = vfs_context_current();
10056 vnode_t fvp;
10057 vnode_t svp;
10058 int error;
b0d623f7 10059 u_int32_t nameiflags;
91447636
A
10060 char *fpath = NULL;
10061 char *spath = NULL;
0a7de745
A
10062 int flen = 0, slen = 0;
10063 int from_truncated = 0, to_truncated = 0;
b0d623f7 10064#if CONFIG_FSE
91447636 10065 fse_info f_finfo, s_finfo;
b0d623f7 10066#endif
39037602 10067
1c79356b 10068 nameiflags = 0;
0a7de745
A
10069 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10070 nameiflags |= FOLLOW;
10071 }
1c79356b 10072
6d2010ae 10073 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
0a7de745 10074 UIO_USERSPACE, uap->path1, ctx);
1c79356b 10075
6d2010ae 10076 error = namei(&fnd);
0a7de745 10077 if (error) {
6d2010ae 10078 goto out2;
0a7de745 10079 }
1c79356b 10080
91447636
A
10081 nameidone(&fnd);
10082 fvp = fnd.ni_vp;
1c79356b 10083
39037602 10084 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
0a7de745 10085 UIO_USERSPACE, uap->path2, ctx);
1c79356b 10086
6d2010ae
A
10087 error = namei(&snd);
10088 if (error) {
91447636 10089 vnode_put(fvp);
55e303ae 10090 goto out2;
6d2010ae 10091 }
91447636 10092 nameidone(&snd);
1c79356b
A
10093 svp = snd.ni_vp;
10094
91447636
A
10095 /*
10096 * if the files are the same, return an inval error
10097 */
1c79356b 10098 if (svp == fvp) {
91447636
A
10099 error = EINVAL;
10100 goto out;
39037602 10101 }
1c79356b 10102
91447636
A
10103 /*
10104 * if the files are on different volumes, return an error
10105 */
10106 if (svp->v_mount != fvp->v_mount) {
0a7de745 10107 error = EXDEV;
91447636
A
10108 goto out;
10109 }
2d21ac55 10110
39236c6e 10111 /* If they're not files, return an error */
0a7de745 10112 if ((vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
10113 error = EINVAL;
10114 goto out;
10115 }
10116
2d21ac55
A
10117#if CONFIG_MACF
10118 error = mac_vnode_check_exchangedata(ctx,
10119 fvp, svp);
0a7de745 10120 if (error) {
2d21ac55 10121 goto out;
0a7de745 10122 }
2d21ac55
A
10123#endif
10124 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
0a7de745 10125 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) {
91447636 10126 goto out;
0a7de745 10127 }
1c79356b 10128
2d21ac55
A
10129 if (
10130#if CONFIG_FSE
0a7de745 10131 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55 10132#endif
0a7de745 10133 kauth_authorize_fileop_has_listeners()) {
2d21ac55
A
10134 GET_PATH(fpath);
10135 GET_PATH(spath);
b0d623f7
A
10136
10137 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
10138 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 10139
2d21ac55
A
10140#if CONFIG_FSE
10141 get_fse_info(fvp, &f_finfo, ctx);
10142 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
10143 if (from_truncated || to_truncated) {
10144 // set it here since only the f_finfo gets reported up to user space
10145 f_finfo.mode |= FSE_TRUNCATED_PATH;
10146 }
2d21ac55 10147#endif
91447636 10148 }
1c79356b 10149 /* Ok, make the call */
2d21ac55 10150 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 10151
91447636 10152 if (error == 0) {
0a7de745 10153 const char *tmpname;
91447636 10154
0a7de745
A
10155 if (fpath != NULL && spath != NULL) {
10156 /* call out to allow 3rd party notification of exchangedata.
10157 * Ignore result of kauth_authorize_fileop call.
10158 */
10159 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
10160 (uintptr_t)fpath, (uintptr_t)spath);
10161 }
10162 name_cache_lock();
91447636 10163
0a7de745
A
10164 tmpname = fvp->v_name;
10165 fvp->v_name = svp->v_name;
10166 svp->v_name = tmpname;
39037602 10167
0a7de745
A
10168 if (fvp->v_parent != svp->v_parent) {
10169 vnode_t tmp;
91447636 10170
0a7de745
A
10171 tmp = fvp->v_parent;
10172 fvp->v_parent = svp->v_parent;
10173 svp->v_parent = tmp;
10174 }
10175 name_cache_unlock();
91447636 10176
2d21ac55 10177#if CONFIG_FSE
0a7de745
A
10178 if (fpath != NULL && spath != NULL) {
10179 add_fsevent(FSE_EXCHANGE, ctx,
10180 FSE_ARG_STRING, flen, fpath,
10181 FSE_ARG_FINFO, &f_finfo,
10182 FSE_ARG_STRING, slen, spath,
10183 FSE_ARG_FINFO, &s_finfo,
10184 FSE_ARG_DONE);
10185 }
2d21ac55 10186#endif
55e303ae
A
10187 }
10188
1c79356b 10189out:
0a7de745
A
10190 if (fpath != NULL) {
10191 RELEASE_PATH(fpath);
10192 }
10193 if (spath != NULL) {
10194 RELEASE_PATH(spath);
10195 }
91447636
A
10196 vnode_put(svp);
10197 vnode_put(fvp);
1c79356b 10198out2:
0a7de745 10199 return error;
91447636 10200}
1c79356b 10201
39236c6e
A
10202/*
10203 * Return (in MB) the amount of freespace on the given vnode's volume.
10204 */
10205uint32_t freespace_mb(vnode_t vp);
10206
10207uint32_t
10208freespace_mb(vnode_t vp)
10209{
39037602 10210 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
f427ee49
A
10211 return (uint32_t)(((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
10212 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
39236c6e
A
10213}
10214
316670eb 10215#if CONFIG_SEARCHFS
1c79356b 10216
1c79356b
A
10217/* ARGSUSED */
10218
10219int
b0d623f7 10220searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 10221{
39236c6e 10222 vnode_t vp, tvp;
0a7de745 10223 int i, error = 0;
1c79356b
A
10224 int fserror = 0;
10225 struct nameidata nd;
b0d623f7 10226 struct user64_fssearchblock searchblock;
1c79356b
A
10227 struct searchstate *state;
10228 struct attrlist *returnattrs;
b0d623f7 10229 struct timeval timelimit;
0a7de745 10230 void *searchparams1, *searchparams2;
91447636
A
10231 uio_t auio = NULL;
10232 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 10233 uint32_t nummatches;
f427ee49 10234 size_t mallocsize;
b0d623f7 10235 uint32_t nameiflags;
2d21ac55 10236 vfs_context_t ctx = vfs_context_current();
0a7de745 10237 char uio_buf[UIO_SIZEOF(1)];
1c79356b 10238
39236c6e 10239 /* Start by copying in fsearchblock parameter list */
0a7de745
A
10240 if (IS_64BIT_PROCESS(p)) {
10241 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
10242 timelimit.tv_sec = searchblock.timelimit.tv_sec;
10243 timelimit.tv_usec = searchblock.timelimit.tv_usec;
10244 } else {
10245 struct user32_fssearchblock tmp_searchblock;
10246
10247 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
10248 // munge into 64-bit version
10249 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
10250 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
10251 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
10252 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 10253 /*
b0d623f7
A
10254 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
10255 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
10256 */
0a7de745
A
10257 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
10258 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
10259 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
10260 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
10261 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
10262 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
10263 searchblock.searchattrs = tmp_searchblock.searchattrs;
10264 }
10265 if (error) {
10266 return error;
10267 }
1c79356b 10268
39037602 10269 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 10270 */
39037602 10271 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
0a7de745
A
10272 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) {
10273 return EINVAL;
10274 }
39037602 10275
1c79356b
A
10276 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
10277 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
10278 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
0a7de745 10279 /* block. */
fe8ab488
A
10280 /* */
10281 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
10282 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
10283 /* assumes the size is still 556 bytes it will continue to work */
39037602 10284
91447636 10285 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
0a7de745 10286 sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
1c79356b 10287
f427ee49 10288 searchparams1 = kheap_alloc(KHEAP_DATA_BUFFERS, mallocsize, Z_WAITOK);
1c79356b
A
10289
10290 /* Now set up the various pointers to the correct place in our newly allocated memory */
10291
10292 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
10293 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
0a7de745 10294 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof(struct attrlist));
1c79356b
A
10295
10296 /* Now copy in the stuff given our local variables. */
10297
0a7de745 10298 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) {
1c79356b 10299 goto freeandexit;
0a7de745 10300 }
1c79356b 10301
0a7de745 10302 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) {
1c79356b 10303 goto freeandexit;
0a7de745 10304 }
1c79356b 10305
0a7de745 10306 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) {
1c79356b 10307 goto freeandexit;
0a7de745 10308 }
39037602 10309
0a7de745 10310 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) {
1c79356b 10311 goto freeandexit;
0a7de745 10312 }
1c79356b 10313
39236c6e
A
10314 /*
10315 * When searching a union mount, need to set the
10316 * start flag at the first call on each layer to
10317 * reset state for the new volume.
10318 */
0a7de745 10319 if (uap->options & SRCHFS_START) {
39236c6e 10320 state->ss_union_layer = 0;
0a7de745 10321 } else {
39236c6e 10322 uap->options |= state->ss_union_flags;
0a7de745 10323 }
39236c6e 10324 state->ss_union_flags = 0;
b0d623f7
A
10325
10326 /*
10327 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10328 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
10329 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10330 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
10331 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10332 */
10333
10334 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
10335 attrreference_t* string_ref;
10336 u_int32_t* start_length;
39037602 10337 user64_size_t param_length;
b0d623f7
A
10338
10339 /* validate searchparams1 */
39037602 10340 param_length = searchblock.sizeofsearchparams1;
b0d623f7 10341 /* skip the word that specifies length of the buffer */
0a7de745
A
10342 start_length = (u_int32_t*) searchparams1;
10343 start_length = start_length + 1;
10344 string_ref = (attrreference_t*) start_length;
b0d623f7
A
10345
10346 /* ensure no negative offsets or too big offsets */
0a7de745 10347 if (string_ref->attr_dataoffset < 0) {
b0d623f7 10348 error = EINVAL;
39037602 10349 goto freeandexit;
b0d623f7
A
10350 }
10351 if (string_ref->attr_length > MAXPATHLEN) {
10352 error = EINVAL;
10353 goto freeandexit;
10354 }
39037602 10355
b0d623f7
A
10356 /* Check for pointer overflow in the string ref */
10357 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
10358 error = EINVAL;
10359 goto freeandexit;
10360 }
10361
10362 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
10363 error = EINVAL;
10364 goto freeandexit;
10365 }
10366 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
10367 error = EINVAL;
10368 goto freeandexit;
10369 }
10370 }
10371
10372 /* set up the uio structure which will contain the users return buffer */
39236c6e 10373 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
0a7de745 10374 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 10375
91447636 10376 nameiflags = 0;
0a7de745
A
10377 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10378 nameiflags |= FOLLOW;
10379 }
6d2010ae 10380 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
0a7de745 10381 UIO_USERSPACE, uap->path, ctx);
1c79356b 10382
55e303ae 10383 error = namei(&nd);
0a7de745 10384 if (error) {
1c79356b 10385 goto freeandexit;
0a7de745 10386 }
39236c6e 10387 vp = nd.ni_vp;
91447636 10388 nameidone(&nd);
39236c6e
A
10389
10390 /*
10391 * Switch to the root vnode for the volume
10392 */
10393 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 10394 vnode_put(vp);
0a7de745 10395 if (error) {
39236c6e 10396 goto freeandexit;
0a7de745 10397 }
39236c6e
A
10398 vp = tvp;
10399
10400 /*
10401 * If it's a union mount, the path lookup takes
10402 * us to the top layer. But we may need to descend
10403 * to a lower layer. For non-union mounts the layer
10404 * is always zero.
10405 */
10406 for (i = 0; i < (int) state->ss_union_layer; i++) {
0a7de745 10407 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) {
39236c6e 10408 break;
0a7de745 10409 }
39236c6e
A
10410 tvp = vp;
10411 vp = vp->v_mount->mnt_vnodecovered;
10412 if (vp == NULL) {
fe8ab488 10413 vnode_put(tvp);
39236c6e
A
10414 error = ENOENT;
10415 goto freeandexit;
10416 }
813fb2f6 10417 error = vnode_getwithref(vp);
39236c6e 10418 vnode_put(tvp);
0a7de745 10419 if (error) {
813fb2f6 10420 goto freeandexit;
0a7de745 10421 }
39236c6e 10422 }
1c79356b 10423
6d2010ae
A
10424#if CONFIG_MACF
10425 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
10426 if (error) {
10427 vnode_put(vp);
10428 goto freeandexit;
10429 }
10430#endif
10431
39037602 10432
1c79356b 10433 /*
39037602 10434 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 10435 * before and sometimes the underlying code doesnt deal with it well.
1c79356b 10436 */
0a7de745 10437 if (searchblock.maxmatches == 0) {
1c79356b
A
10438 nummatches = 0;
10439 goto saveandexit;
0a7de745 10440 }
1c79356b
A
10441
10442 /*
39236c6e 10443 * Allright, we have everything we need, so lets make that call.
39037602 10444 *
39236c6e
A
10445 * We keep special track of the return value from the file system:
10446 * EAGAIN is an acceptable error condition that shouldn't keep us
10447 * from copying out any results...
1c79356b
A
10448 */
10449
6d2010ae 10450 fserror = VNOP_SEARCHFS(vp,
0a7de745
A
10451 searchparams1,
10452 searchparams2,
10453 &searchblock.searchattrs,
f427ee49 10454 (uint32_t)searchblock.maxmatches,
0a7de745
A
10455 &timelimit,
10456 returnattrs,
10457 &nummatches,
f427ee49
A
10458 (uint32_t)uap->scriptcode,
10459 (uint32_t)uap->options,
0a7de745
A
10460 auio,
10461 (struct searchstate *) &state->ss_fsstate,
10462 ctx);
39037602 10463
39236c6e
A
10464 /*
10465 * If it's a union mount we need to be called again
10466 * to search the mounted-on filesystem.
10467 */
10468 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
10469 state->ss_union_flags = SRCHFS_START;
0a7de745 10470 state->ss_union_layer++; // search next layer down
39236c6e
A
10471 fserror = EAGAIN;
10472 }
10473
6d2010ae
A
10474saveandexit:
10475
10476 vnode_put(vp);
10477
10478 /* Now copy out the stuff that needs copying out. That means the number of matches, the
0a7de745 10479 * search state. Everything was already put into he return buffer by the vop call. */
6d2010ae 10480
0a7de745 10481 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) {
6d2010ae 10482 goto freeandexit;
0a7de745 10483 }
6d2010ae 10484
0a7de745 10485 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) {
6d2010ae 10486 goto freeandexit;
0a7de745 10487 }
39037602 10488
6d2010ae
A
10489 error = fserror;
10490
10491freeandexit:
10492
f427ee49 10493 kheap_free(KHEAP_DATA_BUFFERS, searchparams1, mallocsize);
6d2010ae 10494
0a7de745 10495 return error;
6d2010ae
A
10496} /* end of searchfs system call */
10497
316670eb
A
10498#else /* CONFIG_SEARCHFS */
10499
10500int
10501searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
10502{
0a7de745 10503 return ENOTSUP;
316670eb
A
10504}
10505
10506#endif /* CONFIG_SEARCHFS */
6d2010ae
A
10507
10508
cb323159 10509#if CONFIG_DATALESS_FILES
6d2010ae 10510
cb323159
A
10511/*
10512 * === Namespace Resolver Up-call Mechanism ===
10513 *
10514 * When I/O is performed to a dataless file or directory (read, write,
10515 * lookup-in, etc.), the file system performs an upcall to the namespace
10516 * resolver (filecoordinationd) to materialize the object.
10517 *
10518 * We need multiple up-calls to be in flight at once, and we need these
10519 * up-calls to be interruptible, thus the following implementation:
10520 *
10521 * => The nspace_resolver_request represents the in-kernel request state.
10522 * It contains a request ID, storage space for the errno code returned
10523 * by filecoordinationd, and flags.
10524 *
10525 * => The request ID is simply a global monotonically incrementing 32-bit
10526 * number. Outstanding requests are stored in a hash table, and the
10527 * hash function is extremely simple.
10528 *
10529 * => When an upcall is to be made to filecoordinationd, a request structure
10530 * is allocated on the stack (it is small, and needs to live only during
10531 * the duration of the call to resolve_nspace_item_ext()). It is
10532 * initialized and inserted into the table. Some backpressure from
10533 * filecoordinationd is applied by limiting the number of entries that
10534 * can be inserted into the table (and thus limiting the number of
10535 * outstanding requests issued to filecoordinationd); waiting for an
10536 * available slot is interruptible.
10537 *
10538 * => Once the request has been inserted into the table, the up-call is made
10539 * to filecoordinationd via a MiG-generated stub. The up-call returns
10540 * immediately and filecoordinationd processes the request asynchronously.
10541 *
10542 * => The caller now waits for the request to complete. This is achieved by
10543 * sleeping on the address of the request structure and waiting for
10544 * filecoordinationd to mark the request structure as complete. This
10545 * is an interruptible sleep call; if interrupted, the request structure
10546 * is removed from the table and EINTR is returned to the caller. If
10547 * this occurs, an advisory up-call is made to filecoordinationd with
10548 * the request ID to indicate that the request can be aborted or
10549 * de-prioritized at the discretion of filecoordinationd.
10550 *
10551 * => When filecoordinationd has completed the request, it signals completion
10552 * by writing to the vfs.nspace.complete sysctl node. Only a process
10553 * decorated as a namespace resolver can write to this sysctl node. The
10554 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10555 * The request ID is looked up in the table, and if the request is found,
10556 * the error code is stored in the request structure and a wakeup()
10557 * issued on the address of the request structure. If the request is not
10558 * found, we simply drop the completion notification, assuming that the
10559 * caller was interrupted.
10560 *
10561 * => When the waiting thread wakes up, it extracts the error code from the
10562 * request structure, removes the request from the table, and returns the
10563 * error code to the calling function. Fini!
10564 */
6d2010ae 10565
cb323159
A
10566struct nspace_resolver_request {
10567 LIST_ENTRY(nspace_resolver_request) r_hashlink;
f427ee49 10568 vnode_t r_vp;
cb323159
A
10569 uint32_t r_req_id;
10570 int r_resolver_error;
10571 int r_flags;
10572};
6d2010ae 10573
cb323159 10574#define RRF_COMPLETE 0x0001
6d2010ae 10575
cb323159
A
10576static uint32_t
10577next_nspace_req_id(void)
10578{
10579 static uint32_t next_req_id;
6d2010ae 10580
cb323159 10581 return OSAddAtomic(1, &next_req_id);
6d2010ae
A
10582}
10583
cb323159
A
10584#define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10585#define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
6d2010ae 10586
cb323159
A
10587static LIST_HEAD(nspace_resolver_requesthead,
10588 nspace_resolver_request) * nspace_resolver_request_hashtbl;
10589static u_long nspace_resolver_request_hashmask;
10590static u_int nspace_resolver_request_count;
10591static bool nspace_resolver_request_wait_slot;
c3c9b80d
A
10592static LCK_GRP_DECLARE(nspace_resolver_request_lck_grp, "file namespace resolver");
10593static LCK_MTX_DECLARE(nspace_resolver_request_hash_mutex,
10594 &nspace_resolver_request_lck_grp);
6d2010ae 10595
cb323159
A
10596#define NSPACE_REQ_LOCK() \
10597 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10598#define NSPACE_REQ_UNLOCK() \
10599 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10600
10601#define NSPACE_RESOLVER_HASH(req_id) \
10602 (&nspace_resolver_request_hashtbl[(req_id) & \
10603 nspace_resolver_request_hashmask])
6d2010ae 10604
cb323159
A
10605static struct nspace_resolver_request *
10606nspace_resolver_req_lookup(uint32_t req_id)
6d2010ae 10607{
cb323159
A
10608 struct nspace_resolver_requesthead *bucket;
10609 struct nspace_resolver_request *req;
10610
10611 bucket = NSPACE_RESOLVER_HASH(req_id);
10612 LIST_FOREACH(req, bucket, r_hashlink) {
10613 if (req->r_req_id == req_id) {
10614 return req;
0a7de745 10615 }
6d2010ae 10616 }
6d2010ae 10617
cb323159 10618 return NULL;
6d2010ae
A
10619}
10620
cb323159
A
10621static int
10622nspace_resolver_req_add(struct nspace_resolver_request *req)
6d2010ae 10623{
cb323159
A
10624 struct nspace_resolver_requesthead *bucket;
10625 int error;
39037602 10626
cb323159
A
10627 while (nspace_resolver_request_count >=
10628 NSPACE_RESOLVER_MAX_OUTSTANDING) {
10629 nspace_resolver_request_wait_slot = true;
10630 error = msleep(&nspace_resolver_request_count,
10631 &nspace_resolver_request_hash_mutex,
10632 PVFS | PCATCH, "nspacerq", NULL);
10633 if (error) {
10634 return error;
6d2010ae
A
10635 }
10636 }
10637
cb323159
A
10638 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10639#if DIAGNOSTIC
10640 assert(nspace_resolver_req_lookup(req->r_req_id) == NULL);
10641#endif /* DIAGNOSTIC */
10642 LIST_INSERT_HEAD(bucket, req, r_hashlink);
10643 nspace_resolver_request_count++;
39037602 10644
cb323159
A
10645 return 0;
10646}
39037602 10647
cb323159
A
10648static void
10649nspace_resolver_req_remove(struct nspace_resolver_request *req)
10650{
10651 struct nspace_resolver_requesthead *bucket;
39037602 10652
cb323159
A
10653 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10654#if DIAGNOSTIC
10655 assert(nspace_resolver_req_lookup(req->r_req_id) != NULL);
10656#endif /* DIAGNOSTIC */
10657 LIST_REMOVE(req, r_hashlink);
10658 nspace_resolver_request_count--;
39037602 10659
cb323159
A
10660 if (nspace_resolver_request_wait_slot) {
10661 nspace_resolver_request_wait_slot = false;
10662 wakeup(&nspace_resolver_request_count);
10663 }
6d2010ae
A
10664}
10665
cb323159
A
10666static void
10667nspace_resolver_req_cancel(uint32_t req_id)
6d2010ae 10668{
cb323159
A
10669 kern_return_t kr;
10670 mach_port_t mp;
6d2010ae 10671
cb323159
A
10672 // Failures here aren't fatal -- the cancellation message
10673 // sent to the resolver is merely advisory.
6d2010ae 10674
cb323159
A
10675 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10676 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10677 return;
6d2010ae
A
10678 }
10679
cb323159
A
10680 kr = send_nspace_resolve_cancel(mp, req_id);
10681 if (kr != KERN_SUCCESS) {
10682 os_log_error(OS_LOG_DEFAULT,
10683 "NSPACE send_nspace_resolve_cancel failure: %d", kr);
6d2010ae
A
10684 }
10685
cb323159
A
10686 ipc_port_release_send(mp);
10687}
6d2010ae 10688
cb323159
A
10689static int
10690nspace_resolver_req_wait(struct nspace_resolver_request *req)
10691{
10692 bool send_cancel_message = false;
10693 int error;
6d2010ae 10694
cb323159 10695 NSPACE_REQ_LOCK();
6d2010ae 10696
cb323159
A
10697 while ((req->r_flags & RRF_COMPLETE) == 0) {
10698 error = msleep(req, &nspace_resolver_request_hash_mutex,
10699 PVFS | PCATCH, "nspace", NULL);
10700 if (error && error != ERESTART) {
10701 req->r_resolver_error = (error == EINTR) ? EINTR :
10702 ETIMEDOUT;
10703 send_cancel_message = true;
6d2010ae
A
10704 break;
10705 }
10706 }
10707
cb323159 10708 nspace_resolver_req_remove(req);
39037602 10709
cb323159 10710 NSPACE_REQ_UNLOCK();
6d2010ae 10711
cb323159
A
10712 if (send_cancel_message) {
10713 nspace_resolver_req_cancel(req->r_req_id);
6d2010ae
A
10714 }
10715
cb323159
A
10716 return req->r_resolver_error;
10717}
6d2010ae 10718
cb323159
A
10719static void
10720nspace_resolver_req_mark_complete(
10721 struct nspace_resolver_request *req,
10722 int resolver_error)
10723{
10724 req->r_resolver_error = resolver_error;
10725 req->r_flags |= RRF_COMPLETE;
10726 wakeup(req);
10727}
39037602 10728
cb323159 10729static void
f427ee49 10730nspace_resolver_req_completed(uint32_t req_id, int resolver_error, uint64_t orig_gencount)
cb323159
A
10731{
10732 struct nspace_resolver_request *req;
6d2010ae 10733
cb323159 10734 NSPACE_REQ_LOCK();
6d2010ae 10735
cb323159
A
10736 // If we don't find the request corresponding to our req_id,
10737 // just drop the completion signal on the floor; it's likely
10738 // that the requester interrupted with a signal.
6d2010ae 10739
cb323159
A
10740 req = nspace_resolver_req_lookup(req_id);
10741 if (req) {
f427ee49
A
10742 mount_t locked_mp = NULL;
10743
10744 locked_mp = req->r_vp->v_mount;
10745 mount_ref(locked_mp, 0);
10746 mount_lock_renames(locked_mp);
10747
10748 //
10749 // if the resolver isn't already returning an error and we have an
10750 // orig_gencount, then get an iocount on the request vnode and check
10751 // that the gencount on req->r_vp has not changed.
10752 //
10753 // note: a ref was taken on req->r_vp when the request was created
10754 // and that ref will be dropped by that thread when it wakes up.
10755 //
10756 if (resolver_error == 0 &&
10757 orig_gencount != 0 &&
10758 vnode_getwithref(req->r_vp) == 0) {
10759 struct vnode_attr va;
10760 uint64_t cur_gencount;
10761
10762 VATTR_INIT(&va);
10763 VATTR_WANTED(&va, va_recursive_gencount);
10764
10765 if (vnode_getattr(req->r_vp, &va, vfs_context_kernel()) == 0) {
10766 cur_gencount = va.va_recursive_gencount;
10767 } else {
10768 cur_gencount = 0;
10769 }
10770
10771 if (resolver_error == 0 && cur_gencount && orig_gencount && cur_gencount != orig_gencount) {
10772 printf("nspace.complete: gencount changed! (orig %llu cur %llu)\n", orig_gencount, cur_gencount);
10773
10774 // this error will be returned to the thread that initiated the
10775 // materialization of req->r_vp.
10776 resolver_error = EBUSY;
10777
10778 // note: we explicitly do not return an error to the caller (i.e.
10779 // the thread that did the materialization) because they said they
10780 // don't want one.
10781 }
10782
10783 vnode_put(req->r_vp);
10784 }
10785
10786 mount_unlock_renames(locked_mp);
10787 mount_drop(locked_mp, 0);
10788
cb323159 10789 nspace_resolver_req_mark_complete(req, resolver_error);
6d2010ae
A
10790 }
10791
cb323159 10792 NSPACE_REQ_UNLOCK();
f427ee49
A
10793
10794 return;
cb323159
A
10795}
10796
10797static struct proc *nspace_resolver_proc;
6d2010ae 10798
cb323159
A
10799static int
10800nspace_resolver_get_proc_state(struct proc *p, int *is_resolver)
10801{
10802 *is_resolver = ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10803 p == nspace_resolver_proc) ? 1 : 0;
10804 return 0;
6d2010ae
A
10805}
10806
cb323159
A
10807static int
10808nspace_resolver_set_proc_state(struct proc *p, int is_resolver)
6d2010ae 10809{
cb323159
A
10810 vfs_context_t ctx = vfs_context_current();
10811 int error = 0;
6d2010ae 10812
cb323159
A
10813 //
10814 // The system filecoordinationd runs as uid == 0. This also
10815 // has the nice side-effect of filtering out filecoordinationd
10816 // running in the simulator.
10817 //
10818 if (!vfs_context_issuser(ctx)) {
10819 return EPERM;
39037602
A
10820 }
10821
cb323159
A
10822 error = priv_check_cred(vfs_context_ucred(ctx),
10823 PRIV_VFS_DATALESS_RESOLVER, 0);
10824 if (error) {
10825 return error;
39037602
A
10826 }
10827
cb323159
A
10828 if (is_resolver) {
10829 NSPACE_REQ_LOCK();
39037602 10830
cb323159
A
10831 if (nspace_resolver_proc == NULL) {
10832 proc_lock(p);
10833 p->p_lflag |= P_LNSPACE_RESOLVER;
10834 proc_unlock(p);
10835 nspace_resolver_proc = p;
10836 } else {
10837 error = EBUSY;
39037602 10838 }
cb323159
A
10839
10840 NSPACE_REQ_UNLOCK();
10841 } else {
10842 // This is basically just like the exit case.
10843 // nspace_resolver_exited() will verify that the
10844 // process is the resolver, and will clear the
10845 // global.
10846 nspace_resolver_exited(p);
39037602
A
10847 }
10848
cb323159 10849 return error;
39037602
A
10850}
10851
cb323159
A
10852static int
10853nspace_materialization_get_proc_state(struct proc *p, int *is_prevented)
39037602 10854{
cb323159
A
10855 if ((p->p_lflag & P_LNSPACE_RESOLVER) != 0 ||
10856 (p->p_vfs_iopolicy &
10857 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) == 0) {
10858 *is_prevented = 1;
10859 } else {
10860 *is_prevented = 0;
6d2010ae 10861 }
6d2010ae
A
10862 return 0;
10863}
39037602 10864
6d2010ae 10865static int
cb323159 10866nspace_materialization_set_proc_state(struct proc *p, int is_prevented)
6d2010ae 10867{
cb323159
A
10868 if (p->p_lflag & P_LNSPACE_RESOLVER) {
10869 return is_prevented ? 0 : EBUSY;
10870 }
6d2010ae 10871
cb323159
A
10872 if (is_prevented) {
10873 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES), &p->p_vfs_iopolicy);
6d2010ae 10874 } else {
cb323159 10875 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES, &p->p_vfs_iopolicy);
6d2010ae 10876 }
cb323159
A
10877 return 0;
10878}
6d2010ae 10879
cb323159
A
10880static int
10881nspace_materialization_get_thread_state(int *is_prevented)
10882{
10883 uthread_t ut = get_bsdthread_info(current_thread());
10884
10885 *is_prevented = (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) ? 1 : 0;
10886 return 0;
6d2010ae 10887}
6d2010ae 10888
6d2010ae 10889static int
cb323159 10890nspace_materialization_set_thread_state(int is_prevented)
6d2010ae 10891{
cb323159 10892 uthread_t ut = get_bsdthread_info(current_thread());
6d2010ae 10893
cb323159
A
10894 if (is_prevented) {
10895 ut->uu_flag |= UT_NSPACE_NODATALESSFAULTS;
10896 } else {
10897 ut->uu_flag &= ~UT_NSPACE_NODATALESSFAULTS;
6d2010ae 10898 }
cb323159
A
10899 return 0;
10900}
6d2010ae 10901
cb323159
A
10902/* the vfs.nspace branch */
10903SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
1c79356b 10904
cb323159
A
10905static int
10906sysctl_nspace_resolver(__unused struct sysctl_oid *oidp,
10907 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10908{
10909 struct proc *p = req->p;
10910 int new_value, old_value, changed = 0;
10911 int error;
10912
10913 error = nspace_resolver_get_proc_state(p, &old_value);
10914 if (error) {
6d2010ae
A
10915 return error;
10916 }
cb323159
A
10917
10918 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10919 &changed);
10920 if (error == 0 && changed) {
10921 error = nspace_resolver_set_proc_state(p, new_value);
6d2010ae 10922 }
cb323159
A
10923 return error;
10924}
1c79356b 10925
cb323159
A
10926/* decorate this process as the dataless file resolver */
10927SYSCTL_PROC(_vfs_nspace, OID_AUTO, resolver,
10928 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10929 0, 0, sysctl_nspace_resolver, "I", "");
1c79356b 10930
cb323159
A
10931static int
10932sysctl_nspace_prevent_materialization(__unused struct sysctl_oid *oidp,
10933 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10934{
10935 struct proc *p = req->p;
10936 int new_value, old_value, changed = 0;
10937 int error;
1c79356b 10938
cb323159
A
10939 error = nspace_materialization_get_proc_state(p, &old_value);
10940 if (error) {
10941 return error;
10942 }
10943
10944 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10945 &changed);
10946 if (error == 0 && changed) {
10947 error = nspace_materialization_set_proc_state(p, new_value);
10948 }
10949 return error;
6d2010ae 10950}
1c79356b 10951
cb323159
A
10952/* decorate this process as not wanting to materialize dataless files */
10953SYSCTL_PROC(_vfs_nspace, OID_AUTO, prevent_materialization,
10954 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10955 0, 0, sysctl_nspace_prevent_materialization, "I", "");
10956
6d2010ae 10957static int
cb323159
A
10958sysctl_nspace_thread_prevent_materialization(__unused struct sysctl_oid *oidp,
10959 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
6d2010ae 10960{
cb323159
A
10961 int new_value, old_value, changed = 0;
10962 int error;
39037602 10963
cb323159
A
10964 error = nspace_materialization_get_thread_state(&old_value);
10965 if (error) {
10966 return error;
6d2010ae 10967 }
39037602 10968
cb323159
A
10969 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10970 &changed);
10971 if (error == 0 && changed) {
10972 error = nspace_materialization_set_thread_state(new_value);
10973 }
10974 return error;
10975}
39037602 10976
cb323159
A
10977/* decorate this thread as not wanting to materialize dataless files */
10978SYSCTL_PROC(_vfs_nspace, OID_AUTO, thread_prevent_materialization,
10979 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10980 0, 0, sysctl_nspace_thread_prevent_materialization, "I", "");
39037602 10981
cb323159
A
10982static int
10983sysctl_nspace_complete(__unused struct sysctl_oid *oidp, __unused void *arg1,
10984 __unused int arg2, struct sysctl_req *req)
10985{
10986 struct proc *p = req->p;
10987 uint32_t req_status[2] = { 0, 0 };
f427ee49
A
10988 uint64_t gencount = 0;
10989 int error, is_resolver, changed = 0, gencount_changed;
39037602 10990
cb323159
A
10991 error = nspace_resolver_get_proc_state(p, &is_resolver);
10992 if (error) {
10993 return error;
39037602
A
10994 }
10995
cb323159
A
10996 if (!is_resolver) {
10997 return EPERM;
10998 }
39236c6e 10999
cb323159
A
11000 error = sysctl_io_opaque(req, req_status, sizeof(req_status),
11001 &changed);
11002 if (error) {
11003 return error;
11004 }
39037602 11005
f427ee49
A
11006 // get the gencount if it was passed
11007 error = sysctl_io_opaque(req, &gencount, sizeof(gencount),
11008 &gencount_changed);
11009 if (error) {
11010 gencount = 0;
11011 // we ignore the error because the gencount was optional
11012 error = 0;
11013 }
11014
cb323159
A
11015 /*
11016 * req_status[0] is the req_id
11017 *
11018 * req_status[1] is the errno
11019 */
11020 if (error == 0 && changed) {
11021 nspace_resolver_req_completed(req_status[0],
f427ee49 11022 (int)req_status[1], gencount);
cb323159
A
11023 }
11024 return error;
11025}
39037602 11026
cb323159
A
11027/* Resolver reports completed reqs here. */
11028SYSCTL_PROC(_vfs_nspace, OID_AUTO, complete,
11029 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
11030 0, 0, sysctl_nspace_complete, "-", "");
39037602 11031
cb323159 11032#endif /* CONFIG_DATALESS_FILES */
39037602 11033
cb323159
A
11034#if CONFIG_DATALESS_FILES
11035#define __no_dataless_unused /* nothing */
11036#else
11037#define __no_dataless_unused __unused
11038#endif
39037602 11039
c3c9b80d
A
11040int
11041vfs_context_dataless_materialization_is_prevented(
11042 vfs_context_t const ctx __no_dataless_unused)
cb323159
A
11043{
11044#if CONFIG_DATALESS_FILES
c3c9b80d
A
11045 proc_t const p = vfs_context_proc(ctx);
11046 thread_t const t = vfs_context_thread(ctx);
11047 uthread_t const ut = t ? get_bsdthread_info(t) : NULL;
11048
11049 /*
11050 * Kernel context ==> return EDEADLK, as we would with any random
11051 * process decorated as no-materialize.
11052 */
11053 if (ctx == vfs_context_kernel()) {
11054 return EDEADLK;
11055 }
11056
11057 /*
11058 * If the process has the dataless-manipulation entitlement,
11059 * materialization is prevented, and depending on the kind
11060 * of file system operation, things get to proceed as if the
11061 * object is not dataless.
11062 */
11063 if (vfs_context_is_dataless_manipulator(ctx)) {
11064 return EJUSTRETURN;
11065 }
11066
11067 /*
11068 * Per-thread decorations override any process-wide decorations.
11069 * (Foundation uses this, and this overrides even the dataless-
11070 * manipulation entitlement so as to make API contracts consistent.)
11071 */
11072 if (ut != NULL) {
11073 if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
11074 return EDEADLK;
11075 }
11076 if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
11077 return 0;
11078 }
11079 }
39037602 11080
c3c9b80d
A
11081 /*
11082 * If the process's iopolicy specifies that dataless files
11083 * can be materialized, then we let it go ahead.
11084 */
11085 if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
11086 return 0;
11087 }
11088#endif /* CONFIG_DATALESS_FILES */
39037602 11089
c3c9b80d
A
11090 /*
11091 * The default behavior is to not materialize dataless files;
11092 * return to the caller that deadlock was detected.
11093 */
11094 return EDEADLK;
11095}
11096
/*
 * Allocate the hash table used to look up outstanding resolver requests.
 * A no-op when CONFIG_DATALESS_FILES is not configured.
 */
void
nspace_resolver_init(void)
{
#if CONFIG_DATALESS_FILES
	nspace_resolver_request_hashtbl = hashinit(
		NSPACE_RESOLVER_REQ_HASHSIZE,
		M_VNODE /* XXX */,
		&nspace_resolver_request_hashmask);
#endif /* CONFIG_DATALESS_FILES */
}
39037602 11106
cb323159
A
11107void
11108nspace_resolver_exited(struct proc *p __no_dataless_unused)
11109{
11110#if CONFIG_DATALESS_FILES
11111 struct nspace_resolver_requesthead *bucket;
11112 struct nspace_resolver_request *req;
11113 u_long idx;
39037602 11114
cb323159 11115 NSPACE_REQ_LOCK();
39037602 11116
cb323159
A
11117 if ((p->p_lflag & P_LNSPACE_RESOLVER) &&
11118 p == nspace_resolver_proc) {
11119 for (idx = 0; idx <= nspace_resolver_request_hashmask; idx++) {
11120 bucket = &nspace_resolver_request_hashtbl[idx];
11121 LIST_FOREACH(req, bucket, r_hashlink) {
11122 nspace_resolver_req_mark_complete(req,
11123 ETIMEDOUT);
0a7de745 11124 }
39037602 11125 }
cb323159
A
11126 nspace_resolver_proc = NULL;
11127 }
39037602 11128
cb323159
A
11129 NSPACE_REQ_UNLOCK();
11130#endif /* CONFIG_DATALESS_FILES */
11131}
39037602 11132
cb323159
A
/*
 * Convenience wrapper: resolve_nspace_item_ext() with no extra argument.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *const no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
39037602 11138
cb323159
A
11139#define DATALESS_RESOLVER_ENTITLEMENT \
11140 "com.apple.private.vfs.dataless-resolver"
11141#define DATALESS_MANIPULATION_ENTITLEMENT \
11142 "com.apple.private.vfs.dataless-manipulation"
39037602 11143
cb323159
A
11144/*
11145 * Return TRUE if the vfs context is associated with a process entitled
11146 * for dataless manipulation.
11147 *
11148 * XXX Arguably belongs in vfs_subr.c, but is here because of the
11149 * complication around CONFIG_DATALESS_FILES.
11150 */
11151boolean_t
11152vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused)
11153{
11154#if CONFIG_DATALESS_FILES
11155 assert(ctx->vc_thread == current_thread());
11156 task_t const task = current_task();
11157 return IOTaskHasEntitlement(task, DATALESS_MANIPULATION_ENTITLEMENT) ||
11158 IOTaskHasEntitlement(task, DATALESS_RESOLVER_ENTITLEMENT);
11159#else
11160 return false;
11161#endif /* CONFIG_DATALESS_FILES */
11162}
39037602 11163
cb323159
A
11164int
11165resolve_nspace_item_ext(
11166 struct vnode *vp __no_dataless_unused,
11167 uint64_t op __no_dataless_unused,
11168 void *arg __unused)
11169{
11170#if CONFIG_DATALESS_FILES
11171 int error;
11172 mach_port_t mp;
11173 char *path = NULL;
11174 int path_len;
11175 kern_return_t kr;
11176 struct nspace_resolver_request req;
39037602 11177
cb323159
A
11178 // only allow namespace events on regular files, directories and symlinks.
11179 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
11180 return EFTYPE;
6d2010ae 11181 }
39037602 11182
cb323159
A
11183 //
11184 // if this is a snapshot event and the vnode is on a
11185 // disk image just pretend nothing happened since any
11186 // change to the disk image will cause the disk image
11187 // itself to get backed up and this avoids multi-way
11188 // deadlocks between the snapshot handler and the ever
11189 // popular diskimages-helper process. the variable
11190 // nspace_allow_virtual_devs allows this behavior to
11191 // be overridden (for use by the Mobile TimeMachine
11192 // testing infrastructure which uses disk images)
11193 //
11194 if (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
11195 os_log_debug(OS_LOG_DEFAULT, "NSPACE SNAPSHOT not handled");
11196 return ENOTSUP;
6d2010ae 11197 }
39037602 11198
c3c9b80d
A
11199 error = vfs_context_dataless_materialization_is_prevented(
11200 vfs_context_current());
cb323159
A
11201 if (error) {
11202 os_log_debug(OS_LOG_DEFAULT,
11203 "NSPACE process/thread is decorated as no-materialization");
11204 return error;
11205 }
39037602 11206
cb323159
A
11207 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
11208 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
11209 os_log_error(OS_LOG_DEFAULT, "NSPACE no port");
11210 // Treat this like being unable to access the backing
11211 // store server.
11212 return ETIMEDOUT;
11213 }
39037602 11214
f427ee49 11215 path = zalloc(ZV_NAMEI);
cb323159 11216 path_len = MAXPATHLEN;
1c79356b 11217
cb323159
A
11218 error = vn_getpath(vp, path, &path_len);
11219 if (error == 0) {
11220 int xxx_rdar44371223; /* XXX Mig bug */
11221 req.r_req_id = next_nspace_req_id();
11222 req.r_resolver_error = 0;
11223 req.r_flags = 0;
11224
f427ee49
A
11225 if ((error = vnode_ref(vp)) == 0) { // take a ref so that the vnode doesn't go away
11226 req.r_vp = vp;
11227 } else {
11228 goto out_release_port;
11229 }
11230
cb323159
A
11231 NSPACE_REQ_LOCK();
11232 error = nspace_resolver_req_add(&req);
11233 NSPACE_REQ_UNLOCK();
11234 if (error) {
f427ee49 11235 vnode_rele(req.r_vp);
cb323159 11236 goto out_release_port;
39236c6e 11237 }
cb323159
A
11238
11239 os_log_debug(OS_LOG_DEFAULT, "NSPACE resolve_path call");
11240 kr = send_nspace_resolve_path(mp, req.r_req_id,
11241 current_proc()->p_pid, (uint32_t)(op & 0xffffffff),
11242 path, &xxx_rdar44371223);
11243 if (kr != KERN_SUCCESS) {
11244 // Also treat this like being unable to access
11245 // the backing store server.
11246 os_log_error(OS_LOG_DEFAULT,
11247 "NSPACE resolve_path failure: %d", kr);
11248 error = ETIMEDOUT;
11249
11250 NSPACE_REQ_LOCK();
11251 nspace_resolver_req_remove(&req);
11252 NSPACE_REQ_UNLOCK();
f427ee49 11253 vnode_rele(req.r_vp);
cb323159 11254 goto out_release_port;
39236c6e 11255 }
cb323159
A
11256
11257 // Give back the memory we allocated earlier while
11258 // we wait; we no longer need it.
f427ee49 11259 zfree(ZV_NAMEI, path);
cb323159
A
11260 path = NULL;
11261
11262 // Request has been submitted to the resolver.
11263 // Now (interruptibly) wait for completion.
11264 // Upon requrn, the request will have been removed
11265 // from the lookup table.
11266 error = nspace_resolver_req_wait(&req);
f427ee49
A
11267
11268 vnode_rele(req.r_vp);
39236c6e
A
11269 }
11270
cb323159
A
11271out_release_port:
11272 if (path != NULL) {
f427ee49 11273 zfree(ZV_NAMEI, path);
cb323159
A
11274 }
11275 ipc_port_release_send(mp);
39236c6e 11276
cb323159
A
11277 return error;
11278#else
11279 return ENOTSUP;
11280#endif /* CONFIG_DATALESS_FILES */
11281}
11282
11283int
11284nspace_snapshot_event(__unused vnode_t vp, __unused time_t ctime,
11285 __unused uint64_t op_type, __unused void *arg)
11286{
39236c6e 11287 return 0;
39236c6e 11288}
1c79356b 11289
#if 0
/*
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.
 *
 * On entry *len is the buffer size; on return it holds the snprintf()
 * result plus one.  Returns 0 on success, or -1 (with a placeholder
 * path written) when vnode_getattr() fails.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		return 0;
	}

	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
	return -1;
}
#endif
1c79356b 11312
5ba3f43e
A
11313static unsigned long
11314fsctl_bogus_command_compat(unsigned long cmd)
11315{
5ba3f43e
A
11316 switch (cmd) {
11317 case IOCBASECMD(FSIOC_SYNC_VOLUME):
0a7de745 11318 return FSIOC_SYNC_VOLUME;
5ba3f43e 11319 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
0a7de745 11320 return FSIOC_ROUTEFS_SETROUTEID;
5ba3f43e 11321 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
0a7de745 11322 return FSIOC_SET_PACKAGE_EXTS;
5ba3f43e 11323 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
0a7de745 11324 return FSIOC_SET_FSTYPENAME_OVERRIDE;
5ba3f43e 11325 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
0a7de745 11326 return DISK_CONDITIONER_IOC_GET;
5ba3f43e 11327 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
0a7de745 11328 return DISK_CONDITIONER_IOC_SET;
5ba3f43e 11329 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
0a7de745 11330 return FSIOC_FIOSEEKHOLE;
5ba3f43e 11331 case IOCBASECMD(FSIOC_FIOSEEKDATA):
0a7de745 11332 return FSIOC_FIOSEEKDATA;
5ba3f43e 11333 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
0a7de745 11334 return SPOTLIGHT_IOC_GET_MOUNT_TIME;
5ba3f43e 11335 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
0a7de745 11336 return SPOTLIGHT_IOC_GET_LAST_MTIME;
5ba3f43e
A
11337 }
11338
0a7de745 11339 return cmd;
5ba3f43e
A
11340}
11341
cb323159
A
11342static int
11343cas_bsdflags_setattr(vnode_t vp, void *arg, vfs_context_t ctx)
11344{
11345 return VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, arg, FWRITE, ctx);
11346}
11347
f427ee49
A
11348static int __attribute__((noinline))
11349handle_sync_volume(vnode_t vp, vnode_t *arg_vp, caddr_t data, vfs_context_t ctx)
11350{
11351 struct vfs_attr vfa;
11352 mount_t mp = vp->v_mount;
11353 unsigned arg;
11354 int error;
11355
11356 /* record vid of vp so we can drop it below. */
11357 uint32_t vvid = vp->v_id;
11358
11359 /*
11360 * Then grab mount_iterref so that we can release the vnode.
11361 * Without this, a thread may call vnode_iterate_prepare then
11362 * get into a deadlock because we've never released the root vp
11363 */
11364 error = mount_iterref(mp, 0);
11365 if (error) {
11366 return error;
11367 }
11368 vnode_put(vp);
11369
11370 arg = MNT_NOWAIT;
11371 if (*(uint32_t*)data & FSCTL_SYNC_WAIT) {
11372 arg = MNT_WAIT;
11373 }
11374
11375 /*
11376 * If the filessytem supports multiple filesytems in a
11377 * partition (For eg APFS volumes in a container, it knows
11378 * that the waitfor argument to VFS_SYNC are flags.
11379 */
11380 VFSATTR_INIT(&vfa);
11381 VFSATTR_WANTED(&vfa, f_capabilities);
11382 if ((vfs_getattr(mp, &vfa, vfs_context_current()) == 0) &&
11383 VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
11384 ((vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE)) &&
11385 ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE))) {
11386 arg |= MNT_VOLUME;
11387 }
11388
11389 /* issue the sync for this volume */
11390 (void)sync_callback(mp, &arg);
11391
11392 /*
11393 * Then release the mount_iterref once we're done syncing; it's not
11394 * needed for the VNOP_IOCTL below
11395 */
11396 mount_iterdrop(mp);
11397
11398 if (arg & FSCTL_SYNC_FULLSYNC) {
11399 /* re-obtain vnode iocount on the root vp, if possible */
11400 error = vnode_getwithvid(vp, vvid);
11401 if (error == 0) {
11402 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
11403 vnode_put(vp);
11404 }
11405 }
11406 /* mark the argument VP as having been released */
11407 *arg_vp = NULL;
11408 return error;
11409}
11410
#if ROUTEFS
/*
 * FSIOC_ROUTEFS_SETROUTEID handler: superuser only.  Copies the
 * NUL-terminated route path in from user address udata (at most
 * MAXPATHLEN bytes) and hands it to routefs_kernel_mount().
 */
static int __attribute__((noinline))
handle_routes(user_addr_t udata)
{
	char routepath[MAXPATHLEN];
	size_t copied = 0;
	int error;

	error = suser(kauth_cred_get(), &(current_proc()->p_acflag));
	if (error != 0) {
		return error;
	}

	bzero(routepath, sizeof(routepath));
	error = copyinstr(udata, routepath, sizeof(routepath), &copied);
	if (error != 0) {
		return error;
	}

	return routefs_kernel_mount(routepath);
}
#endif
11431
11432static int __attribute__((noinline))
11433handle_flags(vnode_t vp, caddr_t data, vfs_context_t ctx)
11434{
11435 struct fsioc_cas_bsdflags *cas = (struct fsioc_cas_bsdflags *)data;
11436 struct vnode_attr va;
11437 int error;
11438
11439 VATTR_INIT(&va);
11440 VATTR_SET(&va, va_flags, cas->new_flags);
11441
11442 error = chflags0(vp, &va, cas_bsdflags_setattr, cas, ctx);
11443 return error;
11444}
11445
11446static int __attribute__((noinline))
11447handle_auth(vnode_t vp, u_long cmd, caddr_t data, u_long options, vfs_context_t ctx)
11448{
11449 struct mount *mp = NULL;
11450 errno_t rootauth = 0;
11451
11452 mp = vp->v_mount;
11453
11454 /*
11455 * query the underlying FS and see if it reports something
11456 * sane for this vnode. If volume is authenticated via
11457 * chunklist, leave that for the caller to determine.
11458 */
11459 rootauth = VNOP_IOCTL(vp, cmd, data, (int)options, ctx);
11460
11461 return rootauth;
11462}
11463
1c79356b
A
11464/*
11465 * Make a filesystem-specific control call:
11466 */
1c79356b 11467/* ARGSUSED */
b0d623f7
A
11468static int
11469fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 11470{
0a7de745 11471 int error = 0;
91447636 11472 boolean_t is64bit;
2d21ac55 11473 u_int size;
1c79356b 11474#define STK_PARAMS 128
39037602 11475 char stkbuf[STK_PARAMS] = {0};
1c79356b 11476 caddr_t data, memp;
b0d623f7 11477 vnode_t vp = *arg_vp;
1c79356b 11478
cb323159
A
11479 if (vp->v_type == VCHR || vp->v_type == VBLK) {
11480 return ENOTTY;
11481 }
11482
5ba3f43e
A
11483 cmd = fsctl_bogus_command_compat(cmd);
11484
1c79356b 11485 size = IOCPARM_LEN(cmd);
0a7de745
A
11486 if (size > IOCPARM_MAX) {
11487 return EINVAL;
11488 }
1c79356b 11489
6d2010ae 11490 is64bit = proc_is64bit(p);
91447636 11491
1c79356b 11492 memp = NULL;
04b8595b 11493
0a7de745 11494 if (size > sizeof(stkbuf)) {
f427ee49 11495 if ((memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK)) == 0) {
0a7de745
A
11496 return ENOMEM;
11497 }
1c79356b
A
11498 data = memp;
11499 } else {
91447636 11500 data = &stkbuf[0];
1c79356b 11501 };
39037602 11502
1c79356b
A
11503 if (cmd & IOC_IN) {
11504 if (size) {
b0d623f7 11505 error = copyin(udata, data, size);
39037602 11506 if (error) {
fe8ab488 11507 if (memp) {
f427ee49 11508 kheap_free(KHEAP_TEMP, memp, size);
fe8ab488
A
11509 }
11510 return error;
11511 }
1c79356b 11512 } else {
6d2010ae
A
11513 if (is64bit) {
11514 *(user_addr_t *)data = udata;
0a7de745 11515 } else {
6d2010ae
A
11516 *(uint32_t *)data = (uint32_t)udata;
11517 }
1c79356b
A
11518 };
11519 } else if ((cmd & IOC_OUT) && size) {
11520 /*
11521 * Zero the buffer so the user always
11522 * gets back something deterministic.
11523 */
11524 bzero(data, size);
91447636 11525 } else if (cmd & IOC_VOID) {
b0d623f7 11526 if (is64bit) {
6d2010ae 11527 *(user_addr_t *)data = udata;
0a7de745 11528 } else {
6d2010ae 11529 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 11530 }
91447636 11531 }
1c79356b 11532
b0d623f7 11533 /* Check to see if it's a generic command */
5ba3f43e 11534 switch (cmd) {
f427ee49
A
11535 case FSIOC_SYNC_VOLUME:
11536 error = handle_sync_volume(vp, arg_vp, data, ctx);
11537 break;
b0d623f7 11538
f427ee49 11539 case FSIOC_ROUTEFS_SETROUTEID:
490019cf 11540#if ROUTEFS
f427ee49 11541 error = handle_routes(udata);
490019cf 11542#endif
f427ee49 11543 break;
0a7de745
A
11544
11545 case FSIOC_SET_PACKAGE_EXTS: {
11546 user_addr_t ext_strings;
11547 uint32_t num_entries;
11548 uint32_t max_width;
11549
11550 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) {
11551 break;
490019cf 11552 }
490019cf 11553
0a7de745
A
11554 if ((is64bit && size != sizeof(user64_package_ext_info))
11555 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
11556 // either you're 64-bit and passed a 64-bit struct or
11557 // you're 32-bit and passed a 32-bit struct. otherwise
11558 // it's not ok.
11559 error = EINVAL;
11560 break;
11561 }
b0d623f7 11562
0a7de745 11563 if (is64bit) {
f427ee49
A
11564 if (sizeof(user64_addr_t) > sizeof(user_addr_t)) {
11565 assert(((user64_package_ext_info *)data)->strings <= UINT32_MAX);
11566 }
11567 ext_strings = (user_addr_t)((user64_package_ext_info *)data)->strings;
0a7de745
A
11568 num_entries = ((user64_package_ext_info *)data)->num_entries;
11569 max_width = ((user64_package_ext_info *)data)->max_width;
11570 } else {
11571 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
11572 num_entries = ((user32_package_ext_info *)data)->num_entries;
11573 max_width = ((user32_package_ext_info *)data)->max_width;
11574 }
11575 error = set_package_extensions_table(ext_strings, num_entries, max_width);
11576 }
11577 break;
39037602 11578
0a7de745
A
11579 case FSIOC_SET_FSTYPENAME_OVERRIDE:
11580 {
11581 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11582 break;
11583 }
11584 if (vp->v_mount) {
11585 mount_lock(vp->v_mount);
11586 if (data[0] != 0) {
f427ee49
A
11587 int i;
11588 for (i = 0; i < MFSTYPENAMELEN; i++) {
11589 if (!data[i]) {
11590 goto continue_copy;
11591 }
11592 }
11593 /*
11594 * Getting here means we have a user data string which has no
11595 * NULL termination in its first MFSTYPENAMELEN bytes.
11596 * This is bogus, let's avoid strlcpy-ing the read data and
11597 * return an error.
11598 */
11599 error = EINVAL;
11600 goto unlock;
11601continue_copy:
0a7de745
A
11602 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
11603 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
11604 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11605 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
11606 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
6d2010ae 11607 }
0a7de745
A
11608 } else {
11609 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11610 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
11611 }
11612 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
11613 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 11614 }
f427ee49 11615unlock:
0a7de745 11616 mount_unlock(vp->v_mount);
6d2010ae 11617 }
0a7de745
A
11618 }
11619 break;
39037602 11620
0a7de745
A
11621 case DISK_CONDITIONER_IOC_GET: {
11622 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
11623 }
11624 break;
5ba3f43e 11625
0a7de745
A
11626 case DISK_CONDITIONER_IOC_SET: {
11627 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
11628 }
11629 break;
5ba3f43e 11630
f427ee49
A
11631 case FSIOC_CAS_BSDFLAGS:
11632 error = handle_flags(vp, data, ctx);
11633 break;
cb323159 11634
0a7de745 11635 case FSIOC_FD_ONLY_OPEN_ONCE: {
f427ee49 11636 error = 0;
0a7de745 11637 if (vnode_usecount(vp) > 1) {
f427ee49
A
11638 vnode_lock_spin(vp);
11639 if (vp->v_lflag & VL_HASSTREAMS) {
11640 if (vnode_isinuse_locked(vp, 1, 1)) {
11641 error = EBUSY;
11642 }
11643 } else if (vnode_usecount(vp) > 1) {
11644 error = EBUSY;
11645 }
11646 vnode_unlock(vp);
fe8ab488 11647 }
0a7de745
A
11648 }
11649 break;
11650
f427ee49
A
11651 case FSIOC_EVAL_ROOTAUTH:
11652 error = handle_auth(vp, cmd, data, options, ctx);
11653 break;
11654
0a7de745
A
11655 default: {
11656 /* other, known commands shouldn't be passed down here */
11657 switch (cmd) {
11658 case F_PUNCHHOLE:
11659 case F_TRIM_ACTIVE_FILE:
11660 case F_RDADVISE:
11661 case F_TRANSCODEKEY:
11662 case F_GETPROTECTIONLEVEL:
11663 case F_GETDEFAULTPROTLEVEL:
11664 case F_MAKECOMPRESSED:
11665 case F_SET_GREEDY_MODE:
11666 case F_SETSTATICCONTENT:
11667 case F_SETIOTYPE:
11668 case F_SETBACKINGSTORE:
11669 case F_GETPATH_MTMINFO:
11670 case APFSIOC_REVERT_TO_SNAPSHOT:
11671 case FSIOC_FIOSEEKHOLE:
11672 case FSIOC_FIOSEEKDATA:
11673 case HFS_GET_BOOT_INFO:
11674 case HFS_SET_BOOT_INFO:
11675 case FIOPINSWAP:
11676 case F_CHKCLEAN:
11677 case F_FULLFSYNC:
11678 case F_BARRIERFSYNC:
11679 case F_FREEZE_FS:
11680 case F_THAW_FS:
f427ee49 11681 case FSIOC_KERNEL_ROOTAUTH:
0a7de745
A
11682 error = EINVAL;
11683 goto outdrop;
11684 }
11685 /* Invoke the filesystem-specific code */
f427ee49 11686 error = VNOP_IOCTL(vp, cmd, data, (int)options, ctx);
0a7de745 11687 }
fe8ab488
A
11688 } /* end switch stmt */
11689
1c79356b 11690 /*
fe8ab488 11691 * if no errors, copy any data to user. Size was
1c79356b
A
11692 * already set and checked above.
11693 */
0a7de745 11694 if (error == 0 && (cmd & IOC_OUT) && size) {
b0d623f7 11695 error = copyout(data, udata, size);
0a7de745 11696 }
39037602 11697
a39ff7e2 11698outdrop:
fe8ab488 11699 if (memp) {
f427ee49 11700 kheap_free(KHEAP_TEMP, memp, size);
fe8ab488 11701 }
39037602 11702
1c79356b
A
11703 return error;
11704}
b0d623f7
A
11705
11706/* ARGSUSED */
11707int
0a7de745 11708fsctl(proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
b0d623f7
A
11709{
11710 int error;
39037602 11711 struct nameidata nd;
f427ee49 11712 uint32_t nameiflags;
b0d623f7
A
11713 vnode_t vp = NULL;
11714 vfs_context_t ctx = vfs_context_current();
11715
f427ee49 11716 AUDIT_ARG(cmd, (int)uap->cmd);
b0d623f7
A
11717 AUDIT_ARG(value32, uap->options);
11718 /* Get the vnode for the file we are getting info on: */
11719 nameiflags = 0;
0a7de745
A
11720 //
11721 // if we come through fsctl() then the file is by definition not open.
11722 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11723 // lest the caller mistakenly thinks the only open is their own (but in
11724 // reality it's someone elses).
11725 //
11726 if (uap->cmd == FSIOC_FD_ONLY_OPEN_ONCE) {
11727 return EINVAL;
11728 }
11729 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
11730 nameiflags |= FOLLOW;
11731 }
cb323159
A
11732 if (uap->cmd == FSIOC_FIRMLINK_CTL) {
11733 nameiflags |= (CN_FIRMLINK_NOFOLLOW | NOCACHE);
11734 }
6d2010ae 11735 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
0a7de745
A
11736 UIO_USERSPACE, uap->path, ctx);
11737 if ((error = namei(&nd))) {
11738 goto done;
11739 }
b0d623f7
A
11740 vp = nd.ni_vp;
11741 nameidone(&nd);
11742
11743#if CONFIG_MACF
11744 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
11745 if (error) {
11746 goto done;
11747 }
11748#endif
11749
11750 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11751
11752done:
0a7de745 11753 if (vp) {
b0d623f7 11754 vnode_put(vp);
0a7de745 11755 }
b0d623f7
A
11756 return error;
11757}
11758/* ARGSUSED */
11759int
0a7de745 11760ffsctl(proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
b0d623f7
A
11761{
11762 int error;
11763 vnode_t vp = NULL;
11764 vfs_context_t ctx = vfs_context_current();
11765 int fd = -1;
11766
11767 AUDIT_ARG(fd, uap->fd);
f427ee49 11768 AUDIT_ARG(cmd, (int)uap->cmd);
b0d623f7 11769 AUDIT_ARG(value32, uap->options);
39037602 11770
b0d623f7 11771 /* Get the vnode for the file we are getting info on: */
0a7de745 11772 if ((error = file_vnode(uap->fd, &vp))) {
3e170ce0 11773 return error;
0a7de745 11774 }
b0d623f7
A
11775 fd = uap->fd;
11776 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
11777 file_drop(fd);
11778 return error;
b0d623f7
A
11779 }
11780
11781#if CONFIG_MACF
3e170ce0
A
11782 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
11783 file_drop(fd);
11784 vnode_put(vp);
11785 return error;
b0d623f7
A
11786 }
11787#endif
11788
11789 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11790
3e170ce0 11791 file_drop(fd);
b0d623f7 11792
3e170ce0
A
11793 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11794 if (vp) {
b0d623f7 11795 vnode_put(vp);
3e170ce0
A
11796 }
11797
b0d623f7
A
11798 return error;
11799}
1c79356b 11800/* end of fsctl system call */
0b4e3aa0 11801
f427ee49
A
11802#define FILESEC_ACCESS_ENTITLEMENT \
11803 "com.apple.private.vfs.filesec-access"
11804
11805static int
11806xattr_entitlement_check(const char *attrname, vfs_context_t ctx, bool setting)
11807{
11808 if (strcmp(attrname, KAUTH_FILESEC_XATTR) == 0) {
11809 /*
11810 * get: root and tasks with FILESEC_ACCESS_ENTITLEMENT.
11811 * set: only tasks with FILESEC_ACCESS_ENTITLEMENT.
11812 */
11813 if ((!setting && vfs_context_issuser(ctx)) ||
11814 IOTaskHasEntitlement(current_task(),
11815 FILESEC_ACCESS_ENTITLEMENT)) {
11816 return 0;
11817 }
11818 }
11819
11820 return EPERM;
11821}
11822
91447636
A
11823/*
11824 * Retrieve the data of an extended attribute.
11825 */
11826int
2d21ac55 11827getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 11828{
2d21ac55 11829 vnode_t vp;
91447636 11830 struct nameidata nd;
0a7de745 11831 char attrname[XATTR_MAXNAMELEN + 1];
2d21ac55 11832 vfs_context_t ctx = vfs_context_current();
91447636
A
11833 uio_t auio = NULL;
11834 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11835 size_t attrsize = 0;
11836 size_t namelen;
b0d623f7 11837 u_int32_t nameiflags;
91447636 11838 int error;
0a7de745 11839 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11840
0a7de745
A
11841 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11842 return EINVAL;
11843 }
55e303ae 11844
91447636 11845 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11846 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 11847 if ((error = namei(&nd))) {
0a7de745 11848 return error;
91447636
A
11849 }
11850 vp = nd.ni_vp;
11851 nameidone(&nd);
55e303ae 11852
d9a64523
A
11853 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11854 if (error != 0) {
91447636
A
11855 goto out;
11856 }
f427ee49
A
11857 if (xattr_protected(attrname) &&
11858 (error = xattr_entitlement_check(attrname, ctx, false)) != 0) {
11859 goto out;
91447636 11860 }
b0d623f7
A
11861 /*
11862 * the specific check for 0xffffffff is a hack to preserve
11863 * binaray compatibilty in K64 with applications that discovered
39037602 11864 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
11865 * just the size of the indicated extended attribute being returned.
11866 * this isn't part of the documented behavior, but because of the
11867 * original implemtation's check for "uap->size > 0", this behavior
11868 * was allowed. In K32 that check turned into a signed comparison
11869 * even though uap->size is unsigned... in K64, we blow by that
11870 * check because uap->size is unsigned and doesn't get sign smeared
39037602 11871 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
11872 * check to limit the maximum size of the buffer being passed in...
11873 * unfortunately, the underlying fileystems seem to just malloc
11874 * the requested size even if the actual extended attribute is tiny.
11875 * because that malloc is for kernel wired memory, we have to put a
11876 * sane limit on it.
11877 *
11878 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11879 * U64 running on K64 will yield -1 (64 bits wide)
11880 * U32/U64 running on K32 will yield -1 (32 bits wide)
11881 */
0a7de745 11882 if (uap->size == 0xffffffff || uap->size == (size_t)-1) {
b0d623f7 11883 goto no_uio;
0a7de745 11884 }
b0d623f7 11885
b0d623f7 11886 if (uap->value) {
0a7de745 11887 if (uap->size > (size_t)XATTR_MAXSIZE) {
6d2010ae 11888 uap->size = XATTR_MAXSIZE;
0a7de745 11889 }
39037602 11890
91447636 11891 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
0a7de745 11892 &uio_buf[0], sizeof(uio_buf));
91447636
A
11893 uio_addiov(auio, uap->value, uap->size);
11894 }
b0d623f7 11895no_uio:
2d21ac55 11896 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
11897out:
11898 vnode_put(vp);
55e303ae 11899
91447636
A
11900 if (auio) {
11901 *retval = uap->size - uio_resid(auio);
11902 } else {
11903 *retval = (user_ssize_t)attrsize;
55e303ae
A
11904 }
11905
0a7de745 11906 return error;
91447636 11907}
55e303ae 11908
91447636
A
11909/*
11910 * Retrieve the data of an extended attribute.
11911 */
11912int
2d21ac55 11913fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 11914{
2d21ac55 11915 vnode_t vp;
0a7de745 11916 char attrname[XATTR_MAXNAMELEN + 1];
f427ee49 11917 vfs_context_t ctx = vfs_context_current();
91447636
A
11918 uio_t auio = NULL;
11919 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11920 size_t attrsize = 0;
11921 size_t namelen;
11922 int error;
0a7de745 11923 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11924
0a7de745
A
11925 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11926 return EINVAL;
11927 }
55e303ae 11928
0a7de745
A
11929 if ((error = file_vnode(uap->fd, &vp))) {
11930 return error;
91447636 11931 }
0a7de745 11932 if ((error = vnode_getwithref(vp))) {
91447636 11933 file_drop(uap->fd);
0a7de745 11934 return error;
91447636 11935 }
d9a64523
A
11936 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11937 if (error != 0) {
91447636
A
11938 goto out;
11939 }
f427ee49
A
11940 if (xattr_protected(attrname) &&
11941 (error = xattr_entitlement_check(attrname, ctx, false)) != 0) {
91447636
A
11942 goto out;
11943 }
11944 if (uap->value && uap->size > 0) {
11945 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
0a7de745 11946 &uio_buf[0], sizeof(uio_buf));
91447636
A
11947 uio_addiov(auio, uap->value, uap->size);
11948 }
55e303ae 11949
2d21ac55 11950 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
11951out:
11952 (void)vnode_put(vp);
11953 file_drop(uap->fd);
55e303ae 11954
91447636
A
11955 if (auio) {
11956 *retval = uap->size - uio_resid(auio);
11957 } else {
11958 *retval = (user_ssize_t)attrsize;
11959 }
0a7de745 11960 return error;
91447636 11961}
55e303ae 11962
91447636
A
11963/*
11964 * Set the data of an extended attribute.
11965 */
55e303ae 11966int
2d21ac55 11967setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 11968{
2d21ac55 11969 vnode_t vp;
91447636 11970 struct nameidata nd;
0a7de745 11971 char attrname[XATTR_MAXNAMELEN + 1];
2d21ac55 11972 vfs_context_t ctx = vfs_context_current();
91447636
A
11973 uio_t auio = NULL;
11974 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11975 size_t namelen;
b0d623f7 11976 u_int32_t nameiflags;
91447636 11977 int error;
0a7de745 11978 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11979
0a7de745
A
11980 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11981 return EINVAL;
11982 }
55e303ae 11983
d9a64523
A
11984 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11985 if (error != 0) {
6d2010ae
A
11986 if (error == EPERM) {
11987 /* if the string won't fit in attrname, copyinstr emits EPERM */
0a7de745 11988 return ENAMETOOLONG;
6d2010ae
A
11989 }
11990 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11991 return error;
91447636 11992 }
f427ee49
A
11993 if (xattr_protected(attrname) &&
11994 (error = xattr_entitlement_check(attrname, ctx, true)) != 0) {
11995 return error;
0a7de745 11996 }
2d21ac55 11997 if (uap->size != 0 && uap->value == 0) {
0a7de745 11998 return EINVAL;
55e303ae 11999 }
f427ee49
A
12000 if (uap->size > INT_MAX) {
12001 return E2BIG;
12002 }
55e303ae 12003
91447636 12004 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 12005 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 12006 if ((error = namei(&nd))) {
0a7de745 12007 return error;
91447636
A
12008 }
12009 vp = nd.ni_vp;
12010 nameidone(&nd);
55e303ae 12011
91447636 12012 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
0a7de745 12013 &uio_buf[0], sizeof(uio_buf));
91447636 12014 uio_addiov(auio, uap->value, uap->size);
55e303ae 12015
2d21ac55
A
12016 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
12017#if CONFIG_FSE
12018 if (error == 0) {
12019 add_fsevent(FSE_XATTR_MODIFIED, ctx,
12020 FSE_ARG_VNODE, vp,
12021 FSE_ARG_DONE);
12022 }
12023#endif
91447636
A
12024 vnode_put(vp);
12025 *retval = 0;
0a7de745 12026 return error;
91447636 12027}
55e303ae 12028
91447636
A
12029/*
12030 * Set the data of an extended attribute.
12031 */
12032int
2d21ac55 12033fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 12034{
2d21ac55 12035 vnode_t vp;
0a7de745 12036 char attrname[XATTR_MAXNAMELEN + 1];
f427ee49 12037 vfs_context_t ctx = vfs_context_current();
91447636
A
12038 uio_t auio = NULL;
12039 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
12040 size_t namelen;
12041 int error;
0a7de745 12042 char uio_buf[UIO_SIZEOF(1)];
55e303ae 12043
0a7de745
A
12044 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
12045 return EINVAL;
12046 }
55e303ae 12047
d9a64523
A
12048 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
12049 if (error != 0) {
3e170ce0
A
12050 if (error == EPERM) {
12051 /* if the string won't fit in attrname, copyinstr emits EPERM */
0a7de745 12052 return ENAMETOOLONG;
3e170ce0
A
12053 }
12054 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
12055 return error;
55e303ae 12056 }
f427ee49
A
12057 if (xattr_protected(attrname) &&
12058 (error = xattr_entitlement_check(attrname, ctx, true)) != 0) {
12059 return error;
0a7de745 12060 }
2d21ac55 12061 if (uap->size != 0 && uap->value == 0) {
0a7de745 12062 return EINVAL;
55e303ae 12063 }
f427ee49
A
12064 if (uap->size > INT_MAX) {
12065 return E2BIG;
12066 }
0a7de745
A
12067 if ((error = file_vnode(uap->fd, &vp))) {
12068 return error;
55e303ae 12069 }
0a7de745 12070 if ((error = vnode_getwithref(vp))) {
91447636 12071 file_drop(uap->fd);
0a7de745 12072 return error;
91447636
A
12073 }
12074 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
0a7de745 12075 &uio_buf[0], sizeof(uio_buf));
91447636 12076 uio_addiov(auio, uap->value, uap->size);
91447636 12077
2d21ac55
A
12078 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
12079#if CONFIG_FSE
12080 if (error == 0) {
12081 add_fsevent(FSE_XATTR_MODIFIED, ctx,
12082 FSE_ARG_VNODE, vp,
12083 FSE_ARG_DONE);
12084 }
12085#endif
91447636
A
12086 vnode_put(vp);
12087 file_drop(uap->fd);
12088 *retval = 0;
0a7de745 12089 return error;
91447636 12090}
55e303ae 12091
91447636
A
12092/*
12093 * Remove an extended attribute.
b0d623f7 12094 * XXX Code duplication here.
91447636 12095 */
91447636 12096int
2d21ac55 12097removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 12098{
2d21ac55 12099 vnode_t vp;
91447636 12100 struct nameidata nd;
0a7de745 12101 char attrname[XATTR_MAXNAMELEN + 1];
91447636 12102 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 12103 vfs_context_t ctx = vfs_context_current();
91447636 12104 size_t namelen;
b0d623f7 12105 u_int32_t nameiflags;
91447636 12106 int error;
55e303ae 12107
0a7de745
A
12108 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
12109 return EINVAL;
12110 }
55e303ae 12111
91447636
A
12112 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
12113 if (error != 0) {
0a7de745
A
12114 return error;
12115 }
12116 if (xattr_protected(attrname)) {
12117 return EPERM;
91447636 12118 }
91447636 12119 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 12120 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 12121 if ((error = namei(&nd))) {
0a7de745 12122 return error;
91447636
A
12123 }
12124 vp = nd.ni_vp;
12125 nameidone(&nd);
55e303ae 12126
2d21ac55
A
12127 error = vn_removexattr(vp, attrname, uap->options, ctx);
12128#if CONFIG_FSE
12129 if (error == 0) {
12130 add_fsevent(FSE_XATTR_REMOVED, ctx,
12131 FSE_ARG_VNODE, vp,
12132 FSE_ARG_DONE);
12133 }
12134#endif
91447636
A
12135 vnode_put(vp);
12136 *retval = 0;
0a7de745 12137 return error;
55e303ae
A
12138}
12139
91447636
A
12140/*
12141 * Remove an extended attribute.
b0d623f7 12142 * XXX Code duplication here.
91447636 12143 */
91447636 12144int
2d21ac55 12145fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 12146{
2d21ac55 12147 vnode_t vp;
0a7de745 12148 char attrname[XATTR_MAXNAMELEN + 1];
91447636
A
12149 size_t namelen;
12150 int error;
6d2010ae 12151#if CONFIG_FSE
2d21ac55 12152 vfs_context_t ctx = vfs_context_current();
6d2010ae 12153#endif
55e303ae 12154
0a7de745
A
12155 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
12156 return EINVAL;
12157 }
91447636
A
12158
12159 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
12160 if (error != 0) {
0a7de745 12161 return error;
91447636 12162 }
0a7de745
A
12163 if (xattr_protected(attrname)) {
12164 return EPERM;
91447636 12165 }
0a7de745
A
12166 if ((error = file_vnode(uap->fd, &vp))) {
12167 return error;
12168 }
12169 if ((error = vnode_getwithref(vp))) {
91447636 12170 file_drop(uap->fd);
0a7de745 12171 return error;
91447636 12172 }
4a249263 12173
2d21ac55
A
12174 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
12175#if CONFIG_FSE
12176 if (error == 0) {
12177 add_fsevent(FSE_XATTR_REMOVED, ctx,
12178 FSE_ARG_VNODE, vp,
12179 FSE_ARG_DONE);
12180 }
12181#endif
91447636
A
12182 vnode_put(vp);
12183 file_drop(uap->fd);
12184 *retval = 0;
0a7de745 12185 return error;
55e303ae
A
12186}
12187
91447636
A
12188/*
12189 * Retrieve the list of extended attribute names.
b0d623f7 12190 * XXX Code duplication here.
91447636 12191 */
91447636 12192int
2d21ac55 12193listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 12194{
2d21ac55 12195 vnode_t vp;
91447636 12196 struct nameidata nd;
2d21ac55 12197 vfs_context_t ctx = vfs_context_current();
91447636
A
12198 uio_t auio = NULL;
12199 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
12200 size_t attrsize = 0;
b0d623f7 12201 u_int32_t nameiflags;
91447636 12202 int error;
0a7de745 12203 char uio_buf[UIO_SIZEOF(1)];
4a249263 12204
0a7de745
A
12205 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
12206 return EINVAL;
12207 }
55e303ae 12208
fe8ab488 12209 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 12210 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 12211 if ((error = namei(&nd))) {
0a7de745 12212 return error;
91447636
A
12213 }
12214 vp = nd.ni_vp;
12215 nameidone(&nd);
12216 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae 12217 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
0a7de745 12218 &uio_buf[0], sizeof(uio_buf));
91447636
A
12219 uio_addiov(auio, uap->namebuf, uap->bufsize);
12220 }
55e303ae 12221
2d21ac55 12222 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 12223
91447636
A
12224 vnode_put(vp);
12225 if (auio) {
12226 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
12227 } else {
12228 *retval = (user_ssize_t)attrsize;
12229 }
0a7de745 12230 return error;
55e303ae
A
12231}
12232
91447636
A
12233/*
12234 * Retrieve the list of extended attribute names.
b0d623f7 12235 * XXX Code duplication here.
91447636 12236 */
55e303ae 12237int
2d21ac55 12238flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 12239{
2d21ac55 12240 vnode_t vp;
91447636
A
12241 uio_t auio = NULL;
12242 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
12243 size_t attrsize = 0;
12244 int error;
0a7de745 12245 char uio_buf[UIO_SIZEOF(1)];
91447636 12246
0a7de745
A
12247 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
12248 return EINVAL;
12249 }
91447636 12250
0a7de745
A
12251 if ((error = file_vnode(uap->fd, &vp))) {
12252 return error;
91447636 12253 }
0a7de745 12254 if ((error = vnode_getwithref(vp))) {
91447636 12255 file_drop(uap->fd);
0a7de745 12256 return error;
91447636
A
12257 }
12258 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 12259 auio = uio_createwithbuffer(1, 0, spacetype,
0a7de745 12260 UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636
A
12261 uio_addiov(auio, uap->namebuf, uap->bufsize);
12262 }
91447636 12263
2d21ac55 12264 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 12265
91447636
A
12266 vnode_put(vp);
12267 file_drop(uap->fd);
12268 if (auio) {
12269 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
12270 } else {
12271 *retval = (user_ssize_t)attrsize;
12272 }
0a7de745 12273 return error;
55e303ae 12274}
4a249263 12275
0a7de745 12276static int
cb323159
A
12277fsgetpath_internal(vfs_context_t ctx, int volfs_id, uint64_t objid,
12278 vm_size_t bufsize, caddr_t buf, uint32_t options, int *pathlen)
b0d623f7 12279{
fe8ab488 12280 int error;
b0d623f7 12281 struct mount *mp = NULL;
fe8ab488 12282 vnode_t vp;
b0d623f7 12283 int length;
fe8ab488 12284 int bpflags;
813fb2f6
A
12285 /* maximum number of times to retry build_path */
12286 unsigned int retries = 0x10;
b0d623f7 12287
fe8ab488 12288 if (bufsize > PAGE_SIZE) {
0a7de745 12289 return EINVAL;
fe8ab488
A
12290 }
12291
12292 if (buf == NULL) {
0a7de745 12293 return ENOMEM;
b0d623f7 12294 }
fe8ab488 12295
813fb2f6 12296retry:
fe8ab488 12297 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 12298 error = ENOTSUP; /* unexpected failure */
fe8ab488 12299 return ENOTSUP;
b0d623f7 12300 }
fe8ab488 12301
39236c6e 12302unionget:
fe8ab488 12303 if (objid == 2) {
cb323159
A
12304 struct vfs_attr vfsattr;
12305 int use_vfs_root = TRUE;
12306
12307 VFSATTR_INIT(&vfsattr);
12308 VFSATTR_WANTED(&vfsattr, f_capabilities);
12309 if (!(options & FSOPT_ISREALFSID) &&
12310 vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
12311 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
12312 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
12313 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
12314 use_vfs_root = FALSE;
12315 }
12316 }
12317
12318 if (use_vfs_root) {
12319 error = VFS_ROOT(mp, &vp, ctx);
12320 } else {
12321 error = VFS_VGET(mp, objid, &vp, ctx);
12322 }
b0d623f7 12323 } else {
fe8ab488 12324 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 12325 }
39236c6e
A
12326
12327 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
12328 /*
12329 * If the fileid isn't found and we're in a union
12330 * mount volume, then see if the fileid is in the
12331 * mounted-on volume.
12332 */
12333 struct mount *tmp = mp;
12334 mp = vnode_mount(tmp->mnt_vnodecovered);
12335 vfs_unbusy(tmp);
0a7de745 12336 if (vfs_busy(mp, LK_NOWAIT) == 0) {
39236c6e 12337 goto unionget;
0a7de745 12338 }
fe8ab488 12339 } else {
39236c6e 12340 vfs_unbusy(mp);
fe8ab488 12341 }
39236c6e 12342
b0d623f7 12343 if (error) {
fe8ab488 12344 return error;
b0d623f7 12345 }
fe8ab488 12346
6d2010ae
A
12347#if CONFIG_MACF
12348 error = mac_vnode_check_fsgetpath(ctx, vp);
12349 if (error) {
12350 vnode_put(vp);
fe8ab488 12351 return error;
6d2010ae
A
12352 }
12353#endif
fe8ab488 12354
b0d623f7
A
12355 /* Obtain the absolute path to this vnode. */
12356 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
cb323159
A
12357 if (options & FSOPT_NOFIRMLINKPATH) {
12358 bpflags |= BUILDPATH_NO_FIRMLINK;
12359 }
316670eb 12360 bpflags |= BUILDPATH_CHECK_MOVED;
f427ee49 12361 error = build_path(vp, buf, (int)bufsize, &length, bpflags, ctx);
b0d623f7 12362 vnode_put(vp);
fe8ab488 12363
b0d623f7 12364 if (error) {
813fb2f6
A
12365 /* there was a race building the path, try a few more times */
12366 if (error == EAGAIN) {
12367 --retries;
0a7de745 12368 if (retries > 0) {
813fb2f6 12369 goto retry;
0a7de745 12370 }
813fb2f6
A
12371
12372 error = ENOENT;
12373 }
b0d623f7
A
12374 goto out;
12375 }
fe8ab488
A
12376
12377 AUDIT_ARG(text, buf);
39236c6e 12378
f427ee49
A
12379 if (kdebug_debugid_enabled(VFS_LOOKUP) && length > 0) {
12380 unsigned long path_words[NUMPARMS];
12381 size_t path_len = sizeof(path_words);
39236c6e 12382
f427ee49
A
12383 if ((size_t)length < path_len) {
12384 memcpy((char *)path_words, buf, length);
12385 memset((char *)path_words + length, 0, path_len - length);
39236c6e 12386
f427ee49 12387 path_len = length;
fe8ab488 12388 } else {
f427ee49 12389 memcpy((char *)path_words, buf + (length - path_len), path_len);
fe8ab488 12390 }
39236c6e 12391
f427ee49 12392 kdebug_vfs_lookup(path_words, (int)path_len, vp,
0a7de745 12393 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
39236c6e 12394 }
fe8ab488 12395
f427ee49 12396 *pathlen = length; /* may be superseded by error */
fe8ab488
A
12397
12398out:
0a7de745 12399 return error;
fe8ab488
A
12400}
12401
12402/*
12403 * Obtain the full pathname of a file system object by id.
fe8ab488 12404 */
cb323159 12405static int
f427ee49 12406fsgetpath_extended(user_addr_t buf, user_size_t bufsize, user_addr_t user_fsid, uint64_t objid,
cb323159 12407 uint32_t options, user_ssize_t *retval)
fe8ab488
A
12408{
12409 vfs_context_t ctx = vfs_context_current();
12410 fsid_t fsid;
12411 char *realpath;
12412 int length;
12413 int error;
12414
cb323159
A
12415 if (options & ~(FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID)) {
12416 return EINVAL;
12417 }
12418
12419 if ((error = copyin(user_fsid, (caddr_t)&fsid, sizeof(fsid)))) {
0a7de745 12420 return error;
fe8ab488
A
12421 }
12422 AUDIT_ARG(value32, fsid.val[0]);
cb323159 12423 AUDIT_ARG(value64, objid);
fe8ab488 12424 /* Restrict output buffer size for now. */
39037602 12425
cb323159 12426 if (bufsize > PAGE_SIZE || bufsize <= 0) {
0a7de745 12427 return EINVAL;
39037602 12428 }
f427ee49 12429 realpath = kheap_alloc(KHEAP_TEMP, bufsize, Z_WAITOK | Z_ZERO);
fe8ab488 12430 if (realpath == NULL) {
0a7de745 12431 return ENOMEM;
fe8ab488
A
12432 }
12433
cb323159
A
12434 error = fsgetpath_internal(ctx, fsid.val[0], objid, bufsize, realpath,
12435 options, &length);
fe8ab488
A
12436
12437 if (error) {
12438 goto out;
12439 }
39037602 12440
cb323159 12441 error = copyout((caddr_t)realpath, buf, length);
b0d623f7
A
12442
12443 *retval = (user_ssize_t)length; /* may be superseded by error */
12444out:
f427ee49 12445 kheap_free(KHEAP_TEMP, realpath, bufsize);
0a7de745 12446 return error;
b0d623f7
A
12447}
12448
cb323159
A
12449int
12450fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
12451{
12452 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
12453 0, retval);
12454}
12455
12456int
12457fsgetpath_ext(__unused proc_t p, struct fsgetpath_ext_args *uap, user_ssize_t *retval)
12458{
12459 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
12460 uap->options, retval);
12461}
12462
91447636
A
12463/*
12464 * Common routine to handle various flavors of statfs data heading out
12465 * to user space.
2d21ac55
A
12466 *
12467 * Returns: 0 Success
12468 * EFAULT
91447636
A
12469 */
12470static int
39037602
A
12471munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
12472 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 12473 boolean_t partial_copy)
4a249263 12474{
0a7de745
A
12475 int error;
12476 int my_size, copy_size;
91447636
A
12477
12478 if (is_64_bit) {
b0d623f7 12479 struct user64_statfs sfs;
91447636
A
12480 my_size = copy_size = sizeof(sfs);
12481 bzero(&sfs, my_size);
12482 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
f427ee49 12483 sfs.f_type = (short)mp->mnt_vtable->vfc_typenum;
91447636 12484 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
12485 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
12486 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
12487 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
12488 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
12489 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
12490 sfs.f_files = (user64_long_t)sfsp->f_files;
12491 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
12492 sfs.f_fsid = sfsp->f_fsid;
12493 sfs.f_owner = sfsp->f_owner;
6d2010ae 12494 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 12495 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
12496 } else {
12497 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12498 }
2d21ac55
A
12499 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12500 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
12501
12502 if (partial_copy) {
12503 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12504 }
12505 error = copyout((caddr_t)&sfs, bufp, copy_size);
0a7de745 12506 } else {
b0d623f7
A
12507 struct user32_statfs sfs;
12508
91447636
A
12509 my_size = copy_size = sizeof(sfs);
12510 bzero(&sfs, my_size);
39037602 12511
91447636 12512 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
f427ee49 12513 sfs.f_type = (short)mp->mnt_vtable->vfc_typenum;
91447636 12514 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 12515
91447636
A
12516 /*
12517 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
12518 * have to fudge the numbers here in that case. We inflate the blocksize in order
12519 * to reflect the filesystem size as best we can.
12520 */
39037602 12521 if ((sfsp->f_blocks > INT_MAX)
0a7de745
A
12522 /* Hack for 4061702 . I think the real fix is for Carbon to
12523 * look for some volume capability and not depend on hidden
12524 * semantics agreed between a FS and carbon.
12525 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12526 * for Carbon to set bNoVolumeSizes volume attribute.
12527 * Without this the webdavfs files cannot be copied onto
12528 * disk as they look huge. This change should not affect
12529 * XSAN as they should not setting these to -1..
12530 */
12531 && (sfsp->f_blocks != 0xffffffffffffffffULL)
12532 && (sfsp->f_bfree != 0xffffffffffffffffULL)
12533 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
12534 int shift;
91447636
A
12535
12536 /*
12537 * Work out how far we have to shift the block count down to make it fit.
12538 * Note that it's possible to have to shift so far that the resulting
12539 * blocksize would be unreportably large. At that point, we will clip
12540 * any values that don't fit.
12541 *
12542 * For safety's sake, we also ensure that f_iosize is never reported as
12543 * being smaller than f_bsize.
12544 */
12545 for (shift = 0; shift < 32; shift++) {
0a7de745 12546 if ((sfsp->f_blocks >> shift) <= INT_MAX) {
91447636 12547 break;
0a7de745
A
12548 }
12549 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) {
91447636 12550 break;
0a7de745 12551 }
91447636 12552 }
0a7de745 12553#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
b0d623f7
A
12554 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
12555 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
12556 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 12557#undef __SHIFT_OR_CLIP
b0d623f7 12558 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
f427ee49 12559 sfs.f_iosize = (int)lmax(sfsp->f_iosize, sfsp->f_bsize);
91447636
A
12560 } else {
12561 /* filesystem is small enough to be reported honestly */
b0d623f7
A
12562 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
12563 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
12564 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
12565 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
12566 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 12567 }
b0d623f7
A
12568 sfs.f_files = (user32_long_t)sfsp->f_files;
12569 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
12570 sfs.f_fsid = sfsp->f_fsid;
12571 sfs.f_owner = sfsp->f_owner;
6d2010ae 12572 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 12573 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
12574 } else {
12575 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12576 }
2d21ac55
A
12577 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12578 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
12579
12580 if (partial_copy) {
12581 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12582 }
12583 error = copyout((caddr_t)&sfs, bufp, copy_size);
12584 }
39037602 12585
91447636
A
12586 if (sizep != NULL) {
12587 *sizep = my_size;
12588 }
0a7de745 12589 return error;
91447636
A
12590}
12591
12592/*
12593 * copy stat structure into user_stat structure.
12594 */
0a7de745
A
12595void
12596munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 12597{
b0d623f7
A
12598 bzero(usbp, sizeof(*usbp));
12599
12600 usbp->st_dev = sbp->st_dev;
12601 usbp->st_ino = sbp->st_ino;
12602 usbp->st_mode = sbp->st_mode;
12603 usbp->st_nlink = sbp->st_nlink;
12604 usbp->st_uid = sbp->st_uid;
12605 usbp->st_gid = sbp->st_gid;
12606 usbp->st_rdev = sbp->st_rdev;
12607#ifndef _POSIX_C_SOURCE
12608 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12609 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12610 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12611 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12612 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12613 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12614#else
12615 usbp->st_atime = sbp->st_atime;
12616 usbp->st_atimensec = sbp->st_atimensec;
12617 usbp->st_mtime = sbp->st_mtime;
12618 usbp->st_mtimensec = sbp->st_mtimensec;
12619 usbp->st_ctime = sbp->st_ctime;
12620 usbp->st_ctimensec = sbp->st_ctimensec;
12621#endif
12622 usbp->st_size = sbp->st_size;
12623 usbp->st_blocks = sbp->st_blocks;
12624 usbp->st_blksize = sbp->st_blksize;
12625 usbp->st_flags = sbp->st_flags;
12626 usbp->st_gen = sbp->st_gen;
12627 usbp->st_lspare = sbp->st_lspare;
12628 usbp->st_qspare[0] = sbp->st_qspare[0];
12629 usbp->st_qspare[1] = sbp->st_qspare[1];
12630}
12631
0a7de745
A
12632void
12633munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
b0d623f7
A
12634{
12635 bzero(usbp, sizeof(*usbp));
0c530ab8 12636
91447636
A
12637 usbp->st_dev = sbp->st_dev;
12638 usbp->st_ino = sbp->st_ino;
12639 usbp->st_mode = sbp->st_mode;
12640 usbp->st_nlink = sbp->st_nlink;
12641 usbp->st_uid = sbp->st_uid;
12642 usbp->st_gid = sbp->st_gid;
12643 usbp->st_rdev = sbp->st_rdev;
2d21ac55 12644#ifndef _POSIX_C_SOURCE
f427ee49
A
12645 usbp->st_atimespec.tv_sec = (user32_time_t)sbp->st_atimespec.tv_sec;
12646 usbp->st_atimespec.tv_nsec = (user32_long_t)sbp->st_atimespec.tv_nsec;
12647 usbp->st_mtimespec.tv_sec = (user32_time_t)sbp->st_mtimespec.tv_sec;
12648 usbp->st_mtimespec.tv_nsec = (user32_long_t)sbp->st_mtimespec.tv_nsec;
12649 usbp->st_ctimespec.tv_sec = (user32_time_t)sbp->st_ctimespec.tv_sec;
12650 usbp->st_ctimespec.tv_nsec = (user32_long_t)sbp->st_ctimespec.tv_nsec;
2d21ac55
A
12651#else
12652 usbp->st_atime = sbp->st_atime;
12653 usbp->st_atimensec = sbp->st_atimensec;
12654 usbp->st_mtime = sbp->st_mtime;
12655 usbp->st_mtimensec = sbp->st_mtimensec;
12656 usbp->st_ctime = sbp->st_ctime;
12657 usbp->st_ctimensec = sbp->st_ctimensec;
12658#endif
12659 usbp->st_size = sbp->st_size;
12660 usbp->st_blocks = sbp->st_blocks;
12661 usbp->st_blksize = sbp->st_blksize;
12662 usbp->st_flags = sbp->st_flags;
12663 usbp->st_gen = sbp->st_gen;
12664 usbp->st_lspare = sbp->st_lspare;
12665 usbp->st_qspare[0] = sbp->st_qspare[0];
12666 usbp->st_qspare[1] = sbp->st_qspare[1];
12667}
12668
12669/*
12670 * copy stat64 structure into user_stat64 structure.
12671 */
0a7de745
A
12672void
12673munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
b0d623f7
A
12674{
12675 bzero(usbp, sizeof(*usbp));
12676
12677 usbp->st_dev = sbp->st_dev;
12678 usbp->st_ino = sbp->st_ino;
12679 usbp->st_mode = sbp->st_mode;
12680 usbp->st_nlink = sbp->st_nlink;
12681 usbp->st_uid = sbp->st_uid;
12682 usbp->st_gid = sbp->st_gid;
12683 usbp->st_rdev = sbp->st_rdev;
12684#ifndef _POSIX_C_SOURCE
12685 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12686 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12687 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12688 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12689 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12690 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12691 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12692 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12693#else
12694 usbp->st_atime = sbp->st_atime;
12695 usbp->st_atimensec = sbp->st_atimensec;
12696 usbp->st_mtime = sbp->st_mtime;
12697 usbp->st_mtimensec = sbp->st_mtimensec;
12698 usbp->st_ctime = sbp->st_ctime;
12699 usbp->st_ctimensec = sbp->st_ctimensec;
12700 usbp->st_birthtime = sbp->st_birthtime;
12701 usbp->st_birthtimensec = sbp->st_birthtimensec;
12702#endif
12703 usbp->st_size = sbp->st_size;
12704 usbp->st_blocks = sbp->st_blocks;
12705 usbp->st_blksize = sbp->st_blksize;
12706 usbp->st_flags = sbp->st_flags;
12707 usbp->st_gen = sbp->st_gen;
12708 usbp->st_lspare = sbp->st_lspare;
12709 usbp->st_qspare[0] = sbp->st_qspare[0];
12710 usbp->st_qspare[1] = sbp->st_qspare[1];
12711}
12712
0a7de745
A
12713void
12714munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 12715{
b0d623f7 12716 bzero(usbp, sizeof(*usbp));
2d21ac55
A
12717
12718 usbp->st_dev = sbp->st_dev;
12719 usbp->st_ino = sbp->st_ino;
12720 usbp->st_mode = sbp->st_mode;
12721 usbp->st_nlink = sbp->st_nlink;
12722 usbp->st_uid = sbp->st_uid;
12723 usbp->st_gid = sbp->st_gid;
12724 usbp->st_rdev = sbp->st_rdev;
12725#ifndef _POSIX_C_SOURCE
f427ee49
A
12726 usbp->st_atimespec.tv_sec = (user32_time_t)sbp->st_atimespec.tv_sec;
12727 usbp->st_atimespec.tv_nsec = (user32_long_t)sbp->st_atimespec.tv_nsec;
12728 usbp->st_mtimespec.tv_sec = (user32_time_t)sbp->st_mtimespec.tv_sec;
12729 usbp->st_mtimespec.tv_nsec = (user32_long_t)sbp->st_mtimespec.tv_nsec;
12730 usbp->st_ctimespec.tv_sec = (user32_time_t)sbp->st_ctimespec.tv_sec;
12731 usbp->st_ctimespec.tv_nsec = (user32_long_t)sbp->st_ctimespec.tv_nsec;
12732 usbp->st_birthtimespec.tv_sec = (user32_time_t)sbp->st_birthtimespec.tv_sec;
12733 usbp->st_birthtimespec.tv_nsec = (user32_long_t)sbp->st_birthtimespec.tv_nsec;
91447636
A
12734#else
12735 usbp->st_atime = sbp->st_atime;
12736 usbp->st_atimensec = sbp->st_atimensec;
12737 usbp->st_mtime = sbp->st_mtime;
12738 usbp->st_mtimensec = sbp->st_mtimensec;
12739 usbp->st_ctime = sbp->st_ctime;
12740 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
12741 usbp->st_birthtime = sbp->st_birthtime;
12742 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
12743#endif
12744 usbp->st_size = sbp->st_size;
12745 usbp->st_blocks = sbp->st_blocks;
12746 usbp->st_blksize = sbp->st_blksize;
12747 usbp->st_flags = sbp->st_flags;
12748 usbp->st_gen = sbp->st_gen;
12749 usbp->st_lspare = sbp->st_lspare;
12750 usbp->st_qspare[0] = sbp->st_qspare[0];
12751 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 12752}
39236c6e
A
12753
12754/*
12755 * Purge buffer cache for simulating cold starts
12756 */
0a7de745
A
12757static int
12758vnode_purge_callback(struct vnode *vp, __unused void *cargs)
39236c6e
A
12759{
12760 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
12761
12762 return VNODE_RETURNED;
12763}
12764
0a7de745
A
12765static int
12766vfs_purge_callback(mount_t mp, __unused void * arg)
39236c6e
A
12767{
12768 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
12769
12770 return VFS_RETURNED;
12771}
12772
12773int
12774vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
12775{
0a7de745 12776 if (!kauth_cred_issuser(kauth_cred_get())) {
39236c6e 12777 return EPERM;
0a7de745 12778 }
39236c6e 12779
0a7de745 12780 vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
39236c6e
A
12781
12782 return 0;
12783}
12784
39037602
A
12785/*
12786 * gets the vnode associated with the (unnamed) snapshot directory
12787 * for a Filesystem. The snapshot directory vnode is returned with
12788 * an iocount on it.
12789 */
12790int
12791vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
12792{
0a7de745 12793 return VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
39037602
A
12794}
12795
12796/*
12797 * Get the snapshot vnode.
12798 *
12799 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12800 * needs nameidone() on ndp.
12801 *
12802 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12803 *
12804 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12805 * not needed.
12806 */
12807static int
12808vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
12809 user_addr_t name, struct nameidata *ndp, int32_t op,
12810#if !CONFIG_TRIGGERS
12811 __unused
12812#endif
12813 enum path_operation pathop,
12814 vfs_context_t ctx)
12815{
12816 int error, i;
12817 caddr_t name_buf;
12818 size_t name_len;
12819 struct vfs_attr vfa;
12820
12821 *sdvpp = NULLVP;
12822 *rvpp = NULLVP;
12823
12824 error = vnode_getfromfd(ctx, dirfd, rvpp);
0a7de745
A
12825 if (error) {
12826 return error;
12827 }
39037602
A
12828
12829 if (!vnode_isvroot(*rvpp)) {
12830 error = EINVAL;
12831 goto out;
12832 }
12833
12834 /* Make sure the filesystem supports snapshots */
12835 VFSATTR_INIT(&vfa);
12836 VFSATTR_WANTED(&vfa, f_capabilities);
12837 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
12838 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
12839 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
12840 VOL_CAP_INT_SNAPSHOT)) ||
12841 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
12842 VOL_CAP_INT_SNAPSHOT))) {
12843 error = ENOTSUP;
12844 goto out;
12845 }
12846
12847 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
0a7de745 12848 if (error) {
39037602 12849 goto out;
0a7de745 12850 }
39037602 12851
f427ee49 12852 name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
39037602 12853 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
0a7de745 12854 if (error) {
39037602 12855 goto out1;
0a7de745 12856 }
39037602
A
12857
12858 /*
12859 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12860 * (the length returned by copyinstr includes the terminating NUL)
12861 */
12862 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
12863 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
12864 error = EINVAL;
12865 goto out1;
12866 }
0a7de745
A
12867 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++) {
12868 ;
12869 }
39037602
A
12870 if (i < (int)name_len) {
12871 error = EINVAL;
12872 goto out1;
12873 }
12874
12875#if CONFIG_MACF
12876 if (op == CREATE) {
12877 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
12878 name_buf);
12879 } else if (op == DELETE) {
12880 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
12881 name_buf);
12882 }
0a7de745 12883 if (error) {
39037602 12884 goto out1;
0a7de745 12885 }
39037602
A
12886#endif
12887
12888 /* Check if the snapshot already exists ... */
12889 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
12890 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
12891 ndp->ni_dvp = *sdvpp;
12892
12893 error = namei(ndp);
12894out1:
f427ee49 12895 zfree(ZV_NAMEI, name_buf);
39037602
A
12896out:
12897 if (error) {
12898 if (*sdvpp) {
12899 vnode_put(*sdvpp);
12900 *sdvpp = NULLVP;
12901 }
12902 if (*rvpp) {
12903 vnode_put(*rvpp);
12904 *rvpp = NULLVP;
12905 }
12906 }
0a7de745 12907 return error;
39037602
A
12908}
12909
12910/*
12911 * create a filesystem snapshot (for supporting filesystems)
12912 *
12913 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12914 * We get to the (unnamed) snapshot directory vnode and create the vnode
12915 * for the snapshot in it.
12916 *
12917 * Restrictions:
12918 *
12919 * a) Passed in name for snapshot cannot have slashes.
12920 * b) name can't be "." or ".."
12921 *
12922 * Since this requires superuser privileges, vnode_authorize calls are not
12923 * made.
12924 */
f427ee49 12925static int __attribute__((noinline))
39037602
A
12926snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
12927 vfs_context_t ctx)
12928{
12929 vnode_t rvp, snapdvp;
12930 int error;
f427ee49 12931 struct nameidata *ndp;
39037602 12932
f427ee49
A
12933 ndp = kheap_alloc(KHEAP_TEMP, sizeof(*ndp), Z_WAITOK);
12934
12935 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, ndp, CREATE,
39037602 12936 OP_LINK, ctx);
0a7de745 12937 if (error) {
f427ee49 12938 goto out;
0a7de745 12939 }
39037602 12940
f427ee49
A
12941 if (ndp->ni_vp) {
12942 vnode_put(ndp->ni_vp);
39037602
A
12943 error = EEXIST;
12944 } else {
f427ee49 12945 struct vnode_attr *vap;
39037602
A
12946 vnode_t vp = NULLVP;
12947
f427ee49 12948 vap = kheap_alloc(KHEAP_TEMP, sizeof(*vap), Z_WAITOK);
39037602 12949
f427ee49
A
12950 VATTR_INIT(vap);
12951 VATTR_SET(vap, va_type, VREG);
12952 VATTR_SET(vap, va_mode, 0);
12953
12954 error = vn_create(snapdvp, &vp, ndp, vap,
39037602 12955 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
0a7de745 12956 if (!error && vp) {
39037602 12957 vnode_put(vp);
0a7de745 12958 }
f427ee49
A
12959
12960 kheap_free(KHEAP_TEMP, vap, sizeof(*vap));
39037602
A
12961 }
12962
f427ee49 12963 nameidone(ndp);
39037602
A
12964 vnode_put(snapdvp);
12965 vnode_put(rvp);
f427ee49
A
12966out:
12967 kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
12968
0a7de745 12969 return error;
39037602
A
12970}
12971
12972/*
12973 * Delete a Filesystem snapshot
12974 *
12975 * get the vnode for the unnamed snapshot directory and the snapshot and
12976 * delete the snapshot.
12977 */
f427ee49 12978static int __attribute__((noinline))
39037602
A
12979snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
12980 vfs_context_t ctx)
12981{
12982 vnode_t rvp, snapdvp;
12983 int error;
f427ee49 12984 struct nameidata *ndp;
39037602 12985
f427ee49
A
12986 ndp = kheap_alloc(KHEAP_TEMP, sizeof(*ndp), Z_WAITOK);
12987
12988 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, ndp, DELETE,
39037602 12989 OP_UNLINK, ctx);
0a7de745 12990 if (error) {
39037602 12991 goto out;
0a7de745 12992 }
39037602 12993
f427ee49 12994 error = VNOP_REMOVE(snapdvp, ndp->ni_vp, &ndp->ni_cnd,
39037602
A
12995 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
12996
f427ee49
A
12997 vnode_put(ndp->ni_vp);
12998 nameidone(ndp);
39037602
A
12999 vnode_put(snapdvp);
13000 vnode_put(rvp);
13001out:
f427ee49
A
13002 kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
13003
0a7de745 13004 return error;
39037602
A
13005}
13006
13007/*
13008 * Revert a filesystem to a snapshot
13009 *
13010 * Marks the filesystem to revert to the given snapshot on next mount.
13011 */
f427ee49 13012static int __attribute__((noinline))
39037602 13013snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
0a7de745
A
13014 vfs_context_t ctx)
13015{
13016 int error;
13017 vnode_t rvp;
13018 mount_t mp;
13019 struct fs_snapshot_revert_args revert_data;
13020 struct componentname cnp;
13021 caddr_t name_buf;
13022 size_t name_len;
13023
13024 error = vnode_getfromfd(ctx, dirfd, &rvp);
13025 if (error) {
13026 return error;
13027 }
13028 mp = vnode_mount(rvp);
13029
f427ee49 13030 name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
0a7de745
A
13031 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
13032 if (error) {
f427ee49 13033 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13034 vnode_put(rvp);
13035 return error;
13036 }
813fb2f6
A
13037
13038#if CONFIG_MACF
0a7de745
A
13039 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
13040 if (error) {
f427ee49 13041 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13042 vnode_put(rvp);
13043 return error;
13044 }
13045#endif
13046
13047 /*
13048 * Grab mount_iterref so that we can release the vnode,
13049 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
13050 */
13051 error = mount_iterref(mp, 0);
13052 vnode_put(rvp);
13053 if (error) {
f427ee49 13054 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13055 return error;
13056 }
13057
13058 memset(&cnp, 0, sizeof(cnp));
13059 cnp.cn_pnbuf = (char *)name_buf;
13060 cnp.cn_nameiop = LOOKUP;
13061 cnp.cn_flags = ISLASTCN | HASBUF;
13062 cnp.cn_pnlen = MAXPATHLEN;
13063 cnp.cn_nameptr = cnp.cn_pnbuf;
13064 cnp.cn_namelen = (int)name_len;
13065 revert_data.sr_cnp = &cnp;
13066
13067 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
13068 mount_iterdrop(mp);
f427ee49 13069 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13070
13071 if (error) {
13072 /* If there was any error, try again using VNOP_IOCTL */
13073
13074 vnode_t snapdvp;
13075 struct nameidata namend;
13076
13077 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
13078 OP_LOOKUP, ctx);
13079 if (error) {
13080 return error;
13081 }
13082
13083
13084 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
13085 0, ctx);
13086
13087 vnode_put(namend.ni_vp);
13088 nameidone(&namend);
13089 vnode_put(snapdvp);
13090 vnode_put(rvp);
13091 }
13092
13093 return error;
39037602
A
13094}
13095
13096/*
13097 * rename a Filesystem snapshot
13098 *
13099 * get the vnode for the unnamed snapshot directory and the snapshot and
13100 * rename the snapshot. This is a very specialised (and simple) case of
13101 * rename(2) (which has to deal with a lot more complications). It differs
13102 * slightly from rename(2) in that EEXIST is returned if the new name exists.
13103 */
f427ee49 13104static int __attribute__((noinline))
39037602
A
13105snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
13106 __unused uint32_t flags, vfs_context_t ctx)
13107{
13108 vnode_t rvp, snapdvp;
13109 int error, i;
13110 caddr_t newname_buf;
13111 size_t name_len;
13112 vnode_t fvp;
13113 struct nameidata *fromnd, *tond;
13114 /* carving out a chunk for structs that are too big to be on stack. */
13115 struct {
13116 struct nameidata from_node;
13117 struct nameidata to_node;
13118 } * __rename_data;
13119
f427ee49 13120 __rename_data = kheap_alloc(KHEAP_TEMP, sizeof(*__rename_data), Z_WAITOK);
39037602
A
13121 fromnd = &__rename_data->from_node;
13122 tond = &__rename_data->to_node;
13123
13124 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
13125 OP_UNLINK, ctx);
0a7de745 13126 if (error) {
39037602 13127 goto out;
0a7de745 13128 }
39037602
A
13129 fvp = fromnd->ni_vp;
13130
f427ee49 13131 newname_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
39037602 13132 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
0a7de745 13133 if (error) {
39037602 13134 goto out1;
0a7de745 13135 }
39037602
A
13136
13137 /*
13138 * Some sanity checks- new name can't be empty, "." or ".." or have
13139 * slashes.
13140 * (the length returned by copyinstr includes the terminating NUL)
13141 *
13142 * The FS rename VNOP is suppossed to handle this but we'll pick it
13143 * off here itself.
13144 */
13145 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
13146 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
13147 error = EINVAL;
13148 goto out1;
13149 }
0a7de745
A
13150 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++) {
13151 ;
13152 }
39037602
A
13153 if (i < (int)name_len) {
13154 error = EINVAL;
13155 goto out1;
13156 }
13157
13158#if CONFIG_MACF
13159 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
13160 newname_buf);
0a7de745 13161 if (error) {
39037602 13162 goto out1;
0a7de745 13163 }
39037602
A
13164#endif
13165
13166 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
13167 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
13168 tond->ni_dvp = snapdvp;
13169
13170 error = namei(tond);
13171 if (error) {
13172 goto out2;
13173 } else if (tond->ni_vp) {
13174 /*
13175 * snapshot rename behaves differently than rename(2) - if the
13176 * new name exists, EEXIST is returned.
13177 */
13178 vnode_put(tond->ni_vp);
13179 error = EEXIST;
13180 goto out2;
13181 }
13182
13183 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
13184 &tond->ni_cnd, ctx);
13185
13186out2:
13187 nameidone(tond);
13188out1:
f427ee49 13189 zfree(ZV_NAMEI, newname_buf);
39037602
A
13190 vnode_put(fvp);
13191 vnode_put(snapdvp);
13192 vnode_put(rvp);
13193 nameidone(fromnd);
13194out:
f427ee49 13195 kheap_free(KHEAP_TEMP, __rename_data, sizeof(*__rename_data));
0a7de745 13196 return error;
39037602
A
13197}
13198
13199/*
13200 * Mount a Filesystem snapshot
13201 *
13202 * get the vnode for the unnamed snapshot directory and the snapshot and
13203 * mount the snapshot.
13204 */
f427ee49 13205static int __attribute__((noinline))
39037602 13206snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 13207 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602 13208{
ea3f0419 13209 mount_t mp;
39037602 13210 vnode_t rvp, snapdvp, snapvp, vp, pvp;
ea3f0419 13211 struct fs_snapshot_mount_args smnt_data;
39037602
A
13212 int error;
13213 struct nameidata *snapndp, *dirndp;
13214 /* carving out a chunk for structs that are too big to be on stack. */
13215 struct {
13216 struct nameidata snapnd;
13217 struct nameidata dirnd;
13218 } * __snapshot_mount_data;
13219
f427ee49
A
13220 __snapshot_mount_data = kheap_alloc(KHEAP_TEMP,
13221 sizeof(*__snapshot_mount_data), Z_WAITOK);
39037602
A
13222 snapndp = &__snapshot_mount_data->snapnd;
13223 dirndp = &__snapshot_mount_data->dirnd;
13224
13225 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
13226 OP_LOOKUP, ctx);
0a7de745 13227 if (error) {
39037602 13228 goto out;
0a7de745 13229 }
39037602
A
13230
13231 snapvp = snapndp->ni_vp;
13232 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
13233 error = EIO;
13234 goto out1;
13235 }
13236
13237 /* Get the vnode to be covered */
13238 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
13239 UIO_USERSPACE, directory, ctx);
13240 error = namei(dirndp);
0a7de745 13241 if (error) {
39037602 13242 goto out1;
0a7de745 13243 }
39037602
A
13244
13245 vp = dirndp->ni_vp;
13246 pvp = dirndp->ni_dvp;
ea3f0419 13247 mp = vnode_mount(rvp);
39037602
A
13248
13249 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
13250 error = EINVAL;
ea3f0419
A
13251 goto out2;
13252 }
39037602 13253
ea3f0419
A
13254#if CONFIG_MACF
13255 error = mac_mount_check_snapshot_mount(ctx, rvp, vp, &dirndp->ni_cnd, snapndp->ni_cnd.cn_nameptr,
13256 mp->mnt_vfsstat.f_fstypename);
13257 if (error) {
13258 goto out2;
39037602 13259 }
ea3f0419 13260#endif
39037602 13261
ea3f0419
A
13262 smnt_data.sm_mp = mp;
13263 smnt_data.sm_cnp = &snapndp->ni_cnd;
13264 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
13265 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
13266 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
13267
13268out2:
39037602
A
13269 vnode_put(vp);
13270 vnode_put(pvp);
13271 nameidone(dirndp);
13272out1:
13273 vnode_put(snapvp);
13274 vnode_put(snapdvp);
13275 vnode_put(rvp);
13276 nameidone(snapndp);
13277out:
f427ee49
A
13278 kheap_free(KHEAP_TEMP, __snapshot_mount_data,
13279 sizeof(*__snapshot_mount_data));
0a7de745 13280 return error;
39037602
A
13281}
13282
813fb2f6
A
13283/*
13284 * Root from a snapshot of the filesystem
13285 *
13286 * Marks the filesystem to root from the given snapshot on next boot.
13287 */
f427ee49 13288static int __attribute__((noinline))
813fb2f6 13289snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
0a7de745
A
13290 vfs_context_t ctx)
13291{
13292 int error;
13293 vnode_t rvp;
13294 mount_t mp;
13295 struct fs_snapshot_root_args root_data;
13296 struct componentname cnp;
13297 caddr_t name_buf;
13298 size_t name_len;
13299
13300 error = vnode_getfromfd(ctx, dirfd, &rvp);
13301 if (error) {
13302 return error;
13303 }
13304 mp = vnode_mount(rvp);
13305
f427ee49 13306 name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
0a7de745
A
13307 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
13308 if (error) {
f427ee49 13309 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13310 vnode_put(rvp);
13311 return error;
13312 }
13313
13314 // XXX MAC checks ?
13315
13316 /*
13317 * Grab mount_iterref so that we can release the vnode,
13318 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
13319 */
13320 error = mount_iterref(mp, 0);
13321 vnode_put(rvp);
13322 if (error) {
f427ee49 13323 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13324 return error;
13325 }
13326
13327 memset(&cnp, 0, sizeof(cnp));
13328 cnp.cn_pnbuf = (char *)name_buf;
13329 cnp.cn_nameiop = LOOKUP;
13330 cnp.cn_flags = ISLASTCN | HASBUF;
13331 cnp.cn_pnlen = MAXPATHLEN;
13332 cnp.cn_nameptr = cnp.cn_pnbuf;
13333 cnp.cn_namelen = (int)name_len;
13334 root_data.sr_cnp = &cnp;
13335
13336 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
13337
13338 mount_iterdrop(mp);
f427ee49 13339 zfree(ZV_NAMEI, name_buf);
0a7de745
A
13340
13341 return error;
813fb2f6
A
13342}
13343
39037602
A
13344/*
13345 * FS snapshot operations dispatcher
13346 */
13347int
13348fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
13349 __unused int32_t *retval)
13350{
13351 int error;
13352 vfs_context_t ctx = vfs_context_current();
13353
813fb2f6
A
13354 AUDIT_ARG(fd, uap->dirfd);
13355 AUDIT_ARG(value32, uap->op);
13356
39037602 13357 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
0a7de745
A
13358 if (error) {
13359 return error;
13360 }
13361
13362 /*
f427ee49
A
13363 * Enforce user authorization for snapshot modification operations,
13364 * or if trying to root from snapshot.
0a7de745 13365 */
f427ee49 13366 if (uap->op != SNAPSHOT_OP_MOUNT) {
0a7de745
A
13367 vnode_t dvp = NULLVP;
13368 vnode_t devvp = NULLVP;
13369 mount_t mp;
13370
13371 error = vnode_getfromfd(ctx, uap->dirfd, &dvp);
13372 if (error) {
13373 return error;
13374 }
13375 mp = vnode_mount(dvp);
13376 devvp = mp->mnt_devvp;
13377
13378 /* get an iocount on devvp */
13379 if (devvp == NULLVP) {
13380 error = vnode_lookup(mp->mnt_vfsstat.f_mntfromname, 0, &devvp, ctx);
13381 /* for mounts which arent block devices */
13382 if (error == ENOENT) {
13383 error = ENXIO;
13384 }
13385 } else {
13386 error = vnode_getwithref(devvp);
13387 }
13388
13389 if (error) {
13390 vnode_put(dvp);
13391 return error;
13392 }
13393
13394 if ((vfs_context_issuser(ctx) == 0) &&
13395 (vnode_authorize(devvp, NULL, KAUTH_VNODE_WRITE_DATA, ctx) != 0)) {
13396 error = EPERM;
13397 }
13398 vnode_put(dvp);
13399 vnode_put(devvp);
13400
13401 if (error) {
13402 return error;
13403 }
13404 }
39037602
A
13405
13406 switch (uap->op) {
13407 case SNAPSHOT_OP_CREATE:
13408 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
13409 break;
13410 case SNAPSHOT_OP_DELETE:
13411 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
13412 break;
13413 case SNAPSHOT_OP_RENAME:
13414 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
13415 uap->flags, ctx);
13416 break;
13417 case SNAPSHOT_OP_MOUNT:
13418 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
13419 uap->data, uap->flags, ctx);
13420 break;
0a7de745
A
13421 case SNAPSHOT_OP_REVERT:
13422 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
13423 break;
d9a64523 13424#if CONFIG_MNT_ROOTSNAP
813fb2f6
A
13425 case SNAPSHOT_OP_ROOT:
13426 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
13427 break;
d9a64523 13428#endif /* CONFIG_MNT_ROOTSNAP */
39037602
A
13429 default:
13430 error = ENOSYS;
13431 }
13432
0a7de745 13433 return error;
39037602 13434}