]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-4903.270.47.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
5ba3f43e 2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
5ba3f43e
A
110#include <vfs/vfs_disk_conditioner.h>
111
b0d623f7 112#include <security/audit/audit.h>
e5568f75
A
113#include <bsm/audit_kevents.h>
114
91447636
A
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
6d2010ae 118#include <kern/task.h>
91447636
A
119
120#include <vm/vm_pageout.h>
39037602 121#include <vm/vm_protos.h>
1c79356b 122
91447636 123#include <libkern/OSAtomic.h>
b0d623f7 124#include <pexpert/pexpert.h>
3e170ce0 125#include <IOKit/IOBSD.h>
55e303ae 126
490019cf
A
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
2d21ac55
A
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
1c79356b 135
/*
 * GET_PATH / RELEASE_PATH: obtain and give back a scratch path buffer.
 *
 * With CONFIG_FSE the buffer is handed out by get_pathbuff() and returned
 * with release_pathbuff(); otherwise a MAXPATHLEN-byte buffer is taken
 * from, and returned to, the M_NAMEI zone.
 *
 * NOTE: every expansion already ends in ';'.  A call site written as
 * "GET_PATH(p);" therefore expands to the statement followed by an empty
 * statement, which matters inside an unbraced if/else.
 */
#if CONFIG_FSE
#define GET_PATH(x)     (x) = get_pathbuff();
#define RELEASE_PATH(x) release_pathbuff(x);
#else /* !CONFIG_FSE */
#define GET_PATH(x)     MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x) FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
147
a39ff7e2
A
148#ifndef HFS_GET_BOOT_INFO
149#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
150#endif
151
152#ifndef HFS_SET_BOOT_INFO
153#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
154#endif
155
156#ifndef APFSIOC_REVERT_TO_SNAPSHOT
157#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
158#endif
159
5ba3f43e
A
160extern void disk_conditioner_unmount(mount_t mp);
161
2d21ac55
A
162/* struct for checkdirs iteration */
163struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166};
167/* callback for checkdirs iteration */
168static int checkdirs_callback(proc_t p, void * arg);
1c79356b 169
91447636 170static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 171static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
172void enablequotas(struct mount *mp, vfs_context_t ctx);
173static int getfsstat_callback(mount_t mp, void * arg);
174static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 175static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 176static int sync_callback(mount_t, void *);
39037602 177static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
0a7de745
A
178 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
179 boolean_t partial_copy);
b0d623f7 180static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
0a7de745 181 user_addr_t bufp);
b0d623f7 182static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae 183static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
0a7de745
A
184 struct componentname *cnp, user_addr_t fsmountargs,
185 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
186 vfs_context_t ctx);
6d2010ae
A
187void vfs_notify_mount(vnode_t pdvp);
188
189int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 190
fe8ab488
A
191struct fd_vn_data * fg_vn_data_alloc(void);
192
c18c124e
A
193/*
194 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
195 * Concurrent lookups (or lookups by ids) on hard links can cause the
196 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
197 * does) to return ENOENT as the path cannot be returned from the name cache
198 * alone. We have no option but to retry and hope to get one namei->reverse path
199 * generation done without an intervening lookup, lookup by id on the hard link
200 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
201 * which currently are the MAC hooks for rename, unlink and rmdir.
202 */
203#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
204
fe8ab488
A
205static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
206
207static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
208
b7266188 209#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
210static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
211static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
212static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
213static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
214static void mount_end_update(mount_t mp);
6d2010ae 215static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
216#endif /* CONFIG_IMGSRC_ACCESS */
217
d9a64523
A
218//snapshot functions
219#if CONFIG_MNT_ROOTSNAP
220static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
221#else
222static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
223#endif
224
2d21ac55
A
225int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
226
227__private_extern__
228int sync_internal(void);
229
2d21ac55 230__private_extern__
c18c124e 231int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 232
fe8ab488
A
233extern lck_grp_t *fd_vn_lck_grp;
234extern lck_grp_attr_t *fd_vn_lck_grp_attr;
235extern lck_attr_t *fd_vn_lck_attr;
236
2d21ac55
A
237/*
238 * incremented each time a mount or unmount operation occurs
239 * used to invalidate the cached value of the rootvp in the
240 * mount structure utilized by cache_lookup_path
241 */
b0d623f7 242uint32_t mount_generation = 0;
1c79356b
A
243
244/* counts number of mount and unmount operations */
0a7de745 245unsigned int vfs_nummntops = 0;
1c79356b 246
39236c6e
A
247extern const struct fileops vnops;
248#if CONFIG_APPLEDOUBLE
39037602 249extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 250#endif /* CONFIG_APPLEDOUBLE */
91447636 251
1c79356b
A
252/*
253 * Virtual File System System Calls
254 */
255
490019cf 256#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
257/*
258 * Private in-kernel mounting spi (NFS only, not exported)
259 */
0a7de745 260__private_extern__
6d2010ae
A
261boolean_t
262vfs_iskernelmount(mount_t mp)
263{
0a7de745 264 return (mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE;
6d2010ae
A
265}
266
0a7de745 267__private_extern__
6d2010ae
A
268int
269kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
0a7de745 270 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
6d2010ae
A
271{
272 struct nameidata nd;
273 boolean_t did_namei;
274 int error;
275
39037602 276 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
0a7de745 277 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
6d2010ae
A
278
279 /*
280 * Get the vnode to be covered if it's not supplied
281 */
282 if (vp == NULLVP) {
283 error = namei(&nd);
0a7de745
A
284 if (error) {
285 return error;
286 }
6d2010ae
A
287 vp = nd.ni_vp;
288 pvp = nd.ni_dvp;
289 did_namei = TRUE;
290 } else {
291 char *pnbuf = CAST_DOWN(char *, path);
292
293 nd.ni_cnd.cn_pnbuf = pnbuf;
294 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
295 did_namei = FALSE;
296 }
297
298 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
0a7de745 299 syscall_flags, kern_flags, NULL, TRUE, ctx);
6d2010ae
A
300
301 if (did_namei) {
302 vnode_put(vp);
303 vnode_put(pvp);
304 nameidone(&nd);
305 }
306
0a7de745 307 return error;
6d2010ae 308}
fe8ab488 309#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 310
1c79356b
A
311/*
312 * Mount a file system.
313 */
1c79356b
A
314/* ARGSUSED */
315int
b0d623f7 316mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
317{
318 struct __mac_mount_args muap;
319
320 muap.type = uap->type;
321 muap.path = uap->path;
322 muap.flags = uap->flags;
323 muap.data = uap->data;
324 muap.mac_p = USER_ADDR_NULL;
0a7de745 325 return __mac_mount(p, &muap, retval);
2d21ac55
A
326}
327
5ba3f43e
A
328int
329fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
330{
0a7de745
A
331 struct componentname cn;
332 vfs_context_t ctx = vfs_context_current();
333 size_t dummy = 0;
334 int error;
335 int flags = uap->flags;
336 char fstypename[MFSNAMELEN];
337 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
338 vnode_t pvp;
339 vnode_t vp;
5ba3f43e
A
340
341 AUDIT_ARG(fd, uap->fd);
342 AUDIT_ARG(fflags, flags);
343 /* fstypename will get audited by mount_common */
344
345 /* Sanity check the flags */
0a7de745
A
346 if (flags & (MNT_IMGSRC_BY_INDEX | MNT_ROOTFS)) {
347 return ENOTSUP;
5ba3f43e
A
348 }
349
350 if (flags & MNT_UNION) {
0a7de745 351 return EPERM;
5ba3f43e
A
352 }
353
354 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
355 if (error) {
0a7de745 356 return error;
5ba3f43e
A
357 }
358
359 if ((error = file_vnode(uap->fd, &vp)) != 0) {
0a7de745 360 return error;
5ba3f43e
A
361 }
362
363 if ((error = vnode_getwithref(vp)) != 0) {
364 file_drop(uap->fd);
0a7de745 365 return error;
5ba3f43e
A
366 }
367
368 pvp = vnode_getparent(vp);
369 if (pvp == NULL) {
370 vnode_put(vp);
371 file_drop(uap->fd);
0a7de745 372 return EINVAL;
5ba3f43e
A
373 }
374
375 memset(&cn, 0, sizeof(struct componentname));
376 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
377 cn.cn_pnlen = MAXPATHLEN;
378
0a7de745 379 if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
5ba3f43e
A
380 FREE(cn.cn_pnbuf, M_TEMP);
381 vnode_put(pvp);
382 vnode_put(vp);
383 file_drop(uap->fd);
0a7de745 384 return error;
5ba3f43e
A
385 }
386
387 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
388
389 FREE(cn.cn_pnbuf, M_TEMP);
390 vnode_put(pvp);
391 vnode_put(vp);
392 file_drop(uap->fd);
393
0a7de745 394 return error;
5ba3f43e
A
395}
396
6d2010ae 397void
39037602 398vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
399{
400 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
401 lock_vnode_and_post(pdvp, NOTE_WRITE);
402}
403
b0d623f7
A
404/*
405 * __mac_mount:
406 * Mount a file system taking into account MAC label behavior.
407 * See mount(2) man page for more information
408 *
409 * Parameters: p Process requesting the mount
410 * uap User argument descriptor (see below)
39037602 411 * retval (ignored)
b0d623f7
A
412 *
413 * Indirect: uap->type Filesystem type
414 * uap->path Path to mount
39037602
A
415 * uap->data Mount arguments
416 * uap->mac_p MAC info
b0d623f7 417 * uap->flags Mount flags
39037602 418 *
b0d623f7
A
419 *
420 * Returns: 0 Success
421 * !0 Not success
422 */
6d2010ae
A
423boolean_t root_fs_upgrade_try = FALSE;
424
2d21ac55 425int
b0d623f7 426__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 427{
39236c6e 428 vnode_t pvp = NULL;
0a7de745 429 vnode_t vp = NULL;
39236c6e 430 int need_nameidone = 0;
6d2010ae
A
431 vfs_context_t ctx = vfs_context_current();
432 char fstypename[MFSNAMELEN];
433 struct nameidata nd;
0a7de745 434 size_t dummy = 0;
6d2010ae
A
435 char *labelstr = NULL;
436 int flags = uap->flags;
437 int error;
39037602 438#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 439 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
440#else
441#pragma unused(p)
442#endif
6d2010ae
A
443 /*
444 * Get the fs type name from user space
445 */
446 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
0a7de745
A
447 if (error) {
448 return error;
449 }
6d2010ae
A
450
451 /*
452 * Get the vnode to be covered
453 */
39037602 454 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
0a7de745 455 UIO_USERSPACE, uap->path, ctx);
6d2010ae 456 error = namei(&nd);
39236c6e
A
457 if (error) {
458 goto out;
459 }
460 need_nameidone = 1;
6d2010ae
A
461 vp = nd.ni_vp;
462 pvp = nd.ni_dvp;
39037602 463
6d2010ae
A
464#ifdef CONFIG_IMGSRC_ACCESS
465 /* Mounting image source cannot be batched with other operations */
466 if (flags == MNT_IMGSRC_BY_INDEX) {
467 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
0a7de745 468 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
6d2010ae
A
469 goto out;
470 }
471#endif /* CONFIG_IMGSRC_ACCESS */
472
473#if CONFIG_MACF
474 /*
475 * Get the label string (if any) from user space
476 */
477 if (uap->mac_p != USER_ADDR_NULL) {
478 struct user_mac mac;
479 size_t ulen = 0;
480
481 if (is_64bit) {
482 struct user64_mac mac64;
483 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
484 mac.m_buflen = mac64.m_buflen;
485 mac.m_string = mac64.m_string;
486 } else {
487 struct user32_mac mac32;
488 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
489 mac.m_buflen = mac32.m_buflen;
490 mac.m_string = mac32.m_string;
491 }
0a7de745 492 if (error) {
6d2010ae 493 goto out;
0a7de745 494 }
6d2010ae
A
495 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
496 (mac.m_buflen < 2)) {
497 error = EINVAL;
498 goto out;
499 }
500 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
501 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
502 if (error) {
503 goto out;
504 }
505 AUDIT_ARG(mac_string, labelstr);
506 }
507#endif /* CONFIG_MACF */
508
509 AUDIT_ARG(fflags, flags);
510
4bd07ac2
A
511#if SECURE_KERNEL
512 if (flags & MNT_UNION) {
513 /* No union mounts on release kernels */
514 error = EPERM;
515 goto out;
516 }
517#endif
518
6d2010ae 519 if ((vp->v_flag & VROOT) &&
0a7de745 520 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
39236c6e 521 if (!(flags & MNT_UNION)) {
6d2010ae 522 flags |= MNT_UPDATE;
0a7de745 523 } else {
39037602 524 /*
39236c6e 525 * For a union mount on '/', treat it as fresh
39037602
A
526 * mount instead of update.
527 * Otherwise, union mouting on '/' used to panic the
528 * system before, since mnt_vnodecovered was found to
529 * be NULL for '/' which is required for unionlookup
39236c6e
A
530 * after it gets ENOENT on union mount.
531 */
532 flags = (flags & ~(MNT_UPDATE));
533 }
534
4bd07ac2 535#if SECURE_KERNEL
39236c6e
A
536 if ((flags & MNT_RDONLY) == 0) {
537 /* Release kernels are not allowed to mount "/" as rw */
538 error = EPERM;
39037602 539 goto out;
39236c6e 540 }
39236c6e
A
541#endif
542 /*
543 * See 7392553 for more details on why this check exists.
544 * Suffice to say: If this check is ON and something tries
545 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
546 * bitmap optimization.
547 */
6d2010ae 548#if CHECK_CS_VALIDATION_BITMAP
0a7de745 549 if ((flags & MNT_RDONLY) == 0) {
6d2010ae
A
550 root_fs_upgrade_try = TRUE;
551 }
552#endif
553 }
554
555 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
0a7de745 556 labelstr, FALSE, ctx);
39236c6e 557
6d2010ae 558out:
39236c6e 559
6d2010ae 560#if CONFIG_MACF
0a7de745 561 if (labelstr) {
6d2010ae 562 FREE(labelstr, M_MACTEMP);
0a7de745 563 }
6d2010ae
A
564#endif /* CONFIG_MACF */
565
39236c6e
A
566 if (vp) {
567 vnode_put(vp);
568 }
569 if (pvp) {
570 vnode_put(pvp);
571 }
572 if (need_nameidone) {
573 nameidone(&nd);
574 }
6d2010ae 575
0a7de745 576 return error;
6d2010ae
A
577}
578
579/*
580 * common mount implementation (final stage of mounting)
0a7de745 581 *
6d2010ae
A
582 * Arguments:
583 * fstypename file system type (i.e. its vfs name)
584 * pvp parent of covered vnode
585 * vp covered vnode
586 * cnp component name (ie path) of covered vnode
587 * flags generic mount flags
588 * fsmountargs file system specific data
589 * labelstr optional MAC label
590 * kernelmount TRUE for mounts initiated from inside the kernel
591 * ctx caller's context
592 */
593static int
594mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
0a7de745
A
595 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
596 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
6d2010ae 597{
39236c6e
A
598#if !CONFIG_MACF
599#pragma unused(labelstr)
600#endif
91447636
A
601 struct vnode *devvp = NULLVP;
602 struct vnode *device_vnode = NULLVP;
2d21ac55
A
603#if CONFIG_MACF
604 struct vnode *rvp;
605#endif
1c79356b 606 struct mount *mp;
6601e61a 607 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 608 struct proc *p = vfs_context_proc(ctx);
91447636 609 int error, flag = 0;
91447636 610 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
611 int ronly = 0;
612 int mntalloc = 0;
b0d623f7 613 boolean_t vfsp_ref = FALSE;
743b1565 614 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
615 boolean_t did_rele = FALSE;
616 boolean_t have_usecount = FALSE;
9bccf70c 617
1c79356b 618 /*
6d2010ae 619 * Process an update for an existing mount
1c79356b 620 */
6d2010ae 621 if (flags & MNT_UPDATE) {
1c79356b 622 if ((vp->v_flag & VROOT) == 0) {
91447636
A
623 error = EINVAL;
624 goto out1;
1c79356b
A
625 }
626 mp = vp->v_mount;
d12e1678 627
91447636 628 /* unmount in progress return error */
b0d623f7 629 mount_lock_spin(mp);
91447636
A
630 if (mp->mnt_lflag & MNT_LUNMOUNT) {
631 mount_unlock(mp);
632 error = EBUSY;
633 goto out1;
d12e1678 634 }
91447636
A
635 mount_unlock(mp);
636 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 637 is_rwlock_locked = TRUE;
1c79356b
A
638 /*
639 * We only allow the filesystem to be reloaded if it
640 * is currently mounted read-only.
641 */
6d2010ae 642 if ((flags & MNT_RELOAD) &&
1c79356b 643 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
644 error = ENOTSUP;
645 goto out1;
1c79356b 646 }
b7266188 647
316670eb
A
648 /*
649 * If content protection is enabled, update mounts are not
650 * allowed to turn it off.
651 */
39037602 652 if ((mp->mnt_flag & MNT_CPROTECT) &&
0a7de745 653 ((flags & MNT_CPROTECT) == 0)) {
316670eb
A
654 error = EINVAL;
655 goto out1;
656 }
657
39037602 658#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
659 /* Can't downgrade the backer of the root FS */
660 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
0a7de745 661 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
662 error = ENOTSUP;
663 goto out1;
664 }
665#endif /* CONFIG_IMGSRC_ACCESS */
666
1c79356b
A
667 /*
668 * Only root, or the user that did the original mount is
669 * permitted to update it.
670 */
2d21ac55
A
671 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
672 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
673 goto out1;
674 }
675#if CONFIG_MACF
676 error = mac_mount_check_remount(ctx, mp);
677 if (error != 0) {
91447636 678 goto out1;
1c79356b 679 }
2d21ac55 680#endif
1c79356b 681 /*
91447636
A
682 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
683 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 684 */
6d2010ae
A
685 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
686 flags |= MNT_NOSUID | MNT_NODEV;
0a7de745 687 if (mp->mnt_flag & MNT_NOEXEC) {
6d2010ae 688 flags |= MNT_NOEXEC;
0a7de745 689 }
1c79356b 690 }
d12e1678
A
691 flag = mp->mnt_flag;
692
316670eb
A
693
694
6d2010ae 695 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 696
91447636 697 vfsp = mp->mnt_vtable;
1c79356b
A
698 goto update;
699 }
5ba3f43e 700
1c79356b 701 /*
91447636 702 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
703 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
704 */
6d2010ae
A
705 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
706 flags |= MNT_NOSUID | MNT_NODEV;
0a7de745 707 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
6d2010ae 708 flags |= MNT_NOEXEC;
0a7de745 709 }
1c79356b 710 }
91447636 711
55e303ae
A
712 /* XXXAUDIT: Should we capture the type on the error path as well? */
713 AUDIT_ARG(text, fstypename);
91447636 714 mount_list_lock();
0a7de745 715 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
b0d623f7
A
716 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
717 vfsp->vfc_refcount++;
718 vfsp_ref = TRUE;
1c79356b 719 break;
b0d623f7 720 }
0a7de745 721 }
91447636 722 mount_list_unlock();
1c79356b 723 if (vfsp == NULL) {
91447636
A
724 error = ENODEV;
725 goto out1;
1c79356b 726 }
6d2010ae
A
727
728 /*
729 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
730 */
731 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
732 error = EINVAL; /* unsupported request */
2d21ac55 733 goto out1;
6d2010ae
A
734 }
735
736 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
737 if (error != 0) {
91447636 738 goto out1;
1c79356b 739 }
1c79356b
A
740
741 /*
6d2010ae 742 * Allocate and initialize the filesystem (mount_t)
1c79356b 743 */
b0d623f7 744 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
0a7de745 745 M_MOUNT, M_WAITOK);
b0d623f7 746 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 747 mntalloc = 1;
0b4e3aa0
A
748
749 /* Initialize the default IO constraints */
750 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
751 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
752 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
753 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
754 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 755 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
756 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
757 mp->mnt_ioscale = 1;
2d21ac55
A
758 mp->mnt_ioflags = 0;
759 mp->mnt_realrootvp = NULLVP;
760 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
761
762 TAILQ_INIT(&mp->mnt_vnodelist);
763 TAILQ_INIT(&mp->mnt_workerqueue);
764 TAILQ_INIT(&mp->mnt_newvnodes);
765 mount_lock_init(mp);
766 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 767 is_rwlock_locked = TRUE;
1c79356b 768 mp->mnt_op = vfsp->vfc_vfsops;
91447636 769 mp->mnt_vtable = vfsp;
91447636 770 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 771 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
772 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
773 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 774 mp->mnt_vnodecovered = vp;
2d21ac55 775 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
776 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
777 mp->mnt_devbsdunit = 0;
1c79356b 778
91447636
A
779 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
780 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 781
490019cf 782#if NFSCLIENT || DEVFS || ROUTEFS
0a7de745 783 if (kernelmount) {
6d2010ae 784 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
0a7de745
A
785 }
786 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
6d2010ae 787 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
0a7de745 788 }
fe8ab488 789#endif /* NFSCLIENT || DEVFS */
6d2010ae 790
1c79356b 791update:
5ba3f43e 792
1c79356b
A
793 /*
794 * Set the mount level flags.
795 */
0a7de745 796 if (flags & MNT_RDONLY) {
1c79356b 797 mp->mnt_flag |= MNT_RDONLY;
0a7de745 798 } else if (mp->mnt_flag & MNT_RDONLY) {
6d2010ae
A
799 // disallow read/write upgrades of file systems that
800 // had the TYPENAME_OVERRIDE feature set.
801 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
802 error = EPERM;
803 goto out1;
804 }
1c79356b 805 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 806 }
0b4e3aa0 807 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
0a7de745
A
808 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
809 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
810 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
811 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
812
813#if SECURE_KERNEL
814#if !CONFIG_MNT_SUID
815 /*
5ba3f43e 816 * On release builds of iOS based platforms, always enforce NOSUID on
813fb2f6
A
817 * all mounts. We do this here because we can catch update mounts as well as
818 * non-update mounts in this case.
819 */
820 mp->mnt_flag |= (MNT_NOSUID);
821#endif
822#endif
823
6d2010ae 824 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
0a7de745
A
825 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
826 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
827 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
828 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
829
830#if CONFIG_MACF
6d2010ae 831 if (flags & MNT_MULTILABEL) {
2d21ac55
A
832 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
833 error = EINVAL;
834 goto out1;
835 }
836 mp->mnt_flag |= MNT_MULTILABEL;
837 }
838#endif
6d2010ae
A
839 /*
840 * Process device path for local file systems if requested
841 */
39037602
A
842 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
843 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 844 if (vfs_context_is64bit(ctx)) {
0a7de745 845 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
39037602 846 goto out1;
0a7de745 847 }
91447636
A
848 fsmountargs += sizeof(devpath);
849 } else {
b0d623f7 850 user32_addr_t tmp;
0a7de745 851 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
39037602 852 goto out1;
0a7de745 853 }
91447636
A
854 /* munge into LP64 addr */
855 devpath = CAST_USER_ADDR_T(tmp);
856 fsmountargs += sizeof(tmp);
857 }
858
6d2010ae 859 /* Lookup device and authorize access to it */
91447636 860 if ((devpath)) {
6d2010ae
A
861 struct nameidata nd;
862
863 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
0a7de745 864 if ((error = namei(&nd))) {
91447636 865 goto out1;
0a7de745 866 }
91447636 867
3e170ce0 868 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 869 devvp = nd.ni_vp;
91447636 870
6d2010ae 871 nameidone(&nd);
91447636
A
872
873 if (devvp->v_type != VBLK) {
874 error = ENOTBLK;
875 goto out2;
876 }
877 if (major(devvp->v_rdev) >= nblkdev) {
878 error = ENXIO;
879 goto out2;
880 }
881 /*
0a7de745
A
882 * If mount by non-root, then verify that user has necessary
883 * permissions on the device.
884 */
2d21ac55 885 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
886 mode_t accessmode = KAUTH_VNODE_READ_DATA;
887
0a7de745 888 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
91447636 889 accessmode |= KAUTH_VNODE_WRITE_DATA;
0a7de745
A
890 }
891 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) {
91447636 892 goto out2;
0a7de745 893 }
91447636
A
894 }
895 }
6d2010ae
A
896 /* On first mount, preflight and open device */
897 if (devpath && ((flags & MNT_UPDATE) == 0)) {
0a7de745 898 if ((error = vnode_ref(devvp))) {
91447636 899 goto out2;
0a7de745 900 }
91447636 901 /*
0a7de745
A
902 * Disallow multiple mounts of the same device.
903 * Disallow mounting of a device that is currently in use
904 * (except for root, which might share swap device for miniroot).
905 * Flush out any old buffers remaining from a previous use.
906 */
907 if ((error = vfs_mountedon(devvp))) {
91447636 908 goto out3;
0a7de745 909 }
39037602 910
91447636
A
911 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
912 error = EBUSY;
913 goto out3;
914 }
0a7de745 915 if ((error = VNOP_FSYNC(devvp, MNT_WAIT, ctx))) {
91447636
A
916 error = ENOTBLK;
917 goto out3;
918 }
0a7de745 919 if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
91447636 920 goto out3;
0a7de745 921 }
91447636
A
922
923 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
924#if CONFIG_MACF
925 error = mac_vnode_check_open(ctx,
926 devvp,
0a7de745
A
927 ronly ? FREAD : FREAD | FWRITE);
928 if (error) {
2d21ac55 929 goto out3;
0a7de745 930 }
2d21ac55 931#endif /* MAC */
0a7de745 932 if ((error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, ctx))) {
91447636 933 goto out3;
0a7de745 934 }
91447636
A
935
936 mp->mnt_devvp = devvp;
937 device_vnode = devvp;
6d2010ae 938 } else if ((mp->mnt_flag & MNT_RDONLY) &&
0a7de745
A
939 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
940 (device_vnode = mp->mnt_devvp)) {
6d2010ae
A
941 dev_t dev;
942 int maj;
943 /*
944 * If upgrade to read-write by non-root, then verify
945 * that user has necessary permissions on the device.
946 */
947 vnode_getalways(device_vnode);
b0d623f7 948
6d2010ae 949 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 950 (error = vnode_authorize(device_vnode, NULL,
0a7de745
A
951 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
952 ctx)) != 0) {
6d2010ae
A
953 vnode_put(device_vnode);
954 goto out2;
955 }
b0d623f7 956
6d2010ae
A
957 /* Tell the device that we're upgrading */
958 dev = (dev_t)device_vnode->v_rdev;
959 maj = major(dev);
b0d623f7 960
0a7de745 961 if ((u_int)maj >= (u_int)nblkdev) {
6d2010ae 962 panic("Volume mounted on a device with invalid major number.");
0a7de745 963 }
b0d623f7 964
6d2010ae
A
965 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
966 vnode_put(device_vnode);
91447636 967 device_vnode = NULLVP;
6d2010ae
A
968 if (error != 0) {
969 goto out2;
970 }
91447636
A
971 }
972 }
2d21ac55 973#if CONFIG_MACF
6d2010ae 974 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
975 mac_mount_label_init(mp);
976 mac_mount_label_associate(ctx, mp);
977 }
6d2010ae
A
978 if (labelstr) {
979 if ((flags & MNT_UPDATE) != 0) {
980 error = mac_mount_check_label_update(ctx, mp);
0a7de745 981 if (error != 0) {
2d21ac55 982 goto out3;
0a7de745 983 }
2d21ac55 984 }
2d21ac55
A
985 }
986#endif
1c79356b
A
987 /*
988 * Mount the filesystem.
989 */
39037602
A
990 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
991 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
992 (caddr_t)fsmountargs, 0, ctx);
993 } else {
994 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
995 }
d12e1678 996
6d2010ae 997 if (flags & MNT_UPDATE) {
0a7de745 998 if (mp->mnt_kern_flag & MNTK_WANTRDWR) {
1c79356b 999 mp->mnt_flag &= ~MNT_RDONLY;
0a7de745
A
1000 }
1001 mp->mnt_flag &= ~
1c79356b 1002 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
0a7de745
A
1003 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
1004 if (error) {
6d2010ae 1005 mp->mnt_flag = flag; /* restore flag value */
0a7de745 1006 }
91447636
A
1007 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
1008 lck_rw_done(&mp->mnt_rwlock);
743b1565 1009 is_rwlock_locked = FALSE;
0a7de745 1010 if (!error) {
2d21ac55 1011 enablequotas(mp, ctx);
0a7de745 1012 }
6d2010ae 1013 goto exit;
1c79356b 1014 }
6d2010ae 1015
1c79356b
A
1016 /*
1017 * Put the new filesystem on the mount list after root.
1018 */
6601e61a 1019 if (error == 0) {
0a7de745 1020 struct vfs_attr vfsattr;
2d21ac55
A
1021#if CONFIG_MACF
1022 if (vfs_flags(mp) & MNT_MULTILABEL) {
1023 error = VFS_ROOT(mp, &rvp, ctx);
1024 if (error) {
1025 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1026 goto out3;
1027 }
2d21ac55 1028 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
0a7de745 1029 /*
b0d623f7
A
1030 * drop reference provided by VFS_ROOT
1031 */
1032 vnode_put(rvp);
1033
0a7de745 1034 if (error) {
2d21ac55 1035 goto out3;
0a7de745 1036 }
2d21ac55 1037 }
0a7de745 1038#endif /* MAC */
2d21ac55
A
1039
1040 vnode_lock_spin(vp);
1041 CLR(vp->v_flag, VMOUNT);
91447636
A
1042 vp->v_mountedhere = mp;
1043 vnode_unlock(vp);
1044
2d21ac55
A
1045 /*
1046 * taking the name_cache_lock exclusively will
1047 * insure that everyone is out of the fast path who
1048 * might be trying to use a now stale copy of
1049 * vp->v_mountedhere->mnt_realrootvp
1050 * bumping mount_generation causes the cached values
1051 * to be invalidated
1052 */
1053 name_cache_lock();
1054 mount_generation++;
1055 name_cache_unlock();
1056
b0d623f7
A
1057 error = vnode_ref(vp);
1058 if (error != 0) {
1059 goto out4;
1060 }
1061
1062 have_usecount = TRUE;
91447636 1063
2d21ac55 1064 error = checkdirs(vp, ctx);
0a7de745 1065 if (error != 0) {
6601e61a
A
1066 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1067 goto out4;
1068 }
39037602
A
1069 /*
1070 * there is no cleanup code here so I have made it void
91447636
A
1071 * we need to revisit this
1072 */
2d21ac55 1073 (void)VFS_START(mp, 0, ctx);
1c79356b 1074
6d2010ae
A
1075 if (mount_list_add(mp) != 0) {
1076 /*
1077 * The system is shutting down trying to umount
1078 * everything, so fail with a plausible errno.
1079 */
1080 error = EBUSY;
b0d623f7
A
1081 goto out4;
1082 }
6601e61a
A
1083 lck_rw_done(&mp->mnt_rwlock);
1084 is_rwlock_locked = FALSE;
1085
2d21ac55
A
1086 /* Check if this mounted file system supports EAs or named streams. */
1087 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1088 VFSATTR_INIT(&vfsattr);
1089 VFSATTR_WANTED(&vfsattr, f_capabilities);
1090 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 1091 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
1092 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1093 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1094 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1095 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1096 }
1097#if NAMEDSTREAMS
1098 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1099 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1100 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1101 }
1102#endif
1103 /* Check if this file system supports path from id lookups. */
1104 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1105 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1106 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1107 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1108 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1109 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1110 }
39037602
A
1111
1112 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
0a7de745 1113 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
39037602
A
1114 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1115 }
2d21ac55
A
1116 }
1117 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1118 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1119 }
1120 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1121 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1122 }
1c79356b 1123 /* increment the operations count */
b0d623f7 1124 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1125 enablequotas(mp, ctx);
91447636
A
1126
1127 if (device_vnode) {
1128 device_vnode->v_specflags |= SI_MOUNTEDON;
1129
1130 /*
1131 * cache the IO attributes for the underlying physical media...
1132 * an error return indicates the underlying driver doesn't
1133 * support all the queries necessary... however, reasonable
1134 * defaults will have been set, so no reason to bail or care
1135 */
1136 vfs_init_io_attributes(device_vnode, mp);
39037602 1137 }
6601e61a
A
1138
1139 /* Now that mount is setup, notify the listeners */
6d2010ae 1140 vfs_notify_mount(pvp);
3e170ce0 1141 IOBSDMountChange(mp, kIOMountChangeMount);
1c79356b 1142 } else {
6d2010ae
A
1143 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1144 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1145 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
0a7de745 1146 mp->mnt_vtable->vfc_name, error);
6d2010ae
A
1147 }
1148
2d21ac55 1149 vnode_lock_spin(vp);
1c79356b 1150 CLR(vp->v_flag, VMOUNT);
6601e61a 1151 vnode_unlock(vp);
91447636
A
1152 mount_list_lock();
1153 mp->mnt_vtable->vfc_refcount--;
1154 mount_list_unlock();
55e303ae 1155
0a7de745 1156 if (device_vnode) {
91447636 1157 vnode_rele(device_vnode);
0a7de745 1158 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD | FWRITE, ctx);
91447636
A
1159 }
1160 lck_rw_done(&mp->mnt_rwlock);
743b1565 1161 is_rwlock_locked = FALSE;
39037602 1162
6d2010ae
A
1163 /*
1164 * if we get here, we have a mount structure that needs to be freed,
1165 * but since the coveredvp hasn't yet been updated to point at it,
1166 * no need to worry about other threads holding a crossref on this mp
1167 * so it's ok to just free it
1168 */
91447636 1169 mount_lock_destroy(mp);
2d21ac55
A
1170#if CONFIG_MACF
1171 mac_mount_label_destroy(mp);
1172#endif
0a7de745 1173 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1c79356b 1174 }
6d2010ae 1175exit:
91447636 1176 /*
6d2010ae 1177 * drop I/O count on the device vp if there was one
91447636 1178 */
0a7de745
A
1179 if (devpath && devvp) {
1180 vnode_put(devvp);
1181 }
b0d623f7 1182
0a7de745 1183 return error;
b0d623f7 1184
6d2010ae 1185/* Error condition exits */
6601e61a 1186out4:
2d21ac55 1187 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1188
1189 /*
6d2010ae
A
1190 * If the mount has been placed on the covered vp,
1191 * it may have been discovered by now, so we have
1192 * to treat this just like an unmount
1193 */
1194 mount_lock_spin(mp);
1195 mp->mnt_lflag |= MNT_LDEAD;
1196 mount_unlock(mp);
1197
6601e61a 1198 if (device_vnode != NULLVP) {
b0d623f7 1199 vnode_rele(device_vnode);
0a7de745
A
1200 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
1201 ctx);
b0d623f7 1202 did_rele = TRUE;
6601e61a 1203 }
6d2010ae 1204
2d21ac55 1205 vnode_lock_spin(vp);
6d2010ae
A
1206
1207 mp->mnt_crossref++;
6601e61a 1208 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1209
6601e61a 1210 vnode_unlock(vp);
6d2010ae 1211
b0d623f7
A
1212 if (have_usecount) {
1213 vnode_rele(vp);
1214 }
91447636 1215out3:
0a7de745 1216 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele)) {
2d21ac55 1217 vnode_rele(devvp);
0a7de745 1218 }
91447636 1219out2:
0a7de745
A
1220 if (devpath && devvp) {
1221 vnode_put(devvp);
1222 }
91447636 1223out1:
743b1565
A
1224 /* Release mnt_rwlock only when it was taken */
1225 if (is_rwlock_locked == TRUE) {
1226 lck_rw_done(&mp->mnt_rwlock);
1227 }
39037602 1228
6601e61a 1229 if (mntalloc) {
0a7de745 1230 if (mp->mnt_crossref) {
6d2010ae 1231 mount_dropcrossref(mp, vp, 0);
0a7de745 1232 } else {
6d2010ae 1233 mount_lock_destroy(mp);
2d21ac55 1234#if CONFIG_MACF
6d2010ae 1235 mac_mount_label_destroy(mp);
2d21ac55 1236#endif
0a7de745 1237 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
6d2010ae 1238 }
b0d623f7 1239 }
b0d623f7 1240 if (vfsp_ref) {
6601e61a
A
1241 mount_list_lock();
1242 vfsp->vfc_refcount--;
1243 mount_list_unlock();
6601e61a 1244 }
91447636 1245
0a7de745 1246 return error;
1c79356b
A
1247}
1248
39037602 1249/*
b7266188
A
1250 * Flush in-core data, check for competing mount attempts,
1251 * and set VMOUNT
1252 */
6d2010ae
A
1253int
1254prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1255{
39236c6e
A
1256#if !CONFIG_MACF
1257#pragma unused(cnp,fsname)
1258#endif
b7266188
A
1259 struct vnode_attr va;
1260 int error;
1261
6d2010ae
A
1262 if (!skip_auth) {
1263 /*
1264 * If the user is not root, ensure that they own the directory
1265 * onto which we are attempting to mount.
1266 */
1267 VATTR_INIT(&va);
1268 VATTR_WANTED(&va, va_uid);
1269 if ((error = vnode_getattr(vp, &va, ctx)) ||
0a7de745
A
1270 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1271 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1272 error = EPERM;
1273 goto out;
1274 }
b7266188
A
1275 }
1276
0a7de745 1277 if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
b7266188 1278 goto out;
0a7de745 1279 }
b7266188 1280
0a7de745 1281 if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
b7266188 1282 goto out;
0a7de745 1283 }
b7266188
A
1284
1285 if (vp->v_type != VDIR) {
1286 error = ENOTDIR;
1287 goto out;
1288 }
1289
1290 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1291 error = EBUSY;
1292 goto out;
1293 }
1294
1295#if CONFIG_MACF
1296 error = mac_mount_check_mount(ctx, vp,
1297 cnp, fsname);
0a7de745 1298 if (error != 0) {
b7266188 1299 goto out;
0a7de745 1300 }
b7266188
A
1301#endif
1302
1303 vnode_lock_spin(vp);
1304 SET(vp->v_flag, VMOUNT);
1305 vnode_unlock(vp);
1306
1307out:
1308 return error;
1309}
1310
6d2010ae
A
1311#if CONFIG_IMGSRC_ACCESS
1312
1313#if DEBUG
1314#define IMGSRC_DEBUG(args...) printf(args)
1315#else
1316#define IMGSRC_DEBUG(args...) do { } while(0)
39037602 1317#endif
6d2010ae 1318
b7266188
A
1319static int
1320authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1321{
1322 struct nameidata nd;
6d2010ae 1323 vnode_t vp, realdevvp;
b7266188
A
1324 mode_t accessmode;
1325 int error;
1326
6d2010ae 1327 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
0a7de745 1328 if ((error = namei(&nd))) {
6d2010ae 1329 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1330 return error;
6d2010ae 1331 }
b7266188 1332
b7266188 1333 vp = nd.ni_vp;
b7266188 1334
6d2010ae
A
1335 if (!vnode_isblk(vp)) {
1336 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1337 error = ENOTBLK;
1338 goto out;
1339 }
6d2010ae
A
1340
1341 realdevvp = mp->mnt_devvp;
1342 if (realdevvp == NULLVP) {
1343 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1344 error = ENXIO;
1345 goto out;
1346 }
6d2010ae
A
1347
1348 error = vnode_getwithref(realdevvp);
1349 if (error != 0) {
1350 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1351 goto out;
1352 }
1353
1354 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1355 IMGSRC_DEBUG("Wrong dev_t.\n");
1356 error = ENXIO;
1357 goto out1;
1358 }
1359
1360 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1361
b7266188
A
1362 /*
1363 * If mount by non-root, then verify that user has necessary
1364 * permissions on the device.
1365 */
1366 if (!vfs_context_issuser(ctx)) {
1367 accessmode = KAUTH_VNODE_READ_DATA;
0a7de745 1368 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
b7266188 1369 accessmode |= KAUTH_VNODE_WRITE_DATA;
0a7de745 1370 }
6d2010ae
A
1371 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1372 IMGSRC_DEBUG("Access denied.\n");
1373 goto out1;
1374 }
b7266188
A
1375 }
1376
1377 *devvpp = vp;
6d2010ae
A
1378
1379out1:
1380 vnode_put(realdevvp);
b7266188 1381out:
6d2010ae 1382 nameidone(&nd);
b7266188
A
1383 if (error) {
1384 vnode_put(vp);
1385 }
1386
1387 return error;
1388}
1389
1390/*
1391 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1392 * and call checkdirs()
1393 */
1394static int
1395place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1396{
1397 int error;
1398
1399 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1400
1401 vnode_lock_spin(vp);
1402 CLR(vp->v_flag, VMOUNT);
1403 vp->v_mountedhere = mp;
1404 vnode_unlock(vp);
1405
1406 /*
1407 * taking the name_cache_lock exclusively will
1408 * insure that everyone is out of the fast path who
1409 * might be trying to use a now stale copy of
1410 * vp->v_mountedhere->mnt_realrootvp
1411 * bumping mount_generation causes the cached values
1412 * to be invalidated
1413 */
1414 name_cache_lock();
1415 mount_generation++;
1416 name_cache_unlock();
1417
1418 error = vnode_ref(vp);
1419 if (error != 0) {
1420 goto out;
1421 }
1422
1423 error = checkdirs(vp, ctx);
0a7de745 1424 if (error != 0) {
b7266188
A
1425 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1426 vnode_rele(vp);
1427 goto out;
1428 }
1429
1430out:
1431 if (error != 0) {
1432 mp->mnt_vnodecovered = NULLVP;
1433 }
1434 return error;
1435}
1436
1437static void
1438undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1439{
1440 vnode_rele(vp);
1441 vnode_lock_spin(vp);
1442 vp->v_mountedhere = (mount_t)NULL;
1443 vnode_unlock(vp);
1444
1445 mp->mnt_vnodecovered = NULLVP;
1446}
1447
1448static int
1449mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1450{
1451 int error;
1452
1453 /* unmount in progress return error */
1454 mount_lock_spin(mp);
1455 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1456 mount_unlock(mp);
1457 return EBUSY;
1458 }
1459 mount_unlock(mp);
1460 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1461
1462 /*
1463 * We only allow the filesystem to be reloaded if it
1464 * is currently mounted read-only.
1465 */
1466 if ((flags & MNT_RELOAD) &&
0a7de745 1467 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
b7266188
A
1468 error = ENOTSUP;
1469 goto out;
1470 }
1471
1472 /*
1473 * Only root, or the user that did the original mount is
1474 * permitted to update it.
1475 */
1476 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
0a7de745 1477 (!vfs_context_issuser(ctx))) {
b7266188
A
1478 error = EPERM;
1479 goto out;
1480 }
1481#if CONFIG_MACF
1482 error = mac_mount_check_remount(ctx, mp);
1483 if (error != 0) {
1484 goto out;
1485 }
1486#endif
1487
1488out:
1489 if (error) {
1490 lck_rw_done(&mp->mnt_rwlock);
1491 }
1492
1493 return error;
1494}
1495
39037602 1496static void
b7266188
A
1497mount_end_update(mount_t mp)
1498{
1499 lck_rw_done(&mp->mnt_rwlock);
1500}
1501
1502static int
6d2010ae
A
1503get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1504{
1505 vnode_t vp;
1506
1507 if (height >= MAX_IMAGEBOOT_NESTING) {
1508 return EINVAL;
1509 }
1510
1511 vp = imgsrc_rootvnodes[height];
1512 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1513 *rvpp = vp;
1514 return 0;
1515 } else {
1516 return ENOENT;
1517 }
1518}
1519
1520static int
39037602 1521relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
0a7de745
A
1522 const char *fsname, vfs_context_t ctx,
1523 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1524{
1525 int error;
1526 mount_t mp;
1527 boolean_t placed = FALSE;
6d2010ae 1528 vnode_t devvp = NULLVP;
b7266188
A
1529 struct vfstable *vfsp;
1530 user_addr_t devpath;
1531 char *old_mntonname;
6d2010ae
A
1532 vnode_t rvp;
1533 uint32_t height;
1534 uint32_t flags;
b7266188
A
1535
1536 /* If we didn't imageboot, nothing to move */
6d2010ae 1537 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1538 return EINVAL;
1539 }
1540
1541 /* Only root can do this */
1542 if (!vfs_context_issuser(ctx)) {
1543 return EPERM;
1544 }
1545
6d2010ae
A
1546 IMGSRC_DEBUG("looking for root vnode.\n");
1547
1548 /*
1549 * Get root vnode of filesystem we're moving.
1550 */
1551 if (by_index) {
1552 if (is64bit) {
1553 struct user64_mnt_imgsrc_args mia64;
1554 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1555 if (error != 0) {
1556 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1557 return error;
1558 }
1559
1560 height = mia64.mi_height;
1561 flags = mia64.mi_flags;
1562 devpath = mia64.mi_devpath;
1563 } else {
1564 struct user32_mnt_imgsrc_args mia32;
1565 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1566 if (error != 0) {
1567 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1568 return error;
1569 }
1570
1571 height = mia32.mi_height;
1572 flags = mia32.mi_flags;
1573 devpath = mia32.mi_devpath;
1574 }
1575 } else {
1576 /*
1577 * For binary compatibility--assumes one level of nesting.
1578 */
1579 if (is64bit) {
0a7de745 1580 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
6d2010ae 1581 return error;
0a7de745 1582 }
6d2010ae
A
1583 } else {
1584 user32_addr_t tmp;
0a7de745 1585 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
6d2010ae 1586 return error;
0a7de745 1587 }
6d2010ae
A
1588
1589 /* munge into LP64 addr */
1590 devpath = CAST_USER_ADDR_T(tmp);
1591 }
1592
1593 height = 0;
1594 flags = 0;
1595 }
1596
1597 if (flags != 0) {
1598 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1599 return EINVAL;
1600 }
1601
1602 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1603 if (error != 0) {
6d2010ae 1604 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1605 return error;
1606 }
1607
6d2010ae
A
1608 IMGSRC_DEBUG("got root vnode.\n");
1609
b7266188
A
1610 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1611
1612 /* Can only move once */
6d2010ae 1613 mp = vnode_mount(rvp);
b7266188 1614 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1615 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1616 error = EBUSY;
1617 goto out0;
1618 }
1619
6d2010ae
A
1620 IMGSRC_DEBUG("Starting updated.\n");
1621
b7266188 1622 /* Get exclusive rwlock on mount, authorize update on mp */
0a7de745 1623 error = mount_begin_update(mp, ctx, 0);
b7266188 1624 if (error != 0) {
6d2010ae 1625 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1626 goto out0;
1627 }
1628
39037602 1629 /*
b7266188
A
1630 * It can only be moved once. Flag is set under the rwlock,
1631 * so we're now safe to proceed.
1632 */
1633 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1634 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1635 goto out1;
1636 }
39037602
A
1637
1638
6d2010ae 1639 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1640
1641 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1642 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1643 if (error != 0) {
6d2010ae 1644 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1645 goto out1;
1646 }
39037602 1647
6d2010ae
A
1648 IMGSRC_DEBUG("Covered vp OK.\n");
1649
b7266188
A
1650 /* Sanity check the name caller has provided */
1651 vfsp = mp->mnt_vtable;
1652 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1653 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1654 error = EINVAL;
1655 goto out2;
1656 }
1657
1658 /* Check the device vnode and update mount-from name, for local filesystems */
1659 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1660 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1661
1662 if (devpath != USER_ADDR_NULL) {
1663 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1664 if (error) {
6d2010ae 1665 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1666 goto out2;
1667 }
1668
1669 vnode_put(devvp);
1670 }
1671 }
1672
39037602 1673 /*
b7266188 1674 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1675 * and increment the name cache's mount generation
b7266188 1676 */
6d2010ae
A
1677
1678 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1679 error = place_mount_and_checkdirs(mp, vp, ctx);
1680 if (error != 0) {
1681 goto out2;
1682 }
1683
1684 placed = TRUE;
1685
3e170ce0
A
1686 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1687 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1688
1689 /* Forbid future moves */
1690 mount_lock(mp);
1691 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1692 mount_unlock(mp);
1693
1694 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1695 if (mount_list_add(mp) != 0) {
1696 /*
1697 * The system is shutting down trying to umount
1698 * everything, so fail with a plausible errno.
1699 */
1700 error = EBUSY;
b7266188
A
1701 goto out3;
1702 }
1703
1704 mount_end_update(mp);
6d2010ae 1705 vnode_put(rvp);
b7266188
A
1706 FREE(old_mntonname, M_TEMP);
1707
6d2010ae
A
1708 vfs_notify_mount(pvp);
1709
b7266188
A
1710 return 0;
1711out3:
3e170ce0 1712 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1713
1714 mount_lock(mp);
1715 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1716 mount_unlock(mp);
1717
1718out2:
39037602 1719 /*
b7266188 1720 * Placing the mp on the vnode clears VMOUNT,
39037602 1721 * so cleanup is different after that point
b7266188
A
1722 */
1723 if (placed) {
1724 /* Rele the vp, clear VMOUNT and v_mountedhere */
1725 undo_place_on_covered_vp(mp, vp);
1726 } else {
1727 vnode_lock_spin(vp);
1728 CLR(vp->v_flag, VMOUNT);
1729 vnode_unlock(vp);
1730 }
1731out1:
1732 mount_end_update(mp);
1733
1734out0:
6d2010ae 1735 vnode_put(rvp);
b7266188
A
1736 FREE(old_mntonname, M_TEMP);
1737 return error;
1738}
1739
1740#endif /* CONFIG_IMGSRC_ACCESS */
1741
91447636 1742void
2d21ac55 1743enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1744{
9bccf70c
A
1745 struct nameidata qnd;
1746 int type;
1747 char qfpath[MAXPATHLEN];
91447636
A
1748 const char *qfname = QUOTAFILENAME;
1749 const char *qfopsname = QUOTAOPSNAME;
1750 const char *qfextension[] = INITQFNAMES;
9bccf70c 1751
2d21ac55 1752 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
0a7de745 1753 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0) {
b0d623f7
A
1754 return;
1755 }
39037602 1756 /*
9bccf70c
A
1757 * Enable filesystem disk quotas if necessary.
1758 * We ignore errors as this should not interfere with final mount
1759 */
0a7de745 1760 for (type = 0; type < MAXQUOTAS; type++) {
2d21ac55 1761 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae 1762 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
0a7de745
A
1763 CAST_USER_ADDR_T(qfpath), ctx);
1764 if (namei(&qnd) != 0) {
1765 continue; /* option file to trigger quotas is not present */
1766 }
91447636
A
1767 vnode_put(qnd.ni_vp);
1768 nameidone(&qnd);
0a7de745 1769 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1770
2d21ac55 1771 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1772 }
1773 return;
1774}
1775
2d21ac55
A
1776
1777static int
39037602 1778checkdirs_callback(proc_t p, void * arg)
2d21ac55 1779{
0a7de745 1780 struct cdirargs * cdrp = (struct cdirargs *)arg;
2d21ac55
A
1781 vnode_t olddp = cdrp->olddp;
1782 vnode_t newdp = cdrp->newdp;
1783 struct filedesc *fdp;
1784 vnode_t tvp;
1785 vnode_t fdp_cvp;
1786 vnode_t fdp_rvp;
1787 int cdir_changed = 0;
1788 int rdir_changed = 0;
1789
1790 /*
1791 * XXX Also needs to iterate each thread in the process to see if it
1792 * XXX is using a per-thread current working directory, and, if so,
1793 * XXX update that as well.
1794 */
1795
1796 proc_fdlock(p);
1797 fdp = p->p_fd;
1798 if (fdp == (struct filedesc *)0) {
1799 proc_fdunlock(p);
0a7de745 1800 return PROC_RETURNED;
2d21ac55
A
1801 }
1802 fdp_cvp = fdp->fd_cdir;
1803 fdp_rvp = fdp->fd_rdir;
1804 proc_fdunlock(p);
1805
1806 if (fdp_cvp == olddp) {
1807 vnode_ref(newdp);
1808 tvp = fdp->fd_cdir;
1809 fdp_cvp = newdp;
1810 cdir_changed = 1;
1811 vnode_rele(tvp);
1812 }
1813 if (fdp_rvp == olddp) {
1814 vnode_ref(newdp);
1815 tvp = fdp->fd_rdir;
1816 fdp_rvp = newdp;
1817 rdir_changed = 1;
1818 vnode_rele(tvp);
1819 }
1820 if (cdir_changed || rdir_changed) {
1821 proc_fdlock(p);
1822 fdp->fd_cdir = fdp_cvp;
1823 fdp->fd_rdir = fdp_rvp;
1824 proc_fdunlock(p);
1825 }
0a7de745 1826 return PROC_RETURNED;
2d21ac55
A
1827}
1828
1829
1830
1c79356b
A
1831/*
1832 * Scan all active processes to see if any of them have a current
1833 * or root directory onto which the new filesystem has just been
1834 * mounted. If so, replace them with the new mount point.
1835 */
6601e61a 1836static int
2d21ac55 1837checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1838{
2d21ac55
A
1839 vnode_t newdp;
1840 vnode_t tvp;
6601e61a 1841 int err;
2d21ac55 1842 struct cdirargs cdr;
1c79356b 1843
0a7de745
A
1844 if (olddp->v_usecount == 1) {
1845 return 0;
1846 }
2d21ac55 1847 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1848
1849 if (err != 0) {
6601e61a 1850#if DIAGNOSTIC
2d21ac55 1851 panic("mount: lost mount: error %d", err);
6601e61a 1852#endif
0a7de745 1853 return err;
6601e61a 1854 }
91447636 1855
2d21ac55
A
1856 cdr.olddp = olddp;
1857 cdr.newdp = newdp;
1858 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1859 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1860
1c79356b 1861 if (rootvnode == olddp) {
91447636 1862 vnode_ref(newdp);
fa4905b1 1863 tvp = rootvnode;
1c79356b 1864 rootvnode = newdp;
91447636 1865 vnode_rele(tvp);
1c79356b 1866 }
91447636
A
1867
1868 vnode_put(newdp);
0a7de745 1869 return 0;
1c79356b
A
1870}
1871
1872/*
1873 * Unmount a file system.
1874 *
1875 * Note: unmount takes a path to the vnode mounted on as argument,
1876 * not special file (as before).
1877 */
1c79356b
A
1878/* ARGSUSED */
1879int
b0d623f7 1880unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1881{
2d21ac55 1882 vnode_t vp;
1c79356b
A
1883 struct mount *mp;
1884 int error;
1885 struct nameidata nd;
2d21ac55 1886 vfs_context_t ctx = vfs_context_current();
91447636 1887
39037602 1888 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
0a7de745 1889 UIO_USERSPACE, uap->path, ctx);
55e303ae 1890 error = namei(&nd);
0a7de745
A
1891 if (error) {
1892 return error;
1893 }
1c79356b
A
1894 vp = nd.ni_vp;
1895 mp = vp->v_mount;
91447636 1896 nameidone(&nd);
1c79356b 1897
2d21ac55
A
1898#if CONFIG_MACF
1899 error = mac_mount_check_umount(ctx, mp);
1900 if (error != 0) {
1901 vnode_put(vp);
0a7de745 1902 return error;
2d21ac55
A
1903 }
1904#endif
55e303ae
A
1905 /*
1906 * Must be the root of the filesystem
1907 */
1908 if ((vp->v_flag & VROOT) == 0) {
91447636 1909 vnode_put(vp);
0a7de745 1910 return EINVAL;
55e303ae 1911 }
6601e61a 1912 mount_ref(mp, 0);
91447636 1913 vnode_put(vp);
6601e61a 1914 /* safedounmount consumes the mount ref */
0a7de745 1915 return safedounmount(mp, uap->flags, ctx);
2d21ac55
A
1916}
1917
1918int
39037602 1919vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1920{
1921 mount_t mp;
1922
1923 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1924 if (mp == (mount_t)0) {
0a7de745 1925 return ENOENT;
2d21ac55
A
1926 }
1927 mount_ref(mp, 0);
1928 mount_iterdrop(mp);
1929 /* safedounmount consumes the mount ref */
0a7de745 1930 return safedounmount(mp, flags, ctx);
55e303ae
A
1931}
1932
2d21ac55 1933
55e303ae 1934/*
6601e61a 1935 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1936 * Do the actual file system unmount, prevent some common foot shooting.
1937 */
1938int
2d21ac55 1939safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1940{
1941 int error;
2d21ac55 1942 proc_t p = vfs_context_proc(ctx);
55e303ae 1943
316670eb
A
1944 /*
1945 * If the file system is not responding and MNT_NOBLOCK
1946 * is set and not a forced unmount then return EBUSY.
1947 */
1948 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
0a7de745 1949 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
316670eb
A
1950 error = EBUSY;
1951 goto out;
1952 }
1953
1c79356b 1954 /*
39037602 1955 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1956 * this is not a forced-unmount attempt.
1c79356b 1957 */
6d2010ae
A
1958 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1959 /*
1960 * Only root, or the user that did the original mount is
1961 * permitted to unmount this filesystem.
1962 */
1963 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
0a7de745 1964 (error = suser(kauth_cred_get(), &p->p_acflag))) {
6d2010ae 1965 goto out;
0a7de745 1966 }
6d2010ae 1967 }
1c79356b
A
1968 /*
1969 * Don't allow unmounting the root file system.
1970 */
6601e61a 1971 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1972 error = EBUSY; /* the root is always busy */
6601e61a
A
1973 goto out;
1974 }
1c79356b 1975
b7266188
A
1976#ifdef CONFIG_IMGSRC_ACCESS
1977 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1978 error = EBUSY;
1979 goto out;
1980 }
1981#endif /* CONFIG_IMGSRC_ACCESS */
1982
0a7de745 1983 return dounmount(mp, flags, 1, ctx);
2d21ac55 1984
6601e61a
A
1985out:
1986 mount_drop(mp, 0);
0a7de745 1987 return error;
1c79356b
A
1988}
1989
1990/*
1991 * Do the actual file system unmount.
1992 */
1993int
2d21ac55 1994dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1995{
2d21ac55 1996 vnode_t coveredvp = (vnode_t)0;
1c79356b 1997 int error;
91447636 1998 int needwakeup = 0;
91447636
A
1999 int forcedunmount = 0;
2000 int lflags = 0;
593a1d5f 2001 struct vnode *devvp = NULLVP;
6d2010ae 2002#if CONFIG_TRIGGERS
39236c6e 2003 proc_t p = vfs_context_proc(ctx);
6d2010ae 2004 int did_vflush = 0;
39236c6e 2005 int pflags_save = 0;
6d2010ae 2006#endif /* CONFIG_TRIGGERS */
91447636 2007
813fb2f6
A
2008#if CONFIG_FSE
2009 if (!(flags & MNT_FORCE)) {
2010 fsevent_unmount(mp, ctx); /* has to come first! */
2011 }
2012#endif
2013
91447636 2014 mount_lock(mp);
fe8ab488
A
2015
2016 /*
2017 * If already an unmount in progress just return EBUSY.
2018 * Even a forced unmount cannot override.
2019 */
91447636 2020 if (mp->mnt_lflag & MNT_LUNMOUNT) {
0a7de745 2021 if (withref != 0) {
6601e61a 2022 mount_drop(mp, 1);
0a7de745 2023 }
fe8ab488 2024 mount_unlock(mp);
0a7de745 2025 return EBUSY;
9bccf70c 2026 }
39236c6e 2027
fe8ab488
A
2028 if (flags & MNT_FORCE) {
2029 forcedunmount = 1;
2030 mp->mnt_lflag |= MNT_LFORCE;
2031 }
2032
39236c6e 2033#if CONFIG_TRIGGERS
0a7de745 2034 if (flags & MNT_NOBLOCK && p != kernproc) {
39236c6e 2035 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
0a7de745 2036 }
39236c6e
A
2037#endif
2038
1c79356b 2039 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636 2040 mp->mnt_lflag |= MNT_LUNMOUNT;
0a7de745 2041 mp->mnt_flag &= ~MNT_ASYNC;
2d21ac55
A
2042 /*
2043 * anyone currently in the fast path that
2044 * trips over the cached rootvp will be
2045 * dumped out and forced into the slow path
2046 * to regenerate a new cached value
2047 */
2048 mp->mnt_realrootvp = NULLVP;
91447636 2049 mount_unlock(mp);
39037602 2050
fe8ab488
A
2051 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2052 /*
2053 * Force unmount any mounts in this filesystem.
2054 * If any unmounts fail - just leave them dangling.
2055 * Avoids recursion.
2056 */
2057 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2058 }
2059
2d21ac55
A
2060 /*
2061 * taking the name_cache_lock exclusively will
2062 * insure that everyone is out of the fast path who
2063 * might be trying to use a now stale copy of
2064 * vp->v_mountedhere->mnt_realrootvp
2065 * bumping mount_generation causes the cached values
2066 * to be invalidated
2067 */
2068 name_cache_lock();
2069 mount_generation++;
2070 name_cache_unlock();
2071
2072
91447636 2073 lck_rw_lock_exclusive(&mp->mnt_rwlock);
0a7de745 2074 if (withref != 0) {
6601e61a 2075 mount_drop(mp, 0);
0a7de745 2076 }
91447636
A
2077 error = 0;
2078 if (forcedunmount == 0) {
0a7de745 2079 ubc_umount(mp); /* release cached vnodes */
91447636 2080 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 2081 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
2082 if (error) {
2083 mount_lock(mp);
2084 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2085 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2086 mp->mnt_lflag &= ~MNT_LFORCE;
2087 goto out;
2088 }
2089 }
2090 }
6d2010ae 2091
5ba3f43e
A
2092 /* free disk_conditioner_info structure for this mount */
2093 disk_conditioner_unmount(mp);
2094
3e170ce0
A
2095 IOBSDMountChange(mp, kIOMountChangeUnmount);
2096
6d2010ae
A
2097#if CONFIG_TRIGGERS
2098 vfs_nested_trigger_unmounts(mp, flags, ctx);
2099 did_vflush = 1;
39037602 2100#endif
0a7de745 2101 if (forcedunmount) {
91447636 2102 lflags |= FORCECLOSE;
0a7de745 2103 }
91447636
A
2104 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2105 if ((forcedunmount == 0) && error) {
2106 mount_lock(mp);
9bccf70c 2107 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2108 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2109 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
2110 goto out;
2111 }
91447636
A
2112
2113 /* make sure there are no one in the mount iterations or lookup */
2114 mount_iterdrain(mp);
2115
2d21ac55 2116 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 2117 if (error) {
91447636
A
2118 mount_iterreset(mp);
2119 mount_lock(mp);
1c79356b 2120 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2121 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2122 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
2123 goto out;
2124 }
2125
2126 /* increment the operations count */
0a7de745 2127 if (!error) {
b0d623f7 2128 OSAddAtomic(1, &vfs_nummntops);
0a7de745 2129 }
91447636 2130
0a7de745 2131 if (mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
2132 /* hold an io reference and drop the usecount before close */
2133 devvp = mp->mnt_devvp;
593a1d5f
A
2134 vnode_getalways(devvp);
2135 vnode_rele(devvp);
0a7de745
A
2136 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
2137 ctx);
b0d623f7 2138 vnode_clearmountedon(devvp);
593a1d5f 2139 vnode_put(devvp);
91447636
A
2140 }
2141 lck_rw_done(&mp->mnt_rwlock);
2142 mount_list_remove(mp);
2143 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2144
91447636 2145 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2146 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2147 /*
2148 * The covered vnode needs special handling. Trying to get an
2149 * iocount must not block here as this may lead to deadlocks
2150 * if the Filesystem to which the covered vnode belongs is
2151 * undergoing forced unmounts. Since we hold a usecount, the
2152 * vnode cannot be reused (it can, however, still be terminated)
2153 */
2154 vnode_getalways(coveredvp);
6d2010ae
A
2155 vnode_lock_spin(coveredvp);
2156
2157 mp->mnt_crossref++;
2158 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2159 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2160
2161 vnode_unlock(coveredvp);
2162 vnode_put(coveredvp);
1c79356b 2163 }
91447636
A
2164
2165 mount_list_lock();
2166 mp->mnt_vtable->vfc_refcount--;
2167 mount_list_unlock();
2168
0a7de745 2169 cache_purgevfs(mp); /* remove cache entries for this file sys */
91447636
A
2170 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2171 mount_lock(mp);
2172 mp->mnt_lflag |= MNT_LDEAD;
2173
2174 if (mp->mnt_lflag & MNT_LWAIT) {
0a7de745 2175 /*
91447636
A
2176 * do the wakeup here
2177 * in case we block in mount_refdrain
2178 * which will drop the mount lock
2179 * and allow anyone blocked in vfs_busy
2180 * to wakeup and see the LDEAD state
2181 */
2182 mp->mnt_lflag &= ~MNT_LWAIT;
2183 wakeup((caddr_t)mp);
1c79356b 2184 }
91447636 2185 mount_refdrain(mp);
1c79356b 2186out:
91447636
A
2187 if (mp->mnt_lflag & MNT_LWAIT) {
2188 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2189 needwakeup = 1;
91447636 2190 }
6d2010ae 2191
6d2010ae 2192#if CONFIG_TRIGGERS
39236c6e 2193 if (flags & MNT_NOBLOCK && p != kernproc) {
0a7de745
A
2194 // Restore P_NOREMOTEHANG bit to its previous value
2195 if ((pflags_save & P_NOREMOTEHANG) == 0) {
39236c6e 2196 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
0a7de745 2197 }
39236c6e
A
2198 }
2199
39037602 2200 /*
6d2010ae 2201 * Callback and context are set together under the mount lock, and
39037602 2202 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2203 * and call out.
2204 */
2205 if (mp->mnt_triggercallback != NULL) {
2206 mount_unlock(mp);
2207 if (error == 0) {
2208 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2209 } else if (did_vflush) {
2210 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2211 }
2212 } else {
2213 mount_unlock(mp);
2214 }
39037602 2215#else
91447636 2216 mount_unlock(mp);
6d2010ae
A
2217#endif /* CONFIG_TRIGGERS */
2218
91447636
A
2219 lck_rw_done(&mp->mnt_rwlock);
2220
0a7de745 2221 if (needwakeup) {
1c79356b 2222 wakeup((caddr_t)mp);
0a7de745 2223 }
6d2010ae 2224
55e303ae 2225 if (!error) {
91447636 2226 if ((coveredvp != NULLVP)) {
fe8ab488 2227 vnode_t pvp = NULLVP;
b0d623f7 2228
fe8ab488
A
2229 /*
2230 * The covered vnode needs special handling. Trying to
2231 * get an iocount must not block here as this may lead
2232 * to deadlocks if the Filesystem to which the covered
2233 * vnode belongs is undergoing forced unmounts. Since we
2234 * hold a usecount, the vnode cannot be reused
2235 * (it can, however, still be terminated).
2236 */
2237 vnode_getalways(coveredvp);
6d2010ae
A
2238
2239 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2240 /*
2241 * We'll _try_ to detect if this really needs to be
2242 * done. The coveredvp can only be in termination (or
2243 * terminated) if the coveredvp's mount point is in a
2244 * forced unmount (or has been) since we still hold the
2245 * ref.
2246 */
2247 if (!vnode_isrecycled(coveredvp)) {
2248 pvp = vnode_getparent(coveredvp);
6d2010ae 2249#if CONFIG_TRIGGERS
fe8ab488
A
2250 if (coveredvp->v_resolve) {
2251 vnode_trigger_rearm(coveredvp, ctx);
2252 }
2253#endif
2254 }
2255
2256 vnode_rele(coveredvp);
91447636 2257 vnode_put(coveredvp);
fe8ab488 2258 coveredvp = NULLVP;
b0d623f7
A
2259
2260 if (pvp) {
2261 lock_vnode_and_post(pvp, NOTE_WRITE);
2262 vnode_put(pvp);
2263 }
91447636 2264 } else if (mp->mnt_flag & MNT_ROOTFS) {
0a7de745 2265 mount_lock_destroy(mp);
2d21ac55 2266#if CONFIG_MACF
0a7de745 2267 mac_mount_label_destroy(mp);
2d21ac55 2268#endif
0a7de745
A
2269 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2270 } else {
91447636 2271 panic("dounmount: no coveredvp");
0a7de745 2272 }
55e303ae 2273 }
0a7de745 2274 return error;
1c79356b
A
2275}
2276
fe8ab488
A
2277/*
2278 * Unmount any mounts in this filesystem.
2279 */
2280void
2281dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2282{
0a7de745 2283 mount_t smp;
fe8ab488
A
2284 fsid_t *fsids, fsid;
2285 int fsids_sz;
2286 int count = 0, i, m = 0;
2287 vnode_t vp;
2288
2289 mount_list_lock();
2290
2291 // Get an array to hold the submounts fsids.
2292 TAILQ_FOREACH(smp, &mountlist, mnt_list)
0a7de745 2293 count++;
fe8ab488
A
2294 fsids_sz = count * sizeof(fsid_t);
2295 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2296 if (fsids == NULL) {
2297 mount_list_unlock();
2298 goto out;
2299 }
0a7de745 2300 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
fe8ab488
A
2301
2302 /*
2303 * Fill the array with submount fsids.
2304 * Since mounts are always added to the tail of the mount list, the
39037602 2305 * list is always in mount order.
fe8ab488
A
2306 * For each mount check if the mounted-on vnode belongs to a
2307 * mount that's already added to our array of mounts to be unmounted.
2308 */
2309 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2310 vp = smp->mnt_vnodecovered;
0a7de745 2311 if (vp == NULL) {
fe8ab488 2312 continue;
0a7de745
A
2313 }
2314 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
fe8ab488
A
2315 for (i = 0; i <= m; i++) {
2316 if (fsids[i].val[0] == fsid.val[0] &&
2317 fsids[i].val[1] == fsid.val[1]) {
2318 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2319 break;
2320 }
2321 }
2322 }
2323 mount_list_unlock();
2324
2325 // Unmount the submounts in reverse order. Ignore errors.
2326 for (i = m; i > 0; i--) {
2327 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2328 if (smp) {
2329 mount_ref(smp, 0);
2330 mount_iterdrop(smp);
2331 (void) dounmount(smp, flags, 1, ctx);
2332 }
2333 }
2334out:
0a7de745 2335 if (fsids) {
fe8ab488 2336 FREE(fsids, M_TEMP);
0a7de745 2337 }
fe8ab488
A
2338}
2339
91447636
A
2340void
2341mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2342{
6d2010ae
A
2343 vnode_lock(dp);
2344 mp->mnt_crossref--;
2345
0a7de745 2346 if (mp->mnt_crossref < 0) {
6d2010ae 2347 panic("mount cross refs -ve");
0a7de745 2348 }
6d2010ae
A
2349
2350 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
0a7de745 2351 if (need_put) {
6d2010ae 2352 vnode_put_locked(dp);
0a7de745 2353 }
91447636 2354 vnode_unlock(dp);
6d2010ae
A
2355
2356 mount_lock_destroy(mp);
2357#if CONFIG_MACF
2358 mac_mount_label_destroy(mp);
2359#endif
0a7de745 2360 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
6d2010ae
A
2361 return;
2362 }
0a7de745 2363 if (need_put) {
6d2010ae 2364 vnode_put_locked(dp);
0a7de745 2365 }
6d2010ae 2366 vnode_unlock(dp);
91447636
A
2367}
2368
2369
1c79356b
A
2370/*
2371 * Sync each mounted filesystem.
2372 */
2373#if DIAGNOSTIC
2374int syncprt = 0;
1c79356b
A
2375#endif
2376
0a7de745 2377int print_vmpage_stat = 0;
a39ff7e2 2378
39037602 2379static int
fe8ab488 2380sync_callback(mount_t mp, __unused void *arg)
1c79356b 2381{
91447636 2382 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2383 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2384
2385 mp->mnt_flag &= ~MNT_ASYNC;
2386 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
0a7de745 2387 if (asyncflag) {
fe8ab488 2388 mp->mnt_flag |= MNT_ASYNC;
0a7de745 2389 }
1c79356b 2390 }
1c79356b 2391
0a7de745 2392 return VFS_RETURNED;
fe8ab488 2393}
91447636 2394
91447636
A
2395/* ARGSUSED */
2396int
b0d623f7 2397sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2398{
fe8ab488 2399 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2400
fe8ab488
A
2401 if (print_vmpage_stat) {
2402 vm_countdirtypages();
2403 }
2404
2405#if DIAGNOSTIC
0a7de745 2406 if (syncprt) {
fe8ab488 2407 vfs_bufstats();
0a7de745 2408 }
fe8ab488
A
2409#endif /* DIAGNOSTIC */
2410 return 0;
2411}
2412
d9a64523
A
2413typedef enum {
2414 SYNC_ALL = 0,
2415 SYNC_ONLY_RELIABLE_MEDIA = 1,
2416 SYNC_ONLY_UNRELIABLE_MEDIA = 2
2417} sync_type_t;
2418
2419static int
2420sync_internal_callback(mount_t mp, void *arg)
2421{
2422 if (arg) {
2423 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
0a7de745 2424 (mp->mnt_flag & MNT_LOCAL);
d9a64523
A
2425 sync_type_t sync_type = *((sync_type_t *)arg);
2426
0a7de745
A
2427 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
2428 return VFS_RETURNED;
2429 } else if ((sync_type = SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
2430 return VFS_RETURNED;
2431 }
d9a64523
A
2432 }
2433
2434 (void)sync_callback(mp, NULL);
2435
0a7de745 2436 return VFS_RETURNED;
d9a64523
A
2437}
2438
2439int sync_thread_state = 0;
2440int sync_timeout_seconds = 5;
2441
2442#define SYNC_THREAD_RUN 0x0001
2443#define SYNC_THREAD_RUNNING 0x0002
2444
fe8ab488 2445static void
d9a64523 2446sync_thread(__unused void *arg, __unused wait_result_t wr)
fe8ab488 2447{
d9a64523 2448 sync_type_t sync_type;
fe8ab488 2449
d9a64523
A
2450 lck_mtx_lock(sync_mtx_lck);
2451 while (sync_thread_state & SYNC_THREAD_RUN) {
2452 sync_thread_state &= ~SYNC_THREAD_RUN;
2453 lck_mtx_unlock(sync_mtx_lck);
2454
2455 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2456 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2457 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2458 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2459
2460 lck_mtx_lock(sync_mtx_lck);
2461 }
2462 /*
2463 * This wakeup _has_ to be issued before the lock is released otherwise
2464 * we may end up waking up a thread in sync_internal which is
2465 * expecting a wakeup from a thread it just created and not from this
2466 * thread which is about to exit.
2467 */
2468 wakeup(&sync_thread_state);
2469 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2470 lck_mtx_unlock(sync_mtx_lck);
fe8ab488 2471
fe8ab488 2472 if (print_vmpage_stat) {
1c79356b 2473 vm_countdirtypages();
1c79356b 2474 }
39236c6e 2475
1c79356b 2476#if DIAGNOSTIC
0a7de745 2477 if (syncprt) {
1c79356b 2478 vfs_bufstats();
0a7de745 2479 }
1c79356b 2480#endif /* DIAGNOSTIC */
1c79356b
A
2481}
2482
d9a64523
A
2483struct timeval sync_timeout_last_print = {0, 0};
2484
1c79356b 2485/*
d9a64523
A
2486 * An in-kernel sync for power management to call.
2487 * This function always returns within sync_timeout seconds.
1c79356b 2488 */
d9a64523
A
2489__private_extern__ int
2490sync_internal(void)
2d21ac55 2491{
fe8ab488 2492 thread_t thd;
2d21ac55 2493 int error;
d9a64523
A
2494 int thread_created = FALSE;
2495 struct timespec ts = {sync_timeout_seconds, 0};
fe8ab488
A
2496
2497 lck_mtx_lock(sync_mtx_lck);
d9a64523
A
2498 sync_thread_state |= SYNC_THREAD_RUN;
2499 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2500 int kr;
2501
2502 sync_thread_state |= SYNC_THREAD_RUNNING;
2503 kr = kernel_thread_start(sync_thread, NULL, &thd);
2504 if (kr != KERN_SUCCESS) {
2505 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2506 lck_mtx_unlock(sync_mtx_lck);
2507 printf("sync_thread failed\n");
0a7de745 2508 return 0;
d9a64523
A
2509 }
2510 thread_created = TRUE;
fe8ab488
A
2511 }
2512
d9a64523
A
2513 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2514 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
fe8ab488 2515 if (error) {
d9a64523
A
2516 struct timeval now;
2517
2518 microtime(&now);
2519 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2520 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2521 sync_timeout_last_print.tv_sec = now.tv_sec;
2522 }
fe8ab488 2523 }
fe8ab488 2524
0a7de745 2525 if (thread_created) {
d9a64523 2526 thread_deallocate(thd);
0a7de745 2527 }
2d21ac55 2528
0a7de745 2529 return 0;
fe8ab488
A
2530} /* end of sync_internal call */
2531
2532/*
2533 * Change filesystem quotas.
2534 */
2535#if QUOTA
2536int
2537quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2538{
2d21ac55 2539 struct mount *mp;
d9a64523 2540 int error, quota_cmd, quota_status = 0;
91447636
A
2541 caddr_t datap;
2542 size_t fnamelen;
1c79356b 2543 struct nameidata nd;
2d21ac55 2544 vfs_context_t ctx = vfs_context_current();
d9a64523 2545 struct dqblk my_dqblk = {};
91447636 2546
b0d623f7 2547 AUDIT_ARG(uid, uap->uid);
55e303ae 2548 AUDIT_ARG(cmd, uap->cmd);
6d2010ae 2549 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 2550 uap->path, ctx);
55e303ae 2551 error = namei(&nd);
0a7de745
A
2552 if (error) {
2553 return error;
2554 }
1c79356b 2555 mp = nd.ni_vp->v_mount;
91447636
A
2556 vnode_put(nd.ni_vp);
2557 nameidone(&nd);
2558
2559 /* copyin any data we will need for downstream code */
2560 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2561
2562 switch (quota_cmd) {
2563 case Q_QUOTAON:
2564 /* uap->arg specifies a file from which to take the quotas */
2565 fnamelen = MAXPATHLEN;
2566 datap = kalloc(MAXPATHLEN);
2567 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2568 break;
2569 case Q_GETQUOTA:
2570 /* uap->arg is a pointer to a dqblk structure. */
2571 datap = (caddr_t) &my_dqblk;
2572 break;
2573 case Q_SETQUOTA:
2574 case Q_SETUSE:
2575 /* uap->arg is a pointer to a dqblk structure. */
2576 datap = (caddr_t) &my_dqblk;
2577 if (proc_is64bit(p)) {
0a7de745
A
2578 struct user_dqblk my_dqblk64;
2579 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof(my_dqblk64));
91447636
A
2580 if (error == 0) {
2581 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2582 }
0a7de745
A
2583 } else {
2584 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof(my_dqblk));
91447636
A
2585 }
2586 break;
2587 case Q_QUOTASTAT:
2588 /* uap->arg is a pointer to an integer */
2589 datap = (caddr_t) &quota_status;
2590 break;
2591 default:
2592 datap = NULL;
2593 break;
2594 } /* switch */
2595
2596 if (error == 0) {
2d21ac55 2597 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2598 }
2599
2600 switch (quota_cmd) {
2601 case Q_QUOTAON:
0a7de745 2602 if (datap != NULL) {
91447636 2603 kfree(datap, MAXPATHLEN);
0a7de745 2604 }
91447636
A
2605 break;
2606 case Q_GETQUOTA:
2607 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2608 if (error == 0) {
2609 if (proc_is64bit(p)) {
0a7de745 2610 struct user_dqblk my_dqblk64;
5ba3f43e
A
2611
2612 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
91447636 2613 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
0a7de745
A
2614 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof(my_dqblk64));
2615 } else {
2616 error = copyout(datap, uap->arg, sizeof(struct dqblk));
91447636
A
2617 }
2618 }
2619 break;
2620 case Q_QUOTASTAT:
2621 /* uap->arg is a pointer to an integer */
2622 if (error == 0) {
2623 error = copyout(datap, uap->arg, sizeof(quota_status));
2624 }
2625 break;
2626 default:
2627 break;
2628 } /* switch */
2629
0a7de745 2630 return error;
1c79356b 2631}
2d21ac55
A
2632#else
2633int
b0d623f7 2634quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55 2635{
0a7de745 2636 return EOPNOTSUPP;
2d21ac55
A
2637}
2638#endif /* QUOTA */
1c79356b
A
2639
2640/*
2641 * Get filesystem statistics.
2d21ac55
A
2642 *
2643 * Returns: 0 Success
2644 * namei:???
2645 * vfs_update_vfsstat:???
2646 * munge_statfs:EFAULT
1c79356b 2647 */
1c79356b
A
2648/* ARGSUSED */
2649int
b0d623f7 2650statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2651{
91447636
A
2652 struct mount *mp;
2653 struct vfsstatfs *sp;
1c79356b
A
2654 int error;
2655 struct nameidata nd;
2d21ac55 2656 vfs_context_t ctx = vfs_context_current();
91447636 2657 vnode_t vp;
1c79356b 2658
39037602 2659 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
0a7de745 2660 UIO_USERSPACE, uap->path, ctx);
55e303ae 2661 error = namei(&nd);
0a7de745
A
2662 if (error != 0) {
2663 return error;
2664 }
91447636
A
2665 vp = nd.ni_vp;
2666 mp = vp->v_mount;
2667 sp = &mp->mnt_vfsstat;
2668 nameidone(&nd);
2669
39037602
A
2670#if CONFIG_MACF
2671 error = mac_mount_check_stat(ctx, mp);
0a7de745
A
2672 if (error != 0) {
2673 return error;
2674 }
39037602
A
2675#endif
2676
2d21ac55 2677 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2678 if (error != 0) {
39236c6e 2679 vnode_put(vp);
0a7de745 2680 return error;
39236c6e 2681 }
91447636
A
2682
2683 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2684 vnode_put(vp);
0a7de745 2685 return error;
1c79356b
A
2686}
2687
2688/*
2689 * Get filesystem statistics.
2690 */
1c79356b
A
2691/* ARGSUSED */
2692int
b0d623f7 2693fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2694{
2d21ac55 2695 vnode_t vp;
1c79356b 2696 struct mount *mp;
91447636 2697 struct vfsstatfs *sp;
1c79356b
A
2698 int error;
2699
55e303ae
A
2700 AUDIT_ARG(fd, uap->fd);
2701
0a7de745
A
2702 if ((error = file_vnode(uap->fd, &vp))) {
2703 return error;
2704 }
55e303ae 2705
d1ecb069
A
2706 error = vnode_getwithref(vp);
2707 if (error) {
2708 file_drop(uap->fd);
0a7de745 2709 return error;
d1ecb069
A
2710 }
2711
91447636 2712 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2713
91447636
A
2714 mp = vp->v_mount;
2715 if (!mp) {
d1ecb069
A
2716 error = EBADF;
2717 goto out;
91447636 2718 }
39037602
A
2719
2720#if CONFIG_MACF
2721 error = mac_mount_check_stat(vfs_context_current(), mp);
0a7de745 2722 if (error != 0) {
39037602 2723 goto out;
0a7de745 2724 }
39037602
A
2725#endif
2726
91447636 2727 sp = &mp->mnt_vfsstat;
39037602 2728 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2729 goto out;
91447636 2730 }
91447636
A
2731
2732 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2733
d1ecb069
A
2734out:
2735 file_drop(uap->fd);
2736 vnode_put(vp);
2737
0a7de745 2738 return error;
1c79356b
A
2739}
2740
39037602
A
2741/*
2742 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2743 */
39037602 2744static int
2d21ac55
A
2745statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2746{
2747 int error;
2748 struct statfs64 sfs;
39037602 2749
2d21ac55
A
2750 bzero(&sfs, sizeof(sfs));
2751
2752 sfs.f_bsize = sfsp->f_bsize;
2753 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2754 sfs.f_blocks = sfsp->f_blocks;
2755 sfs.f_bfree = sfsp->f_bfree;
2756 sfs.f_bavail = sfsp->f_bavail;
2757 sfs.f_files = sfsp->f_files;
2758 sfs.f_ffree = sfsp->f_ffree;
2759 sfs.f_fsid = sfsp->f_fsid;
2760 sfs.f_owner = sfsp->f_owner;
2761 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2762 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2763 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2764 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2765 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2766 } else {
2767 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2768 }
2d21ac55
A
2769 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2770 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2771
2772 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2773
0a7de745 2774 return error;
2d21ac55
A
2775}
2776
39037602
A
2777/*
2778 * Get file system statistics in 64-bit mode
2d21ac55
A
2779 */
2780int
b0d623f7 2781statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2782{
2783 struct mount *mp;
2784 struct vfsstatfs *sp;
2785 int error;
2786 struct nameidata nd;
2787 vfs_context_t ctxp = vfs_context_current();
2788 vnode_t vp;
2789
39037602 2790 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
0a7de745 2791 UIO_USERSPACE, uap->path, ctxp);
2d21ac55 2792 error = namei(&nd);
0a7de745
A
2793 if (error != 0) {
2794 return error;
2795 }
2d21ac55
A
2796 vp = nd.ni_vp;
2797 mp = vp->v_mount;
2798 sp = &mp->mnt_vfsstat;
2799 nameidone(&nd);
2800
39037602
A
2801#if CONFIG_MACF
2802 error = mac_mount_check_stat(ctxp, mp);
0a7de745
A
2803 if (error != 0) {
2804 return error;
2805 }
39037602
A
2806#endif
2807
2d21ac55 2808 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2809 if (error != 0) {
39236c6e 2810 vnode_put(vp);
0a7de745 2811 return error;
39236c6e 2812 }
2d21ac55
A
2813
2814 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2815 vnode_put(vp);
2d21ac55 2816
0a7de745 2817 return error;
2d21ac55
A
2818}
2819
39037602
A
2820/*
2821 * Get file system statistics in 64-bit mode
2d21ac55
A
2822 */
2823int
b0d623f7 2824fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2825{
2826 struct vnode *vp;
2827 struct mount *mp;
2828 struct vfsstatfs *sp;
2829 int error;
2830
2831 AUDIT_ARG(fd, uap->fd);
2832
0a7de745
A
2833 if ((error = file_vnode(uap->fd, &vp))) {
2834 return error;
2835 }
2d21ac55 2836
d1ecb069
A
2837 error = vnode_getwithref(vp);
2838 if (error) {
2839 file_drop(uap->fd);
0a7de745 2840 return error;
d1ecb069
A
2841 }
2842
2d21ac55
A
2843 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2844
2845 mp = vp->v_mount;
2846 if (!mp) {
316670eb 2847 error = EBADF;
d1ecb069 2848 goto out;
2d21ac55 2849 }
39037602
A
2850
2851#if CONFIG_MACF
2852 error = mac_mount_check_stat(vfs_context_current(), mp);
0a7de745 2853 if (error != 0) {
39037602 2854 goto out;
0a7de745 2855 }
39037602
A
2856#endif
2857
2d21ac55
A
2858 sp = &mp->mnt_vfsstat;
2859 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2860 goto out;
2d21ac55 2861 }
2d21ac55
A
2862
2863 error = statfs64_common(mp, sp, uap->buf);
2864
d1ecb069
A
2865out:
2866 file_drop(uap->fd);
2867 vnode_put(vp);
2868
0a7de745 2869 return error;
2d21ac55 2870}
91447636
A
2871
2872struct getfsstat_struct {
0a7de745
A
2873 user_addr_t sfsp;
2874 user_addr_t *mp;
2875 int count;
2876 int maxcount;
2877 int flags;
2878 int error;
1c79356b 2879};
1c79356b 2880
91447636
A
2881
2882static int
2883getfsstat_callback(mount_t mp, void * arg)
2884{
91447636
A
2885 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2886 struct vfsstatfs *sp;
91447636 2887 int error, my_size;
2d21ac55 2888 vfs_context_t ctx = vfs_context_current();
91447636
A
2889
2890 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2891#if CONFIG_MACF
2892 error = mac_mount_check_stat(ctx, mp);
2893 if (error != 0) {
2894 fstp->error = error;
0a7de745 2895 return VFS_RETURNED_DONE;
39037602
A
2896 }
2897#endif
91447636
A
2898 sp = &mp->mnt_vfsstat;
2899 /*
2900 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2901 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2902 */
b0d623f7 2903 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
0a7de745
A
2904 (error = vfs_update_vfsstat(mp, ctx,
2905 VFS_USER_EVENT))) {
91447636 2906 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
0a7de745 2907 return VFS_RETURNED;
1c79356b 2908 }
91447636
A
2909
2910 /*
2911 * Need to handle LP64 version of struct statfs
2912 */
2d21ac55 2913 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2914 if (error) {
2915 fstp->error = error;
0a7de745 2916 return VFS_RETURNED_DONE;
1c79356b 2917 }
91447636 2918 fstp->sfsp += my_size;
2d21ac55
A
2919
2920 if (fstp->mp) {
39236c6e 2921#if CONFIG_MACF
2d21ac55
A
2922 error = mac_mount_label_get(mp, *fstp->mp);
2923 if (error) {
2924 fstp->error = error;
0a7de745 2925 return VFS_RETURNED_DONE;
2d21ac55 2926 }
39236c6e 2927#endif
2d21ac55
A
2928 fstp->mp++;
2929 }
2930 }
91447636 2931 fstp->count++;
0a7de745 2932 return VFS_RETURNED;
91447636
A
2933}
2934
2935/*
2936 * Get statistics on all filesystems.
2937 */
2938int
2939getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2940{
2941 struct __mac_getfsstat_args muap;
2942
2943 muap.buf = uap->buf;
2944 muap.bufsize = uap->bufsize;
2945 muap.mac = USER_ADDR_NULL;
2946 muap.macsize = 0;
2947 muap.flags = uap->flags;
2948
0a7de745 2949 return __mac_getfsstat(p, &muap, retval);
2d21ac55
A
2950}
2951
b0d623f7
A
2952/*
2953 * __mac_getfsstat: Get MAC-related file system statistics
2954 *
2955 * Parameters: p (ignored)
2956 * uap User argument descriptor (see below)
39037602 2957 * retval Count of file system statistics (N stats)
b0d623f7
A
2958 *
2959 * Indirect: uap->bufsize Buffer size
2960 * uap->macsize MAC info size
2961 * uap->buf Buffer where information will be returned
2962 * uap->mac MAC info
2963 * uap->flags File system flags
39037602 2964 *
b0d623f7
A
2965 *
2966 * Returns: 0 Success
2967 * !0 Not success
2968 *
2969 */
2d21ac55
A
2970int
2971__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2972{
2973 user_addr_t sfsp;
2d21ac55 2974 user_addr_t *mp;
b0d623f7 2975 size_t count, maxcount, bufsize, macsize;
91447636
A
2976 struct getfsstat_struct fst;
2977
b0d623f7
A
2978 bufsize = (size_t) uap->bufsize;
2979 macsize = (size_t) uap->macsize;
2980
91447636 2981 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2982 maxcount = bufsize / sizeof(struct user64_statfs);
0a7de745 2983 } else {
b0d623f7 2984 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2985 }
2986 sfsp = uap->buf;
2987 count = 0;
2988
2d21ac55
A
2989 mp = NULL;
2990
2991#if CONFIG_MACF
2992 if (uap->mac != USER_ADDR_NULL) {
2993 u_int32_t *mp0;
2994 int error;
b0d623f7 2995 unsigned int i;
2d21ac55 2996
b0d623f7 2997 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
0a7de745
A
2998 if (count != maxcount) {
2999 return EINVAL;
3000 }
2d21ac55
A
3001
3002 /* Copy in the array */
b0d623f7
A
3003 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
3004 if (mp0 == NULL) {
0a7de745 3005 return ENOMEM;
b0d623f7
A
3006 }
3007
3008 error = copyin(uap->mac, mp0, macsize);
3009 if (error) {
3010 FREE(mp0, M_MACTEMP);
0a7de745 3011 return error;
b0d623f7 3012 }
2d21ac55
A
3013
3014 /* Normalize to an array of user_addr_t */
3015 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
3016 if (mp == NULL) {
3017 FREE(mp0, M_MACTEMP);
0a7de745 3018 return ENOMEM;
b0d623f7
A
3019 }
3020
2d21ac55 3021 for (i = 0; i < count; i++) {
0a7de745 3022 if (IS_64BIT_PROCESS(p)) {
2d21ac55 3023 mp[i] = ((user_addr_t *)mp0)[i];
0a7de745 3024 } else {
2d21ac55 3025 mp[i] = (user_addr_t)mp0[i];
0a7de745 3026 }
2d21ac55
A
3027 }
3028 FREE(mp0, M_MACTEMP);
3029 }
3030#endif
3031
3032
91447636 3033 fst.sfsp = sfsp;
2d21ac55 3034 fst.mp = mp;
91447636
A
3035 fst.flags = uap->flags;
3036 fst.count = 0;
3037 fst.error = 0;
3038 fst.maxcount = maxcount;
3039
39037602 3040
91447636
A
3041 vfs_iterate(0, getfsstat_callback, &fst);
3042
0a7de745 3043 if (mp) {
2d21ac55 3044 FREE(mp, M_MACTEMP);
0a7de745 3045 }
2d21ac55 3046
0a7de745 3047 if (fst.error) {
91447636 3048 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
0a7de745 3049 return fst.error;
91447636
A
3050 }
3051
0a7de745 3052 if (fst.sfsp && fst.count > fst.maxcount) {
91447636 3053 *retval = fst.maxcount;
0a7de745 3054 } else {
91447636 3055 *retval = fst.count;
0a7de745
A
3056 }
3057 return 0;
1c79356b
A
3058}
3059
2d21ac55
A
3060static int
3061getfsstat64_callback(mount_t mp, void * arg)
3062{
3063 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3064 struct vfsstatfs *sp;
3065 int error;
3066
3067 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
3068#if CONFIG_MACF
3069 error = mac_mount_check_stat(vfs_context_current(), mp);
3070 if (error != 0) {
3071 fstp->error = error;
0a7de745 3072 return VFS_RETURNED_DONE;
39037602
A
3073 }
3074#endif
2d21ac55
A
3075 sp = &mp->mnt_vfsstat;
3076 /*
b0d623f7
A
3077 * If MNT_NOWAIT is specified, do not refresh the fsstat
3078 * cache. MNT_WAIT overrides MNT_NOWAIT.
3079 *
3080 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3081 * getfsstat, since the constants are out of the same
3082 * namespace.
2d21ac55 3083 */
b0d623f7 3084 if (((fstp->flags & MNT_NOWAIT) == 0 ||
0a7de745 3085 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
3086 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
3087 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
0a7de745 3088 return VFS_RETURNED;
2d21ac55
A
3089 }
3090
3091 error = statfs64_common(mp, sp, fstp->sfsp);
3092 if (error) {
3093 fstp->error = error;
0a7de745 3094 return VFS_RETURNED_DONE;
2d21ac55
A
3095 }
3096 fstp->sfsp += sizeof(struct statfs64);
3097 }
3098 fstp->count++;
0a7de745 3099 return VFS_RETURNED;
2d21ac55
A
3100}
3101
3102/*
3103 * Get statistics on all file systems in 64 bit mode.
3104 */
3105int
3106getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3107{
3108 user_addr_t sfsp;
3109 int count, maxcount;
3110 struct getfsstat_struct fst;
3111
3112 maxcount = uap->bufsize / sizeof(struct statfs64);
3113
3114 sfsp = uap->buf;
3115 count = 0;
3116
3117 fst.sfsp = sfsp;
3118 fst.flags = uap->flags;
3119 fst.count = 0;
3120 fst.error = 0;
3121 fst.maxcount = maxcount;
3122
3123 vfs_iterate(0, getfsstat64_callback, &fst);
3124
0a7de745 3125 if (fst.error) {
2d21ac55 3126 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
0a7de745 3127 return fst.error;
2d21ac55
A
3128 }
3129
0a7de745 3130 if (fst.sfsp && fst.count > fst.maxcount) {
2d21ac55 3131 *retval = fst.maxcount;
0a7de745 3132 } else {
2d21ac55 3133 *retval = fst.count;
0a7de745 3134 }
2d21ac55 3135
0a7de745 3136 return 0;
2d21ac55
A
3137}
3138
fe8ab488
A
3139/*
3140 * gets the associated vnode with the file descriptor passed.
3141 * as input
3142 *
3143 * INPUT
3144 * ctx - vfs context of caller
3145 * fd - file descriptor for which vnode is required.
3146 * vpp - Pointer to pointer to vnode to be returned.
3147 *
3148 * The vnode is returned with an iocount so any vnode obtained
3149 * by this call needs a vnode_put
3150 *
3151 */
39037602 3152int
fe8ab488
A
3153vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3154{
3155 int error;
3156 vnode_t vp;
3157 struct fileproc *fp;
3158 proc_t p = vfs_context_proc(ctx);
3159
3160 *vpp = NULLVP;
3161
3162 error = fp_getfvp(p, fd, &fp, &vp);
0a7de745
A
3163 if (error) {
3164 return error;
3165 }
fe8ab488
A
3166
3167 error = vnode_getwithref(vp);
3168 if (error) {
3169 (void)fp_drop(p, fd, fp, 0);
0a7de745 3170 return error;
fe8ab488
A
3171 }
3172
3173 (void)fp_drop(p, fd, fp, 0);
3174 *vpp = vp;
0a7de745 3175 return error;
fe8ab488
A
3176}
3177
3178/*
3179 * Wrapper function around namei to start lookup from a directory
3180 * specified by a file descriptor ni_dirfd.
3181 *
3182 * In addition to all the errors returned by namei, this call can
3183 * return ENOTDIR if the file descriptor does not refer to a directory.
3184 * and EBADF if the file descriptor is not valid.
3185 */
3186int
3187nameiat(struct nameidata *ndp, int dirfd)
3188{
3189 if ((dirfd != AT_FDCWD) &&
3190 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3191 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3192 int error = 0;
3193 char c;
3194
3195 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3196 error = copyin(ndp->ni_dirp, &c, sizeof(char));
0a7de745
A
3197 if (error) {
3198 return error;
3199 }
fe8ab488
A
3200 } else {
3201 c = *((char *)(ndp->ni_dirp));
3202 }
3203
3204 if (c != '/') {
3205 vnode_t dvp_at;
3206
3207 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3208 &dvp_at);
0a7de745
A
3209 if (error) {
3210 return error;
3211 }
fe8ab488
A
3212
3213 if (vnode_vtype(dvp_at) != VDIR) {
3214 vnode_put(dvp_at);
0a7de745 3215 return ENOTDIR;
fe8ab488
A
3216 }
3217
3218 ndp->ni_dvp = dvp_at;
3219 ndp->ni_cnd.cn_flags |= USEDVP;
3220 error = namei(ndp);
3221 ndp->ni_cnd.cn_flags &= ~USEDVP;
3222 vnode_put(dvp_at);
0a7de745 3223 return error;
fe8ab488
A
3224 }
3225 }
3226
0a7de745 3227 return namei(ndp);
fe8ab488
A
3228}
3229
1c79356b
A
3230/*
3231 * Change current working directory to a given file descriptor.
3232 */
1c79356b 3233/* ARGSUSED */
2d21ac55
A
3234static int
3235common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3236{
2d21ac55
A
3237 struct filedesc *fdp = p->p_fd;
3238 vnode_t vp;
3239 vnode_t tdp;
3240 vnode_t tvp;
1c79356b 3241 struct mount *mp;
1c79356b 3242 int error;
2d21ac55 3243 vfs_context_t ctx = vfs_context_current();
1c79356b 3244
b0d623f7 3245 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3246 if (per_thread && uap->fd == -1) {
3247 /*
3248 * Switching back from per-thread to per process CWD; verify we
3249 * in fact have one before proceeding. The only success case
3250 * for this code path is to return 0 preemptively after zapping
3251 * the thread structure contents.
3252 */
3253 thread_t th = vfs_context_thread(ctx);
3254 if (th) {
3255 uthread_t uth = get_bsdthread_info(th);
3256 tvp = uth->uu_cdir;
3257 uth->uu_cdir = NULLVP;
3258 if (tvp != NULLVP) {
3259 vnode_rele(tvp);
0a7de745 3260 return 0;
2d21ac55
A
3261 }
3262 }
0a7de745 3263 return EBADF;
2d21ac55 3264 }
91447636 3265
0a7de745
A
3266 if ((error = file_vnode(uap->fd, &vp))) {
3267 return error;
3268 }
3269 if ((error = vnode_getwithref(vp))) {
3270 file_drop(uap->fd);
3271 return error;
91447636 3272 }
55e303ae
A
3273
3274 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3275
2d21ac55 3276 if (vp->v_type != VDIR) {
1c79356b 3277 error = ENOTDIR;
2d21ac55
A
3278 goto out;
3279 }
3280
3281#if CONFIG_MACF
3282 error = mac_vnode_check_chdir(ctx, vp);
0a7de745 3283 if (error) {
2d21ac55 3284 goto out;
0a7de745 3285 }
2d21ac55
A
3286#endif
3287 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
0a7de745 3288 if (error) {
2d21ac55 3289 goto out;
0a7de745 3290 }
2d21ac55 3291
1c79356b 3292 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3293 if (vfs_busy(mp, LK_NOWAIT)) {
3294 error = EACCES;
3295 goto out;
55e303ae 3296 }
2d21ac55 3297 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3298 vfs_unbusy(mp);
0a7de745 3299 if (error) {
1c79356b 3300 break;
0a7de745 3301 }
91447636 3302 vnode_put(vp);
1c79356b
A
3303 vp = tdp;
3304 }
0a7de745
A
3305 if (error) {
3306 goto out;
3307 }
3308 if ((error = vnode_ref(vp))) {
91447636 3309 goto out;
0a7de745 3310 }
91447636
A
3311 vnode_put(vp);
3312
2d21ac55
A
3313 if (per_thread) {
3314 thread_t th = vfs_context_thread(ctx);
3315 if (th) {
3316 uthread_t uth = get_bsdthread_info(th);
3317 tvp = uth->uu_cdir;
3318 uth->uu_cdir = vp;
b0d623f7 3319 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3320 } else {
3321 vnode_rele(vp);
0a7de745 3322 return ENOENT;
2d21ac55
A
3323 }
3324 } else {
3325 proc_fdlock(p);
3326 tvp = fdp->fd_cdir;
3327 fdp->fd_cdir = vp;
3328 proc_fdunlock(p);
3329 }
91447636 3330
0a7de745
A
3331 if (tvp) {
3332 vnode_rele(tvp);
3333 }
91447636
A
3334 file_drop(uap->fd);
3335
0a7de745 3336 return 0;
91447636
A
3337out:
3338 vnode_put(vp);
3339 file_drop(uap->fd);
3340
0a7de745 3341 return error;
1c79356b
A
3342}
3343
2d21ac55 3344int
b0d623f7 3345fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3346{
3347 return common_fchdir(p, uap, 0);
3348}
3349
3350int
b0d623f7 3351__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3352{
3353 return common_fchdir(p, (void *)uap, 1);
3354}
3355
1c79356b 3356/*
b0d623f7 3357 * Change current working directory (".").
2d21ac55
A
3358 *
3359 * Returns: 0 Success
3360 * change_dir:ENOTDIR
3361 * change_dir:???
3362 * vnode_ref:ENOENT No such file or directory
1c79356b 3363 */
1c79356b 3364/* ARGSUSED */
2d21ac55
A
3365static int
3366common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3367{
2d21ac55 3368 struct filedesc *fdp = p->p_fd;
1c79356b
A
3369 int error;
3370 struct nameidata nd;
2d21ac55
A
3371 vnode_t tvp;
3372 vfs_context_t ctx = vfs_context_current();
91447636 3373
39037602 3374 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
0a7de745 3375 UIO_USERSPACE, uap->path, ctx);
2d21ac55 3376 error = change_dir(&nd, ctx);
0a7de745
A
3377 if (error) {
3378 return error;
3379 }
3380 if ((error = vnode_ref(nd.ni_vp))) {
3381 vnode_put(nd.ni_vp);
3382 return error;
91447636
A
3383 }
3384 /*
3385 * drop the iocount we picked up in change_dir
3386 */
3387 vnode_put(nd.ni_vp);
3388
2d21ac55
A
3389 if (per_thread) {
3390 thread_t th = vfs_context_thread(ctx);
3391 if (th) {
3392 uthread_t uth = get_bsdthread_info(th);
3393 tvp = uth->uu_cdir;
3394 uth->uu_cdir = nd.ni_vp;
b0d623f7 3395 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3396 } else {
3397 vnode_rele(nd.ni_vp);
0a7de745 3398 return ENOENT;
2d21ac55
A
3399 }
3400 } else {
3401 proc_fdlock(p);
3402 tvp = fdp->fd_cdir;
3403 fdp->fd_cdir = nd.ni_vp;
3404 proc_fdunlock(p);
3405 }
91447636 3406
0a7de745
A
3407 if (tvp) {
3408 vnode_rele(tvp);
3409 }
91447636 3410
0a7de745 3411 return 0;
1c79356b
A
3412}
3413
b0d623f7
A
3414
3415/*
3416 * chdir
3417 *
3418 * Change current working directory (".") for the entire process
3419 *
3420 * Parameters: p Process requesting the call
0a7de745
A
3421 * uap User argument descriptor (see below)
3422 * retval (ignored)
b0d623f7
A
3423 *
3424 * Indirect parameters: uap->path Directory path
3425 *
3426 * Returns: 0 Success
0a7de745
A
3427 * common_chdir: ENOTDIR
3428 * common_chdir: ENOENT No such file or directory
3429 * common_chdir: ???
b0d623f7
A
3430 *
3431 */
2d21ac55 3432int
b0d623f7 3433chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3434{
3435 return common_chdir(p, (void *)uap, 0);
3436}
3437
b0d623f7
A
3438/*
3439 * __pthread_chdir
3440 *
3441 * Change current working directory (".") for a single thread
3442 *
3443 * Parameters: p Process requesting the call
0a7de745
A
3444 * uap User argument descriptor (see below)
3445 * retval (ignored)
b0d623f7
A
3446 *
3447 * Indirect parameters: uap->path Directory path
3448 *
3449 * Returns: 0 Success
0a7de745 3450 * common_chdir: ENOTDIR
b0d623f7
A
3451 * common_chdir: ENOENT No such file or directory
3452 * common_chdir: ???
3453 *
3454 */
2d21ac55 3455int
b0d623f7 3456__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3457{
3458 return common_chdir(p, (void *)uap, 1);
3459}
3460
3461
1c79356b
A
3462/*
3463 * Change notion of root (``/'') directory.
3464 */
1c79356b
A
3465/* ARGSUSED */
3466int
b0d623f7 3467chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3468{
2d21ac55 3469 struct filedesc *fdp = p->p_fd;
1c79356b
A
3470 int error;
3471 struct nameidata nd;
2d21ac55
A
3472 vnode_t tvp;
3473 vfs_context_t ctx = vfs_context_current();
1c79356b 3474
0a7de745
A
3475 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
3476 return error;
3477 }
1c79356b 3478
39037602 3479 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
0a7de745 3480 UIO_USERSPACE, uap->path, ctx);
2d21ac55 3481 error = change_dir(&nd, ctx);
0a7de745
A
3482 if (error) {
3483 return error;
3484 }
1c79356b 3485
2d21ac55
A
3486#if CONFIG_MACF
3487 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3488 &nd.ni_cnd);
3489 if (error) {
91447636 3490 vnode_put(nd.ni_vp);
0a7de745 3491 return error;
91447636 3492 }
2d21ac55
A
3493#endif
3494
0a7de745
A
3495 if ((error = vnode_ref(nd.ni_vp))) {
3496 vnode_put(nd.ni_vp);
3497 return error;
1c79356b 3498 }
91447636 3499 vnode_put(nd.ni_vp);
1c79356b 3500
91447636 3501 proc_fdlock(p);
fa4905b1 3502 tvp = fdp->fd_rdir;
1c79356b 3503 fdp->fd_rdir = nd.ni_vp;
91447636
A
3504 fdp->fd_flags |= FD_CHROOT;
3505 proc_fdunlock(p);
3506
0a7de745 3507 if (tvp != NULL) {
91447636 3508 vnode_rele(tvp);
0a7de745 3509 }
91447636 3510
0a7de745 3511 return 0;
1c79356b
A
3512}
3513
3514/*
3515 * Common routine for chroot and chdir.
2d21ac55
A
3516 *
3517 * Returns: 0 Success
3518 * ENOTDIR Not a directory
3519 * namei:??? [anything namei can return]
3520 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3521 */
3522static int
91447636 3523change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3524{
2d21ac55 3525 vnode_t vp;
1c79356b
A
3526 int error;
3527
0a7de745
A
3528 if ((error = namei(ndp))) {
3529 return error;
3530 }
91447636 3531 nameidone(ndp);
1c79356b 3532 vp = ndp->ni_vp;
2d21ac55
A
3533
3534 if (vp->v_type != VDIR) {
91447636 3535 vnode_put(vp);
0a7de745 3536 return ENOTDIR;
2d21ac55
A
3537 }
3538
3539#if CONFIG_MACF
3540 error = mac_vnode_check_chdir(ctx, vp);
3541 if (error) {
3542 vnode_put(vp);
0a7de745 3543 return error;
2d21ac55
A
3544 }
3545#endif
3546
3547 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3548 if (error) {
3549 vnode_put(vp);
0a7de745 3550 return error;
2d21ac55 3551 }
91447636 3552
0a7de745 3553 return error;
1c79356b
A
3554}
3555
fe8ab488
A
3556/*
3557 * Allocate the vnode data (for directories) associated with the file glob.
3558 */
3559struct fd_vn_data *
3560fg_vn_data_alloc(void)
3561{
3562 struct fd_vn_data *fvdata;
3563
3564 /* Allocate per fd vnode data */
3565 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
0a7de745 3566 M_FD_VN_DATA, M_WAITOK | M_ZERO);
fe8ab488
A
3567 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3568 return fvdata;
3569}
3570
3571/*
3572 * Free the vnode data (for directories) associated with the file glob.
3573 */
3574void
3575fg_vn_data_free(void *fgvndata)
3576{
3577 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3578
0a7de745 3579 if (fvdata->fv_buf) {
fe8ab488 3580 FREE(fvdata->fv_buf, M_FD_DIRBUF);
0a7de745 3581 }
fe8ab488
A
3582 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3583 FREE(fvdata, M_FD_VN_DATA);
3584}
3585
1c79356b
A
3586/*
3587 * Check permissions, allocate an open file structure,
3588 * and call the device open routine if any.
2d21ac55
A
3589 *
3590 * Returns: 0 Success
3591 * EINVAL
3592 * EINTR
3593 * falloc:ENFILE
3594 * falloc:EMFILE
3595 * falloc:ENOMEM
3596 * vn_open_auth:???
3597 * dupfdopen:???
3598 * VNOP_ADVLOCK:???
3599 * vnode_setsize:???
b0d623f7
A
3600 *
3601 * XXX Need to implement uid, gid
1c79356b 3602 */
2d21ac55 3603int
39236c6e
A
3604open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3605 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3606 int32_t *retval)
1c79356b 3607{
2d21ac55
A
3608 proc_t p = vfs_context_proc(ctx);
3609 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3610 struct fileproc *fp;
3611 vnode_t vp;
91447636 3612 int flags, oflags;
1c79356b
A
3613 int type, indx, error;
3614 struct flock lf;
3e170ce0 3615 struct vfs_context context;
ccc36f2f 3616
91447636 3617 oflags = uflags;
ccc36f2f 3618
0a7de745
A
3619 if ((oflags & O_ACCMODE) == O_ACCMODE) {
3620 return EINVAL;
3621 }
3e170ce0 3622
91447636 3623 flags = FFLAGS(uflags);
3e170ce0
A
3624 CLR(flags, FENCRYPTED);
3625 CLR(flags, FUNENCRYPTED);
91447636
A
3626
3627 AUDIT_ARG(fflags, oflags);
3628 AUDIT_ARG(mode, vap->va_mode);
3629
39236c6e
A
3630 if ((error = falloc_withalloc(p,
3631 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
0a7de745 3632 return error;
91447636 3633 }
2d21ac55 3634 uu->uu_dupfd = -indx - 1;
91447636 3635
2d21ac55 3636 if ((error = vn_open_auth(ndp, &flags, vap))) {
0a7de745 3637 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)) { /* XXX from fdopen */
39236c6e 3638 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3639 fp_drop(p, indx, NULL, 0);
0a7de745
A
3640 *retval = indx;
3641 return 0;
91447636 3642 }
1c79356b 3643 }
0a7de745
A
3644 if (error == ERESTART) {
3645 error = EINTR;
3646 }
91447636 3647 fp_free(p, indx, fp);
0a7de745 3648 return error;
1c79356b 3649 }
2d21ac55
A
3650 uu->uu_dupfd = 0;
3651 vp = ndp->ni_vp;
55e303ae 3652
3e170ce0 3653 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3654 fp->f_fglob->fg_ops = &vnops;
3655 fp->f_fglob->fg_data = (caddr_t)vp;
3656
1c79356b
A
3657 if (flags & (O_EXLOCK | O_SHLOCK)) {
3658 lf.l_whence = SEEK_SET;
3659 lf.l_start = 0;
3660 lf.l_len = 0;
0a7de745 3661 if (flags & O_EXLOCK) {
1c79356b 3662 lf.l_type = F_WRLCK;
0a7de745 3663 } else {
1c79356b 3664 lf.l_type = F_RDLCK;
0a7de745 3665 }
1c79356b 3666 type = F_FLOCK;
0a7de745 3667 if ((flags & FNONBLOCK) == 0) {
1c79356b 3668 type |= F_WAIT;
0a7de745 3669 }
2d21ac55
A
3670#if CONFIG_MACF
3671 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3672 F_SETLK, &lf);
0a7de745 3673 if (error) {
2d21ac55 3674 goto bad;
0a7de745 3675 }
2d21ac55 3676#endif
0a7de745 3677 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
55e303ae 3678 goto bad;
0a7de745 3679 }
91447636 3680 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3681 }
55e303ae 3682
00867663
A
3683#if DEVELOPMENT || DEBUG
3684 /*
3685 * XXX VSWAP: Check for entitlements or special flag here
3686 * so we can restrict access appropriately.
3687 */
3688#else /* DEVELOPMENT || DEBUG */
3689
3690 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3691 /* block attempt to write/truncate swapfile */
3692 error = EPERM;
3693 goto bad;
3694 }
3695#endif /* DEVELOPMENT || DEBUG */
3696
91447636 3697 /* try to truncate by setting the size attribute */
0a7de745 3698 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
91447636 3699 goto bad;
0a7de745 3700 }
55e303ae 3701
fe8ab488
A
3702 /*
3703 * For directories we hold some additional information in the fd.
3704 */
3705 if (vnode_vtype(vp) == VDIR) {
3706 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3707 } else {
3708 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3709 }
3710
91447636 3711 vnode_put(vp);
55e303ae 3712
3e170ce0
A
3713 /*
3714 * The first terminal open (without a O_NOCTTY) by a session leader
3715 * results in it being set as the controlling terminal.
3716 */
3717 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3718 !(flags & O_NOCTTY)) {
3719 int tmp = 0;
3720
3721 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3722 (caddr_t)&tmp, ctx);
3723 }
3724
91447636 3725 proc_fdlock(p);
0a7de745 3726 if (flags & O_CLOEXEC) {
6d2010ae 3727 *fdflags(p, indx) |= UF_EXCLOSE;
0a7de745
A
3728 }
3729 if (flags & O_CLOFORK) {
39236c6e 3730 *fdflags(p, indx) |= UF_FORKCLOSE;
0a7de745 3731 }
6601e61a 3732 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3733
3734#if CONFIG_SECLUDED_MEMORY
3735 if (secluded_for_filecache &&
3736 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3737 vnode_vtype(vp) == VREG) {
3738 memory_object_control_t moc;
3739
3740 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3741
3742 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3743 /* nothing to do... */
3744 } else if (fp->f_fglob->fg_flag & FWRITE) {
3745 /* writable -> no longer eligible for secluded pages */
3746 memory_object_mark_eligible_for_secluded(moc,
0a7de745 3747 FALSE);
39037602
A
3748 } else if (secluded_for_filecache == 1) {
3749 char pathname[32] = { 0, };
3750 size_t copied;
3751 /* XXX FBDP: better way to detect /Applications/ ? */
3752 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3753 copyinstr(ndp->ni_dirp,
0a7de745
A
3754 pathname,
3755 sizeof(pathname),
3756 &copied);
39037602
A
3757 } else {
3758 copystr(CAST_DOWN(void *, ndp->ni_dirp),
0a7de745
A
3759 pathname,
3760 sizeof(pathname),
3761 &copied);
39037602 3762 }
0a7de745 3763 pathname[sizeof(pathname) - 1] = '\0';
39037602 3764 if (strncmp(pathname,
0a7de745
A
3765 "/Applications/",
3766 strlen("/Applications/")) == 0 &&
39037602 3767 strncmp(pathname,
0a7de745
A
3768 "/Applications/Camera.app/",
3769 strlen("/Applications/Camera.app/")) != 0) {
39037602
A
3770 /*
3771 * not writable
3772 * AND from "/Applications/"
3773 * AND not from "/Applications/Camera.app/"
3774 * ==> eligible for secluded
3775 */
3776 memory_object_mark_eligible_for_secluded(moc,
0a7de745 3777 TRUE);
39037602
A
3778 }
3779 } else if (secluded_for_filecache == 2) {
5ba3f43e
A
3780#if __arm64__
3781#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3782#elif __arm__
3783#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3784#else
39037602 3785/* not implemented... */
5ba3f43e 3786#endif
39037602 3787 if (!strncmp(vp->v_name,
0a7de745
A
3788 DYLD_SHARED_CACHE_NAME,
3789 strlen(DYLD_SHARED_CACHE_NAME)) ||
39037602 3790 !strncmp(vp->v_name,
0a7de745
A
3791 "dyld",
3792 strlen(vp->v_name)) ||
39037602 3793 !strncmp(vp->v_name,
0a7de745
A
3794 "launchd",
3795 strlen(vp->v_name)) ||
39037602 3796 !strncmp(vp->v_name,
0a7de745
A
3797 "Camera",
3798 strlen(vp->v_name)) ||
39037602 3799 !strncmp(vp->v_name,
0a7de745
A
3800 "mediaserverd",
3801 strlen(vp->v_name)) ||
d9a64523 3802 !strncmp(vp->v_name,
0a7de745
A
3803 "SpringBoard",
3804 strlen(vp->v_name)) ||
d9a64523 3805 !strncmp(vp->v_name,
0a7de745
A
3806 "backboardd",
3807 strlen(vp->v_name))) {
39037602
A
3808 /*
3809 * This file matters when launching Camera:
3810 * do not store its contents in the secluded
3811 * pool that will be drained on Camera launch.
3812 */
3813 memory_object_mark_eligible_for_secluded(moc,
0a7de745 3814 FALSE);
39037602
A
3815 }
3816 }
3817 }
3818#endif /* CONFIG_SECLUDED_MEMORY */
3819
91447636
A
3820 fp_drop(p, indx, fp, 1);
3821 proc_fdunlock(p);
3822
1c79356b 3823 *retval = indx;
91447636 3824
0a7de745 3825 return 0;
55e303ae 3826bad:
3e170ce0 3827 context = *vfs_context_current();
2d21ac55 3828 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3829
0a7de745 3830 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
fe8ab488
A
3831 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3832 lf.l_whence = SEEK_SET;
0a7de745
A
3833 lf.l_start = 0;
3834 lf.l_len = 0;
3835 lf.l_type = F_UNLCK;
39037602 3836
0a7de745
A
3837 (void)VNOP_ADVLOCK(
3838 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
fe8ab488 3839 }
2d21ac55
A
3840
3841 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3842 vnode_put(vp);
3843 fp_free(p, indx, fp);
3844
0a7de745 3845 return error;
1c79356b
A
3846}
3847
fe8ab488
A
3848/*
3849 * While most of the *at syscall handlers can call nameiat() which
3850 * is a wrapper around namei, the use of namei and initialisation
3851 * of nameidata are far removed and in different functions - namei
3852 * gets called in vn_open_auth for open1. So we'll just do here what
3853 * nameiat() does.
3854 */
3855static int
3856open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3857 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3858 int dirfd)
3859{
3860 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3861 int error;
3862 char c;
3863
3864 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3865 error = copyin(ndp->ni_dirp, &c, sizeof(char));
0a7de745
A
3866 if (error) {
3867 return error;
3868 }
fe8ab488
A
3869 } else {
3870 c = *((char *)(ndp->ni_dirp));
3871 }
3872
3873 if (c != '/') {
3874 vnode_t dvp_at;
3875
3876 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3877 &dvp_at);
0a7de745
A
3878 if (error) {
3879 return error;
3880 }
fe8ab488
A
3881
3882 if (vnode_vtype(dvp_at) != VDIR) {
3883 vnode_put(dvp_at);
0a7de745 3884 return ENOTDIR;
fe8ab488
A
3885 }
3886
3887 ndp->ni_dvp = dvp_at;
3888 ndp->ni_cnd.cn_flags |= USEDVP;
3889 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3890 retval);
3891 vnode_put(dvp_at);
0a7de745 3892 return error;
fe8ab488
A
3893 }
3894 }
3895
0a7de745 3896 return open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval);
fe8ab488
A
3897}
3898
0c530ab8 3899/*
b0d623f7 3900 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3901 *
3902 * Parameters: p Process requesting the open
3903 * uap User argument descriptor (see below)
3904 * retval Pointer to an area to receive the
3905 * return value from the system call
3906 *
3907 * Indirect: uap->path Path to open (same as 'open')
3908 * uap->flags Flags to open (same as 'open'
3909 * uap->uid UID to set, if creating
3910 * uap->gid GID to set, if creating
3911 * uap->mode File mode, if creating (same as 'open')
3912 * uap->xsecurity ACL to set, if creating
3913 *
3914 * Returns: 0 Success
3915 * !0 errno value
3916 *
3917 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3918 *
3919 * XXX: We should enumerate the possible errno values here, and where
3920 * in the code they originated.
3921 */
1c79356b 3922int
b0d623f7 3923open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3924{
2d21ac55 3925 struct filedesc *fdp = p->p_fd;
91447636
A
3926 int ciferror;
3927 kauth_filesec_t xsecdst;
3928 struct vnode_attr va;
2d21ac55 3929 struct nameidata nd;
91447636
A
3930 int cmode;
3931
b0d623f7
A
3932 AUDIT_ARG(owner, uap->uid, uap->gid);
3933
91447636
A
3934 xsecdst = NULL;
3935 if ((uap->xsecurity != USER_ADDR_NULL) &&
0a7de745 3936 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
91447636 3937 return ciferror;
0a7de745 3938 }
91447636 3939
91447636 3940 VATTR_INIT(&va);
0a7de745 3941 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636 3942 VATTR_SET(&va, va_mode, cmode);
0a7de745 3943 if (uap->uid != KAUTH_UID_NONE) {
91447636 3944 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
3945 }
3946 if (uap->gid != KAUTH_GID_NONE) {
91447636 3947 VATTR_SET(&va, va_gid, uap->gid);
0a7de745
A
3948 }
3949 if (xsecdst != NULL) {
91447636 3950 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 3951 }
91447636 3952
6d2010ae 3953 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 3954 uap->path, vfs_context_current());
2d21ac55 3955
39236c6e 3956 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
0a7de745
A
3957 fileproc_alloc_init, NULL, retval);
3958 if (xsecdst != NULL) {
91447636 3959 kauth_filesec_free(xsecdst);
0a7de745 3960 }
91447636
A
3961
3962 return ciferror;
3963}
3964
39037602 3965/*
316670eb 3966 * Go through the data-protected atomically controlled open (2)
39037602 3967 *
316670eb
A
3968 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3969 */
0a7de745
A
3970int
3971open_dprotected_np(__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval)
3972{
316670eb
A
3973 int flags = uap->flags;
3974 int class = uap->class;
3975 int dpflags = uap->dpflags;
3976
39037602 3977 /*
316670eb
A
3978 * Follow the same path as normal open(2)
3979 * Look up the item if it exists, and acquire the vnode.
3980 */
3981 struct filedesc *fdp = p->p_fd;
3982 struct vnode_attr va;
3983 struct nameidata nd;
3984 int cmode;
3985 int error;
39037602 3986
316670eb
A
3987 VATTR_INIT(&va);
3988 /* Mask off all but regular access permissions */
0a7de745 3989 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
316670eb
A
3990 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3991
3992 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 3993 uap->path, vfs_context_current());
316670eb 3994
39037602
A
3995 /*
3996 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3997 * extra fields.
3998 * 1. target cprotect class.
39037602
A
3999 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4000 */
4001 if (flags & O_CREAT) {
0a7de745
A
4002 /* lower level kernel code validates that the class is valid before applying it. */
4003 if (class != PROTECTION_CLASS_DEFAULT) {
4004 /*
4005 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4006 * file behave the same as open (2)
4007 */
4008 VATTR_SET(&va, va_dataprotect_class, class);
4009 }
4010 }
4011
4012 if (dpflags & (O_DP_GETRAWENCRYPTED | O_DP_GETRAWUNENCRYPTED)) {
4013 if (flags & (O_RDWR | O_WRONLY)) {
316670eb 4014 /* Not allowed to write raw encrypted bytes */
39037602
A
4015 return EINVAL;
4016 }
3e170ce0 4017 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
0a7de745 4018 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3e170ce0
A
4019 }
4020 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
0a7de745 4021 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3e170ce0 4022 }
316670eb
A
4023 }
4024
39236c6e 4025 error = open1(vfs_context_current(), &nd, uap->flags, &va,
0a7de745 4026 fileproc_alloc_init, NULL, retval);
316670eb
A
4027
4028 return error;
4029}
4030
fe8ab488
A
4031static int
4032openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
4033 int fd, enum uio_seg segflg, int *retval)
2d21ac55 4034{
fe8ab488 4035 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 4036 struct vnode_attr va;
2d21ac55 4037 struct nameidata nd;
91447636 4038 int cmode;
1c79356b 4039
91447636
A
4040 VATTR_INIT(&va);
4041 /* Mask off all but regular access permissions */
0a7de745 4042 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
4043 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4044
fe8ab488
A
4045 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
4046 segflg, path, ctx);
2d21ac55 4047
0a7de745
A
4048 return open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
4049 retval, fd);
1c79356b 4050}
91447636 4051
fe8ab488
A
4052int
4053open(proc_t p, struct open_args *uap, int32_t *retval)
4054{
4055 __pthread_testcancel(1);
0a7de745 4056 return open_nocancel(p, (struct open_nocancel_args *)uap, retval);
fe8ab488 4057}
1c79356b 4058
fe8ab488
A
4059int
4060open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
4061 int32_t *retval)
4062{
0a7de745
A
4063 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4064 uap->mode, AT_FDCWD, UIO_USERSPACE, retval);
fe8ab488 4065}
91447636 4066
1c79356b 4067int
fe8ab488 4068openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
0a7de745 4069 int32_t *retval)
1c79356b 4070{
0a7de745
A
4071 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4072 uap->mode, uap->fd, UIO_USERSPACE, retval);
fe8ab488 4073}
91447636 4074
fe8ab488
A
4075int
4076openat(proc_t p, struct openat_args *uap, int32_t *retval)
4077{
4078 __pthread_testcancel(1);
0a7de745 4079 return openat_nocancel(p, (struct openat_nocancel_args *)uap, retval);
fe8ab488
A
4080}
4081
4082/*
4083 * openbyid_np: open a file given a file system id and a file system object id
4084 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
4085 * file systems that don't support object ids it is a node id (uint64_t).
4086 *
4087 * Parameters: p Process requesting the open
4088 * uap User argument descriptor (see below)
4089 * retval Pointer to an area to receive the
4090 * return value from the system call
4091 *
4092 * Indirect: uap->path Path to open (same as 'open')
4093 *
4094 * uap->fsid id of target file system
4095 * uap->objid id of target file system object
4096 * uap->flags Flags to open (same as 'open')
4097 *
4098 * Returns: 0 Success
4099 * !0 errno value
4100 *
4101 *
4102 * XXX: We should enumerate the possible errno values here, and where
4103 * in the code they originated.
4104 */
4105int
4106openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4107{
4108 fsid_t fsid;
4109 uint64_t objid;
4110 int error;
4111 char *buf = NULL;
4112 int buflen = MAXPATHLEN;
4113 int pathlen = 0;
4114 vfs_context_t ctx = vfs_context_current();
4115
490019cf 4116 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
0a7de745 4117 return error;
490019cf
A
4118 }
4119
fe8ab488 4120 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
0a7de745 4121 return error;
fe8ab488
A
4122 }
4123
4124 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4125 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
0a7de745 4126 return error;
fe8ab488
A
4127 }
4128
4129 AUDIT_ARG(value32, fsid.val[0]);
4130 AUDIT_ARG(value64, objid);
4131
4132 /*resolve path from fsis, objid*/
4133 do {
4134 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4135 if (buf == NULL) {
0a7de745 4136 return ENOMEM;
fe8ab488
A
4137 }
4138
4139 error = fsgetpath_internal(
4140 ctx, fsid.val[0], objid,
4141 buflen, buf, &pathlen);
4142
4143 if (error) {
4144 FREE(buf, M_TEMP);
4145 buf = NULL;
4146 }
4147 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4148
4149 if (error) {
4150 return error;
4151 }
4152
4153 buf[pathlen] = 0;
4154
4155 error = openat_internal(
4156 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4157
4158 FREE(buf, M_TEMP);
4159
4160 return error;
4161}
4162
4163
4164/*
4165 * Create a special file.
4166 */
4167static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4168
4169int
4170mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4171{
4172 struct vnode_attr va;
4173 vfs_context_t ctx = vfs_context_current();
4174 int error;
4175 struct nameidata nd;
0a7de745 4176 vnode_t vp, dvp;
fe8ab488 4177
0a7de745
A
4178 VATTR_INIT(&va);
4179 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4180 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
4181
4182 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
0a7de745
A
4183 if ((uap->mode & S_IFMT) == S_IFIFO) {
4184 return mkfifo1(ctx, uap->path, &va);
4185 }
1c79356b 4186
55e303ae 4187 AUDIT_ARG(mode, uap->mode);
b0d623f7 4188 AUDIT_ARG(value32, uap->dev);
91447636 4189
0a7de745
A
4190 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
4191 return error;
4192 }
39037602 4193 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
0a7de745 4194 UIO_USERSPACE, uap->path, ctx);
55e303ae 4195 error = namei(&nd);
0a7de745
A
4196 if (error) {
4197 return error;
4198 }
91447636 4199 dvp = nd.ni_dvp;
1c79356b 4200 vp = nd.ni_vp;
91447636
A
4201
4202 if (vp != NULL) {
1c79356b 4203 error = EEXIST;
91447636 4204 goto out;
1c79356b 4205 }
55e303ae 4206
91447636 4207 switch (uap->mode & S_IFMT) {
91447636
A
4208 case S_IFCHR:
4209 VATTR_SET(&va, va_type, VCHR);
4210 break;
4211 case S_IFBLK:
4212 VATTR_SET(&va, va_type, VBLK);
4213 break;
91447636
A
4214 default:
4215 error = EINVAL;
4216 goto out;
4217 }
2d21ac55
A
4218
4219#if CONFIG_MACF
6d2010ae
A
4220 error = mac_vnode_check_create(ctx,
4221 nd.ni_dvp, &nd.ni_cnd, &va);
0a7de745 4222 if (error) {
6d2010ae 4223 goto out;
0a7de745 4224 }
2d21ac55
A
4225#endif
4226
0a7de745
A
4227 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4228 goto out;
4229 }
2d21ac55 4230
0a7de745 4231 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0) {
91447636 4232 goto out;
0a7de745 4233 }
91447636
A
4234
4235 if (vp) {
0a7de745 4236 int update_flags = 0;
91447636 4237
0a7de745
A
4238 // Make sure the name & parent pointers are hooked up
4239 if (vp->v_name == NULL) {
91447636 4240 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
4241 }
4242 if (vp->v_parent == NULLVP) {
4243 update_flags |= VNODE_UPDATE_PARENT;
4244 }
91447636 4245
0a7de745
A
4246 if (update_flags) {
4247 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4248 }
91447636 4249
2d21ac55
A
4250#if CONFIG_FSE
4251 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4252 FSE_ARG_VNODE, vp,
4253 FSE_ARG_DONE);
2d21ac55 4254#endif
1c79356b 4255 }
91447636
A
4256
4257out:
4258 /*
4259 * nameidone has to happen before we vnode_put(dvp)
4260 * since it may need to release the fs_nodelock on the dvp
4261 */
4262 nameidone(&nd);
4263
0a7de745
A
4264 if (vp) {
4265 vnode_put(vp);
4266 }
91447636
A
4267 vnode_put(dvp);
4268
0a7de745 4269 return error;
1c79356b
A
4270}
4271
4272/*
4273 * Create a named pipe.
2d21ac55
A
4274 *
4275 * Returns: 0 Success
4276 * EEXIST
4277 * namei:???
4278 * vnode_authorize:???
4279 * vn_create:???
1c79356b 4280 */
91447636
A
4281static int
4282mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4283{
0a7de745 4284 vnode_t vp, dvp;
1c79356b
A
4285 int error;
4286 struct nameidata nd;
55e303ae 4287
39037602 4288 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
0a7de745 4289 UIO_USERSPACE, upath, ctx);
55e303ae 4290 error = namei(&nd);
0a7de745
A
4291 if (error) {
4292 return error;
4293 }
91447636
A
4294 dvp = nd.ni_dvp;
4295 vp = nd.ni_vp;
4296
0a7de745
A
4297 /* check that this is a new file and authorize addition */
4298 if (vp != NULL) {
4299 error = EEXIST;
4300 goto out;
4301 }
4302 VATTR_SET(vap, va_type, VFIFO);
2d21ac55 4303
0a7de745 4304 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
2d21ac55 4305 goto out;
0a7de745 4306 }
2d21ac55 4307
0a7de745 4308 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4309out:
4310 /*
4311 * nameidone has to happen before we vnode_put(dvp)
4312 * since it may need to release the fs_nodelock on the dvp
4313 */
4314 nameidone(&nd);
4315
0a7de745
A
4316 if (vp) {
4317 vnode_put(vp);
4318 }
91447636
A
4319 vnode_put(dvp);
4320
55e303ae 4321 return error;
91447636
A
4322}
4323
0c530ab8
A
4324
4325/*
b0d623f7 4326 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4327 *
4328 * Parameters: p Process requesting the open
4329 * uap User argument descriptor (see below)
4330 * retval (Ignored)
4331 *
4332 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4333 * uap->uid UID to set
4334 * uap->gid GID to set
4335 * uap->mode File mode to set (same as 'mkfifo')
4336 * uap->xsecurity ACL to set, if creating
4337 *
4338 * Returns: 0 Success
4339 * !0 errno value
4340 *
4341 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4342 *
4343 * XXX: We should enummerate the possible errno values here, and where
4344 * in the code they originated.
4345 */
91447636 4346int
b0d623f7 4347mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4348{
4349 int ciferror;
4350 kauth_filesec_t xsecdst;
91447636
A
4351 struct vnode_attr va;
4352
b0d623f7
A
4353 AUDIT_ARG(owner, uap->uid, uap->gid);
4354
91447636
A
4355 xsecdst = KAUTH_FILESEC_NONE;
4356 if (uap->xsecurity != USER_ADDR_NULL) {
0a7de745 4357 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
91447636 4358 return ciferror;
0a7de745 4359 }
91447636
A
4360 }
4361
91447636 4362 VATTR_INIT(&va);
0a7de745
A
4363 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4364 if (uap->uid != KAUTH_UID_NONE) {
91447636 4365 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
4366 }
4367 if (uap->gid != KAUTH_GID_NONE) {
91447636 4368 VATTR_SET(&va, va_gid, uap->gid);
0a7de745
A
4369 }
4370 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 4371 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 4372 }
91447636 4373
2d21ac55 4374 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636 4375
0a7de745 4376 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 4377 kauth_filesec_free(xsecdst);
0a7de745 4378 }
91447636
A
4379 return ciferror;
4380}
4381
4382/* ARGSUSED */
4383int
b0d623f7 4384mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4385{
91447636
A
4386 struct vnode_attr va;
4387
0a7de745
A
4388 VATTR_INIT(&va);
4389 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
91447636 4390
0a7de745 4391 return mkfifo1(vfs_context_current(), uap->path, &va);
1c79356b
A
4392}
4393
b0d623f7
A
4394
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.
 *
 * The terminator itself takes part in the search, so looking for '\0'
 * yields a pointer to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch) {
			last = p;
		}
	} while (*p++ != '\0');

	return last;
}
4410
4411extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4412
4413int
4414safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4415{
4416 int ret, len = _len;
4417
4418 *truncated_path = 0;
4419 ret = vn_getpath(dvp, path, &len);
4420 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4421 if (leafname) {
0a7de745
A
4422 path[len - 1] = '/';
4423 len += strlcpy(&path[len], leafname, MAXPATHLEN - len) + 1;
b0d623f7
A
4424 if (len > MAXPATHLEN) {
4425 char *ptr;
39037602 4426
b0d623f7
A
4427 // the string got truncated!
4428 *truncated_path = 1;
4429 ptr = my_strrchr(path, '/');
4430 if (ptr) {
4431 *ptr = '\0'; // chop off the string at the last directory component
4432 }
4433 len = strlen(path) + 1;
4434 }
4435 }
4436 } else if (ret == 0) {
4437 *truncated_path = 1;
4438 } else if (ret != 0) {
0a7de745 4439 struct vnode *mydvp = dvp;
b0d623f7
A
4440
4441 if (ret != ENOSPC) {
4442 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
0a7de745 4443 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4444 }
b0d623f7 4445 *truncated_path = 1;
39037602 4446
b0d623f7
A
4447 do {
4448 if (mydvp->v_parent != NULL) {
4449 mydvp = mydvp->v_parent;
4450 } else if (mydvp->v_mount) {
4451 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4452 break;
4453 } else {
4454 // no parent and no mount point? only thing is to punt and say "/" changed
4455 strlcpy(path, "/", _len);
4456 len = 2;
4457 mydvp = NULL;
4458 }
39037602 4459
b0d623f7
A
4460 if (mydvp == NULL) {
4461 break;
4462 }
4463
4464 len = _len;
4465 ret = vn_getpath(mydvp, path, &len);
4466 } while (ret == ENOSPC);
4467 }
4468
4469 return len;
4470}
4471
4472
1c79356b
A
4473/*
4474 * Make a hard file link.
2d21ac55
A
4475 *
4476 * Returns: 0 Success
4477 * EPERM
4478 * EEXIST
4479 * EXDEV
4480 * namei:???
4481 * vnode_authorize:???
4482 * VNOP_LINK:???
1c79356b 4483 */
1c79356b 4484/* ARGSUSED */
fe8ab488
A
4485static int
4486linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4487 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4488{
0a7de745 4489 vnode_t vp, dvp, lvp;
1c79356b 4490 struct nameidata nd;
fe8ab488 4491 int follow;
1c79356b 4492 int error;
b0d623f7 4493#if CONFIG_FSE
91447636 4494 fse_info finfo;
b0d623f7 4495#endif
b226f5e5 4496 int need_event, has_listeners, need_kpath2;
2d21ac55 4497 char *target_path = NULL;
0a7de745 4498 int truncated = 0;
1c79356b 4499
91447636
A
4500 vp = dvp = lvp = NULLVP;
4501
4502 /* look up the object we are linking to */
fe8ab488
A
4503 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4504 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4505 segflg, path, ctx);
4506
4507 error = nameiat(&nd, fd1);
0a7de745
A
4508 if (error) {
4509 return error;
4510 }
1c79356b 4511 vp = nd.ni_vp;
91447636
A
4512
4513 nameidone(&nd);
4514
2d21ac55
A
4515 /*
4516 * Normally, linking to directories is not supported.
4517 * However, some file systems may have limited support.
4518 */
91447636 4519 if (vp->v_type == VDIR) {
39037602 4520 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4521 error = EPERM; /* POSIX */
4522 goto out;
4523 }
39037602 4524
2d21ac55
A
4525 /* Linking to a directory requires ownership. */
4526 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4527 struct vnode_attr dva;
4528
4529 VATTR_INIT(&dva);
4530 VATTR_WANTED(&dva, va_uid);
4531 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4532 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4533 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4534 error = EACCES;
4535 goto out;
4536 }
4537 }
91447636
A
4538 }
4539
91447636 4540 /* lookup the target node */
6d2010ae
A
4541#if CONFIG_TRIGGERS
4542 nd.ni_op = OP_LINK;
4543#endif
91447636 4544 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4545 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4546 nd.ni_dirp = link;
4547 error = nameiat(&nd, fd2);
0a7de745 4548 if (error != 0) {
91447636 4549 goto out;
0a7de745 4550 }
91447636
A
4551 dvp = nd.ni_dvp;
4552 lvp = nd.ni_vp;
2d21ac55
A
4553
4554#if CONFIG_MACF
0a7de745 4555 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
2d21ac55 4556 goto out2;
0a7de745 4557 }
2d21ac55
A
4558#endif
4559
0a7de745
A
4560 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4561 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
4562 goto out2;
4563 }
2d21ac55 4564
91447636
A
4565 /* target node must not exist */
4566 if (lvp != NULLVP) {
4567 error = EEXIST;
4568 goto out2;
4569 }
0a7de745
A
4570 /* cannot link across mountpoints */
4571 if (vnode_mount(vp) != vnode_mount(dvp)) {
4572 error = EXDEV;
4573 goto out2;
4574 }
39037602 4575
0a7de745
A
4576 /* authorize creation of the target note */
4577 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4578 goto out2;
4579 }
91447636
A
4580
4581 /* and finally make the link */
2d21ac55 4582 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
0a7de745 4583 if (error) {
91447636 4584 goto out2;
0a7de745 4585 }
91447636 4586
39236c6e
A
4587#if CONFIG_MACF
4588 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4589#endif
4590
2d21ac55 4591#if CONFIG_FSE
91447636 4592 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4593#else
4594 need_event = 0;
4595#endif
91447636
A
4596 has_listeners = kauth_authorize_fileop_has_listeners();
4597
b226f5e5
A
4598 need_kpath2 = 0;
4599#if CONFIG_AUDIT
4600 if (AUDIT_RECORD_EXISTS()) {
4601 need_kpath2 = 1;
4602 }
4603#endif
4604
4605 if (need_event || has_listeners || need_kpath2) {
91447636
A
4606 char *link_to_path = NULL;
4607 int len, link_name_len;
4608
4609 /* build the path to the new link file */
2d21ac55
A
4610 GET_PATH(target_path);
4611 if (target_path == NULL) {
4612 error = ENOMEM;
4613 goto out2;
4614 }
4615
b0d623f7 4616 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636 4617
b226f5e5
A
4618 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
4619
91447636 4620 if (has_listeners) {
0a7de745 4621 /* build the path to file we are linking to */
2d21ac55
A
4622 GET_PATH(link_to_path);
4623 if (link_to_path == NULL) {
4624 error = ENOMEM;
4625 goto out2;
4626 }
4627
91447636 4628 link_name_len = MAXPATHLEN;
fe8ab488
A
4629 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4630 /*
39037602 4631 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4632 * Ignore result of kauth_authorize_fileop call.
4633 */
39037602 4634 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
0a7de745
A
4635 (uintptr_t)link_to_path,
4636 (uintptr_t)target_path);
fe8ab488 4637 }
2d21ac55
A
4638 if (link_to_path != NULL) {
4639 RELEASE_PATH(link_to_path);
4640 }
91447636 4641 }
2d21ac55 4642#if CONFIG_FSE
91447636 4643 if (need_event) {
0a7de745
A
4644 /* construct fsevent */
4645 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4646 if (truncated) {
4647 finfo.mode |= FSE_TRUNCATED_PATH;
4648 }
4649
0a7de745
A
4650 // build the path to the destination of the link
4651 add_fsevent(FSE_CREATE_FILE, ctx,
4652 FSE_ARG_STRING, len, target_path,
4653 FSE_ARG_FINFO, &finfo,
4654 FSE_ARG_DONE);
1c79356b 4655 }
b0d623f7 4656 if (vp->v_parent) {
0a7de745
A
4657 add_fsevent(FSE_STAT_CHANGED, ctx,
4658 FSE_ARG_VNODE, vp->v_parent,
4659 FSE_ARG_DONE);
b0d623f7 4660 }
1c79356b 4661 }
2d21ac55 4662#endif
1c79356b 4663 }
91447636
A
4664out2:
4665 /*
4666 * nameidone has to happen before we vnode_put(dvp)
4667 * since it may need to release the fs_nodelock on the dvp
4668 */
4669 nameidone(&nd);
2d21ac55
A
4670 if (target_path != NULL) {
4671 RELEASE_PATH(target_path);
4672 }
91447636 4673out:
0a7de745 4674 if (lvp) {
91447636 4675 vnode_put(lvp);
0a7de745
A
4676 }
4677 if (dvp) {
91447636 4678 vnode_put(dvp);
0a7de745 4679 }
91447636 4680 vnode_put(vp);
0a7de745 4681 return error;
91447636 4682}
1c79356b 4683
fe8ab488
A
4684int
4685link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4686{
0a7de745
A
4687 return linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4688 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
fe8ab488
A
4689}
4690
4691int
4692linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4693{
0a7de745
A
4694 if (uap->flag & ~AT_SYMLINK_FOLLOW) {
4695 return EINVAL;
4696 }
fe8ab488 4697
0a7de745
A
4698 return linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4699 uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
fe8ab488
A
4700}
4701
1c79356b
A
4702/*
4703 * Make a symbolic link.
91447636
A
4704 *
4705 * We could add support for ACLs here too...
1c79356b 4706 */
1c79356b 4707/* ARGSUSED */
fe8ab488
A
4708static int
4709symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4710 user_addr_t link, enum uio_seg segflg)
1c79356b 4711{
91447636
A
4712 struct vnode_attr va;
4713 char *path;
1c79356b
A
4714 int error;
4715 struct nameidata nd;
0a7de745
A
4716 vnode_t vp, dvp;
4717 size_t dummy = 0;
fe8ab488
A
4718 proc_t p;
4719
4720 error = 0;
4721 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4722 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4723 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4724 } else {
4725 path = (char *)path_data;
4726 }
0a7de745 4727 if (error) {
1c79356b 4728 goto out;
0a7de745
A
4729 }
4730 AUDIT_ARG(text, path); /* This is the link string */
91447636 4731
fe8ab488 4732 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
0a7de745 4733 segflg, link, ctx);
fe8ab488
A
4734
4735 error = nameiat(&nd, fd);
0a7de745 4736 if (error) {
1c79356b 4737 goto out;
0a7de745 4738 }
91447636
A
4739 dvp = nd.ni_dvp;
4740 vp = nd.ni_vp;
55e303ae 4741
fe8ab488 4742 p = vfs_context_proc(ctx);
2d21ac55
A
4743 VATTR_INIT(&va);
4744 VATTR_SET(&va, va_type, VLNK);
4745 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4746
2d21ac55
A
4747#if CONFIG_MACF
4748 error = mac_vnode_check_create(ctx,
0a7de745 4749 dvp, &nd.ni_cnd, &va);
2d21ac55
A
4750#endif
4751 if (error != 0) {
0a7de745 4752 goto skipit;
2d21ac55 4753 }
91447636 4754
2d21ac55 4755 if (vp != NULL) {
0a7de745
A
4756 error = EEXIST;
4757 goto skipit;
2d21ac55
A
4758 }
4759
4760 /* authorize */
0a7de745 4761 if (error == 0) {
2d21ac55 4762 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
0a7de745 4763 }
2d21ac55 4764 /* get default ownership, etc. */
0a7de745 4765 if (error == 0) {
2d21ac55 4766 error = vnode_authattr_new(dvp, &va, 0, ctx);
0a7de745
A
4767 }
4768 if (error == 0) {
2d21ac55 4769 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
0a7de745 4770 }
2d21ac55 4771
39236c6e 4772#if CONFIG_MACF
0a7de745 4773 if (error == 0 && vp) {
39236c6e 4774 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
0a7de745 4775 }
39236c6e
A
4776#endif
4777
2d21ac55 4778 /* do fallback attribute handling */
0a7de745 4779 if (error == 0 && vp) {
2d21ac55 4780 error = vnode_setattr_fallback(vp, &va, ctx);
0a7de745 4781 }
39236c6e 4782
2d21ac55 4783 if (error == 0) {
0a7de745 4784 int update_flags = 0;
55e303ae 4785
3e170ce0 4786 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4787 if (vp == NULL) {
4788 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4789#if CONFIG_TRIGGERS
4790 nd.ni_op = OP_LOOKUP;
4791#endif
2d21ac55 4792 nd.ni_cnd.cn_flags = 0;
fe8ab488 4793 error = nameiat(&nd, fd);
2d21ac55 4794 vp = nd.ni_vp;
55e303ae 4795
0a7de745 4796 if (vp == NULL) {
2d21ac55 4797 goto skipit;
0a7de745 4798 }
2d21ac55 4799 }
fe8ab488 4800
91447636 4801#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4802 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4803 * Ignore result of kauth_authorize_fileop call.
4804 */
4805 if (kauth_authorize_fileop_has_listeners() &&
4806 namei(&nd) == 0) {
4807 char *new_link_path = NULL;
0a7de745 4808 int len;
fe8ab488 4809
2d21ac55
A
4810 /* build the path to the new link file */
4811 new_link_path = get_pathbuff();
4812 len = MAXPATHLEN;
4813 vn_getpath(dvp, new_link_path, &len);
4814 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4815 new_link_path[len - 1] = '/';
0a7de745 4816 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN - len);
91447636 4817 }
fe8ab488
A
4818
4819 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
0a7de745
A
4820 (uintptr_t)path, (uintptr_t)new_link_path);
4821 if (new_link_path != NULL) {
2d21ac55 4822 release_pathbuff(new_link_path);
0a7de745 4823 }
2d21ac55 4824 }
fe8ab488 4825#endif
2d21ac55 4826 // Make sure the name & parent pointers are hooked up
0a7de745 4827 if (vp->v_name == NULL) {
2d21ac55 4828 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
4829 }
4830 if (vp->v_parent == NULLVP) {
2d21ac55 4831 update_flags |= VNODE_UPDATE_PARENT;
0a7de745 4832 }
fe8ab488 4833
0a7de745 4834 if (update_flags) {
2d21ac55 4835 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
0a7de745 4836 }
91447636 4837
2d21ac55
A
4838#if CONFIG_FSE
4839 add_fsevent(FSE_CREATE_FILE, ctx,
0a7de745
A
4840 FSE_ARG_VNODE, vp,
4841 FSE_ARG_DONE);
2d21ac55
A
4842#endif
4843 }
91447636
A
4844
4845skipit:
4846 /*
4847 * nameidone has to happen before we vnode_put(dvp)
4848 * since it may need to release the fs_nodelock on the dvp
4849 */
4850 nameidone(&nd);
4851
0a7de745
A
4852 if (vp) {
4853 vnode_put(vp);
4854 }
91447636 4855 vnode_put(dvp);
1c79356b 4856out:
0a7de745 4857 if (path && (path != (char *)path_data)) {
fe8ab488 4858 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
0a7de745 4859 }
91447636 4860
0a7de745 4861 return error;
1c79356b
A
4862}
4863
fe8ab488
A
4864int
4865symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4866{
0a7de745
A
4867 return symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4868 uap->link, UIO_USERSPACE);
fe8ab488
A
4869}
4870
4871int
4872symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4873 __unused int32_t *retval)
4874{
0a7de745
A
4875 return symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4876 uap->path2, UIO_USERSPACE);
fe8ab488
A
4877}
4878
1c79356b
A
4879/*
4880 * Delete a whiteout from the filesystem.
fe8ab488 4881 * No longer supported.
1c79356b 4882 */
1c79356b 4883int
fe8ab488 4884undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4885{
0a7de745 4886 return ENOTSUP;
1c79356b
A
4887}
4888
4889/*
4890 * Delete a name from the filesystem.
4891 */
1c79356b 4892/* ARGSUSED */
fe8ab488 4893static int
c18c124e
A
4894unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4895 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4896{
c18c124e 4897 struct nameidata nd;
0a7de745 4898 vnode_t vp, dvp;
1c79356b 4899 int error;
91447636 4900 struct componentname *cnp;
2d21ac55 4901 char *path = NULL;
0a7de745 4902 int len = 0;
b0d623f7 4903#if CONFIG_FSE
2d21ac55 4904 fse_info finfo;
6d2010ae 4905 struct vnode_attr va;
b0d623f7 4906#endif
c18c124e
A
4907 int flags;
4908 int need_event;
4909 int has_listeners;
4910 int truncated_path;
6d2010ae 4911 int batched;
c18c124e
A
4912 struct vnode_attr *vap;
4913 int do_retry;
4914 int retry_count = 0;
4915 int cn_flags;
4916
4917 cn_flags = LOCKPARENT;
0a7de745 4918 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH)) {
c18c124e 4919 cn_flags |= AUDITVNPATH1;
0a7de745 4920 }
c18c124e 4921 /* If a starting dvp is passed, it trumps any fd passed. */
0a7de745 4922 if (start_dvp) {
c18c124e 4923 cn_flags |= USEDVP;
0a7de745 4924 }
6d2010ae 4925
c910b4d9
A
4926#if NAMEDRSRCFORK
4927 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4928 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4929#endif
4930
c18c124e
A
4931retry:
4932 do_retry = 0;
4933 flags = 0;
4934 need_event = 0;
4935 has_listeners = 0;
4936 truncated_path = 0;
4937 vap = NULL;
4938
4939 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4940
4941 nd.ni_dvp = start_dvp;
4942 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4943 cnp = &nd.ni_cnd;
91447636 4944
813fb2f6 4945continue_lookup:
c18c124e 4946 error = nameiat(&nd, fd);
0a7de745
A
4947 if (error) {
4948 return error;
4949 }
b0d623f7 4950
c18c124e
A
4951 dvp = nd.ni_dvp;
4952 vp = nd.ni_vp;
91447636 4953
6d2010ae 4954
91447636 4955 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4956 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4957 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4958 }
39037602 4959
39236c6e 4960 /* Skip any potential upcalls if told to. */
316670eb
A
4961 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4962 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4963 }
4964
6d2010ae
A
4965 if (vp) {
4966 batched = vnode_compound_remove_available(vp);
4967 /*
4968 * The root of a mounted filesystem cannot be deleted.
4969 */
4970 if (vp->v_flag & VROOT) {
4971 error = EBUSY;
4972 }
2d21ac55 4973
00867663 4974#if DEVELOPMENT || DEBUG
0a7de745
A
4975 /*
4976 * XXX VSWAP: Check for entitlements or special flag here
4977 * so we can restrict access appropriately.
4978 */
00867663
A
4979#else /* DEVELOPMENT || DEBUG */
4980
4981 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4982 error = EPERM;
4983 goto out;
4984 }
4985#endif /* DEVELOPMENT || DEBUG */
4986
6d2010ae
A
4987 if (!batched) {
4988 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4989 if (error) {
3e170ce0
A
4990 if (error == ENOENT) {
4991 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4992 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4993 do_retry = 1;
4994 retry_count++;
4995 }
c18c124e 4996 }
6d2010ae
A
4997 goto out;
4998 }
4999 }
5000 } else {
5001 batched = 1;
2d21ac55 5002
6d2010ae
A
5003 if (!vnode_compound_remove_available(dvp)) {
5004 panic("No vp, but no compound remove?");
5005 }
5006 }
2d21ac55 5007
2d21ac55
A
5008#if CONFIG_FSE
5009 need_event = need_fsevent(FSE_DELETE, dvp);
5010 if (need_event) {
6d2010ae
A
5011 if (!batched) {
5012 if ((vp->v_flag & VISHARDLINK) == 0) {
5013 /* XXX need to get these data in batched VNOP */
5014 get_fse_info(vp, &finfo, ctx);
5015 }
5016 } else {
5017 error = vfs_get_notify_attributes(&va);
5018 if (error) {
5019 goto out;
5020 }
5021
5022 vap = &va;
2d21ac55
A
5023 }
5024 }
5025#endif
5026 has_listeners = kauth_authorize_fileop_has_listeners();
5027 if (need_event || has_listeners) {
2d21ac55 5028 if (path == NULL) {
6d2010ae
A
5029 GET_PATH(path);
5030 if (path == NULL) {
5031 error = ENOMEM;
5032 goto out;
5033 }
2d21ac55 5034 }
c18c124e 5035 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
5036 }
5037
5038#if NAMEDRSRCFORK
0a7de745 5039 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
2d21ac55 5040 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
0a7de745 5041 } else
2d21ac55 5042#endif
6d2010ae 5043 {
c18c124e
A
5044 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
5045 vp = nd.ni_vp;
6d2010ae
A
5046 if (error == EKEEPLOOKING) {
5047 if (!batched) {
5048 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5049 }
5050
c18c124e 5051 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
5052 panic("EKEEPLOOKING, but continue flag not set?");
5053 }
5054
5055 if (vnode_isdir(vp)) {
5056 error = EISDIR;
5057 goto out;
5058 }
813fb2f6 5059 goto continue_lookup;
3e170ce0
A
5060 } else if (error == ENOENT && batched) {
5061 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
5062 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5063 /*
5064 * For compound VNOPs, the authorization callback may
5065 * return ENOENT in case of racing hardlink lookups
5066 * hitting the name cache, redrive the lookup.
5067 */
5068 do_retry = 1;
5069 retry_count += 1;
5070 goto out;
5071 }
6d2010ae
A
5072 }
5073 }
2d21ac55
A
5074
5075 /*
39037602 5076 * Call out to allow 3rd party notification of delete.
2d21ac55
A
5077 * Ignore result of kauth_authorize_fileop call.
5078 */
1c79356b 5079 if (!error) {
2d21ac55 5080 if (has_listeners) {
39037602 5081 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
5082 KAUTH_FILEOP_DELETE,
5083 (uintptr_t)vp,
5084 (uintptr_t)path);
2d21ac55 5085 }
91447636 5086
2d21ac55 5087 if (vp->v_flag & VISHARDLINK) {
0a7de745
A
5088 //
5089 // if a hardlink gets deleted we want to blow away the
5090 // v_parent link because the path that got us to this
5091 // instance of the link is no longer valid. this will
5092 // force the next call to get the path to ask the file
5093 // system instead of just following the v_parent link.
5094 //
5095 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 5096 }
91447636 5097
2d21ac55
A
5098#if CONFIG_FSE
5099 if (need_event) {
5100 if (vp->v_flag & VISHARDLINK) {
5101 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
5102 } else if (vap) {
5103 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 5104 }
b0d623f7
A
5105 if (truncated_path) {
5106 finfo.mode |= FSE_TRUNCATED_PATH;
5107 }
2d21ac55 5108 add_fsevent(FSE_DELETE, ctx,
0a7de745
A
5109 FSE_ARG_STRING, len, path,
5110 FSE_ARG_FINFO, &finfo,
5111 FSE_ARG_DONE);
2d21ac55
A
5112 }
5113#endif
1c79356b 5114 }
6d2010ae
A
5115
5116out:
0a7de745 5117 if (path != NULL) {
2d21ac55 5118 RELEASE_PATH(path);
0a7de745 5119 }
2d21ac55 5120
c910b4d9 5121#if NAMEDRSRCFORK
39037602 5122 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
5123 * will cause its shadow file to go away if necessary.
5124 */
0a7de745
A
5125 if (vp && (vnode_isnamedstream(vp)) &&
5126 (vp->v_parent != NULLVP) &&
5127 vnode_isshadow(vp)) {
5128 vnode_recycle(vp);
5129 }
c910b4d9 5130#endif
6d2010ae
A
5131 /*
5132 * nameidone has to happen before we vnode_put(dvp)
5133 * since it may need to release the fs_nodelock on the dvp
5134 */
c18c124e 5135 nameidone(&nd);
91447636 5136 vnode_put(dvp);
6d2010ae
A
5137 if (vp) {
5138 vnode_put(vp);
5139 }
c18c124e
A
5140
5141 if (do_retry) {
5142 goto retry;
5143 }
5144
0a7de745 5145 return error;
1c79356b
A
5146}
5147
fe8ab488 5148int
c18c124e
A
5149unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5150 enum uio_seg segflg, int unlink_flags)
fe8ab488 5151{
0a7de745
A
5152 return unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5153 unlink_flags);
fe8ab488
A
5154}
5155
1c79356b 5156/*
c18c124e 5157 * Delete a name from the filesystem using Carbon semantics.
1c79356b 5158 */
c18c124e
A
5159int
5160delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 5161{
0a7de745
A
5162 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5163 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY);
fe8ab488
A
5164}
5165
c18c124e
A
5166/*
5167 * Delete a name from the filesystem using POSIX semantics.
5168 */
1c79356b 5169int
b0d623f7 5170unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 5171{
0a7de745
A
5172 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5173 uap->path, UIO_USERSPACE, 0);
fe8ab488 5174}
2d21ac55 5175
fe8ab488
A
5176int
5177unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5178{
0a7de745
A
5179 if (uap->flag & ~AT_REMOVEDIR) {
5180 return EINVAL;
5181 }
fe8ab488 5182
0a7de745
A
5183 if (uap->flag & AT_REMOVEDIR) {
5184 return rmdirat_internal(vfs_context_current(), uap->fd,
5185 uap->path, UIO_USERSPACE);
5186 } else {
5187 return unlinkat_internal(vfs_context_current(), uap->fd,
5188 NULLVP, uap->path, UIO_USERSPACE, 0);
5189 }
1c79356b
A
5190}
5191
5192/*
5193 * Reposition read/write file offset.
5194 */
1c79356b 5195int
2d21ac55 5196lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 5197{
91447636 5198 struct fileproc *fp;
2d21ac55
A
5199 vnode_t vp;
5200 struct vfs_context *ctx;
91447636 5201 off_t offset = uap->offset, file_size;
1c79356b
A
5202 int error;
5203
0a7de745
A
5204 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
5205 if (error == ENOTSUP) {
5206 return ESPIPE;
5207 }
5208 return error;
55e303ae 5209 }
91447636
A
5210 if (vnode_isfifo(vp)) {
5211 file_drop(uap->fd);
0a7de745 5212 return ESPIPE;
91447636 5213 }
2d21ac55
A
5214
5215
5216 ctx = vfs_context_current();
5217#if CONFIG_MACF
0a7de745 5218 if (uap->whence == L_INCR && uap->offset == 0) {
2d21ac55
A
5219 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5220 fp->f_fglob);
0a7de745 5221 } else {
2d21ac55
A
5222 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5223 fp->f_fglob);
0a7de745 5224 }
2d21ac55
A
5225 if (error) {
5226 file_drop(uap->fd);
0a7de745 5227 return error;
2d21ac55
A
5228 }
5229#endif
0a7de745 5230 if ((error = vnode_getwithref(vp))) {
91447636 5231 file_drop(uap->fd);
0a7de745 5232 return error;
91447636
A
5233 }
5234
1c79356b
A
5235 switch (uap->whence) {
5236 case L_INCR:
91447636 5237 offset += fp->f_fglob->fg_offset;
1c79356b
A
5238 break;
5239 case L_XTND:
0a7de745 5240 if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
55e303ae 5241 break;
0a7de745 5242 }
91447636 5243 offset += file_size;
1c79356b
A
5244 break;
5245 case L_SET:
1c79356b 5246 break;
813fb2f6 5247 case SEEK_HOLE:
0a7de745 5248 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
813fb2f6
A
5249 break;
5250 case SEEK_DATA:
0a7de745 5251 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
813fb2f6 5252 break;
1c79356b 5253 default:
55e303ae 5254 error = EINVAL;
1c79356b 5255 }
55e303ae
A
5256 if (error == 0) {
5257 if (uap->offset > 0 && offset < 0) {
5258 /* Incremented/relative move past max size */
5259 error = EOVERFLOW;
5260 } else {
5261 /*
5262 * Allow negative offsets on character devices, per
5263 * POSIX 1003.1-2001. Most likely for writing disk
5264 * labels.
5265 */
5266 if (offset < 0 && vp->v_type != VCHR) {
5267 /* Decremented/relative move before start */
5268 error = EINVAL;
5269 } else {
5270 /* Success */
91447636
A
5271 fp->f_fglob->fg_offset = offset;
5272 *retval = fp->f_fglob->fg_offset;
55e303ae
A
5273 }
5274 }
5275 }
b0d623f7 5276
39037602 5277 /*
b0d623f7
A
5278 * An lseek can affect whether data is "available to read." Use
5279 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5280 */
5281 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
5282 (void)vnode_put(vp);
5283 file_drop(uap->fd);
0a7de745 5284 return error;
1c79356b
A
5285}
5286
91447636 5287
1c79356b 5288/*
91447636 5289 * Check access permissions.
2d21ac55
A
5290 *
5291 * Returns: 0 Success
5292 * vnode_authorize:???
1c79356b 5293 */
91447636
A
5294static int
5295access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 5296{
0a7de745 5297 kauth_action_t action;
1c79356b
A
5298 int error;
5299
0a7de745
A
5300 /*
5301 * If just the regular access bits, convert them to something
91447636 5302 * that vnode_authorize will understand.
0a7de745
A
5303 */
5304 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5305 action = 0;
5306 if (uflags & R_OK) {
5307 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5308 }
5309 if (uflags & W_OK) {
91447636
A
5310 if (vnode_isdir(vp)) {
5311 action |= KAUTH_VNODE_ADD_FILE |
5312 KAUTH_VNODE_ADD_SUBDIRECTORY;
5313 /* might want delete rights here too */
5314 } else {
5315 action |= KAUTH_VNODE_WRITE_DATA;
5316 }
5317 }
0a7de745 5318 if (uflags & X_OK) {
91447636
A
5319 if (vnode_isdir(vp)) {
5320 action |= KAUTH_VNODE_SEARCH;
5321 } else {
5322 action |= KAUTH_VNODE_EXECUTE;
5323 }
5324 }
0a7de745 5325 } else {
91447636
A
5326 /* take advantage of definition of uflags */
5327 action = uflags >> 8;
5328 }
39037602 5329
2d21ac55
A
5330#if CONFIG_MACF
5331 error = mac_vnode_check_access(ctx, vp, uflags);
0a7de745
A
5332 if (error) {
5333 return error;
5334 }
2d21ac55
A
5335#endif /* MAC */
5336
0a7de745
A
5337 /* action == 0 means only check for existence */
5338 if (action != 0) {
5339 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
91447636
A
5340 } else {
5341 error = 0;
5342 }
5343
0a7de745 5344 return error;
1c79356b 5345}
1c79356b 5346
91447636
A
5347
5348
2d21ac55 5349/*
b0d623f7 5350 * access_extended: Check access permissions in bulk.
2d21ac55 5351 *
b0d623f7 5352 * Description: uap->entries Pointer to an array of accessx
0a7de745
A
5353 * descriptor structs, plus one or
5354 * more NULL terminated strings (see
5355 * "Notes" section below).
b0d623f7
A
5356 * uap->size Size of the area pointed to by
5357 * uap->entries.
5358 * uap->results Pointer to the results array.
2d21ac55
A
5359 *
5360 * Returns: 0 Success
5361 * ENOMEM Insufficient memory
5362 * EINVAL Invalid arguments
5363 * namei:EFAULT Bad address
5364 * namei:ENAMETOOLONG Filename too long
5365 * namei:ENOENT No such file or directory
5366 * namei:ELOOP Too many levels of symbolic links
5367 * namei:EBADF Bad file descriptor
5368 * namei:ENOTDIR Not a directory
5369 * namei:???
5370 * access1:
5371 *
5372 * Implicit returns:
5373 * uap->results Array contents modified
5374 *
5375 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5376 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5377 * strings
5378 *
5379 * struct accessx_descriptor[0]
5380 * ...
5381 * struct accessx_descriptor[n]
5382 * char name_data[0];
5383 *
5384 * We determine the entry count by walking the buffer containing
b0d623f7 5385 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5386 * see, the valid values for the offset ad_name_offset will be
5387 * in the byte range:
5388 *
5389 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5390 * to
5391 * [ uap->entries + uap->size - 2 ]
5392 *
5393 * since we must have at least one string, and the string must
b0d623f7 5394 * be at least one character plus the NULL terminator in length.
39037602 5395 *
2d21ac55
A
5396 * XXX: Need to support the check-as uid argument
5397 */
1c79356b 5398int
b0d623f7 5399access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5400{
2d21ac55
A
5401 struct accessx_descriptor *input = NULL;
5402 errno_t *result = NULL;
5403 errno_t error = 0;
5404 int wantdelete = 0;
5405 unsigned int desc_max, desc_actual, i, j;
91447636 5406 struct vfs_context context;
1c79356b 5407 struct nameidata nd;
0a7de745 5408 int niopts;
2d21ac55
A
5409 vnode_t vp = NULL;
5410 vnode_t dvp = NULL;
5411#define ACCESSX_MAX_DESCR_ON_STACK 10
5412 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5413
91447636
A
5414 context.vc_ucred = NULL;
5415
2d21ac55
A
5416 /*
5417 * Validate parameters; if valid, copy the descriptor array and string
5418 * arguments into local memory. Before proceeding, the following
5419 * conditions must have been met:
5420 *
5421 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5422 * o There must be sufficient room in the request for at least one
5423 * descriptor and a one yte NUL terminated string.
5424 * o The allocation of local storage must not fail.
5425 */
0a7de745
A
5426 if (uap->size > ACCESSX_MAX_TABLESIZE) {
5427 return ENOMEM;
5428 }
5429 if (uap->size < (sizeof(struct accessx_descriptor) + 2)) {
5430 return EINVAL;
5431 }
5432 if (uap->size <= sizeof(stack_input)) {
2d21ac55
A
5433 input = stack_input;
5434 } else {
0a7de745
A
5435 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5436 if (input == NULL) {
5437 error = ENOMEM;
5438 goto out;
5439 }
2d21ac55 5440 }
91447636 5441 error = copyin(uap->entries, input, uap->size);
0a7de745 5442 if (error) {
91447636 5443 goto out;
0a7de745 5444 }
1c79356b 5445
b0d623f7
A
5446 AUDIT_ARG(opaque, input, uap->size);
5447
91447636 5448 /*
2d21ac55
A
5449 * Force NUL termination of the copyin buffer to avoid nami() running
5450 * off the end. If the caller passes us bogus data, they may get a
5451 * bogus result.
5452 */
5453 ((char *)input)[uap->size - 1] = 0;
5454
5455 /*
5456 * Access is defined as checking against the process' real identity,
0a7de745 5457 * even if operations are checking the effective identity. This
2d21ac55 5458 * requires that we use a local vfs context.
0a7de745 5459 */
91447636 5460 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5461 context.vc_thread = current_thread();
91447636
A
5462
5463 /*
2d21ac55
A
5464 * Find out how many entries we have, so we can allocate the result
5465 * array by walking the list and adjusting the count downward by the
5466 * earliest string offset we see.
91447636 5467 */
2d21ac55
A
5468 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5469 desc_actual = desc_max;
5470 for (i = 0; i < desc_actual; i++) {
91447636 5471 /*
2d21ac55
A
5472 * Take the offset to the name string for this entry and
5473 * convert to an input array index, which would be one off
5474 * the end of the array if this entry was the lowest-addressed
5475 * name string.
91447636
A
5476 */
5477 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5478
5479 /*
5480 * An offset greater than the max allowable offset is an error.
5481 * It is also an error for any valid entry to point
5482 * to a location prior to the end of the current entry, if
5483 * it's not a reference to the string of the previous entry.
5484 */
5485 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5486 error = EINVAL;
5487 goto out;
5488 }
2d21ac55 5489
39037602
A
5490 /* Also do not let ad_name_offset point to something beyond the size of the input */
5491 if (input[i].ad_name_offset >= uap->size) {
5492 error = EINVAL;
5493 goto out;
5494 }
5495
2d21ac55
A
5496 /*
5497 * An offset of 0 means use the previous descriptor's offset;
5498 * this is used to chain multiple requests for the same file
5499 * to avoid multiple lookups.
5500 */
91447636 5501 if (j == 0) {
2d21ac55 5502 /* This is not valid for the first entry */
91447636
A
5503 if (i == 0) {
5504 error = EINVAL;
5505 goto out;
5506 }
5507 continue;
5508 }
2d21ac55
A
5509
5510 /*
5511 * If the offset of the string for this descriptor is before
5512 * what we believe is the current actual last descriptor,
5513 * then we need to adjust our estimate downward; this permits
5514 * the string table following the last descriptor to be out
5515 * of order relative to the descriptor list.
5516 */
0a7de745 5517 if (j < desc_actual) {
2d21ac55 5518 desc_actual = j;
0a7de745 5519 }
91447636 5520 }
2d21ac55
A
5521
5522 /*
5523 * We limit the actual number of descriptors we are willing to process
5524 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5525 * requested does not exceed this limit,
5526 */
5527 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5528 error = ENOMEM;
5529 goto out;
5530 }
d9a64523 5531 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
91447636
A
5532 if (result == NULL) {
5533 error = ENOMEM;
5534 goto out;
5535 }
5536
5537 /*
2d21ac55
A
5538 * Do the work by iterating over the descriptor entries we know to
5539 * at least appear to contain valid data.
91447636
A
5540 */
5541 error = 0;
2d21ac55 5542 for (i = 0; i < desc_actual; i++) {
91447636 5543 /*
2d21ac55
A
5544 * If the ad_name_offset is 0, then we use the previous
5545 * results to make the check; otherwise, we are looking up
5546 * a new file name.
91447636
A
5547 */
5548 if (input[i].ad_name_offset != 0) {
5549 /* discard old vnodes */
5550 if (vp) {
5551 vnode_put(vp);
5552 vp = NULL;
5553 }
5554 if (dvp) {
5555 vnode_put(dvp);
5556 dvp = NULL;
5557 }
39037602 5558
2d21ac55
A
5559 /*
5560 * Scan forward in the descriptor list to see if we
5561 * need the parent vnode. We will need it if we are
5562 * deleting, since we must have rights to remove
5563 * entries in the parent directory, as well as the
5564 * rights to delete the object itself.
5565 */
91447636 5566 wantdelete = input[i].ad_flags & _DELETE_OK;
0a7de745
A
5567 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) {
5568 if (input[j].ad_flags & _DELETE_OK) {
91447636 5569 wantdelete = 1;
0a7de745
A
5570 }
5571 }
39037602 5572
91447636 5573 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5574
91447636 5575 /* need parent for vnode_authorize for deletion test */
0a7de745 5576 if (wantdelete) {
91447636 5577 niopts |= WANTPARENT;
0a7de745 5578 }
91447636
A
5579
5580 /* do the lookup */
6d2010ae 5581 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
0a7de745
A
5582 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5583 &context);
91447636
A
5584 error = namei(&nd);
5585 if (!error) {
5586 vp = nd.ni_vp;
0a7de745 5587 if (wantdelete) {
91447636 5588 dvp = nd.ni_dvp;
0a7de745 5589 }
91447636
A
5590 }
5591 nameidone(&nd);
5592 }
5593
5594 /*
5595 * Handle lookup errors.
5596 */
0a7de745 5597 switch (error) {
91447636
A
5598 case ENOENT:
5599 case EACCES:
5600 case EPERM:
5601 case ENOTDIR:
5602 result[i] = error;
5603 break;
5604 case 0:
5605 /* run this access check */
5606 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5607 break;
5608 default:
5609 /* fatal lookup error */
5610
5611 goto out;
5612 }
5613 }
5614
b0d623f7
A
5615 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5616
91447636 5617 /* copy out results */
2d21ac55 5618 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5619
91447636 5620out:
0a7de745 5621 if (input && input != stack_input) {
91447636 5622 FREE(input, M_TEMP);
0a7de745
A
5623 }
5624 if (result) {
91447636 5625 FREE(result, M_TEMP);
0a7de745
A
5626 }
5627 if (vp) {
91447636 5628 vnode_put(vp);
0a7de745
A
5629 }
5630 if (dvp) {
91447636 5631 vnode_put(dvp);
0a7de745
A
5632 }
5633 if (IS_VALID_CRED(context.vc_ucred)) {
5634 kauth_cred_unref(&context.vc_ucred);
5635 }
5636 return error;
1c79356b
A
5637}
5638
2d21ac55
A
5639
5640/*
5641 * Returns: 0 Success
5642 * namei:EFAULT Bad address
5643 * namei:ENAMETOOLONG Filename too long
5644 * namei:ENOENT No such file or directory
5645 * namei:ELOOP Too many levels of symbolic links
5646 * namei:EBADF Bad file descriptor
5647 * namei:ENOTDIR Not a directory
5648 * namei:???
5649 * access1:
5650 */
fe8ab488
A
5651static int
5652faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5653 int flag, enum uio_seg segflg)
1c79356b 5654{
1c79356b
A
5655 int error;
5656 struct nameidata nd;
0a7de745 5657 int niopts;
91447636 5658 struct vfs_context context;
cf7d32b8
A
5659#if NAMEDRSRCFORK
5660 int is_namedstream = 0;
5661#endif
5662
0a7de745 5663 /*
fe8ab488
A
5664 * Unless the AT_EACCESS option is used, Access is defined as checking
5665 * against the process' real identity, even if operations are checking
5666 * the effective identity. So we need to tweak the credential
0a7de745
A
5667 * in the context for that case.
5668 */
5669 if (!(flag & AT_EACCESS)) {
fe8ab488 5670 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
0a7de745 5671 } else {
fe8ab488 5672 context.vc_ucred = ctx->vc_ucred;
0a7de745 5673 }
fe8ab488
A
5674 context.vc_thread = ctx->vc_thread;
5675
91447636
A
5676
5677 niopts = FOLLOW | AUDITVNPATH1;
0a7de745
A
5678 /* need parent for vnode_authorize for deletion test */
5679 if (amode & _DELETE_OK) {
5680 niopts |= WANTPARENT;
5681 }
5682 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5683 path, &context);
2d21ac55
A
5684
5685#if NAMEDRSRCFORK
5686 /* access(F_OK) calls are allowed for resource forks. */
0a7de745 5687 if (amode == F_OK) {
2d21ac55 5688 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
0a7de745 5689 }
2d21ac55 5690#endif
0a7de745
A
5691 error = nameiat(&nd, fd);
5692 if (error) {
5693 goto out;
5694 }
91447636 5695
cf7d32b8 5696#if NAMEDRSRCFORK
39037602 5697 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5698 * force an inactive on release which will mark it
5699 * for recycle.
cf7d32b8
A
5700 */
5701 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5702 (nd.ni_vp->v_parent != NULLVP) &&
5703 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5704 is_namedstream = 1;
5705 vnode_ref(nd.ni_vp);
5706 }
5707#endif
5708
fe8ab488 5709 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5710
cf7d32b8
A
5711#if NAMEDRSRCFORK
5712 if (is_namedstream) {
5713 vnode_rele(nd.ni_vp);
5714 }
5715#endif
5716
0a7de745
A
5717 vnode_put(nd.ni_vp);
5718 if (amode & _DELETE_OK) {
5719 vnode_put(nd.ni_dvp);
5720 }
5721 nameidone(&nd);
39037602 5722
91447636 5723out:
0a7de745 5724 if (!(flag & AT_EACCESS)) {
fe8ab488 5725 kauth_cred_unref(&context.vc_ucred);
0a7de745
A
5726 }
5727 return error;
fe8ab488
A
5728}
5729
5730int
5731access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5732{
0a7de745
A
5733 return faccessat_internal(vfs_context_current(), AT_FDCWD,
5734 uap->path, uap->flags, 0, UIO_USERSPACE);
91447636
A
5735}
5736
fe8ab488
A
5737int
5738faccessat(__unused proc_t p, struct faccessat_args *uap,
0a7de745 5739 __unused int32_t *retval)
fe8ab488 5740{
0a7de745
A
5741 if (uap->flag & ~AT_EACCESS) {
5742 return EINVAL;
5743 }
fe8ab488 5744
0a7de745
A
5745 return faccessat_internal(vfs_context_current(), uap->fd,
5746 uap->path, uap->amode, uap->flag, UIO_USERSPACE);
fe8ab488 5747}
91447636 5748
2d21ac55
A
5749/*
5750 * Returns: 0 Success
5751 * EFAULT
5752 * copyout:EFAULT
5753 * namei:???
5754 * vn_stat:???
5755 */
91447636 5756static int
fe8ab488
A
5757fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5758 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5759 enum uio_seg segflg, int fd, int flag)
91447636 5760{
fe8ab488
A
5761 struct nameidata nd;
5762 int follow;
b0d623f7
A
5763 union {
5764 struct stat sb;
5765 struct stat64 sb64;
527f9951 5766 } source = {};
b0d623f7
A
5767 union {
5768 struct user64_stat user64_sb;
5769 struct user32_stat user32_sb;
5770 struct user64_stat64 user64_sb64;
5771 struct user32_stat64 user32_sb64;
527f9951 5772 } dest = {};
91447636
A
5773 caddr_t sbp;
5774 int error, my_size;
5775 kauth_filesec_t fsec;
5776 size_t xsecurity_bufsize;
2d21ac55 5777 void * statptr;
1c79356b 5778
fe8ab488
A
5779 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5780 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5781 segflg, path, ctx);
5782
2d21ac55 5783#if NAMEDRSRCFORK
cf7d32b8 5784 int is_namedstream = 0;
2d21ac55 5785 /* stat calls are allowed for resource forks. */
fe8ab488 5786 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5787#endif
fe8ab488 5788 error = nameiat(&nd, fd);
0a7de745
A
5789 if (error) {
5790 return error;
5791 }
91447636 5792 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5793
5794 statptr = (void *)&source;
cf7d32b8
A
5795
5796#if NAMEDRSRCFORK
39037602
A
5797 /* Grab reference on the shadow stream file vnode to
5798 * force an inactive on release which will mark it
b0d623f7 5799 * for recycle.
cf7d32b8 5800 */
fe8ab488
A
5801 if (vnode_isnamedstream(nd.ni_vp) &&
5802 (nd.ni_vp->v_parent != NULLVP) &&
5803 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5804 is_namedstream = 1;
fe8ab488 5805 vnode_ref(nd.ni_vp);
cf7d32b8
A
5806 }
5807#endif
5808
fe8ab488 5809 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5810
5811#if NAMEDRSRCFORK
cf7d32b8 5812 if (is_namedstream) {
fe8ab488 5813 vnode_rele(nd.ni_vp);
2d21ac55
A
5814 }
5815#endif
fe8ab488
A
5816 vnode_put(nd.ni_vp);
5817 nameidone(&nd);
91447636 5818
0a7de745
A
5819 if (error) {
5820 return error;
5821 }
91447636 5822 /* Zap spare fields */
2d21ac55 5823 if (isstat64 != 0) {
b0d623f7
A
5824 source.sb64.st_lspare = 0;
5825 source.sb64.st_qspare[0] = 0LL;
5826 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5827 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5828 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5829 my_size = sizeof(dest.user64_sb64);
5830 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5831 } else {
39037602 5832 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5833 my_size = sizeof(dest.user32_sb64);
5834 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5835 }
5836 /*
5837 * Check if we raced (post lookup) against the last unlink of a file.
5838 */
b0d623f7
A
5839 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5840 source.sb64.st_nlink = 1;
2d21ac55
A
5841 }
5842 } else {
b0d623f7
A
5843 source.sb.st_lspare = 0;
5844 source.sb.st_qspare[0] = 0LL;
5845 source.sb.st_qspare[1] = 0LL;
2d21ac55 5846 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5847 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5848 my_size = sizeof(dest.user64_sb);
5849 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5850 } else {
39037602 5851 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5852 my_size = sizeof(dest.user32_sb);
5853 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5854 }
5855
5856 /*
5857 * Check if we raced (post lookup) against the last unlink of a file.
5858 */
b0d623f7
A
5859 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5860 source.sb.st_nlink = 1;
2d21ac55 5861 }
91447636 5862 }
0a7de745 5863 if ((error = copyout(sbp, ub, my_size)) != 0) {
91447636 5864 goto out;
0a7de745 5865 }
91447636
A
5866
5867 /* caller wants extended security information? */
5868 if (xsecurity != USER_ADDR_NULL) {
91447636
A
5869 /* did we get any? */
5870 if (fsec == KAUTH_FILESEC_NONE) {
5871 if (susize(xsecurity_size, 0) != 0) {
5872 error = EFAULT;
5873 goto out;
5874 }
5875 } else {
5876 /* find the user buffer size */
5877 xsecurity_bufsize = fusize(xsecurity_size);
5878
5879 /* copy out the actual data size */
5880 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5881 error = EFAULT;
5882 goto out;
5883 }
5884
5885 /* if the caller supplied enough room, copy out to it */
0a7de745 5886 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
91447636 5887 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
0a7de745 5888 }
91447636
A
5889 }
5890 }
5891out:
0a7de745 5892 if (fsec != KAUTH_FILESEC_NONE) {
91447636 5893 kauth_filesec_free(fsec);
0a7de745
A
5894 }
5895 return error;
1c79356b
A
5896}
5897
b0d623f7
A
5898/*
5899 * stat_extended: Get file status; with extended security (ACL).
5900 *
5901 * Parameters: p (ignored)
5902 * uap User argument descriptor (see below)
39037602 5903 * retval (ignored)
b0d623f7
A
5904 *
5905 * Indirect: uap->path Path of file to get status from
5906 * uap->ub User buffer (holds file status info)
5907 * uap->xsecurity ACL to get (extended security)
5908 * uap->xsecurity_size Size of ACL
39037602 5909 *
b0d623f7
A
5910 * Returns: 0 Success
5911 * !0 errno value
5912 *
5913 */
2d21ac55 5914int
fe8ab488
A
5915stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5916 __unused int32_t *retval)
2d21ac55 5917{
0a7de745
A
5918 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5919 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5920 0);
1c79356b
A
5921}
5922
2d21ac55
A
5923/*
5924 * Returns: 0 Success
fe8ab488 5925 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5926 */
91447636 5927int
b0d623f7 5928stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5929{
0a7de745
A
5930 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5931 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0);
91447636 5932}
1c79356b 5933
91447636 5934int
b0d623f7 5935stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5936{
0a7de745
A
5937 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5938 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0);
1c79356b 5939}
1c79356b 5940
b0d623f7
A
5941/*
5942 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5943 *
5944 * Parameters: p (ignored)
5945 * uap User argument descriptor (see below)
39037602 5946 * retval (ignored)
b0d623f7
A
5947 *
5948 * Indirect: uap->path Path of file to get status from
5949 * uap->ub User buffer (holds file status info)
5950 * uap->xsecurity ACL to get (extended security)
5951 * uap->xsecurity_size Size of ACL
39037602 5952 *
b0d623f7
A
5953 * Returns: 0 Success
5954 * !0 errno value
5955 *
5956 */
2d21ac55 5957int
b0d623f7 5958stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5959{
0a7de745
A
5960 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5961 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5962 0);
2d21ac55 5963}
91447636 5964
b0d623f7
A
5965/*
5966 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5967 *
5968 * Parameters: p (ignored)
5969 * uap User argument descriptor (see below)
39037602 5970 * retval (ignored)
b0d623f7
A
5971 *
5972 * Indirect: uap->path Path of file to get status from
5973 * uap->ub User buffer (holds file status info)
5974 * uap->xsecurity ACL to get (extended security)
5975 * uap->xsecurity_size Size of ACL
39037602 5976 *
b0d623f7
A
5977 * Returns: 0 Success
5978 * !0 errno value
5979 *
5980 */
2d21ac55 5981int
b0d623f7 5982lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5983{
0a7de745
A
5984 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5985 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5986 AT_SYMLINK_NOFOLLOW);
91447636
A
5987}
5988
fe8ab488
A
5989/*
5990 * Get file status; this version does not follow links.
5991 */
91447636 5992int
b0d623f7 5993lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5994{
0a7de745
A
5995 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5996 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
2d21ac55 5997}
b0d623f7 5998
2d21ac55 5999int
b0d623f7 6000lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 6001{
0a7de745
A
6002 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6003 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
91447636
A
6004}
6005
b0d623f7
A
6006/*
6007 * lstat64_extended: Get file status; can handle large inode numbers; does not
6008 * follow links; with extended security (ACL).
6009 *
6010 * Parameters: p (ignored)
6011 * uap User argument descriptor (see below)
39037602 6012 * retval (ignored)
b0d623f7
A
6013 *
6014 * Indirect: uap->path Path of file to get status from
6015 * uap->ub User buffer (holds file status info)
6016 * uap->xsecurity ACL to get (extended security)
6017 * uap->xsecurity_size Size of ACL
39037602 6018 *
b0d623f7
A
6019 * Returns: 0 Success
6020 * !0 errno value
6021 *
6022 */
91447636 6023int
b0d623f7 6024lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 6025{
0a7de745
A
6026 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6027 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6028 AT_SYMLINK_NOFOLLOW);
fe8ab488
A
6029}
6030
6031int
6032fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
6033{
0a7de745
A
6034 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6035 return EINVAL;
6036 }
fe8ab488 6037
0a7de745
A
6038 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6039 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag);
fe8ab488
A
6040}
6041
6042int
6043fstatat64(__unused proc_t p, struct fstatat64_args *uap,
6044 __unused int32_t *retval)
6045{
0a7de745
A
6046 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6047 return EINVAL;
6048 }
fe8ab488 6049
0a7de745
A
6050 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6051 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag);
91447636
A
6052}
6053
1c79356b 6054/*
91447636 6055 * Get configurable pathname variables.
2d21ac55
A
6056 *
6057 * Returns: 0 Success
6058 * namei:???
6059 * vn_pathconf:???
6060 *
6061 * Notes: Global implementation constants are intended to be
6062 * implemented in this function directly; all other constants
6063 * are per-FS implementation, and therefore must be handled in
6064 * each respective FS, instead.
6065 *
6066 * XXX We implement some things globally right now that should actually be
6067 * XXX per-FS; we will need to deal with this at some point.
1c79356b 6068 */
1c79356b
A
6069/* ARGSUSED */
6070int
b0d623f7 6071pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 6072{
1c79356b
A
6073 int error;
6074 struct nameidata nd;
2d21ac55 6075 vfs_context_t ctx = vfs_context_current();
91447636 6076
39037602 6077 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
0a7de745 6078 UIO_USERSPACE, uap->path, ctx);
55e303ae 6079 error = namei(&nd);
0a7de745
A
6080 if (error) {
6081 return error;
6082 }
1c79356b 6083
2d21ac55 6084 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 6085
91447636
A
6086 vnode_put(nd.ni_vp);
6087 nameidone(&nd);
0a7de745 6088 return error;
1c79356b
A
6089}
6090
6091/*
6092 * Return target name of a symbolic link.
6093 */
1c79356b 6094/* ARGSUSED */
fe8ab488
A
6095static int
6096readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
6097 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
6098 int *retval)
1c79356b 6099{
2d21ac55 6100 vnode_t vp;
91447636 6101 uio_t auio;
1c79356b
A
6102 int error;
6103 struct nameidata nd;
0a7de745 6104 char uio_buf[UIO_SIZEOF(1)];
91447636 6105
fe8ab488
A
6106 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
6107 seg, path, ctx);
6108
6109 error = nameiat(&nd, fd);
0a7de745
A
6110 if (error) {
6111 return error;
6112 }
1c79356b 6113 vp = nd.ni_vp;
91447636
A
6114
6115 nameidone(&nd);
6116
fe8ab488 6117 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
0a7de745 6118 &uio_buf[0], sizeof(uio_buf));
fe8ab488
A
6119 uio_addiov(auio, buf, bufsize);
6120 if (vp->v_type != VLNK) {
1c79356b 6121 error = EINVAL;
fe8ab488 6122 } else {
2d21ac55 6123#if CONFIG_MACF
fe8ab488 6124 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55 6125#endif
0a7de745 6126 if (error == 0) {
fe8ab488 6127 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
0a7de745
A
6128 ctx);
6129 }
6130 if (error == 0) {
2d21ac55 6131 error = VNOP_READLINK(vp, auio, ctx);
0a7de745 6132 }
91447636
A
6133 }
6134 vnode_put(vp);
b0d623f7 6135
fe8ab488 6136 *retval = bufsize - (int)uio_resid(auio);
0a7de745 6137 return error;
1c79356b
A
6138}
6139
fe8ab488
A
6140int
6141readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
6142{
6143 enum uio_seg procseg;
6144
6145 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
0a7de745
A
6146 return readlinkat_internal(vfs_context_current(), AT_FDCWD,
6147 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
6148 uap->count, procseg, retval);
fe8ab488
A
6149}
6150
6151int
6152readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
6153{
6154 enum uio_seg procseg;
6155
6156 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
0a7de745
A
6157 return readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
6158 procseg, uap->buf, uap->bufsize, procseg, retval);
fe8ab488
A
6159}
6160
6161/*
6162 * Change file flags.
813fb2f6
A
6163 *
6164 * NOTE: this will vnode_put() `vp'
91447636
A
6165 */
6166static int
6167chflags1(vnode_t vp, int flags, vfs_context_t ctx)
6168{
6169 struct vnode_attr va;
0a7de745 6170 kauth_action_t action;
91447636
A
6171 int error;
6172
6173 VATTR_INIT(&va);
6174 VATTR_SET(&va, va_flags, flags);
6175
2d21ac55
A
6176#if CONFIG_MACF
6177 error = mac_vnode_check_setflags(ctx, vp, flags);
0a7de745 6178 if (error) {
2d21ac55 6179 goto out;
0a7de745 6180 }
2d21ac55
A
6181#endif
6182
91447636 6183 /* request authorisation, disregard immutability */
0a7de745 6184 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 6185 goto out;
0a7de745 6186 }
91447636
A
6187 /*
6188 * Request that the auth layer disregard those file flags it's allowed to when
6189 * authorizing this operation; we need to do this in order to be able to
6190 * clear immutable flags.
6191 */
0a7de745 6192 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
91447636 6193 goto out;
0a7de745 6194 }
91447636
A
6195 error = vnode_setattr(vp, &va, ctx);
6196
39037602 6197#if CONFIG_MACF
0a7de745 6198 if (error == 0) {
39037602 6199 mac_vnode_notify_setflags(ctx, vp, flags);
0a7de745 6200 }
39037602
A
6201#endif
6202
2d21ac55
A
6203 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6204 error = ENOTSUP;
6205 }
91447636
A
6206out:
6207 vnode_put(vp);
0a7de745 6208 return error;
91447636
A
6209}
6210
1c79356b
A
6211/*
6212 * Change flags of a file given a path name.
6213 */
1c79356b
A
6214/* ARGSUSED */
6215int
b0d623f7 6216chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 6217{
2d21ac55
A
6218 vnode_t vp;
6219 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6220 int error;
6221 struct nameidata nd;
6222
55e303ae 6223 AUDIT_ARG(fflags, uap->flags);
39037602 6224 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
0a7de745 6225 UIO_USERSPACE, uap->path, ctx);
55e303ae 6226 error = namei(&nd);
0a7de745
A
6227 if (error) {
6228 return error;
6229 }
1c79356b 6230 vp = nd.ni_vp;
91447636
A
6231 nameidone(&nd);
6232
813fb2f6 6233 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6234 error = chflags1(vp, uap->flags, ctx);
91447636 6235
0a7de745 6236 return error;
1c79356b
A
6237}
6238
6239/*
6240 * Change flags of a file given a file descriptor.
6241 */
1c79356b
A
6242/* ARGSUSED */
6243int
b0d623f7 6244fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 6245{
2d21ac55 6246 vnode_t vp;
1c79356b
A
6247 int error;
6248
55e303ae
A
6249 AUDIT_ARG(fd, uap->fd);
6250 AUDIT_ARG(fflags, uap->flags);
0a7de745
A
6251 if ((error = file_vnode(uap->fd, &vp))) {
6252 return error;
6253 }
55e303ae 6254
91447636
A
6255 if ((error = vnode_getwithref(vp))) {
6256 file_drop(uap->fd);
0a7de745 6257 return error;
91447636 6258 }
e5568f75
A
6259
6260 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6261
813fb2f6 6262 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6263 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
6264
6265 file_drop(uap->fd);
0a7de745 6266 return error;
91447636
A
6267}
6268
6269/*
6270 * Change security information on a filesystem object.
2d21ac55
A
6271 *
6272 * Returns: 0 Success
6273 * EPERM Operation not permitted
6274 * vnode_authattr:??? [anything vnode_authattr can return]
6275 * vnode_authorize:??? [anything vnode_authorize can return]
6276 * vnode_setattr:??? [anything vnode_setattr can return]
6277 *
6278 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6279 * translated to EPERM before being returned.
91447636
A
6280 */
6281static int
fe8ab488 6282chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
6283{
6284 kauth_action_t action;
6285 int error;
39037602 6286
b0d623f7
A
6287 AUDIT_ARG(mode, vap->va_mode);
6288 /* XXX audit new args */
91447636 6289
2d21ac55
A
6290#if NAMEDSTREAMS
6291 /* chmod calls are not allowed for resource forks. */
6292 if (vp->v_flag & VISNAMEDSTREAM) {
0a7de745 6293 return EPERM;
2d21ac55
A
6294 }
6295#endif
6296
6297#if CONFIG_MACF
316670eb 6298 if (VATTR_IS_ACTIVE(vap, va_mode) &&
0a7de745
A
6299 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) {
6300 return error;
6301 }
39037602
A
6302
6303 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6304 if ((error = mac_vnode_check_setowner(ctx, vp,
6305 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
0a7de745
A
6306 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) {
6307 return error;
6308 }
39037602
A
6309 }
6310
6311 if (VATTR_IS_ACTIVE(vap, va_acl) &&
0a7de745
A
6312 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) {
6313 return error;
6314 }
2d21ac55
A
6315#endif
6316
0a7de745 6317 /* make sure that the caller is allowed to set this security information */
91447636
A
6318 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6319 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 6320 if (error == EACCES) {
91447636 6321 error = EPERM;
0a7de745
A
6322 }
6323 return error;
6324 }
6325
6326 if ((error = vnode_setattr(vp, vap, ctx)) != 0) {
6327 return error;
91447636 6328 }
39037602 6329
39037602 6330#if CONFIG_MACF
0a7de745 6331 if (VATTR_IS_ACTIVE(vap, va_mode)) {
39037602 6332 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
0a7de745 6333 }
39037602 6334
0a7de745 6335 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
39037602 6336 mac_vnode_notify_setowner(ctx, vp,
0a7de745
A
6337 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6338 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6339 }
39037602 6340
0a7de745 6341 if (VATTR_IS_ACTIVE(vap, va_acl)) {
39037602 6342 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
0a7de745 6343 }
39037602 6344#endif
91447636 6345
0a7de745 6346 return error;
1c79356b
A
6347}
6348
91447636 6349
1c79356b 6350/*
b0d623f7 6351 * Change mode of a file given a path name.
2d21ac55
A
6352 *
6353 * Returns: 0 Success
6354 * namei:??? [anything namei can return]
fe8ab488 6355 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 6356 */
91447636 6357static int
fe8ab488
A
6358chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6359 int fd, int flag, enum uio_seg segflg)
91447636
A
6360{
6361 struct nameidata nd;
fe8ab488 6362 int follow, error;
91447636 6363
fe8ab488
A
6364 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6365 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6366 segflg, path, ctx);
0a7de745
A
6367 if ((error = nameiat(&nd, fd))) {
6368 return error;
6369 }
fe8ab488 6370 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
6371 vnode_put(nd.ni_vp);
6372 nameidone(&nd);
0a7de745 6373 return error;
91447636
A
6374}
6375
0c530ab8 6376/*
39037602 6377 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 6378 * argument list (including extended security (ACL)).
0c530ab8
A
6379 *
6380 * Parameters: p Process requesting the open
6381 * uap User argument descriptor (see below)
6382 * retval (ignored)
6383 *
6384 * Indirect: uap->path Path to object (same as 'chmod')
6385 * uap->uid UID to set
6386 * uap->gid GID to set
6387 * uap->mode File mode to set (same as 'chmod')
6388 * uap->xsecurity ACL to set (or delete)
6389 *
6390 * Returns: 0 Success
6391 * !0 errno value
6392 *
6393 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6394 *
6395 * XXX: We should enummerate the possible errno values here, and where
6396 * in the code they originated.
6397 */
1c79356b 6398int
b0d623f7 6399chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 6400{
1c79356b 6401 int error;
91447636
A
6402 struct vnode_attr va;
6403 kauth_filesec_t xsecdst;
6404
b0d623f7
A
6405 AUDIT_ARG(owner, uap->uid, uap->gid);
6406
91447636 6407 VATTR_INIT(&va);
0a7de745 6408 if (uap->mode != -1) {
91447636 6409 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
0a7de745
A
6410 }
6411 if (uap->uid != KAUTH_UID_NONE) {
91447636 6412 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
6413 }
6414 if (uap->gid != KAUTH_GID_NONE) {
91447636 6415 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 6416 }
91447636
A
6417
6418 xsecdst = NULL;
0a7de745
A
6419 switch (uap->xsecurity) {
6420 /* explicit remove request */
6421 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
91447636
A
6422 VATTR_SET(&va, va_acl, NULL);
6423 break;
0a7de745 6424 /* not being set */
91447636
A
6425 case USER_ADDR_NULL:
6426 break;
6427 default:
0a7de745
A
6428 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6429 return error;
6430 }
91447636
A
6431 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6432 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6433 }
1c79356b 6434
fe8ab488
A
6435 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6436 UIO_USERSPACE);
55e303ae 6437
0a7de745 6438 if (xsecdst != NULL) {
91447636 6439 kauth_filesec_free(xsecdst);
0a7de745
A
6440 }
6441 return error;
91447636 6442}
4a249263 6443
2d21ac55
A
6444/*
6445 * Returns: 0 Success
fe8ab488 6446 * chmodat:??? [anything chmodat can return]
2d21ac55 6447 */
fe8ab488
A
6448static int
6449fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6450 int flag, enum uio_seg segflg)
91447636 6451{
91447636
A
6452 struct vnode_attr va;
6453
6454 VATTR_INIT(&va);
fe8ab488
A
6455 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6456
0a7de745 6457 return chmodat(ctx, path, &va, fd, flag, segflg);
fe8ab488
A
6458}
6459
6460int
6461chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6462{
0a7de745
A
6463 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6464 AT_FDCWD, 0, UIO_USERSPACE);
fe8ab488 6465}
91447636 6466
fe8ab488
A
6467int
6468fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6469{
0a7de745
A
6470 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6471 return EINVAL;
6472 }
fe8ab488 6473
0a7de745
A
6474 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6475 uap->fd, uap->flag, UIO_USERSPACE);
1c79356b
A
6476}
6477
6478/*
6479 * Change mode of a file given a file descriptor.
6480 */
91447636 6481static int
2d21ac55 6482fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6483{
2d21ac55 6484 vnode_t vp;
1c79356b 6485 int error;
55e303ae 6486
91447636 6487 AUDIT_ARG(fd, fd);
55e303ae 6488
0a7de745
A
6489 if ((error = file_vnode(fd, &vp)) != 0) {
6490 return error;
6491 }
91447636
A
6492 if ((error = vnode_getwithref(vp)) != 0) {
6493 file_drop(fd);
0a7de745 6494 return error;
91447636 6495 }
55e303ae
A
6496 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6497
fe8ab488 6498 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6499 (void)vnode_put(vp);
6500 file_drop(fd);
55e303ae 6501
0a7de745 6502 return error;
1c79356b
A
6503}
6504
b0d623f7
A
6505/*
6506 * fchmod_extended: Change mode of a file given a file descriptor; with
6507 * extended argument list (including extended security (ACL)).
6508 *
6509 * Parameters: p Process requesting to change file mode
6510 * uap User argument descriptor (see below)
39037602 6511 * retval (ignored)
b0d623f7
A
6512 *
6513 * Indirect: uap->mode File mode to set (same as 'chmod')
6514 * uap->uid UID to set
6515 * uap->gid GID to set
6516 * uap->xsecurity ACL to set (or delete)
6517 * uap->fd File descriptor of file to change mode
39037602 6518 *
b0d623f7
A
6519 * Returns: 0 Success
6520 * !0 errno value
6521 *
6522 */
91447636 6523int
b0d623f7 6524fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6525{
6526 int error;
6527 struct vnode_attr va;
6528 kauth_filesec_t xsecdst;
6529
b0d623f7
A
6530 AUDIT_ARG(owner, uap->uid, uap->gid);
6531
91447636 6532 VATTR_INIT(&va);
0a7de745 6533 if (uap->mode != -1) {
91447636 6534 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
0a7de745
A
6535 }
6536 if (uap->uid != KAUTH_UID_NONE) {
91447636 6537 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
6538 }
6539 if (uap->gid != KAUTH_GID_NONE) {
91447636 6540 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 6541 }
91447636
A
6542
6543 xsecdst = NULL;
0a7de745 6544 switch (uap->xsecurity) {
91447636
A
6545 case USER_ADDR_NULL:
6546 VATTR_SET(&va, va_acl, NULL);
6547 break;
0a7de745 6548 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
39236c6e
A
6549 VATTR_SET(&va, va_acl, NULL);
6550 break;
0a7de745 6551 /* not being set */
91447636
A
6552 case CAST_USER_ADDR_T(-1):
6553 break;
6554 default:
0a7de745
A
6555 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6556 return error;
6557 }
91447636
A
6558 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6559 }
6560
6561 error = fchmod1(p, uap->fd, &va);
6562
39037602 6563
0a7de745 6564 switch (uap->xsecurity) {
91447636
A
6565 case USER_ADDR_NULL:
6566 case CAST_USER_ADDR_T(-1):
6567 break;
6568 default:
0a7de745 6569 if (xsecdst != NULL) {
91447636 6570 kauth_filesec_free(xsecdst);
0a7de745 6571 }
91447636 6572 }
0a7de745 6573 return error;
91447636
A
6574}
6575
6576int
b0d623f7 6577fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6578{
6579 struct vnode_attr va;
6580
6581 VATTR_INIT(&va);
6582 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6583
0a7de745 6584 return fchmod1(p, uap->fd, &va);
91447636
A
6585}
6586
6587
1c79356b
A
6588/*
6589 * Set ownership given a path name.
6590 */
1c79356b 6591/* ARGSUSED */
91447636 6592static int
fe8ab488 6593fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
0a7de745 6594 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6595{
2d21ac55 6596 vnode_t vp;
91447636 6597 struct vnode_attr va;
1c79356b
A
6598 int error;
6599 struct nameidata nd;
fe8ab488 6600 int follow;
91447636 6601 kauth_action_t action;
1c79356b 6602
fe8ab488 6603 AUDIT_ARG(owner, uid, gid);
55e303ae 6604
fe8ab488
A
6605 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6606 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6607 path, ctx);
6608 error = nameiat(&nd, fd);
0a7de745
A
6609 if (error) {
6610 return error;
6611 }
1c79356b
A
6612 vp = nd.ni_vp;
6613
91447636
A
6614 nameidone(&nd);
6615
91447636 6616 VATTR_INIT(&va);
0a7de745 6617 if (uid != (uid_t)VNOVAL) {
fe8ab488 6618 VATTR_SET(&va, va_uid, uid);
0a7de745
A
6619 }
6620 if (gid != (gid_t)VNOVAL) {
fe8ab488 6621 VATTR_SET(&va, va_gid, gid);
0a7de745 6622 }
91447636 6623
2d21ac55 6624#if CONFIG_MACF
fe8ab488 6625 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
0a7de745 6626 if (error) {
2d21ac55 6627 goto out;
0a7de745 6628 }
2d21ac55
A
6629#endif
6630
91447636 6631 /* preflight and authorize attribute changes */
0a7de745 6632 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 6633 goto out;
0a7de745
A
6634 }
6635 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636 6636 goto out;
0a7de745 6637 }
91447636 6638 error = vnode_setattr(vp, &va, ctx);
39037602
A
6639
6640#if CONFIG_MACF
0a7de745 6641 if (error == 0) {
39037602 6642 mac_vnode_notify_setowner(ctx, vp, uid, gid);
0a7de745 6643 }
39037602
A
6644#endif
6645
91447636
A
6646out:
6647 /*
6648 * EACCES is only allowed from namei(); permissions failure should
6649 * return EPERM, so we need to translate the error code.
6650 */
0a7de745 6651 if (error == EACCES) {
91447636 6652 error = EPERM;
0a7de745 6653 }
fe8ab488 6654
91447636 6655 vnode_put(vp);
0a7de745 6656 return error;
1c79356b
A
6657}
6658
91447636 6659int
fe8ab488 6660chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6661{
0a7de745
A
6662 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6663 uap->uid, uap->gid, 0, UIO_USERSPACE);
91447636
A
6664}
6665
6666int
fe8ab488 6667lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6668{
0a7de745
A
6669 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6670 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
fe8ab488
A
6671}
6672
6673int
6674fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6675{
0a7de745
A
6676 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6677 return EINVAL;
6678 }
fe8ab488 6679
0a7de745
A
6680 return fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6681 uap->uid, uap->gid, uap->flag, UIO_USERSPACE);
91447636
A
6682}
6683
1c79356b
A
6684/*
6685 * Set ownership given a file descriptor.
6686 */
1c79356b
A
6687/* ARGSUSED */
6688int
b0d623f7 6689fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6690{
91447636 6691 struct vnode_attr va;
2d21ac55
A
6692 vfs_context_t ctx = vfs_context_current();
6693 vnode_t vp;
1c79356b 6694 int error;
91447636 6695 kauth_action_t action;
1c79356b 6696
55e303ae
A
6697 AUDIT_ARG(owner, uap->uid, uap->gid);
6698 AUDIT_ARG(fd, uap->fd);
6699
0a7de745
A
6700 if ((error = file_vnode(uap->fd, &vp))) {
6701 return error;
6702 }
55e303ae 6703
0a7de745 6704 if ((error = vnode_getwithref(vp))) {
91447636 6705 file_drop(uap->fd);
0a7de745 6706 return error;
91447636 6707 }
55e303ae
A
6708 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6709
91447636 6710 VATTR_INIT(&va);
0a7de745 6711 if (uap->uid != VNOVAL) {
91447636 6712 VATTR_SET(&va, va_uid, uap->uid);
0a7de745
A
6713 }
6714 if (uap->gid != VNOVAL) {
91447636 6715 VATTR_SET(&va, va_gid, uap->gid);
0a7de745 6716 }
91447636 6717
2d21ac55
A
6718#if NAMEDSTREAMS
6719 /* chown calls are not allowed for resource forks. */
6720 if (vp->v_flag & VISNAMEDSTREAM) {
6721 error = EPERM;
6722 goto out;
6723 }
6724#endif
6725
6726#if CONFIG_MACF
6727 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
0a7de745 6728 if (error) {
2d21ac55 6729 goto out;
0a7de745 6730 }
2d21ac55 6731#endif
91447636 6732
0a7de745
A
6733 /* preflight and authorize attribute changes */
6734 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 6735 goto out;
0a7de745 6736 }
2d21ac55 6737 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 6738 if (error == EACCES) {
91447636 6739 error = EPERM;
0a7de745 6740 }
91447636
A
6741 goto out;
6742 }
2d21ac55 6743 error = vnode_setattr(vp, &va, ctx);
4a249263 6744
39037602 6745#if CONFIG_MACF
0a7de745 6746 if (error == 0) {
39037602 6747 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
0a7de745 6748 }
39037602
A
6749#endif
6750
91447636
A
6751out:
6752 (void)vnode_put(vp);
6753 file_drop(uap->fd);
0a7de745 6754 return error;
1c79356b
A
6755}
6756
9bccf70c 6757static int
2d21ac55 6758getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6759{
9bccf70c
A
6760 int error;
6761
91447636
A
6762 if (usrtvp == USER_ADDR_NULL) {
6763 struct timeval old_tv;
6764 /* XXX Y2038 bug because of microtime argument */
6765 microtime(&old_tv);
6766 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6767 tsp[1] = tsp[0];
6768 } else {
91447636 6769 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6770 struct user64_timeval tv[2];
91447636 6771 error = copyin(usrtvp, (void *)tv, sizeof(tv));
0a7de745
A
6772 if (error) {
6773 return error;
6774 }
b0d623f7
A
6775 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6776 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6777 } else {
b0d623f7
A
6778 struct user32_timeval tv[2];
6779 error = copyin(usrtvp, (void *)tv, sizeof(tv));
0a7de745
A
6780 if (error) {
6781 return error;
6782 }
b0d623f7
A
6783 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6784 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6785 }
9bccf70c
A
6786 }
6787 return 0;
6788}
6789
6790static int
2d21ac55 6791setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
0a7de745 6792 int nullflag)
9bccf70c
A
6793{
6794 int error;
91447636
A
6795 struct vnode_attr va;
6796 kauth_action_t action;
e5568f75
A
6797
6798 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6799
91447636
A
6800 VATTR_INIT(&va);
6801 VATTR_SET(&va, va_access_time, ts[0]);
6802 VATTR_SET(&va, va_modify_time, ts[1]);
0a7de745 6803 if (nullflag) {
91447636 6804 va.va_vaflags |= VA_UTIMES_NULL;
0a7de745 6805 }
91447636 6806
2d21ac55
A
6807#if NAMEDSTREAMS
6808 /* utimes calls are not allowed for resource forks. */
6809 if (vp->v_flag & VISNAMEDSTREAM) {
6810 error = EPERM;
6811 goto out;
6812 }
6813#endif
6814
6815#if CONFIG_MACF
6816 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
0a7de745 6817 if (error) {
2d21ac55 6818 goto out;
0a7de745 6819 }
2d21ac55
A
6820#endif
6821 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
0a7de745 6822 if (!nullflag && error == EACCES) {
2d21ac55 6823 error = EPERM;
0a7de745 6824 }
91447636 6825 goto out;
2d21ac55
A
6826 }
6827
91447636 6828 /* since we may not need to auth anything, check here */
2d21ac55 6829 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
0a7de745 6830 if (!nullflag && error == EACCES) {
2d21ac55 6831 error = EPERM;
0a7de745 6832 }
91447636 6833 goto out;
2d21ac55 6834 }
91447636 6835 error = vnode_setattr(vp, &va, ctx);
4a249263 6836
39037602 6837#if CONFIG_MACF
0a7de745 6838 if (error == 0) {
39037602 6839 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
0a7de745 6840 }
39037602
A
6841#endif
6842
9bccf70c
A
6843out:
6844 return error;
6845}
6846
1c79356b
A
6847/*
6848 * Set the access and modification times of a file.
6849 */
1c79356b
A
6850/* ARGSUSED */
6851int
b0d623f7 6852utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6853{
9bccf70c 6854 struct timespec ts[2];
91447636 6855 user_addr_t usrtvp;
1c79356b
A
6856 int error;
6857 struct nameidata nd;
2d21ac55 6858 vfs_context_t ctx = vfs_context_current();
1c79356b 6859
2d21ac55 6860 /*
39037602 6861 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6862 * name lookup first because auditing wants the path.
6863 */
39037602 6864 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
0a7de745 6865 UIO_USERSPACE, uap->path, ctx);
55e303ae 6866 error = namei(&nd);
0a7de745
A
6867 if (error) {
6868 return error;
6869 }
91447636 6870 nameidone(&nd);
55e303ae 6871
91447636
A
6872 /*
6873 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6874 * the current time instead.
6875 */
55e303ae 6876 usrtvp = uap->tptr;
0a7de745 6877 if ((error = getutimes(usrtvp, ts)) != 0) {
91447636 6878 goto out;
0a7de745 6879 }
91447636 6880
2d21ac55 6881 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6882
6883out:
6884 vnode_put(nd.ni_vp);
0a7de745 6885 return error;
1c79356b
A
6886}
6887
9bccf70c
A
6888/*
6889 * Set the access and modification times of a file.
6890 */
9bccf70c
A
6891/* ARGSUSED */
6892int
b0d623f7 6893futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6894{
6895 struct timespec ts[2];
2d21ac55 6896 vnode_t vp;
91447636 6897 user_addr_t usrtvp;
9bccf70c
A
6898 int error;
6899
55e303ae 6900 AUDIT_ARG(fd, uap->fd);
9bccf70c 6901 usrtvp = uap->tptr;
0a7de745
A
6902 if ((error = getutimes(usrtvp, ts)) != 0) {
6903 return error;
6904 }
6905 if ((error = file_vnode(uap->fd, &vp)) != 0) {
6906 return error;
6907 }
6908 if ((error = vnode_getwithref(vp))) {
91447636 6909 file_drop(uap->fd);
0a7de745 6910 return error;
91447636 6911 }
55e303ae 6912
2d21ac55 6913 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6914 vnode_put(vp);
6915 file_drop(uap->fd);
0a7de745 6916 return error;
9bccf70c
A
6917}
6918
1c79356b
A
6919/*
6920 * Truncate a file given its path name.
6921 */
1c79356b
A
6922/* ARGSUSED */
6923int
b0d623f7 6924truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6925{
2d21ac55 6926 vnode_t vp;
91447636 6927 struct vnode_attr va;
2d21ac55 6928 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6929 int error;
6930 struct nameidata nd;
91447636
A
6931 kauth_action_t action;
6932
0a7de745
A
6933 if (uap->length < 0) {
6934 return EINVAL;
6935 }
39037602 6936 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
0a7de745
A
6937 UIO_USERSPACE, uap->path, ctx);
6938 if ((error = namei(&nd))) {
6939 return error;
6940 }
1c79356b 6941 vp = nd.ni_vp;
91447636
A
6942
6943 nameidone(&nd);
6944
6945 VATTR_INIT(&va);
6946 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6947
6948#if CONFIG_MACF
6949 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
0a7de745 6950 if (error) {
2d21ac55 6951 goto out;
0a7de745 6952 }
2d21ac55
A
6953#endif
6954
0a7de745 6955 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
91447636 6956 goto out;
0a7de745
A
6957 }
6958 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636 6959 goto out;
0a7de745 6960 }
2d21ac55 6961 error = vnode_setattr(vp, &va, ctx);
39037602
A
6962
6963#if CONFIG_MACF
0a7de745 6964 if (error == 0) {
39037602 6965 mac_vnode_notify_truncate(ctx, NOCRED, vp);
0a7de745 6966 }
39037602
A
6967#endif
6968
91447636
A
6969out:
6970 vnode_put(vp);
0a7de745 6971 return error;
1c79356b
A
6972}
6973
6974/*
6975 * Truncate a file given a file descriptor.
6976 */
1c79356b
A
6977/* ARGSUSED */
6978int
b0d623f7 6979ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6980{
2d21ac55 6981 vfs_context_t ctx = vfs_context_current();
91447636 6982 struct vnode_attr va;
2d21ac55 6983 vnode_t vp;
91447636 6984 struct fileproc *fp;
0a7de745 6985 int error;
91447636 6986 int fd = uap->fd;
1c79356b 6987
55e303ae 6988 AUDIT_ARG(fd, uap->fd);
0a7de745
A
6989 if (uap->length < 0) {
6990 return EINVAL;
6991 }
39037602 6992
0a7de745
A
6993 if ((error = fp_lookup(p, fd, &fp, 0))) {
6994 return error;
91447636 6995 }
1c79356b 6996
39236c6e
A
6997 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6998 case DTYPE_PSXSHM:
91447636
A
6999 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
7000 goto out;
39236c6e
A
7001 case DTYPE_VNODE:
7002 break;
7003 default:
91447636
A
7004 error = EINVAL;
7005 goto out;
1c79356b 7006 }
1c79356b 7007
2d21ac55 7008 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 7009
91447636
A
7010 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
7011 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7012 error = EINVAL;
7013 goto out;
1c79356b 7014 }
1c79356b 7015
91447636
A
7016 if ((error = vnode_getwithref(vp)) != 0) {
7017 goto out;
7018 }
1c79356b 7019
91447636 7020 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 7021
2d21ac55
A
7022#if CONFIG_MACF
7023 error = mac_vnode_check_truncate(ctx,
7024 fp->f_fglob->fg_cred, vp);
7025 if (error) {
7026 (void)vnode_put(vp);
7027 goto out;
7028 }
7029#endif
91447636
A
7030 VATTR_INIT(&va);
7031 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 7032 error = vnode_setattr(vp, &va, ctx);
39037602
A
7033
7034#if CONFIG_MACF
0a7de745 7035 if (error == 0) {
39037602 7036 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
0a7de745 7037 }
39037602
A
7038#endif
7039
91447636
A
7040 (void)vnode_put(vp);
7041out:
7042 file_drop(fd);
0a7de745 7043 return error;
1c79356b 7044}
91447636 7045
1c79356b
A
7046
7047/*
b0d623f7 7048 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 7049 */
1c79356b
A
7050/* ARGSUSED */
7051int
b0d623f7 7052fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 7053{
2d21ac55 7054 __pthread_testcancel(1);
0a7de745 7055 return fsync_common(p, uap, MNT_WAIT);
b0d623f7
A
7056}
7057
7058
7059/*
7060 * Sync an open file with synchronized I/O _file_ integrity completion
7061 *
7062 * Notes: This is a legacy support function that does not test for
7063 * thread cancellation points.
7064 */
7065/* ARGSUSED */
39037602 7066int
b0d623f7
A
7067fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
7068{
0a7de745 7069 return fsync_common(p, (struct fsync_args *)uap, MNT_WAIT);
2d21ac55
A
7070}
7071
b0d623f7
A
7072
7073/*
7074 * Sync an open file with synchronized I/O _data_ integrity completion
7075 */
7076/* ARGSUSED */
2d21ac55 7077int
b0d623f7
A
7078fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
7079{
7080 __pthread_testcancel(1);
0a7de745 7081 return fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT);
b0d623f7
A
7082}
7083
7084
7085/*
7086 * fsync_common
7087 *
7088 * Common fsync code to support both synchronized I/O file integrity completion
7089 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7090 *
7091 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7092 * will only guarantee that the file data contents are retrievable. If
7093 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
7094 * includes additional metadata unnecessary for retrieving the file data
7095 * contents, such as atime, mtime, ctime, etc., also be committed to stable
7096 * storage.
7097 *
7098 * Parameters: p The process
7099 * uap->fd The descriptor to synchronize
7100 * flags The data integrity flags
7101 *
7102 * Returns: int Success
7103 * fp_getfvp:EBADF Bad file descriptor
7104 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7105 * VNOP_FSYNC:??? unspecified
7106 *
7107 * Notes: We use struct fsync_args because it is a short name, and all
7108 * caller argument structures are otherwise identical.
7109 */
7110static int
7111fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
7112{
7113 vnode_t vp;
91447636 7114 struct fileproc *fp;
2d21ac55 7115 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7116 int error;
7117
b0d623f7
A
7118 AUDIT_ARG(fd, uap->fd);
7119
0a7de745
A
7120 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
7121 return error;
7122 }
7123 if ((error = vnode_getwithref(vp))) {
91447636 7124 file_drop(uap->fd);
0a7de745 7125 return error;
91447636 7126 }
91447636 7127
b0d623f7
A
7128 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7129
7130 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
7131
7132#if NAMEDRSRCFORK
7133 /* Sync resource fork shadow file if necessary. */
7134 if ((error == 0) &&
39037602 7135 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 7136 (vp->v_parent != NULLVP) &&
b0d623f7 7137 vnode_isshadow(vp) &&
2d21ac55
A
7138 (fp->f_flags & FP_WRITTEN)) {
7139 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
7140 }
7141#endif
91447636
A
7142
7143 (void)vnode_put(vp);
7144 file_drop(uap->fd);
0a7de745 7145 return error;
1c79356b
A
7146}
7147
7148/*
39037602 7149 * Duplicate files. Source must be a file, target must be a file or
1c79356b 7150 * must not exist.
91447636
A
7151 *
7152 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7153 * perform inheritance correctly.
1c79356b 7154 */
1c79356b
A
7155/* ARGSUSED */
7156int
b0d623f7 7157copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 7158{
91447636 7159 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
7160 struct nameidata fromnd, tond;
7161 int error;
2d21ac55 7162 vfs_context_t ctx = vfs_context_current();
39037602
A
7163#if CONFIG_MACF
7164 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
7165 struct vnode_attr va;
7166#endif
55e303ae
A
7167
7168 /* Check that the flags are valid. */
1c79356b
A
7169
7170 if (uap->flags & ~CPF_MASK) {
0a7de745 7171 return EINVAL;
55e303ae 7172 }
1c79356b 7173
4bd07ac2 7174 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
0a7de745
A
7175 UIO_USERSPACE, uap->from, ctx);
7176 if ((error = namei(&fromnd))) {
7177 return error;
7178 }
1c79356b
A
7179 fvp = fromnd.ni_vp;
7180
6d2010ae 7181 NDINIT(&tond, CREATE, OP_LINK,
0a7de745
A
7182 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7183 UIO_USERSPACE, uap->to, ctx);
91447636 7184 if ((error = namei(&tond))) {
1c79356b
A
7185 goto out1;
7186 }
7187 tdvp = tond.ni_dvp;
7188 tvp = tond.ni_vp;
91447636 7189
1c79356b
A
7190 if (tvp != NULL) {
7191 if (!(uap->flags & CPF_OVERWRITE)) {
7192 error = EEXIST;
7193 goto out;
7194 }
7195 }
39037602 7196
1c79356b
A
7197 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
7198 error = EISDIR;
7199 goto out;
7200 }
7201
39037602
A
7202 /* This calls existing MAC hooks for open */
7203 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
7204 NULL))) {
7205 goto out;
7206 }
7207
7208 if (tvp) {
7209 /*
7210 * See unlinkat_internal for an explanation of the potential
7211 * ENOENT from the MAC hook but the gist is that the MAC hook
7212 * can fail because vn_getpath isn't able to return the full
7213 * path. We choose to ignore this failure.
7214 */
7215 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
0a7de745 7216 if (error && error != ENOENT) {
39037602 7217 goto out;
0a7de745 7218 }
39037602
A
7219 error = 0;
7220 }
7221
7222#if CONFIG_MACF
7223 VATTR_INIT(&va);
7224 VATTR_SET(&va, va_type, fvp->v_type);
7225 /* Mask off all but regular access permissions */
7226 VATTR_SET(&va, va_mode,
0a7de745 7227 ((((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
39037602 7228 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
0a7de745 7229 if (error) {
39037602 7230 goto out;
0a7de745 7231 }
39037602
A
7232#endif /* CONFIG_MACF */
7233
0a7de745 7234 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
1c79356b 7235 goto out;
0a7de745 7236 }
1c79356b 7237
0a7de745 7238 if (fvp == tdvp) {
1c79356b 7239 error = EINVAL;
0a7de745 7240 }
1c79356b
A
7241 /*
7242 * If source is the same as the destination (that is the
7243 * same inode number) then there is nothing to do.
7244 * (fixed to have POSIX semantics - CSM 3/2/98)
7245 */
0a7de745 7246 if (fvp == tvp) {
1c79356b 7247 error = -1;
0a7de745
A
7248 }
7249 if (!error) {
7250 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
7251 }
1c79356b 7252out:
91447636
A
7253 sdvp = tond.ni_startdir;
7254 /*
7255 * nameidone has to happen before we vnode_put(tdvp)
7256 * since it may need to release the fs_nodelock on the tdvp
7257 */
7258 nameidone(&tond);
7259
0a7de745 7260 if (tvp) {
91447636 7261 vnode_put(tvp);
0a7de745 7262 }
91447636
A
7263 vnode_put(tdvp);
7264 vnode_put(sdvp);
1c79356b 7265out1:
91447636
A
7266 vnode_put(fvp);
7267
91447636
A
7268 nameidone(&fromnd);
7269
0a7de745
A
7270 if (error == -1) {
7271 return 0;
7272 }
7273 return error;
1c79356b
A
7274}
7275
39037602 7276#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 7277
1c79356b 7278/*
39037602
A
7279 * Helper function for doing clones. The caller is expected to provide an
7280 * iocounted source vnode and release it.
1c79356b 7281 */
fe8ab488 7282static int
39037602
A
7283clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7284 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 7285{
91447636 7286 vnode_t tvp, tdvp;
39037602 7287 struct nameidata tond;
1c79356b 7288 int error;
39037602 7289 int follow;
813fb2f6 7290 boolean_t free_src_acl;
39037602
A
7291 boolean_t attr_cleanup;
7292 enum vtype v_type;
7293 kauth_action_t action;
7294 struct componentname *cnp;
7295 uint32_t defaulted;
7296 struct vnode_attr va;
813fb2f6 7297 struct vnode_attr nva;
5ba3f43e 7298 uint32_t vnop_flags;
316670eb 7299
39037602
A
7300 v_type = vnode_vtype(fvp);
7301 switch (v_type) {
7302 case VLNK:
0a7de745 7303 /* FALLTHRU */
39037602
A
7304 case VREG:
7305 action = KAUTH_VNODE_ADD_FILE;
7306 break;
7307 case VDIR:
7308 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7309 fvp->v_mountedhere) {
0a7de745 7310 return EINVAL;
39037602
A
7311 }
7312 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7313 break;
7314 default:
0a7de745 7315 return EINVAL;
39037602
A
7316 }
7317
7318 AUDIT_ARG(fd2, dst_dirfd);
7319 AUDIT_ARG(value32, flags);
7320
7321 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7322 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7323 UIO_USERSPACE, dst, ctx);
0a7de745
A
7324 if ((error = nameiat(&tond, dst_dirfd))) {
7325 return error;
7326 }
39037602
A
7327 cnp = &tond.ni_cnd;
7328 tdvp = tond.ni_dvp;
7329 tvp = tond.ni_vp;
7330
813fb2f6 7331 free_src_acl = FALSE;
39037602
A
7332 attr_cleanup = FALSE;
7333
7334 if (tvp != NULL) {
7335 error = EEXIST;
7336 goto out;
7337 }
7338
7339 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7340 error = EXDEV;
7341 goto out;
7342 }
7343
7344#if CONFIG_MACF
0a7de745 7345 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) {
39037602 7346 goto out;
0a7de745 7347 }
39037602 7348#endif
0a7de745 7349 if ((error = vnode_authorize(tdvp, NULL, action, ctx))) {
39037602 7350 goto out;
0a7de745 7351 }
39037602
A
7352
7353 action = KAUTH_VNODE_GENERIC_READ_BITS;
0a7de745 7354 if (data_read_authorised) {
39037602 7355 action &= ~KAUTH_VNODE_READ_DATA;
0a7de745
A
7356 }
7357 if ((error = vnode_authorize(fvp, NULL, action, ctx))) {
39037602 7358 goto out;
0a7de745 7359 }
39037602
A
7360
7361 /*
7362 * certain attributes may need to be changed from the source, we ask for
7363 * those here.
7364 */
7365 VATTR_INIT(&va);
813fb2f6
A
7366 VATTR_WANTED(&va, va_uid);
7367 VATTR_WANTED(&va, va_gid);
39037602
A
7368 VATTR_WANTED(&va, va_mode);
7369 VATTR_WANTED(&va, va_flags);
7370 VATTR_WANTED(&va, va_acl);
7371
0a7de745 7372 if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
39037602 7373 goto out;
0a7de745 7374 }
39037602 7375
813fb2f6
A
7376 VATTR_INIT(&nva);
7377 VATTR_SET(&nva, va_type, v_type);
7378 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7379 VATTR_SET(&nva, va_acl, va.va_acl);
7380 free_src_acl = TRUE;
39037602
A
7381 }
7382
7383 /* Handle ACL inheritance, initialize vap. */
7384 if (v_type == VLNK) {
813fb2f6 7385 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 7386 } else {
813fb2f6 7387 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
0a7de745 7388 if (error) {
813fb2f6 7389 goto out;
0a7de745 7390 }
39037602
A
7391 attr_cleanup = TRUE;
7392 }
7393
5ba3f43e 7394 vnop_flags = VNODE_CLONEFILE_DEFAULT;
813fb2f6
A
7395 /*
7396 * We've got initial values for all security parameters,
7397 * If we are superuser, then we can change owners to be the
7398 * same as the source. Both superuser and the owner have default
7399 * WRITE_SECURITY privileges so all other fields can be taken
7400 * from source as well.
7401 */
5ba3f43e 7402 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
0a7de745 7403 if (VATTR_IS_SUPPORTED(&va, va_uid)) {
813fb2f6 7404 VATTR_SET(&nva, va_uid, va.va_uid);
0a7de745
A
7405 }
7406 if (VATTR_IS_SUPPORTED(&va, va_gid)) {
813fb2f6 7407 VATTR_SET(&nva, va_gid, va.va_gid);
0a7de745 7408 }
5ba3f43e
A
7409 } else {
7410 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
813fb2f6 7411 }
5ba3f43e 7412
0a7de745 7413 if (VATTR_IS_SUPPORTED(&va, va_mode)) {
813fb2f6 7414 VATTR_SET(&nva, va_mode, va.va_mode);
0a7de745 7415 }
813fb2f6
A
7416 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7417 VATTR_SET(&nva, va_flags,
5ba3f43e
A
7418 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7419 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
39037602
A
7420 }
7421
5ba3f43e 7422 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
39037602
A
7423
7424 if (!error && tvp) {
0a7de745 7425 int update_flags = 0;
39037602
A
7426#if CONFIG_FSE
7427 int fsevent;
7428#endif /* CONFIG_FSE */
7429
7430#if CONFIG_MACF
7431 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7432 VNODE_LABEL_CREATE, ctx);
7433#endif
7434 /*
7435 * If some of the requested attributes weren't handled by the
7436 * VNOP, use our fallback code.
7437 */
0a7de745 7438 if (!VATTR_ALL_SUPPORTED(&va)) {
813fb2f6 7439 (void)vnode_setattr_fallback(tvp, &nva, ctx);
0a7de745 7440 }
39037602
A
7441
7442 // Make sure the name & parent pointers are hooked up
0a7de745 7443 if (tvp->v_name == NULL) {
39037602 7444 update_flags |= VNODE_UPDATE_NAME;
0a7de745
A
7445 }
7446 if (tvp->v_parent == NULLVP) {
39037602 7447 update_flags |= VNODE_UPDATE_PARENT;
0a7de745 7448 }
39037602
A
7449
7450 if (update_flags) {
7451 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7452 cnp->cn_namelen, cnp->cn_hash, update_flags);
7453 }
7454
7455#if CONFIG_FSE
7456 switch (vnode_vtype(tvp)) {
7457 case VLNK:
0a7de745 7458 /* FALLTHRU */
39037602
A
7459 case VREG:
7460 fsevent = FSE_CREATE_FILE;
7461 break;
7462 case VDIR:
7463 fsevent = FSE_CREATE_DIR;
7464 break;
7465 default:
7466 goto out;
7467 }
7468
7469 if (need_fsevent(fsevent, tvp)) {
5ba3f43e
A
7470 /*
7471 * The following is a sequence of three explicit events.
7472 * A pair of FSE_CLONE events representing the source and destination
7473 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7474 * fseventsd may coalesce the destination clone and create events
7475 * into a single event resulting in the following sequence for a client
7476 * FSE_CLONE (src)
7477 * FSE_CLONE | FSE_CREATE (dst)
7478 */
7479 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7480 FSE_ARG_DONE);
39037602
A
7481 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7482 FSE_ARG_DONE);
7483 }
7484#endif /* CONFIG_FSE */
7485 }
39037602
A
7486
7487out:
0a7de745 7488 if (attr_cleanup) {
813fb2f6 7489 vn_attribute_cleanup(&nva, defaulted);
0a7de745
A
7490 }
7491 if (free_src_acl && va.va_acl) {
39037602 7492 kauth_acl_free(va.va_acl);
0a7de745 7493 }
39037602 7494 nameidone(&tond);
0a7de745 7495 if (tvp) {
39037602 7496 vnode_put(tvp);
0a7de745 7497 }
39037602 7498 vnode_put(tdvp);
0a7de745 7499 return error;
39037602
A
7500}
7501
7502/*
7503 * clone files or directories, target must not exist.
7504 */
7505/* ARGSUSED */
7506int
7507clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7508 __unused int32_t *retval)
7509{
7510 vnode_t fvp;
7511 struct nameidata fromnd;
7512 int follow;
7513 int error;
7514 vfs_context_t ctx = vfs_context_current();
7515
7516 /* Check that the flags are valid. */
0a7de745
A
7517 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7518 return EINVAL;
7519 }
39037602
A
7520
7521 AUDIT_ARG(fd, uap->src_dirfd);
7522
7523 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7524 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7525 UIO_USERSPACE, uap->src, ctx);
0a7de745
A
7526 if ((error = nameiat(&fromnd, uap->src_dirfd))) {
7527 return error;
7528 }
39037602
A
7529
7530 fvp = fromnd.ni_vp;
7531 nameidone(&fromnd);
7532
7533 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7534 uap->flags, ctx);
7535
7536 vnode_put(fvp);
0a7de745 7537 return error;
39037602
A
7538}
7539
7540int
7541fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7542 __unused int32_t *retval)
7543{
7544 vnode_t fvp;
7545 struct fileproc *fp;
7546 int error;
7547 vfs_context_t ctx = vfs_context_current();
7548
5ba3f43e 7549 /* Check that the flags are valid. */
0a7de745
A
7550 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7551 return EINVAL;
7552 }
5ba3f43e 7553
39037602
A
7554 AUDIT_ARG(fd, uap->src_fd);
7555 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
0a7de745
A
7556 if (error) {
7557 return error;
7558 }
39037602
A
7559
7560 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7561 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7562 error = EBADF;
7563 goto out;
7564 }
7565
0a7de745 7566 if ((error = vnode_getwithref(fvp))) {
39037602 7567 goto out;
0a7de745 7568 }
39037602
A
7569
7570 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7571
7572 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7573 uap->flags, ctx);
7574
7575 vnode_put(fvp);
7576out:
7577 file_drop(uap->src_fd);
0a7de745 7578 return error;
39037602
A
7579}
7580
7581/*
7582 * Rename files. Source and destination must either both be directories,
7583 * or both not be directories. If target is a directory, it must be empty.
7584 */
7585/* ARGSUSED */
7586static int
7587renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7588 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7589{
0a7de745 7590 if (flags & ~VFS_RENAME_FLAGS_MASK) {
39037602 7591 return EINVAL;
0a7de745 7592 }
39037602 7593
0a7de745 7594 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
39037602 7595 return EINVAL;
0a7de745 7596 }
39037602
A
7597
7598 vnode_t tvp, tdvp;
7599 vnode_t fvp, fdvp;
7600 struct nameidata *fromnd, *tond;
7601 int error;
7602 int do_retry;
7603 int retry_count;
7604 int mntrename;
7605 int need_event;
b226f5e5
A
7606 int need_kpath2;
7607 int has_listeners;
39037602
A
7608 const char *oname = NULL;
7609 char *from_name = NULL, *to_name = NULL;
0a7de745 7610 int from_len = 0, to_len = 0;
39037602
A
7611 int holding_mntlock;
7612 mount_t locked_mp = NULL;
7613 vnode_t oparent = NULLVP;
7614#if CONFIG_FSE
7615 fse_info from_finfo, to_finfo;
7616#endif
0a7de745 7617 int from_truncated = 0, to_truncated;
39037602
A
7618 int batched = 0;
7619 struct vnode_attr *fvap, *tvap;
7620 int continuing = 0;
7621 /* carving out a chunk for structs that are too big to be on stack. */
7622 struct {
7623 struct nameidata from_node, to_node;
7624 struct vnode_attr fv_attr, tv_attr;
7625 } * __rename_data;
7626 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7627 fromnd = &__rename_data->from_node;
7628 tond = &__rename_data->to_node;
7629
7630 holding_mntlock = 0;
7631 do_retry = 0;
7632 retry_count = 0;
91447636
A
7633retry:
7634 fvp = tvp = NULL;
7635 fdvp = tdvp = NULL;
6d2010ae 7636 fvap = tvap = NULL;
1c79356b
A
7637 mntrename = FALSE;
7638
316670eb 7639 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7640 segflg, from, ctx);
316670eb 7641 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7642
316670eb 7643 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7644 segflg, to, ctx);
316670eb 7645 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7646
6d2010ae 7647continue_lookup:
316670eb 7648 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
0a7de745 7649 if ((error = nameiat(fromnd, fromfd))) {
6d2010ae 7650 goto out1;
0a7de745 7651 }
316670eb
A
7652 fdvp = fromnd->ni_dvp;
7653 fvp = fromnd->ni_vp;
1c79356b 7654
0a7de745 7655 if (fvp && fvp->v_type == VDIR) {
316670eb 7656 tond->ni_cnd.cn_flags |= WILLBEDIR;
0a7de745 7657 }
6d2010ae 7658 }
2d21ac55 7659
316670eb 7660 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
0a7de745 7661 if ((error = nameiat(tond, tofd))) {
6d2010ae
A
7662 /*
7663 * Translate error code for rename("dir1", "dir2/.").
7664 */
0a7de745 7665 if (error == EISDIR && fvp->v_type == VDIR) {
6d2010ae 7666 error = EINVAL;
0a7de745 7667 }
6d2010ae
A
7668 goto out1;
7669 }
316670eb
A
7670 tdvp = tond->ni_dvp;
7671 tvp = tond->ni_vp;
fe8ab488 7672 }
91447636 7673
00867663
A
7674#if DEVELOPMENT || DEBUG
7675 /*
7676 * XXX VSWAP: Check for entitlements or special flag here
7677 * so we can restrict access appropriately.
7678 */
7679#else /* DEVELOPMENT || DEBUG */
7680
7681 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7682 error = EPERM;
7683 goto out1;
7684 }
7685
7686 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7687 error = EPERM;
7688 goto out1;
7689 }
7690#endif /* DEVELOPMENT || DEBUG */
7691
39037602
A
7692 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7693 error = ENOENT;
7694 goto out1;
7695 }
7696
7697 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7698 error = EEXIST;
7699 goto out1;
7700 }
7701
6d2010ae 7702 batched = vnode_compound_rename_available(fdvp);
d9a64523
A
7703
7704#if CONFIG_FSE
7705 need_event = need_fsevent(FSE_RENAME, fdvp);
7706 if (need_event) {
7707 if (fvp) {
7708 get_fse_info(fvp, &from_finfo, ctx);
7709 } else {
7710 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7711 if (error) {
7712 goto out1;
7713 }
7714
7715 fvap = &__rename_data->fv_attr;
7716 }
7717
7718 if (tvp) {
0a7de745 7719 get_fse_info(tvp, &to_finfo, ctx);
d9a64523
A
7720 } else if (batched) {
7721 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7722 if (error) {
7723 goto out1;
7724 }
7725
7726 tvap = &__rename_data->tv_attr;
7727 }
7728 }
7729#else
7730 need_event = 0;
7731#endif /* CONFIG_FSE */
7732
b226f5e5
A
7733 has_listeners = kauth_authorize_fileop_has_listeners();
7734
7735 need_kpath2 = 0;
7736#if CONFIG_AUDIT
7737 if (AUDIT_RECORD_EXISTS()) {
7738 need_kpath2 = 1;
7739 }
7740#endif
7741
7742 if (need_event || has_listeners) {
d9a64523
A
7743 if (from_name == NULL) {
7744 GET_PATH(from_name);
7745 if (from_name == NULL) {
7746 error = ENOMEM;
7747 goto out1;
7748 }
7749 }
7750
7751 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
b226f5e5 7752 }
d9a64523 7753
b226f5e5 7754 if (need_event || need_kpath2 || has_listeners) {
d9a64523
A
7755 if (to_name == NULL) {
7756 GET_PATH(to_name);
7757 if (to_name == NULL) {
7758 error = ENOMEM;
7759 goto out1;
7760 }
7761 }
7762
7763 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
b226f5e5
A
7764 if (to_name && need_kpath2) {
7765 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
7766 }
d9a64523 7767 }
6d2010ae 7768 if (!fvp) {
fe8ab488 7769 /*
6d2010ae
A
7770 * Claim: this check will never reject a valid rename.
7771 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7772 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7773 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
0a7de745 7774 * then you can't move it to within another dir on the same mountpoint.
6d2010ae
A
7775 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7776 *
7777 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7778 */
6d2010ae
A
7779 if (fdvp->v_mount != tdvp->v_mount) {
7780 error = EXDEV;
7781 goto out1;
7782 }
7783 goto skipped_lookup;
1c79356b 7784 }
2d21ac55 7785
6d2010ae 7786 if (!batched) {
d9a64523 7787 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
6d2010ae 7788 if (error) {
3e170ce0
A
7789 if (error == ENOENT) {
7790 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7791 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7792 /*
7793 * We encountered a race where after doing the namei, tvp stops
7794 * being valid. If so, simply re-drive the rename call from the
7795 * top.
7796 */
7797 do_retry = 1;
7798 retry_count += 1;
7799 }
6d2010ae 7800 }
91447636 7801 goto out1;
1c79356b
A
7802 }
7803 }
6d2010ae 7804
0a7de745
A
7805 /*
7806 * If the source and destination are the same (i.e. they're
7807 * links to the same vnode) and the target file system is
7808 * case sensitive, then there is nothing to do.
6d2010ae
A
7809 *
7810 * XXX Come back to this.
0a7de745 7811 */
2d21ac55
A
7812 if (fvp == tvp) {
7813 int pathconf_val;
fe8ab488 7814
2d21ac55
A
7815 /*
7816 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7817 * then assume that this file system is case sensitive.
7818 */
7819 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7820 pathconf_val != 0) {
7821 goto out1;
fe8ab488 7822 }
2d21ac55 7823 }
91447636 7824
1c79356b
A
7825 /*
7826 * Allow the renaming of mount points.
7827 * - target must not exist
7828 * - target must reside in the same directory as source
7829 * - union mounts cannot be renamed
7830 * - "/" cannot be renamed
6d2010ae
A
7831 *
7832 * XXX Handle this in VFS after a continued lookup (if we missed
7833 * in the cache to start off)
39037602
A
7834 *
7835 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7836 * we'll skip past here. The file system is responsible for
7837 * checking that @tvp is not a descendent of @fvp and vice versa
7838 * so it should always return EINVAL if either @tvp or @fvp is the
7839 * root of a volume.
1c79356b 7840 */
91447636 7841 if ((fvp->v_flag & VROOT) &&
1c79356b 7842 (fvp->v_type == VDIR) &&
0a7de745
A
7843 (tvp == NULL) &&
7844 (fvp->v_mountedhere == NULL) &&
7845 (fdvp == tdvp) &&
7846 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
1c79356b 7847 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7848 vnode_t coveredvp;
fe8ab488 7849
1c79356b 7850 /* switch fvp to the covered vnode */
91447636 7851 coveredvp = fvp->v_mount->mnt_vnodecovered;
0a7de745
A
7852 if ((vnode_getwithref(coveredvp))) {
7853 error = ENOENT;
91447636
A
7854 goto out1;
7855 }
7856 vnode_put(fvp);
7857
7858 fvp = coveredvp;
1c79356b
A
7859 mntrename = TRUE;
7860 }
91447636
A
7861 /*
7862 * Check for cross-device rename.
7863 */
7864 if ((fvp->v_mount != tdvp->v_mount) ||
7865 (tvp && (fvp->v_mount != tvp->v_mount))) {
7866 error = EXDEV;
7867 goto out1;
7868 }
55e303ae 7869
91447636
A
7870 /*
7871 * If source is the same as the destination (that is the
7872 * same inode number) then there is nothing to do...
7873 * EXCEPT if the underlying file system supports case
7874 * insensitivity and is case preserving. In this case
7875 * the file system needs to handle the special case of
7876 * getting the same vnode as target (fvp) and source (tvp).
7877 *
7878 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7879 * and _PC_CASE_PRESERVING can have this exception, and they need to
7880 * handle the special case of getting the same vnode as target and
7881 * source. NOTE: Then the target is unlocked going into vnop_rename,
7882 * so not to cause locking problems. There is a single reference on tvp.
7883 *
fe8ab488 7884 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7885 * that correct behaviour then is just to return success without doing
7886 * anything.
6d2010ae
A
7887 *
7888 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7889 */
7890 if (fvp == tvp && fdvp == tdvp) {
316670eb 7891 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
0a7de745
A
7892 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7893 fromnd->ni_cnd.cn_namelen)) {
91447636 7894 goto out1;
55e303ae 7895 }
91447636 7896 }
55e303ae 7897
91447636 7898 if (holding_mntlock && fvp->v_mount != locked_mp) {
0a7de745 7899 /*
91447636
A
7900 * we're holding a reference and lock
7901 * on locked_mp, but it no longer matches
7902 * what we want to do... so drop our hold
7903 */
7904 mount_unlock_renames(locked_mp);
7905 mount_drop(locked_mp, 0);
0a7de745 7906 holding_mntlock = 0;
91447636
A
7907 }
7908 if (tdvp != fdvp && fvp->v_type == VDIR) {
0a7de745 7909 /*
91447636
A
7910 * serialize renames that re-shape
7911 * the tree... if holding_mntlock is
7912 * set, then we're ready to go...
7913 * otherwise we
7914 * first need to drop the iocounts
7915 * we picked up, second take the
7916 * lock to serialize the access,
7917 * then finally start the lookup
7918 * process over with the lock held
7919 */
0a7de745
A
7920 if (!holding_mntlock) {
7921 /*
91447636
A
7922 * need to grab a reference on
7923 * the mount point before we
7924 * drop all the iocounts... once
7925 * the iocounts are gone, the mount
7926 * could follow
7927 */
7928 locked_mp = fvp->v_mount;
7929 mount_ref(locked_mp, 0);
55e303ae 7930
91447636
A
7931 /*
7932 * nameidone has to happen before we vnode_put(tvp)
7933 * since it may need to release the fs_nodelock on the tvp
7934 */
316670eb 7935 nameidone(tond);
55e303ae 7936
0a7de745
A
7937 if (tvp) {
7938 vnode_put(tvp);
7939 }
91447636
A
7940 vnode_put(tdvp);
7941
7942 /*
7943 * nameidone has to happen before we vnode_put(fdvp)
7944 * since it may need to release the fs_nodelock on the fvp
7945 */
316670eb 7946 nameidone(fromnd);
55e303ae 7947
91447636
A
7948 vnode_put(fvp);
7949 vnode_put(fdvp);
7950
7951 mount_lock_renames(locked_mp);
7952 holding_mntlock = 1;
7953
7954 goto retry;
55e303ae 7955 }
91447636 7956 } else {
0a7de745 7957 /*
91447636 7958 * when we dropped the iocounts to take
fe8ab488 7959 * the lock, we allowed the identity of
91447636
A
7960 * the various vnodes to change... if they did,
7961 * we may no longer be dealing with a rename
7962 * that reshapes the tree... once we're holding
7963 * the iocounts, the vnodes can't change type
7964 * so we're free to drop the lock at this point
7965 * and continue on
1c79356b 7966 */
0a7de745 7967 if (holding_mntlock) {
91447636
A
7968 mount_unlock_renames(locked_mp);
7969 mount_drop(locked_mp, 0);
0a7de745 7970 holding_mntlock = 0;
1c79356b 7971 }
91447636 7972 }
6d2010ae 7973
91447636
A
7974 // save these off so we can later verify that fvp is the same
7975 oname = fvp->v_name;
7976 oparent = fvp->v_parent;
55e303ae 7977
6d2010ae 7978skipped_lookup:
316670eb 7979 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
0a7de745
A
7980 tdvp, &tvp, &tond->ni_cnd, tvap,
7981 flags, ctx);
55e303ae 7982
91447636
A
7983 if (holding_mntlock) {
7984 /*
7985 * we can drop our serialization
7986 * lock now
7987 */
7988 mount_unlock_renames(locked_mp);
7989 mount_drop(locked_mp, 0);
7990 holding_mntlock = 0;
7991 }
7992 if (error) {
6d2010ae 7993 if (error == EKEEPLOOKING) {
316670eb
A
7994 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7995 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7996 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7997 }
7998 }
7999
316670eb
A
8000 fromnd->ni_vp = fvp;
8001 tond->ni_vp = tvp;
fe8ab488 8002
6d2010ae
A
8003 goto continue_lookup;
8004 }
8005
8006 /*
fe8ab488
A
8007 * We may encounter a race in the VNOP where the destination didn't
8008 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
8009 * try to create the entry. In this case, we should re-drive this rename
8010 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 8011 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
8012 */
8013 if (error == ERECYCLE) {
8014 do_retry = 1;
8015 }
55e303ae 8016
c18c124e
A
8017 /*
8018 * For compound VNOPs, the authorization callback may return
8019 * ENOENT in case of racing hardlink lookups hitting the name
8020 * cache, redrive the lookup.
8021 */
3e170ce0
A
8022 if (batched && error == ENOENT) {
8023 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8024 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8025 do_retry = 1;
8026 retry_count += 1;
8027 }
c18c124e
A
8028 }
8029
91447636 8030 goto out1;
fe8ab488
A
8031 }
8032
8033 /* call out to allow 3rd party notification of rename.
91447636
A
8034 * Ignore result of kauth_authorize_fileop call.
8035 */
fe8ab488 8036 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
8037 KAUTH_FILEOP_RENAME,
8038 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
8039 if (flags & VFS_RENAME_SWAP) {
8040 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
8041 KAUTH_FILEOP_RENAME,
8042 (uintptr_t)to_name, (uintptr_t)from_name);
39037602 8043 }
91447636 8044
2d21ac55 8045#if CONFIG_FSE
91447636 8046 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
8047 if (from_truncated || to_truncated) {
8048 // set it here since only the from_finfo gets reported up to user space
8049 from_finfo.mode |= FSE_TRUNCATED_PATH;
8050 }
6d2010ae
A
8051
8052 if (tvap && tvp) {
8053 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
8054 }
8055 if (fvap) {
8056 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
8057 }
8058
39037602
A
8059 if (tvp) {
8060 add_fsevent(FSE_RENAME, ctx,
0a7de745
A
8061 FSE_ARG_STRING, from_len, from_name,
8062 FSE_ARG_FINFO, &from_finfo,
8063 FSE_ARG_STRING, to_len, to_name,
8064 FSE_ARG_FINFO, &to_finfo,
8065 FSE_ARG_DONE);
39037602
A
8066 if (flags & VFS_RENAME_SWAP) {
8067 /*
8068 * Strictly speaking, swap is the equivalent of
8069 * *three* renames. FSEvents clients should only take
8070 * the events as a hint, so we only bother reporting
8071 * two.
8072 */
8073 add_fsevent(FSE_RENAME, ctx,
0a7de745
A
8074 FSE_ARG_STRING, to_len, to_name,
8075 FSE_ARG_FINFO, &to_finfo,
91447636
A
8076 FSE_ARG_STRING, from_len, from_name,
8077 FSE_ARG_FINFO, &from_finfo,
91447636 8078 FSE_ARG_DONE);
0a7de745
A
8079 }
8080 } else {
8081 add_fsevent(FSE_RENAME, ctx,
8082 FSE_ARG_STRING, from_len, from_name,
8083 FSE_ARG_FINFO, &from_finfo,
8084 FSE_ARG_STRING, to_len, to_name,
8085 FSE_ARG_DONE);
91447636
A
8086 }
8087 }
2d21ac55 8088#endif /* CONFIG_FSE */
fe8ab488 8089
91447636
A
8090 /*
8091 * update filesystem's mount point data
8092 */
8093 if (mntrename) {
0a7de745 8094 char *cp, *pathend, *mpname;
91447636
A
8095 char * tobuf;
8096 struct mount *mp;
8097 int maxlen;
8098 size_t len = 0;
8099
8100 mp = fvp->v_mountedhere;
8101
8102 if (vfs_busy(mp, LK_NOWAIT)) {
0a7de745 8103 error = EBUSY;
91447636 8104 goto out1;
55e303ae 8105 }
91447636 8106 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 8107
0a7de745 8108 if (UIO_SEG_IS_USER_SPACE(segflg)) {
fe8ab488 8109 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
0a7de745 8110 } else {
fe8ab488 8111 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
0a7de745 8112 }
91447636 8113 if (!error) {
0a7de745
A
8114 /* find current mount point prefix */
8115 pathend = &mp->mnt_vfsstat.f_mntonname[0];
91447636 8116 for (cp = pathend; *cp != '\0'; ++cp) {
0a7de745
A
8117 if (*cp == '/') {
8118 pathend = cp + 1;
8119 }
91447636
A
8120 }
8121 /* find last component of target name */
8122 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
0a7de745
A
8123 if (*cp == '/') {
8124 mpname = cp + 1;
8125 }
91447636
A
8126 }
8127 /* append name to prefix */
8128 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
8129 bzero(pathend, maxlen);
2d21ac55 8130 strlcpy(pathend, mpname, maxlen);
91447636
A
8131 }
8132 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
8133
8134 vfs_unbusy(mp);
8135 }
8136 /*
fe8ab488 8137 * fix up name & parent pointers. note that we first
91447636
A
8138 * check that fvp has the same name/parent pointers it
8139 * had before the rename call... this is a 'weak' check
8140 * at best...
6d2010ae
A
8141 *
8142 * XXX oparent and oname may not be set in the compound vnop case
91447636 8143 */
6d2010ae 8144 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
0a7de745 8145 int update_flags;
91447636 8146
0a7de745 8147 update_flags = VNODE_UPDATE_NAME;
91447636 8148
0a7de745
A
8149 if (fdvp != tdvp) {
8150 update_flags |= VNODE_UPDATE_PARENT;
8151 }
91447636 8152
0a7de745 8153 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
8154 }
8155out1:
593a1d5f
A
8156 if (to_name != NULL) {
8157 RELEASE_PATH(to_name);
8158 to_name = NULL;
8159 }
8160 if (from_name != NULL) {
8161 RELEASE_PATH(from_name);
8162 from_name = NULL;
8163 }
91447636 8164 if (holding_mntlock) {
0a7de745 8165 mount_unlock_renames(locked_mp);
91447636 8166 mount_drop(locked_mp, 0);
593a1d5f 8167 holding_mntlock = 0;
91447636
A
8168 }
8169 if (tdvp) {
8170 /*
8171 * nameidone has to happen before we vnode_put(tdvp)
8172 * since it may need to release the fs_nodelock on the tdvp
8173 */
316670eb 8174 nameidone(tond);
91447636 8175
0a7de745
A
8176 if (tvp) {
8177 vnode_put(tvp);
8178 }
8179 vnode_put(tdvp);
91447636
A
8180 }
8181 if (fdvp) {
8182 /*
8183 * nameidone has to happen before we vnode_put(fdvp)
8184 * since it may need to release the fs_nodelock on the fdvp
8185 */
316670eb 8186 nameidone(fromnd);
91447636 8187
0a7de745
A
8188 if (fvp) {
8189 vnode_put(fvp);
8190 }
8191 vnode_put(fdvp);
91447636 8192 }
fe8ab488 8193
6d2010ae
A
8194 /*
8195 * If things changed after we did the namei, then we will re-drive
8196 * this rename call from the top.
8197 */
316670eb 8198 if (do_retry) {
6d2010ae 8199 do_retry = 0;
593a1d5f
A
8200 goto retry;
8201 }
316670eb
A
8202
8203 FREE(__rename_data, M_TEMP);
0a7de745 8204 return error;
1c79356b
A
8205}
8206
fe8ab488
A
8207int
8208rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
8209{
0a7de745
A
8210 return renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
8211 AT_FDCWD, uap->to, UIO_USERSPACE, 0);
fe8ab488
A
8212}
8213
0a7de745
A
8214int
8215renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
8216{
8217 return renameat_internal(
39037602
A
8218 vfs_context_current(),
8219 uap->fromfd, uap->from,
8220 uap->tofd, uap->to,
fe8ab488
A
8221 UIO_USERSPACE, uap->flags);
8222}
39037602 8223
fe8ab488
A
8224int
8225renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
8226{
0a7de745
A
8227 return renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
8228 uap->tofd, uap->to, UIO_USERSPACE, 0);
fe8ab488
A
8229}
8230
1c79356b
A
8231/*
8232 * Make a directory file.
2d21ac55
A
8233 *
8234 * Returns: 0 Success
8235 * EEXIST
8236 * namei:???
8237 * vnode_authorize:???
8238 * vn_create:???
1c79356b 8239 */
1c79356b 8240/* ARGSUSED */
91447636 8241static int
fe8ab488
A
8242mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
8243 enum uio_seg segflg)
1c79356b 8244{
0a7de745 8245 vnode_t vp, dvp;
1c79356b 8246 int error;
91447636 8247 int update_flags = 0;
6d2010ae 8248 int batched;
1c79356b
A
8249 struct nameidata nd;
8250
91447636 8251 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 8252 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
0a7de745 8253 path, ctx);
9bccf70c 8254 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
8255 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
8256
8257continue_lookup:
fe8ab488 8258 error = nameiat(&nd, fd);
0a7de745
A
8259 if (error) {
8260 return error;
8261 }
91447636 8262 dvp = nd.ni_dvp;
1c79356b 8263 vp = nd.ni_vp;
55e303ae 8264
fe8ab488
A
8265 if (vp != NULL) {
8266 error = EEXIST;
8267 goto out;
8268 }
8269
6d2010ae 8270 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
8271
8272 VATTR_SET(vap, va_type, VDIR);
fe8ab488 8273
6d2010ae
A
8274 /*
8275 * XXX
8276 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8277 * only get EXISTS or EISDIR for existing path components, and not that it could see
8278 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8279 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8280 */
fe8ab488 8281 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
8282 if (error == EACCES || error == EPERM) {
8283 int error2;
8284
8285 nameidone(&nd);
8286 vnode_put(dvp);
8287 dvp = NULLVP;
8288
fe8ab488
A
8289 /*
8290 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
8291 * rather than EACCESS if the target exists.
8292 */
fe8ab488 8293 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
0a7de745 8294 path, ctx);
fe8ab488 8295 error2 = nameiat(&nd, fd);
6d2010ae
A
8296 if (error2) {
8297 goto out;
8298 } else {
8299 vp = nd.ni_vp;
8300 error = EEXIST;
8301 goto out;
8302 }
8303 }
8304
2d21ac55 8305 goto out;
6d2010ae
A
8306 }
8307
8308 /*
fe8ab488 8309 * make the directory
6d2010ae 8310 */
fe8ab488 8311 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
8312 if (error == EKEEPLOOKING) {
8313 nd.ni_vp = vp;
8314 goto continue_lookup;
8315 }
2d21ac55 8316
fe8ab488 8317 goto out;
6d2010ae 8318 }
fe8ab488 8319
91447636 8320 // Make sure the name & parent pointers are hooked up
0a7de745
A
8321 if (vp->v_name == NULL) {
8322 update_flags |= VNODE_UPDATE_NAME;
8323 }
8324 if (vp->v_parent == NULLVP) {
8325 update_flags |= VNODE_UPDATE_PARENT;
8326 }
91447636 8327
0a7de745
A
8328 if (update_flags) {
8329 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
8330 }
55e303ae 8331
2d21ac55 8332#if CONFIG_FSE
91447636 8333 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 8334#endif
91447636
A
8335
8336out:
8337 /*
8338 * nameidone has to happen before we vnode_put(dvp)
8339 * since it may need to release the fs_nodelock on the dvp
8340 */
8341 nameidone(&nd);
8342
0a7de745 8343 if (vp) {
6d2010ae 8344 vnode_put(vp);
0a7de745
A
8345 }
8346 if (dvp) {
6d2010ae 8347 vnode_put(dvp);
0a7de745 8348 }
55e303ae 8349
0a7de745 8350 return error;
1c79356b
A
8351}
8352
b0d623f7
A
8353/*
8354 * mkdir_extended: Create a directory; with extended security (ACL).
8355 *
8356 * Parameters: p Process requesting to create the directory
8357 * uap User argument descriptor (see below)
fe8ab488 8358 * retval (ignored)
b0d623f7
A
8359 *
8360 * Indirect: uap->path Path of directory to create
8361 * uap->mode Access permissions to set
8362 * uap->xsecurity ACL to set
fe8ab488 8363 *
b0d623f7
A
8364 * Returns: 0 Success
8365 * !0 Not success
8366 *
8367 */
1c79356b 8368int
b0d623f7 8369mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 8370{
91447636
A
8371 int ciferror;
8372 kauth_filesec_t xsecdst;
8373 struct vnode_attr va;
8374
b0d623f7
A
8375 AUDIT_ARG(owner, uap->uid, uap->gid);
8376
91447636
A
8377 xsecdst = NULL;
8378 if ((uap->xsecurity != USER_ADDR_NULL) &&
0a7de745 8379 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
91447636 8380 return ciferror;
0a7de745 8381 }
91447636 8382
91447636 8383 VATTR_INIT(&va);
fe8ab488 8384 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
0a7de745 8385 if (xsecdst != NULL) {
91447636 8386 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
0a7de745 8387 }
91447636 8388
fe8ab488
A
8389 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8390 UIO_USERSPACE);
0a7de745 8391 if (xsecdst != NULL) {
91447636 8392 kauth_filesec_free(xsecdst);
0a7de745 8393 }
91447636 8394 return ciferror;
1c79356b
A
8395}
8396
1c79356b 8397int
b0d623f7 8398mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 8399{
91447636 8400 struct vnode_attr va;
1c79356b 8401
91447636 8402 VATTR_INIT(&va);
fe8ab488 8403 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 8404
0a7de745
A
8405 return mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8406 UIO_USERSPACE);
91447636 8407}
1c79356b 8408
91447636 8409int
fe8ab488
A
8410mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8411{
8412 struct vnode_attr va;
8413
8414 VATTR_INIT(&va);
8415 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8416
0a7de745
A
8417 return mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8418 UIO_USERSPACE);
fe8ab488
A
8419}
8420
8421static int
8422rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8423 enum uio_seg segflg)
1c79356b 8424{
2d21ac55 8425 vnode_t vp, dvp;
91447636
A
8426 int error;
8427 struct nameidata nd;
6d2010ae 8428 char *path = NULL;
0a7de745 8429 int len = 0;
6d2010ae
A
8430 int has_listeners = 0;
8431 int need_event = 0;
8432 int truncated = 0;
6d2010ae
A
8433#if CONFIG_FSE
8434 struct vnode_attr va;
8435#endif /* CONFIG_FSE */
8436 struct vnode_attr *vap = NULL;
c18c124e 8437 int restart_count = 0;
6d2010ae 8438 int batched;
91447636 8439
b0d623f7 8440 int restart_flag;
91447636 8441
fe8ab488 8442 /*
2d21ac55
A
8443 * This loop exists to restart rmdir in the unlikely case that two
8444 * processes are simultaneously trying to remove the same directory
8445 * containing orphaned appleDouble files.
8446 */
8447 do {
6d2010ae 8448 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 8449 segflg, dirpath, ctx);
6d2010ae
A
8450 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8451continue_lookup:
2d21ac55 8452 restart_flag = 0;
6d2010ae 8453 vap = NULL;
2d21ac55 8454
fe8ab488 8455 error = nameiat(&nd, fd);
0a7de745
A
8456 if (error) {
8457 return error;
8458 }
2d21ac55
A
8459
8460 dvp = nd.ni_dvp;
8461 vp = nd.ni_vp;
8462
6d2010ae
A
8463 if (vp) {
8464 batched = vnode_compound_rmdir_available(vp);
2d21ac55 8465
6d2010ae
A
8466 if (vp->v_flag & VROOT) {
8467 /*
8468 * The root of a mounted filesystem cannot be deleted.
8469 */
8470 error = EBUSY;
8471 goto out;
8472 }
1c79356b 8473
00867663
A
8474#if DEVELOPMENT || DEBUG
8475 /*
0a7de745
A
8476 * XXX VSWAP: Check for entitlements or special flag here
8477 * so we can restrict access appropriately.
8478 */
00867663
A
8479#else /* DEVELOPMENT || DEBUG */
8480
8481 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8482 error = EPERM;
8483 goto out;
8484 }
8485#endif /* DEVELOPMENT || DEBUG */
8486
2d21ac55 8487 /*
6d2010ae
A
8488 * Removed a check here; we used to abort if vp's vid
8489 * was not the same as what we'd seen the last time around.
8490 * I do not think that check was valid, because if we retry
8491 * and all dirents are gone, the directory could legitimately
8492 * be recycled but still be present in a situation where we would
fe8ab488 8493 * have had permission to delete. Therefore, we won't make
6d2010ae
A
8494 * an effort to preserve that check now that we may not have a
8495 * vp here.
2d21ac55 8496 */
6d2010ae
A
8497
8498 if (!batched) {
8499 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8500 if (error) {
3e170ce0
A
8501 if (error == ENOENT) {
8502 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8503 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8504 restart_flag = 1;
8505 restart_count += 1;
8506 }
c18c124e 8507 }
6d2010ae
A
8508 goto out;
8509 }
8510 }
2d21ac55 8511 } else {
6d2010ae
A
8512 batched = 1;
8513
8514 if (!vnode_compound_rmdir_available(dvp)) {
8515 panic("No error, but no compound rmdir?");
8516 }
91447636 8517 }
6d2010ae 8518
2d21ac55 8519#if CONFIG_FSE
6d2010ae 8520 fse_info finfo;
b0d623f7 8521
6d2010ae
A
8522 need_event = need_fsevent(FSE_DELETE, dvp);
8523 if (need_event) {
8524 if (!batched) {
2d21ac55 8525 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
8526 } else {
8527 error = vfs_get_notify_attributes(&va);
8528 if (error) {
8529 goto out;
8530 }
8531
8532 vap = &va;
2d21ac55 8533 }
6d2010ae 8534 }
2d21ac55 8535#endif
6d2010ae
A
8536 has_listeners = kauth_authorize_fileop_has_listeners();
8537 if (need_event || has_listeners) {
8538 if (path == NULL) {
2d21ac55
A
8539 GET_PATH(path);
8540 if (path == NULL) {
8541 error = ENOMEM;
8542 goto out;
8543 }
6d2010ae 8544 }
b0d623f7 8545
6d2010ae 8546 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 8547#if CONFIG_FSE
6d2010ae
A
8548 if (truncated) {
8549 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 8550 }
6d2010ae
A
8551#endif
8552 }
91447636 8553
6d2010ae
A
8554 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8555 nd.ni_vp = vp;
8556 if (vp == NULLVP) {
8557 /* Couldn't find a vnode */
8558 goto out;
8559 }
2d21ac55 8560
6d2010ae
A
8561 if (error == EKEEPLOOKING) {
8562 goto continue_lookup;
3e170ce0
A
8563 } else if (batched && error == ENOENT) {
8564 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8565 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8566 /*
8567 * For compound VNOPs, the authorization callback
8568 * may return ENOENT in case of racing hard link lookups
8569 * redrive the lookup.
8570 */
8571 restart_flag = 1;
8572 restart_count += 1;
8573 goto out;
8574 }
6d2010ae 8575 }
39236c6e 8576#if CONFIG_APPLEDOUBLE
6d2010ae
A
8577 /*
8578 * Special case to remove orphaned AppleDouble
8579 * files. I don't like putting this in the kernel,
8580 * but carbon does not like putting this in carbon either,
8581 * so here we are.
8582 */
8583 if (error == ENOTEMPTY) {
8584 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8585 if (error == EBUSY) {
8586 goto out;
2d21ac55
A
8587 }
8588
6d2010ae 8589
2d21ac55 8590 /*
fe8ab488 8591 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8592 */
0a7de745 8593 if (!error) {
6d2010ae 8594 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
0a7de745 8595 }
6d2010ae 8596 }
39236c6e 8597#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8598 /*
fe8ab488 8599 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8600 * Ignore result of kauth_authorize_fileop call.
8601 */
8602 if (!error) {
8603 if (has_listeners) {
fe8ab488 8604 kauth_authorize_fileop(vfs_context_ucred(ctx),
0a7de745
A
8605 KAUTH_FILEOP_DELETE,
8606 (uintptr_t)vp,
8607 (uintptr_t)path);
6d2010ae
A
8608 }
8609
8610 if (vp->v_flag & VISHARDLINK) {
8611 // see the comment in unlink1() about why we update
8612 // the parent of a hard link when it is removed
8613 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8614 }
2d21ac55
A
8615
8616#if CONFIG_FSE
6d2010ae
A
8617 if (need_event) {
8618 if (vap) {
8619 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8620 }
6d2010ae 8621 add_fsevent(FSE_DELETE, ctx,
0a7de745
A
8622 FSE_ARG_STRING, len, path,
8623 FSE_ARG_FINFO, &finfo,
8624 FSE_ARG_DONE);
2d21ac55 8625 }
6d2010ae 8626#endif
2d21ac55
A
8627 }
8628
8629out:
6d2010ae
A
8630 if (path != NULL) {
8631 RELEASE_PATH(path);
8632 path = NULL;
8633 }
2d21ac55
A
8634 /*
8635 * nameidone has to happen before we vnode_put(dvp)
8636 * since it may need to release the fs_nodelock on the dvp
8637 */
8638 nameidone(&nd);
2d21ac55 8639 vnode_put(dvp);
6d2010ae 8640
0a7de745 8641 if (vp) {
6d2010ae 8642 vnode_put(vp);
0a7de745 8643 }
2d21ac55
A
8644
8645 if (restart_flag == 0) {
8646 wakeup_one((caddr_t)vp);
0a7de745 8647 return error;
2d21ac55
A
8648 }
8649 tsleep(vp, PVFS, "rm AD", 1);
2d21ac55 8650 } while (restart_flag != 0);
91447636 8651
0a7de745 8652 return error;
1c79356b 8653}
91447636 8654
fe8ab488
A
8655/*
8656 * Remove a directory file.
8657 */
8658/* ARGSUSED */
8659int
8660rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8661{
0a7de745
A
8662 return rmdirat_internal(vfs_context_current(), AT_FDCWD,
8663 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE);
fe8ab488
A
8664}
8665
2d21ac55
A
/*
 * Get direntry length padded to 8 byte alignment.
 * (presumably struct direntry declares d_name with MAXPATHLEN bytes, so
 * this leaves room for the name plus its terminator -- confirm against
 * the struct definition)
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 * The argument is parenthesized so that an expression argument (e.g. a
 * conditional) is evaluated as a whole before the + 1 is applied.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of the record's last byte) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
8677
fe8ab488 8678errno_t
2d21ac55 8679vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
0a7de745 8680 int *numdirent, vfs_context_t ctxp)
2d21ac55
A
8681{
8682 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8683 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
0a7de745 8684 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8685 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8686 } else {
8687 size_t bufsize;
8688 void * bufptr;
8689 uio_t auio;
15129b1c 8690 struct direntry *entry64;
2d21ac55
A
8691 struct dirent *dep;
8692 int bytesread;
8693 int error;
8694
8695 /*
5ba3f43e
A
8696 * We're here because the underlying file system does not
8697 * support direnties or we mounted denying support so we must
8698 * fall back to dirents and convert them to direntries.
8699 *
8700 * Our kernel buffer needs to be smaller since re-packing will
8701 * expand each dirent. The worse case (when the name length
8702 * is 3 or less) corresponds to a struct direntry size of 32
2d21ac55
A
8703 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8704 * (4-byte aligned). So having a buffer that is 3/8 the size
8705 * will prevent us from reading more than we can pack.
0a7de745 8706 *
2d21ac55 8707 * Since this buffer is wired memory, we will limit the
39037602 8708 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8709 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8710 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8711 */
316670eb 8712 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8713 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8714 if (bufptr == NULL) {
8715 return ENOMEM;
8716 }
2d21ac55 8717
b0d623f7 8718 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8719 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8720 auio->uio_offset = uio->uio_offset;
8721
8722 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8723
8724 dep = (struct dirent *)bufptr;
8725 bytesread = bufsize - uio_resid(auio);
8726
15129b1c 8727 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
0a7de745 8728 M_TEMP, M_WAITOK);
2d21ac55
A
8729 /*
8730 * Convert all the entries and copy them out to user's buffer.
8731 */
8732 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
0a7de745 8733 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
15129b1c 8734
5ba3f43e
A
8735 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8736 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8737 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
0a7de745
A
8738 vp->v_mount->mnt_vfsstat.f_mntonname,
8739 vp->v_name ? vp->v_name : "<unknown>");
5ba3f43e
A
8740 error = EIO;
8741 break;
8742 }
8743
15129b1c 8744 bzero(entry64, enbufsize);
2d21ac55 8745 /* Convert a dirent to a dirent64. */
15129b1c
A
8746 entry64->d_ino = dep->d_ino;
8747 entry64->d_seekoff = 0;
8748 entry64->d_reclen = enbufsize;
8749 entry64->d_namlen = dep->d_namlen;
8750 entry64->d_type = dep->d_type;
8751 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8752
8753 /* Move to next entry. */
8754 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8755
8756 /* Copy entry64 to user's buffer. */
15129b1c 8757 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8758 }
8759
8760 /* Update the real offset using the offset we got from VNOP_READDIR. */
8761 if (error == 0) {
8762 uio->uio_offset = auio->uio_offset;
8763 }
8764 uio_free(auio);
8765 FREE(bufptr, M_TEMP);
15129b1c 8766 FREE(entry64, M_TEMP);
0a7de745 8767 return error;
2d21ac55
A
8768 }
8769}
1c79356b 8770
0a7de745 8771#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
39236c6e 8772
1c79356b
A
8773/*
8774 * Read a block of directory entries in a file system independent format.
8775 */
2d21ac55
A
8776static int
8777getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
0a7de745 8778 off_t *offset, int flags)
1c79356b 8779{
2d21ac55 8780 vnode_t vp;
0a7de745 8781 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8782 struct fileproc *fp;
8783 uio_t auio;
2d21ac55
A
8784 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8785 off_t loff;
8786 int error, eofflag, numdirent;
0a7de745 8787 char uio_buf[UIO_SIZEOF(1)];
1c79356b 8788
2d21ac55
A
8789 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8790 if (error) {
0a7de745 8791 return error;
2d21ac55 8792 }
91447636
A
8793 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8794 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8795 error = EBADF;
8796 goto out;
8797 }
2d21ac55 8798
0a7de745 8799 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) {
39236c6e 8800 bufsize = GETDIRENTRIES_MAXBUFSIZE;
0a7de745 8801 }
39236c6e 8802
2d21ac55
A
8803#if CONFIG_MACF
8804 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
0a7de745 8805 if (error) {
2d21ac55 8806 goto out;
0a7de745 8807 }
2d21ac55 8808#endif
0a7de745 8809 if ((error = vnode_getwithref(vp))) {
91447636
A
8810 goto out;
8811 }
91447636 8812 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8813
1c79356b 8814unionread:
91447636
A
8815 if (vp->v_type != VDIR) {
8816 (void)vnode_put(vp);
8817 error = EINVAL;
8818 goto out;
8819 }
2d21ac55
A
8820
8821#if CONFIG_MACF
8822 error = mac_vnode_check_readdir(&context, vp);
8823 if (error != 0) {
8824 (void)vnode_put(vp);
8825 goto out;
8826 }
8827#endif /* MAC */
91447636
A
8828
8829 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8830 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8831 uio_addiov(auio, bufp, bufsize);
91447636 8832
2d21ac55
A
8833 if (flags & VNODE_READDIR_EXTENDED) {
8834 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8835 fp->f_fglob->fg_offset = uio_offset(auio);
8836 } else {
8837 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8838 fp->f_fglob->fg_offset = uio_offset(auio);
8839 }
91447636
A
8840 if (error) {
8841 (void)vnode_put(vp);
8842 goto out;
8843 }
1c79356b 8844
0a7de745 8845 if ((user_ssize_t)bufsize == uio_resid(auio)) {
2d21ac55
A
8846 if (union_dircheckp) {
8847 error = union_dircheckp(&vp, fp, &context);
0a7de745 8848 if (error == -1) {
2d21ac55 8849 goto unionread;
0a7de745 8850 }
813fb2f6
A
8851 if (error) {
8852 (void)vnode_put(vp);
2d21ac55 8853 goto out;
813fb2f6 8854 }
1c79356b
A
8855 }
8856
39236c6e 8857 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8858 struct vnode *tvp = vp;
39236c6e
A
8859 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8860 vnode_ref(vp);
8861 fp->f_fglob->fg_data = (caddr_t) vp;
8862 fp->f_fglob->fg_offset = 0;
8863 vnode_rele(tvp);
8864 vnode_put(tvp);
8865 goto unionread;
8866 }
8867 vp = tvp;
1c79356b
A
8868 }
8869 }
2d21ac55 8870
91447636 8871 vnode_put(vp);
2d21ac55
A
8872 if (offset) {
8873 *offset = loff;
8874 }
39037602 8875
2d21ac55 8876 *bytesread = bufsize - uio_resid(auio);
91447636
A
8877out:
8878 file_drop(fd);
0a7de745 8879 return error;
1c79356b
A
8880}
8881
2d21ac55
A
8882
8883int
b0d623f7 8884getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8885{
8886 off_t offset;
2d21ac55
A
8887 ssize_t bytesread;
8888 int error;
8889
8890 AUDIT_ARG(fd, uap->fd);
8891 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8892
8893 if (error == 0) {
b0d623f7
A
8894 if (proc_is64bit(p)) {
8895 user64_long_t base = (user64_long_t)offset;
8896 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8897 } else {
8898 user32_long_t base = (user32_long_t)offset;
8899 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8900 }
2d21ac55
A
8901 *retval = bytesread;
8902 }
0a7de745 8903 return error;
2d21ac55
A
8904}
8905
8906int
8907getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8908{
8909 off_t offset;
8910 ssize_t bytesread;
8911 int error;
8912
8913 AUDIT_ARG(fd, uap->fd);
8914 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8915
8916 if (error == 0) {
8917 *retval = bytesread;
8918 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8919 }
0a7de745 8920 return error;
2d21ac55
A
8921}
8922
8923
1c79356b
A
8924/*
8925 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8926 * XXX implement xsecurity
1c79356b 8927 */
0a7de745 8928#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
91447636 8929static int
b0d623f7 8930umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8931{
2d21ac55 8932 struct filedesc *fdp;
1c79356b 8933
91447636 8934 AUDIT_ARG(mask, newmask);
2d21ac55 8935 proc_fdlock(p);
1c79356b
A
8936 fdp = p->p_fd;
8937 *retval = fdp->fd_cmask;
91447636 8938 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8939 proc_fdunlock(p);
0a7de745 8940 return 0;
1c79356b
A
8941}
8942
b0d623f7
A
8943/*
8944 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8945 *
8946 * Parameters: p Process requesting to set the umask
8947 * uap User argument descriptor (see below)
8948 * retval umask of the process (parameter p)
8949 *
8950 * Indirect: uap->newmask umask to set
8951 * uap->xsecurity ACL to set
39037602 8952 *
b0d623f7
A
8953 * Returns: 0 Success
8954 * !0 Not success
8955 *
8956 */
8957int
8958umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8959{
8960 int ciferror;
8961 kauth_filesec_t xsecdst;
8962
8963 xsecdst = KAUTH_FILESEC_NONE;
8964 if (uap->xsecurity != USER_ADDR_NULL) {
0a7de745 8965 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
91447636 8966 return ciferror;
0a7de745 8967 }
91447636
A
8968 } else {
8969 xsecdst = KAUTH_FILESEC_NONE;
8970 }
8971
8972 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8973
0a7de745 8974 if (xsecdst != KAUTH_FILESEC_NONE) {
91447636 8975 kauth_filesec_free(xsecdst);
0a7de745 8976 }
91447636
A
8977 return ciferror;
8978}
8979
8980int
b0d623f7 8981umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636 8982{
0a7de745 8983 return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
91447636
A
8984}
8985
1c79356b
A
8986/*
8987 * Void all references to file by ripping underlying filesystem
8988 * away from vnode.
8989 */
1c79356b
A
8990/* ARGSUSED */
8991int
b0d623f7 8992revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8993{
2d21ac55 8994 vnode_t vp;
91447636 8995 struct vnode_attr va;
2d21ac55 8996 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8997 int error;
8998 struct nameidata nd;
8999
6d2010ae 9000 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
0a7de745 9001 uap->path, ctx);
55e303ae 9002 error = namei(&nd);
0a7de745
A
9003 if (error) {
9004 return error;
9005 }
1c79356b 9006 vp = nd.ni_vp;
91447636
A
9007
9008 nameidone(&nd);
9009
b0d623f7
A
9010 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
9011 error = ENOTSUP;
9012 goto out;
9013 }
9014
9015 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
9016 error = EBUSY;
9017 goto out;
9018 }
9019
2d21ac55
A
9020#if CONFIG_MACF
9021 error = mac_vnode_check_revoke(ctx, vp);
0a7de745 9022 if (error) {
2d21ac55 9023 goto out;
0a7de745 9024 }
2d21ac55
A
9025#endif
9026
91447636
A
9027 VATTR_INIT(&va);
9028 VATTR_WANTED(&va, va_uid);
0a7de745 9029 if ((error = vnode_getattr(vp, &va, ctx))) {
1c79356b 9030 goto out;
0a7de745 9031 }
2d21ac55 9032 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
0a7de745 9033 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
1c79356b 9034 goto out;
0a7de745
A
9035 }
9036 if (vp->v_usecount > 0 || (vnode_isaliased(vp))) {
2d21ac55 9037 VNOP_REVOKE(vp, REVOKEALL, ctx);
0a7de745 9038 }
1c79356b 9039out:
91447636 9040 vnode_put(vp);
0a7de745 9041 return error;
1c79356b
A
9042}
9043
0b4e3aa0 9044
1c79356b
A
9045/*
9046 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 9047 * The following system calls are designed to support features
1c79356b
A
9048 * which are specific to the HFS & HFS Plus volume formats
9049 */
9050
9bccf70c 9051
1c79356b 9052/*
39236c6e
A
9053 * Obtain attribute information on objects in a directory while enumerating
9054 * the directory.
9055 */
1c79356b
A
9056/* ARGSUSED */
9057int
0a7de745 9058getdirentriesattr(proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 9059{
2d21ac55 9060 vnode_t vp;
91447636
A
9061 struct fileproc *fp;
9062 uio_t auio = NULL;
9063 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
d9a64523
A
9064 uint32_t count = 0, savecount = 0;
9065 uint32_t newstate = 0;
91447636 9066 int error, eofflag;
d9a64523 9067 uint32_t loff = 0;
39037602 9068 struct attrlist attributelist;
2d21ac55 9069 vfs_context_t ctx = vfs_context_current();
91447636 9070 int fd = uap->fd;
0a7de745 9071 char uio_buf[UIO_SIZEOF(1)];
91447636
A
9072 kauth_action_t action;
9073
9074 AUDIT_ARG(fd, fd);
39037602 9075
91447636 9076 /* Get the attributes into kernel space */
2d21ac55 9077 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
0a7de745 9078 return error;
2d21ac55
A
9079 }
9080 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
0a7de745 9081 return error;
2d21ac55 9082 }
39236c6e 9083 savecount = count;
0a7de745
A
9084 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
9085 return error;
2d21ac55 9086 }
91447636
A
9087 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9088 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9089 error = EBADF;
9090 goto out;
9091 }
2d21ac55
A
9092
9093
9094#if CONFIG_MACF
9095 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
9096 fp->f_fglob);
0a7de745 9097 if (error) {
2d21ac55 9098 goto out;
0a7de745 9099 }
2d21ac55
A
9100#endif
9101
9102
0a7de745 9103 if ((error = vnode_getwithref(vp))) {
91447636 9104 goto out;
0a7de745 9105 }
55e303ae 9106
91447636 9107 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 9108
39236c6e 9109unionread:
91447636
A
9110 if (vp->v_type != VDIR) {
9111 (void)vnode_put(vp);
9112 error = EINVAL;
9113 goto out;
9114 }
55e303ae 9115
2d21ac55
A
9116#if CONFIG_MACF
9117 error = mac_vnode_check_readdir(ctx, vp);
9118 if (error != 0) {
9119 (void)vnode_put(vp);
9120 goto out;
9121 }
9122#endif /* MAC */
9123
91447636
A
9124 /* set up the uio structure which will contain the users return buffer */
9125 loff = fp->f_fglob->fg_offset;
39236c6e 9126 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 9127 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 9128
91447636
A
9129 /*
9130 * If the only item requested is file names, we can let that past with
9131 * just LIST_DIRECTORY. If they want any other attributes, that means
9132 * they need SEARCH as well.
9133 */
9134 action = KAUTH_VNODE_LIST_DIRECTORY;
9135 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
0a7de745 9136 attributelist.fileattr || attributelist.dirattr) {
91447636 9137 action |= KAUTH_VNODE_SEARCH;
0a7de745 9138 }
39037602 9139
2d21ac55 9140 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
b0d623f7
A
9141 /* Believe it or not, uap->options only has 32-bits of valid
9142 * info, so truncate before extending again */
39236c6e
A
9143
9144 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
0a7de745 9145 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
39236c6e
A
9146 }
9147
9148 if (error) {
9149 (void) vnode_put(vp);
9150 goto out;
9151 }
9152
9153 /*
9154 * If we've got the last entry of a directory in a union mount
9155 * then reset the eofflag and pretend there's still more to come.
9156 * The next call will again set eofflag and the buffer will be empty,
9157 * so traverse to the underlying directory and do the directory
9158 * read there.
9159 */
9160 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
9161 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
9162 eofflag = 0;
0a7de745 9163 } else { // Empty buffer
39236c6e
A
9164 struct vnode *tvp = vp;
9165 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
9166 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
9167 fp->f_fglob->fg_data = (caddr_t) vp;
9168 fp->f_fglob->fg_offset = 0; // reset index for new dir
9169 count = savecount;
9170 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
9171 vnode_put(tvp);
9172 goto unionread;
9173 }
9174 vp = tvp;
9175 }
2d21ac55 9176 }
39236c6e 9177
91447636 9178 (void)vnode_put(vp);
1c79356b 9179
0a7de745 9180 if (error) {
91447636 9181 goto out;
0a7de745 9182 }
91447636 9183 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 9184
0a7de745 9185 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
91447636 9186 goto out;
0a7de745
A
9187 }
9188 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) {
91447636 9189 goto out;
0a7de745
A
9190 }
9191 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) {
91447636 9192 goto out;
0a7de745 9193 }
1c79356b
A
9194
9195 *retval = eofflag; /* similar to getdirentries */
91447636 9196 error = 0;
2d21ac55 9197out:
91447636 9198 file_drop(fd);
0a7de745 9199 return error; /* return error earlier, an retval of 0 or 1 now */
39236c6e 9200} /* end of getdirentriesattr system call */
1c79356b
A
9201
9202/*
0a7de745
A
9203 * Exchange data between two files
9204 */
1c79356b 9205
1c79356b
A
9206/* ARGSUSED */
9207int
0a7de745 9208exchangedata(__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b 9209{
1c79356b 9210 struct nameidata fnd, snd;
2d21ac55
A
9211 vfs_context_t ctx = vfs_context_current();
9212 vnode_t fvp;
9213 vnode_t svp;
9214 int error;
b0d623f7 9215 u_int32_t nameiflags;
91447636
A
9216 char *fpath = NULL;
9217 char *spath = NULL;
0a7de745
A
9218 int flen = 0, slen = 0;
9219 int from_truncated = 0, to_truncated = 0;
b0d623f7 9220#if CONFIG_FSE
91447636 9221 fse_info f_finfo, s_finfo;
b0d623f7 9222#endif
39037602 9223
1c79356b 9224 nameiflags = 0;
0a7de745
A
9225 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
9226 nameiflags |= FOLLOW;
9227 }
1c79356b 9228
6d2010ae 9229 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
0a7de745 9230 UIO_USERSPACE, uap->path1, ctx);
1c79356b 9231
6d2010ae 9232 error = namei(&fnd);
0a7de745 9233 if (error) {
6d2010ae 9234 goto out2;
0a7de745 9235 }
1c79356b 9236
91447636
A
9237 nameidone(&fnd);
9238 fvp = fnd.ni_vp;
1c79356b 9239
39037602 9240 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
0a7de745 9241 UIO_USERSPACE, uap->path2, ctx);
1c79356b 9242
6d2010ae
A
9243 error = namei(&snd);
9244 if (error) {
91447636 9245 vnode_put(fvp);
55e303ae 9246 goto out2;
6d2010ae 9247 }
91447636 9248 nameidone(&snd);
1c79356b
A
9249 svp = snd.ni_vp;
9250
91447636
A
9251 /*
9252 * if the files are the same, return an inval error
9253 */
1c79356b 9254 if (svp == fvp) {
91447636
A
9255 error = EINVAL;
9256 goto out;
39037602 9257 }
1c79356b 9258
91447636
A
9259 /*
9260 * if the files are on different volumes, return an error
9261 */
9262 if (svp->v_mount != fvp->v_mount) {
0a7de745 9263 error = EXDEV;
91447636
A
9264 goto out;
9265 }
2d21ac55 9266
39236c6e 9267 /* If they're not files, return an error */
0a7de745 9268 if ((vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
9269 error = EINVAL;
9270 goto out;
9271 }
9272
2d21ac55
A
9273#if CONFIG_MACF
9274 error = mac_vnode_check_exchangedata(ctx,
9275 fvp, svp);
0a7de745 9276 if (error) {
2d21ac55 9277 goto out;
0a7de745 9278 }
2d21ac55
A
9279#endif
9280 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
0a7de745 9281 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) {
91447636 9282 goto out;
0a7de745 9283 }
1c79356b 9284
2d21ac55
A
9285 if (
9286#if CONFIG_FSE
0a7de745 9287 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55 9288#endif
0a7de745 9289 kauth_authorize_fileop_has_listeners()) {
2d21ac55
A
9290 GET_PATH(fpath);
9291 GET_PATH(spath);
9292 if (fpath == NULL || spath == NULL) {
9293 error = ENOMEM;
9294 goto out;
9295 }
b0d623f7
A
9296
9297 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
9298 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 9299
2d21ac55
A
9300#if CONFIG_FSE
9301 get_fse_info(fvp, &f_finfo, ctx);
9302 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
9303 if (from_truncated || to_truncated) {
9304 // set it here since only the f_finfo gets reported up to user space
9305 f_finfo.mode |= FSE_TRUNCATED_PATH;
9306 }
2d21ac55 9307#endif
91447636 9308 }
1c79356b 9309 /* Ok, make the call */
2d21ac55 9310 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 9311
91447636 9312 if (error == 0) {
0a7de745 9313 const char *tmpname;
91447636 9314
0a7de745
A
9315 if (fpath != NULL && spath != NULL) {
9316 /* call out to allow 3rd party notification of exchangedata.
9317 * Ignore result of kauth_authorize_fileop call.
9318 */
9319 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
9320 (uintptr_t)fpath, (uintptr_t)spath);
9321 }
9322 name_cache_lock();
91447636 9323
0a7de745
A
9324 tmpname = fvp->v_name;
9325 fvp->v_name = svp->v_name;
9326 svp->v_name = tmpname;
39037602 9327
0a7de745
A
9328 if (fvp->v_parent != svp->v_parent) {
9329 vnode_t tmp;
91447636 9330
0a7de745
A
9331 tmp = fvp->v_parent;
9332 fvp->v_parent = svp->v_parent;
9333 svp->v_parent = tmp;
9334 }
9335 name_cache_unlock();
91447636 9336
2d21ac55 9337#if CONFIG_FSE
0a7de745
A
9338 if (fpath != NULL && spath != NULL) {
9339 add_fsevent(FSE_EXCHANGE, ctx,
9340 FSE_ARG_STRING, flen, fpath,
9341 FSE_ARG_FINFO, &f_finfo,
9342 FSE_ARG_STRING, slen, spath,
9343 FSE_ARG_FINFO, &s_finfo,
9344 FSE_ARG_DONE);
9345 }
2d21ac55 9346#endif
55e303ae
A
9347 }
9348
1c79356b 9349out:
0a7de745
A
9350 if (fpath != NULL) {
9351 RELEASE_PATH(fpath);
9352 }
9353 if (spath != NULL) {
9354 RELEASE_PATH(spath);
9355 }
91447636
A
9356 vnode_put(svp);
9357 vnode_put(fvp);
1c79356b 9358out2:
0a7de745 9359 return error;
91447636 9360}
1c79356b 9361
39236c6e
A
9362/*
9363 * Return (in MB) the amount of freespace on the given vnode's volume.
9364 */
9365uint32_t freespace_mb(vnode_t vp);
9366
9367uint32_t
9368freespace_mb(vnode_t vp)
9369{
39037602 9370 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
0a7de745
A
9371 return ((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9372 vp->v_mount->mnt_vfsstat.f_bsize) >> 20;
39236c6e
A
9373}
9374
316670eb 9375#if CONFIG_SEARCHFS
1c79356b 9376
1c79356b
A
9377/* ARGSUSED */
9378
9379int
b0d623f7 9380searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 9381{
39236c6e 9382 vnode_t vp, tvp;
0a7de745 9383 int i, error = 0;
1c79356b
A
9384 int fserror = 0;
9385 struct nameidata nd;
b0d623f7 9386 struct user64_fssearchblock searchblock;
1c79356b
A
9387 struct searchstate *state;
9388 struct attrlist *returnattrs;
b0d623f7 9389 struct timeval timelimit;
0a7de745 9390 void *searchparams1, *searchparams2;
91447636
A
9391 uio_t auio = NULL;
9392 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 9393 uint32_t nummatches;
1c79356b 9394 int mallocsize;
b0d623f7 9395 uint32_t nameiflags;
2d21ac55 9396 vfs_context_t ctx = vfs_context_current();
0a7de745 9397 char uio_buf[UIO_SIZEOF(1)];
1c79356b 9398
39236c6e 9399 /* Start by copying in fsearchblock parameter list */
0a7de745
A
9400 if (IS_64BIT_PROCESS(p)) {
9401 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9402 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9403 timelimit.tv_usec = searchblock.timelimit.tv_usec;
9404 } else {
9405 struct user32_fssearchblock tmp_searchblock;
9406
9407 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9408 // munge into 64-bit version
9409 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9410 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9411 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9412 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 9413 /*
b0d623f7
A
9414 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9415 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9416 */
0a7de745
A
9417 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9418 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9419 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9420 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9421 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9422 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9423 searchblock.searchattrs = tmp_searchblock.searchattrs;
9424 }
9425 if (error) {
9426 return error;
9427 }
1c79356b 9428
39037602 9429 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 9430 */
39037602 9431 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
0a7de745
A
9432 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) {
9433 return EINVAL;
9434 }
39037602 9435
1c79356b
A
9436 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9437 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
9438 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
0a7de745 9439 /* block. */
fe8ab488
A
9440 /* */
9441 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9442 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9443 /* assumes the size is still 556 bytes it will continue to work */
39037602 9444
91447636 9445 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
0a7de745 9446 sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
1c79356b
A
9447
9448 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9449
9450 /* Now set up the various pointers to the correct place in our newly allocated memory */
9451
9452 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9453 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
0a7de745 9454 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof(struct attrlist));
1c79356b
A
9455
9456 /* Now copy in the stuff given our local variables. */
9457
0a7de745 9458 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) {
1c79356b 9459 goto freeandexit;
0a7de745 9460 }
1c79356b 9461
0a7de745 9462 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) {
1c79356b 9463 goto freeandexit;
0a7de745 9464 }
1c79356b 9465
0a7de745 9466 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) {
1c79356b 9467 goto freeandexit;
0a7de745 9468 }
39037602 9469
0a7de745 9470 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) {
1c79356b 9471 goto freeandexit;
0a7de745 9472 }
1c79356b 9473
39236c6e
A
9474 /*
9475 * When searching a union mount, need to set the
9476 * start flag at the first call on each layer to
9477 * reset state for the new volume.
9478 */
0a7de745 9479 if (uap->options & SRCHFS_START) {
39236c6e 9480 state->ss_union_layer = 0;
0a7de745 9481 } else {
39236c6e 9482 uap->options |= state->ss_union_flags;
0a7de745 9483 }
39236c6e 9484 state->ss_union_flags = 0;
b0d623f7
A
9485
9486 /*
9487 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9488 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
9489 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9490 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
9491 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9492 */
9493
9494 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9495 attrreference_t* string_ref;
9496 u_int32_t* start_length;
39037602 9497 user64_size_t param_length;
b0d623f7
A
9498
9499 /* validate searchparams1 */
39037602 9500 param_length = searchblock.sizeofsearchparams1;
b0d623f7 9501 /* skip the word that specifies length of the buffer */
0a7de745
A
9502 start_length = (u_int32_t*) searchparams1;
9503 start_length = start_length + 1;
9504 string_ref = (attrreference_t*) start_length;
b0d623f7
A
9505
9506 /* ensure no negative offsets or too big offsets */
0a7de745 9507 if (string_ref->attr_dataoffset < 0) {
b0d623f7 9508 error = EINVAL;
39037602 9509 goto freeandexit;
b0d623f7
A
9510 }
9511 if (string_ref->attr_length > MAXPATHLEN) {
9512 error = EINVAL;
9513 goto freeandexit;
9514 }
39037602 9515
b0d623f7
A
9516 /* Check for pointer overflow in the string ref */
9517 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9518 error = EINVAL;
9519 goto freeandexit;
9520 }
9521
9522 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9523 error = EINVAL;
9524 goto freeandexit;
9525 }
9526 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9527 error = EINVAL;
9528 goto freeandexit;
9529 }
9530 }
9531
9532 /* set up the uio structure which will contain the users return buffer */
39236c6e 9533 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
0a7de745 9534 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 9535
91447636 9536 nameiflags = 0;
0a7de745
A
9537 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
9538 nameiflags |= FOLLOW;
9539 }
6d2010ae 9540 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
0a7de745 9541 UIO_USERSPACE, uap->path, ctx);
1c79356b 9542
55e303ae 9543 error = namei(&nd);
0a7de745 9544 if (error) {
1c79356b 9545 goto freeandexit;
0a7de745 9546 }
39236c6e 9547 vp = nd.ni_vp;
91447636 9548 nameidone(&nd);
39236c6e
A
9549
9550 /*
9551 * Switch to the root vnode for the volume
9552 */
9553 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 9554 vnode_put(vp);
0a7de745 9555 if (error) {
39236c6e 9556 goto freeandexit;
0a7de745 9557 }
39236c6e
A
9558 vp = tvp;
9559
9560 /*
9561 * If it's a union mount, the path lookup takes
9562 * us to the top layer. But we may need to descend
9563 * to a lower layer. For non-union mounts the layer
9564 * is always zero.
9565 */
9566 for (i = 0; i < (int) state->ss_union_layer; i++) {
0a7de745 9567 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) {
39236c6e 9568 break;
0a7de745 9569 }
39236c6e
A
9570 tvp = vp;
9571 vp = vp->v_mount->mnt_vnodecovered;
9572 if (vp == NULL) {
fe8ab488 9573 vnode_put(tvp);
39236c6e
A
9574 error = ENOENT;
9575 goto freeandexit;
9576 }
813fb2f6 9577 error = vnode_getwithref(vp);
39236c6e 9578 vnode_put(tvp);
0a7de745 9579 if (error) {
813fb2f6 9580 goto freeandexit;
0a7de745 9581 }
39236c6e 9582 }
1c79356b 9583
6d2010ae
A
9584#if CONFIG_MACF
9585 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9586 if (error) {
9587 vnode_put(vp);
9588 goto freeandexit;
9589 }
9590#endif
9591
39037602 9592
1c79356b 9593 /*
39037602 9594 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 9595 * before and sometimes the underlying code doesnt deal with it well.
1c79356b 9596 */
0a7de745 9597 if (searchblock.maxmatches == 0) {
1c79356b
A
9598 nummatches = 0;
9599 goto saveandexit;
0a7de745 9600 }
1c79356b
A
9601
9602 /*
39236c6e 9603 * Allright, we have everything we need, so lets make that call.
39037602 9604 *
39236c6e
A
9605 * We keep special track of the return value from the file system:
9606 * EAGAIN is an acceptable error condition that shouldn't keep us
9607 * from copying out any results...
1c79356b
A
9608 */
9609
6d2010ae 9610 fserror = VNOP_SEARCHFS(vp,
0a7de745
A
9611 searchparams1,
9612 searchparams2,
9613 &searchblock.searchattrs,
9614 (u_long)searchblock.maxmatches,
9615 &timelimit,
9616 returnattrs,
9617 &nummatches,
9618 (u_long)uap->scriptcode,
9619 (u_long)uap->options,
9620 auio,
9621 (struct searchstate *) &state->ss_fsstate,
9622 ctx);
39037602 9623
39236c6e
A
9624 /*
9625 * If it's a union mount we need to be called again
9626 * to search the mounted-on filesystem.
9627 */
9628 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9629 state->ss_union_flags = SRCHFS_START;
0a7de745 9630 state->ss_union_layer++; // search next layer down
39236c6e
A
9631 fserror = EAGAIN;
9632 }
9633
6d2010ae
A
9634saveandexit:
9635
9636 vnode_put(vp);
9637
9638 /* Now copy out the stuff that needs copying out. That means the number of matches, the
0a7de745 9639 * search state. Everything was already put into he return buffer by the vop call. */
6d2010ae 9640
0a7de745 9641 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) {
6d2010ae 9642 goto freeandexit;
0a7de745 9643 }
6d2010ae 9644
0a7de745 9645 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) {
6d2010ae 9646 goto freeandexit;
0a7de745 9647 }
39037602 9648
6d2010ae
A
9649 error = fserror;
9650
9651freeandexit:
9652
0a7de745 9653 FREE(searchparams1, M_TEMP);
6d2010ae 9654
0a7de745 9655 return error;
6d2010ae
A
9656} /* end of searchfs system call */
9657
316670eb
A
9658#else /* CONFIG_SEARCHFS */
9659
9660int
9661searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9662{
0a7de745 9663 return ENOTSUP;
316670eb
A
9664}
9665
9666#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9667
9668
9669lck_grp_attr_t * nspace_group_attr;
9670lck_attr_t * nspace_lock_attr;
9671lck_grp_t * nspace_mutex_group;
9672
9673lck_mtx_t nspace_handler_lock;
9674lck_mtx_t nspace_handler_exclusion_lock;
9675
0a7de745
A
9676time_t snapshot_timestamp = 0;
9677int nspace_allow_virtual_devs = 0;
6d2010ae
A
9678
9679void nspace_handler_init(void);
9680
/*
 * One slot of the nspace_items table: a vnode operation that is waiting
 * for a namespace/snapshot handler.  Slots are filled in and waited on by
 * resolve_nspace_item_ext(); a slot whose flags are 0 is free.
 */
9681typedef struct nspace_item_info {
/* vnode being resolved; its address is also the sleep/wakeup channel for waiters */
9682 struct vnode *vp;
/* caller-supplied argument (NSPACE_REARM_NO_ARG is stored as NULL) */
9683 void *arg;
/* operation code; together with vp it identifies an existing entry */
9684 uint64_t op;
/* vnode_vid(vp) captured when the entry was created */
9685 uint32_t vid;
/* NSPACE_ITEM_* state and event-type bits */
9686 uint32_t flags;
/* zeroed on creation; returned as the error to waiters when the item is CANCELLED */
9687 uint32_t token;
/* number of threads waiting on this entry; the last one out clears the slot */
9688 uint32_t refcount;
9689} nspace_item_info;
9690
9691#define MAX_NSPACE_ITEMS 128
9692nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
0a7de745
A
9693uint32_t nspace_item_idx = 0; // also used as the sleep/wakeup rendezvous address
9694uint32_t nspace_token_id = 0;
6d2010ae
A
9695uint32_t nspace_handler_timeout = 15; // seconds
9696
9697#define NSPACE_ITEM_NEW 0x0001
9698#define NSPACE_ITEM_PROCESSING 0x0002
9699#define NSPACE_ITEM_DEAD 0x0004
9700#define NSPACE_ITEM_CANCELLED 0x0008
9701#define NSPACE_ITEM_DONE 0x0010
9702#define NSPACE_ITEM_RESET_TIMER 0x0020
9703
9704#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9705#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9706
fe8ab488 9707#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9708
9709//#pragma optimization_level 0
9710
9711typedef enum {
9712 NSPACE_HANDLER_NSPACE = 0,
9713 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
9714
9715 NSPACE_HANDLER_COUNT,
9716} nspace_type_t;
9717
9718typedef struct {
9719 uint64_t handler_tid;
9720 struct proc *handler_proc;
9721 int handler_busy;
9722} nspace_handler_t;
9723
9724nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9725
39236c6e
A
9726/* namespace fsctl functions */
9727static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9728static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9729static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9730static nspace_type_t nspace_type_for_op(uint64_t op);
9731static int nspace_is_special_process(struct proc *proc);
9732static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9733static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
0a7de745 9734static int validate_namespace_args(int is64bit, int size);
39236c6e
A
9735static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9736
9737
0a7de745
A
9738static inline int
9739nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
6d2010ae 9740{
0a7de745
A
9741 switch (nspace_type) {
9742 case NSPACE_HANDLER_NSPACE:
9743 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9744 case NSPACE_HANDLER_SNAPSHOT:
9745 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9746 default:
9747 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9748 return 0;
6d2010ae
A
9749 }
9750}
9751
0a7de745
A
9752static inline int
9753nspace_item_flags_for_type(nspace_type_t nspace_type)
6d2010ae 9754{
0a7de745
A
9755 switch (nspace_type) {
9756 case NSPACE_HANDLER_NSPACE:
9757 return NSPACE_ITEM_NSPACE_EVENT;
9758 case NSPACE_HANDLER_SNAPSHOT:
9759 return NSPACE_ITEM_SNAPSHOT_EVENT;
9760 default:
9761 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9762 return 0;
6d2010ae
A
9763 }
9764}
9765
0a7de745
A
9766static inline int
9767nspace_open_flags_for_type(nspace_type_t nspace_type)
6d2010ae 9768{
0a7de745
A
9769 switch (nspace_type) {
9770 case NSPACE_HANDLER_NSPACE:
9771 return FREAD | FWRITE | O_EVTONLY;
9772 case NSPACE_HANDLER_SNAPSHOT:
9773 return FREAD | O_EVTONLY;
9774 default:
9775 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9776 return 0;
6d2010ae
A
9777 }
9778}
9779
0a7de745
A
9780static inline nspace_type_t
9781nspace_type_for_op(uint64_t op)
6d2010ae 9782{
0a7de745
A
9783 switch (op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9784 case NAMESPACE_HANDLER_NSPACE_EVENT:
9785 return NSPACE_HANDLER_NSPACE;
9786 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9787 return NSPACE_HANDLER_SNAPSHOT;
9788 default:
9789 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9790 return NSPACE_HANDLER_NSPACE;
6d2010ae
A
9791 }
9792}
9793
0a7de745
A
9794static inline int
9795nspace_is_special_process(struct proc *proc)
6d2010ae
A
9796{
9797 int i;
9798 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
0a7de745 9799 if (proc == nspace_handlers[i].handler_proc) {
6d2010ae 9800 return 1;
0a7de745 9801 }
6d2010ae
A
9802 }
9803 return 0;
9804}
9805
9806void
9807nspace_handler_init(void)
9808{
9809 nspace_lock_attr = lck_attr_alloc_init();
9810 nspace_group_attr = lck_grp_attr_alloc_init();
9811 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9812 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9813 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9814 memset(&nspace_items[0], 0, sizeof(nspace_items));
9815}
9816
9817void
9818nspace_proc_exit(struct proc *p)
9819{
9820 int i, event_mask = 0;
39037602 9821
6d2010ae
A
9822 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9823 if (p == nspace_handlers[i].handler_proc) {
9824 event_mask |= nspace_item_flags_for_type(i);
9825 nspace_handlers[i].handler_tid = 0;
9826 nspace_handlers[i].handler_proc = NULL;
9827 }
9828 }
9829
9830 if (event_mask == 0) {
9831 return;
9832 }
39037602
A
9833
9834 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9835 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9836 // if this process was the snapshot handler, zero snapshot_timeout
9837 snapshot_timestamp = 0;
9838 }
39037602 9839
6d2010ae
A
9840 //
9841 // unblock anyone that's waiting for the handler that died
9842 //
0a7de745 9843 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9844 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
0a7de745 9845 if (nspace_items[i].flags & event_mask) {
6d2010ae
A
9846 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9847 vnode_lock_spin(nspace_items[i].vp);
9848 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9849 vnode_unlock(nspace_items[i].vp);
9850 }
9851 nspace_items[i].vp = NULL;
9852 nspace_items[i].vid = 0;
9853 nspace_items[i].flags = NSPACE_ITEM_DONE;
9854 nspace_items[i].token = 0;
39037602 9855
6d2010ae
A
9856 wakeup((caddr_t)&(nspace_items[i].vp));
9857 }
9858 }
9859 }
39037602 9860
6d2010ae
A
9861 wakeup((caddr_t)&nspace_item_idx);
9862 lck_mtx_unlock(&nspace_handler_lock);
9863}
9864
9865
39037602 9866int
6d2010ae
A
9867resolve_nspace_item(struct vnode *vp, uint64_t op)
9868{
9869 return resolve_nspace_item_ext(vp, op, NULL);
9870}
9871
39037602 9872int
6d2010ae
A
9873resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9874{
9875 int i, error, keep_waiting;
9876 struct timespec ts;
9877 nspace_type_t nspace_type = nspace_type_for_op(op);
9878
9879 // only allow namespace events on regular files, directories and symlinks.
9880 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9881 return 0;
9882 }
9883
9884 //
9885 // if this is a snapshot event and the vnode is on a
9886 // disk image just pretend nothing happened since any
9887 // change to the disk image will cause the disk image
9888 // itself to get backed up and this avoids multi-way
9889 // deadlocks between the snapshot handler and the ever
9890 // popular diskimages-helper process. the variable
9891 // nspace_allow_virtual_devs allows this behavior to
9892 // be overridden (for use by the Mobile TimeMachine
9893 // testing infrastructure which uses disk images)
9894 //
0a7de745 9895 if ((op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
6d2010ae
A
9896 && (vp->v_mount != NULL)
9897 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9898 && !nspace_allow_virtual_devs) {
6d2010ae
A
9899 return 0;
9900 }
9901
9902 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9903 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9904 return 0;
9905 }
9906
9907 if (nspace_is_special_process(current_proc())) {
9908 return EDEADLK;
9909 }
9910
9911 lck_mtx_lock(&nspace_handler_lock);
9912
9913retry:
0a7de745 9914 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9915 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9916 break;
9917 }
9918 }
9919
9920 if (i >= MAX_NSPACE_ITEMS) {
0a7de745 9921 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9922 if (nspace_items[i].flags == 0) {
9923 break;
9924 }
9925 }
9926 } else {
9927 nspace_items[i].refcount++;
9928 }
39037602 9929
6d2010ae
A
9930 if (i >= MAX_NSPACE_ITEMS) {
9931 ts.tv_sec = nspace_handler_timeout;
9932 ts.tv_nsec = 0;
9933
0a7de745 9934 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS | PCATCH, "nspace-no-space", &ts);
6d2010ae
A
9935 if (error == 0) {
9936 // an entry got free'd up, go see if we can get a slot
9937 goto retry;
9938 } else {
9939 lck_mtx_unlock(&nspace_handler_lock);
9940 return error;
9941 }
9942 }
9943
9944 //
9945 // if it didn't already exist, add it. if it did exist
9946 // we'll get woken up when someone does a wakeup() on
9947 // the slot in the nspace_items table.
9948 //
9949 if (vp != nspace_items[i].vp) {
9950 nspace_items[i].vp = vp;
39236c6e 9951 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9952 nspace_items[i].op = op;
9953 nspace_items[i].vid = vnode_vid(vp);
9954 nspace_items[i].flags = NSPACE_ITEM_NEW;
9955 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9956 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9957 if (arg) {
9958 vnode_lock_spin(vp);
9959 vp->v_flag |= VNEEDSSNAPSHOT;
9960 vnode_unlock(vp);
9961 }
9962 }
9963
9964 nspace_items[i].token = 0;
9965 nspace_items[i].refcount = 1;
39037602 9966
6d2010ae
A
9967 wakeup((caddr_t)&nspace_item_idx);
9968 }
9969
9970 //
9971 // Now go to sleep until the handler does a wakeup on this
9972 // slot in the nspace_items table (or we timeout).
9973 //
9974 keep_waiting = 1;
0a7de745 9975 while (keep_waiting) {
6d2010ae
A
9976 ts.tv_sec = nspace_handler_timeout;
9977 ts.tv_nsec = 0;
0a7de745 9978 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS | PCATCH, "namespace-done", &ts);
6d2010ae
A
9979
9980 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9981 error = 0;
9982 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9983 error = nspace_items[i].token;
9984 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9985 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9986 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9987 continue;
9988 } else {
9989 error = ETIMEDOUT;
9990 }
9991 } else if (error == 0) {
9992 // hmmm, why did we get woken up?
9993 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
0a7de745 9994 nspace_items[i].token);
39037602 9995 }
6d2010ae
A
9996
9997 if (--nspace_items[i].refcount == 0) {
9998 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9999 nspace_items[i].arg = NULL;
10000 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
10001 nspace_items[i].flags = 0; // this clears it for re-use
10002 }
10003 wakeup(&nspace_token_id);
10004 keep_waiting = 0;
10005 }
10006
10007 lck_mtx_unlock(&nspace_handler_lock);
10008
10009 return error;
10010}
10011
0a7de745
A
10012int
10013nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 10014{
39037602 10015 int snapshot_error = 0;
6d2010ae 10016
39037602
A
10017 if (vp == NULL) {
10018 return 0;
10019 }
10020
10021 /* Swap files are special; skip them */
10022 if (vnode_isswap(vp)) {
10023 return 0;
10024 }
10025
10026 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
10027 // the change time is within this epoch
10028 int error;
10029
10030 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
10031 if (error == EDEADLK) {
10032 snapshot_error = 0;
10033 } else if (error) {
10034 if (error == EAGAIN) {
10035 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
10036 } else if (error == EINTR) {
10037 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
10038 snapshot_error = EINTR;
10039 }
10040 }
10041 }
10042
10043 return snapshot_error;
10044}
10045
10046int
10047get_nspace_item_status(struct vnode *vp, int32_t *status)
10048{
10049 int i;
10050
10051 lck_mtx_lock(&nspace_handler_lock);
0a7de745 10052 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
39037602
A
10053 if (nspace_items[i].vp == vp) {
10054 break;
6d2010ae
A
10055 }
10056 }
10057
10058 if (i >= MAX_NSPACE_ITEMS) {
10059 lck_mtx_unlock(&nspace_handler_lock);
10060 return ENOENT;
10061 }
10062
10063 *status = nspace_items[i].flags;
10064 lck_mtx_unlock(&nspace_handler_lock);
10065 return 0;
10066}
39037602 10067
6d2010ae
A
10068
10069#if 0
10070static int
10071build_volfs_path(struct vnode *vp, char *path, int *len)
10072{
10073 struct vnode_attr va;
10074 int ret;
10075
10076 VATTR_INIT(&va);
10077 VATTR_WANTED(&va, va_fsid);
10078 VATTR_WANTED(&va, va_fileid);
10079
10080 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
10081 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
10082 ret = -1;
10083 } else {
10084 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
10085 ret = 0;
10086 }
10087
10088 return ret;
10089}
10090#endif
10091
10092//
10093// Note: this function does NOT check permissions on all of the
10094// parent directories leading to this vnode. It should only be
10095// called on behalf of a root process. Otherwise a process may
10096// get access to a file because the file itself is readable even
10097// though its parent directories would prevent access.
10098//
10099static int
10100vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
10101{
10102 int error, action;
10103
10104 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10105 return error;
10106 }
10107
10108#if CONFIG_MACF
10109 error = mac_vnode_check_open(ctx, vp, fmode);
0a7de745 10110 if (error) {
6d2010ae 10111 return error;
0a7de745 10112 }
6d2010ae 10113#endif
1c79356b 10114
6d2010ae
A
10115 /* compute action to be authorized */
10116 action = 0;
10117 if (fmode & FREAD) {
10118 action |= KAUTH_VNODE_READ_DATA;
10119 }
10120 if (fmode & (FWRITE | O_TRUNC)) {
10121 /*
10122 * If we are writing, appending, and not truncating,
10123 * indicate that we are appending so that if the
10124 * UF_APPEND or SF_APPEND bits are set, we do not deny
10125 * the open.
10126 */
10127 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
10128 action |= KAUTH_VNODE_APPEND_DATA;
10129 } else {
10130 action |= KAUTH_VNODE_WRITE_DATA;
10131 }
10132 }
1c79356b 10133
0a7de745 10134 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
6d2010ae 10135 return error;
0a7de745 10136 }
39037602 10137
1c79356b 10138
6d2010ae
A
10139 //
10140 // if the vnode is tagged VOPENEVT and the current process
10141 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
10142 // flag to the open mode so that this open won't count against
10143 // the vnode when carbon delete() does a vnode_isinuse() to see
10144 // if a file is currently in use. this allows spotlight
10145 // importers to not interfere with carbon apps that depend on
10146 // the no-delete-if-busy semantics of carbon delete().
10147 //
10148 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
10149 fmode |= O_EVTONLY;
10150 }
1c79356b 10151
0a7de745 10152 if ((error = VNOP_OPEN(vp, fmode, ctx))) {
6d2010ae
A
10153 return error;
10154 }
0a7de745 10155 if ((error = vnode_ref_ext(vp, fmode, 0))) {
6d2010ae
A
10156 VNOP_CLOSE(vp, fmode, ctx);
10157 return error;
10158 }
1c79356b 10159
39037602 10160 /* Call out to allow 3rd party notification of open.
6d2010ae
A
10161 * Ignore result of kauth_authorize_fileop call.
10162 */
4b17d6b6
A
10163#if CONFIG_MACF
10164 mac_vnode_notify_open(ctx, vp, fmode);
10165#endif
39037602 10166 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
0a7de745 10167 (uintptr_t)vp, 0);
1c79356b 10168
1c79356b 10169
6d2010ae
A
10170 return 0;
10171}
1c79356b 10172
6d2010ae 10173static int
39236c6e 10174wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 10175{
39037602
A
10176 int i;
10177 int error = 0;
10178 int unblock = 0;
6d2010ae 10179 task_t curtask;
39037602 10180
6d2010ae
A
10181 lck_mtx_lock(&nspace_handler_exclusion_lock);
10182 if (nspace_handlers[nspace_type].handler_busy) {
10183 lck_mtx_unlock(&nspace_handler_exclusion_lock);
10184 return EBUSY;
10185 }
39037602 10186
6d2010ae
A
10187 nspace_handlers[nspace_type].handler_busy = 1;
10188 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
10189
10190 /*
6d2010ae
A
10191 * Any process that gets here will be one of the namespace handlers.
10192 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
10193 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 10194 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
10195 * process.
10196 */
10197 curtask = current_task();
0a7de745 10198 bsd_set_dependency_capable(curtask);
39037602 10199
6d2010ae
A
10200 lck_mtx_lock(&nspace_handler_lock);
10201 if (nspace_handlers[nspace_type].handler_proc == NULL) {
10202 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
10203 nspace_handlers[nspace_type].handler_proc = current_proc();
10204 }
39037602
A
10205
10206 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
0a7de745 10207 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602
A
10208 error = EINVAL;
10209 }
10210
6d2010ae 10211 while (error == 0) {
39037602
A
10212 /* Try to find matching namespace item */
10213 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 10214 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
10215 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
10216 break;
6d2010ae 10217 }
6d2010ae
A
10218 }
10219 }
39236c6e 10220
39037602
A
10221 if (i >= MAX_NSPACE_ITEMS) {
10222 /* Nothing is there yet. Wait for wake up and retry */
0a7de745 10223 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS | PCATCH, "namespace-items", 0);
6d2010ae 10224 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 10225 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
10226 error = EINVAL;
10227 break;
10228 }
39037602 10229 continue;
6d2010ae 10230 }
39037602
A
10231
10232 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
10233 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
10234 nspace_items[i].token = ++nspace_token_id;
10235
10236 assert(nspace_items[i].vp);
10237 struct fileproc *fp;
10238 int32_t indx;
10239 int32_t fmode;
10240 struct proc *p = current_proc();
10241 vfs_context_t ctx = vfs_context_current();
10242 struct vnode_attr va;
10243 bool vn_get_succsessful = false;
10244 bool vn_open_successful = false;
10245 bool fp_alloc_successful = false;
10246
10247 /*
10248 * Use vnode pointer to acquire a file descriptor for
10249 * hand-off to userland
10250 */
10251 fmode = nspace_open_flags_for_type(nspace_type);
10252 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
0a7de745
A
10253 if (error) {
10254 goto cleanup;
10255 }
39037602
A
10256 vn_get_succsessful = true;
10257
10258 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
0a7de745
A
10259 if (error) {
10260 goto cleanup;
10261 }
39037602
A
10262 vn_open_successful = true;
10263
10264 error = falloc(p, &fp, &indx, ctx);
0a7de745
A
10265 if (error) {
10266 goto cleanup;
10267 }
39037602
A
10268 fp_alloc_successful = true;
10269
10270 fp->f_fglob->fg_flag = fmode;
10271 fp->f_fglob->fg_ops = &vnops;
10272 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
10273
10274 proc_fdlock(p);
10275 procfdtbl_releasefd(p, indx, NULL);
10276 fp_drop(p, indx, fp, 1);
10277 proc_fdunlock(p);
10278
10279 /*
10280 * All variants of the namespace handler struct support these three fields:
10281 * token, flags, and the FD pointer
10282 */
10283 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
0a7de745
A
10284 if (error) {
10285 goto cleanup;
10286 }
39037602 10287 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
0a7de745
A
10288 if (error) {
10289 goto cleanup;
10290 }
39037602 10291 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
0a7de745
A
10292 if (error) {
10293 goto cleanup;
10294 }
39037602
A
10295
10296 /*
10297 * Handle optional fields:
10298 * extended version support an info ptr (offset, length), and the
10299 *
10300 * namedata version supports a unique per-link object ID
10301 *
10302 */
10303 if (nhd->infoptr) {
10304 uio_t uio = (uio_t)nspace_items[i].arg;
10305 uint64_t u_offset, u_length;
10306
10307 if (uio) {
10308 u_offset = uio_offset(uio);
10309 u_length = uio_resid(uio);
10310 } else {
10311 u_offset = 0;
10312 u_length = 0;
10313 }
10314 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
0a7de745
A
10315 if (error) {
10316 goto cleanup;
10317 }
39037602 10318 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
0a7de745
A
10319 if (error) {
10320 goto cleanup;
10321 }
39037602
A
10322 }
10323
10324 if (nhd->objid) {
10325 VATTR_INIT(&va);
10326 VATTR_WANTED(&va, va_linkid);
10327 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
0a7de745
A
10328 if (error) {
10329 goto cleanup;
10330 }
39037602
A
10331
10332 uint64_t linkid = 0;
0a7de745 10333 if (VATTR_IS_SUPPORTED(&va, va_linkid)) {
39037602
A
10334 linkid = (uint64_t)va.va_linkid;
10335 }
10336 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
10337 }
10338cleanup:
10339 if (error) {
0a7de745
A
10340 if (fp_alloc_successful) {
10341 fp_free(p, indx, fp);
10342 }
10343 if (vn_open_successful) {
10344 vn_close(nspace_items[i].vp, fmode, ctx);
10345 }
39037602
A
10346 unblock = 1;
10347 }
10348
0a7de745
A
10349 if (vn_get_succsessful) {
10350 vnode_put(nspace_items[i].vp);
10351 }
39037602
A
10352
10353 break;
6d2010ae 10354 }
39037602 10355
6d2010ae
A
10356 if (unblock) {
10357 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
10358 vnode_lock_spin(nspace_items[i].vp);
10359 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10360 vnode_unlock(nspace_items[i].vp);
10361 }
10362 nspace_items[i].vp = NULL;
10363 nspace_items[i].vid = 0;
10364 nspace_items[i].flags = NSPACE_ITEM_DONE;
10365 nspace_items[i].token = 0;
39037602 10366
6d2010ae
A
10367 wakeup((caddr_t)&(nspace_items[i].vp));
10368 }
39037602 10369
6d2010ae
A
10370 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
10371 // just go through every snapshot event and unblock it immediately.
10372 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
0a7de745 10373 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
10374 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
10375 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
10376 nspace_items[i].vp = NULL;
10377 nspace_items[i].vid = 0;
10378 nspace_items[i].flags = NSPACE_ITEM_DONE;
10379 nspace_items[i].token = 0;
39037602
A
10380
10381 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
10382 }
10383 }
10384 }
10385 }
10386 }
39037602 10387
6d2010ae 10388 lck_mtx_unlock(&nspace_handler_lock);
39037602 10389
6d2010ae
A
10390 lck_mtx_lock(&nspace_handler_exclusion_lock);
10391 nspace_handlers[nspace_type].handler_busy = 0;
10392 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 10393
6d2010ae
A
10394 return error;
10395}
1c79356b 10396
0a7de745
A
10397static inline int
10398validate_namespace_args(int is64bit, int size)
10399{
39236c6e
A
10400 if (is64bit) {
10401 /* Must be one of these */
10402 if (size == sizeof(user64_namespace_handler_info)) {
10403 goto sizeok;
10404 }
10405 if (size == sizeof(user64_namespace_handler_info_ext)) {
10406 goto sizeok;
10407 }
10408 if (size == sizeof(user64_namespace_handler_data)) {
10409 goto sizeok;
10410 }
10411 return EINVAL;
0a7de745 10412 } else {
39236c6e
A
10413 /* 32 bit -- must be one of these */
10414 if (size == sizeof(user32_namespace_handler_info)) {
10415 goto sizeok;
10416 }
10417 if (size == sizeof(user32_namespace_handler_info_ext)) {
10418 goto sizeok;
10419 }
10420 if (size == sizeof(user32_namespace_handler_data)) {
10421 goto sizeok;
10422 }
10423 return EINVAL;
10424 }
10425
10426sizeok:
10427
10428 return 0;
39236c6e 10429}
1c79356b 10430
0a7de745
A
10431static int
10432process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
6d2010ae
A
10433{
10434 int error = 0;
39236c6e 10435 namespace_handler_data nhd;
39037602 10436
0a7de745 10437 bzero(&nhd, sizeof(namespace_handler_data));
39236c6e 10438
6d2010ae
A
10439 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10440 return error;
10441 }
39037602 10442
0a7de745 10443 error = validate_namespace_args(is64bit, size);
39236c6e
A
10444 if (error) {
10445 return error;
6d2010ae 10446 }
39037602 10447
39236c6e
A
10448 /* Copy in the userland pointers into our kernel-only struct */
10449
6d2010ae 10450 if (is64bit) {
39236c6e
A
10451 /* 64 bit userland structures */
10452 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
10453 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
10454 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
10455
10456 /* If the size is greater than the standard info struct, add in extra fields */
10457 if (size > (sizeof(user64_namespace_handler_info))) {
10458 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
10459 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
10460 }
10461 if (size == (sizeof(user64_namespace_handler_data))) {
10462 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
10463 }
10464 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 10465 }
0a7de745 10466 } else {
39236c6e
A
10467 /* 32 bit userland structures */
10468 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10469 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10470 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 10471
39236c6e
A
10472 if (size > (sizeof(user32_namespace_handler_info))) {
10473 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10474 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10475 }
10476 if (size == (sizeof(user32_namespace_handler_data))) {
10477 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10478 }
10479 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
10480 }
10481 }
39037602 10482
39236c6e 10483 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 10484}
1c79356b 10485
5ba3f43e
A
10486static unsigned long
10487fsctl_bogus_command_compat(unsigned long cmd)
10488{
5ba3f43e
A
10489 switch (cmd) {
10490 case IOCBASECMD(FSIOC_SYNC_VOLUME):
0a7de745 10491 return FSIOC_SYNC_VOLUME;
5ba3f43e 10492 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
0a7de745 10493 return FSIOC_ROUTEFS_SETROUTEID;
5ba3f43e 10494 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
0a7de745 10495 return FSIOC_SET_PACKAGE_EXTS;
5ba3f43e 10496 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
0a7de745 10497 return FSIOC_NAMESPACE_HANDLER_GET;
5ba3f43e 10498 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
0a7de745 10499 return FSIOC_OLD_SNAPSHOT_HANDLER_GET;
5ba3f43e 10500 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
0a7de745 10501 return FSIOC_SNAPSHOT_HANDLER_GET_EXT;
5ba3f43e 10502 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
0a7de745 10503 return FSIOC_NAMESPACE_HANDLER_UPDATE;
5ba3f43e 10504 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
0a7de745 10505 return FSIOC_NAMESPACE_HANDLER_UNBLOCK;
5ba3f43e 10506 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
0a7de745 10507 return FSIOC_NAMESPACE_HANDLER_CANCEL;
5ba3f43e 10508 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
0a7de745 10509 return FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME;
5ba3f43e 10510 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
0a7de745 10511 return FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS;
5ba3f43e 10512 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
0a7de745 10513 return FSIOC_SET_FSTYPENAME_OVERRIDE;
5ba3f43e 10514 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
0a7de745 10515 return DISK_CONDITIONER_IOC_GET;
5ba3f43e 10516 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
0a7de745 10517 return DISK_CONDITIONER_IOC_SET;
5ba3f43e 10518 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
0a7de745 10519 return FSIOC_FIOSEEKHOLE;
5ba3f43e 10520 case IOCBASECMD(FSIOC_FIOSEEKDATA):
0a7de745 10521 return FSIOC_FIOSEEKDATA;
5ba3f43e 10522 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
0a7de745 10523 return SPOTLIGHT_IOC_GET_MOUNT_TIME;
5ba3f43e 10524 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
0a7de745 10525 return SPOTLIGHT_IOC_GET_LAST_MTIME;
5ba3f43e
A
10526 }
10527
0a7de745 10528 return cmd;
5ba3f43e
A
10529}
10530
1c79356b
A
10531/*
10532 * Make a filesystem-specific control call:
10533 */
1c79356b 10534/* ARGSUSED */
b0d623f7
A
10535static int
10536fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 10537{
0a7de745 10538 int error = 0;
91447636 10539 boolean_t is64bit;
2d21ac55 10540 u_int size;
1c79356b 10541#define STK_PARAMS 128
39037602 10542 char stkbuf[STK_PARAMS] = {0};
1c79356b 10543 caddr_t data, memp;
b0d623f7 10544 vnode_t vp = *arg_vp;
1c79356b 10545
5ba3f43e
A
10546 cmd = fsctl_bogus_command_compat(cmd);
10547
1c79356b 10548 size = IOCPARM_LEN(cmd);
0a7de745
A
10549 if (size > IOCPARM_MAX) {
10550 return EINVAL;
10551 }
1c79356b 10552
6d2010ae 10553 is64bit = proc_is64bit(p);
91447636 10554
1c79356b 10555 memp = NULL;
04b8595b 10556
0a7de745
A
10557 if (size > sizeof(stkbuf)) {
10558 if ((memp = (caddr_t)kalloc(size)) == 0) {
10559 return ENOMEM;
10560 }
1c79356b
A
10561 data = memp;
10562 } else {
91447636 10563 data = &stkbuf[0];
1c79356b 10564 };
39037602 10565
1c79356b
A
10566 if (cmd & IOC_IN) {
10567 if (size) {
b0d623f7 10568 error = copyin(udata, data, size);
39037602 10569 if (error) {
fe8ab488 10570 if (memp) {
0a7de745 10571 kfree(memp, size);
fe8ab488
A
10572 }
10573 return error;
10574 }
1c79356b 10575 } else {
6d2010ae
A
10576 if (is64bit) {
10577 *(user_addr_t *)data = udata;
0a7de745 10578 } else {
6d2010ae
A
10579 *(uint32_t *)data = (uint32_t)udata;
10580 }
1c79356b
A
10581 };
10582 } else if ((cmd & IOC_OUT) && size) {
10583 /*
10584 * Zero the buffer so the user always
10585 * gets back something deterministic.
10586 */
10587 bzero(data, size);
91447636 10588 } else if (cmd & IOC_VOID) {
b0d623f7 10589 if (is64bit) {
6d2010ae 10590 *(user_addr_t *)data = udata;
0a7de745 10591 } else {
6d2010ae 10592 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 10593 }
91447636 10594 }
1c79356b 10595
b0d623f7 10596 /* Check to see if it's a generic command */
5ba3f43e 10597 switch (cmd) {
0a7de745
A
10598 case FSIOC_SYNC_VOLUME: {
10599 mount_t mp = vp->v_mount;
10600 int arg = *(uint32_t*)data;
91447636 10601
0a7de745
A
10602 /* record vid of vp so we can drop it below. */
10603 uint32_t vvid = vp->v_id;
b0d623f7 10604
0a7de745
A
10605 /*
10606 * Then grab mount_iterref so that we can release the vnode.
10607 * Without this, a thread may call vnode_iterate_prepare then
10608 * get into a deadlock because we've never released the root vp
10609 */
10610 error = mount_iterref(mp, 0);
10611 if (error) {
10612 break;
10613 }
10614 vnode_put(vp);
b0d623f7 10615
0a7de745
A
10616 /* issue the sync for this volume */
10617 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
fe8ab488 10618
0a7de745
A
10619 /*
10620 * Then release the mount_iterref once we're done syncing; it's not
10621 * needed for the VNOP_IOCTL below
10622 */
10623 mount_iterdrop(mp);
fe8ab488 10624
0a7de745
A
10625 if (arg & FSCTL_SYNC_FULLSYNC) {
10626 /* re-obtain vnode iocount on the root vp, if possible */
10627 error = vnode_getwithvid(vp, vvid);
10628 if (error == 0) {
10629 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10630 vnode_put(vp);
b0d623f7
A
10631 }
10632 }
0a7de745
A
10633 /* mark the argument VP as having been released */
10634 *arg_vp = NULL;
10635 }
10636 break;
b0d623f7 10637
0a7de745 10638 case FSIOC_ROUTEFS_SETROUTEID: {
490019cf 10639#if ROUTEFS
0a7de745
A
10640 char routepath[MAXPATHLEN];
10641 size_t len = 0;
39037602 10642
0a7de745
A
10643 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10644 break;
10645 }
10646 bzero(routepath, MAXPATHLEN);
10647 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10648 if (error) {
10649 break;
10650 }
10651 error = routefs_kernel_mount(routepath);
10652 if (error) {
10653 break;
10654 }
490019cf 10655#endif
0a7de745
A
10656 }
10657 break;
10658
10659 case FSIOC_SET_PACKAGE_EXTS: {
10660 user_addr_t ext_strings;
10661 uint32_t num_entries;
10662 uint32_t max_width;
10663
10664 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) {
10665 break;
490019cf 10666 }
490019cf 10667
0a7de745
A
10668 if ((is64bit && size != sizeof(user64_package_ext_info))
10669 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10670 // either you're 64-bit and passed a 64-bit struct or
10671 // you're 32-bit and passed a 32-bit struct. otherwise
10672 // it's not ok.
10673 error = EINVAL;
10674 break;
10675 }
b0d623f7 10676
0a7de745
A
10677 if (is64bit) {
10678 ext_strings = ((user64_package_ext_info *)data)->strings;
10679 num_entries = ((user64_package_ext_info *)data)->num_entries;
10680 max_width = ((user64_package_ext_info *)data)->max_width;
10681 } else {
10682 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10683 num_entries = ((user32_package_ext_info *)data)->num_entries;
10684 max_width = ((user32_package_ext_info *)data)->max_width;
10685 }
10686 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10687 }
10688 break;
39037602 10689
0a7de745
A
10690 /* namespace handlers */
10691 case FSIOC_NAMESPACE_HANDLER_GET: {
10692 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10693 }
10694 break;
fe8ab488 10695
0a7de745
A
10696 /* Snapshot handlers */
10697 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
10698 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10699 }
10700 break;
fe8ab488 10701
0a7de745
A
10702 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
10703 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10704 }
10705 break;
2d21ac55 10706
0a7de745
A
10707 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
10708 uint32_t token, val;
10709 int i;
b0d623f7 10710
0a7de745
A
10711 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10712 break;
39037602 10713 }
39236c6e 10714
0a7de745
A
10715 if (!nspace_is_special_process(p)) {
10716 error = EINVAL;
10717 break;
fe8ab488 10718 }
39236c6e 10719
0a7de745
A
10720 token = ((uint32_t *)data)[0];
10721 val = ((uint32_t *)data)[1];
39236c6e 10722
0a7de745 10723 lck_mtx_lock(&nspace_handler_lock);
39236c6e 10724
0a7de745
A
10725 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
10726 if (nspace_items[i].token == token) {
10727 break; /* exit for loop, not case stmt */
fe8ab488 10728 }
0a7de745 10729 }
6d2010ae 10730
0a7de745
A
10731 if (i >= MAX_NSPACE_ITEMS) {
10732 error = ENOENT;
10733 } else {
10734 //
10735 // if this bit is set, when resolve_nspace_item() times out
10736 // it will loop and go back to sleep.
10737 //
10738 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10739 }
6d2010ae 10740
0a7de745 10741 lck_mtx_unlock(&nspace_handler_lock);
6d2010ae 10742
0a7de745
A
10743 if (error) {
10744 printf("nspace-handler-update: did not find token %u\n", token);
10745 }
10746 }
10747 break;
6d2010ae 10748
0a7de745
A
10749 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
10750 uint32_t token, val;
10751 int i;
6d2010ae 10752
0a7de745
A
10753 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10754 break;
10755 }
fe8ab488 10756
0a7de745
A
10757 if (!nspace_is_special_process(p)) {
10758 error = EINVAL;
10759 break;
39037602 10760 }
39037602 10761
0a7de745
A
10762 token = ((uint32_t *)data)[0];
10763 val = ((uint32_t *)data)[1];
fe8ab488 10764
0a7de745 10765 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10766
0a7de745
A
10767 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
10768 if (nspace_items[i].token == token) {
10769 break; /* exit for loop, not case statement */
fe8ab488 10770 }
0a7de745 10771 }
6d2010ae 10772
0a7de745
A
10773 if (i >= MAX_NSPACE_ITEMS) {
10774 printf("nspace-handler-unblock: did not find token %u\n", token);
10775 error = ENOENT;
10776 } else {
10777 if (val == 0 && nspace_items[i].vp) {
10778 vnode_lock_spin(nspace_items[i].vp);
10779 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10780 vnode_unlock(nspace_items[i].vp);
fe8ab488 10781 }
6d2010ae 10782
0a7de745
A
10783 nspace_items[i].vp = NULL;
10784 nspace_items[i].arg = NULL;
10785 nspace_items[i].op = 0;
10786 nspace_items[i].vid = 0;
10787 nspace_items[i].flags = NSPACE_ITEM_DONE;
10788 nspace_items[i].token = 0;
fe8ab488 10789
0a7de745 10790 wakeup((caddr_t)&(nspace_items[i].vp));
39037602 10791 }
6d2010ae 10792
0a7de745
A
10793 lck_mtx_unlock(&nspace_handler_lock);
10794 }
10795 break;
6d2010ae 10796
0a7de745
A
10797 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
10798 uint32_t token, val;
10799 int i;
6d2010ae 10800
0a7de745
A
10801 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10802 break;
10803 }
6d2010ae 10804
0a7de745
A
10805 if (!nspace_is_special_process(p)) {
10806 error = EINVAL;
10807 break;
10808 }
6d2010ae 10809
0a7de745
A
10810 token = ((uint32_t *)data)[0];
10811 val = ((uint32_t *)data)[1];
6d2010ae 10812
0a7de745 10813 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10814
0a7de745
A
10815 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
10816 if (nspace_items[i].token == token) {
10817 break; /* exit for loop, not case stmt */
fe8ab488 10818 }
39037602 10819 }
6d2010ae 10820
0a7de745
A
10821 if (i >= MAX_NSPACE_ITEMS) {
10822 printf("nspace-handler-cancel: did not find token %u\n", token);
10823 error = ENOENT;
10824 } else {
10825 if (nspace_items[i].vp) {
10826 vnode_lock_spin(nspace_items[i].vp);
10827 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10828 vnode_unlock(nspace_items[i].vp);
6d2010ae 10829 }
6d2010ae 10830
0a7de745
A
10831 nspace_items[i].vp = NULL;
10832 nspace_items[i].arg = NULL;
10833 nspace_items[i].vid = 0;
10834 nspace_items[i].token = val;
10835 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10836 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10837
0a7de745
A
10838 wakeup((caddr_t)&(nspace_items[i].vp));
10839 }
6d2010ae 10840
0a7de745
A
10841 lck_mtx_unlock(&nspace_handler_lock);
10842 }
10843 break;
10844
10845 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10846 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10847 break;
39037602 10848 }
6d2010ae 10849
0a7de745 10850 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10851
0a7de745
A
10852 lck_mtx_lock(&nspace_handler_lock);
10853 snapshot_timestamp = ((uint32_t *)data)[0];
10854 wakeup(&nspace_item_idx);
10855 lck_mtx_unlock(&nspace_handler_lock);
10856 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10857 }
10858 break;
6d2010ae 10859
0a7de745
A
10860 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10861 {
10862 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10863 break;
6d2010ae
A
10864 }
10865
0a7de745
A
10866 lck_mtx_lock(&nspace_handler_lock);
10867 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10868 lck_mtx_unlock(&nspace_handler_lock);
10869 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10870 nspace_allow_virtual_devs ? "" : " NOT");
10871 error = 0;
10872 }
10873 break;
10874
10875 case FSIOC_SET_FSTYPENAME_OVERRIDE:
10876 {
10877 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10878 break;
10879 }
10880 if (vp->v_mount) {
10881 mount_lock(vp->v_mount);
10882 if (data[0] != 0) {
10883 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10884 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10885 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10886 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10887 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
6d2010ae 10888 }
0a7de745
A
10889 } else {
10890 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10891 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10892 }
10893 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10894 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10895 }
0a7de745 10896 mount_unlock(vp->v_mount);
6d2010ae 10897 }
0a7de745
A
10898 }
10899 break;
39037602 10900
0a7de745
A
10901 case DISK_CONDITIONER_IOC_GET: {
10902 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10903 }
10904 break;
5ba3f43e 10905
0a7de745
A
10906 case DISK_CONDITIONER_IOC_SET: {
10907 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10908 }
10909 break;
5ba3f43e 10910
0a7de745
A
10911 case FSIOC_FD_ONLY_OPEN_ONCE: {
10912 if (vnode_usecount(vp) > 1) {
10913 error = EBUSY;
10914 } else {
10915 error = 0;
fe8ab488 10916 }
0a7de745
A
10917 }
10918 break;
10919
10920 default: {
10921 /* other, known commands shouldn't be passed down here */
10922 switch (cmd) {
10923 case F_PUNCHHOLE:
10924 case F_TRIM_ACTIVE_FILE:
10925 case F_RDADVISE:
10926 case F_TRANSCODEKEY:
10927 case F_GETPROTECTIONLEVEL:
10928 case F_GETDEFAULTPROTLEVEL:
10929 case F_MAKECOMPRESSED:
10930 case F_SET_GREEDY_MODE:
10931 case F_SETSTATICCONTENT:
10932 case F_SETIOTYPE:
10933 case F_SETBACKINGSTORE:
10934 case F_GETPATH_MTMINFO:
10935 case APFSIOC_REVERT_TO_SNAPSHOT:
10936 case FSIOC_FIOSEEKHOLE:
10937 case FSIOC_FIOSEEKDATA:
10938 case HFS_GET_BOOT_INFO:
10939 case HFS_SET_BOOT_INFO:
10940 case FIOPINSWAP:
10941 case F_CHKCLEAN:
10942 case F_FULLFSYNC:
10943 case F_BARRIERFSYNC:
10944 case F_FREEZE_FS:
10945 case F_THAW_FS:
10946 error = EINVAL;
10947 goto outdrop;
10948 }
10949 /* Invoke the filesystem-specific code */
10950 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
10951 }
fe8ab488
A
10952 } /* end switch stmt */
10953
1c79356b 10954 /*
fe8ab488 10955 * if no errors, copy any data to user. Size was
1c79356b
A
10956 * already set and checked above.
10957 */
0a7de745 10958 if (error == 0 && (cmd & IOC_OUT) && size) {
b0d623f7 10959 error = copyout(data, udata, size);
0a7de745 10960 }
39037602 10961
a39ff7e2 10962outdrop:
fe8ab488
A
10963 if (memp) {
10964 kfree(memp, size);
10965 }
39037602 10966
1c79356b
A
10967 return error;
10968}
b0d623f7
A
10969
10970/* ARGSUSED */
10971int
0a7de745 10972fsctl(proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
b0d623f7
A
10973{
10974 int error;
39037602 10975 struct nameidata nd;
b0d623f7
A
10976 u_long nameiflags;
10977 vnode_t vp = NULL;
10978 vfs_context_t ctx = vfs_context_current();
10979
10980 AUDIT_ARG(cmd, uap->cmd);
10981 AUDIT_ARG(value32, uap->options);
10982 /* Get the vnode for the file we are getting info on: */
10983 nameiflags = 0;
0a7de745
A
10984 //
10985 // if we come through fsctl() then the file is by definition not open.
10986 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
10987 // lest the caller mistakenly thinks the only open is their own (but in
10988 // reality it's someone elses).
10989 //
10990 if (uap->cmd == FSIOC_FD_ONLY_OPEN_ONCE) {
10991 return EINVAL;
10992 }
10993 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10994 nameiflags |= FOLLOW;
10995 }
6d2010ae 10996 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
0a7de745
A
10997 UIO_USERSPACE, uap->path, ctx);
10998 if ((error = namei(&nd))) {
10999 goto done;
11000 }
b0d623f7
A
11001 vp = nd.ni_vp;
11002 nameidone(&nd);
11003
11004#if CONFIG_MACF
11005 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
11006 if (error) {
11007 goto done;
11008 }
11009#endif
11010
11011 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11012
11013done:
0a7de745 11014 if (vp) {
b0d623f7 11015 vnode_put(vp);
0a7de745 11016 }
b0d623f7
A
11017 return error;
11018}
11019/* ARGSUSED */
11020int
0a7de745 11021ffsctl(proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
b0d623f7
A
11022{
11023 int error;
11024 vnode_t vp = NULL;
11025 vfs_context_t ctx = vfs_context_current();
11026 int fd = -1;
11027
11028 AUDIT_ARG(fd, uap->fd);
11029 AUDIT_ARG(cmd, uap->cmd);
11030 AUDIT_ARG(value32, uap->options);
39037602 11031
b0d623f7 11032 /* Get the vnode for the file we are getting info on: */
0a7de745 11033 if ((error = file_vnode(uap->fd, &vp))) {
3e170ce0 11034 return error;
0a7de745 11035 }
b0d623f7
A
11036 fd = uap->fd;
11037 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
11038 file_drop(fd);
11039 return error;
b0d623f7
A
11040 }
11041
11042#if CONFIG_MACF
3e170ce0
A
11043 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
11044 file_drop(fd);
11045 vnode_put(vp);
11046 return error;
b0d623f7
A
11047 }
11048#endif
11049
11050 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11051
3e170ce0 11052 file_drop(fd);
b0d623f7 11053
3e170ce0
A
11054 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
11055 if (vp) {
b0d623f7 11056 vnode_put(vp);
3e170ce0
A
11057 }
11058
b0d623f7
A
11059 return error;
11060}
1c79356b 11061/* end of fsctl system call */
0b4e3aa0 11062
91447636
A
11063/*
11064 * Retrieve the data of an extended attribute.
11065 */
11066int
2d21ac55 11067getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 11068{
2d21ac55 11069 vnode_t vp;
91447636 11070 struct nameidata nd;
0a7de745 11071 char attrname[XATTR_MAXNAMELEN + 1];
2d21ac55 11072 vfs_context_t ctx = vfs_context_current();
91447636
A
11073 uio_t auio = NULL;
11074 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11075 size_t attrsize = 0;
11076 size_t namelen;
b0d623f7 11077 u_int32_t nameiflags;
91447636 11078 int error;
0a7de745 11079 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11080
0a7de745
A
11081 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11082 return EINVAL;
11083 }
55e303ae 11084
91447636 11085 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11086 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 11087 if ((error = namei(&nd))) {
0a7de745 11088 return error;
91447636
A
11089 }
11090 vp = nd.ni_vp;
11091 nameidone(&nd);
55e303ae 11092
d9a64523
A
11093 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11094 if (error != 0) {
91447636
A
11095 goto out;
11096 }
11097 if (xattr_protected(attrname)) {
6d2010ae
A
11098 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
11099 error = EPERM;
11100 goto out;
11101 }
91447636 11102 }
b0d623f7
A
11103 /*
11104 * the specific check for 0xffffffff is a hack to preserve
11105 * binaray compatibilty in K64 with applications that discovered
39037602 11106 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
11107 * just the size of the indicated extended attribute being returned.
11108 * this isn't part of the documented behavior, but because of the
11109 * original implemtation's check for "uap->size > 0", this behavior
11110 * was allowed. In K32 that check turned into a signed comparison
11111 * even though uap->size is unsigned... in K64, we blow by that
11112 * check because uap->size is unsigned and doesn't get sign smeared
39037602 11113 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
11114 * check to limit the maximum size of the buffer being passed in...
11115 * unfortunately, the underlying fileystems seem to just malloc
11116 * the requested size even if the actual extended attribute is tiny.
11117 * because that malloc is for kernel wired memory, we have to put a
11118 * sane limit on it.
11119 *
11120 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11121 * U64 running on K64 will yield -1 (64 bits wide)
11122 * U32/U64 running on K32 will yield -1 (32 bits wide)
11123 */
0a7de745 11124 if (uap->size == 0xffffffff || uap->size == (size_t)-1) {
b0d623f7 11125 goto no_uio;
0a7de745 11126 }
b0d623f7 11127
b0d623f7 11128 if (uap->value) {
0a7de745 11129 if (uap->size > (size_t)XATTR_MAXSIZE) {
6d2010ae 11130 uap->size = XATTR_MAXSIZE;
0a7de745 11131 }
39037602 11132
91447636 11133 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
0a7de745 11134 &uio_buf[0], sizeof(uio_buf));
91447636
A
11135 uio_addiov(auio, uap->value, uap->size);
11136 }
b0d623f7 11137no_uio:
2d21ac55 11138 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
11139out:
11140 vnode_put(vp);
55e303ae 11141
91447636
A
11142 if (auio) {
11143 *retval = uap->size - uio_resid(auio);
11144 } else {
11145 *retval = (user_ssize_t)attrsize;
55e303ae
A
11146 }
11147
0a7de745 11148 return error;
91447636 11149}
55e303ae 11150
91447636
A
11151/*
11152 * Retrieve the data of an extended attribute.
11153 */
11154int
2d21ac55 11155fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 11156{
2d21ac55 11157 vnode_t vp;
0a7de745 11158 char attrname[XATTR_MAXNAMELEN + 1];
91447636
A
11159 uio_t auio = NULL;
11160 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11161 size_t attrsize = 0;
11162 size_t namelen;
11163 int error;
0a7de745 11164 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11165
0a7de745
A
11166 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11167 return EINVAL;
11168 }
55e303ae 11169
0a7de745
A
11170 if ((error = file_vnode(uap->fd, &vp))) {
11171 return error;
91447636 11172 }
0a7de745 11173 if ((error = vnode_getwithref(vp))) {
91447636 11174 file_drop(uap->fd);
0a7de745 11175 return error;
91447636 11176 }
d9a64523
A
11177 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11178 if (error != 0) {
91447636
A
11179 goto out;
11180 }
11181 if (xattr_protected(attrname)) {
11182 error = EPERM;
11183 goto out;
11184 }
11185 if (uap->value && uap->size > 0) {
11186 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
0a7de745 11187 &uio_buf[0], sizeof(uio_buf));
91447636
A
11188 uio_addiov(auio, uap->value, uap->size);
11189 }
55e303ae 11190
2d21ac55 11191 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
11192out:
11193 (void)vnode_put(vp);
11194 file_drop(uap->fd);
55e303ae 11195
91447636
A
11196 if (auio) {
11197 *retval = uap->size - uio_resid(auio);
11198 } else {
11199 *retval = (user_ssize_t)attrsize;
11200 }
0a7de745 11201 return error;
91447636 11202}
55e303ae 11203
91447636
A
11204/*
11205 * Set the data of an extended attribute.
11206 */
55e303ae 11207int
2d21ac55 11208setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 11209{
2d21ac55 11210 vnode_t vp;
91447636 11211 struct nameidata nd;
0a7de745 11212 char attrname[XATTR_MAXNAMELEN + 1];
2d21ac55 11213 vfs_context_t ctx = vfs_context_current();
91447636
A
11214 uio_t auio = NULL;
11215 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11216 size_t namelen;
b0d623f7 11217 u_int32_t nameiflags;
91447636 11218 int error;
0a7de745 11219 char uio_buf[UIO_SIZEOF(1)];
55e303ae 11220
0a7de745
A
11221 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11222 return EINVAL;
11223 }
55e303ae 11224
d9a64523
A
11225 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11226 if (error != 0) {
6d2010ae
A
11227 if (error == EPERM) {
11228 /* if the string won't fit in attrname, copyinstr emits EPERM */
0a7de745 11229 return ENAMETOOLONG;
6d2010ae
A
11230 }
11231 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11232 return error;
91447636 11233 }
0a7de745
A
11234 if (xattr_protected(attrname)) {
11235 return EPERM;
11236 }
2d21ac55 11237 if (uap->size != 0 && uap->value == 0) {
0a7de745 11238 return EINVAL;
55e303ae 11239 }
55e303ae 11240
91447636 11241 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11242 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 11243 if ((error = namei(&nd))) {
0a7de745 11244 return error;
91447636
A
11245 }
11246 vp = nd.ni_vp;
11247 nameidone(&nd);
55e303ae 11248
91447636 11249 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
0a7de745 11250 &uio_buf[0], sizeof(uio_buf));
91447636 11251 uio_addiov(auio, uap->value, uap->size);
55e303ae 11252
2d21ac55
A
11253 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
11254#if CONFIG_FSE
11255 if (error == 0) {
11256 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11257 FSE_ARG_VNODE, vp,
11258 FSE_ARG_DONE);
11259 }
11260#endif
91447636
A
11261 vnode_put(vp);
11262 *retval = 0;
0a7de745 11263 return error;
91447636 11264}
55e303ae 11265
91447636
A
11266/*
11267 * Set the data of an extended attribute.
11268 */
11269int
2d21ac55 11270fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 11271{
2d21ac55 11272 vnode_t vp;
0a7de745 11273 char attrname[XATTR_MAXNAMELEN + 1];
91447636
A
11274 uio_t auio = NULL;
11275 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11276 size_t namelen;
11277 int error;
0a7de745 11278 char uio_buf[UIO_SIZEOF(1)];
6d2010ae 11279#if CONFIG_FSE
2d21ac55 11280 vfs_context_t ctx = vfs_context_current();
6d2010ae 11281#endif
55e303ae 11282
0a7de745
A
11283 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11284 return EINVAL;
11285 }
55e303ae 11286
d9a64523
A
11287 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11288 if (error != 0) {
3e170ce0
A
11289 if (error == EPERM) {
11290 /* if the string won't fit in attrname, copyinstr emits EPERM */
0a7de745 11291 return ENAMETOOLONG;
3e170ce0
A
11292 }
11293 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11294 return error;
55e303ae 11295 }
0a7de745
A
11296 if (xattr_protected(attrname)) {
11297 return EPERM;
11298 }
2d21ac55 11299 if (uap->size != 0 && uap->value == 0) {
0a7de745 11300 return EINVAL;
55e303ae 11301 }
0a7de745
A
11302 if ((error = file_vnode(uap->fd, &vp))) {
11303 return error;
55e303ae 11304 }
0a7de745 11305 if ((error = vnode_getwithref(vp))) {
91447636 11306 file_drop(uap->fd);
0a7de745 11307 return error;
91447636
A
11308 }
11309 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
0a7de745 11310 &uio_buf[0], sizeof(uio_buf));
91447636 11311 uio_addiov(auio, uap->value, uap->size);
91447636 11312
2d21ac55
A
11313 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
11314#if CONFIG_FSE
11315 if (error == 0) {
11316 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11317 FSE_ARG_VNODE, vp,
11318 FSE_ARG_DONE);
11319 }
11320#endif
91447636
A
11321 vnode_put(vp);
11322 file_drop(uap->fd);
11323 *retval = 0;
0a7de745 11324 return error;
91447636 11325}
55e303ae 11326
91447636
A
11327/*
11328 * Remove an extended attribute.
b0d623f7 11329 * XXX Code duplication here.
91447636 11330 */
91447636 11331int
2d21ac55 11332removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 11333{
2d21ac55 11334 vnode_t vp;
91447636 11335 struct nameidata nd;
0a7de745 11336 char attrname[XATTR_MAXNAMELEN + 1];
91447636 11337 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 11338 vfs_context_t ctx = vfs_context_current();
91447636 11339 size_t namelen;
b0d623f7 11340 u_int32_t nameiflags;
91447636 11341 int error;
55e303ae 11342
0a7de745
A
11343 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11344 return EINVAL;
11345 }
55e303ae 11346
91447636
A
11347 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11348 if (error != 0) {
0a7de745
A
11349 return error;
11350 }
11351 if (xattr_protected(attrname)) {
11352 return EPERM;
91447636 11353 }
91447636 11354 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11355 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 11356 if ((error = namei(&nd))) {
0a7de745 11357 return error;
91447636
A
11358 }
11359 vp = nd.ni_vp;
11360 nameidone(&nd);
55e303ae 11361
2d21ac55
A
11362 error = vn_removexattr(vp, attrname, uap->options, ctx);
11363#if CONFIG_FSE
11364 if (error == 0) {
11365 add_fsevent(FSE_XATTR_REMOVED, ctx,
11366 FSE_ARG_VNODE, vp,
11367 FSE_ARG_DONE);
11368 }
11369#endif
91447636
A
11370 vnode_put(vp);
11371 *retval = 0;
0a7de745 11372 return error;
55e303ae
A
11373}
11374
91447636
A
11375/*
11376 * Remove an extended attribute.
b0d623f7 11377 * XXX Code duplication here.
91447636 11378 */
91447636 11379int
2d21ac55 11380fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 11381{
2d21ac55 11382 vnode_t vp;
0a7de745 11383 char attrname[XATTR_MAXNAMELEN + 1];
91447636
A
11384 size_t namelen;
11385 int error;
6d2010ae 11386#if CONFIG_FSE
2d21ac55 11387 vfs_context_t ctx = vfs_context_current();
6d2010ae 11388#endif
55e303ae 11389
0a7de745
A
11390 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11391 return EINVAL;
11392 }
91447636
A
11393
11394 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11395 if (error != 0) {
0a7de745 11396 return error;
91447636 11397 }
0a7de745
A
11398 if (xattr_protected(attrname)) {
11399 return EPERM;
91447636 11400 }
0a7de745
A
11401 if ((error = file_vnode(uap->fd, &vp))) {
11402 return error;
11403 }
11404 if ((error = vnode_getwithref(vp))) {
91447636 11405 file_drop(uap->fd);
0a7de745 11406 return error;
91447636 11407 }
4a249263 11408
2d21ac55
A
11409 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
11410#if CONFIG_FSE
11411 if (error == 0) {
11412 add_fsevent(FSE_XATTR_REMOVED, ctx,
11413 FSE_ARG_VNODE, vp,
11414 FSE_ARG_DONE);
11415 }
11416#endif
91447636
A
11417 vnode_put(vp);
11418 file_drop(uap->fd);
11419 *retval = 0;
0a7de745 11420 return error;
55e303ae
A
11421}
11422
91447636
A
11423/*
11424 * Retrieve the list of extended attribute names.
b0d623f7 11425 * XXX Code duplication here.
91447636 11426 */
91447636 11427int
2d21ac55 11428listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 11429{
2d21ac55 11430 vnode_t vp;
91447636 11431 struct nameidata nd;
2d21ac55 11432 vfs_context_t ctx = vfs_context_current();
91447636
A
11433 uio_t auio = NULL;
11434 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11435 size_t attrsize = 0;
b0d623f7 11436 u_int32_t nameiflags;
91447636 11437 int error;
0a7de745 11438 char uio_buf[UIO_SIZEOF(1)];
4a249263 11439
0a7de745
A
11440 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11441 return EINVAL;
11442 }
55e303ae 11443
fe8ab488 11444 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11445 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636 11446 if ((error = namei(&nd))) {
0a7de745 11447 return error;
91447636
A
11448 }
11449 vp = nd.ni_vp;
11450 nameidone(&nd);
11451 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae 11452 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
0a7de745 11453 &uio_buf[0], sizeof(uio_buf));
91447636
A
11454 uio_addiov(auio, uap->namebuf, uap->bufsize);
11455 }
55e303ae 11456
2d21ac55 11457 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 11458
91447636
A
11459 vnode_put(vp);
11460 if (auio) {
11461 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11462 } else {
11463 *retval = (user_ssize_t)attrsize;
11464 }
0a7de745 11465 return error;
55e303ae
A
11466}
11467
91447636
A
11468/*
11469 * Retrieve the list of extended attribute names.
b0d623f7 11470 * XXX Code duplication here.
91447636 11471 */
55e303ae 11472int
2d21ac55 11473flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 11474{
2d21ac55 11475 vnode_t vp;
91447636
A
11476 uio_t auio = NULL;
11477 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11478 size_t attrsize = 0;
11479 int error;
0a7de745 11480 char uio_buf[UIO_SIZEOF(1)];
91447636 11481
0a7de745
A
11482 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11483 return EINVAL;
11484 }
91447636 11485
0a7de745
A
11486 if ((error = file_vnode(uap->fd, &vp))) {
11487 return error;
91447636 11488 }
0a7de745 11489 if ((error = vnode_getwithref(vp))) {
91447636 11490 file_drop(uap->fd);
0a7de745 11491 return error;
91447636
A
11492 }
11493 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 11494 auio = uio_createwithbuffer(1, 0, spacetype,
0a7de745 11495 UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636
A
11496 uio_addiov(auio, uap->namebuf, uap->bufsize);
11497 }
91447636 11498
2d21ac55 11499 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 11500
91447636
A
11501 vnode_put(vp);
11502 file_drop(uap->fd);
11503 if (auio) {
11504 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11505 } else {
11506 *retval = (user_ssize_t)attrsize;
11507 }
0a7de745 11508 return error;
55e303ae 11509}
4a249263 11510
0a7de745
A
11511static int
11512fsgetpath_internal(
fe8ab488
A
11513 vfs_context_t ctx, int volfs_id, uint64_t objid,
11514 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 11515{
fe8ab488 11516 int error;
b0d623f7 11517 struct mount *mp = NULL;
fe8ab488 11518 vnode_t vp;
b0d623f7 11519 int length;
fe8ab488 11520 int bpflags;
813fb2f6
A
11521 /* maximum number of times to retry build_path */
11522 unsigned int retries = 0x10;
b0d623f7 11523
fe8ab488 11524 if (bufsize > PAGE_SIZE) {
0a7de745 11525 return EINVAL;
fe8ab488
A
11526 }
11527
11528 if (buf == NULL) {
0a7de745 11529 return ENOMEM;
b0d623f7 11530 }
fe8ab488 11531
813fb2f6 11532retry:
fe8ab488 11533 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 11534 error = ENOTSUP; /* unexpected failure */
fe8ab488 11535 return ENOTSUP;
b0d623f7 11536 }
fe8ab488 11537
39236c6e 11538unionget:
fe8ab488 11539 if (objid == 2) {
b0d623f7
A
11540 error = VFS_ROOT(mp, &vp, ctx);
11541 } else {
fe8ab488 11542 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 11543 }
39236c6e
A
11544
11545 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11546 /*
11547 * If the fileid isn't found and we're in a union
11548 * mount volume, then see if the fileid is in the
11549 * mounted-on volume.
11550 */
11551 struct mount *tmp = mp;
11552 mp = vnode_mount(tmp->mnt_vnodecovered);
11553 vfs_unbusy(tmp);
0a7de745 11554 if (vfs_busy(mp, LK_NOWAIT) == 0) {
39236c6e 11555 goto unionget;
0a7de745 11556 }
fe8ab488 11557 } else {
39236c6e 11558 vfs_unbusy(mp);
fe8ab488 11559 }
39236c6e 11560
b0d623f7 11561 if (error) {
fe8ab488 11562 return error;
b0d623f7 11563 }
fe8ab488 11564
6d2010ae
A
11565#if CONFIG_MACF
11566 error = mac_vnode_check_fsgetpath(ctx, vp);
11567 if (error) {
11568 vnode_put(vp);
fe8ab488 11569 return error;
6d2010ae
A
11570 }
11571#endif
fe8ab488 11572
b0d623f7
A
11573 /* Obtain the absolute path to this vnode. */
11574 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 11575 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 11576 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 11577 vnode_put(vp);
fe8ab488 11578
b0d623f7 11579 if (error) {
813fb2f6
A
11580 /* there was a race building the path, try a few more times */
11581 if (error == EAGAIN) {
11582 --retries;
0a7de745 11583 if (retries > 0) {
813fb2f6 11584 goto retry;
0a7de745 11585 }
813fb2f6
A
11586
11587 error = ENOENT;
11588 }
b0d623f7
A
11589 goto out;
11590 }
fe8ab488
A
11591
11592 AUDIT_ARG(text, buf);
39236c6e
A
11593
11594 if (kdebug_enable) {
11595 long dbg_parms[NUMPARMS];
d9a64523 11596 int dbg_namelen;
39236c6e 11597
d9a64523 11598 dbg_namelen = (int)sizeof(dbg_parms);
39236c6e 11599
0a7de745 11600 if (length < dbg_namelen) {
fe8ab488 11601 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
11602 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11603
11604 dbg_namelen = length;
fe8ab488
A
11605 } else {
11606 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11607 }
39236c6e 11608
d9a64523 11609 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
0a7de745 11610 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
39236c6e 11611 }
fe8ab488
A
11612
11613 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11614
11615out:
0a7de745 11616 return error;
fe8ab488
A
11617}
11618
11619/*
11620 * Obtain the full pathname of a file system object by id.
fe8ab488 11621 */
fe8ab488
A
11622int
11623fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11624{
11625 vfs_context_t ctx = vfs_context_current();
11626 fsid_t fsid;
11627 char *realpath;
11628 int length;
11629 int error;
11630
11631 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
0a7de745 11632 return error;
fe8ab488
A
11633 }
11634 AUDIT_ARG(value32, fsid.val[0]);
11635 AUDIT_ARG(value64, uap->objid);
11636 /* Restrict output buffer size for now. */
39037602 11637
fe8ab488 11638 if (uap->bufsize > PAGE_SIZE) {
0a7de745 11639 return EINVAL;
39037602 11640 }
d9a64523 11641 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK | M_ZERO);
fe8ab488 11642 if (realpath == NULL) {
0a7de745 11643 return ENOMEM;
fe8ab488
A
11644 }
11645
11646 error = fsgetpath_internal(
39037602 11647 ctx, fsid.val[0], uap->objid,
fe8ab488
A
11648 uap->bufsize, realpath, &length);
11649
11650 if (error) {
11651 goto out;
11652 }
39037602 11653
b0d623f7
A
11654 error = copyout((caddr_t)realpath, uap->buf, length);
11655
11656 *retval = (user_ssize_t)length; /* may be superseded by error */
11657out:
11658 if (realpath) {
11659 FREE(realpath, M_TEMP);
11660 }
0a7de745 11661 return error;
b0d623f7
A
11662}
11663
91447636
A
11664/*
11665 * Common routine to handle various flavors of statfs data heading out
11666 * to user space.
2d21ac55
A
11667 *
11668 * Returns: 0 Success
11669 * EFAULT
91447636
A
11670 */
11671static int
39037602
A
11672munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11673 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 11674 boolean_t partial_copy)
4a249263 11675{
0a7de745
A
11676 int error;
11677 int my_size, copy_size;
91447636
A
11678
11679 if (is_64_bit) {
b0d623f7 11680 struct user64_statfs sfs;
91447636
A
11681 my_size = copy_size = sizeof(sfs);
11682 bzero(&sfs, my_size);
11683 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11684 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11685 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
11686 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11687 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11688 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11689 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11690 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11691 sfs.f_files = (user64_long_t)sfsp->f_files;
11692 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
11693 sfs.f_fsid = sfsp->f_fsid;
11694 sfs.f_owner = sfsp->f_owner;
6d2010ae 11695 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11696 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11697 } else {
11698 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11699 }
2d21ac55
A
11700 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11701 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11702
11703 if (partial_copy) {
11704 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11705 }
11706 error = copyout((caddr_t)&sfs, bufp, copy_size);
0a7de745 11707 } else {
b0d623f7
A
11708 struct user32_statfs sfs;
11709
91447636
A
11710 my_size = copy_size = sizeof(sfs);
11711 bzero(&sfs, my_size);
39037602 11712
91447636
A
11713 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11714 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11715 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 11716
91447636
A
11717 /*
11718 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11719 * have to fudge the numbers here in that case. We inflate the blocksize in order
11720 * to reflect the filesystem size as best we can.
11721 */
39037602 11722 if ((sfsp->f_blocks > INT_MAX)
0a7de745
A
11723 /* Hack for 4061702 . I think the real fix is for Carbon to
11724 * look for some volume capability and not depend on hidden
11725 * semantics agreed between a FS and carbon.
11726 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11727 * for Carbon to set bNoVolumeSizes volume attribute.
11728 * Without this the webdavfs files cannot be copied onto
11729 * disk as they look huge. This change should not affect
11730 * XSAN as they should not setting these to -1..
11731 */
11732 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11733 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11734 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
11735 int shift;
91447636
A
11736
11737 /*
11738 * Work out how far we have to shift the block count down to make it fit.
11739 * Note that it's possible to have to shift so far that the resulting
11740 * blocksize would be unreportably large. At that point, we will clip
11741 * any values that don't fit.
11742 *
11743 * For safety's sake, we also ensure that f_iosize is never reported as
11744 * being smaller than f_bsize.
11745 */
11746 for (shift = 0; shift < 32; shift++) {
0a7de745 11747 if ((sfsp->f_blocks >> shift) <= INT_MAX) {
91447636 11748 break;
0a7de745
A
11749 }
11750 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) {
91447636 11751 break;
0a7de745 11752 }
91447636 11753 }
0a7de745 11754#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
b0d623f7
A
11755 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11756 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11757 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 11758#undef __SHIFT_OR_CLIP
b0d623f7 11759 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
11760 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11761 } else {
11762 /* filesystem is small enough to be reported honestly */
b0d623f7
A
11763 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11764 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11765 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11766 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11767 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 11768 }
b0d623f7
A
11769 sfs.f_files = (user32_long_t)sfsp->f_files;
11770 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
11771 sfs.f_fsid = sfsp->f_fsid;
11772 sfs.f_owner = sfsp->f_owner;
6d2010ae 11773 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11774 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11775 } else {
11776 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11777 }
2d21ac55
A
11778 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11779 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11780
11781 if (partial_copy) {
11782 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11783 }
11784 error = copyout((caddr_t)&sfs, bufp, copy_size);
11785 }
39037602 11786
91447636
A
11787 if (sizep != NULL) {
11788 *sizep = my_size;
11789 }
0a7de745 11790 return error;
91447636
A
11791}
11792
11793/*
11794 * copy stat structure into user_stat structure.
11795 */
0a7de745
A
11796void
11797munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11798{
b0d623f7
A
11799 bzero(usbp, sizeof(*usbp));
11800
11801 usbp->st_dev = sbp->st_dev;
11802 usbp->st_ino = sbp->st_ino;
11803 usbp->st_mode = sbp->st_mode;
11804 usbp->st_nlink = sbp->st_nlink;
11805 usbp->st_uid = sbp->st_uid;
11806 usbp->st_gid = sbp->st_gid;
11807 usbp->st_rdev = sbp->st_rdev;
11808#ifndef _POSIX_C_SOURCE
11809 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11810 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11811 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11812 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11813 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11814 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11815#else
11816 usbp->st_atime = sbp->st_atime;
11817 usbp->st_atimensec = sbp->st_atimensec;
11818 usbp->st_mtime = sbp->st_mtime;
11819 usbp->st_mtimensec = sbp->st_mtimensec;
11820 usbp->st_ctime = sbp->st_ctime;
11821 usbp->st_ctimensec = sbp->st_ctimensec;
11822#endif
11823 usbp->st_size = sbp->st_size;
11824 usbp->st_blocks = sbp->st_blocks;
11825 usbp->st_blksize = sbp->st_blksize;
11826 usbp->st_flags = sbp->st_flags;
11827 usbp->st_gen = sbp->st_gen;
11828 usbp->st_lspare = sbp->st_lspare;
11829 usbp->st_qspare[0] = sbp->st_qspare[0];
11830 usbp->st_qspare[1] = sbp->st_qspare[1];
11831}
11832
0a7de745
A
11833void
11834munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
b0d623f7
A
11835{
11836 bzero(usbp, sizeof(*usbp));
0c530ab8 11837
91447636
A
11838 usbp->st_dev = sbp->st_dev;
11839 usbp->st_ino = sbp->st_ino;
11840 usbp->st_mode = sbp->st_mode;
11841 usbp->st_nlink = sbp->st_nlink;
11842 usbp->st_uid = sbp->st_uid;
11843 usbp->st_gid = sbp->st_gid;
11844 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11845#ifndef _POSIX_C_SOURCE
11846 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11847 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11848 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11849 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11850 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11851 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11852#else
11853 usbp->st_atime = sbp->st_atime;
11854 usbp->st_atimensec = sbp->st_atimensec;
11855 usbp->st_mtime = sbp->st_mtime;
11856 usbp->st_mtimensec = sbp->st_mtimensec;
11857 usbp->st_ctime = sbp->st_ctime;
11858 usbp->st_ctimensec = sbp->st_ctimensec;
11859#endif
11860 usbp->st_size = sbp->st_size;
11861 usbp->st_blocks = sbp->st_blocks;
11862 usbp->st_blksize = sbp->st_blksize;
11863 usbp->st_flags = sbp->st_flags;
11864 usbp->st_gen = sbp->st_gen;
11865 usbp->st_lspare = sbp->st_lspare;
11866 usbp->st_qspare[0] = sbp->st_qspare[0];
11867 usbp->st_qspare[1] = sbp->st_qspare[1];
11868}
11869
11870/*
11871 * copy stat64 structure into user_stat64 structure.
11872 */
0a7de745
A
11873void
11874munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
b0d623f7
A
11875{
11876 bzero(usbp, sizeof(*usbp));
11877
11878 usbp->st_dev = sbp->st_dev;
11879 usbp->st_ino = sbp->st_ino;
11880 usbp->st_mode = sbp->st_mode;
11881 usbp->st_nlink = sbp->st_nlink;
11882 usbp->st_uid = sbp->st_uid;
11883 usbp->st_gid = sbp->st_gid;
11884 usbp->st_rdev = sbp->st_rdev;
11885#ifndef _POSIX_C_SOURCE
11886 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11887 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11888 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11889 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11890 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11891 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11892 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11893 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11894#else
11895 usbp->st_atime = sbp->st_atime;
11896 usbp->st_atimensec = sbp->st_atimensec;
11897 usbp->st_mtime = sbp->st_mtime;
11898 usbp->st_mtimensec = sbp->st_mtimensec;
11899 usbp->st_ctime = sbp->st_ctime;
11900 usbp->st_ctimensec = sbp->st_ctimensec;
11901 usbp->st_birthtime = sbp->st_birthtime;
11902 usbp->st_birthtimensec = sbp->st_birthtimensec;
11903#endif
11904 usbp->st_size = sbp->st_size;
11905 usbp->st_blocks = sbp->st_blocks;
11906 usbp->st_blksize = sbp->st_blksize;
11907 usbp->st_flags = sbp->st_flags;
11908 usbp->st_gen = sbp->st_gen;
11909 usbp->st_lspare = sbp->st_lspare;
11910 usbp->st_qspare[0] = sbp->st_qspare[0];
11911 usbp->st_qspare[1] = sbp->st_qspare[1];
11912}
11913
0a7de745
A
11914void
11915munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11916{
b0d623f7 11917 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11918
11919 usbp->st_dev = sbp->st_dev;
11920 usbp->st_ino = sbp->st_ino;
11921 usbp->st_mode = sbp->st_mode;
11922 usbp->st_nlink = sbp->st_nlink;
11923 usbp->st_uid = sbp->st_uid;
11924 usbp->st_gid = sbp->st_gid;
11925 usbp->st_rdev = sbp->st_rdev;
11926#ifndef _POSIX_C_SOURCE
91447636
A
11927 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11928 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11929 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11930 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11931 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11932 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11933 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11934 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11935#else
11936 usbp->st_atime = sbp->st_atime;
11937 usbp->st_atimensec = sbp->st_atimensec;
11938 usbp->st_mtime = sbp->st_mtime;
11939 usbp->st_mtimensec = sbp->st_mtimensec;
11940 usbp->st_ctime = sbp->st_ctime;
11941 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11942 usbp->st_birthtime = sbp->st_birthtime;
11943 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11944#endif
11945 usbp->st_size = sbp->st_size;
11946 usbp->st_blocks = sbp->st_blocks;
11947 usbp->st_blksize = sbp->st_blksize;
11948 usbp->st_flags = sbp->st_flags;
11949 usbp->st_gen = sbp->st_gen;
11950 usbp->st_lspare = sbp->st_lspare;
11951 usbp->st_qspare[0] = sbp->st_qspare[0];
11952 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11953}
39236c6e
A
11954
11955/*
11956 * Purge buffer cache for simulating cold starts
11957 */
0a7de745
A
11958static int
11959vnode_purge_callback(struct vnode *vp, __unused void *cargs)
39236c6e
A
11960{
11961 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11962
11963 return VNODE_RETURNED;
11964}
11965
0a7de745
A
11966static int
11967vfs_purge_callback(mount_t mp, __unused void * arg)
39236c6e
A
11968{
11969 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11970
11971 return VFS_RETURNED;
11972}
11973
11974int
11975vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11976{
0a7de745 11977 if (!kauth_cred_issuser(kauth_cred_get())) {
39236c6e 11978 return EPERM;
0a7de745 11979 }
39236c6e 11980
0a7de745 11981 vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
39236c6e
A
11982
11983 return 0;
11984}
11985
39037602
A
11986/*
11987 * gets the vnode associated with the (unnamed) snapshot directory
11988 * for a Filesystem. The snapshot directory vnode is returned with
11989 * an iocount on it.
11990 */
11991int
11992vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11993{
0a7de745 11994 return VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
39037602
A
11995}
11996
11997/*
11998 * Get the snapshot vnode.
11999 *
12000 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
12001 * needs nameidone() on ndp.
12002 *
12003 * If the snapshot vnode exists it is returned in ndp->ni_vp.
12004 *
12005 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
12006 * not needed.
12007 */
12008static int
12009vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
12010 user_addr_t name, struct nameidata *ndp, int32_t op,
12011#if !CONFIG_TRIGGERS
12012 __unused
12013#endif
12014 enum path_operation pathop,
12015 vfs_context_t ctx)
12016{
12017 int error, i;
12018 caddr_t name_buf;
12019 size_t name_len;
12020 struct vfs_attr vfa;
12021
12022 *sdvpp = NULLVP;
12023 *rvpp = NULLVP;
12024
12025 error = vnode_getfromfd(ctx, dirfd, rvpp);
0a7de745
A
12026 if (error) {
12027 return error;
12028 }
39037602
A
12029
12030 if (!vnode_isvroot(*rvpp)) {
12031 error = EINVAL;
12032 goto out;
12033 }
12034
12035 /* Make sure the filesystem supports snapshots */
12036 VFSATTR_INIT(&vfa);
12037 VFSATTR_WANTED(&vfa, f_capabilities);
12038 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
12039 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
12040 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
12041 VOL_CAP_INT_SNAPSHOT)) ||
12042 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
12043 VOL_CAP_INT_SNAPSHOT))) {
12044 error = ENOTSUP;
12045 goto out;
12046 }
12047
12048 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
0a7de745 12049 if (error) {
39037602 12050 goto out;
0a7de745 12051 }
39037602
A
12052
12053 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12054 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
0a7de745 12055 if (error) {
39037602 12056 goto out1;
0a7de745 12057 }
39037602
A
12058
12059 /*
12060 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
12061 * (the length returned by copyinstr includes the terminating NUL)
12062 */
12063 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
12064 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
12065 error = EINVAL;
12066 goto out1;
12067 }
0a7de745
A
12068 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++) {
12069 ;
12070 }
39037602
A
12071 if (i < (int)name_len) {
12072 error = EINVAL;
12073 goto out1;
12074 }
12075
12076#if CONFIG_MACF
12077 if (op == CREATE) {
12078 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
12079 name_buf);
12080 } else if (op == DELETE) {
12081 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
12082 name_buf);
12083 }
0a7de745 12084 if (error) {
39037602 12085 goto out1;
0a7de745 12086 }
39037602
A
12087#endif
12088
12089 /* Check if the snapshot already exists ... */
12090 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
12091 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
12092 ndp->ni_dvp = *sdvpp;
12093
12094 error = namei(ndp);
12095out1:
12096 FREE(name_buf, M_TEMP);
12097out:
12098 if (error) {
12099 if (*sdvpp) {
12100 vnode_put(*sdvpp);
12101 *sdvpp = NULLVP;
12102 }
12103 if (*rvpp) {
12104 vnode_put(*rvpp);
12105 *rvpp = NULLVP;
12106 }
12107 }
0a7de745 12108 return error;
39037602
A
12109}
12110
12111/*
12112 * create a filesystem snapshot (for supporting filesystems)
12113 *
12114 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
12115 * We get to the (unnamed) snapshot directory vnode and create the vnode
12116 * for the snapshot in it.
12117 *
12118 * Restrictions:
12119 *
12120 * a) Passed in name for snapshot cannot have slashes.
12121 * b) name can't be "." or ".."
12122 *
12123 * Since this requires superuser privileges, vnode_authorize calls are not
12124 * made.
12125 */
12126static int
12127snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
12128 vfs_context_t ctx)
12129{
12130 vnode_t rvp, snapdvp;
12131 int error;
12132 struct nameidata namend;
12133
12134 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
12135 OP_LINK, ctx);
0a7de745
A
12136 if (error) {
12137 return error;
12138 }
39037602
A
12139
12140 if (namend.ni_vp) {
12141 vnode_put(namend.ni_vp);
12142 error = EEXIST;
12143 } else {
12144 struct vnode_attr va;
12145 vnode_t vp = NULLVP;
12146
12147 VATTR_INIT(&va);
12148 VATTR_SET(&va, va_type, VREG);
12149 VATTR_SET(&va, va_mode, 0);
12150
12151 error = vn_create(snapdvp, &vp, &namend, &va,
12152 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
0a7de745 12153 if (!error && vp) {
39037602 12154 vnode_put(vp);
0a7de745 12155 }
39037602
A
12156 }
12157
12158 nameidone(&namend);
12159 vnode_put(snapdvp);
12160 vnode_put(rvp);
0a7de745 12161 return error;
39037602
A
12162}
12163
12164/*
12165 * Delete a Filesystem snapshot
12166 *
12167 * get the vnode for the unnamed snapshot directory and the snapshot and
12168 * delete the snapshot.
12169 */
12170static int
12171snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
12172 vfs_context_t ctx)
12173{
12174 vnode_t rvp, snapdvp;
12175 int error;
12176 struct nameidata namend;
12177
12178 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
12179 OP_UNLINK, ctx);
0a7de745 12180 if (error) {
39037602 12181 goto out;
0a7de745 12182 }
39037602
A
12183
12184 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
12185 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
12186
12187 vnode_put(namend.ni_vp);
12188 nameidone(&namend);
12189 vnode_put(snapdvp);
12190 vnode_put(rvp);
12191out:
0a7de745 12192 return error;
39037602
A
12193}
12194
12195/*
12196 * Revert a filesystem to a snapshot
12197 *
12198 * Marks the filesystem to revert to the given snapshot on next mount.
12199 */
12200static int
12201snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
0a7de745
A
12202 vfs_context_t ctx)
12203{
12204 int error;
12205 vnode_t rvp;
12206 mount_t mp;
12207 struct fs_snapshot_revert_args revert_data;
12208 struct componentname cnp;
12209 caddr_t name_buf;
12210 size_t name_len;
12211
12212 error = vnode_getfromfd(ctx, dirfd, &rvp);
12213 if (error) {
12214 return error;
12215 }
12216 mp = vnode_mount(rvp);
12217
12218 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12219 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12220 if (error) {
12221 FREE(name_buf, M_TEMP);
12222 vnode_put(rvp);
12223 return error;
12224 }
813fb2f6
A
12225
12226#if CONFIG_MACF
0a7de745
A
12227 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
12228 if (error) {
12229 FREE(name_buf, M_TEMP);
12230 vnode_put(rvp);
12231 return error;
12232 }
12233#endif
12234
12235 /*
12236 * Grab mount_iterref so that we can release the vnode,
12237 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12238 */
12239 error = mount_iterref(mp, 0);
12240 vnode_put(rvp);
12241 if (error) {
12242 FREE(name_buf, M_TEMP);
12243 return error;
12244 }
12245
12246 memset(&cnp, 0, sizeof(cnp));
12247 cnp.cn_pnbuf = (char *)name_buf;
12248 cnp.cn_nameiop = LOOKUP;
12249 cnp.cn_flags = ISLASTCN | HASBUF;
12250 cnp.cn_pnlen = MAXPATHLEN;
12251 cnp.cn_nameptr = cnp.cn_pnbuf;
12252 cnp.cn_namelen = (int)name_len;
12253 revert_data.sr_cnp = &cnp;
12254
12255 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
12256 mount_iterdrop(mp);
12257 FREE(name_buf, M_TEMP);
12258
12259 if (error) {
12260 /* If there was any error, try again using VNOP_IOCTL */
12261
12262 vnode_t snapdvp;
12263 struct nameidata namend;
12264
12265 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
12266 OP_LOOKUP, ctx);
12267 if (error) {
12268 return error;
12269 }
12270
12271
12272 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
12273 0, ctx);
12274
12275 vnode_put(namend.ni_vp);
12276 nameidone(&namend);
12277 vnode_put(snapdvp);
12278 vnode_put(rvp);
12279 }
12280
12281 return error;
39037602
A
12282}
12283
12284/*
12285 * rename a Filesystem snapshot
12286 *
12287 * get the vnode for the unnamed snapshot directory and the snapshot and
12288 * rename the snapshot. This is a very specialised (and simple) case of
12289 * rename(2) (which has to deal with a lot more complications). It differs
12290 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12291 */
12292static int
12293snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
12294 __unused uint32_t flags, vfs_context_t ctx)
12295{
12296 vnode_t rvp, snapdvp;
12297 int error, i;
12298 caddr_t newname_buf;
12299 size_t name_len;
12300 vnode_t fvp;
12301 struct nameidata *fromnd, *tond;
12302 /* carving out a chunk for structs that are too big to be on stack. */
12303 struct {
12304 struct nameidata from_node;
12305 struct nameidata to_node;
12306 } * __rename_data;
12307
12308 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
12309 fromnd = &__rename_data->from_node;
12310 tond = &__rename_data->to_node;
12311
12312 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
12313 OP_UNLINK, ctx);
0a7de745 12314 if (error) {
39037602 12315 goto out;
0a7de745 12316 }
39037602
A
12317 fvp = fromnd->ni_vp;
12318
12319 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12320 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
0a7de745 12321 if (error) {
39037602 12322 goto out1;
0a7de745 12323 }
39037602
A
12324
12325 /*
12326 * Some sanity checks- new name can't be empty, "." or ".." or have
12327 * slashes.
12328 * (the length returned by copyinstr includes the terminating NUL)
12329 *
12330 * The FS rename VNOP is suppossed to handle this but we'll pick it
12331 * off here itself.
12332 */
12333 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
12334 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
12335 error = EINVAL;
12336 goto out1;
12337 }
0a7de745
A
12338 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++) {
12339 ;
12340 }
39037602
A
12341 if (i < (int)name_len) {
12342 error = EINVAL;
12343 goto out1;
12344 }
12345
12346#if CONFIG_MACF
12347 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
12348 newname_buf);
0a7de745 12349 if (error) {
39037602 12350 goto out1;
0a7de745 12351 }
39037602
A
12352#endif
12353
12354 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
12355 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
12356 tond->ni_dvp = snapdvp;
12357
12358 error = namei(tond);
12359 if (error) {
12360 goto out2;
12361 } else if (tond->ni_vp) {
12362 /*
12363 * snapshot rename behaves differently than rename(2) - if the
12364 * new name exists, EEXIST is returned.
12365 */
12366 vnode_put(tond->ni_vp);
12367 error = EEXIST;
12368 goto out2;
12369 }
12370
12371 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
12372 &tond->ni_cnd, ctx);
12373
12374out2:
12375 nameidone(tond);
12376out1:
12377 FREE(newname_buf, M_TEMP);
12378 vnode_put(fvp);
12379 vnode_put(snapdvp);
12380 vnode_put(rvp);
12381 nameidone(fromnd);
12382out:
12383 FREE(__rename_data, M_TEMP);
0a7de745 12384 return error;
39037602
A
12385}
12386
12387/*
12388 * Mount a Filesystem snapshot
12389 *
12390 * get the vnode for the unnamed snapshot directory and the snapshot and
12391 * mount the snapshot.
12392 */
12393static int
12394snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 12395 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602
A
12396{
12397 vnode_t rvp, snapdvp, snapvp, vp, pvp;
12398 int error;
12399 struct nameidata *snapndp, *dirndp;
12400 /* carving out a chunk for structs that are too big to be on stack. */
12401 struct {
12402 struct nameidata snapnd;
12403 struct nameidata dirnd;
12404 } * __snapshot_mount_data;
12405
12406 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
12407 M_TEMP, M_WAITOK);
12408 snapndp = &__snapshot_mount_data->snapnd;
12409 dirndp = &__snapshot_mount_data->dirnd;
12410
12411 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
12412 OP_LOOKUP, ctx);
0a7de745 12413 if (error) {
39037602 12414 goto out;
0a7de745 12415 }
39037602
A
12416
12417 snapvp = snapndp->ni_vp;
12418 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
12419 error = EIO;
12420 goto out1;
12421 }
12422
12423 /* Get the vnode to be covered */
12424 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12425 UIO_USERSPACE, directory, ctx);
12426 error = namei(dirndp);
0a7de745 12427 if (error) {
39037602 12428 goto out1;
0a7de745 12429 }
39037602
A
12430
12431 vp = dirndp->ni_vp;
12432 pvp = dirndp->ni_dvp;
12433
12434 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12435 error = EINVAL;
12436 } else {
12437 mount_t mp = vnode_mount(rvp);
12438 struct fs_snapshot_mount_args smnt_data;
12439
12440 smnt_data.sm_mp = mp;
12441 smnt_data.sm_cnp = &snapndp->ni_cnd;
12442 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
0a7de745
A
12443 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
12444 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
39037602
A
12445 }
12446
12447 vnode_put(vp);
12448 vnode_put(pvp);
12449 nameidone(dirndp);
12450out1:
12451 vnode_put(snapvp);
12452 vnode_put(snapdvp);
12453 vnode_put(rvp);
12454 nameidone(snapndp);
12455out:
12456 FREE(__snapshot_mount_data, M_TEMP);
0a7de745 12457 return error;
39037602
A
12458}
12459
813fb2f6
A
12460/*
12461 * Root from a snapshot of the filesystem
12462 *
12463 * Marks the filesystem to root from the given snapshot on next boot.
12464 */
12465static int
12466snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
0a7de745
A
12467 vfs_context_t ctx)
12468{
12469 int error;
12470 vnode_t rvp;
12471 mount_t mp;
12472 struct fs_snapshot_root_args root_data;
12473 struct componentname cnp;
12474 caddr_t name_buf;
12475 size_t name_len;
12476
12477 error = vnode_getfromfd(ctx, dirfd, &rvp);
12478 if (error) {
12479 return error;
12480 }
12481 mp = vnode_mount(rvp);
12482
12483 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12484 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12485 if (error) {
12486 FREE(name_buf, M_TEMP);
12487 vnode_put(rvp);
12488 return error;
12489 }
12490
12491 // XXX MAC checks ?
12492
12493 /*
12494 * Grab mount_iterref so that we can release the vnode,
12495 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12496 */
12497 error = mount_iterref(mp, 0);
12498 vnode_put(rvp);
12499 if (error) {
12500 FREE(name_buf, M_TEMP);
12501 return error;
12502 }
12503
12504 memset(&cnp, 0, sizeof(cnp));
12505 cnp.cn_pnbuf = (char *)name_buf;
12506 cnp.cn_nameiop = LOOKUP;
12507 cnp.cn_flags = ISLASTCN | HASBUF;
12508 cnp.cn_pnlen = MAXPATHLEN;
12509 cnp.cn_nameptr = cnp.cn_pnbuf;
12510 cnp.cn_namelen = (int)name_len;
12511 root_data.sr_cnp = &cnp;
12512
12513 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12514
12515 mount_iterdrop(mp);
12516 FREE(name_buf, M_TEMP);
12517
12518 return error;
813fb2f6
A
12519}
12520
39037602
A
12521/*
12522 * FS snapshot operations dispatcher
12523 */
12524int
12525fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12526 __unused int32_t *retval)
12527{
12528 int error;
12529 vfs_context_t ctx = vfs_context_current();
12530
813fb2f6
A
12531 AUDIT_ARG(fd, uap->dirfd);
12532 AUDIT_ARG(value32, uap->op);
12533
39037602 12534 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
0a7de745
A
12535 if (error) {
12536 return error;
12537 }
12538
12539 /*
12540 * Enforce user authorization for snapshot modification operations
12541 */
12542 if ((uap->op != SNAPSHOT_OP_MOUNT) &&
12543 (uap->op != SNAPSHOT_OP_ROOT)) {
12544 vnode_t dvp = NULLVP;
12545 vnode_t devvp = NULLVP;
12546 mount_t mp;
12547
12548 error = vnode_getfromfd(ctx, uap->dirfd, &dvp);
12549 if (error) {
12550 return error;
12551 }
12552 mp = vnode_mount(dvp);
12553 devvp = mp->mnt_devvp;
12554
12555 /* get an iocount on devvp */
12556 if (devvp == NULLVP) {
12557 error = vnode_lookup(mp->mnt_vfsstat.f_mntfromname, 0, &devvp, ctx);
12558 /* for mounts which arent block devices */
12559 if (error == ENOENT) {
12560 error = ENXIO;
12561 }
12562 } else {
12563 error = vnode_getwithref(devvp);
12564 }
12565
12566 if (error) {
12567 vnode_put(dvp);
12568 return error;
12569 }
12570
12571 if ((vfs_context_issuser(ctx) == 0) &&
12572 (vnode_authorize(devvp, NULL, KAUTH_VNODE_WRITE_DATA, ctx) != 0)) {
12573 error = EPERM;
12574 }
12575 vnode_put(dvp);
12576 vnode_put(devvp);
12577
12578 if (error) {
12579 return error;
12580 }
12581 }
39037602
A
12582
12583 switch (uap->op) {
12584 case SNAPSHOT_OP_CREATE:
12585 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12586 break;
12587 case SNAPSHOT_OP_DELETE:
12588 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12589 break;
12590 case SNAPSHOT_OP_RENAME:
12591 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12592 uap->flags, ctx);
12593 break;
12594 case SNAPSHOT_OP_MOUNT:
12595 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12596 uap->data, uap->flags, ctx);
12597 break;
0a7de745
A
12598 case SNAPSHOT_OP_REVERT:
12599 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12600 break;
d9a64523 12601#if CONFIG_MNT_ROOTSNAP
813fb2f6
A
12602 case SNAPSHOT_OP_ROOT:
12603 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12604 break;
d9a64523 12605#endif /* CONFIG_MNT_ROOTSNAP */
39037602
A
12606 default:
12607 error = ENOSYS;
12608 }
12609
0a7de745 12610 return error;
39037602 12611}