]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
5ba3f43e 2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
5ba3f43e
A
110#include <vfs/vfs_disk_conditioner.h>
111
b0d623f7 112#include <security/audit/audit.h>
e5568f75
A
113#include <bsm/audit_kevents.h>
114
91447636
A
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
6d2010ae 118#include <kern/task.h>
91447636
A
119
120#include <vm/vm_pageout.h>
39037602 121#include <vm/vm_protos.h>
1c79356b 122
91447636 123#include <libkern/OSAtomic.h>
b0d623f7 124#include <pexpert/pexpert.h>
3e170ce0 125#include <IOKit/IOBSD.h>
55e303ae 126
490019cf
A
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
2d21ac55
A
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
1c79356b 135
/*
 * Path buffer helpers.
 *
 * With CONFIG_FSE, GET_PATH() assigns the result of get_pathbuff() to its
 * argument and RELEASE_PATH() hands it to release_pathbuff().  Without
 * CONFIG_FSE, the buffer is MAXPATHLEN bytes obtained with MALLOC_ZONE() /
 * returned with FREE_ZONE() using the M_NAMEI type.
 *
 * NOTE(review): every expansion already ends in ';'.  Writing
 * "GET_PATH(x);" therefore yields an extra empty statement, which makes
 * these macros unsafe as the unbraced body of an if/else.  Always use them
 * inside braces.
 */
39037602 136#if CONFIG_FSE
2d21ac55 137#define GET_PATH(x) \
39037602 138 (x) = get_pathbuff();
2d21ac55
A
139#define RELEASE_PATH(x) \
140 release_pathbuff(x);
39037602 141#else
2d21ac55 142#define GET_PATH(x) \
39037602 143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2d21ac55
A
144#define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146#endif /* CONFIG_FSE */
147
a39ff7e2
A
148#ifndef HFS_GET_BOOT_INFO
149#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
150#endif
151
152#ifndef HFS_SET_BOOT_INFO
153#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
154#endif
155
156#ifndef APFSIOC_REVERT_TO_SNAPSHOT
157#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
158#endif
159
5ba3f43e
A
160extern void disk_conditioner_unmount(mount_t mp);
161
2d21ac55
A
162/* struct for checkdirs iteration */
/*
 * Argument bundle passed (as the opaque "arg" pointer) to
 * checkdirs_callback().
 * NOTE(review): the member names mirror the "olddp" parameter of
 * checkdirs(); presumably olddp is the directory vnode being replaced and
 * newdp the vnode that replaces it -- confirm against checkdirs().
 */
163struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166};
167/* callback for checkdirs iteration */
168static int checkdirs_callback(proc_t p, void * arg);
1c79356b 169
91447636 170static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 171static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
172void enablequotas(struct mount *mp, vfs_context_t ctx);
173static int getfsstat_callback(mount_t mp, void * arg);
174static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 175static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 176static int sync_callback(mount_t, void *);
39037602
A
177static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
178 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 179 boolean_t partial_copy);
b0d623f7
A
180static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
181 user_addr_t bufp);
182static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
183static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
184 struct componentname *cnp, user_addr_t fsmountargs,
185 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
186 vfs_context_t ctx);
187void vfs_notify_mount(vnode_t pdvp);
188
189int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 190
fe8ab488
A
191struct fd_vn_data * fg_vn_data_alloc(void);
192
c18c124e
A
193/*
194 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
195 * Concurrent lookups (or lookups by ids) on hard links can cause the
196 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
197 * does) to return ENOENT as the path cannot be returned from the name cache
198 * alone. We have no option but to retry and hope to get one namei->reverse path
199 * generation done without an intervening lookup, lookup by id on the hard link
200 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
201 * which currently are the MAC hooks for rename, unlink and rmdir.
202 */
203#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
204
fe8ab488
A
205static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
206
207static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
208
b7266188 209#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
210static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
211static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
212static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
213static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
214static void mount_end_update(mount_t mp);
6d2010ae 215static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
216#endif /* CONFIG_IMGSRC_ACCESS */
217
d9a64523
A
218//snapshot functions
219#if CONFIG_MNT_ROOTSNAP
220static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
221#else
222static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
223#endif
224
2d21ac55
A
225int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
226
227__private_extern__
228int sync_internal(void);
229
2d21ac55 230__private_extern__
c18c124e 231int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 232
fe8ab488
A
233extern lck_grp_t *fd_vn_lck_grp;
234extern lck_grp_attr_t *fd_vn_lck_grp_attr;
235extern lck_attr_t *fd_vn_lck_attr;
236
2d21ac55
A
237/*
238 * incremented each time a mount or unmount operation occurs
239 * used to invalidate the cached value of the rootvp in the
240 * mount structure utilized by cache_lookup_path
241 */
b0d623f7 242uint32_t mount_generation = 0;
1c79356b
A
243
244/* counts number of mount and unmount operations */
245unsigned int vfs_nummntops=0;
246
39236c6e
A
247extern const struct fileops vnops;
248#if CONFIG_APPLEDOUBLE
39037602 249extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 250#endif /* CONFIG_APPLEDOUBLE */
91447636 251
1c79356b
A
252/*
253 * Virtual File System System Calls
254 */
255
490019cf 256#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
257/*
258 * Private in-kernel mounting spi (NFS only, not exported)
259 */
260 __private_extern__
261boolean_t
262vfs_iskernelmount(mount_t mp)
263{
264 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
265}
266
267 __private_extern__
268int
269kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
270 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
271{
272 struct nameidata nd;
273 boolean_t did_namei;
274 int error;
275
39037602 276 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
277 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
278
279 /*
280 * Get the vnode to be covered if it's not supplied
281 */
282 if (vp == NULLVP) {
283 error = namei(&nd);
284 if (error)
285 return (error);
286 vp = nd.ni_vp;
287 pvp = nd.ni_dvp;
288 did_namei = TRUE;
289 } else {
290 char *pnbuf = CAST_DOWN(char *, path);
291
292 nd.ni_cnd.cn_pnbuf = pnbuf;
293 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
294 did_namei = FALSE;
295 }
296
297 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
298 syscall_flags, kern_flags, NULL, TRUE, ctx);
299
300 if (did_namei) {
301 vnode_put(vp);
302 vnode_put(pvp);
303 nameidone(&nd);
304 }
305
306 return (error);
307}
fe8ab488 308#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 309
1c79356b
A
310/*
311 * Mount a file system.
312 */
1c79356b
A
313/* ARGSUSED */
314int
b0d623f7 315mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
316{
317 struct __mac_mount_args muap;
318
319 muap.type = uap->type;
320 muap.path = uap->path;
321 muap.flags = uap->flags;
322 muap.data = uap->data;
323 muap.mac_p = USER_ADDR_NULL;
324 return (__mac_mount(p, &muap, retval));
325}
326
5ba3f43e
A
327int
328fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
329{
330 struct componentname cn;
331 vfs_context_t ctx = vfs_context_current();
332 size_t dummy = 0;
333 int error;
334 int flags = uap->flags;
335 char fstypename[MFSNAMELEN];
336 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
337 vnode_t pvp;
338 vnode_t vp;
339
340 AUDIT_ARG(fd, uap->fd);
341 AUDIT_ARG(fflags, flags);
342 /* fstypename will get audited by mount_common */
343
344 /* Sanity check the flags */
345 if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
346 return (ENOTSUP);
347 }
348
349 if (flags & MNT_UNION) {
350 return (EPERM);
351 }
352
353 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
354 if (error) {
355 return (error);
356 }
357
358 if ((error = file_vnode(uap->fd, &vp)) != 0) {
359 return (error);
360 }
361
362 if ((error = vnode_getwithref(vp)) != 0) {
363 file_drop(uap->fd);
364 return (error);
365 }
366
367 pvp = vnode_getparent(vp);
368 if (pvp == NULL) {
369 vnode_put(vp);
370 file_drop(uap->fd);
371 return (EINVAL);
372 }
373
374 memset(&cn, 0, sizeof(struct componentname));
375 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
376 cn.cn_pnlen = MAXPATHLEN;
377
378 if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
379 FREE(cn.cn_pnbuf, M_TEMP);
380 vnode_put(pvp);
381 vnode_put(vp);
382 file_drop(uap->fd);
383 return (error);
384 }
385
386 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
387
388 FREE(cn.cn_pnbuf, M_TEMP);
389 vnode_put(pvp);
390 vnode_put(vp);
391 file_drop(uap->fd);
392
393 return (error);
394}
395
6d2010ae 396void
39037602 397vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
398{
399 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
400 lock_vnode_and_post(pdvp, NOTE_WRITE);
401}
402
b0d623f7
A
403/*
404 * __mac_mount:
405 * Mount a file system taking into account MAC label behavior.
406 * See mount(2) man page for more information
407 *
408 * Parameters: p Process requesting the mount
409 * uap User argument descriptor (see below)
39037602 410 * retval (ignored)
b0d623f7
A
411 *
412 * Indirect: uap->type Filesystem type
413 * uap->path Path to mount
39037602
A
414 * uap->data Mount arguments
415 * uap->mac_p MAC info
b0d623f7 416 * uap->flags Mount flags
39037602 417 *
b0d623f7
A
418 *
419 * Returns: 0 Success
420 * !0 Not success
421 */
6d2010ae
A
/*
 * Set to TRUE by __mac_mount() when a mount request targeting the root
 * vnode of the root filesystem does not include MNT_RDONLY.  The
 * assignment is only compiled in when CHECK_CS_VALIDATION_BITMAP is
 * enabled; the flag is never reset in __mac_mount().
 */
422boolean_t root_fs_upgrade_try = FALSE;
423
2d21ac55 424int
b0d623f7 425__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 426{
39236c6e
A
427 vnode_t pvp = NULL;
428 vnode_t vp = NULL;
429 int need_nameidone = 0;
6d2010ae
A
430 vfs_context_t ctx = vfs_context_current();
431 char fstypename[MFSNAMELEN];
432 struct nameidata nd;
433 size_t dummy=0;
434 char *labelstr = NULL;
435 int flags = uap->flags;
436 int error;
39037602 437#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 438 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
439#else
440#pragma unused(p)
441#endif
6d2010ae
A
442 /*
443 * Get the fs type name from user space
444 */
445 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
446 if (error)
447 return (error);
448
449 /*
450 * Get the vnode to be covered
451 */
39037602 452 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
453 UIO_USERSPACE, uap->path, ctx);
454 error = namei(&nd);
39236c6e
A
455 if (error) {
456 goto out;
457 }
458 need_nameidone = 1;
6d2010ae
A
459 vp = nd.ni_vp;
460 pvp = nd.ni_dvp;
39037602 461
6d2010ae
A
462#ifdef CONFIG_IMGSRC_ACCESS
463 /* Mounting image source cannot be batched with other operations */
464 if (flags == MNT_IMGSRC_BY_INDEX) {
465 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
466 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
467 goto out;
468 }
469#endif /* CONFIG_IMGSRC_ACCESS */
470
471#if CONFIG_MACF
472 /*
473 * Get the label string (if any) from user space
474 */
475 if (uap->mac_p != USER_ADDR_NULL) {
476 struct user_mac mac;
477 size_t ulen = 0;
478
479 if (is_64bit) {
480 struct user64_mac mac64;
481 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
482 mac.m_buflen = mac64.m_buflen;
483 mac.m_string = mac64.m_string;
484 } else {
485 struct user32_mac mac32;
486 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
487 mac.m_buflen = mac32.m_buflen;
488 mac.m_string = mac32.m_string;
489 }
490 if (error)
491 goto out;
492 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
493 (mac.m_buflen < 2)) {
494 error = EINVAL;
495 goto out;
496 }
497 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
498 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
499 if (error) {
500 goto out;
501 }
502 AUDIT_ARG(mac_string, labelstr);
503 }
504#endif /* CONFIG_MACF */
505
506 AUDIT_ARG(fflags, flags);
507
4bd07ac2
A
508#if SECURE_KERNEL
509 if (flags & MNT_UNION) {
510 /* No union mounts on release kernels */
511 error = EPERM;
512 goto out;
513 }
514#endif
515
6d2010ae 516 if ((vp->v_flag & VROOT) &&
39236c6e
A
517 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
518 if (!(flags & MNT_UNION)) {
6d2010ae 519 flags |= MNT_UPDATE;
39236c6e
A
520 }
521 else {
39037602 522 /*
39236c6e 523 * For a union mount on '/', treat it as fresh
39037602
A
524 * mount instead of update.
525 * Otherwise, union mouting on '/' used to panic the
526 * system before, since mnt_vnodecovered was found to
527 * be NULL for '/' which is required for unionlookup
39236c6e
A
528 * after it gets ENOENT on union mount.
529 */
530 flags = (flags & ~(MNT_UPDATE));
531 }
532
4bd07ac2 533#if SECURE_KERNEL
39236c6e
A
534 if ((flags & MNT_RDONLY) == 0) {
535 /* Release kernels are not allowed to mount "/" as rw */
536 error = EPERM;
39037602 537 goto out;
39236c6e 538 }
39236c6e
A
539#endif
540 /*
541 * See 7392553 for more details on why this check exists.
542 * Suffice to say: If this check is ON and something tries
543 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
544 * bitmap optimization.
545 */
6d2010ae 546#if CHECK_CS_VALIDATION_BITMAP
39236c6e 547 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
548 root_fs_upgrade_try = TRUE;
549 }
550#endif
551 }
552
553 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
554 labelstr, FALSE, ctx);
39236c6e 555
6d2010ae 556out:
39236c6e 557
6d2010ae
A
558#if CONFIG_MACF
559 if (labelstr)
560 FREE(labelstr, M_MACTEMP);
561#endif /* CONFIG_MACF */
562
39236c6e
A
563 if (vp) {
564 vnode_put(vp);
565 }
566 if (pvp) {
567 vnode_put(pvp);
568 }
569 if (need_nameidone) {
570 nameidone(&nd);
571 }
6d2010ae
A
572
573 return (error);
574}
575
576/*
577 * common mount implementation (final stage of mounting)
39037602 578
6d2010ae
A
579 * Arguments:
580 * fstypename file system type (i.e. its vfs name)
581 * pvp parent of covered vnode
582 * vp covered vnode
583 * cnp component name (ie path) of covered vnode
584 * flags generic mount flags
585 * fsmountargs file system specific data
586 * labelstr optional MAC label
587 * kernelmount TRUE for mounts initiated from inside the kernel
588 * ctx caller's context
589 */
590static int
591mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
592 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
593 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
594{
39236c6e
A
595#if !CONFIG_MACF
596#pragma unused(labelstr)
597#endif
91447636
A
598 struct vnode *devvp = NULLVP;
599 struct vnode *device_vnode = NULLVP;
2d21ac55
A
600#if CONFIG_MACF
601 struct vnode *rvp;
602#endif
1c79356b 603 struct mount *mp;
6601e61a 604 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 605 struct proc *p = vfs_context_proc(ctx);
91447636 606 int error, flag = 0;
91447636 607 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
608 int ronly = 0;
609 int mntalloc = 0;
b0d623f7 610 boolean_t vfsp_ref = FALSE;
743b1565 611 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
612 boolean_t did_rele = FALSE;
613 boolean_t have_usecount = FALSE;
9bccf70c 614
1c79356b 615 /*
6d2010ae 616 * Process an update for an existing mount
1c79356b 617 */
6d2010ae 618 if (flags & MNT_UPDATE) {
1c79356b 619 if ((vp->v_flag & VROOT) == 0) {
91447636
A
620 error = EINVAL;
621 goto out1;
1c79356b
A
622 }
623 mp = vp->v_mount;
d12e1678 624
91447636 625 /* unmount in progress return error */
b0d623f7 626 mount_lock_spin(mp);
91447636
A
627 if (mp->mnt_lflag & MNT_LUNMOUNT) {
628 mount_unlock(mp);
629 error = EBUSY;
630 goto out1;
d12e1678 631 }
91447636
A
632 mount_unlock(mp);
633 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 634 is_rwlock_locked = TRUE;
1c79356b
A
635 /*
636 * We only allow the filesystem to be reloaded if it
637 * is currently mounted read-only.
638 */
6d2010ae 639 if ((flags & MNT_RELOAD) &&
1c79356b 640 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
641 error = ENOTSUP;
642 goto out1;
1c79356b 643 }
b7266188 644
316670eb
A
645 /*
646 * If content protection is enabled, update mounts are not
647 * allowed to turn it off.
648 */
39037602 649 if ((mp->mnt_flag & MNT_CPROTECT) &&
316670eb
A
650 ((flags & MNT_CPROTECT) == 0)) {
651 error = EINVAL;
652 goto out1;
653 }
654
39037602 655#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
656 /* Can't downgrade the backer of the root FS */
657 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 658 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
659 error = ENOTSUP;
660 goto out1;
661 }
662#endif /* CONFIG_IMGSRC_ACCESS */
663
1c79356b
A
664 /*
665 * Only root, or the user that did the original mount is
666 * permitted to update it.
667 */
2d21ac55
A
668 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
669 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
670 goto out1;
671 }
672#if CONFIG_MACF
673 error = mac_mount_check_remount(ctx, mp);
674 if (error != 0) {
91447636 675 goto out1;
1c79356b 676 }
2d21ac55 677#endif
1c79356b 678 /*
91447636
A
679 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
680 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 681 */
6d2010ae
A
682 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
683 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 684 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 685 flags |= MNT_NOEXEC;
1c79356b 686 }
d12e1678
A
687 flag = mp->mnt_flag;
688
316670eb
A
689
690
6d2010ae 691 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 692
91447636 693 vfsp = mp->mnt_vtable;
1c79356b
A
694 goto update;
695 }
5ba3f43e 696
1c79356b 697 /*
91447636 698 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
699 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
700 */
6d2010ae
A
701 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
702 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 703 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 704 flags |= MNT_NOEXEC;
1c79356b 705 }
91447636 706
55e303ae
A
707 /* XXXAUDIT: Should we capture the type on the error path as well? */
708 AUDIT_ARG(text, fstypename);
91447636 709 mount_list_lock();
1c79356b 710 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
711 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
712 vfsp->vfc_refcount++;
713 vfsp_ref = TRUE;
1c79356b 714 break;
b0d623f7 715 }
91447636 716 mount_list_unlock();
1c79356b 717 if (vfsp == NULL) {
91447636
A
718 error = ENODEV;
719 goto out1;
1c79356b 720 }
6d2010ae
A
721
722 /*
723 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
724 */
725 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
726 error = EINVAL; /* unsupported request */
2d21ac55 727 goto out1;
6d2010ae
A
728 }
729
730 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
731 if (error != 0) {
91447636 732 goto out1;
1c79356b 733 }
1c79356b
A
734
735 /*
6d2010ae 736 * Allocate and initialize the filesystem (mount_t)
1c79356b 737 */
b0d623f7 738 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 739 M_MOUNT, M_WAITOK);
b0d623f7 740 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 741 mntalloc = 1;
0b4e3aa0
A
742
743 /* Initialize the default IO constraints */
744 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
745 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
746 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
747 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
748 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 749 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
750 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
751 mp->mnt_ioscale = 1;
2d21ac55
A
752 mp->mnt_ioflags = 0;
753 mp->mnt_realrootvp = NULLVP;
754 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
755
756 TAILQ_INIT(&mp->mnt_vnodelist);
757 TAILQ_INIT(&mp->mnt_workerqueue);
758 TAILQ_INIT(&mp->mnt_newvnodes);
759 mount_lock_init(mp);
760 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 761 is_rwlock_locked = TRUE;
1c79356b 762 mp->mnt_op = vfsp->vfc_vfsops;
91447636 763 mp->mnt_vtable = vfsp;
91447636 764 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 765 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
766 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
767 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 768 mp->mnt_vnodecovered = vp;
2d21ac55 769 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
770 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
771 mp->mnt_devbsdunit = 0;
1c79356b 772
91447636
A
773 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
774 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 775
490019cf 776#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
777 if (kernelmount)
778 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
779 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
780 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 781#endif /* NFSCLIENT || DEVFS */
6d2010ae 782
1c79356b 783update:
5ba3f43e 784
1c79356b
A
785 /*
786 * Set the mount level flags.
787 */
6d2010ae 788 if (flags & MNT_RDONLY)
1c79356b 789 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
790 else if (mp->mnt_flag & MNT_RDONLY) {
791 // disallow read/write upgrades of file systems that
792 // had the TYPENAME_OVERRIDE feature set.
793 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
794 error = EPERM;
795 goto out1;
796 }
1c79356b 797 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 798 }
0b4e3aa0
A
799 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
800 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
801 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
802 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
803 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
804
805#if SECURE_KERNEL
806#if !CONFIG_MNT_SUID
807 /*
5ba3f43e 808 * On release builds of iOS based platforms, always enforce NOSUID on
813fb2f6
A
809 * all mounts. We do this here because we can catch update mounts as well as
810 * non-update mounts in this case.
811 */
812 mp->mnt_flag |= (MNT_NOSUID);
813#endif
814#endif
815
6d2010ae
A
816 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
817 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
818 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
819 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
820 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
821
822#if CONFIG_MACF
6d2010ae 823 if (flags & MNT_MULTILABEL) {
2d21ac55
A
824 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
825 error = EINVAL;
826 goto out1;
827 }
828 mp->mnt_flag |= MNT_MULTILABEL;
829 }
830#endif
6d2010ae
A
831 /*
832 * Process device path for local file systems if requested
833 */
39037602
A
834 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
835 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 836 if (vfs_context_is64bit(ctx)) {
91447636 837 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
39037602 838 goto out1;
91447636
A
839 fsmountargs += sizeof(devpath);
840 } else {
b0d623f7 841 user32_addr_t tmp;
91447636 842 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
39037602 843 goto out1;
91447636
A
844 /* munge into LP64 addr */
845 devpath = CAST_USER_ADDR_T(tmp);
846 fsmountargs += sizeof(tmp);
847 }
848
6d2010ae 849 /* Lookup device and authorize access to it */
91447636 850 if ((devpath)) {
6d2010ae
A
851 struct nameidata nd;
852
853 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
854 if ( (error = namei(&nd)) )
91447636
A
855 goto out1;
856
3e170ce0 857 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 858 devvp = nd.ni_vp;
91447636 859
6d2010ae 860 nameidone(&nd);
91447636
A
861
862 if (devvp->v_type != VBLK) {
863 error = ENOTBLK;
864 goto out2;
865 }
866 if (major(devvp->v_rdev) >= nblkdev) {
867 error = ENXIO;
868 goto out2;
869 }
870 /*
871 * If mount by non-root, then verify that user has necessary
872 * permissions on the device.
873 */
2d21ac55 874 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
875 mode_t accessmode = KAUTH_VNODE_READ_DATA;
876
91447636
A
877 if ((mp->mnt_flag & MNT_RDONLY) == 0)
878 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 879 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
880 goto out2;
881 }
882 }
6d2010ae
A
883 /* On first mount, preflight and open device */
884 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
885 if ( (error = vnode_ref(devvp)) )
886 goto out2;
887 /*
888 * Disallow multiple mounts of the same device.
889 * Disallow mounting of a device that is currently in use
890 * (except for root, which might share swap device for miniroot).
891 * Flush out any old buffers remaining from a previous use.
892 */
893 if ( (error = vfs_mountedon(devvp)) )
894 goto out3;
39037602 895
91447636
A
896 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
897 error = EBUSY;
898 goto out3;
899 }
2d21ac55 900 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
901 error = ENOTBLK;
902 goto out3;
903 }
904 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
905 goto out3;
906
907 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
908#if CONFIG_MACF
909 error = mac_vnode_check_open(ctx,
910 devvp,
911 ronly ? FREAD : FREAD|FWRITE);
912 if (error)
913 goto out3;
914#endif /* MAC */
915 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
916 goto out3;
917
918 mp->mnt_devvp = devvp;
919 device_vnode = devvp;
b0d623f7 920
6d2010ae
A
921 } else if ((mp->mnt_flag & MNT_RDONLY) &&
922 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
923 (device_vnode = mp->mnt_devvp)) {
924 dev_t dev;
925 int maj;
926 /*
927 * If upgrade to read-write by non-root, then verify
928 * that user has necessary permissions on the device.
929 */
930 vnode_getalways(device_vnode);
b0d623f7 931
6d2010ae 932 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 933 (error = vnode_authorize(device_vnode, NULL,
6d2010ae
A
934 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
935 ctx)) != 0) {
936 vnode_put(device_vnode);
937 goto out2;
938 }
b0d623f7 939
6d2010ae
A
940 /* Tell the device that we're upgrading */
941 dev = (dev_t)device_vnode->v_rdev;
942 maj = major(dev);
b0d623f7 943
6d2010ae
A
944 if ((u_int)maj >= (u_int)nblkdev)
945 panic("Volume mounted on a device with invalid major number.");
b0d623f7 946
6d2010ae
A
947 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
948 vnode_put(device_vnode);
91447636 949 device_vnode = NULLVP;
6d2010ae
A
950 if (error != 0) {
951 goto out2;
952 }
91447636
A
953 }
954 }
2d21ac55 955#if CONFIG_MACF
6d2010ae 956 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
957 mac_mount_label_init(mp);
958 mac_mount_label_associate(ctx, mp);
959 }
6d2010ae
A
960 if (labelstr) {
961 if ((flags & MNT_UPDATE) != 0) {
962 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
963 if (error != 0)
964 goto out3;
965 }
2d21ac55
A
966 }
967#endif
1c79356b
A
968 /*
969 * Mount the filesystem.
970 */
39037602
A
971 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
972 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
973 (caddr_t)fsmountargs, 0, ctx);
974 } else {
975 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
976 }
d12e1678 977
6d2010ae 978 if (flags & MNT_UPDATE) {
1c79356b
A
979 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
980 mp->mnt_flag &= ~MNT_RDONLY;
981 mp->mnt_flag &=~
982 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
983 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
984 if (error)
6d2010ae 985 mp->mnt_flag = flag; /* restore flag value */
91447636
A
986 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
987 lck_rw_done(&mp->mnt_rwlock);
743b1565 988 is_rwlock_locked = FALSE;
9bccf70c 989 if (!error)
2d21ac55 990 enablequotas(mp, ctx);
6d2010ae 991 goto exit;
1c79356b 992 }
6d2010ae 993
1c79356b
A
994 /*
995 * Put the new filesystem on the mount list after root.
996 */
6601e61a 997 if (error == 0) {
2d21ac55
A
998 struct vfs_attr vfsattr;
999#if CONFIG_MACF
1000 if (vfs_flags(mp) & MNT_MULTILABEL) {
1001 error = VFS_ROOT(mp, &rvp, ctx);
1002 if (error) {
1003 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1004 goto out3;
1005 }
2d21ac55 1006 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
1007 /*
1008 * drop reference provided by VFS_ROOT
1009 */
1010 vnode_put(rvp);
1011
2d21ac55
A
1012 if (error)
1013 goto out3;
1014 }
1015#endif /* MAC */
1016
1017 vnode_lock_spin(vp);
1018 CLR(vp->v_flag, VMOUNT);
91447636
A
1019 vp->v_mountedhere = mp;
1020 vnode_unlock(vp);
1021
2d21ac55
A
1022 /*
1023 * taking the name_cache_lock exclusively will
1024 * insure that everyone is out of the fast path who
1025 * might be trying to use a now stale copy of
1026 * vp->v_mountedhere->mnt_realrootvp
1027 * bumping mount_generation causes the cached values
1028 * to be invalidated
1029 */
1030 name_cache_lock();
1031 mount_generation++;
1032 name_cache_unlock();
1033
b0d623f7
A
1034 error = vnode_ref(vp);
1035 if (error != 0) {
1036 goto out4;
1037 }
1038
1039 have_usecount = TRUE;
91447636 1040
2d21ac55 1041 error = checkdirs(vp, ctx);
6601e61a
A
1042 if (error != 0) {
1043 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1044 goto out4;
1045 }
39037602
A
1046 /*
1047 * there is no cleanup code here so I have made it void
91447636
A
1048 * we need to revisit this
1049 */
2d21ac55 1050 (void)VFS_START(mp, 0, ctx);
1c79356b 1051
6d2010ae
A
1052 if (mount_list_add(mp) != 0) {
1053 /*
1054 * The system is shutting down trying to umount
1055 * everything, so fail with a plausible errno.
1056 */
1057 error = EBUSY;
b0d623f7
A
1058 goto out4;
1059 }
6601e61a
A
1060 lck_rw_done(&mp->mnt_rwlock);
1061 is_rwlock_locked = FALSE;
1062
2d21ac55
A
1063 /* Check if this mounted file system supports EAs or named streams. */
1064 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1065 VFSATTR_INIT(&vfsattr);
1066 VFSATTR_WANTED(&vfsattr, f_capabilities);
1067 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 1068 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
1069 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1070 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1071 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1072 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1073 }
1074#if NAMEDSTREAMS
1075 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1076 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1077 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1078 }
1079#endif
1080 /* Check if this file system supports path from id lookups. */
1081 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1082 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1083 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1084 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1085 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1086 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1087 }
39037602
A
1088
1089 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1090 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1091 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1092 }
2d21ac55
A
1093 }
1094 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1095 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1096 }
1097 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1098 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1099 }
1c79356b 1100 /* increment the operations count */
b0d623f7 1101 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1102 enablequotas(mp, ctx);
91447636
A
1103
1104 if (device_vnode) {
1105 device_vnode->v_specflags |= SI_MOUNTEDON;
1106
1107 /*
1108 * cache the IO attributes for the underlying physical media...
1109 * an error return indicates the underlying driver doesn't
1110 * support all the queries necessary... however, reasonable
1111 * defaults will have been set, so no reason to bail or care
1112 */
1113 vfs_init_io_attributes(device_vnode, mp);
39037602 1114 }
6601e61a
A
1115
1116 /* Now that mount is setup, notify the listeners */
6d2010ae 1117 vfs_notify_mount(pvp);
3e170ce0
A
1118 IOBSDMountChange(mp, kIOMountChangeMount);
1119
1c79356b 1120 } else {
6d2010ae
A
1121 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1122 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1123 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
6d2010ae
A
1124 mp->mnt_vtable->vfc_name, error);
1125 }
1126
2d21ac55 1127 vnode_lock_spin(vp);
1c79356b 1128 CLR(vp->v_flag, VMOUNT);
6601e61a 1129 vnode_unlock(vp);
91447636
A
1130 mount_list_lock();
1131 mp->mnt_vtable->vfc_refcount--;
1132 mount_list_unlock();
55e303ae 1133
91447636 1134 if (device_vnode ) {
91447636 1135 vnode_rele(device_vnode);
b0d623f7 1136 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1137 }
1138 lck_rw_done(&mp->mnt_rwlock);
743b1565 1139 is_rwlock_locked = FALSE;
39037602 1140
6d2010ae
A
1141 /*
1142 * if we get here, we have a mount structure that needs to be freed,
1143 * but since the coveredvp hasn't yet been updated to point at it,
1144 * no need to worry about other threads holding a crossref on this mp
1145 * so it's ok to just free it
1146 */
91447636 1147 mount_lock_destroy(mp);
2d21ac55
A
1148#if CONFIG_MACF
1149 mac_mount_label_destroy(mp);
1150#endif
55e303ae 1151 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1152 }
6d2010ae 1153exit:
91447636 1154 /*
6d2010ae 1155 * drop I/O count on the device vp if there was one
91447636
A
1156 */
1157 if (devpath && devvp)
1158 vnode_put(devvp);
b0d623f7 1159
91447636 1160 return(error);
b0d623f7 1161
6d2010ae 1162/* Error condition exits */
6601e61a 1163out4:
2d21ac55 1164 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1165
1166 /*
6d2010ae
A
1167 * If the mount has been placed on the covered vp,
1168 * it may have been discovered by now, so we have
1169 * to treat this just like an unmount
1170 */
1171 mount_lock_spin(mp);
1172 mp->mnt_lflag |= MNT_LDEAD;
1173 mount_unlock(mp);
1174
6601e61a 1175 if (device_vnode != NULLVP) {
b0d623f7 1176 vnode_rele(device_vnode);
2d21ac55
A
1177 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1178 ctx);
b0d623f7 1179 did_rele = TRUE;
6601e61a 1180 }
6d2010ae 1181
2d21ac55 1182 vnode_lock_spin(vp);
6d2010ae
A
1183
1184 mp->mnt_crossref++;
6601e61a 1185 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1186
6601e61a 1187 vnode_unlock(vp);
6d2010ae 1188
b0d623f7
A
1189 if (have_usecount) {
1190 vnode_rele(vp);
1191 }
91447636 1192out3:
6d2010ae 1193 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1194 vnode_rele(devvp);
91447636
A
1195out2:
1196 if (devpath && devvp)
1197 vnode_put(devvp);
1198out1:
743b1565
A
1199 /* Release mnt_rwlock only when it was taken */
1200 if (is_rwlock_locked == TRUE) {
1201 lck_rw_done(&mp->mnt_rwlock);
1202 }
39037602 1203
6601e61a 1204 if (mntalloc) {
6d2010ae
A
1205 if (mp->mnt_crossref)
1206 mount_dropcrossref(mp, vp, 0);
1207 else {
1208 mount_lock_destroy(mp);
2d21ac55 1209#if CONFIG_MACF
6d2010ae 1210 mac_mount_label_destroy(mp);
2d21ac55 1211#endif
6d2010ae
A
1212 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1213 }
b0d623f7 1214 }
b0d623f7 1215 if (vfsp_ref) {
6601e61a
A
1216 mount_list_lock();
1217 vfsp->vfc_refcount--;
1218 mount_list_unlock();
6601e61a 1219 }
91447636
A
1220
1221 return(error);
1c79356b
A
1222}
1223
39037602 1224/*
b7266188
A
1225 * Flush in-core data, check for competing mount attempts,
1226 * and set VMOUNT
1227 */
6d2010ae
A
1228int
1229prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1230{
39236c6e
A
1231#if !CONFIG_MACF
1232#pragma unused(cnp,fsname)
1233#endif
b7266188
A
1234 struct vnode_attr va;
1235 int error;
1236
6d2010ae
A
1237 if (!skip_auth) {
1238 /*
1239 * If the user is not root, ensure that they own the directory
1240 * onto which we are attempting to mount.
1241 */
1242 VATTR_INIT(&va);
1243 VATTR_WANTED(&va, va_uid);
1244 if ((error = vnode_getattr(vp, &va, ctx)) ||
1245 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1246 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1247 error = EPERM;
1248 goto out;
1249 }
b7266188
A
1250 }
1251
1252 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1253 goto out;
1254
1255 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1256 goto out;
1257
1258 if (vp->v_type != VDIR) {
1259 error = ENOTDIR;
1260 goto out;
1261 }
1262
1263 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1264 error = EBUSY;
1265 goto out;
1266 }
1267
1268#if CONFIG_MACF
1269 error = mac_mount_check_mount(ctx, vp,
1270 cnp, fsname);
1271 if (error != 0)
1272 goto out;
1273#endif
1274
1275 vnode_lock_spin(vp);
1276 SET(vp->v_flag, VMOUNT);
1277 vnode_unlock(vp);
1278
1279out:
1280 return error;
1281}
1282
6d2010ae
A
1283#if CONFIG_IMGSRC_ACCESS
1284
/* Tracing for the imageboot-source code; expands to an empty statement unless DEBUG is nonzero. */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
6d2010ae 1290
b7266188
A
1291static int
1292authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1293{
1294 struct nameidata nd;
6d2010ae 1295 vnode_t vp, realdevvp;
b7266188
A
1296 mode_t accessmode;
1297 int error;
1298
6d2010ae
A
1299 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1300 if ( (error = namei(&nd)) ) {
1301 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1302 return error;
6d2010ae 1303 }
b7266188 1304
b7266188 1305 vp = nd.ni_vp;
b7266188 1306
6d2010ae
A
1307 if (!vnode_isblk(vp)) {
1308 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1309 error = ENOTBLK;
1310 goto out;
1311 }
6d2010ae
A
1312
1313 realdevvp = mp->mnt_devvp;
1314 if (realdevvp == NULLVP) {
1315 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1316 error = ENXIO;
1317 goto out;
1318 }
6d2010ae
A
1319
1320 error = vnode_getwithref(realdevvp);
1321 if (error != 0) {
1322 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1323 goto out;
1324 }
1325
1326 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1327 IMGSRC_DEBUG("Wrong dev_t.\n");
1328 error = ENXIO;
1329 goto out1;
1330 }
1331
1332 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1333
b7266188
A
1334 /*
1335 * If mount by non-root, then verify that user has necessary
1336 * permissions on the device.
1337 */
1338 if (!vfs_context_issuser(ctx)) {
1339 accessmode = KAUTH_VNODE_READ_DATA;
1340 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1341 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1342 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1343 IMGSRC_DEBUG("Access denied.\n");
1344 goto out1;
1345 }
b7266188
A
1346 }
1347
1348 *devvpp = vp;
6d2010ae
A
1349
1350out1:
1351 vnode_put(realdevvp);
b7266188 1352out:
6d2010ae 1353 nameidone(&nd);
b7266188
A
1354 if (error) {
1355 vnode_put(vp);
1356 }
1357
1358 return error;
1359}
1360
1361/*
1362 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1363 * and call checkdirs()
1364 */
1365static int
1366place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1367{
1368 int error;
1369
1370 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1371
1372 vnode_lock_spin(vp);
1373 CLR(vp->v_flag, VMOUNT);
1374 vp->v_mountedhere = mp;
1375 vnode_unlock(vp);
1376
1377 /*
1378 * taking the name_cache_lock exclusively will
1379 * insure that everyone is out of the fast path who
1380 * might be trying to use a now stale copy of
1381 * vp->v_mountedhere->mnt_realrootvp
1382 * bumping mount_generation causes the cached values
1383 * to be invalidated
1384 */
1385 name_cache_lock();
1386 mount_generation++;
1387 name_cache_unlock();
1388
1389 error = vnode_ref(vp);
1390 if (error != 0) {
1391 goto out;
1392 }
1393
1394 error = checkdirs(vp, ctx);
1395 if (error != 0) {
1396 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1397 vnode_rele(vp);
1398 goto out;
1399 }
1400
1401out:
1402 if (error != 0) {
1403 mp->mnt_vnodecovered = NULLVP;
1404 }
1405 return error;
1406}
1407
1408static void
1409undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1410{
1411 vnode_rele(vp);
1412 vnode_lock_spin(vp);
1413 vp->v_mountedhere = (mount_t)NULL;
1414 vnode_unlock(vp);
1415
1416 mp->mnt_vnodecovered = NULLVP;
1417}
1418
1419static int
1420mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1421{
1422 int error;
1423
1424 /* unmount in progress return error */
1425 mount_lock_spin(mp);
1426 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1427 mount_unlock(mp);
1428 return EBUSY;
1429 }
1430 mount_unlock(mp);
1431 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1432
1433 /*
1434 * We only allow the filesystem to be reloaded if it
1435 * is currently mounted read-only.
1436 */
1437 if ((flags & MNT_RELOAD) &&
1438 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1439 error = ENOTSUP;
1440 goto out;
1441 }
1442
1443 /*
1444 * Only root, or the user that did the original mount is
1445 * permitted to update it.
1446 */
1447 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1448 (!vfs_context_issuser(ctx))) {
b7266188
A
1449 error = EPERM;
1450 goto out;
1451 }
1452#if CONFIG_MACF
1453 error = mac_mount_check_remount(ctx, mp);
1454 if (error != 0) {
1455 goto out;
1456 }
1457#endif
1458
1459out:
1460 if (error) {
1461 lck_rw_done(&mp->mnt_rwlock);
1462 }
1463
1464 return error;
1465}
1466
39037602 1467static void
b7266188
A
1468mount_end_update(mount_t mp)
1469{
1470 lck_rw_done(&mp->mnt_rwlock);
1471}
1472
1473static int
6d2010ae
A
1474get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1475{
1476 vnode_t vp;
1477
1478 if (height >= MAX_IMAGEBOOT_NESTING) {
1479 return EINVAL;
1480 }
1481
1482 vp = imgsrc_rootvnodes[height];
1483 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1484 *rvpp = vp;
1485 return 0;
1486 } else {
1487 return ENOENT;
1488 }
1489}
1490
1491static int
39037602
A
1492relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1493 const char *fsname, vfs_context_t ctx,
6d2010ae 1494 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1495{
1496 int error;
1497 mount_t mp;
1498 boolean_t placed = FALSE;
6d2010ae 1499 vnode_t devvp = NULLVP;
b7266188
A
1500 struct vfstable *vfsp;
1501 user_addr_t devpath;
1502 char *old_mntonname;
6d2010ae
A
1503 vnode_t rvp;
1504 uint32_t height;
1505 uint32_t flags;
b7266188
A
1506
1507 /* If we didn't imageboot, nothing to move */
6d2010ae 1508 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1509 return EINVAL;
1510 }
1511
1512 /* Only root can do this */
1513 if (!vfs_context_issuser(ctx)) {
1514 return EPERM;
1515 }
1516
6d2010ae
A
1517 IMGSRC_DEBUG("looking for root vnode.\n");
1518
1519 /*
1520 * Get root vnode of filesystem we're moving.
1521 */
1522 if (by_index) {
1523 if (is64bit) {
1524 struct user64_mnt_imgsrc_args mia64;
1525 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1526 if (error != 0) {
1527 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1528 return error;
1529 }
1530
1531 height = mia64.mi_height;
1532 flags = mia64.mi_flags;
1533 devpath = mia64.mi_devpath;
1534 } else {
1535 struct user32_mnt_imgsrc_args mia32;
1536 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1537 if (error != 0) {
1538 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1539 return error;
1540 }
1541
1542 height = mia32.mi_height;
1543 flags = mia32.mi_flags;
1544 devpath = mia32.mi_devpath;
1545 }
1546 } else {
1547 /*
1548 * For binary compatibility--assumes one level of nesting.
1549 */
1550 if (is64bit) {
1551 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1552 return error;
1553 } else {
1554 user32_addr_t tmp;
1555 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1556 return error;
1557
1558 /* munge into LP64 addr */
1559 devpath = CAST_USER_ADDR_T(tmp);
1560 }
1561
1562 height = 0;
1563 flags = 0;
1564 }
1565
1566 if (flags != 0) {
1567 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1568 return EINVAL;
1569 }
1570
1571 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1572 if (error != 0) {
6d2010ae 1573 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1574 return error;
1575 }
1576
6d2010ae
A
1577 IMGSRC_DEBUG("got root vnode.\n");
1578
b7266188
A
1579 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1580
1581 /* Can only move once */
6d2010ae 1582 mp = vnode_mount(rvp);
b7266188 1583 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1584 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1585 error = EBUSY;
1586 goto out0;
1587 }
1588
6d2010ae
A
1589 IMGSRC_DEBUG("Starting updated.\n");
1590
b7266188
A
1591 /* Get exclusive rwlock on mount, authorize update on mp */
1592 error = mount_begin_update(mp , ctx, 0);
1593 if (error != 0) {
6d2010ae 1594 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1595 goto out0;
1596 }
1597
39037602 1598 /*
b7266188
A
1599 * It can only be moved once. Flag is set under the rwlock,
1600 * so we're now safe to proceed.
1601 */
1602 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1603 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1604 goto out1;
1605 }
39037602
A
1606
1607
6d2010ae 1608 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1609
1610 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1611 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1612 if (error != 0) {
6d2010ae 1613 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1614 goto out1;
1615 }
39037602 1616
6d2010ae
A
1617 IMGSRC_DEBUG("Covered vp OK.\n");
1618
b7266188
A
1619 /* Sanity check the name caller has provided */
1620 vfsp = mp->mnt_vtable;
1621 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1622 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1623 error = EINVAL;
1624 goto out2;
1625 }
1626
1627 /* Check the device vnode and update mount-from name, for local filesystems */
1628 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1629 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1630
1631 if (devpath != USER_ADDR_NULL) {
1632 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1633 if (error) {
6d2010ae 1634 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1635 goto out2;
1636 }
1637
1638 vnode_put(devvp);
1639 }
1640 }
1641
39037602 1642 /*
b7266188 1643 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1644 * and increment the name cache's mount generation
b7266188 1645 */
6d2010ae
A
1646
1647 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1648 error = place_mount_and_checkdirs(mp, vp, ctx);
1649 if (error != 0) {
1650 goto out2;
1651 }
1652
1653 placed = TRUE;
1654
3e170ce0
A
1655 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1656 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1657
1658 /* Forbid future moves */
1659 mount_lock(mp);
1660 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1661 mount_unlock(mp);
1662
1663 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1664 if (mount_list_add(mp) != 0) {
1665 /*
1666 * The system is shutting down trying to umount
1667 * everything, so fail with a plausible errno.
1668 */
1669 error = EBUSY;
b7266188
A
1670 goto out3;
1671 }
1672
1673 mount_end_update(mp);
6d2010ae 1674 vnode_put(rvp);
b7266188
A
1675 FREE(old_mntonname, M_TEMP);
1676
6d2010ae
A
1677 vfs_notify_mount(pvp);
1678
b7266188
A
1679 return 0;
1680out3:
3e170ce0 1681 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1682
1683 mount_lock(mp);
1684 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1685 mount_unlock(mp);
1686
1687out2:
39037602 1688 /*
b7266188 1689 * Placing the mp on the vnode clears VMOUNT,
39037602 1690 * so cleanup is different after that point
b7266188
A
1691 */
1692 if (placed) {
1693 /* Rele the vp, clear VMOUNT and v_mountedhere */
1694 undo_place_on_covered_vp(mp, vp);
1695 } else {
1696 vnode_lock_spin(vp);
1697 CLR(vp->v_flag, VMOUNT);
1698 vnode_unlock(vp);
1699 }
1700out1:
1701 mount_end_update(mp);
1702
1703out0:
6d2010ae 1704 vnode_put(rvp);
b7266188
A
1705 FREE(old_mntonname, M_TEMP);
1706 return error;
1707}
1708
1709#endif /* CONFIG_IMGSRC_ACCESS */
1710
91447636 1711void
2d21ac55 1712enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1713{
9bccf70c
A
1714 struct nameidata qnd;
1715 int type;
1716 char qfpath[MAXPATHLEN];
91447636
A
1717 const char *qfname = QUOTAFILENAME;
1718 const char *qfopsname = QUOTAOPSNAME;
1719 const char *qfextension[] = INITQFNAMES;
9bccf70c 1720
2d21ac55 1721 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1722 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1723 return;
1724 }
39037602 1725 /*
9bccf70c
A
1726 * Enable filesystem disk quotas if necessary.
1727 * We ignore errors as this should not interfere with final mount
1728 */
1729 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1730 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1731 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1732 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1733 if (namei(&qnd) != 0)
1734 continue; /* option file to trigger quotas is not present */
1735 vnode_put(qnd.ni_vp);
1736 nameidone(&qnd);
2d21ac55 1737 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1738
2d21ac55 1739 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1740 }
1741 return;
1742}
1743
2d21ac55
A
1744
1745static int
39037602 1746checkdirs_callback(proc_t p, void * arg)
2d21ac55
A
1747{
1748 struct cdirargs * cdrp = (struct cdirargs * )arg;
1749 vnode_t olddp = cdrp->olddp;
1750 vnode_t newdp = cdrp->newdp;
1751 struct filedesc *fdp;
1752 vnode_t tvp;
1753 vnode_t fdp_cvp;
1754 vnode_t fdp_rvp;
1755 int cdir_changed = 0;
1756 int rdir_changed = 0;
1757
1758 /*
1759 * XXX Also needs to iterate each thread in the process to see if it
1760 * XXX is using a per-thread current working directory, and, if so,
1761 * XXX update that as well.
1762 */
1763
1764 proc_fdlock(p);
1765 fdp = p->p_fd;
1766 if (fdp == (struct filedesc *)0) {
1767 proc_fdunlock(p);
1768 return(PROC_RETURNED);
1769 }
1770 fdp_cvp = fdp->fd_cdir;
1771 fdp_rvp = fdp->fd_rdir;
1772 proc_fdunlock(p);
1773
1774 if (fdp_cvp == olddp) {
1775 vnode_ref(newdp);
1776 tvp = fdp->fd_cdir;
1777 fdp_cvp = newdp;
1778 cdir_changed = 1;
1779 vnode_rele(tvp);
1780 }
1781 if (fdp_rvp == olddp) {
1782 vnode_ref(newdp);
1783 tvp = fdp->fd_rdir;
1784 fdp_rvp = newdp;
1785 rdir_changed = 1;
1786 vnode_rele(tvp);
1787 }
1788 if (cdir_changed || rdir_changed) {
1789 proc_fdlock(p);
1790 fdp->fd_cdir = fdp_cvp;
1791 fdp->fd_rdir = fdp_rvp;
1792 proc_fdunlock(p);
1793 }
1794 return(PROC_RETURNED);
1795}
1796
1797
1798
1c79356b
A
1799/*
1800 * Scan all active processes to see if any of them have a current
1801 * or root directory onto which the new filesystem has just been
1802 * mounted. If so, replace them with the new mount point.
1803 */
6601e61a 1804static int
2d21ac55 1805checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1806{
2d21ac55
A
1807 vnode_t newdp;
1808 vnode_t tvp;
6601e61a 1809 int err;
2d21ac55 1810 struct cdirargs cdr;
1c79356b
A
1811
1812 if (olddp->v_usecount == 1)
6601e61a 1813 return(0);
2d21ac55 1814 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1815
1816 if (err != 0) {
6601e61a 1817#if DIAGNOSTIC
2d21ac55 1818 panic("mount: lost mount: error %d", err);
6601e61a
A
1819#endif
1820 return(err);
1821 }
91447636 1822
2d21ac55
A
1823 cdr.olddp = olddp;
1824 cdr.newdp = newdp;
1825 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1826 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1827
1c79356b 1828 if (rootvnode == olddp) {
91447636 1829 vnode_ref(newdp);
fa4905b1 1830 tvp = rootvnode;
1c79356b 1831 rootvnode = newdp;
91447636 1832 vnode_rele(tvp);
1c79356b 1833 }
91447636
A
1834
1835 vnode_put(newdp);
6601e61a 1836 return(0);
1c79356b
A
1837}
1838
1839/*
1840 * Unmount a file system.
1841 *
1842 * Note: unmount takes a path to the vnode mounted on as argument,
1843 * not special file (as before).
1844 */
1c79356b
A
1845/* ARGSUSED */
1846int
b0d623f7 1847unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1848{
2d21ac55 1849 vnode_t vp;
1c79356b
A
1850 struct mount *mp;
1851 int error;
1852 struct nameidata nd;
2d21ac55 1853 vfs_context_t ctx = vfs_context_current();
91447636 1854
39037602 1855 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1856 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1857 error = namei(&nd);
1858 if (error)
1c79356b
A
1859 return (error);
1860 vp = nd.ni_vp;
1861 mp = vp->v_mount;
91447636 1862 nameidone(&nd);
1c79356b 1863
2d21ac55
A
1864#if CONFIG_MACF
1865 error = mac_mount_check_umount(ctx, mp);
1866 if (error != 0) {
1867 vnode_put(vp);
1868 return (error);
1869 }
1870#endif
55e303ae
A
1871 /*
1872 * Must be the root of the filesystem
1873 */
1874 if ((vp->v_flag & VROOT) == 0) {
91447636 1875 vnode_put(vp);
55e303ae
A
1876 return (EINVAL);
1877 }
6601e61a 1878 mount_ref(mp, 0);
91447636 1879 vnode_put(vp);
6601e61a 1880 /* safedounmount consumes the mount ref */
2d21ac55
A
1881 return (safedounmount(mp, uap->flags, ctx));
1882}
1883
1884int
39037602 1885vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1886{
1887 mount_t mp;
1888
1889 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1890 if (mp == (mount_t)0) {
1891 return(ENOENT);
1892 }
1893 mount_ref(mp, 0);
1894 mount_iterdrop(mp);
1895 /* safedounmount consumes the mount ref */
1896 return(safedounmount(mp, flags, ctx));
55e303ae
A
1897}
1898
2d21ac55 1899
55e303ae 1900/*
6601e61a 1901 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1902 * Do the actual file system unmount, prevent some common foot shooting.
1903 */
1904int
2d21ac55 1905safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1906{
1907 int error;
2d21ac55 1908 proc_t p = vfs_context_proc(ctx);
55e303ae 1909
316670eb
A
1910 /*
1911 * If the file system is not responding and MNT_NOBLOCK
1912 * is set and not a forced unmount then return EBUSY.
1913 */
1914 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1915 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1916 error = EBUSY;
1917 goto out;
1918 }
1919
1c79356b 1920 /*
39037602 1921 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1922 * this is not a forced-unmount attempt.
1c79356b 1923 */
6d2010ae
A
1924 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1925 /*
1926 * Only root, or the user that did the original mount is
1927 * permitted to unmount this filesystem.
1928 */
1929 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1930 (error = suser(kauth_cred_get(), &p->p_acflag)))
1931 goto out;
1932 }
1c79356b
A
1933 /*
1934 * Don't allow unmounting the root file system.
1935 */
6601e61a 1936 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1937 error = EBUSY; /* the root is always busy */
6601e61a
A
1938 goto out;
1939 }
1c79356b 1940
b7266188
A
1941#ifdef CONFIG_IMGSRC_ACCESS
1942 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1943 error = EBUSY;
1944 goto out;
1945 }
1946#endif /* CONFIG_IMGSRC_ACCESS */
1947
2d21ac55
A
1948 return (dounmount(mp, flags, 1, ctx));
1949
6601e61a
A
1950out:
1951 mount_drop(mp, 0);
1952 return(error);
1c79356b
A
1953}
1954
1955/*
1956 * Do the actual file system unmount.
1957 */
/*
 * dounmount: unmount the file system described by `mp`.
 *
 * Parameters:
 *	mp	mount to tear down.
 *	flags	MNT_* flags.  MNT_FORCE requests a forced unmount (vflush()
 *		is called with FORCECLOSE and its error is ignored).  Unless
 *		MNT_LNOSUB is also set, a forced unmount first calls
 *		dounmount_submounts().  With CONFIG_TRIGGERS, MNT_NOBLOCK sets
 *		P_NOREMOTEHANG on the calling process for the duration.
 *	withref	non-zero when the caller holds a mount reference on `mp`;
 *		that reference is dropped here with mount_drop().
 *	ctx	VFS context handed to VFS_SYNC(), VFS_UNMOUNT() and the
 *		trigger callbacks.
 *
 * Returns:	0 on success,
 *		EBUSY if an unmount of `mp` is already in progress,
 *		otherwise the error from VFS_SYNC(), from vflush() (non-forced
 *		unmounts only) or from VFS_UNMOUNT().  On those error paths the
 *		MNTK_UNMOUNT / MNT_LUNMOUNT / MNT_LFORCE markers are cleared.
 *
 * On success the mount's cross reference on the covered vnode is released
 * through mount_dropcrossref(), or, for MNT_ROOTFS with no covered vnode,
 * the mount structure is freed directly.  A successful unmount with neither
 * a covered vnode nor MNT_ROOTFS panics.
 */
1958int
2d21ac55 1959dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1960{
2d21ac55 1961 vnode_t coveredvp = (vnode_t)0;
1c79356b 1962 int error;
91447636 1963 int needwakeup = 0;
91447636
A
1964 int forcedunmount = 0;
1965 int lflags = 0;
593a1d5f 1966 struct vnode *devvp = NULLVP;
6d2010ae 1967#if CONFIG_TRIGGERS
39236c6e 1968 proc_t p = vfs_context_proc(ctx);
6d2010ae 1969 int did_vflush = 0;
39236c6e 1970 int pflags_save = 0;
6d2010ae 1971#endif /* CONFIG_TRIGGERS */
91447636 1972
813fb2f6
A
1973#if CONFIG_FSE
1974 if (!(flags & MNT_FORCE)) {
1975 fsevent_unmount(mp, ctx); /* has to come first! */
1976 }
1977#endif
1978
91447636 1979 mount_lock(mp);
fe8ab488
A
1980
1981 /*
1982 * If already an unmount in progress just return EBUSY.
1983 * Even a forced unmount cannot override.
1984 */
91447636 1985 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1986 if (withref != 0)
6601e61a 1987 mount_drop(mp, 1);
fe8ab488 1988 mount_unlock(mp);
9bccf70c
A
1989 return (EBUSY);
1990 }
39236c6e 1991
fe8ab488
A
1992 if (flags & MNT_FORCE) {
1993 forcedunmount = 1;
1994 mp->mnt_lflag |= MNT_LFORCE;
1995 }
1996
39236c6e
A
/*
 * With MNT_NOBLOCK, set P_NOREMOTEHANG on the calling process (never on
 * kernproc).  The previous flag word is kept in pflags_save so the bit can
 * be restored after the out: label.
 */
1997#if CONFIG_TRIGGERS
1998 if (flags & MNT_NOBLOCK && p != kernproc)
1999 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
2000#endif
2001
/* Publish the unmount-in-progress state while the mount lock is held. */
1c79356b 2002 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
2003 mp->mnt_lflag |= MNT_LUNMOUNT;
2004 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
2005 /*
2006 * anyone currently in the fast path that
2007 * trips over the cached rootvp will be
2008 * dumped out and forced into the slow path
2009 * to regenerate a new cached value
2010 */
2011 mp->mnt_realrootvp = NULLVP;
91447636 2012 mount_unlock(mp);
39037602 2013
fe8ab488
A
2014 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2015 /*
2016 * Force unmount any mounts in this filesystem.
2017 * If any unmounts fail - just leave them dangling.
2018 * Avoids recursion.
2019 */
2020 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2021 }
2022
2d21ac55
A
2023 /*
2024 * taking the name_cache_lock exclusively will
2025 * insure that everyone is out of the fast path who
2026 * might be trying to use a now stale copy of
2027 * vp->v_mountedhere->mnt_realrootvp
2028 * bumping mount_generation causes the cached values
2029 * to be invalidated
2030 */
2031 name_cache_lock();
2032 mount_generation++;
2033 name_cache_unlock();
2034
2035
/*
 * mnt_rwlock is held exclusive from here until the lck_rw_done() after the
 * out: label, except for the window around mount_list_remove() below.
 */
91447636 2036 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
2037 if (withref != 0)
2038 mount_drop(mp, 0);
91447636
A
2039 error = 0;
2040 if (forcedunmount == 0) {
2041 ubc_umount(mp); /* release cached vnodes */
2042 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 2043 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
2044 if (error) {
/* Roll back the in-progress markers; out: is entered with the mount lock held. */
2045 mount_lock(mp);
2046 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2047 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2048 mp->mnt_lflag &= ~MNT_LFORCE;
2049 goto out;
2050 }
2051 }
2052 }
6d2010ae 2053
5ba3f43e
A
2054 /* free disk_conditioner_info structure for this mount */
2055 disk_conditioner_unmount(mp);
2056
3e170ce0
A
2057 IOBSDMountChange(mp, kIOMountChangeUnmount);
2058
6d2010ae
A
2059#if CONFIG_TRIGGERS
2060 vfs_nested_trigger_unmounts(mp, flags, ctx);
2061 did_vflush = 1;
39037602 2062#endif
91447636
A
2063 if (forcedunmount)
2064 lflags |= FORCECLOSE;
2065 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
/* A vflush() failure only aborts a non-forced unmount. */
2066 if ((forcedunmount == 0) && error) {
2067 mount_lock(mp);
9bccf70c 2068 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2069 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2070 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
2071 goto out;
2072 }
91447636
A
2073
2074 /* make sure there are no one in the mount iterations or lookup */
2075 mount_iterdrain(mp);
2076
2d21ac55 2077 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 2078 if (error) {
91447636
A
2079 mount_iterreset(mp);
2080 mount_lock(mp);
1c79356b 2081 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2082 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2083 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
2084 goto out;
2085 }
2086
/* error is always 0 here: the failure case just above has already jumped to out. */
2087 /* increment the operations count */
2088 if (!error)
b0d623f7 2089 OSAddAtomic(1, &vfs_nummntops);
91447636
A
2090
2091 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
2092 /* hold an io reference and drop the usecount before close */
2093 devvp = mp->mnt_devvp;
593a1d5f
A
2094 vnode_getalways(devvp);
2095 vnode_rele(devvp);
2096 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 2097 ctx);
b0d623f7 2098 vnode_clearmountedon(devvp);
593a1d5f 2099 vnode_put(devvp);
91447636
A
2100 }
/* mnt_rwlock is released around mount_list_remove() and retaken right after. */
2101 lck_rw_done(&mp->mnt_rwlock);
2102 mount_list_remove(mp);
2103 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2104
91447636 2105 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2106 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2107 /*
2108 * The covered vnode needs special handling. Trying to get an
2109 * iocount must not block here as this may lead to deadlocks
2110 * if the Filesystem to which the covered vnode belongs is
2111 * undergoing forced unmounts. Since we hold a usecount, the
2112 * vnode cannot be reused (it can, however, still be terminated)
2113 */
2114 vnode_getalways(coveredvp);
6d2010ae
A
2115 vnode_lock_spin(coveredvp);
2116
/* This cross reference is released by mount_dropcrossref() on the success path below. */
2117 mp->mnt_crossref++;
2118 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2119 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2120
2121 vnode_unlock(coveredvp);
2122 vnode_put(coveredvp);
1c79356b 2123 }
91447636
A
2124
2125 mount_list_lock();
2126 mp->mnt_vtable->vfc_refcount--;
2127 mount_list_unlock();
2128
2129 cache_purgevfs(mp); /* remove cache entries for this file sys */
2130 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2131 mount_lock(mp);
2132 mp->mnt_lflag |= MNT_LDEAD;
2133
2134 if (mp->mnt_lflag & MNT_LWAIT) {
2135 /*
2136 * do the wakeup here
2137 * in case we block in mount_refdrain
2138 * which will drop the mount lock
2139 * and allow anyone blocked in vfs_busy
2140 * to wakeup and see the LDEAD state
2141 */
2142 mp->mnt_lflag &= ~MNT_LWAIT;
2143 wakeup((caddr_t)mp);
1c79356b 2144 }
91447636 2145 mount_refdrain(mp);
1c79356b 2146out:
/*
 * Common exit.  Every goto above takes the mount lock first, and the success
 * path took it before setting MNT_LDEAD; it is released further down.
 */
91447636
A
2147 if (mp->mnt_lflag & MNT_LWAIT) {
2148 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2149 needwakeup = 1;
91447636 2150 }
6d2010ae 2151
6d2010ae 2152#if CONFIG_TRIGGERS
39236c6e
A
2153 if (flags & MNT_NOBLOCK && p != kernproc) {
2154 // Restore P_NOREMOTEHANG bit to its previous value
2155 if ((pflags_save & P_NOREMOTEHANG) == 0)
2156 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2157 }
2158
39037602 2159 /*
6d2010ae 2160 * Callback and context are set together under the mount lock, and
39037602 2161 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2162 * and call out.
2163 */
2164 if (mp->mnt_triggercallback != NULL) {
2165 mount_unlock(mp);
2166 if (error == 0) {
2167 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2168 } else if (did_vflush) {
2169 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2170 }
2171 } else {
2172 mount_unlock(mp);
2173 }
39037602 2174#else
91447636 2175 mount_unlock(mp);
6d2010ae
A
2176#endif /* CONFIG_TRIGGERS */
2177
91447636
A
2178 lck_rw_done(&mp->mnt_rwlock);
2179
/* The deferred wakeup is issued only after both the mount lock and mnt_rwlock are released. */
2180 if (needwakeup)
1c79356b 2181 wakeup((caddr_t)mp);
6d2010ae 2182
/*
 * Success only: give back the covered vnode's usecount and the mount's cross
 * reference, or free the root mount, which has no covered vnode.
 */
55e303ae 2183 if (!error) {
91447636 2184 if ((coveredvp != NULLVP)) {
fe8ab488 2185 vnode_t pvp = NULLVP;
b0d623f7 2186
fe8ab488
A
2187 /*
2188 * The covered vnode needs special handling. Trying to
2189 * get an iocount must not block here as this may lead
2190 * to deadlocks if the Filesystem to which the covered
2191 * vnode belongs is undergoing forced unmounts. Since we
2192 * hold a usecount, the vnode cannot be reused
2193 * (it can, however, still be terminated).
2194 */
2195 vnode_getalways(coveredvp);
6d2010ae
A
2196
2197 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2198 /*
2199 * We'll _try_ to detect if this really needs to be
2200 * done. The coveredvp can only be in termination (or
2201 * terminated) if the coveredvp's mount point is in a
2202 * forced unmount (or has been) since we still hold the
2203 * ref.
2204 */
2205 if (!vnode_isrecycled(coveredvp)) {
2206 pvp = vnode_getparent(coveredvp);
6d2010ae 2207#if CONFIG_TRIGGERS
fe8ab488
A
2208 if (coveredvp->v_resolve) {
2209 vnode_trigger_rearm(coveredvp, ctx);
2210 }
2211#endif
2212 }
2213
2214 vnode_rele(coveredvp);
91447636 2215 vnode_put(coveredvp);
fe8ab488 2216 coveredvp = NULLVP;
b0d623f7
A
2217
/* Post NOTE_WRITE on the parent directory of the former mount point, if one was found. */
2218 if (pvp) {
2219 lock_vnode_and_post(pvp, NOTE_WRITE);
2220 vnode_put(pvp);
2221 }
91447636
A
2222 } else if (mp->mnt_flag & MNT_ROOTFS) {
2223 mount_lock_destroy(mp);
2d21ac55
A
2224#if CONFIG_MACF
2225 mac_mount_label_destroy(mp);
2226#endif
91447636
A
2227 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2228 } else
2229 panic("dounmount: no coveredvp");
55e303ae 2230 }
1c79356b
A
2231 return (error);
2232}
2233
fe8ab488
A
2234/*
2235 * Unmount any mounts in this filesystem.
2236 */
2237void
2238dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2239{
2240 mount_t smp;
2241 fsid_t *fsids, fsid;
2242 int fsids_sz;
2243 int count = 0, i, m = 0;
2244 vnode_t vp;
2245
2246 mount_list_lock();
2247
2248 // Get an array to hold the submounts fsids.
2249 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2250 count++;
2251 fsids_sz = count * sizeof(fsid_t);
2252 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2253 if (fsids == NULL) {
2254 mount_list_unlock();
2255 goto out;
2256 }
2257 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2258
2259 /*
2260 * Fill the array with submount fsids.
2261 * Since mounts are always added to the tail of the mount list, the
39037602 2262 * list is always in mount order.
fe8ab488
A
2263 * For each mount check if the mounted-on vnode belongs to a
2264 * mount that's already added to our array of mounts to be unmounted.
2265 */
2266 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2267 vp = smp->mnt_vnodecovered;
2268 if (vp == NULL)
2269 continue;
2270 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2271 for (i = 0; i <= m; i++) {
2272 if (fsids[i].val[0] == fsid.val[0] &&
2273 fsids[i].val[1] == fsid.val[1]) {
2274 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2275 break;
2276 }
2277 }
2278 }
2279 mount_list_unlock();
2280
2281 // Unmount the submounts in reverse order. Ignore errors.
2282 for (i = m; i > 0; i--) {
2283 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2284 if (smp) {
2285 mount_ref(smp, 0);
2286 mount_iterdrop(smp);
2287 (void) dounmount(smp, flags, 1, ctx);
2288 }
2289 }
2290out:
2291 if (fsids)
2292 FREE(fsids, M_TEMP);
2293}
2294
91447636
A
2295void
2296mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2297{
6d2010ae
A
2298 vnode_lock(dp);
2299 mp->mnt_crossref--;
2300
2301 if (mp->mnt_crossref < 0)
2302 panic("mount cross refs -ve");
2303
2304 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
39037602 2305
91447636 2306 if (need_put)
6d2010ae 2307 vnode_put_locked(dp);
91447636 2308 vnode_unlock(dp);
6d2010ae
A
2309
2310 mount_lock_destroy(mp);
2311#if CONFIG_MACF
2312 mac_mount_label_destroy(mp);
2313#endif
2314 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2315 return;
2316 }
2317 if (need_put)
2318 vnode_put_locked(dp);
2319 vnode_unlock(dp);
91447636
A
2320}
2321
2322
1c79356b
A
2323/*
2324 * Sync each mounted filesystem.
2325 */
2326#if DIAGNOSTIC
2327int syncprt = 0;
1c79356b
A
2328#endif
2329
1c79356b 2330int print_vmpage_stat=0;
a39ff7e2 2331
39037602 2332static int
fe8ab488 2333sync_callback(mount_t mp, __unused void *arg)
1c79356b 2334{
91447636 2335 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2336 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2337
2338 mp->mnt_flag &= ~MNT_ASYNC;
2339 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2340 if (asyncflag)
2341 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2342 }
1c79356b 2343
fe8ab488
A
2344 return (VFS_RETURNED);
2345}
91447636 2346
91447636
A
2347/* ARGSUSED */
2348int
b0d623f7 2349sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2350{
fe8ab488 2351 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2352
fe8ab488
A
2353 if (print_vmpage_stat) {
2354 vm_countdirtypages();
2355 }
2356
2357#if DIAGNOSTIC
2358 if (syncprt)
2359 vfs_bufstats();
2360#endif /* DIAGNOSTIC */
2361 return 0;
2362}
2363
d9a64523
A
/*
 * Selects which mounts a pass of sync_internal_callback() acts on.  A mount
 * counts as "reliable" there when it is MNT_LOCAL and not MNTK_VIRTUALDEV.
 */
typedef enum {
	SYNC_ALL                   = 0,	/* no filtering */
	SYNC_ONLY_RELIABLE_MEDIA   = 1,	/* skip mounts that are not reliable */
	SYNC_ONLY_UNRELIABLE_MEDIA = 2	/* skip mounts that are reliable */
} sync_type_t;
2369
2370static int
2371sync_internal_callback(mount_t mp, void *arg)
2372{
2373 if (arg) {
2374 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2375 (mp->mnt_flag & MNT_LOCAL);
2376 sync_type_t sync_type = *((sync_type_t *)arg);
2377
2378 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable)
2379 return (VFS_RETURNED);
2380 else if ((sync_type = SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable)
2381 return (VFS_RETURNED);
2382 }
2383
2384 (void)sync_callback(mp, NULL);
2385
2386 return (VFS_RETURNED);
2387}
2388
2389int sync_thread_state = 0;
2390int sync_timeout_seconds = 5;
2391
2392#define SYNC_THREAD_RUN 0x0001
2393#define SYNC_THREAD_RUNNING 0x0002
2394
fe8ab488 2395static void
d9a64523 2396sync_thread(__unused void *arg, __unused wait_result_t wr)
fe8ab488 2397{
d9a64523 2398 sync_type_t sync_type;
fe8ab488 2399
d9a64523
A
2400 lck_mtx_lock(sync_mtx_lck);
2401 while (sync_thread_state & SYNC_THREAD_RUN) {
2402 sync_thread_state &= ~SYNC_THREAD_RUN;
2403 lck_mtx_unlock(sync_mtx_lck);
2404
2405 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2406 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2407 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2408 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2409
2410 lck_mtx_lock(sync_mtx_lck);
2411 }
2412 /*
2413 * This wakeup _has_ to be issued before the lock is released otherwise
2414 * we may end up waking up a thread in sync_internal which is
2415 * expecting a wakeup from a thread it just created and not from this
2416 * thread which is about to exit.
2417 */
2418 wakeup(&sync_thread_state);
2419 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2420 lck_mtx_unlock(sync_mtx_lck);
fe8ab488 2421
fe8ab488 2422 if (print_vmpage_stat) {
1c79356b 2423 vm_countdirtypages();
1c79356b 2424 }
39236c6e 2425
1c79356b
A
2426#if DIAGNOSTIC
2427 if (syncprt)
2428 vfs_bufstats();
2429#endif /* DIAGNOSTIC */
1c79356b
A
2430}
2431
d9a64523
A
2432struct timeval sync_timeout_last_print = {0, 0};
2433
1c79356b 2434/*
d9a64523
A
2435 * An in-kernel sync for power management to call.
2436 * This function always returns within sync_timeout seconds.
1c79356b 2437 */
d9a64523
A
2438__private_extern__ int
2439sync_internal(void)
2d21ac55 2440{
fe8ab488 2441 thread_t thd;
2d21ac55 2442 int error;
d9a64523
A
2443 int thread_created = FALSE;
2444 struct timespec ts = {sync_timeout_seconds, 0};
fe8ab488
A
2445
2446 lck_mtx_lock(sync_mtx_lck);
d9a64523
A
2447 sync_thread_state |= SYNC_THREAD_RUN;
2448 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2449 int kr;
2450
2451 sync_thread_state |= SYNC_THREAD_RUNNING;
2452 kr = kernel_thread_start(sync_thread, NULL, &thd);
2453 if (kr != KERN_SUCCESS) {
2454 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2455 lck_mtx_unlock(sync_mtx_lck);
2456 printf("sync_thread failed\n");
2457 return (0);
2458 }
2459 thread_created = TRUE;
fe8ab488
A
2460 }
2461
d9a64523
A
2462 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2463 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
fe8ab488 2464 if (error) {
d9a64523
A
2465 struct timeval now;
2466
2467 microtime(&now);
2468 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2469 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2470 sync_timeout_last_print.tv_sec = now.tv_sec;
2471 }
fe8ab488 2472 }
fe8ab488 2473
d9a64523
A
2474 if (thread_created)
2475 thread_deallocate(thd);
2d21ac55 2476
d9a64523 2477 return (0);
fe8ab488
A
2478} /* end of sync_internal call */
2479
2480/*
2481 * Change filesystem quotas.
2482 */
2483#if QUOTA
2484int
2485quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2486{
2d21ac55 2487 struct mount *mp;
d9a64523 2488 int error, quota_cmd, quota_status = 0;
91447636
A
2489 caddr_t datap;
2490 size_t fnamelen;
1c79356b 2491 struct nameidata nd;
2d21ac55 2492 vfs_context_t ctx = vfs_context_current();
d9a64523 2493 struct dqblk my_dqblk = {};
91447636 2494
b0d623f7 2495 AUDIT_ARG(uid, uap->uid);
55e303ae 2496 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2497 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2498 uap->path, ctx);
55e303ae
A
2499 error = namei(&nd);
2500 if (error)
1c79356b
A
2501 return (error);
2502 mp = nd.ni_vp->v_mount;
91447636
A
2503 vnode_put(nd.ni_vp);
2504 nameidone(&nd);
2505
2506 /* copyin any data we will need for downstream code */
2507 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2508
2509 switch (quota_cmd) {
2510 case Q_QUOTAON:
2511 /* uap->arg specifies a file from which to take the quotas */
2512 fnamelen = MAXPATHLEN;
2513 datap = kalloc(MAXPATHLEN);
2514 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2515 break;
2516 case Q_GETQUOTA:
2517 /* uap->arg is a pointer to a dqblk structure. */
2518 datap = (caddr_t) &my_dqblk;
2519 break;
2520 case Q_SETQUOTA:
2521 case Q_SETUSE:
2522 /* uap->arg is a pointer to a dqblk structure. */
2523 datap = (caddr_t) &my_dqblk;
2524 if (proc_is64bit(p)) {
2525 struct user_dqblk my_dqblk64;
2526 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2527 if (error == 0) {
2528 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2529 }
2530 }
2531 else {
2532 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2533 }
2534 break;
2535 case Q_QUOTASTAT:
2536 /* uap->arg is a pointer to an integer */
2537 datap = (caddr_t) &quota_status;
2538 break;
2539 default:
2540 datap = NULL;
2541 break;
2542 } /* switch */
2543
2544 if (error == 0) {
2d21ac55 2545 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2546 }
2547
2548 switch (quota_cmd) {
2549 case Q_QUOTAON:
2550 if (datap != NULL)
2551 kfree(datap, MAXPATHLEN);
2552 break;
2553 case Q_GETQUOTA:
2554 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2555 if (error == 0) {
2556 if (proc_is64bit(p)) {
5ba3f43e
A
2557 struct user_dqblk my_dqblk64;
2558
2559 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
91447636
A
2560 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2561 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2562 }
2563 else {
2564 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2565 }
2566 }
2567 break;
2568 case Q_QUOTASTAT:
2569 /* uap->arg is a pointer to an integer */
2570 if (error == 0) {
2571 error = copyout(datap, uap->arg, sizeof(quota_status));
2572 }
2573 break;
2574 default:
2575 break;
2576 } /* switch */
2577
2578 return (error);
1c79356b 2579}
2d21ac55
A
2580#else
2581int
b0d623f7 2582quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2583{
2584 return (EOPNOTSUPP);
2585}
2586#endif /* QUOTA */
1c79356b
A
2587
2588/*
2589 * Get filesystem statistics.
2d21ac55
A
2590 *
2591 * Returns: 0 Success
2592 * namei:???
2593 * vfs_update_vfsstat:???
2594 * munge_statfs:EFAULT
1c79356b 2595 */
1c79356b
A
2596/* ARGSUSED */
2597int
b0d623f7 2598statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2599{
91447636
A
2600 struct mount *mp;
2601 struct vfsstatfs *sp;
1c79356b
A
2602 int error;
2603 struct nameidata nd;
2d21ac55 2604 vfs_context_t ctx = vfs_context_current();
91447636 2605 vnode_t vp;
1c79356b 2606
39037602 2607 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2608 UIO_USERSPACE, uap->path, ctx);
55e303ae 2609 error = namei(&nd);
39037602 2610 if (error != 0)
1c79356b 2611 return (error);
91447636
A
2612 vp = nd.ni_vp;
2613 mp = vp->v_mount;
2614 sp = &mp->mnt_vfsstat;
2615 nameidone(&nd);
2616
39037602
A
2617#if CONFIG_MACF
2618 error = mac_mount_check_stat(ctx, mp);
2619 if (error != 0)
2620 return (error);
2621#endif
2622
2d21ac55 2623 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2624 if (error != 0) {
39236c6e 2625 vnode_put(vp);
1c79356b 2626 return (error);
39236c6e 2627 }
91447636
A
2628
2629 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2630 vnode_put(vp);
91447636 2631 return (error);
1c79356b
A
2632}
2633
2634/*
2635 * Get filesystem statistics.
2636 */
1c79356b
A
2637/* ARGSUSED */
2638int
b0d623f7 2639fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2640{
2d21ac55 2641 vnode_t vp;
1c79356b 2642 struct mount *mp;
91447636 2643 struct vfsstatfs *sp;
1c79356b
A
2644 int error;
2645
55e303ae
A
2646 AUDIT_ARG(fd, uap->fd);
2647
91447636 2648 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2649 return (error);
55e303ae 2650
d1ecb069
A
2651 error = vnode_getwithref(vp);
2652 if (error) {
2653 file_drop(uap->fd);
2654 return (error);
2655 }
2656
91447636 2657 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2658
91447636
A
2659 mp = vp->v_mount;
2660 if (!mp) {
d1ecb069
A
2661 error = EBADF;
2662 goto out;
91447636 2663 }
39037602
A
2664
2665#if CONFIG_MACF
2666 error = mac_mount_check_stat(vfs_context_current(), mp);
2667 if (error != 0)
2668 goto out;
2669#endif
2670
91447636 2671 sp = &mp->mnt_vfsstat;
39037602 2672 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2673 goto out;
91447636 2674 }
91447636
A
2675
2676 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2677
d1ecb069
A
2678out:
2679 file_drop(uap->fd);
2680 vnode_put(vp);
2681
91447636 2682 return (error);
1c79356b
A
2683}
2684
39037602
A
2685/*
2686 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2687 */
39037602 2688static int
2d21ac55
A
2689statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2690{
2691 int error;
2692 struct statfs64 sfs;
39037602 2693
2d21ac55
A
2694 bzero(&sfs, sizeof(sfs));
2695
2696 sfs.f_bsize = sfsp->f_bsize;
2697 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2698 sfs.f_blocks = sfsp->f_blocks;
2699 sfs.f_bfree = sfsp->f_bfree;
2700 sfs.f_bavail = sfsp->f_bavail;
2701 sfs.f_files = sfsp->f_files;
2702 sfs.f_ffree = sfsp->f_ffree;
2703 sfs.f_fsid = sfsp->f_fsid;
2704 sfs.f_owner = sfsp->f_owner;
2705 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2706 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2707 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2708 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2709 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2710 } else {
2711 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2712 }
2d21ac55
A
2713 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2714 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2715
2716 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2717
2718 return(error);
2719}
2720
39037602
A
2721/*
2722 * Get file system statistics in 64-bit mode
2d21ac55
A
2723 */
2724int
b0d623f7 2725statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2726{
2727 struct mount *mp;
2728 struct vfsstatfs *sp;
2729 int error;
2730 struct nameidata nd;
2731 vfs_context_t ctxp = vfs_context_current();
2732 vnode_t vp;
2733
39037602 2734 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2735 UIO_USERSPACE, uap->path, ctxp);
2736 error = namei(&nd);
39037602 2737 if (error != 0)
2d21ac55
A
2738 return (error);
2739 vp = nd.ni_vp;
2740 mp = vp->v_mount;
2741 sp = &mp->mnt_vfsstat;
2742 nameidone(&nd);
2743
39037602
A
2744#if CONFIG_MACF
2745 error = mac_mount_check_stat(ctxp, mp);
2746 if (error != 0)
2747 return (error);
2748#endif
2749
2d21ac55 2750 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2751 if (error != 0) {
39236c6e 2752 vnode_put(vp);
2d21ac55 2753 return (error);
39236c6e 2754 }
2d21ac55
A
2755
2756 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2757 vnode_put(vp);
2d21ac55
A
2758
2759 return (error);
2760}
2761
39037602
A
2762/*
2763 * Get file system statistics in 64-bit mode
2d21ac55
A
2764 */
2765int
b0d623f7 2766fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2767{
2768 struct vnode *vp;
2769 struct mount *mp;
2770 struct vfsstatfs *sp;
2771 int error;
2772
2773 AUDIT_ARG(fd, uap->fd);
2774
2775 if ( (error = file_vnode(uap->fd, &vp)) )
2776 return (error);
2777
d1ecb069
A
2778 error = vnode_getwithref(vp);
2779 if (error) {
2780 file_drop(uap->fd);
2781 return (error);
2782 }
2783
2d21ac55
A
2784 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2785
2786 mp = vp->v_mount;
2787 if (!mp) {
316670eb 2788 error = EBADF;
d1ecb069 2789 goto out;
2d21ac55 2790 }
39037602
A
2791
2792#if CONFIG_MACF
2793 error = mac_mount_check_stat(vfs_context_current(), mp);
2794 if (error != 0)
2795 goto out;
2796#endif
2797
2d21ac55
A
2798 sp = &mp->mnt_vfsstat;
2799 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2800 goto out;
2d21ac55 2801 }
2d21ac55
A
2802
2803 error = statfs64_common(mp, sp, uap->buf);
2804
d1ecb069
A
2805out:
2806 file_drop(uap->fd);
2807 vnode_put(vp);
2808
2d21ac55
A
2809 return (error);
2810}
91447636
A
2811
2812struct getfsstat_struct {
2813 user_addr_t sfsp;
2d21ac55 2814 user_addr_t *mp;
91447636
A
2815 int count;
2816 int maxcount;
2817 int flags;
2818 int error;
1c79356b 2819};
1c79356b 2820
91447636
A
2821
2822static int
2823getfsstat_callback(mount_t mp, void * arg)
2824{
39037602 2825
91447636
A
2826 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2827 struct vfsstatfs *sp;
91447636 2828 int error, my_size;
2d21ac55 2829 vfs_context_t ctx = vfs_context_current();
91447636
A
2830
2831 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2832#if CONFIG_MACF
2833 error = mac_mount_check_stat(ctx, mp);
2834 if (error != 0) {
2835 fstp->error = error;
2836 return(VFS_RETURNED_DONE);
2837 }
2838#endif
91447636
A
2839 sp = &mp->mnt_vfsstat;
2840 /*
2841 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2842 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2843 */
b0d623f7 2844 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2845 (error = vfs_update_vfsstat(mp, ctx,
2846 VFS_USER_EVENT))) {
91447636
A
2847 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2848 return(VFS_RETURNED);
1c79356b 2849 }
91447636
A
2850
2851 /*
2852 * Need to handle LP64 version of struct statfs
2853 */
2d21ac55 2854 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2855 if (error) {
2856 fstp->error = error;
2857 return(VFS_RETURNED_DONE);
1c79356b 2858 }
91447636 2859 fstp->sfsp += my_size;
2d21ac55
A
2860
2861 if (fstp->mp) {
39236c6e 2862#if CONFIG_MACF
2d21ac55
A
2863 error = mac_mount_label_get(mp, *fstp->mp);
2864 if (error) {
2865 fstp->error = error;
2866 return(VFS_RETURNED_DONE);
2867 }
39236c6e 2868#endif
2d21ac55
A
2869 fstp->mp++;
2870 }
2871 }
91447636
A
2872 fstp->count++;
2873 return(VFS_RETURNED);
2874}
2875
2876/*
2877 * Get statistics on all filesystems.
2878 */
2879int
2880getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2881{
2882 struct __mac_getfsstat_args muap;
2883
2884 muap.buf = uap->buf;
2885 muap.bufsize = uap->bufsize;
2886 muap.mac = USER_ADDR_NULL;
2887 muap.macsize = 0;
2888 muap.flags = uap->flags;
2889
2890 return (__mac_getfsstat(p, &muap, retval));
2891}
2892
b0d623f7
A
2893/*
2894 * __mac_getfsstat: Get MAC-related file system statistics
2895 *
2896 * Parameters: p (ignored)
2897 * uap User argument descriptor (see below)
39037602 2898 * retval Count of file system statistics (N stats)
b0d623f7
A
2899 *
2900 * Indirect: uap->bufsize Buffer size
2901 * uap->macsize MAC info size
2902 * uap->buf Buffer where information will be returned
2903 * uap->mac MAC info
2904 * uap->flags File system flags
39037602 2905 *
b0d623f7
A
2906 *
2907 * Returns: 0 Success
2908 * !0 Not success
2909 *
2910 */
2d21ac55
A
2911int
2912__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2913{
2914 user_addr_t sfsp;
2d21ac55 2915 user_addr_t *mp;
b0d623f7 2916 size_t count, maxcount, bufsize, macsize;
91447636
A
2917 struct getfsstat_struct fst;
2918
b0d623f7
A
2919 bufsize = (size_t) uap->bufsize;
2920 macsize = (size_t) uap->macsize;
2921
91447636 2922 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2923 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2924 }
2925 else {
b0d623f7 2926 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2927 }
2928 sfsp = uap->buf;
2929 count = 0;
2930
2d21ac55
A
2931 mp = NULL;
2932
2933#if CONFIG_MACF
2934 if (uap->mac != USER_ADDR_NULL) {
2935 u_int32_t *mp0;
2936 int error;
b0d623f7 2937 unsigned int i;
2d21ac55 2938
b0d623f7 2939 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2940 if (count != maxcount)
2941 return (EINVAL);
2942
2943 /* Copy in the array */
b0d623f7
A
2944 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2945 if (mp0 == NULL) {
2946 return (ENOMEM);
2947 }
2948
2949 error = copyin(uap->mac, mp0, macsize);
2950 if (error) {
2951 FREE(mp0, M_MACTEMP);
2d21ac55 2952 return (error);
b0d623f7 2953 }
2d21ac55
A
2954
2955 /* Normalize to an array of user_addr_t */
2956 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2957 if (mp == NULL) {
2958 FREE(mp0, M_MACTEMP);
2959 return (ENOMEM);
2960 }
2961
2d21ac55
A
2962 for (i = 0; i < count; i++) {
2963 if (IS_64BIT_PROCESS(p))
2964 mp[i] = ((user_addr_t *)mp0)[i];
2965 else
2966 mp[i] = (user_addr_t)mp0[i];
2967 }
2968 FREE(mp0, M_MACTEMP);
2969 }
2970#endif
2971
2972
91447636 2973 fst.sfsp = sfsp;
2d21ac55 2974 fst.mp = mp;
91447636
A
2975 fst.flags = uap->flags;
2976 fst.count = 0;
2977 fst.error = 0;
2978 fst.maxcount = maxcount;
2979
39037602 2980
91447636
A
2981 vfs_iterate(0, getfsstat_callback, &fst);
2982
2d21ac55
A
2983 if (mp)
2984 FREE(mp, M_MACTEMP);
2985
91447636
A
2986 if (fst.error ) {
2987 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2988 return(fst.error);
2989 }
2990
2991 if (fst.sfsp && fst.count > fst.maxcount)
2992 *retval = fst.maxcount;
1c79356b 2993 else
91447636 2994 *retval = fst.count;
1c79356b
A
2995 return (0);
2996}
2997
2d21ac55
A
2998static int
2999getfsstat64_callback(mount_t mp, void * arg)
3000{
3001 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3002 struct vfsstatfs *sp;
3003 int error;
3004
3005 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
3006#if CONFIG_MACF
3007 error = mac_mount_check_stat(vfs_context_current(), mp);
3008 if (error != 0) {
3009 fstp->error = error;
3010 return(VFS_RETURNED_DONE);
3011 }
3012#endif
2d21ac55
A
3013 sp = &mp->mnt_vfsstat;
3014 /*
b0d623f7
A
3015 * If MNT_NOWAIT is specified, do not refresh the fsstat
3016 * cache. MNT_WAIT overrides MNT_NOWAIT.
3017 *
3018 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3019 * getfsstat, since the constants are out of the same
3020 * namespace.
2d21ac55 3021 */
b0d623f7
A
3022 if (((fstp->flags & MNT_NOWAIT) == 0 ||
3023 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
3024 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
3025 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3026 return(VFS_RETURNED);
3027 }
3028
3029 error = statfs64_common(mp, sp, fstp->sfsp);
3030 if (error) {
3031 fstp->error = error;
3032 return(VFS_RETURNED_DONE);
3033 }
3034 fstp->sfsp += sizeof(struct statfs64);
3035 }
3036 fstp->count++;
3037 return(VFS_RETURNED);
3038}
3039
3040/*
3041 * Get statistics on all file systems in 64 bit mode.
3042 */
3043int
3044getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3045{
3046 user_addr_t sfsp;
3047 int count, maxcount;
3048 struct getfsstat_struct fst;
3049
3050 maxcount = uap->bufsize / sizeof(struct statfs64);
3051
3052 sfsp = uap->buf;
3053 count = 0;
3054
3055 fst.sfsp = sfsp;
3056 fst.flags = uap->flags;
3057 fst.count = 0;
3058 fst.error = 0;
3059 fst.maxcount = maxcount;
3060
3061 vfs_iterate(0, getfsstat64_callback, &fst);
3062
3063 if (fst.error ) {
3064 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3065 return(fst.error);
3066 }
3067
3068 if (fst.sfsp && fst.count > fst.maxcount)
3069 *retval = fst.maxcount;
3070 else
3071 *retval = fst.count;
3072
3073 return (0);
3074}
3075
fe8ab488
A
3076/*
3077 * gets the associated vnode with the file descriptor passed.
3078 * as input
3079 *
3080 * INPUT
3081 * ctx - vfs context of caller
3082 * fd - file descriptor for which vnode is required.
3083 * vpp - Pointer to pointer to vnode to be returned.
3084 *
3085 * The vnode is returned with an iocount so any vnode obtained
3086 * by this call needs a vnode_put
3087 *
3088 */
39037602 3089int
fe8ab488
A
3090vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3091{
3092 int error;
3093 vnode_t vp;
3094 struct fileproc *fp;
3095 proc_t p = vfs_context_proc(ctx);
3096
3097 *vpp = NULLVP;
3098
3099 error = fp_getfvp(p, fd, &fp, &vp);
3100 if (error)
3101 return (error);
3102
3103 error = vnode_getwithref(vp);
3104 if (error) {
3105 (void)fp_drop(p, fd, fp, 0);
3106 return (error);
3107 }
3108
3109 (void)fp_drop(p, fd, fp, 0);
3110 *vpp = vp;
3111 return (error);
3112}
3113
3114/*
3115 * Wrapper function around namei to start lookup from a directory
3116 * specified by a file descriptor ni_dirfd.
3117 *
3118 * In addition to all the errors returned by namei, this call can
3119 * return ENOTDIR if the file descriptor does not refer to a directory.
3120 * and EBADF if the file descriptor is not valid.
3121 */
3122int
3123nameiat(struct nameidata *ndp, int dirfd)
3124{
3125 if ((dirfd != AT_FDCWD) &&
3126 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3127 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3128 int error = 0;
3129 char c;
3130
3131 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3132 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3133 if (error)
3134 return (error);
3135 } else {
3136 c = *((char *)(ndp->ni_dirp));
3137 }
3138
3139 if (c != '/') {
3140 vnode_t dvp_at;
3141
3142 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3143 &dvp_at);
3144 if (error)
3145 return (error);
3146
3147 if (vnode_vtype(dvp_at) != VDIR) {
3148 vnode_put(dvp_at);
3149 return (ENOTDIR);
3150 }
3151
3152 ndp->ni_dvp = dvp_at;
3153 ndp->ni_cnd.cn_flags |= USEDVP;
3154 error = namei(ndp);
3155 ndp->ni_cnd.cn_flags &= ~USEDVP;
3156 vnode_put(dvp_at);
3157 return (error);
3158 }
3159 }
3160
3161 return (namei(ndp));
3162}
3163
1c79356b
A
3164/*
3165 * Change current working directory to a given file descriptor.
3166 */
1c79356b 3167/* ARGSUSED */
2d21ac55
A
3168static int
3169common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3170{
2d21ac55
A
3171 struct filedesc *fdp = p->p_fd;
3172 vnode_t vp;
3173 vnode_t tdp;
3174 vnode_t tvp;
1c79356b 3175 struct mount *mp;
1c79356b 3176 int error;
2d21ac55 3177 vfs_context_t ctx = vfs_context_current();
1c79356b 3178
b0d623f7 3179 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3180 if (per_thread && uap->fd == -1) {
3181 /*
3182 * Switching back from per-thread to per process CWD; verify we
3183 * in fact have one before proceeding. The only success case
3184 * for this code path is to return 0 preemptively after zapping
3185 * the thread structure contents.
3186 */
3187 thread_t th = vfs_context_thread(ctx);
3188 if (th) {
3189 uthread_t uth = get_bsdthread_info(th);
3190 tvp = uth->uu_cdir;
3191 uth->uu_cdir = NULLVP;
3192 if (tvp != NULLVP) {
3193 vnode_rele(tvp);
3194 return (0);
3195 }
3196 }
3197 return (EBADF);
3198 }
91447636
A
3199
3200 if ( (error = file_vnode(uap->fd, &vp)) )
3201 return(error);
3202 if ( (error = vnode_getwithref(vp)) ) {
3203 file_drop(uap->fd);
3204 return(error);
3205 }
55e303ae
A
3206
3207 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3208
2d21ac55 3209 if (vp->v_type != VDIR) {
1c79356b 3210 error = ENOTDIR;
2d21ac55
A
3211 goto out;
3212 }
3213
3214#if CONFIG_MACF
3215 error = mac_vnode_check_chdir(ctx, vp);
3216 if (error)
3217 goto out;
3218#endif
3219 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3220 if (error)
3221 goto out;
3222
1c79356b 3223 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3224 if (vfs_busy(mp, LK_NOWAIT)) {
3225 error = EACCES;
3226 goto out;
55e303ae 3227 }
2d21ac55 3228 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3229 vfs_unbusy(mp);
1c79356b
A
3230 if (error)
3231 break;
91447636 3232 vnode_put(vp);
1c79356b
A
3233 vp = tdp;
3234 }
91447636
A
3235 if (error)
3236 goto out;
3237 if ( (error = vnode_ref(vp)) )
3238 goto out;
3239 vnode_put(vp);
3240
2d21ac55
A
3241 if (per_thread) {
3242 thread_t th = vfs_context_thread(ctx);
3243 if (th) {
3244 uthread_t uth = get_bsdthread_info(th);
3245 tvp = uth->uu_cdir;
3246 uth->uu_cdir = vp;
b0d623f7 3247 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3248 } else {
3249 vnode_rele(vp);
3250 return (ENOENT);
3251 }
3252 } else {
3253 proc_fdlock(p);
3254 tvp = fdp->fd_cdir;
3255 fdp->fd_cdir = vp;
3256 proc_fdunlock(p);
3257 }
91447636
A
3258
3259 if (tvp)
3260 vnode_rele(tvp);
3261 file_drop(uap->fd);
3262
1c79356b 3263 return (0);
91447636
A
3264out:
3265 vnode_put(vp);
3266 file_drop(uap->fd);
3267
3268 return(error);
1c79356b
A
3269}
3270
2d21ac55 3271int
b0d623f7 3272fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3273{
3274 return common_fchdir(p, uap, 0);
3275}
3276
3277int
b0d623f7 3278__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3279{
3280 return common_fchdir(p, (void *)uap, 1);
3281}
3282
1c79356b 3283/*
b0d623f7 3284 * Change current working directory (".").
2d21ac55
A
3285 *
3286 * Returns: 0 Success
3287 * change_dir:ENOTDIR
3288 * change_dir:???
3289 * vnode_ref:ENOENT No such file or directory
1c79356b 3290 */
1c79356b 3291/* ARGSUSED */
2d21ac55
A
3292static int
3293common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3294{
2d21ac55 3295 struct filedesc *fdp = p->p_fd;
1c79356b
A
3296 int error;
3297 struct nameidata nd;
2d21ac55
A
3298 vnode_t tvp;
3299 vfs_context_t ctx = vfs_context_current();
91447636 3300
39037602 3301 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3302 UIO_USERSPACE, uap->path, ctx);
3303 error = change_dir(&nd, ctx);
55e303ae 3304 if (error)
1c79356b 3305 return (error);
91447636
A
3306 if ( (error = vnode_ref(nd.ni_vp)) ) {
3307 vnode_put(nd.ni_vp);
3308 return (error);
3309 }
3310 /*
3311 * drop the iocount we picked up in change_dir
3312 */
3313 vnode_put(nd.ni_vp);
3314
2d21ac55
A
3315 if (per_thread) {
3316 thread_t th = vfs_context_thread(ctx);
3317 if (th) {
3318 uthread_t uth = get_bsdthread_info(th);
3319 tvp = uth->uu_cdir;
3320 uth->uu_cdir = nd.ni_vp;
b0d623f7 3321 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3322 } else {
3323 vnode_rele(nd.ni_vp);
3324 return (ENOENT);
3325 }
3326 } else {
3327 proc_fdlock(p);
3328 tvp = fdp->fd_cdir;
3329 fdp->fd_cdir = nd.ni_vp;
3330 proc_fdunlock(p);
3331 }
91447636
A
3332
3333 if (tvp)
3334 vnode_rele(tvp);
3335
1c79356b
A
3336 return (0);
3337}
3338
b0d623f7
A
3339
3340/*
3341 * chdir
3342 *
3343 * Change current working directory (".") for the entire process
3344 *
3345 * Parameters: p Process requesting the call
3346 * uap User argument descriptor (see below)
3347 * retval (ignored)
3348 *
3349 * Indirect parameters: uap->path Directory path
3350 *
3351 * Returns: 0 Success
3352 * common_chdir: ENOTDIR
3353 * common_chdir: ENOENT No such file or directory
3354 * common_chdir: ???
3355 *
3356 */
2d21ac55 3357int
b0d623f7 3358chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3359{
3360 return common_chdir(p, (void *)uap, 0);
3361}
3362
b0d623f7
A
3363/*
3364 * __pthread_chdir
3365 *
3366 * Change current working directory (".") for a single thread
3367 *
3368 * Parameters: p Process requesting the call
3369 * uap User argument descriptor (see below)
3370 * retval (ignored)
3371 *
3372 * Indirect parameters: uap->path Directory path
3373 *
3374 * Returns: 0 Success
3375 * common_chdir: ENOTDIR
3376 * common_chdir: ENOENT No such file or directory
3377 * common_chdir: ???
3378 *
3379 */
2d21ac55 3380int
b0d623f7 3381__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3382{
3383 return common_chdir(p, (void *)uap, 1);
3384}
3385
3386
1c79356b
A
3387/*
3388 * Change notion of root (``/'') directory.
3389 */
1c79356b
A
3390/* ARGSUSED */
3391int
b0d623f7 3392chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3393{
2d21ac55 3394 struct filedesc *fdp = p->p_fd;
1c79356b
A
3395 int error;
3396 struct nameidata nd;
2d21ac55
A
3397 vnode_t tvp;
3398 vfs_context_t ctx = vfs_context_current();
1c79356b 3399
91447636 3400 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3401 return (error);
3402
39037602 3403 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3404 UIO_USERSPACE, uap->path, ctx);
3405 error = change_dir(&nd, ctx);
55e303ae 3406 if (error)
1c79356b
A
3407 return (error);
3408
2d21ac55
A
3409#if CONFIG_MACF
3410 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3411 &nd.ni_cnd);
3412 if (error) {
91447636
A
3413 vnode_put(nd.ni_vp);
3414 return (error);
3415 }
2d21ac55
A
3416#endif
3417
91447636
A
3418 if ( (error = vnode_ref(nd.ni_vp)) ) {
3419 vnode_put(nd.ni_vp);
1c79356b
A
3420 return (error);
3421 }
91447636 3422 vnode_put(nd.ni_vp);
1c79356b 3423
91447636 3424 proc_fdlock(p);
fa4905b1 3425 tvp = fdp->fd_rdir;
1c79356b 3426 fdp->fd_rdir = nd.ni_vp;
91447636
A
3427 fdp->fd_flags |= FD_CHROOT;
3428 proc_fdunlock(p);
3429
fa4905b1 3430 if (tvp != NULL)
91447636
A
3431 vnode_rele(tvp);
3432
1c79356b
A
3433 return (0);
3434}
3435
3436/*
3437 * Common routine for chroot and chdir.
2d21ac55
A
3438 *
3439 * Returns: 0 Success
3440 * ENOTDIR Not a directory
3441 * namei:??? [anything namei can return]
3442 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3443 */
3444static int
91447636 3445change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3446{
2d21ac55 3447 vnode_t vp;
1c79356b
A
3448 int error;
3449
91447636 3450 if ((error = namei(ndp)))
1c79356b 3451 return (error);
91447636 3452 nameidone(ndp);
1c79356b 3453 vp = ndp->ni_vp;
2d21ac55
A
3454
3455 if (vp->v_type != VDIR) {
91447636 3456 vnode_put(vp);
2d21ac55
A
3457 return (ENOTDIR);
3458 }
3459
3460#if CONFIG_MACF
3461 error = mac_vnode_check_chdir(ctx, vp);
3462 if (error) {
3463 vnode_put(vp);
3464 return (error);
3465 }
3466#endif
3467
3468 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3469 if (error) {
3470 vnode_put(vp);
3471 return (error);
3472 }
91447636 3473
1c79356b
A
3474 return (error);
3475}
3476
fe8ab488
A
3477/*
3478 * Free the vnode data (for directories) associated with the file glob.
3479 */
3480struct fd_vn_data *
3481fg_vn_data_alloc(void)
3482{
3483 struct fd_vn_data *fvdata;
3484
3485 /* Allocate per fd vnode data */
3486 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3487 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3488 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3489 return fvdata;
3490}
3491
3492/*
3493 * Free the vnode data (for directories) associated with the file glob.
3494 */
3495void
3496fg_vn_data_free(void *fgvndata)
3497{
3498 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3499
3500 if (fvdata->fv_buf)
3501 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3502 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3503 FREE(fvdata, M_FD_VN_DATA);
3504}
3505
1c79356b
A
3506/*
3507 * Check permissions, allocate an open file structure,
3508 * and call the device open routine if any.
2d21ac55
A
3509 *
3510 * Returns: 0 Success
3511 * EINVAL
3512 * EINTR
3513 * falloc:ENFILE
3514 * falloc:EMFILE
3515 * falloc:ENOMEM
3516 * vn_open_auth:???
3517 * dupfdopen:???
3518 * VNOP_ADVLOCK:???
3519 * vnode_setsize:???
b0d623f7
A
3520 *
3521 * XXX Need to implement uid, gid
1c79356b 3522 */
2d21ac55 3523int
39236c6e
A
3524open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3525 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3526 int32_t *retval)
1c79356b 3527{
2d21ac55
A
3528 proc_t p = vfs_context_proc(ctx);
3529 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3530 struct fileproc *fp;
3531 vnode_t vp;
91447636 3532 int flags, oflags;
1c79356b
A
3533 int type, indx, error;
3534 struct flock lf;
3e170ce0 3535 struct vfs_context context;
ccc36f2f 3536
91447636 3537 oflags = uflags;
ccc36f2f
A
3538
3539 if ((oflags & O_ACCMODE) == O_ACCMODE)
3540 return(EINVAL);
3e170ce0 3541
91447636 3542 flags = FFLAGS(uflags);
3e170ce0
A
3543 CLR(flags, FENCRYPTED);
3544 CLR(flags, FUNENCRYPTED);
91447636
A
3545
3546 AUDIT_ARG(fflags, oflags);
3547 AUDIT_ARG(mode, vap->va_mode);
3548
39236c6e
A
3549 if ((error = falloc_withalloc(p,
3550 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3551 return (error);
91447636 3552 }
2d21ac55 3553 uu->uu_dupfd = -indx - 1;
91447636 3554
2d21ac55
A
3555 if ((error = vn_open_auth(ndp, &flags, vap))) {
3556 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3557 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3558 fp_drop(p, indx, NULL, 0);
91447636
A
3559 *retval = indx;
3560 return (0);
3561 }
1c79356b
A
3562 }
3563 if (error == ERESTART)
91447636
A
3564 error = EINTR;
3565 fp_free(p, indx, fp);
1c79356b
A
3566 return (error);
3567 }
2d21ac55
A
3568 uu->uu_dupfd = 0;
3569 vp = ndp->ni_vp;
55e303ae 3570
3e170ce0 3571 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3572 fp->f_fglob->fg_ops = &vnops;
3573 fp->f_fglob->fg_data = (caddr_t)vp;
3574
1c79356b
A
3575 if (flags & (O_EXLOCK | O_SHLOCK)) {
3576 lf.l_whence = SEEK_SET;
3577 lf.l_start = 0;
3578 lf.l_len = 0;
3579 if (flags & O_EXLOCK)
3580 lf.l_type = F_WRLCK;
3581 else
3582 lf.l_type = F_RDLCK;
3583 type = F_FLOCK;
3584 if ((flags & FNONBLOCK) == 0)
3585 type |= F_WAIT;
2d21ac55
A
3586#if CONFIG_MACF
3587 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3588 F_SETLK, &lf);
3589 if (error)
3590 goto bad;
3591#endif
39236c6e 3592 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3593 goto bad;
91447636 3594 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3595 }
55e303ae 3596
00867663
A
3597#if DEVELOPMENT || DEBUG
3598 /*
3599 * XXX VSWAP: Check for entitlements or special flag here
3600 * so we can restrict access appropriately.
3601 */
3602#else /* DEVELOPMENT || DEBUG */
3603
3604 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3605 /* block attempt to write/truncate swapfile */
3606 error = EPERM;
3607 goto bad;
3608 }
3609#endif /* DEVELOPMENT || DEBUG */
3610
91447636
A
3611 /* try to truncate by setting the size attribute */
3612 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3613 goto bad;
55e303ae 3614
fe8ab488
A
3615 /*
3616 * For directories we hold some additional information in the fd.
3617 */
3618 if (vnode_vtype(vp) == VDIR) {
3619 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3620 } else {
3621 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3622 }
3623
91447636 3624 vnode_put(vp);
55e303ae 3625
3e170ce0
A
3626 /*
3627 * The first terminal open (without a O_NOCTTY) by a session leader
3628 * results in it being set as the controlling terminal.
3629 */
3630 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3631 !(flags & O_NOCTTY)) {
3632 int tmp = 0;
3633
3634 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3635 (caddr_t)&tmp, ctx);
3636 }
3637
91447636 3638 proc_fdlock(p);
6d2010ae
A
3639 if (flags & O_CLOEXEC)
3640 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3641 if (flags & O_CLOFORK)
3642 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3643 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3644
3645#if CONFIG_SECLUDED_MEMORY
3646 if (secluded_for_filecache &&
3647 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3648 vnode_vtype(vp) == VREG) {
3649 memory_object_control_t moc;
3650
3651 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3652
3653 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3654 /* nothing to do... */
3655 } else if (fp->f_fglob->fg_flag & FWRITE) {
3656 /* writable -> no longer eligible for secluded pages */
3657 memory_object_mark_eligible_for_secluded(moc,
3658 FALSE);
3659 } else if (secluded_for_filecache == 1) {
3660 char pathname[32] = { 0, };
3661 size_t copied;
3662 /* XXX FBDP: better way to detect /Applications/ ? */
3663 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3664 copyinstr(ndp->ni_dirp,
3665 pathname,
3666 sizeof (pathname),
3667 &copied);
3668 } else {
3669 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3670 pathname,
3671 sizeof (pathname),
3672 &copied);
3673 }
3674 pathname[sizeof (pathname) - 1] = '\0';
3675 if (strncmp(pathname,
3676 "/Applications/",
3677 strlen("/Applications/")) == 0 &&
3678 strncmp(pathname,
3679 "/Applications/Camera.app/",
3680 strlen("/Applications/Camera.app/")) != 0) {
3681 /*
3682 * not writable
3683 * AND from "/Applications/"
3684 * AND not from "/Applications/Camera.app/"
3685 * ==> eligible for secluded
3686 */
3687 memory_object_mark_eligible_for_secluded(moc,
3688 TRUE);
3689 }
3690 } else if (secluded_for_filecache == 2) {
5ba3f43e
A
3691#if __arm64__
3692#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3693#elif __arm__
3694#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3695#else
39037602 3696/* not implemented... */
5ba3f43e 3697#endif
39037602
A
3698 if (!strncmp(vp->v_name,
3699 DYLD_SHARED_CACHE_NAME,
3700 strlen(DYLD_SHARED_CACHE_NAME)) ||
3701 !strncmp(vp->v_name,
3702 "dyld",
3703 strlen(vp->v_name)) ||
3704 !strncmp(vp->v_name,
3705 "launchd",
3706 strlen(vp->v_name)) ||
3707 !strncmp(vp->v_name,
3708 "Camera",
3709 strlen(vp->v_name)) ||
3710 !strncmp(vp->v_name,
3711 "mediaserverd",
d9a64523
A
3712 strlen(vp->v_name)) ||
3713 !strncmp(vp->v_name,
3714 "SpringBoard",
3715 strlen(vp->v_name)) ||
3716 !strncmp(vp->v_name,
3717 "backboardd",
39037602
A
3718 strlen(vp->v_name))) {
3719 /*
3720 * This file matters when launching Camera:
3721 * do not store its contents in the secluded
3722 * pool that will be drained on Camera launch.
3723 */
3724 memory_object_mark_eligible_for_secluded(moc,
3725 FALSE);
3726 }
3727 }
3728 }
3729#endif /* CONFIG_SECLUDED_MEMORY */
3730
91447636
A
3731 fp_drop(p, indx, fp, 1);
3732 proc_fdunlock(p);
3733
1c79356b 3734 *retval = indx;
91447636 3735
1c79356b 3736 return (0);
55e303ae 3737bad:
3e170ce0 3738 context = *vfs_context_current();
2d21ac55 3739 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3740
fe8ab488
A
3741 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3742 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3743 lf.l_whence = SEEK_SET;
3744 lf.l_start = 0;
3745 lf.l_len = 0;
3746 lf.l_type = F_UNLCK;
39037602 3747
fe8ab488
A
3748 (void)VNOP_ADVLOCK(
3749 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3750 }
2d21ac55
A
3751
3752 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3753 vnode_put(vp);
3754 fp_free(p, indx, fp);
3755
55e303ae 3756 return (error);
1c79356b
A
3757}
3758
fe8ab488
A
3759/*
3760 * While most of the *at syscall handlers can call nameiat() which
3761 * is a wrapper around namei, the use of namei and initialisation
3762 * of nameidata are far removed and in different functions - namei
3763 * gets called in vn_open_auth for open1. So we'll just do here what
3764 * nameiat() does.
3765 */
3766static int
3767open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3768 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3769 int dirfd)
3770{
3771 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3772 int error;
3773 char c;
3774
3775 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3776 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3777 if (error)
3778 return (error);
3779 } else {
3780 c = *((char *)(ndp->ni_dirp));
3781 }
3782
3783 if (c != '/') {
3784 vnode_t dvp_at;
3785
3786 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3787 &dvp_at);
3788 if (error)
3789 return (error);
3790
3791 if (vnode_vtype(dvp_at) != VDIR) {
3792 vnode_put(dvp_at);
3793 return (ENOTDIR);
3794 }
3795
3796 ndp->ni_dvp = dvp_at;
3797 ndp->ni_cnd.cn_flags |= USEDVP;
3798 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3799 retval);
3800 vnode_put(dvp_at);
3801 return (error);
3802 }
3803 }
3804
3805 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3806}
3807
0c530ab8 3808/*
b0d623f7 3809 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3810 *
3811 * Parameters: p Process requesting the open
3812 * uap User argument descriptor (see below)
3813 * retval Pointer to an area to receive the
3814 * return calue from the system call
3815 *
3816 * Indirect: uap->path Path to open (same as 'open')
3817 * uap->flags Flags to open (same as 'open'
3818 * uap->uid UID to set, if creating
3819 * uap->gid GID to set, if creating
3820 * uap->mode File mode, if creating (same as 'open')
3821 * uap->xsecurity ACL to set, if creating
3822 *
3823 * Returns: 0 Success
3824 * !0 errno value
3825 *
3826 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3827 *
3828 * XXX: We should enummerate the possible errno values here, and where
3829 * in the code they originated.
3830 */
1c79356b 3831int
b0d623f7 3832open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3833{
2d21ac55 3834 struct filedesc *fdp = p->p_fd;
91447636
A
3835 int ciferror;
3836 kauth_filesec_t xsecdst;
3837 struct vnode_attr va;
2d21ac55 3838 struct nameidata nd;
91447636
A
3839 int cmode;
3840
b0d623f7
A
3841 AUDIT_ARG(owner, uap->uid, uap->gid);
3842
91447636
A
3843 xsecdst = NULL;
3844 if ((uap->xsecurity != USER_ADDR_NULL) &&
3845 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3846 return ciferror;
3847
91447636
A
3848 VATTR_INIT(&va);
3849 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3850 VATTR_SET(&va, va_mode, cmode);
3851 if (uap->uid != KAUTH_UID_NONE)
3852 VATTR_SET(&va, va_uid, uap->uid);
3853 if (uap->gid != KAUTH_GID_NONE)
3854 VATTR_SET(&va, va_gid, uap->gid);
3855 if (xsecdst != NULL)
3856 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3857
6d2010ae
A
3858 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3859 uap->path, vfs_context_current());
2d21ac55 3860
39236c6e
A
3861 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3862 fileproc_alloc_init, NULL, retval);
91447636
A
3863 if (xsecdst != NULL)
3864 kauth_filesec_free(xsecdst);
3865
3866 return ciferror;
3867}
3868
39037602 3869/*
316670eb 3870 * Go through the data-protected atomically controlled open (2)
39037602 3871 *
316670eb
A
3872 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3873 */
3874int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3875 int flags = uap->flags;
3876 int class = uap->class;
3877 int dpflags = uap->dpflags;
3878
39037602 3879 /*
316670eb
A
3880 * Follow the same path as normal open(2)
3881 * Look up the item if it exists, and acquire the vnode.
3882 */
3883 struct filedesc *fdp = p->p_fd;
3884 struct vnode_attr va;
3885 struct nameidata nd;
3886 int cmode;
3887 int error;
39037602 3888
316670eb
A
3889 VATTR_INIT(&va);
3890 /* Mask off all but regular access permissions */
3891 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3892 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3893
3894 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3895 uap->path, vfs_context_current());
3896
39037602
A
3897 /*
3898 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3899 * extra fields.
3900 * 1. target cprotect class.
39037602
A
3901 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3902 */
3903 if (flags & O_CREAT) {
3e170ce0
A
3904 /* lower level kernel code validates that the class is valid before applying it. */
3905 if (class != PROTECTION_CLASS_DEFAULT) {
3906 /*
3907 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3908 * file behave the same as open (2)
3909 */
3910 VATTR_SET(&va, va_dataprotect_class, class);
3911 }
316670eb 3912 }
39037602 3913
3e170ce0 3914 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3915 if ( flags & (O_RDWR | O_WRONLY)) {
3916 /* Not allowed to write raw encrypted bytes */
39037602
A
3917 return EINVAL;
3918 }
3e170ce0
A
3919 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3920 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3921 }
3922 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3923 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3924 }
316670eb
A
3925 }
3926
39236c6e
A
3927 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3928 fileproc_alloc_init, NULL, retval);
316670eb
A
3929
3930 return error;
3931}
3932
fe8ab488
A
3933static int
3934openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3935 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3936{
fe8ab488 3937 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3938 struct vnode_attr va;
2d21ac55 3939 struct nameidata nd;
91447636 3940 int cmode;
1c79356b 3941
91447636
A
3942 VATTR_INIT(&va);
3943 /* Mask off all but regular access permissions */
fe8ab488 3944 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3945 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3946
fe8ab488
A
3947 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3948 segflg, path, ctx);
2d21ac55 3949
fe8ab488
A
3950 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3951 retval, fd));
1c79356b 3952}
91447636 3953
fe8ab488
A
3954int
3955open(proc_t p, struct open_args *uap, int32_t *retval)
3956{
3957 __pthread_testcancel(1);
3958 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3959}
1c79356b 3960
fe8ab488
A
3961int
3962open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3963 int32_t *retval)
3964{
3965 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3966 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3967}
91447636 3968
1c79356b 3969int
fe8ab488
A
3970openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3971 int32_t *retval)
1c79356b 3972{
fe8ab488
A
3973 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3974 uap->mode, uap->fd, UIO_USERSPACE, retval));
3975}
91447636 3976
fe8ab488
A
3977int
3978openat(proc_t p, struct openat_args *uap, int32_t *retval)
3979{
3980 __pthread_testcancel(1);
3981 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3982}
3983
3984/*
3985 * openbyid_np: open a file given a file system id and a file system object id
3986 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3987 * file systems that don't support object ids it is a node id (uint64_t).
3988 *
3989 * Parameters: p Process requesting the open
3990 * uap User argument descriptor (see below)
3991 * retval Pointer to an area to receive the
3992 * return calue from the system call
3993 *
3994 * Indirect: uap->path Path to open (same as 'open')
3995 *
3996 * uap->fsid id of target file system
3997 * uap->objid id of target file system object
3998 * uap->flags Flags to open (same as 'open')
3999 *
4000 * Returns: 0 Success
4001 * !0 errno value
4002 *
4003 *
4004 * XXX: We should enummerate the possible errno values here, and where
4005 * in the code they originated.
4006 */
4007int
4008openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4009{
4010 fsid_t fsid;
4011 uint64_t objid;
4012 int error;
4013 char *buf = NULL;
4014 int buflen = MAXPATHLEN;
4015 int pathlen = 0;
4016 vfs_context_t ctx = vfs_context_current();
4017
490019cf
A
4018 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
4019 return (error);
4020 }
4021
fe8ab488
A
4022 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
4023 return (error);
4024 }
4025
4026 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4027 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
4028 return (error);
4029 }
4030
4031 AUDIT_ARG(value32, fsid.val[0]);
4032 AUDIT_ARG(value64, objid);
4033
4034 /*resolve path from fsis, objid*/
4035 do {
4036 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4037 if (buf == NULL) {
4038 return (ENOMEM);
4039 }
4040
4041 error = fsgetpath_internal(
4042 ctx, fsid.val[0], objid,
4043 buflen, buf, &pathlen);
4044
4045 if (error) {
4046 FREE(buf, M_TEMP);
4047 buf = NULL;
4048 }
4049 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4050
4051 if (error) {
4052 return error;
4053 }
4054
4055 buf[pathlen] = 0;
4056
4057 error = openat_internal(
4058 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4059
4060 FREE(buf, M_TEMP);
4061
4062 return error;
4063}
4064
4065
4066/*
4067 * Create a special file.
4068 */
4069static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4070
4071int
4072mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4073{
4074 struct vnode_attr va;
4075 vfs_context_t ctx = vfs_context_current();
4076 int error;
4077 struct nameidata nd;
4078 vnode_t vp, dvp;
4079
4080 VATTR_INIT(&va);
4081 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4082 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
4083
4084 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4085 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 4086 return(mkfifo1(ctx, uap->path, &va));
1c79356b 4087
55e303ae 4088 AUDIT_ARG(mode, uap->mode);
b0d623f7 4089 AUDIT_ARG(value32, uap->dev);
91447636 4090
2d21ac55 4091 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 4092 return (error);
39037602 4093 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 4094 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
4095 error = namei(&nd);
4096 if (error)
1c79356b 4097 return (error);
91447636 4098 dvp = nd.ni_dvp;
1c79356b 4099 vp = nd.ni_vp;
91447636
A
4100
4101 if (vp != NULL) {
1c79356b 4102 error = EEXIST;
91447636 4103 goto out;
1c79356b 4104 }
55e303ae 4105
91447636 4106 switch (uap->mode & S_IFMT) {
91447636
A
4107 case S_IFCHR:
4108 VATTR_SET(&va, va_type, VCHR);
4109 break;
4110 case S_IFBLK:
4111 VATTR_SET(&va, va_type, VBLK);
4112 break;
91447636
A
4113 default:
4114 error = EINVAL;
4115 goto out;
4116 }
2d21ac55
A
4117
4118#if CONFIG_MACF
6d2010ae
A
4119 error = mac_vnode_check_create(ctx,
4120 nd.ni_dvp, &nd.ni_cnd, &va);
4121 if (error)
4122 goto out;
2d21ac55
A
4123#endif
4124
4125 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4126 goto out;
4127
6d2010ae 4128 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
4129 goto out;
4130
4131 if (vp) {
4132 int update_flags = 0;
4133
4134 // Make sure the name & parent pointers are hooked up
4135 if (vp->v_name == NULL)
4136 update_flags |= VNODE_UPDATE_NAME;
4137 if (vp->v_parent == NULLVP)
4138 update_flags |= VNODE_UPDATE_PARENT;
4139
4140 if (update_flags)
4141 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4142
2d21ac55
A
4143#if CONFIG_FSE
4144 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4145 FSE_ARG_VNODE, vp,
4146 FSE_ARG_DONE);
2d21ac55 4147#endif
1c79356b 4148 }
91447636
A
4149
4150out:
4151 /*
4152 * nameidone has to happen before we vnode_put(dvp)
4153 * since it may need to release the fs_nodelock on the dvp
4154 */
4155 nameidone(&nd);
4156
4157 if (vp)
4158 vnode_put(vp);
4159 vnode_put(dvp);
4160
1c79356b
A
4161 return (error);
4162}
4163
4164/*
4165 * Create a named pipe.
2d21ac55
A
4166 *
4167 * Returns: 0 Success
4168 * EEXIST
4169 * namei:???
4170 * vnode_authorize:???
4171 * vn_create:???
1c79356b 4172 */
91447636
A
4173static int
4174mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4175{
91447636 4176 vnode_t vp, dvp;
1c79356b
A
4177 int error;
4178 struct nameidata nd;
55e303ae 4179
39037602 4180 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 4181 UIO_USERSPACE, upath, ctx);
55e303ae
A
4182 error = namei(&nd);
4183 if (error)
1c79356b 4184 return (error);
91447636
A
4185 dvp = nd.ni_dvp;
4186 vp = nd.ni_vp;
4187
4188 /* check that this is a new file and authorize addition */
4189 if (vp != NULL) {
4190 error = EEXIST;
4191 goto out;
4192 }
2d21ac55
A
4193 VATTR_SET(vap, va_type, VFIFO);
4194
6d2010ae 4195 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 4196 goto out;
2d21ac55 4197
6d2010ae 4198 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4199out:
4200 /*
4201 * nameidone has to happen before we vnode_put(dvp)
4202 * since it may need to release the fs_nodelock on the dvp
4203 */
4204 nameidone(&nd);
4205
4206 if (vp)
4207 vnode_put(vp);
4208 vnode_put(dvp);
4209
55e303ae 4210 return error;
91447636
A
4211}
4212
0c530ab8
A
4213
4214/*
b0d623f7 4215 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4216 *
4217 * Parameters: p Process requesting the open
4218 * uap User argument descriptor (see below)
4219 * retval (Ignored)
4220 *
4221 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4222 * uap->uid UID to set
4223 * uap->gid GID to set
4224 * uap->mode File mode to set (same as 'mkfifo')
4225 * uap->xsecurity ACL to set, if creating
4226 *
4227 * Returns: 0 Success
4228 * !0 errno value
4229 *
4230 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4231 *
4232 * XXX: We should enummerate the possible errno values here, and where
4233 * in the code they originated.
4234 */
91447636 4235int
b0d623f7 4236mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4237{
4238 int ciferror;
4239 kauth_filesec_t xsecdst;
91447636
A
4240 struct vnode_attr va;
4241
b0d623f7
A
4242 AUDIT_ARG(owner, uap->uid, uap->gid);
4243
91447636
A
4244 xsecdst = KAUTH_FILESEC_NONE;
4245 if (uap->xsecurity != USER_ADDR_NULL) {
4246 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4247 return ciferror;
4248 }
4249
91447636
A
4250 VATTR_INIT(&va);
4251 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4252 if (uap->uid != KAUTH_UID_NONE)
4253 VATTR_SET(&va, va_uid, uap->uid);
4254 if (uap->gid != KAUTH_GID_NONE)
4255 VATTR_SET(&va, va_gid, uap->gid);
4256 if (xsecdst != KAUTH_FILESEC_NONE)
4257 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4258
2d21ac55 4259 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
4260
4261 if (xsecdst != KAUTH_FILESEC_NONE)
4262 kauth_filesec_free(xsecdst);
4263 return ciferror;
4264}
4265
4266/* ARGSUSED */
4267int
b0d623f7 4268mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4269{
91447636
A
4270 struct vnode_attr va;
4271
91447636
A
4272 VATTR_INIT(&va);
4273 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4274
2d21ac55 4275 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
4276}
4277
b0d623f7
A
4278
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.
 *
 * The terminator itself takes part in the comparison, so searching for
 * 0 yields a pointer to the terminating NUL.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch) {
			last = p;
		}
	} while (*p++ != '\0');

	return last;
}
4292
4293extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4294
4295int
4296safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4297{
4298 int ret, len = _len;
4299
4300 *truncated_path = 0;
4301 ret = vn_getpath(dvp, path, &len);
4302 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4303 if (leafname) {
4304 path[len-1] = '/';
4305 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4306 if (len > MAXPATHLEN) {
4307 char *ptr;
39037602 4308
b0d623f7
A
4309 // the string got truncated!
4310 *truncated_path = 1;
4311 ptr = my_strrchr(path, '/');
4312 if (ptr) {
4313 *ptr = '\0'; // chop off the string at the last directory component
4314 }
4315 len = strlen(path) + 1;
4316 }
4317 }
4318 } else if (ret == 0) {
4319 *truncated_path = 1;
4320 } else if (ret != 0) {
4321 struct vnode *mydvp=dvp;
4322
4323 if (ret != ENOSPC) {
4324 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4325 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4326 }
b0d623f7 4327 *truncated_path = 1;
39037602 4328
b0d623f7
A
4329 do {
4330 if (mydvp->v_parent != NULL) {
4331 mydvp = mydvp->v_parent;
4332 } else if (mydvp->v_mount) {
4333 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4334 break;
4335 } else {
4336 // no parent and no mount point? only thing is to punt and say "/" changed
4337 strlcpy(path, "/", _len);
4338 len = 2;
4339 mydvp = NULL;
4340 }
39037602 4341
b0d623f7
A
4342 if (mydvp == NULL) {
4343 break;
4344 }
4345
4346 len = _len;
4347 ret = vn_getpath(mydvp, path, &len);
4348 } while (ret == ENOSPC);
4349 }
4350
4351 return len;
4352}
4353
4354
1c79356b
A
4355/*
4356 * Make a hard file link.
2d21ac55
A
4357 *
4358 * Returns: 0 Success
4359 * EPERM
4360 * EEXIST
4361 * EXDEV
4362 * namei:???
4363 * vnode_authorize:???
4364 * VNOP_LINK:???
1c79356b 4365 */
1c79356b 4366/* ARGSUSED */
fe8ab488
A
4367static int
4368linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4369 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4370{
91447636 4371 vnode_t vp, dvp, lvp;
1c79356b 4372 struct nameidata nd;
fe8ab488 4373 int follow;
1c79356b 4374 int error;
b0d623f7 4375#if CONFIG_FSE
91447636 4376 fse_info finfo;
b0d623f7 4377#endif
b226f5e5 4378 int need_event, has_listeners, need_kpath2;
2d21ac55 4379 char *target_path = NULL;
b0d623f7 4380 int truncated=0;
1c79356b 4381
91447636
A
4382 vp = dvp = lvp = NULLVP;
4383
4384 /* look up the object we are linking to */
fe8ab488
A
4385 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4386 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4387 segflg, path, ctx);
4388
4389 error = nameiat(&nd, fd1);
55e303ae 4390 if (error)
1c79356b
A
4391 return (error);
4392 vp = nd.ni_vp;
91447636
A
4393
4394 nameidone(&nd);
4395
2d21ac55
A
4396 /*
4397 * Normally, linking to directories is not supported.
4398 * However, some file systems may have limited support.
4399 */
91447636 4400 if (vp->v_type == VDIR) {
39037602 4401 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4402 error = EPERM; /* POSIX */
4403 goto out;
4404 }
39037602 4405
2d21ac55
A
4406 /* Linking to a directory requires ownership. */
4407 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4408 struct vnode_attr dva;
4409
4410 VATTR_INIT(&dva);
4411 VATTR_WANTED(&dva, va_uid);
4412 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4413 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4414 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4415 error = EACCES;
4416 goto out;
4417 }
4418 }
91447636
A
4419 }
4420
91447636 4421 /* lookup the target node */
6d2010ae
A
4422#if CONFIG_TRIGGERS
4423 nd.ni_op = OP_LINK;
4424#endif
91447636 4425 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4426 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4427 nd.ni_dirp = link;
4428 error = nameiat(&nd, fd2);
91447636
A
4429 if (error != 0)
4430 goto out;
4431 dvp = nd.ni_dvp;
4432 lvp = nd.ni_vp;
2d21ac55
A
4433
4434#if CONFIG_MACF
4435 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4436 goto out2;
4437#endif
4438
4439 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4440 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4441 goto out2;
4442
91447636
A
4443 /* target node must not exist */
4444 if (lvp != NULLVP) {
4445 error = EEXIST;
4446 goto out2;
4447 }
4448 /* cannot link across mountpoints */
4449 if (vnode_mount(vp) != vnode_mount(dvp)) {
4450 error = EXDEV;
4451 goto out2;
4452 }
39037602 4453
91447636 4454 /* authorize creation of the target note */
2d21ac55 4455 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4456 goto out2;
4457
4458 /* and finally make the link */
2d21ac55 4459 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4460 if (error)
4461 goto out2;
4462
39236c6e
A
4463#if CONFIG_MACF
4464 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4465#endif
4466
2d21ac55 4467#if CONFIG_FSE
91447636 4468 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4469#else
4470 need_event = 0;
4471#endif
91447636
A
4472 has_listeners = kauth_authorize_fileop_has_listeners();
4473
b226f5e5
A
4474 need_kpath2 = 0;
4475#if CONFIG_AUDIT
4476 if (AUDIT_RECORD_EXISTS()) {
4477 need_kpath2 = 1;
4478 }
4479#endif
4480
4481 if (need_event || has_listeners || need_kpath2) {
91447636
A
4482 char *link_to_path = NULL;
4483 int len, link_name_len;
4484
4485 /* build the path to the new link file */
2d21ac55
A
4486 GET_PATH(target_path);
4487 if (target_path == NULL) {
4488 error = ENOMEM;
4489 goto out2;
4490 }
4491
b0d623f7 4492 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636 4493
b226f5e5
A
4494 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
4495
91447636
A
4496 if (has_listeners) {
4497 /* build the path to file we are linking to */
2d21ac55
A
4498 GET_PATH(link_to_path);
4499 if (link_to_path == NULL) {
4500 error = ENOMEM;
4501 goto out2;
4502 }
4503
91447636 4504 link_name_len = MAXPATHLEN;
fe8ab488
A
4505 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4506 /*
39037602 4507 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4508 * Ignore result of kauth_authorize_fileop call.
4509 */
39037602
A
4510 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4511 (uintptr_t)link_to_path,
fe8ab488
A
4512 (uintptr_t)target_path);
4513 }
2d21ac55
A
4514 if (link_to_path != NULL) {
4515 RELEASE_PATH(link_to_path);
4516 }
91447636 4517 }
2d21ac55 4518#if CONFIG_FSE
91447636
A
4519 if (need_event) {
4520 /* construct fsevent */
2d21ac55 4521 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4522 if (truncated) {
4523 finfo.mode |= FSE_TRUNCATED_PATH;
4524 }
4525
91447636 4526 // build the path to the destination of the link
2d21ac55 4527 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4528 FSE_ARG_STRING, len, target_path,
4529 FSE_ARG_FINFO, &finfo,
4530 FSE_ARG_DONE);
1c79356b 4531 }
b0d623f7
A
4532 if (vp->v_parent) {
4533 add_fsevent(FSE_STAT_CHANGED, ctx,
4534 FSE_ARG_VNODE, vp->v_parent,
4535 FSE_ARG_DONE);
4536 }
1c79356b 4537 }
2d21ac55 4538#endif
1c79356b 4539 }
91447636
A
4540out2:
4541 /*
4542 * nameidone has to happen before we vnode_put(dvp)
4543 * since it may need to release the fs_nodelock on the dvp
4544 */
4545 nameidone(&nd);
2d21ac55
A
4546 if (target_path != NULL) {
4547 RELEASE_PATH(target_path);
4548 }
91447636
A
4549out:
4550 if (lvp)
4551 vnode_put(lvp);
4552 if (dvp)
4553 vnode_put(dvp);
4554 vnode_put(vp);
4555 return (error);
4556}
1c79356b 4557
fe8ab488
A
4558int
4559link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4560{
4561 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4562 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4563}
4564
4565int
4566linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4567{
4568 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4569 return (EINVAL);
4570
4571 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4572 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4573}
4574
1c79356b
A
4575/*
4576 * Make a symbolic link.
91447636
A
4577 *
4578 * We could add support for ACLs here too...
1c79356b 4579 */
1c79356b 4580/* ARGSUSED */
fe8ab488
A
4581static int
4582symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4583 user_addr_t link, enum uio_seg segflg)
1c79356b 4584{
91447636
A
4585 struct vnode_attr va;
4586 char *path;
1c79356b
A
4587 int error;
4588 struct nameidata nd;
91447636 4589 vnode_t vp, dvp;
1c79356b 4590 size_t dummy=0;
fe8ab488
A
4591 proc_t p;
4592
4593 error = 0;
4594 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4595 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4596 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4597 } else {
4598 path = (char *)path_data;
4599 }
91447636 4600 if (error)
1c79356b 4601 goto out;
55e303ae 4602 AUDIT_ARG(text, path); /* This is the link string */
91447636 4603
fe8ab488
A
4604 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4605 segflg, link, ctx);
4606
4607 error = nameiat(&nd, fd);
55e303ae 4608 if (error)
1c79356b 4609 goto out;
91447636
A
4610 dvp = nd.ni_dvp;
4611 vp = nd.ni_vp;
55e303ae 4612
fe8ab488 4613 p = vfs_context_proc(ctx);
2d21ac55
A
4614 VATTR_INIT(&va);
4615 VATTR_SET(&va, va_type, VLNK);
4616 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4617
2d21ac55
A
4618#if CONFIG_MACF
4619 error = mac_vnode_check_create(ctx,
4620 dvp, &nd.ni_cnd, &va);
4621#endif
4622 if (error != 0) {
4623 goto skipit;
4624 }
91447636 4625
2d21ac55
A
4626 if (vp != NULL) {
4627 error = EEXIST;
4628 goto skipit;
4629 }
4630
4631 /* authorize */
4632 if (error == 0)
4633 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4634 /* get default ownership, etc. */
4635 if (error == 0)
4636 error = vnode_authattr_new(dvp, &va, 0, ctx);
4637 if (error == 0)
4638 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4639
39236c6e 4640#if CONFIG_MACF
3e170ce0 4641 if (error == 0 && vp)
39236c6e
A
4642 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4643#endif
4644
2d21ac55 4645 /* do fallback attribute handling */
3e170ce0 4646 if (error == 0 && vp)
2d21ac55 4647 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4648
2d21ac55
A
4649 if (error == 0) {
4650 int update_flags = 0;
55e303ae 4651
3e170ce0 4652 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4653 if (vp == NULL) {
4654 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4655#if CONFIG_TRIGGERS
4656 nd.ni_op = OP_LOOKUP;
4657#endif
2d21ac55 4658 nd.ni_cnd.cn_flags = 0;
fe8ab488 4659 error = nameiat(&nd, fd);
2d21ac55 4660 vp = nd.ni_vp;
55e303ae 4661
2d21ac55
A
4662 if (vp == NULL)
4663 goto skipit;
4664 }
fe8ab488 4665
91447636 4666#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4667 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4668 * Ignore result of kauth_authorize_fileop call.
4669 */
4670 if (kauth_authorize_fileop_has_listeners() &&
4671 namei(&nd) == 0) {
4672 char *new_link_path = NULL;
4673 int len;
fe8ab488 4674
2d21ac55
A
4675 /* build the path to the new link file */
4676 new_link_path = get_pathbuff();
4677 len = MAXPATHLEN;
4678 vn_getpath(dvp, new_link_path, &len);
4679 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4680 new_link_path[len - 1] = '/';
2d21ac55 4681 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4682 }
fe8ab488
A
4683
4684 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4685 (uintptr_t)path, (uintptr_t)new_link_path);
4686 if (new_link_path != NULL)
4687 release_pathbuff(new_link_path);
4688 }
fe8ab488 4689#endif
2d21ac55
A
4690 // Make sure the name & parent pointers are hooked up
4691 if (vp->v_name == NULL)
4692 update_flags |= VNODE_UPDATE_NAME;
4693 if (vp->v_parent == NULLVP)
4694 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4695
2d21ac55
A
4696 if (update_flags)
4697 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4698
2d21ac55
A
4699#if CONFIG_FSE
4700 add_fsevent(FSE_CREATE_FILE, ctx,
4701 FSE_ARG_VNODE, vp,
4702 FSE_ARG_DONE);
4703#endif
4704 }
91447636
A
4705
4706skipit:
4707 /*
4708 * nameidone has to happen before we vnode_put(dvp)
4709 * since it may need to release the fs_nodelock on the dvp
4710 */
4711 nameidone(&nd);
4712
4713 if (vp)
4714 vnode_put(vp);
4715 vnode_put(dvp);
1c79356b 4716out:
fe8ab488
A
4717 if (path && (path != (char *)path_data))
4718 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4719
1c79356b
A
4720 return (error);
4721}
4722
fe8ab488
A
4723int
4724symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4725{
4726 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4727 uap->link, UIO_USERSPACE));
4728}
4729
4730int
4731symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4732 __unused int32_t *retval)
4733{
4734 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4735 uap->path2, UIO_USERSPACE));
4736}
4737
1c79356b
A
4738/*
4739 * Delete a whiteout from the filesystem.
fe8ab488 4740 * No longer supported.
1c79356b 4741 */
1c79356b 4742int
fe8ab488 4743undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4744{
fe8ab488 4745 return (ENOTSUP);
1c79356b
A
4746}
4747
4748/*
4749 * Delete a name from the filesystem.
4750 */
1c79356b 4751/* ARGSUSED */
fe8ab488 4752static int
c18c124e
A
4753unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4754 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4755{
c18c124e 4756 struct nameidata nd;
91447636 4757 vnode_t vp, dvp;
1c79356b 4758 int error;
91447636 4759 struct componentname *cnp;
2d21ac55 4760 char *path = NULL;
b0d623f7
A
4761 int len=0;
4762#if CONFIG_FSE
2d21ac55 4763 fse_info finfo;
6d2010ae 4764 struct vnode_attr va;
b0d623f7 4765#endif
c18c124e
A
4766 int flags;
4767 int need_event;
4768 int has_listeners;
4769 int truncated_path;
6d2010ae 4770 int batched;
c18c124e
A
4771 struct vnode_attr *vap;
4772 int do_retry;
4773 int retry_count = 0;
4774 int cn_flags;
4775
4776 cn_flags = LOCKPARENT;
4777 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4778 cn_flags |= AUDITVNPATH1;
4779 /* If a starting dvp is passed, it trumps any fd passed. */
4780 if (start_dvp)
4781 cn_flags |= USEDVP;
6d2010ae 4782
c910b4d9
A
4783#if NAMEDRSRCFORK
4784 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4785 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4786#endif
4787
c18c124e
A
4788retry:
4789 do_retry = 0;
4790 flags = 0;
4791 need_event = 0;
4792 has_listeners = 0;
4793 truncated_path = 0;
4794 vap = NULL;
4795
4796 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4797
4798 nd.ni_dvp = start_dvp;
4799 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4800 cnp = &nd.ni_cnd;
91447636 4801
813fb2f6 4802continue_lookup:
c18c124e 4803 error = nameiat(&nd, fd);
2d21ac55
A
4804 if (error)
4805 return (error);
b0d623f7 4806
c18c124e
A
4807 dvp = nd.ni_dvp;
4808 vp = nd.ni_vp;
91447636 4809
6d2010ae 4810
91447636 4811 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4812 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4813 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4814 }
39037602 4815
39236c6e 4816 /* Skip any potential upcalls if told to. */
316670eb
A
4817 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4818 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4819 }
4820
6d2010ae
A
4821 if (vp) {
4822 batched = vnode_compound_remove_available(vp);
4823 /*
4824 * The root of a mounted filesystem cannot be deleted.
4825 */
4826 if (vp->v_flag & VROOT) {
4827 error = EBUSY;
4828 }
2d21ac55 4829
00867663
A
4830#if DEVELOPMENT || DEBUG
4831 /*
4832 * XXX VSWAP: Check for entitlements or special flag here
4833 * so we can restrict access appropriately.
4834 */
4835#else /* DEVELOPMENT || DEBUG */
4836
4837 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4838 error = EPERM;
4839 goto out;
4840 }
4841#endif /* DEVELOPMENT || DEBUG */
4842
6d2010ae
A
4843 if (!batched) {
4844 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4845 if (error) {
3e170ce0
A
4846 if (error == ENOENT) {
4847 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4848 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4849 do_retry = 1;
4850 retry_count++;
4851 }
c18c124e 4852 }
6d2010ae
A
4853 goto out;
4854 }
4855 }
4856 } else {
4857 batched = 1;
2d21ac55 4858
6d2010ae
A
4859 if (!vnode_compound_remove_available(dvp)) {
4860 panic("No vp, but no compound remove?");
4861 }
4862 }
2d21ac55 4863
2d21ac55
A
4864#if CONFIG_FSE
4865 need_event = need_fsevent(FSE_DELETE, dvp);
4866 if (need_event) {
6d2010ae
A
4867 if (!batched) {
4868 if ((vp->v_flag & VISHARDLINK) == 0) {
4869 /* XXX need to get these data in batched VNOP */
4870 get_fse_info(vp, &finfo, ctx);
4871 }
4872 } else {
4873 error = vfs_get_notify_attributes(&va);
4874 if (error) {
4875 goto out;
4876 }
4877
4878 vap = &va;
2d21ac55
A
4879 }
4880 }
4881#endif
4882 has_listeners = kauth_authorize_fileop_has_listeners();
4883 if (need_event || has_listeners) {
2d21ac55 4884 if (path == NULL) {
6d2010ae
A
4885 GET_PATH(path);
4886 if (path == NULL) {
4887 error = ENOMEM;
4888 goto out;
4889 }
2d21ac55 4890 }
c18c124e 4891 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4892 }
4893
4894#if NAMEDRSRCFORK
c18c124e 4895 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4896 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4897 else
4898#endif
6d2010ae 4899 {
c18c124e
A
4900 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4901 vp = nd.ni_vp;
6d2010ae
A
4902 if (error == EKEEPLOOKING) {
4903 if (!batched) {
4904 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4905 }
4906
c18c124e 4907 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4908 panic("EKEEPLOOKING, but continue flag not set?");
4909 }
4910
4911 if (vnode_isdir(vp)) {
4912 error = EISDIR;
4913 goto out;
4914 }
813fb2f6 4915 goto continue_lookup;
3e170ce0
A
4916 } else if (error == ENOENT && batched) {
4917 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4918 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4919 /*
4920 * For compound VNOPs, the authorization callback may
4921 * return ENOENT in case of racing hardlink lookups
4922 * hitting the name cache, redrive the lookup.
4923 */
4924 do_retry = 1;
4925 retry_count += 1;
4926 goto out;
4927 }
6d2010ae
A
4928 }
4929 }
2d21ac55
A
4930
4931 /*
39037602 4932 * Call out to allow 3rd party notification of delete.
2d21ac55
A
4933 * Ignore result of kauth_authorize_fileop call.
4934 */
1c79356b 4935 if (!error) {
2d21ac55 4936 if (has_listeners) {
39037602
A
4937 kauth_authorize_fileop(vfs_context_ucred(ctx),
4938 KAUTH_FILEOP_DELETE,
2d21ac55
A
4939 (uintptr_t)vp,
4940 (uintptr_t)path);
4941 }
91447636 4942
2d21ac55
A
4943 if (vp->v_flag & VISHARDLINK) {
4944 //
4945 // if a hardlink gets deleted we want to blow away the
4946 // v_parent link because the path that got us to this
4947 // instance of the link is no longer valid. this will
4948 // force the next call to get the path to ask the file
4949 // system instead of just following the v_parent link.
4950 //
4951 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4952 }
91447636 4953
2d21ac55
A
4954#if CONFIG_FSE
4955 if (need_event) {
4956 if (vp->v_flag & VISHARDLINK) {
4957 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4958 } else if (vap) {
4959 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4960 }
b0d623f7
A
4961 if (truncated_path) {
4962 finfo.mode |= FSE_TRUNCATED_PATH;
4963 }
2d21ac55
A
4964 add_fsevent(FSE_DELETE, ctx,
4965 FSE_ARG_STRING, len, path,
4966 FSE_ARG_FINFO, &finfo,
4967 FSE_ARG_DONE);
4968 }
4969#endif
1c79356b 4970 }
6d2010ae
A
4971
4972out:
2d21ac55
A
4973 if (path != NULL)
4974 RELEASE_PATH(path);
4975
c910b4d9 4976#if NAMEDRSRCFORK
39037602 4977 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
4978 * will cause its shadow file to go away if necessary.
4979 */
6d2010ae
A
4980 if (vp && (vnode_isnamedstream(vp)) &&
4981 (vp->v_parent != NULLVP) &&
4982 vnode_isshadow(vp)) {
4983 vnode_recycle(vp);
39037602 4984 }
c910b4d9 4985#endif
6d2010ae
A
4986 /*
4987 * nameidone has to happen before we vnode_put(dvp)
4988 * since it may need to release the fs_nodelock on the dvp
4989 */
c18c124e 4990 nameidone(&nd);
91447636 4991 vnode_put(dvp);
6d2010ae
A
4992 if (vp) {
4993 vnode_put(vp);
4994 }
c18c124e
A
4995
4996 if (do_retry) {
4997 goto retry;
4998 }
4999
1c79356b
A
5000 return (error);
5001}
5002
fe8ab488 5003int
c18c124e
A
5004unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5005 enum uio_seg segflg, int unlink_flags)
fe8ab488 5006{
c18c124e
A
5007 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5008 unlink_flags));
fe8ab488
A
5009}
5010
1c79356b 5011/*
c18c124e 5012 * Delete a name from the filesystem using Carbon semantics.
1c79356b 5013 */
c18c124e
A
5014int
5015delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 5016{
c18c124e
A
5017 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5018 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
5019}
5020
c18c124e
A
5021/*
5022 * Delete a name from the filesystem using POSIX semantics.
5023 */
1c79356b 5024int
b0d623f7 5025unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 5026{
c18c124e
A
5027 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5028 uap->path, UIO_USERSPACE, 0));
fe8ab488 5029}
2d21ac55 5030
fe8ab488
A
5031int
5032unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5033{
5034 if (uap->flag & ~AT_REMOVEDIR)
5035 return (EINVAL);
5036
5037 if (uap->flag & AT_REMOVEDIR)
5038 return (rmdirat_internal(vfs_context_current(), uap->fd,
5039 uap->path, UIO_USERSPACE));
5040 else
5041 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 5042 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
5043}
5044
5045/*
5046 * Reposition read/write file offset.
5047 */
1c79356b 5048int
2d21ac55 5049lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 5050{
91447636 5051 struct fileproc *fp;
2d21ac55
A
5052 vnode_t vp;
5053 struct vfs_context *ctx;
91447636 5054 off_t offset = uap->offset, file_size;
1c79356b
A
5055 int error;
5056
91447636
A
5057 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
5058 if (error == ENOTSUP)
5059 return (ESPIPE);
1c79356b 5060 return (error);
55e303ae 5061 }
91447636
A
5062 if (vnode_isfifo(vp)) {
5063 file_drop(uap->fd);
5064 return(ESPIPE);
5065 }
2d21ac55
A
5066
5067
5068 ctx = vfs_context_current();
5069#if CONFIG_MACF
5070 if (uap->whence == L_INCR && uap->offset == 0)
5071 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5072 fp->f_fglob);
5073 else
5074 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5075 fp->f_fglob);
5076 if (error) {
5077 file_drop(uap->fd);
5078 return (error);
5079 }
5080#endif
91447636
A
5081 if ( (error = vnode_getwithref(vp)) ) {
5082 file_drop(uap->fd);
5083 return(error);
5084 }
5085
1c79356b
A
5086 switch (uap->whence) {
5087 case L_INCR:
91447636 5088 offset += fp->f_fglob->fg_offset;
1c79356b
A
5089 break;
5090 case L_XTND:
2d21ac55 5091 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 5092 break;
91447636 5093 offset += file_size;
1c79356b
A
5094 break;
5095 case L_SET:
1c79356b 5096 break;
813fb2f6 5097 case SEEK_HOLE:
5ba3f43e 5098 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
813fb2f6
A
5099 break;
5100 case SEEK_DATA:
5ba3f43e 5101 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
813fb2f6 5102 break;
1c79356b 5103 default:
55e303ae 5104 error = EINVAL;
1c79356b 5105 }
55e303ae
A
5106 if (error == 0) {
5107 if (uap->offset > 0 && offset < 0) {
5108 /* Incremented/relative move past max size */
5109 error = EOVERFLOW;
5110 } else {
5111 /*
5112 * Allow negative offsets on character devices, per
5113 * POSIX 1003.1-2001. Most likely for writing disk
5114 * labels.
5115 */
5116 if (offset < 0 && vp->v_type != VCHR) {
5117 /* Decremented/relative move before start */
5118 error = EINVAL;
5119 } else {
5120 /* Success */
91447636
A
5121 fp->f_fglob->fg_offset = offset;
5122 *retval = fp->f_fglob->fg_offset;
55e303ae
A
5123 }
5124 }
5125 }
b0d623f7 5126
39037602 5127 /*
b0d623f7
A
5128 * An lseek can affect whether data is "available to read." Use
5129 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5130 */
5131 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
5132 (void)vnode_put(vp);
5133 file_drop(uap->fd);
55e303ae 5134 return (error);
1c79356b
A
5135}
5136
91447636 5137
1c79356b 5138/*
91447636 5139 * Check access permissions.
2d21ac55
A
5140 *
5141 * Returns: 0 Success
5142 * vnode_authorize:???
1c79356b 5143 */
91447636
A
5144static int
5145access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 5146{
91447636 5147 kauth_action_t action;
1c79356b
A
5148 int error;
5149
91447636
A
5150 /*
5151 * If just the regular access bits, convert them to something
5152 * that vnode_authorize will understand.
5153 */
5154 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5155 action = 0;
5156 if (uflags & R_OK)
5157 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5158 if (uflags & W_OK) {
5159 if (vnode_isdir(vp)) {
5160 action |= KAUTH_VNODE_ADD_FILE |
5161 KAUTH_VNODE_ADD_SUBDIRECTORY;
5162 /* might want delete rights here too */
5163 } else {
5164 action |= KAUTH_VNODE_WRITE_DATA;
5165 }
5166 }
5167 if (uflags & X_OK) {
5168 if (vnode_isdir(vp)) {
5169 action |= KAUTH_VNODE_SEARCH;
5170 } else {
5171 action |= KAUTH_VNODE_EXECUTE;
5172 }
5173 }
5174 } else {
5175 /* take advantage of definition of uflags */
5176 action = uflags >> 8;
5177 }
39037602 5178
2d21ac55
A
5179#if CONFIG_MACF
5180 error = mac_vnode_check_access(ctx, vp, uflags);
5181 if (error)
5182 return (error);
5183#endif /* MAC */
5184
91447636
A
5185 /* action == 0 means only check for existence */
5186 if (action != 0) {
5187 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5188 } else {
5189 error = 0;
5190 }
5191
5192 return(error);
1c79356b 5193}
1c79356b 5194
91447636
A
5195
5196
2d21ac55 5197/*
b0d623f7 5198 * access_extended: Check access permissions in bulk.
2d21ac55 5199 *
b0d623f7 5200 * Description: uap->entries Pointer to an array of accessx
39037602
A
5201 * descriptor structs, plus one or
5202 * more NULL terminated strings (see
b0d623f7
A
5203 * "Notes" section below).
5204 * uap->size Size of the area pointed to by
5205 * uap->entries.
5206 * uap->results Pointer to the results array.
2d21ac55
A
5207 *
5208 * Returns: 0 Success
5209 * ENOMEM Insufficient memory
5210 * EINVAL Invalid arguments
5211 * namei:EFAULT Bad address
5212 * namei:ENAMETOOLONG Filename too long
5213 * namei:ENOENT No such file or directory
5214 * namei:ELOOP Too many levels of symbolic links
5215 * namei:EBADF Bad file descriptor
5216 * namei:ENOTDIR Not a directory
5217 * namei:???
5218 * access1:
5219 *
5220 * Implicit returns:
5221 * uap->results Array contents modified
5222 *
5223 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5224 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5225 * strings
5226 *
5227 * struct accessx_descriptor[0]
5228 * ...
5229 * struct accessx_descriptor[n]
5230 * char name_data[0];
5231 *
5232 * We determine the entry count by walking the buffer containing
b0d623f7 5233 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5234 * see, the valid values for the offset ad_name_offset will be
5235 * in the byte range:
5236 *
5237 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5238 * to
5239 * [ uap->entries + uap->size - 2 ]
5240 *
5241 * since we must have at least one string, and the string must
b0d623f7 5242 * be at least one character plus the NULL terminator in length.
39037602 5243 *
2d21ac55
A
5244 * XXX: Need to support the check-as uid argument
5245 */
1c79356b 5246int
b0d623f7 5247access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5248{
2d21ac55
A
5249 struct accessx_descriptor *input = NULL;
5250 errno_t *result = NULL;
5251 errno_t error = 0;
5252 int wantdelete = 0;
5253 unsigned int desc_max, desc_actual, i, j;
91447636 5254 struct vfs_context context;
1c79356b 5255 struct nameidata nd;
91447636 5256 int niopts;
2d21ac55
A
5257 vnode_t vp = NULL;
5258 vnode_t dvp = NULL;
5259#define ACCESSX_MAX_DESCR_ON_STACK 10
5260 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5261
91447636
A
5262 context.vc_ucred = NULL;
5263
2d21ac55
A
5264 /*
5265 * Validate parameters; if valid, copy the descriptor array and string
5266 * arguments into local memory. Before proceeding, the following
5267 * conditions must have been met:
5268 *
5269 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5270 * o There must be sufficient room in the request for at least one
5271 * descriptor and a one byte NUL terminated string.
5272 * o The allocation of local storage must not fail.
5273 */
91447636
A
5274 if (uap->size > ACCESSX_MAX_TABLESIZE)
5275 return(ENOMEM);
2d21ac55 5276 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 5277 return(EINVAL);
2d21ac55
A
5278 if (uap->size <= sizeof (stack_input)) {
5279 input = stack_input;
5280 } else {
91447636
A
5281 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5282 if (input == NULL) {
5283 error = ENOMEM;
5284 goto out;
5285 }
2d21ac55 5286 }
91447636 5287 error = copyin(uap->entries, input, uap->size);
55e303ae 5288 if (error)
91447636 5289 goto out;
1c79356b 5290
b0d623f7
A
5291 AUDIT_ARG(opaque, input, uap->size);
5292
91447636 5293 /*
2d21ac55
A
5294 * Force NUL termination of the copyin buffer to avoid nami() running
5295 * off the end. If the caller passes us bogus data, they may get a
5296 * bogus result.
5297 */
5298 ((char *)input)[uap->size - 1] = 0;
5299
5300 /*
5301 * Access is defined as checking against the process' real identity,
5302 * even if operations are checking the effective identity. This
5303 * requires that we use a local vfs context.
91447636
A
5304 */
5305 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5306 context.vc_thread = current_thread();
91447636
A
5307
5308 /*
2d21ac55
A
5309 * Find out how many entries we have, so we can allocate the result
5310 * array by walking the list and adjusting the count downward by the
5311 * earliest string offset we see.
91447636 5312 */
2d21ac55
A
5313 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5314 desc_actual = desc_max;
5315 for (i = 0; i < desc_actual; i++) {
91447636 5316 /*
2d21ac55
A
5317 * Take the offset to the name string for this entry and
5318 * convert to an input array index, which would be one off
5319 * the end of the array if this entry was the lowest-addressed
5320 * name string.
91447636
A
5321 */
5322 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5323
5324 /*
5325 * An offset greater than the max allowable offset is an error.
5326 * It is also an error for any valid entry to point
5327 * to a location prior to the end of the current entry, if
5328 * it's not a reference to the string of the previous entry.
5329 */
5330 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5331 error = EINVAL;
5332 goto out;
5333 }
2d21ac55 5334
39037602
A
5335 /* Also do not let ad_name_offset point to something beyond the size of the input */
5336 if (input[i].ad_name_offset >= uap->size) {
5337 error = EINVAL;
5338 goto out;
5339 }
5340
2d21ac55
A
5341 /*
5342 * An offset of 0 means use the previous descriptor's offset;
5343 * this is used to chain multiple requests for the same file
5344 * to avoid multiple lookups.
5345 */
91447636 5346 if (j == 0) {
2d21ac55 5347 /* This is not valid for the first entry */
91447636
A
5348 if (i == 0) {
5349 error = EINVAL;
5350 goto out;
5351 }
5352 continue;
5353 }
2d21ac55
A
5354
5355 /*
5356 * If the offset of the string for this descriptor is before
5357 * what we believe is the current actual last descriptor,
5358 * then we need to adjust our estimate downward; this permits
5359 * the string table following the last descriptor to be out
5360 * of order relative to the descriptor list.
5361 */
5362 if (j < desc_actual)
5363 desc_actual = j;
91447636 5364 }
2d21ac55
A
5365
5366 /*
5367 * We limit the actual number of descriptors we are willing to process
5368 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5369 * requested does not exceed this limit,
5370 */
5371 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5372 error = ENOMEM;
5373 goto out;
5374 }
d9a64523 5375 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
91447636
A
5376 if (result == NULL) {
5377 error = ENOMEM;
5378 goto out;
5379 }
5380
5381 /*
2d21ac55
A
5382 * Do the work by iterating over the descriptor entries we know to
5383 * at least appear to contain valid data.
91447636
A
5384 */
5385 error = 0;
2d21ac55 5386 for (i = 0; i < desc_actual; i++) {
91447636 5387 /*
2d21ac55
A
5388 * If the ad_name_offset is 0, then we use the previous
5389 * results to make the check; otherwise, we are looking up
5390 * a new file name.
91447636
A
5391 */
5392 if (input[i].ad_name_offset != 0) {
5393 /* discard old vnodes */
5394 if (vp) {
5395 vnode_put(vp);
5396 vp = NULL;
5397 }
5398 if (dvp) {
5399 vnode_put(dvp);
5400 dvp = NULL;
5401 }
39037602 5402
2d21ac55
A
5403 /*
5404 * Scan forward in the descriptor list to see if we
5405 * need the parent vnode. We will need it if we are
5406 * deleting, since we must have rights to remove
5407 * entries in the parent directory, as well as the
5408 * rights to delete the object itself.
5409 */
91447636 5410 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5411 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5412 if (input[j].ad_flags & _DELETE_OK)
5413 wantdelete = 1;
39037602 5414
91447636 5415 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5416
91447636
A
5417 /* need parent for vnode_authorize for deletion test */
5418 if (wantdelete)
5419 niopts |= WANTPARENT;
5420
5421 /* do the lookup */
6d2010ae
A
5422 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5423 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5424 &context);
91447636
A
5425 error = namei(&nd);
5426 if (!error) {
5427 vp = nd.ni_vp;
5428 if (wantdelete)
5429 dvp = nd.ni_dvp;
5430 }
5431 nameidone(&nd);
5432 }
5433
5434 /*
5435 * Handle lookup errors.
5436 */
5437 switch(error) {
5438 case ENOENT:
5439 case EACCES:
5440 case EPERM:
5441 case ENOTDIR:
5442 result[i] = error;
5443 break;
5444 case 0:
5445 /* run this access check */
5446 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5447 break;
5448 default:
5449 /* fatal lookup error */
5450
5451 goto out;
5452 }
5453 }
5454
b0d623f7
A
5455 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5456
91447636 5457 /* copy out results */
2d21ac55 5458 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5459
91447636 5460out:
2d21ac55 5461 if (input && input != stack_input)
91447636
A
5462 FREE(input, M_TEMP);
5463 if (result)
5464 FREE(result, M_TEMP);
5465 if (vp)
5466 vnode_put(vp);
5467 if (dvp)
5468 vnode_put(dvp);
0c530ab8
A
5469 if (IS_VALID_CRED(context.vc_ucred))
5470 kauth_cred_unref(&context.vc_ucred);
91447636 5471 return(error);
1c79356b
A
5472}
5473
2d21ac55
A
5474
5475/*
5476 * Returns: 0 Success
5477 * namei:EFAULT Bad address
5478 * namei:ENAMETOOLONG Filename too long
5479 * namei:ENOENT No such file or directory
5480 * namei:ELOOP Too many levels of symbolic links
5481 * namei:EBADF Bad file descriptor
5482 * namei:ENOTDIR Not a directory
5483 * namei:???
5484 * access1:
5485 */
fe8ab488
A
5486static int
5487faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5488 int flag, enum uio_seg segflg)
1c79356b 5489{
1c79356b
A
5490 int error;
5491 struct nameidata nd;
91447636
A
5492 int niopts;
5493 struct vfs_context context;
cf7d32b8
A
5494#if NAMEDRSRCFORK
5495 int is_namedstream = 0;
5496#endif
5497
91447636 5498 /*
fe8ab488
A
5499 * Unless the AT_EACCESS option is used, Access is defined as checking
5500 * against the process' real identity, even if operations are checking
5501 * the effective identity. So we need to tweak the credential
5502 * in the context for that case.
91447636 5503 */
fe8ab488
A
5504 if (!(flag & AT_EACCESS))
5505 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5506 else
5507 context.vc_ucred = ctx->vc_ucred;
5508 context.vc_thread = ctx->vc_thread;
5509
91447636
A
5510
5511 niopts = FOLLOW | AUDITVNPATH1;
5512 /* need parent for vnode_authorize for deletion test */
fe8ab488 5513 if (amode & _DELETE_OK)
91447636 5514 niopts |= WANTPARENT;
fe8ab488
A
5515 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5516 path, &context);
2d21ac55
A
5517
5518#if NAMEDRSRCFORK
5519 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5520 if (amode == F_OK)
2d21ac55
A
5521 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5522#endif
fe8ab488 5523 error = nameiat(&nd, fd);
91447636
A
5524 if (error)
5525 goto out;
5526
cf7d32b8 5527#if NAMEDRSRCFORK
39037602 5528 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5529 * force an inactive on release which will mark it
5530 * for recycle.
cf7d32b8
A
5531 */
5532 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5533 (nd.ni_vp->v_parent != NULLVP) &&
5534 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5535 is_namedstream = 1;
5536 vnode_ref(nd.ni_vp);
5537 }
5538#endif
5539
fe8ab488 5540 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5541
cf7d32b8
A
5542#if NAMEDRSRCFORK
5543 if (is_namedstream) {
5544 vnode_rele(nd.ni_vp);
5545 }
5546#endif
5547
91447636 5548 vnode_put(nd.ni_vp);
fe8ab488 5549 if (amode & _DELETE_OK)
91447636
A
5550 vnode_put(nd.ni_dvp);
5551 nameidone(&nd);
39037602 5552
91447636 5553out:
fe8ab488
A
5554 if (!(flag & AT_EACCESS))
5555 kauth_cred_unref(&context.vc_ucred);
5556 return (error);
5557}
5558
5559int
5560access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5561{
5562 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5563 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5564}
5565
fe8ab488
A
5566int
5567faccessat(__unused proc_t p, struct faccessat_args *uap,
5568 __unused int32_t *retval)
5569{
5570 if (uap->flag & ~AT_EACCESS)
5571 return (EINVAL);
5572
5573 return (faccessat_internal(vfs_context_current(), uap->fd,
5574 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5575}
91447636 5576
2d21ac55
A
5577/*
5578 * Returns: 0 Success
5579 * EFAULT
5580 * copyout:EFAULT
5581 * namei:???
5582 * vn_stat:???
5583 */
91447636 5584static int
fe8ab488
A
5585fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5586 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5587 enum uio_seg segflg, int fd, int flag)
91447636 5588{
fe8ab488
A
5589 struct nameidata nd;
5590 int follow;
b0d623f7
A
5591 union {
5592 struct stat sb;
5593 struct stat64 sb64;
527f9951 5594 } source = {};
b0d623f7
A
5595 union {
5596 struct user64_stat user64_sb;
5597 struct user32_stat user32_sb;
5598 struct user64_stat64 user64_sb64;
5599 struct user32_stat64 user32_sb64;
527f9951 5600 } dest = {};
91447636
A
5601 caddr_t sbp;
5602 int error, my_size;
5603 kauth_filesec_t fsec;
5604 size_t xsecurity_bufsize;
2d21ac55 5605 void * statptr;
1c79356b 5606
fe8ab488
A
5607 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5608 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5609 segflg, path, ctx);
5610
2d21ac55 5611#if NAMEDRSRCFORK
cf7d32b8 5612 int is_namedstream = 0;
2d21ac55 5613 /* stat calls are allowed for resource forks. */
fe8ab488 5614 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5615#endif
fe8ab488 5616 error = nameiat(&nd, fd);
91447636 5617 if (error)
1c79356b 5618 return (error);
91447636 5619 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5620
5621 statptr = (void *)&source;
cf7d32b8
A
5622
5623#if NAMEDRSRCFORK
39037602
A
5624 /* Grab reference on the shadow stream file vnode to
5625 * force an inactive on release which will mark it
b0d623f7 5626 * for recycle.
cf7d32b8 5627 */
fe8ab488
A
5628 if (vnode_isnamedstream(nd.ni_vp) &&
5629 (nd.ni_vp->v_parent != NULLVP) &&
5630 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5631 is_namedstream = 1;
fe8ab488 5632 vnode_ref(nd.ni_vp);
cf7d32b8
A
5633 }
5634#endif
5635
fe8ab488 5636 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5637
5638#if NAMEDRSRCFORK
cf7d32b8 5639 if (is_namedstream) {
fe8ab488 5640 vnode_rele(nd.ni_vp);
2d21ac55
A
5641 }
5642#endif
fe8ab488
A
5643 vnode_put(nd.ni_vp);
5644 nameidone(&nd);
91447636 5645
1c79356b
A
5646 if (error)
5647 return (error);
91447636 5648 /* Zap spare fields */
2d21ac55 5649 if (isstat64 != 0) {
b0d623f7
A
5650 source.sb64.st_lspare = 0;
5651 source.sb64.st_qspare[0] = 0LL;
5652 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5653 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5654 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5655 my_size = sizeof(dest.user64_sb64);
5656 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5657 } else {
39037602 5658 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5659 my_size = sizeof(dest.user32_sb64);
5660 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5661 }
5662 /*
5663 * Check if we raced (post lookup) against the last unlink of a file.
5664 */
b0d623f7
A
5665 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5666 source.sb64.st_nlink = 1;
2d21ac55
A
5667 }
5668 } else {
b0d623f7
A
5669 source.sb.st_lspare = 0;
5670 source.sb.st_qspare[0] = 0LL;
5671 source.sb.st_qspare[1] = 0LL;
2d21ac55 5672 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5673 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5674 my_size = sizeof(dest.user64_sb);
5675 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5676 } else {
39037602 5677 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5678 my_size = sizeof(dest.user32_sb);
5679 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5680 }
5681
5682 /*
5683 * Check if we raced (post lookup) against the last unlink of a file.
5684 */
b0d623f7
A
5685 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5686 source.sb.st_nlink = 1;
2d21ac55 5687 }
91447636
A
5688 }
5689 if ((error = copyout(sbp, ub, my_size)) != 0)
5690 goto out;
5691
5692 /* caller wants extended security information? */
5693 if (xsecurity != USER_ADDR_NULL) {
5694
5695 /* did we get any? */
5696 if (fsec == KAUTH_FILESEC_NONE) {
5697 if (susize(xsecurity_size, 0) != 0) {
5698 error = EFAULT;
5699 goto out;
5700 }
5701 } else {
5702 /* find the user buffer size */
5703 xsecurity_bufsize = fusize(xsecurity_size);
5704
5705 /* copy out the actual data size */
5706 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5707 error = EFAULT;
5708 goto out;
5709 }
5710
5711 /* if the caller supplied enough room, copy out to it */
5712 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5713 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5714 }
5715 }
5716out:
5717 if (fsec != KAUTH_FILESEC_NONE)
5718 kauth_filesec_free(fsec);
1c79356b
A
5719 return (error);
5720}
5721
b0d623f7
A
5722/*
5723 * stat_extended: Get file status; with extended security (ACL).
5724 *
5725 * Parameters: p (ignored)
5726 * uap User argument descriptor (see below)
39037602 5727 * retval (ignored)
b0d623f7
A
5728 *
5729 * Indirect: uap->path Path of file to get status from
5730 * uap->ub User buffer (holds file status info)
5731 * uap->xsecurity ACL to get (extended security)
5732 * uap->xsecurity_size Size of ACL
39037602 5733 *
b0d623f7
A
5734 * Returns: 0 Success
5735 * !0 errno value
5736 *
5737 */
2d21ac55 5738int
fe8ab488
A
5739stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5740 __unused int32_t *retval)
2d21ac55 5741{
fe8ab488
A
5742 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5743 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5744 0));
1c79356b
A
5745}
5746
2d21ac55
A
5747/*
5748 * Returns: 0 Success
fe8ab488 5749 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5750 */
91447636 5751int
b0d623f7 5752stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5753{
fe8ab488
A
5754 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5755 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5756}
1c79356b 5757
91447636 5758int
b0d623f7 5759stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5760{
fe8ab488
A
5761 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5762 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5763}
1c79356b 5764
b0d623f7
A
5765/*
5766 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5767 *
5768 * Parameters: p (ignored)
5769 * uap User argument descriptor (see below)
39037602 5770 * retval (ignored)
b0d623f7
A
5771 *
5772 * Indirect: uap->path Path of file to get status from
5773 * uap->ub User buffer (holds file status info)
5774 * uap->xsecurity ACL to get (extended security)
5775 * uap->xsecurity_size Size of ACL
39037602 5776 *
b0d623f7
A
5777 * Returns: 0 Success
5778 * !0 errno value
5779 *
5780 */
2d21ac55 5781int
b0d623f7 5782stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5783{
fe8ab488
A
5784 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5785 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5786 0));
2d21ac55 5787}
91447636 5788
b0d623f7
A
5789/*
5790 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5791 *
5792 * Parameters: p (ignored)
5793 * uap User argument descriptor (see below)
39037602 5794 * retval (ignored)
b0d623f7
A
5795 *
5796 * Indirect: uap->path Path of file to get status from
5797 * uap->ub User buffer (holds file status info)
5798 * uap->xsecurity ACL to get (extended security)
5799 * uap->xsecurity_size Size of ACL
39037602 5800 *
b0d623f7
A
5801 * Returns: 0 Success
5802 * !0 errno value
5803 *
5804 */
2d21ac55 5805int
b0d623f7 5806lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5807{
fe8ab488
A
5808 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5809 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5810 AT_SYMLINK_NOFOLLOW));
91447636
A
5811}
5812
fe8ab488
A
5813/*
5814 * Get file status; this version does not follow links.
5815 */
91447636 5816int
b0d623f7 5817lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5818{
fe8ab488
A
5819 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5820 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5821}
b0d623f7 5822
2d21ac55 5823int
b0d623f7 5824lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5825{
fe8ab488
A
5826 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5827 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5828}
5829
b0d623f7
A
5830/*
5831 * lstat64_extended: Get file status; can handle large inode numbers; does not
5832 * follow links; with extended security (ACL).
5833 *
5834 * Parameters: p (ignored)
5835 * uap User argument descriptor (see below)
39037602 5836 * retval (ignored)
b0d623f7
A
5837 *
5838 * Indirect: uap->path Path of file to get status from
5839 * uap->ub User buffer (holds file status info)
5840 * uap->xsecurity ACL to get (extended security)
5841 * uap->xsecurity_size Size of ACL
39037602 5842 *
b0d623f7
A
5843 * Returns: 0 Success
5844 * !0 errno value
5845 *
5846 */
91447636 5847int
b0d623f7 5848lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5849{
fe8ab488
A
5850 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5851 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5852 AT_SYMLINK_NOFOLLOW));
5853}
5854
5855int
5856fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5857{
5858 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5859 return (EINVAL);
5860
5861 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5862 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5863}
5864
5865int
5866fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5867 __unused int32_t *retval)
5868{
5869 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5870 return (EINVAL);
5871
5872 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5873 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5874}
5875
1c79356b 5876/*
91447636 5877 * Get configurable pathname variables.
2d21ac55
A
5878 *
5879 * Returns: 0 Success
5880 * namei:???
5881 * vn_pathconf:???
5882 *
5883 * Notes: Global implementation constants are intended to be
5884 * implemented in this function directly; all other constants
5885 * are per-FS implementation, and therefore must be handled in
5886 * each respective FS, instead.
5887 *
5888 * XXX We implement some things globally right now that should actually be
5889 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5890 */
1c79356b
A
5891/* ARGSUSED */
5892int
b0d623f7 5893pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5894{
1c79356b
A
5895 int error;
5896 struct nameidata nd;
2d21ac55 5897 vfs_context_t ctx = vfs_context_current();
91447636 5898
39037602 5899 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5900 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5901 error = namei(&nd);
5902 if (error)
1c79356b 5903 return (error);
1c79356b 5904
2d21ac55 5905 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5906
91447636
A
5907 vnode_put(nd.ni_vp);
5908 nameidone(&nd);
1c79356b
A
5909 return (error);
5910}
5911
5912/*
5913 * Return target name of a symbolic link.
5914 */
1c79356b 5915/* ARGSUSED */
fe8ab488
A
5916static int
5917readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5918 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5919 int *retval)
1c79356b 5920{
2d21ac55 5921 vnode_t vp;
91447636 5922 uio_t auio;
1c79356b
A
5923 int error;
5924 struct nameidata nd;
91447636
A
5925 char uio_buf[ UIO_SIZEOF(1) ];
5926
fe8ab488
A
5927 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5928 seg, path, ctx);
5929
5930 error = nameiat(&nd, fd);
55e303ae 5931 if (error)
1c79356b
A
5932 return (error);
5933 vp = nd.ni_vp;
91447636
A
5934
5935 nameidone(&nd);
5936
fe8ab488
A
5937 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5938 &uio_buf[0], sizeof(uio_buf));
5939 uio_addiov(auio, buf, bufsize);
5940 if (vp->v_type != VLNK) {
1c79356b 5941 error = EINVAL;
fe8ab488 5942 } else {
2d21ac55 5943#if CONFIG_MACF
fe8ab488 5944 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5945#endif
5946 if (error == 0)
fe8ab488
A
5947 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5948 ctx);
91447636 5949 if (error == 0)
2d21ac55 5950 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5951 }
5952 vnode_put(vp);
b0d623f7 5953
fe8ab488 5954 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5955 return (error);
5956}
5957
fe8ab488
A
5958int
5959readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5960{
5961 enum uio_seg procseg;
5962
5963 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5964 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5965 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5966 uap->count, procseg, retval));
5967}
5968
5969int
5970readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5971{
5972 enum uio_seg procseg;
5973
5974 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5975 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5976 procseg, uap->buf, uap->bufsize, procseg, retval));
5977}
5978
5979/*
5980 * Change file flags.
813fb2f6
A
5981 *
5982 * NOTE: this will vnode_put() `vp'
91447636
A
5983 */
5984static int
5985chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5986{
5987 struct vnode_attr va;
5988 kauth_action_t action;
5989 int error;
5990
5991 VATTR_INIT(&va);
5992 VATTR_SET(&va, va_flags, flags);
5993
2d21ac55
A
5994#if CONFIG_MACF
5995 error = mac_vnode_check_setflags(ctx, vp, flags);
5996 if (error)
5997 goto out;
5998#endif
5999
91447636
A
6000 /* request authorisation, disregard immutability */
6001 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6002 goto out;
6003 /*
6004 * Request that the auth layer disregard those file flags it's allowed to when
6005 * authorizing this operation; we need to do this in order to be able to
6006 * clear immutable flags.
6007 */
6008 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
6009 goto out;
6010 error = vnode_setattr(vp, &va, ctx);
6011
39037602
A
6012#if CONFIG_MACF
6013 if (error == 0)
6014 mac_vnode_notify_setflags(ctx, vp, flags);
6015#endif
6016
2d21ac55
A
6017 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6018 error = ENOTSUP;
6019 }
91447636
A
6020out:
6021 vnode_put(vp);
6022 return(error);
6023}
6024
1c79356b
A
6025/*
6026 * Change flags of a file given a path name.
6027 */
1c79356b
A
6028/* ARGSUSED */
6029int
b0d623f7 6030chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 6031{
2d21ac55
A
6032 vnode_t vp;
6033 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6034 int error;
6035 struct nameidata nd;
6036
55e303ae 6037 AUDIT_ARG(fflags, uap->flags);
39037602 6038 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6039 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6040 error = namei(&nd);
6041 if (error)
1c79356b
A
6042 return (error);
6043 vp = nd.ni_vp;
91447636
A
6044 nameidone(&nd);
6045
813fb2f6 6046 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6047 error = chflags1(vp, uap->flags, ctx);
91447636
A
6048
6049 return(error);
1c79356b
A
6050}
6051
6052/*
6053 * Change flags of a file given a file descriptor.
6054 */
1c79356b
A
6055/* ARGSUSED */
6056int
b0d623f7 6057fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 6058{
2d21ac55 6059 vnode_t vp;
1c79356b
A
6060 int error;
6061
55e303ae
A
6062 AUDIT_ARG(fd, uap->fd);
6063 AUDIT_ARG(fflags, uap->flags);
91447636 6064 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6065 return (error);
55e303ae 6066
91447636
A
6067 if ((error = vnode_getwithref(vp))) {
6068 file_drop(uap->fd);
6069 return(error);
6070 }
e5568f75
A
6071
6072 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6073
813fb2f6 6074 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 6075 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
6076
6077 file_drop(uap->fd);
6078 return (error);
6079}
6080
6081/*
6082 * Change security information on a filesystem object.
2d21ac55
A
6083 *
6084 * Returns: 0 Success
6085 * EPERM Operation not permitted
6086 * vnode_authattr:??? [anything vnode_authattr can return]
6087 * vnode_authorize:??? [anything vnode_authorize can return]
6088 * vnode_setattr:??? [anything vnode_setattr can return]
6089 *
6090 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6091 * translated to EPERM before being returned.
91447636
A
6092 */
6093static int
fe8ab488 6094chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
6095{
6096 kauth_action_t action;
6097 int error;
39037602 6098
b0d623f7
A
6099 AUDIT_ARG(mode, vap->va_mode);
6100 /* XXX audit new args */
91447636 6101
2d21ac55
A
6102#if NAMEDSTREAMS
6103 /* chmod calls are not allowed for resource forks. */
6104 if (vp->v_flag & VISNAMEDSTREAM) {
6105 return (EPERM);
6106 }
6107#endif
6108
6109#if CONFIG_MACF
316670eb
A
6110 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6111 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55 6112 return (error);
39037602
A
6113
6114 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6115 if ((error = mac_vnode_check_setowner(ctx, vp,
6116 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6117 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
6118 return (error);
6119 }
6120
6121 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6122 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
6123 return (error);
2d21ac55
A
6124#endif
6125
91447636
A
6126 /* make sure that the caller is allowed to set this security information */
6127 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6128 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6129 if (error == EACCES)
6130 error = EPERM;
6131 return(error);
6132 }
39037602
A
6133
6134 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
6135 return (error);
6136
6137#if CONFIG_MACF
6138 if (VATTR_IS_ACTIVE(vap, va_mode))
6139 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6140
6141 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
6142 mac_vnode_notify_setowner(ctx, vp,
6143 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6144 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6145
6146 if (VATTR_IS_ACTIVE(vap, va_acl))
6147 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6148#endif
91447636 6149
1c79356b
A
6150 return (error);
6151}
6152
91447636 6153
1c79356b 6154/*
b0d623f7 6155 * Change mode of a file given a path name.
2d21ac55
A
6156 *
6157 * Returns: 0 Success
6158 * namei:??? [anything namei can return]
fe8ab488 6159 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 6160 */
91447636 6161static int
fe8ab488
A
6162chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6163 int fd, int flag, enum uio_seg segflg)
91447636
A
6164{
6165 struct nameidata nd;
fe8ab488 6166 int follow, error;
91447636 6167
fe8ab488
A
6168 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6169 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6170 segflg, path, ctx);
6171 if ((error = nameiat(&nd, fd)))
91447636 6172 return (error);
fe8ab488 6173 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
6174 vnode_put(nd.ni_vp);
6175 nameidone(&nd);
6176 return(error);
6177}
6178
0c530ab8 6179/*
39037602 6180 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 6181 * argument list (including extended security (ACL)).
0c530ab8
A
6182 *
6183 * Parameters: p Process requesting the open
6184 * uap User argument descriptor (see below)
6185 * retval (ignored)
6186 *
6187 * Indirect: uap->path Path to object (same as 'chmod')
6188 * uap->uid UID to set
6189 * uap->gid GID to set
6190 * uap->mode File mode to set (same as 'chmod')
6191 * uap->xsecurity ACL to set (or delete)
6192 *
6193 * Returns: 0 Success
6194 * !0 errno value
6195 *
6196 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6197 *
6198 * XXX: We should enummerate the possible errno values here, and where
6199 * in the code they originated.
6200 */
1c79356b 6201int
b0d623f7 6202chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 6203{
1c79356b 6204 int error;
91447636
A
6205 struct vnode_attr va;
6206 kauth_filesec_t xsecdst;
6207
b0d623f7
A
6208 AUDIT_ARG(owner, uap->uid, uap->gid);
6209
91447636
A
6210 VATTR_INIT(&va);
6211 if (uap->mode != -1)
6212 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6213 if (uap->uid != KAUTH_UID_NONE)
6214 VATTR_SET(&va, va_uid, uap->uid);
6215 if (uap->gid != KAUTH_GID_NONE)
6216 VATTR_SET(&va, va_gid, uap->gid);
6217
6218 xsecdst = NULL;
6219 switch(uap->xsecurity) {
6220 /* explicit remove request */
6221 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6222 VATTR_SET(&va, va_acl, NULL);
6223 break;
6224 /* not being set */
6225 case USER_ADDR_NULL:
6226 break;
6227 default:
6228 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6229 return(error);
6230 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6231 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6232 }
1c79356b 6233
fe8ab488
A
6234 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6235 UIO_USERSPACE);
55e303ae 6236
91447636
A
6237 if (xsecdst != NULL)
6238 kauth_filesec_free(xsecdst);
6239 return(error);
6240}
4a249263 6241
2d21ac55
A
6242/*
6243 * Returns: 0 Success
fe8ab488 6244 * chmodat:??? [anything chmodat can return]
2d21ac55 6245 */
fe8ab488
A
6246static int
6247fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6248 int flag, enum uio_seg segflg)
91447636 6249{
91447636
A
6250 struct vnode_attr va;
6251
6252 VATTR_INIT(&va);
fe8ab488
A
6253 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6254
6255 return (chmodat(ctx, path, &va, fd, flag, segflg));
6256}
6257
6258int
6259chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6260{
6261 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6262 AT_FDCWD, 0, UIO_USERSPACE));
6263}
91447636 6264
fe8ab488
A
6265int
6266fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6267{
6268 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6269 return (EINVAL);
6270
6271 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6272 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
6273}
6274
6275/*
6276 * Change mode of a file given a file descriptor.
6277 */
91447636 6278static int
2d21ac55 6279fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6280{
2d21ac55 6281 vnode_t vp;
1c79356b 6282 int error;
55e303ae 6283
91447636 6284 AUDIT_ARG(fd, fd);
55e303ae 6285
91447636
A
6286 if ((error = file_vnode(fd, &vp)) != 0)
6287 return (error);
6288 if ((error = vnode_getwithref(vp)) != 0) {
6289 file_drop(fd);
6290 return(error);
6291 }
55e303ae
A
6292 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6293
fe8ab488 6294 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6295 (void)vnode_put(vp);
6296 file_drop(fd);
55e303ae 6297
1c79356b
A
6298 return (error);
6299}
6300
b0d623f7
A
6301/*
6302 * fchmod_extended: Change mode of a file given a file descriptor; with
6303 * extended argument list (including extended security (ACL)).
6304 *
6305 * Parameters: p Process requesting to change file mode
6306 * uap User argument descriptor (see below)
39037602 6307 * retval (ignored)
b0d623f7
A
6308 *
6309 * Indirect: uap->mode File mode to set (same as 'chmod')
6310 * uap->uid UID to set
6311 * uap->gid GID to set
6312 * uap->xsecurity ACL to set (or delete)
6313 * uap->fd File descriptor of file to change mode
39037602 6314 *
b0d623f7
A
6315 * Returns: 0 Success
6316 * !0 errno value
6317 *
6318 */
91447636 6319int
b0d623f7 6320fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6321{
6322 int error;
6323 struct vnode_attr va;
6324 kauth_filesec_t xsecdst;
6325
b0d623f7
A
6326 AUDIT_ARG(owner, uap->uid, uap->gid);
6327
91447636
A
6328 VATTR_INIT(&va);
6329 if (uap->mode != -1)
6330 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6331 if (uap->uid != KAUTH_UID_NONE)
6332 VATTR_SET(&va, va_uid, uap->uid);
6333 if (uap->gid != KAUTH_GID_NONE)
6334 VATTR_SET(&va, va_gid, uap->gid);
6335
6336 xsecdst = NULL;
6337 switch(uap->xsecurity) {
6338 case USER_ADDR_NULL:
6339 VATTR_SET(&va, va_acl, NULL);
6340 break;
39236c6e
A
6341 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6342 VATTR_SET(&va, va_acl, NULL);
6343 break;
6344 /* not being set */
91447636
A
6345 case CAST_USER_ADDR_T(-1):
6346 break;
6347 default:
6348 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6349 return(error);
6350 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6351 }
6352
6353 error = fchmod1(p, uap->fd, &va);
6354
39037602 6355
91447636
A
6356 switch(uap->xsecurity) {
6357 case USER_ADDR_NULL:
6358 case CAST_USER_ADDR_T(-1):
6359 break;
6360 default:
6361 if (xsecdst != NULL)
6362 kauth_filesec_free(xsecdst);
6363 }
6364 return(error);
6365}
6366
6367int
b0d623f7 6368fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6369{
6370 struct vnode_attr va;
6371
6372 VATTR_INIT(&va);
6373 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6374
6375 return(fchmod1(p, uap->fd, &va));
6376}
6377
6378
1c79356b
A
6379/*
6380 * Set ownership given a path name.
6381 */
1c79356b 6382/* ARGSUSED */
91447636 6383static int
fe8ab488
A
6384fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6385 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6386{
2d21ac55 6387 vnode_t vp;
91447636 6388 struct vnode_attr va;
1c79356b
A
6389 int error;
6390 struct nameidata nd;
fe8ab488 6391 int follow;
91447636 6392 kauth_action_t action;
1c79356b 6393
fe8ab488 6394 AUDIT_ARG(owner, uid, gid);
55e303ae 6395
fe8ab488
A
6396 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6397 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6398 path, ctx);
6399 error = nameiat(&nd, fd);
55e303ae 6400 if (error)
1c79356b
A
6401 return (error);
6402 vp = nd.ni_vp;
6403
91447636
A
6404 nameidone(&nd);
6405
91447636 6406 VATTR_INIT(&va);
fe8ab488
A
6407 if (uid != (uid_t)VNOVAL)
6408 VATTR_SET(&va, va_uid, uid);
6409 if (gid != (gid_t)VNOVAL)
6410 VATTR_SET(&va, va_gid, gid);
91447636 6411
2d21ac55 6412#if CONFIG_MACF
fe8ab488 6413 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6414 if (error)
6415 goto out;
6416#endif
6417
91447636
A
6418 /* preflight and authorize attribute changes */
6419 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6420 goto out;
6421 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6422 goto out;
6423 error = vnode_setattr(vp, &va, ctx);
39037602
A
6424
6425#if CONFIG_MACF
6426 if (error == 0)
6427 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6428#endif
6429
91447636
A
6430out:
6431 /*
6432 * EACCES is only allowed from namei(); permissions failure should
6433 * return EPERM, so we need to translate the error code.
6434 */
6435 if (error == EACCES)
6436 error = EPERM;
fe8ab488 6437
91447636 6438 vnode_put(vp);
1c79356b
A
6439 return (error);
6440}
6441
91447636 6442int
fe8ab488 6443chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6444{
fe8ab488
A
6445 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6446 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6447}
6448
6449int
fe8ab488 6450lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6451{
fe8ab488
A
6452 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6453 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6454}
6455
6456int
6457fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6458{
6459 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6460 return (EINVAL);
6461
6462 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6463 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6464}
6465
1c79356b
A
6466/*
6467 * Set ownership given a file descriptor.
6468 */
1c79356b
A
6469/* ARGSUSED */
6470int
b0d623f7 6471fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6472{
91447636 6473 struct vnode_attr va;
2d21ac55
A
6474 vfs_context_t ctx = vfs_context_current();
6475 vnode_t vp;
1c79356b 6476 int error;
91447636 6477 kauth_action_t action;
1c79356b 6478
55e303ae
A
6479 AUDIT_ARG(owner, uap->uid, uap->gid);
6480 AUDIT_ARG(fd, uap->fd);
6481
91447636 6482 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6483 return (error);
55e303ae 6484
91447636
A
6485 if ( (error = vnode_getwithref(vp)) ) {
6486 file_drop(uap->fd);
6487 return(error);
6488 }
55e303ae
A
6489 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6490
91447636
A
6491 VATTR_INIT(&va);
6492 if (uap->uid != VNOVAL)
6493 VATTR_SET(&va, va_uid, uap->uid);
6494 if (uap->gid != VNOVAL)
6495 VATTR_SET(&va, va_gid, uap->gid);
6496
2d21ac55
A
6497#if NAMEDSTREAMS
6498 /* chown calls are not allowed for resource forks. */
6499 if (vp->v_flag & VISNAMEDSTREAM) {
6500 error = EPERM;
6501 goto out;
6502 }
6503#endif
6504
6505#if CONFIG_MACF
6506 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6507 if (error)
6508 goto out;
6509#endif
91447636
A
6510
6511 /* preflight and authorize attribute changes */
2d21ac55 6512 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6513 goto out;
2d21ac55 6514 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6515 if (error == EACCES)
6516 error = EPERM;
6517 goto out;
6518 }
2d21ac55 6519 error = vnode_setattr(vp, &va, ctx);
4a249263 6520
39037602
A
6521#if CONFIG_MACF
6522 if (error == 0)
6523 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6524#endif
6525
91447636
A
6526out:
6527 (void)vnode_put(vp);
6528 file_drop(uap->fd);
1c79356b
A
6529 return (error);
6530}
6531
9bccf70c 6532static int
2d21ac55 6533getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6534{
9bccf70c
A
6535 int error;
6536
91447636
A
6537 if (usrtvp == USER_ADDR_NULL) {
6538 struct timeval old_tv;
6539 /* XXX Y2038 bug because of microtime argument */
6540 microtime(&old_tv);
6541 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6542 tsp[1] = tsp[0];
6543 } else {
91447636 6544 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6545 struct user64_timeval tv[2];
91447636 6546 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6547 if (error)
6548 return (error);
6549 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6550 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6551 } else {
b0d623f7
A
6552 struct user32_timeval tv[2];
6553 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6554 if (error)
6555 return (error);
6556 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6557 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6558 }
9bccf70c
A
6559 }
6560 return 0;
6561}
6562
6563static int
2d21ac55 6564setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6565 int nullflag)
9bccf70c
A
6566{
6567 int error;
91447636
A
6568 struct vnode_attr va;
6569 kauth_action_t action;
e5568f75
A
6570
6571 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6572
91447636
A
6573 VATTR_INIT(&va);
6574 VATTR_SET(&va, va_access_time, ts[0]);
6575 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6576 if (nullflag)
91447636
A
6577 va.va_vaflags |= VA_UTIMES_NULL;
6578
2d21ac55
A
6579#if NAMEDSTREAMS
6580 /* utimes calls are not allowed for resource forks. */
6581 if (vp->v_flag & VISNAMEDSTREAM) {
6582 error = EPERM;
6583 goto out;
6584 }
6585#endif
6586
6587#if CONFIG_MACF
6588 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6589 if (error)
6590 goto out;
6591#endif
6592 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6593 if (!nullflag && error == EACCES)
6594 error = EPERM;
91447636 6595 goto out;
2d21ac55
A
6596 }
6597
91447636 6598 /* since we may not need to auth anything, check here */
2d21ac55
A
6599 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6600 if (!nullflag && error == EACCES)
6601 error = EPERM;
91447636 6602 goto out;
2d21ac55 6603 }
91447636 6604 error = vnode_setattr(vp, &va, ctx);
4a249263 6605
39037602
A
6606#if CONFIG_MACF
6607 if (error == 0)
6608 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6609#endif
6610
9bccf70c
A
6611out:
6612 return error;
6613}
6614
1c79356b
A
6615/*
6616 * Set the access and modification times of a file.
6617 */
1c79356b
A
6618/* ARGSUSED */
6619int
b0d623f7 6620utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6621{
9bccf70c 6622 struct timespec ts[2];
91447636 6623 user_addr_t usrtvp;
1c79356b
A
6624 int error;
6625 struct nameidata nd;
2d21ac55 6626 vfs_context_t ctx = vfs_context_current();
1c79356b 6627
2d21ac55 6628 /*
39037602 6629 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6630 * name lookup first because auditing wants the path.
6631 */
39037602 6632 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6633 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6634 error = namei(&nd);
6635 if (error)
9bccf70c 6636 return (error);
91447636 6637 nameidone(&nd);
55e303ae 6638
91447636
A
6639 /*
6640 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6641 * the current time instead.
6642 */
55e303ae 6643 usrtvp = uap->tptr;
91447636
A
6644 if ((error = getutimes(usrtvp, ts)) != 0)
6645 goto out;
6646
2d21ac55 6647 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6648
6649out:
6650 vnode_put(nd.ni_vp);
1c79356b
A
6651 return (error);
6652}
6653
9bccf70c
A
6654/*
6655 * Set the access and modification times of a file.
6656 */
9bccf70c
A
6657/* ARGSUSED */
6658int
b0d623f7 6659futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6660{
6661 struct timespec ts[2];
2d21ac55 6662 vnode_t vp;
91447636 6663 user_addr_t usrtvp;
9bccf70c
A
6664 int error;
6665
55e303ae 6666 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6667 usrtvp = uap->tptr;
6668 if ((error = getutimes(usrtvp, ts)) != 0)
6669 return (error);
91447636 6670 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6671 return (error);
91447636
A
6672 if((error = vnode_getwithref(vp))) {
6673 file_drop(uap->fd);
6674 return(error);
6675 }
55e303ae 6676
2d21ac55 6677 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6678 vnode_put(vp);
6679 file_drop(uap->fd);
6680 return(error);
9bccf70c
A
6681}
6682
1c79356b
A
6683/*
6684 * Truncate a file given its path name.
6685 */
1c79356b
A
6686/* ARGSUSED */
6687int
b0d623f7 6688truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6689{
2d21ac55 6690 vnode_t vp;
91447636 6691 struct vnode_attr va;
2d21ac55 6692 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6693 int error;
6694 struct nameidata nd;
91447636
A
6695 kauth_action_t action;
6696
0b4e3aa0
A
6697 if (uap->length < 0)
6698 return(EINVAL);
39037602 6699 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6700 UIO_USERSPACE, uap->path, ctx);
91447636 6701 if ((error = namei(&nd)))
1c79356b
A
6702 return (error);
6703 vp = nd.ni_vp;
91447636
A
6704
6705 nameidone(&nd);
6706
6707 VATTR_INIT(&va);
6708 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6709
6710#if CONFIG_MACF
6711 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6712 if (error)
6713 goto out;
6714#endif
6715
6716 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6717 goto out;
2d21ac55 6718 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6719 goto out;
2d21ac55 6720 error = vnode_setattr(vp, &va, ctx);
39037602
A
6721
6722#if CONFIG_MACF
6723 if (error == 0)
6724 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6725#endif
6726
91447636
A
6727out:
6728 vnode_put(vp);
1c79356b
A
6729 return (error);
6730}
6731
6732/*
6733 * Truncate a file given a file descriptor.
6734 */
1c79356b
A
6735/* ARGSUSED */
6736int
b0d623f7 6737ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6738{
2d21ac55 6739 vfs_context_t ctx = vfs_context_current();
91447636 6740 struct vnode_attr va;
2d21ac55 6741 vnode_t vp;
91447636
A
6742 struct fileproc *fp;
6743 int error ;
6744 int fd = uap->fd;
1c79356b 6745
55e303ae 6746 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6747 if (uap->length < 0)
6748 return(EINVAL);
39037602 6749
91447636
A
6750 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6751 return(error);
6752 }
1c79356b 6753
39236c6e
A
6754 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6755 case DTYPE_PSXSHM:
91447636
A
6756 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6757 goto out;
39236c6e
A
6758 case DTYPE_VNODE:
6759 break;
6760 default:
91447636
A
6761 error = EINVAL;
6762 goto out;
1c79356b 6763 }
1c79356b 6764
2d21ac55 6765 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6766
91447636
A
6767 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6768 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6769 error = EINVAL;
6770 goto out;
1c79356b 6771 }
1c79356b 6772
91447636
A
6773 if ((error = vnode_getwithref(vp)) != 0) {
6774 goto out;
6775 }
1c79356b 6776
91447636 6777 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6778
2d21ac55
A
6779#if CONFIG_MACF
6780 error = mac_vnode_check_truncate(ctx,
6781 fp->f_fglob->fg_cred, vp);
6782 if (error) {
6783 (void)vnode_put(vp);
6784 goto out;
6785 }
6786#endif
91447636
A
6787 VATTR_INIT(&va);
6788 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6789 error = vnode_setattr(vp, &va, ctx);
39037602
A
6790
6791#if CONFIG_MACF
6792 if (error == 0)
6793 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6794#endif
6795
91447636
A
6796 (void)vnode_put(vp);
6797out:
6798 file_drop(fd);
6799 return (error);
1c79356b 6800}
91447636 6801
1c79356b
A
6802
6803/*
b0d623f7 6804 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6805 */
1c79356b
A
6806/* ARGSUSED */
6807int
b0d623f7 6808fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6809{
2d21ac55 6810 __pthread_testcancel(1);
b0d623f7
A
6811 return(fsync_common(p, uap, MNT_WAIT));
6812}
6813
6814
6815/*
6816 * Sync an open file with synchronized I/O _file_ integrity completion
6817 *
6818 * Notes: This is a legacy support function that does not test for
6819 * thread cancellation points.
6820 */
6821/* ARGSUSED */
39037602 6822int
b0d623f7
A
6823fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6824{
6825 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6826}
6827
b0d623f7
A
6828
6829/*
6830 * Sync an open file with synchronized I/O _data_ integrity completion
6831 */
6832/* ARGSUSED */
2d21ac55 6833int
b0d623f7
A
6834fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6835{
6836 __pthread_testcancel(1);
6837 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6838}
6839
6840
6841/*
6842 * fsync_common
6843 *
6844 * Common fsync code to support both synchronized I/O file integrity completion
6845 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6846 *
6847 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6848 * will only guarantee that the file data contents are retrievable. If
6849 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6850 * includes additional metadata unnecessary for retrieving the file data
6851 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6852 * storage.
6853 *
6854 * Parameters: p The process
6855 * uap->fd The descriptor to synchronize
6856 * flags The data integrity flags
6857 *
6858 * Returns: int Success
6859 * fp_getfvp:EBADF Bad file descriptor
6860 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6861 * VNOP_FSYNC:??? unspecified
6862 *
6863 * Notes: We use struct fsync_args because it is a short name, and all
6864 * caller argument structures are otherwise identical.
6865 */
6866static int
6867fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6868{
6869 vnode_t vp;
91447636 6870 struct fileproc *fp;
2d21ac55 6871 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6872 int error;
6873
b0d623f7
A
6874 AUDIT_ARG(fd, uap->fd);
6875
91447636 6876 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6877 return (error);
91447636
A
6878 if ( (error = vnode_getwithref(vp)) ) {
6879 file_drop(uap->fd);
6880 return(error);
6881 }
91447636 6882
b0d623f7
A
6883 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6884
6885 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6886
6887#if NAMEDRSRCFORK
6888 /* Sync resource fork shadow file if necessary. */
6889 if ((error == 0) &&
39037602 6890 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 6891 (vp->v_parent != NULLVP) &&
b0d623f7 6892 vnode_isshadow(vp) &&
2d21ac55
A
6893 (fp->f_flags & FP_WRITTEN)) {
6894 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6895 }
6896#endif
91447636
A
6897
6898 (void)vnode_put(vp);
6899 file_drop(uap->fd);
1c79356b
A
6900 return (error);
6901}
6902
6903/*
39037602 6904 * Duplicate files. Source must be a file, target must be a file or
1c79356b 6905 * must not exist.
91447636
A
6906 *
6907 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6908 * perform inheritance correctly.
1c79356b 6909 */
1c79356b
A
6910/* ARGSUSED */
6911int
b0d623f7 6912copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6913{
91447636 6914 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6915 struct nameidata fromnd, tond;
6916 int error;
2d21ac55 6917 vfs_context_t ctx = vfs_context_current();
39037602
A
6918#if CONFIG_MACF
6919 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6920 struct vnode_attr va;
6921#endif
55e303ae
A
6922
6923 /* Check that the flags are valid. */
1c79356b
A
6924
6925 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6926 return(EINVAL);
6927 }
1c79356b 6928
4bd07ac2 6929 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6930 UIO_USERSPACE, uap->from, ctx);
91447636 6931 if ((error = namei(&fromnd)))
1c79356b
A
6932 return (error);
6933 fvp = fromnd.ni_vp;
6934
6d2010ae
A
6935 NDINIT(&tond, CREATE, OP_LINK,
6936 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6937 UIO_USERSPACE, uap->to, ctx);
91447636 6938 if ((error = namei(&tond))) {
1c79356b
A
6939 goto out1;
6940 }
6941 tdvp = tond.ni_dvp;
6942 tvp = tond.ni_vp;
91447636 6943
1c79356b
A
6944 if (tvp != NULL) {
6945 if (!(uap->flags & CPF_OVERWRITE)) {
6946 error = EEXIST;
6947 goto out;
6948 }
6949 }
39037602 6950
1c79356b
A
6951 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6952 error = EISDIR;
6953 goto out;
6954 }
6955
39037602
A
6956 /* This calls existing MAC hooks for open */
6957 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6958 NULL))) {
6959 goto out;
6960 }
6961
6962 if (tvp) {
6963 /*
6964 * See unlinkat_internal for an explanation of the potential
6965 * ENOENT from the MAC hook but the gist is that the MAC hook
6966 * can fail because vn_getpath isn't able to return the full
6967 * path. We choose to ignore this failure.
6968 */
6969 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6970 if (error && error != ENOENT)
6971 goto out;
6972 error = 0;
6973 }
6974
6975#if CONFIG_MACF
6976 VATTR_INIT(&va);
6977 VATTR_SET(&va, va_type, fvp->v_type);
6978 /* Mask off all but regular access permissions */
6979 VATTR_SET(&va, va_mode,
6980 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6981 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6982 if (error)
6983 goto out;
6984#endif /* CONFIG_MACF */
6985
2d21ac55 6986 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6987 goto out;
6988
6989 if (fvp == tdvp)
6990 error = EINVAL;
6991 /*
6992 * If source is the same as the destination (that is the
6993 * same inode number) then there is nothing to do.
6994 * (fixed to have POSIX semantics - CSM 3/2/98)
6995 */
6996 if (fvp == tvp)
6997 error = -1;
91447636 6998 if (!error)
2d21ac55 6999 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 7000out:
91447636
A
7001 sdvp = tond.ni_startdir;
7002 /*
7003 * nameidone has to happen before we vnode_put(tdvp)
7004 * since it may need to release the fs_nodelock on the tdvp
7005 */
7006 nameidone(&tond);
7007
7008 if (tvp)
7009 vnode_put(tvp);
7010 vnode_put(tdvp);
7011 vnode_put(sdvp);
1c79356b 7012out1:
91447636
A
7013 vnode_put(fvp);
7014
91447636
A
7015 nameidone(&fromnd);
7016
1c79356b
A
7017 if (error == -1)
7018 return (0);
7019 return (error);
7020}
7021
39037602 7022#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 7023
1c79356b 7024/*
39037602
A
7025 * Helper function for doing clones. The caller is expected to provide an
7026 * iocounted source vnode and release it.
1c79356b 7027 */
fe8ab488 7028static int
39037602
A
7029clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7030 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 7031{
91447636 7032 vnode_t tvp, tdvp;
39037602 7033 struct nameidata tond;
1c79356b 7034 int error;
39037602 7035 int follow;
813fb2f6 7036 boolean_t free_src_acl;
39037602
A
7037 boolean_t attr_cleanup;
7038 enum vtype v_type;
7039 kauth_action_t action;
7040 struct componentname *cnp;
7041 uint32_t defaulted;
7042 struct vnode_attr va;
813fb2f6 7043 struct vnode_attr nva;
5ba3f43e 7044 uint32_t vnop_flags;
316670eb 7045
39037602
A
7046 v_type = vnode_vtype(fvp);
7047 switch (v_type) {
7048 case VLNK:
7049 /* FALLTHRU */
7050 case VREG:
7051 action = KAUTH_VNODE_ADD_FILE;
7052 break;
7053 case VDIR:
7054 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7055 fvp->v_mountedhere) {
7056 return (EINVAL);
7057 }
7058 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7059 break;
7060 default:
7061 return (EINVAL);
7062 }
7063
7064 AUDIT_ARG(fd2, dst_dirfd);
7065 AUDIT_ARG(value32, flags);
7066
7067 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7068 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7069 UIO_USERSPACE, dst, ctx);
7070 if ((error = nameiat(&tond, dst_dirfd)))
7071 return (error);
7072 cnp = &tond.ni_cnd;
7073 tdvp = tond.ni_dvp;
7074 tvp = tond.ni_vp;
7075
813fb2f6 7076 free_src_acl = FALSE;
39037602
A
7077 attr_cleanup = FALSE;
7078
7079 if (tvp != NULL) {
7080 error = EEXIST;
7081 goto out;
7082 }
7083
7084 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7085 error = EXDEV;
7086 goto out;
7087 }
7088
7089#if CONFIG_MACF
7090 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
7091 goto out;
7092#endif
7093 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
7094 goto out;
7095
7096 action = KAUTH_VNODE_GENERIC_READ_BITS;
7097 if (data_read_authorised)
7098 action &= ~KAUTH_VNODE_READ_DATA;
7099 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
7100 goto out;
7101
7102 /*
7103 * certain attributes may need to be changed from the source, we ask for
7104 * those here.
7105 */
7106 VATTR_INIT(&va);
813fb2f6
A
7107 VATTR_WANTED(&va, va_uid);
7108 VATTR_WANTED(&va, va_gid);
39037602
A
7109 VATTR_WANTED(&va, va_mode);
7110 VATTR_WANTED(&va, va_flags);
7111 VATTR_WANTED(&va, va_acl);
7112
7113 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
7114 goto out;
7115
813fb2f6
A
7116 VATTR_INIT(&nva);
7117 VATTR_SET(&nva, va_type, v_type);
7118 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7119 VATTR_SET(&nva, va_acl, va.va_acl);
7120 free_src_acl = TRUE;
39037602
A
7121 }
7122
7123 /* Handle ACL inheritance, initialize vap. */
7124 if (v_type == VLNK) {
813fb2f6 7125 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 7126 } else {
813fb2f6
A
7127 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7128 if (error)
7129 goto out;
39037602
A
7130 attr_cleanup = TRUE;
7131 }
7132
5ba3f43e 7133 vnop_flags = VNODE_CLONEFILE_DEFAULT;
813fb2f6
A
7134 /*
7135 * We've got initial values for all security parameters,
7136 * If we are superuser, then we can change owners to be the
7137 * same as the source. Both superuser and the owner have default
7138 * WRITE_SECURITY privileges so all other fields can be taken
7139 * from source as well.
7140 */
5ba3f43e 7141 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
813fb2f6
A
7142 if (VATTR_IS_SUPPORTED(&va, va_uid))
7143 VATTR_SET(&nva, va_uid, va.va_uid);
7144 if (VATTR_IS_SUPPORTED(&va, va_gid))
7145 VATTR_SET(&nva, va_gid, va.va_gid);
5ba3f43e
A
7146 } else {
7147 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
813fb2f6 7148 }
5ba3f43e 7149
813fb2f6
A
7150 if (VATTR_IS_SUPPORTED(&va, va_mode))
7151 VATTR_SET(&nva, va_mode, va.va_mode);
7152 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7153 VATTR_SET(&nva, va_flags,
5ba3f43e
A
7154 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7155 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
39037602
A
7156 }
7157
5ba3f43e 7158 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
39037602
A
7159
7160 if (!error && tvp) {
7161 int update_flags = 0;
7162#if CONFIG_FSE
7163 int fsevent;
7164#endif /* CONFIG_FSE */
7165
7166#if CONFIG_MACF
7167 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7168 VNODE_LABEL_CREATE, ctx);
7169#endif
7170 /*
7171 * If some of the requested attributes weren't handled by the
7172 * VNOP, use our fallback code.
7173 */
7174 if (!VATTR_ALL_SUPPORTED(&va))
813fb2f6 7175 (void)vnode_setattr_fallback(tvp, &nva, ctx);
39037602
A
7176
7177 // Make sure the name & parent pointers are hooked up
7178 if (tvp->v_name == NULL)
7179 update_flags |= VNODE_UPDATE_NAME;
7180 if (tvp->v_parent == NULLVP)
7181 update_flags |= VNODE_UPDATE_PARENT;
7182
7183 if (update_flags) {
7184 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7185 cnp->cn_namelen, cnp->cn_hash, update_flags);
7186 }
7187
7188#if CONFIG_FSE
7189 switch (vnode_vtype(tvp)) {
7190 case VLNK:
7191 /* FALLTHRU */
7192 case VREG:
7193 fsevent = FSE_CREATE_FILE;
7194 break;
7195 case VDIR:
7196 fsevent = FSE_CREATE_DIR;
7197 break;
7198 default:
7199 goto out;
7200 }
7201
7202 if (need_fsevent(fsevent, tvp)) {
5ba3f43e
A
7203 /*
7204 * The following is a sequence of three explicit events.
7205 * A pair of FSE_CLONE events representing the source and destination
7206 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7207 * fseventsd may coalesce the destination clone and create events
7208 * into a single event resulting in the following sequence for a client
7209 * FSE_CLONE (src)
7210 * FSE_CLONE | FSE_CREATE (dst)
7211 */
7212 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7213 FSE_ARG_DONE);
39037602
A
7214 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7215 FSE_ARG_DONE);
7216 }
7217#endif /* CONFIG_FSE */
7218 }
39037602
A
7219
7220out:
7221 if (attr_cleanup)
813fb2f6
A
7222 vn_attribute_cleanup(&nva, defaulted);
7223 if (free_src_acl && va.va_acl)
39037602
A
7224 kauth_acl_free(va.va_acl);
7225 nameidone(&tond);
7226 if (tvp)
7227 vnode_put(tvp);
7228 vnode_put(tdvp);
7229 return (error);
7230}
7231
7232/*
7233 * clone files or directories, target must not exist.
7234 */
7235/* ARGSUSED */
7236int
7237clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7238 __unused int32_t *retval)
7239{
7240 vnode_t fvp;
7241 struct nameidata fromnd;
7242 int follow;
7243 int error;
7244 vfs_context_t ctx = vfs_context_current();
7245
7246 /* Check that the flags are valid. */
5ba3f43e 7247 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
39037602
A
7248 return (EINVAL);
7249
7250 AUDIT_ARG(fd, uap->src_dirfd);
7251
7252 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7253 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7254 UIO_USERSPACE, uap->src, ctx);
7255 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7256 return (error);
7257
7258 fvp = fromnd.ni_vp;
7259 nameidone(&fromnd);
7260
7261 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7262 uap->flags, ctx);
7263
7264 vnode_put(fvp);
7265 return (error);
7266}
7267
7268int
7269fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7270 __unused int32_t *retval)
7271{
7272 vnode_t fvp;
7273 struct fileproc *fp;
7274 int error;
7275 vfs_context_t ctx = vfs_context_current();
7276
5ba3f43e
A
7277 /* Check that the flags are valid. */
7278 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7279 return (EINVAL);
7280
39037602
A
7281 AUDIT_ARG(fd, uap->src_fd);
7282 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7283 if (error)
7284 return (error);
7285
7286 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7287 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7288 error = EBADF;
7289 goto out;
7290 }
7291
7292 if ((error = vnode_getwithref(fvp)))
7293 goto out;
7294
7295 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7296
7297 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7298 uap->flags, ctx);
7299
7300 vnode_put(fvp);
7301out:
7302 file_drop(uap->src_fd);
7303 return (error);
7304}
7305
7306/*
7307 * Rename files. Source and destination must either both be directories,
7308 * or both not be directories. If target is a directory, it must be empty.
7309 */
7310/* ARGSUSED */
7311static int
7312renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7313 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7314{
7315 if (flags & ~VFS_RENAME_FLAGS_MASK)
7316 return EINVAL;
7317
7318 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7319 return EINVAL;
7320
7321 vnode_t tvp, tdvp;
7322 vnode_t fvp, fdvp;
7323 struct nameidata *fromnd, *tond;
7324 int error;
7325 int do_retry;
7326 int retry_count;
7327 int mntrename;
7328 int need_event;
b226f5e5
A
7329 int need_kpath2;
7330 int has_listeners;
39037602
A
7331 const char *oname = NULL;
7332 char *from_name = NULL, *to_name = NULL;
7333 int from_len=0, to_len=0;
7334 int holding_mntlock;
7335 mount_t locked_mp = NULL;
7336 vnode_t oparent = NULLVP;
7337#if CONFIG_FSE
7338 fse_info from_finfo, to_finfo;
7339#endif
7340 int from_truncated=0, to_truncated;
7341 int batched = 0;
7342 struct vnode_attr *fvap, *tvap;
7343 int continuing = 0;
7344 /* carving out a chunk for structs that are too big to be on stack. */
7345 struct {
7346 struct nameidata from_node, to_node;
7347 struct vnode_attr fv_attr, tv_attr;
7348 } * __rename_data;
7349 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7350 fromnd = &__rename_data->from_node;
7351 tond = &__rename_data->to_node;
7352
7353 holding_mntlock = 0;
7354 do_retry = 0;
7355 retry_count = 0;
91447636
A
7356retry:
7357 fvp = tvp = NULL;
7358 fdvp = tdvp = NULL;
6d2010ae 7359 fvap = tvap = NULL;
1c79356b
A
7360 mntrename = FALSE;
7361
316670eb 7362 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7363 segflg, from, ctx);
316670eb 7364 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7365
316670eb 7366 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7367 segflg, to, ctx);
316670eb 7368 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7369
6d2010ae 7370continue_lookup:
316670eb 7371 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7372 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 7373 goto out1;
316670eb
A
7374 fdvp = fromnd->ni_dvp;
7375 fvp = fromnd->ni_vp;
1c79356b 7376
6d2010ae 7377 if (fvp && fvp->v_type == VDIR)
316670eb 7378 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 7379 }
2d21ac55 7380
316670eb 7381 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7382 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
7383 /*
7384 * Translate error code for rename("dir1", "dir2/.").
7385 */
fe8ab488 7386 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
7387 error = EINVAL;
7388 goto out1;
7389 }
316670eb
A
7390 tdvp = tond->ni_dvp;
7391 tvp = tond->ni_vp;
fe8ab488 7392 }
91447636 7393
00867663
A
7394#if DEVELOPMENT || DEBUG
7395 /*
7396 * XXX VSWAP: Check for entitlements or special flag here
7397 * so we can restrict access appropriately.
7398 */
7399#else /* DEVELOPMENT || DEBUG */
7400
7401 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7402 error = EPERM;
7403 goto out1;
7404 }
7405
7406 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7407 error = EPERM;
7408 goto out1;
7409 }
7410#endif /* DEVELOPMENT || DEBUG */
7411
39037602
A
7412 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7413 error = ENOENT;
7414 goto out1;
7415 }
7416
7417 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7418 error = EEXIST;
7419 goto out1;
7420 }
7421
6d2010ae 7422 batched = vnode_compound_rename_available(fdvp);
d9a64523
A
7423
7424#if CONFIG_FSE
7425 need_event = need_fsevent(FSE_RENAME, fdvp);
7426 if (need_event) {
7427 if (fvp) {
7428 get_fse_info(fvp, &from_finfo, ctx);
7429 } else {
7430 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7431 if (error) {
7432 goto out1;
7433 }
7434
7435 fvap = &__rename_data->fv_attr;
7436 }
7437
7438 if (tvp) {
7439 get_fse_info(tvp, &to_finfo, ctx);
7440 } else if (batched) {
7441 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7442 if (error) {
7443 goto out1;
7444 }
7445
7446 tvap = &__rename_data->tv_attr;
7447 }
7448 }
7449#else
7450 need_event = 0;
7451#endif /* CONFIG_FSE */
7452
b226f5e5
A
7453 has_listeners = kauth_authorize_fileop_has_listeners();
7454
7455 need_kpath2 = 0;
7456#if CONFIG_AUDIT
7457 if (AUDIT_RECORD_EXISTS()) {
7458 need_kpath2 = 1;
7459 }
7460#endif
7461
7462 if (need_event || has_listeners) {
d9a64523
A
7463 if (from_name == NULL) {
7464 GET_PATH(from_name);
7465 if (from_name == NULL) {
7466 error = ENOMEM;
7467 goto out1;
7468 }
7469 }
7470
7471 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
b226f5e5 7472 }
d9a64523 7473
b226f5e5 7474 if (need_event || need_kpath2 || has_listeners) {
d9a64523
A
7475 if (to_name == NULL) {
7476 GET_PATH(to_name);
7477 if (to_name == NULL) {
7478 error = ENOMEM;
7479 goto out1;
7480 }
7481 }
7482
7483 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
b226f5e5
A
7484 if (to_name && need_kpath2) {
7485 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
7486 }
d9a64523 7487 }
6d2010ae 7488 if (!fvp) {
fe8ab488 7489 /*
6d2010ae
A
7490 * Claim: this check will never reject a valid rename.
7491 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7492 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7493 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
7494 * then you can't move it to within another dir on the same mountpoint.
7495 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7496 *
7497 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7498 */
6d2010ae
A
7499 if (fdvp->v_mount != tdvp->v_mount) {
7500 error = EXDEV;
7501 goto out1;
7502 }
7503 goto skipped_lookup;
1c79356b 7504 }
2d21ac55 7505
6d2010ae 7506 if (!batched) {
d9a64523 7507 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
6d2010ae 7508 if (error) {
3e170ce0
A
7509 if (error == ENOENT) {
7510 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7511 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7512 /*
7513 * We encountered a race where after doing the namei, tvp stops
7514 * being valid. If so, simply re-drive the rename call from the
7515 * top.
7516 */
7517 do_retry = 1;
7518 retry_count += 1;
7519 }
6d2010ae 7520 }
91447636 7521 goto out1;
1c79356b
A
7522 }
7523 }
6d2010ae 7524
2d21ac55
A
7525 /*
7526 * If the source and destination are the same (i.e. they're
7527 * links to the same vnode) and the target file system is
7528 * case sensitive, then there is nothing to do.
6d2010ae
A
7529 *
7530 * XXX Come back to this.
2d21ac55
A
7531 */
7532 if (fvp == tvp) {
7533 int pathconf_val;
fe8ab488 7534
2d21ac55
A
7535 /*
7536 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7537 * then assume that this file system is case sensitive.
7538 */
7539 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7540 pathconf_val != 0) {
7541 goto out1;
fe8ab488 7542 }
2d21ac55 7543 }
91447636 7544
1c79356b
A
7545 /*
7546 * Allow the renaming of mount points.
7547 * - target must not exist
7548 * - target must reside in the same directory as source
7549 * - union mounts cannot be renamed
7550 * - "/" cannot be renamed
6d2010ae
A
7551 *
7552 * XXX Handle this in VFS after a continued lookup (if we missed
7553 * in the cache to start off)
39037602
A
7554 *
7555 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7556 * we'll skip past here. The file system is responsible for
7557 * checking that @tvp is not a descendent of @fvp and vice versa
7558 * so it should always return EINVAL if either @tvp or @fvp is the
7559 * root of a volume.
1c79356b 7560 */
91447636 7561 if ((fvp->v_flag & VROOT) &&
1c79356b
A
7562 (fvp->v_type == VDIR) &&
7563 (tvp == NULL) &&
7564 (fvp->v_mountedhere == NULL) &&
91447636 7565 (fdvp == tdvp) &&
1c79356b
A
7566 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7567 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7568 vnode_t coveredvp;
fe8ab488 7569
1c79356b 7570 /* switch fvp to the covered vnode */
91447636
A
7571 coveredvp = fvp->v_mount->mnt_vnodecovered;
7572 if ( (vnode_getwithref(coveredvp)) ) {
7573 error = ENOENT;
7574 goto out1;
7575 }
7576 vnode_put(fvp);
7577
7578 fvp = coveredvp;
1c79356b
A
7579 mntrename = TRUE;
7580 }
91447636
A
7581 /*
7582 * Check for cross-device rename.
7583 */
7584 if ((fvp->v_mount != tdvp->v_mount) ||
7585 (tvp && (fvp->v_mount != tvp->v_mount))) {
7586 error = EXDEV;
7587 goto out1;
7588 }
55e303ae 7589
91447636
A
7590 /*
7591 * If source is the same as the destination (that is the
7592 * same inode number) then there is nothing to do...
7593 * EXCEPT if the underlying file system supports case
7594 * insensitivity and is case preserving. In this case
7595 * the file system needs to handle the special case of
7596 * getting the same vnode as target (fvp) and source (tvp).
7597 *
7598 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7599 * and _PC_CASE_PRESERVING can have this exception, and they need to
7600 * handle the special case of getting the same vnode as target and
7601 * source. NOTE: Then the target is unlocked going into vnop_rename,
7602 * so not to cause locking problems. There is a single reference on tvp.
7603 *
fe8ab488 7604 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7605 * that correct behaviour then is just to return success without doing
7606 * anything.
6d2010ae
A
7607 *
7608 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7609 */
7610 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
7611 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7612 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7613 fromnd->ni_cnd.cn_namelen)) {
91447636 7614 goto out1;
55e303ae 7615 }
91447636 7616 }
55e303ae 7617
91447636
A
7618 if (holding_mntlock && fvp->v_mount != locked_mp) {
7619 /*
7620 * we're holding a reference and lock
7621 * on locked_mp, but it no longer matches
7622 * what we want to do... so drop our hold
7623 */
7624 mount_unlock_renames(locked_mp);
7625 mount_drop(locked_mp, 0);
7626 holding_mntlock = 0;
7627 }
7628 if (tdvp != fdvp && fvp->v_type == VDIR) {
7629 /*
7630 * serialize renames that re-shape
7631 * the tree... if holding_mntlock is
7632 * set, then we're ready to go...
7633 * otherwise we
7634 * first need to drop the iocounts
7635 * we picked up, second take the
7636 * lock to serialize the access,
7637 * then finally start the lookup
7638 * process over with the lock held
7639 */
7640 if (!holding_mntlock) {
7641 /*
7642 * need to grab a reference on
7643 * the mount point before we
7644 * drop all the iocounts... once
7645 * the iocounts are gone, the mount
7646 * could follow
7647 */
7648 locked_mp = fvp->v_mount;
7649 mount_ref(locked_mp, 0);
55e303ae 7650
91447636
A
7651 /*
7652 * nameidone has to happen before we vnode_put(tvp)
7653 * since it may need to release the fs_nodelock on the tvp
7654 */
316670eb 7655 nameidone(tond);
55e303ae 7656
91447636
A
7657 if (tvp)
7658 vnode_put(tvp);
7659 vnode_put(tdvp);
7660
7661 /*
7662 * nameidone has to happen before we vnode_put(fdvp)
7663 * since it may need to release the fs_nodelock on the fvp
7664 */
316670eb 7665 nameidone(fromnd);
55e303ae 7666
91447636
A
7667 vnode_put(fvp);
7668 vnode_put(fdvp);
7669
7670 mount_lock_renames(locked_mp);
7671 holding_mntlock = 1;
7672
7673 goto retry;
55e303ae 7674 }
91447636
A
7675 } else {
7676 /*
7677 * when we dropped the iocounts to take
fe8ab488 7678 * the lock, we allowed the identity of
91447636
A
7679 * the various vnodes to change... if they did,
7680 * we may no longer be dealing with a rename
7681 * that reshapes the tree... once we're holding
7682 * the iocounts, the vnodes can't change type
7683 * so we're free to drop the lock at this point
7684 * and continue on
1c79356b 7685 */
91447636
A
7686 if (holding_mntlock) {
7687 mount_unlock_renames(locked_mp);
7688 mount_drop(locked_mp, 0);
7689 holding_mntlock = 0;
1c79356b 7690 }
91447636 7691 }
6d2010ae 7692
91447636
A
7693 // save these off so we can later verify that fvp is the same
7694 oname = fvp->v_name;
7695 oparent = fvp->v_parent;
55e303ae 7696
6d2010ae 7697skipped_lookup:
316670eb
A
7698 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7699 tdvp, &tvp, &tond->ni_cnd, tvap,
39037602 7700 flags, ctx);
55e303ae 7701
91447636
A
7702 if (holding_mntlock) {
7703 /*
7704 * we can drop our serialization
7705 * lock now
7706 */
7707 mount_unlock_renames(locked_mp);
7708 mount_drop(locked_mp, 0);
7709 holding_mntlock = 0;
7710 }
7711 if (error) {
6d2010ae 7712 if (error == EKEEPLOOKING) {
316670eb
A
7713 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7714 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7715 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7716 }
7717 }
7718
316670eb
A
7719 fromnd->ni_vp = fvp;
7720 tond->ni_vp = tvp;
fe8ab488 7721
6d2010ae
A
7722 goto continue_lookup;
7723 }
7724
7725 /*
fe8ab488
A
7726 * We may encounter a race in the VNOP where the destination didn't
7727 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
7728 * try to create the entry. In this case, we should re-drive this rename
7729 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 7730 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
7731 */
7732 if (error == ERECYCLE) {
7733 do_retry = 1;
7734 }
55e303ae 7735
c18c124e
A
7736 /*
7737 * For compound VNOPs, the authorization callback may return
7738 * ENOENT in case of racing hardlink lookups hitting the name
7739 * cache, redrive the lookup.
7740 */
3e170ce0
A
7741 if (batched && error == ENOENT) {
7742 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7743 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7744 do_retry = 1;
7745 retry_count += 1;
7746 }
c18c124e
A
7747 }
7748
91447636 7749 goto out1;
fe8ab488
A
7750 }
7751
7752 /* call out to allow 3rd party notification of rename.
91447636
A
7753 * Ignore result of kauth_authorize_fileop call.
7754 */
fe8ab488
A
7755 kauth_authorize_fileop(vfs_context_ucred(ctx),
7756 KAUTH_FILEOP_RENAME,
2d21ac55 7757 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
7758 if (flags & VFS_RENAME_SWAP) {
7759 kauth_authorize_fileop(vfs_context_ucred(ctx),
7760 KAUTH_FILEOP_RENAME,
7761 (uintptr_t)to_name, (uintptr_t)from_name);
7762 }
91447636 7763
2d21ac55 7764#if CONFIG_FSE
91447636 7765 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
7766 if (from_truncated || to_truncated) {
7767 // set it here since only the from_finfo gets reported up to user space
7768 from_finfo.mode |= FSE_TRUNCATED_PATH;
7769 }
6d2010ae
A
7770
7771 if (tvap && tvp) {
7772 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7773 }
7774 if (fvap) {
7775 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7776 }
7777
39037602
A
7778 if (tvp) {
7779 add_fsevent(FSE_RENAME, ctx,
7780 FSE_ARG_STRING, from_len, from_name,
7781 FSE_ARG_FINFO, &from_finfo,
7782 FSE_ARG_STRING, to_len, to_name,
7783 FSE_ARG_FINFO, &to_finfo,
7784 FSE_ARG_DONE);
7785 if (flags & VFS_RENAME_SWAP) {
7786 /*
7787 * Strictly speaking, swap is the equivalent of
7788 * *three* renames. FSEvents clients should only take
7789 * the events as a hint, so we only bother reporting
7790 * two.
7791 */
7792 add_fsevent(FSE_RENAME, ctx,
7793 FSE_ARG_STRING, to_len, to_name,
7794 FSE_ARG_FINFO, &to_finfo,
7795 FSE_ARG_STRING, from_len, from_name,
7796 FSE_ARG_FINFO, &from_finfo,
7797 FSE_ARG_DONE);
7798 }
55e303ae 7799 } else {
2d21ac55 7800 add_fsevent(FSE_RENAME, ctx,
91447636
A
7801 FSE_ARG_STRING, from_len, from_name,
7802 FSE_ARG_FINFO, &from_finfo,
7803 FSE_ARG_STRING, to_len, to_name,
7804 FSE_ARG_DONE);
7805 }
7806 }
2d21ac55 7807#endif /* CONFIG_FSE */
fe8ab488 7808
91447636
A
7809 /*
7810 * update filesystem's mount point data
7811 */
7812 if (mntrename) {
7813 char *cp, *pathend, *mpname;
7814 char * tobuf;
7815 struct mount *mp;
7816 int maxlen;
7817 size_t len = 0;
7818
7819 mp = fvp->v_mountedhere;
7820
7821 if (vfs_busy(mp, LK_NOWAIT)) {
7822 error = EBUSY;
7823 goto out1;
55e303ae 7824 }
91447636 7825 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7826
fe8ab488
A
7827 if (UIO_SEG_IS_USER_SPACE(segflg))
7828 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7829 else
7830 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7831 if (!error) {
7832 /* find current mount point prefix */
7833 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7834 for (cp = pathend; *cp != '\0'; ++cp) {
7835 if (*cp == '/')
7836 pathend = cp + 1;
7837 }
7838 /* find last component of target name */
7839 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7840 if (*cp == '/')
7841 mpname = cp + 1;
7842 }
7843 /* append name to prefix */
7844 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7845 bzero(pathend, maxlen);
2d21ac55 7846 strlcpy(pathend, mpname, maxlen);
91447636
A
7847 }
7848 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7849
7850 vfs_unbusy(mp);
7851 }
7852 /*
fe8ab488 7853 * fix up name & parent pointers. note that we first
91447636
A
7854 * check that fvp has the same name/parent pointers it
7855 * had before the rename call... this is a 'weak' check
7856 * at best...
6d2010ae
A
7857 *
7858 * XXX oparent and oname may not be set in the compound vnop case
91447636 7859 */
6d2010ae 7860 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7861 int update_flags;
7862
7863 update_flags = VNODE_UPDATE_NAME;
7864
7865 if (fdvp != tdvp)
7866 update_flags |= VNODE_UPDATE_PARENT;
7867
316670eb 7868 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7869 }
7870out1:
593a1d5f
A
7871 if (to_name != NULL) {
7872 RELEASE_PATH(to_name);
7873 to_name = NULL;
7874 }
7875 if (from_name != NULL) {
7876 RELEASE_PATH(from_name);
7877 from_name = NULL;
7878 }
91447636
A
7879 if (holding_mntlock) {
7880 mount_unlock_renames(locked_mp);
7881 mount_drop(locked_mp, 0);
593a1d5f 7882 holding_mntlock = 0;
91447636
A
7883 }
7884 if (tdvp) {
7885 /*
7886 * nameidone has to happen before we vnode_put(tdvp)
7887 * since it may need to release the fs_nodelock on the tdvp
7888 */
316670eb 7889 nameidone(tond);
91447636
A
7890
7891 if (tvp)
7892 vnode_put(tvp);
7893 vnode_put(tdvp);
7894 }
7895 if (fdvp) {
7896 /*
7897 * nameidone has to happen before we vnode_put(fdvp)
7898 * since it may need to release the fs_nodelock on the fdvp
7899 */
316670eb 7900 nameidone(fromnd);
91447636
A
7901
7902 if (fvp)
7903 vnode_put(fvp);
7904 vnode_put(fdvp);
7905 }
fe8ab488 7906
6d2010ae
A
7907 /*
7908 * If things changed after we did the namei, then we will re-drive
7909 * this rename call from the top.
7910 */
316670eb 7911 if (do_retry) {
6d2010ae 7912 do_retry = 0;
593a1d5f
A
7913 goto retry;
7914 }
316670eb
A
7915
7916 FREE(__rename_data, M_TEMP);
1c79356b
A
7917 return (error);
7918}
7919
fe8ab488
A
7920int
7921rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7922{
7923 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7924 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7925}
7926
39037602 7927int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
7928{
7929 return renameat_internal(
39037602
A
7930 vfs_context_current(),
7931 uap->fromfd, uap->from,
7932 uap->tofd, uap->to,
fe8ab488
A
7933 UIO_USERSPACE, uap->flags);
7934}
39037602 7935
fe8ab488
A
7936int
7937renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7938{
7939 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7940 uap->tofd, uap->to, UIO_USERSPACE, 0));
7941}
7942
1c79356b
A
7943/*
7944 * Make a directory file.
2d21ac55
A
7945 *
7946 * Returns: 0 Success
7947 * EEXIST
7948 * namei:???
7949 * vnode_authorize:???
7950 * vn_create:???
1c79356b 7951 */
1c79356b 7952/* ARGSUSED */
91447636 7953static int
fe8ab488
A
7954mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7955 enum uio_seg segflg)
1c79356b 7956{
91447636 7957 vnode_t vp, dvp;
1c79356b 7958 int error;
91447636 7959 int update_flags = 0;
6d2010ae 7960 int batched;
1c79356b
A
7961 struct nameidata nd;
7962
91447636 7963 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7964 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7965 path, ctx);
9bccf70c 7966 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7967 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7968
7969continue_lookup:
fe8ab488 7970 error = nameiat(&nd, fd);
55e303ae 7971 if (error)
1c79356b 7972 return (error);
91447636 7973 dvp = nd.ni_dvp;
1c79356b 7974 vp = nd.ni_vp;
55e303ae 7975
fe8ab488
A
7976 if (vp != NULL) {
7977 error = EEXIST;
7978 goto out;
7979 }
7980
6d2010ae 7981 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7982
7983 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7984
6d2010ae
A
7985 /*
7986 * XXX
7987 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7988 * only get EXISTS or EISDIR for existing path components, and not that it could see
7989 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7990 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7991 */
fe8ab488 7992 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7993 if (error == EACCES || error == EPERM) {
7994 int error2;
7995
7996 nameidone(&nd);
7997 vnode_put(dvp);
7998 dvp = NULLVP;
7999
fe8ab488
A
8000 /*
8001 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
8002 * rather than EACCESS if the target exists.
8003 */
fe8ab488
A
8004 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
8005 path, ctx);
8006 error2 = nameiat(&nd, fd);
6d2010ae
A
8007 if (error2) {
8008 goto out;
8009 } else {
8010 vp = nd.ni_vp;
8011 error = EEXIST;
8012 goto out;
8013 }
8014 }
8015
2d21ac55 8016 goto out;
6d2010ae
A
8017 }
8018
8019 /*
fe8ab488 8020 * make the directory
6d2010ae 8021 */
fe8ab488 8022 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
8023 if (error == EKEEPLOOKING) {
8024 nd.ni_vp = vp;
8025 goto continue_lookup;
8026 }
2d21ac55 8027
fe8ab488 8028 goto out;
6d2010ae 8029 }
fe8ab488 8030
91447636
A
8031 // Make sure the name & parent pointers are hooked up
8032 if (vp->v_name == NULL)
8033 update_flags |= VNODE_UPDATE_NAME;
8034 if (vp->v_parent == NULLVP)
8035 update_flags |= VNODE_UPDATE_PARENT;
8036
8037 if (update_flags)
8038 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 8039
2d21ac55 8040#if CONFIG_FSE
91447636 8041 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 8042#endif
91447636
A
8043
8044out:
8045 /*
8046 * nameidone has to happen before we vnode_put(dvp)
8047 * since it may need to release the fs_nodelock on the dvp
8048 */
8049 nameidone(&nd);
8050
8051 if (vp)
6d2010ae 8052 vnode_put(vp);
fe8ab488 8053 if (dvp)
6d2010ae 8054 vnode_put(dvp);
55e303ae 8055
1c79356b
A
8056 return (error);
8057}
8058
b0d623f7
A
8059/*
8060 * mkdir_extended: Create a directory; with extended security (ACL).
8061 *
8062 * Parameters: p Process requesting to create the directory
8063 * uap User argument descriptor (see below)
fe8ab488 8064 * retval (ignored)
b0d623f7
A
8065 *
8066 * Indirect: uap->path Path of directory to create
8067 * uap->mode Access permissions to set
8068 * uap->xsecurity ACL to set
fe8ab488 8069 *
b0d623f7
A
8070 * Returns: 0 Success
8071 * !0 Not success
8072 *
8073 */
1c79356b 8074int
b0d623f7 8075mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 8076{
91447636
A
8077 int ciferror;
8078 kauth_filesec_t xsecdst;
8079 struct vnode_attr va;
8080
b0d623f7
A
8081 AUDIT_ARG(owner, uap->uid, uap->gid);
8082
91447636
A
8083 xsecdst = NULL;
8084 if ((uap->xsecurity != USER_ADDR_NULL) &&
8085 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
8086 return ciferror;
8087
91447636 8088 VATTR_INIT(&va);
fe8ab488 8089 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
8090 if (xsecdst != NULL)
8091 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8092
fe8ab488
A
8093 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8094 UIO_USERSPACE);
91447636
A
8095 if (xsecdst != NULL)
8096 kauth_filesec_free(xsecdst);
8097 return ciferror;
1c79356b
A
8098}
8099
1c79356b 8100int
b0d623f7 8101mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 8102{
91447636 8103 struct vnode_attr va;
1c79356b 8104
91447636 8105 VATTR_INIT(&va);
fe8ab488 8106 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 8107
fe8ab488
A
8108 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8109 UIO_USERSPACE));
91447636 8110}
1c79356b 8111
91447636 8112int
fe8ab488
A
8113mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8114{
8115 struct vnode_attr va;
8116
8117 VATTR_INIT(&va);
8118 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8119
8120 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8121 UIO_USERSPACE));
8122}
8123
8124static int
8125rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8126 enum uio_seg segflg)
1c79356b 8127{
2d21ac55 8128 vnode_t vp, dvp;
91447636
A
8129 int error;
8130 struct nameidata nd;
6d2010ae
A
8131 char *path = NULL;
8132 int len=0;
8133 int has_listeners = 0;
8134 int need_event = 0;
8135 int truncated = 0;
6d2010ae
A
8136#if CONFIG_FSE
8137 struct vnode_attr va;
8138#endif /* CONFIG_FSE */
8139 struct vnode_attr *vap = NULL;
c18c124e 8140 int restart_count = 0;
6d2010ae 8141 int batched;
91447636 8142
b0d623f7 8143 int restart_flag;
91447636 8144
fe8ab488 8145 /*
2d21ac55
A
8146 * This loop exists to restart rmdir in the unlikely case that two
8147 * processes are simultaneously trying to remove the same directory
8148 * containing orphaned appleDouble files.
8149 */
8150 do {
6d2010ae 8151 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 8152 segflg, dirpath, ctx);
6d2010ae
A
8153 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8154continue_lookup:
2d21ac55 8155 restart_flag = 0;
6d2010ae 8156 vap = NULL;
2d21ac55 8157
fe8ab488 8158 error = nameiat(&nd, fd);
2d21ac55
A
8159 if (error)
8160 return (error);
8161
8162 dvp = nd.ni_dvp;
8163 vp = nd.ni_vp;
8164
6d2010ae
A
8165 if (vp) {
8166 batched = vnode_compound_rmdir_available(vp);
2d21ac55 8167
6d2010ae
A
8168 if (vp->v_flag & VROOT) {
8169 /*
8170 * The root of a mounted filesystem cannot be deleted.
8171 */
8172 error = EBUSY;
8173 goto out;
8174 }
1c79356b 8175
00867663
A
8176#if DEVELOPMENT || DEBUG
8177 /*
8178 * XXX VSWAP: Check for entitlements or special flag here
8179 * so we can restrict access appropriately.
8180 */
8181#else /* DEVELOPMENT || DEBUG */
8182
8183 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8184 error = EPERM;
8185 goto out;
8186 }
8187#endif /* DEVELOPMENT || DEBUG */
8188
2d21ac55 8189 /*
6d2010ae
A
8190 * Removed a check here; we used to abort if vp's vid
8191 * was not the same as what we'd seen the last time around.
8192 * I do not think that check was valid, because if we retry
8193 * and all dirents are gone, the directory could legitimately
8194 * be recycled but still be present in a situation where we would
fe8ab488 8195 * have had permission to delete. Therefore, we won't make
6d2010ae
A
8196 * an effort to preserve that check now that we may not have a
8197 * vp here.
2d21ac55 8198 */
6d2010ae
A
8199
8200 if (!batched) {
8201 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8202 if (error) {
3e170ce0
A
8203 if (error == ENOENT) {
8204 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8205 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8206 restart_flag = 1;
8207 restart_count += 1;
8208 }
c18c124e 8209 }
6d2010ae
A
8210 goto out;
8211 }
8212 }
2d21ac55 8213 } else {
6d2010ae
A
8214 batched = 1;
8215
8216 if (!vnode_compound_rmdir_available(dvp)) {
8217 panic("No error, but no compound rmdir?");
8218 }
91447636 8219 }
6d2010ae 8220
2d21ac55 8221#if CONFIG_FSE
6d2010ae 8222 fse_info finfo;
b0d623f7 8223
6d2010ae
A
8224 need_event = need_fsevent(FSE_DELETE, dvp);
8225 if (need_event) {
8226 if (!batched) {
2d21ac55 8227 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
8228 } else {
8229 error = vfs_get_notify_attributes(&va);
8230 if (error) {
8231 goto out;
8232 }
8233
8234 vap = &va;
2d21ac55 8235 }
6d2010ae 8236 }
2d21ac55 8237#endif
6d2010ae
A
8238 has_listeners = kauth_authorize_fileop_has_listeners();
8239 if (need_event || has_listeners) {
8240 if (path == NULL) {
2d21ac55
A
8241 GET_PATH(path);
8242 if (path == NULL) {
8243 error = ENOMEM;
8244 goto out;
8245 }
6d2010ae 8246 }
b0d623f7 8247
6d2010ae 8248 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 8249#if CONFIG_FSE
6d2010ae
A
8250 if (truncated) {
8251 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 8252 }
6d2010ae
A
8253#endif
8254 }
91447636 8255
6d2010ae
A
8256 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8257 nd.ni_vp = vp;
8258 if (vp == NULLVP) {
8259 /* Couldn't find a vnode */
8260 goto out;
8261 }
2d21ac55 8262
6d2010ae
A
8263 if (error == EKEEPLOOKING) {
8264 goto continue_lookup;
3e170ce0
A
8265 } else if (batched && error == ENOENT) {
8266 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8267 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8268 /*
8269 * For compound VNOPs, the authorization callback
8270 * may return ENOENT in case of racing hard link lookups
8271 * redrive the lookup.
8272 */
8273 restart_flag = 1;
8274 restart_count += 1;
8275 goto out;
8276 }
6d2010ae 8277 }
39236c6e 8278#if CONFIG_APPLEDOUBLE
6d2010ae
A
8279 /*
8280 * Special case to remove orphaned AppleDouble
8281 * files. I don't like putting this in the kernel,
8282 * but carbon does not like putting this in carbon either,
8283 * so here we are.
8284 */
8285 if (error == ENOTEMPTY) {
8286 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8287 if (error == EBUSY) {
8288 goto out;
2d21ac55
A
8289 }
8290
6d2010ae 8291
2d21ac55 8292 /*
fe8ab488 8293 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8294 */
6d2010ae
A
8295 if (!error)
8296 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8297 }
39236c6e 8298#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8299 /*
fe8ab488 8300 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8301 * Ignore result of kauth_authorize_fileop call.
8302 */
8303 if (!error) {
8304 if (has_listeners) {
fe8ab488
A
8305 kauth_authorize_fileop(vfs_context_ucred(ctx),
8306 KAUTH_FILEOP_DELETE,
6d2010ae
A
8307 (uintptr_t)vp,
8308 (uintptr_t)path);
8309 }
8310
8311 if (vp->v_flag & VISHARDLINK) {
8312 // see the comment in unlink1() about why we update
8313 // the parent of a hard link when it is removed
8314 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8315 }
2d21ac55
A
8316
8317#if CONFIG_FSE
6d2010ae
A
8318 if (need_event) {
8319 if (vap) {
8320 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8321 }
6d2010ae
A
8322 add_fsevent(FSE_DELETE, ctx,
8323 FSE_ARG_STRING, len, path,
8324 FSE_ARG_FINFO, &finfo,
8325 FSE_ARG_DONE);
2d21ac55 8326 }
6d2010ae 8327#endif
2d21ac55
A
8328 }
8329
8330out:
6d2010ae
A
8331 if (path != NULL) {
8332 RELEASE_PATH(path);
8333 path = NULL;
8334 }
2d21ac55
A
8335 /*
8336 * nameidone has to happen before we vnode_put(dvp)
8337 * since it may need to release the fs_nodelock on the dvp
8338 */
8339 nameidone(&nd);
2d21ac55 8340 vnode_put(dvp);
6d2010ae 8341
fe8ab488 8342 if (vp)
6d2010ae 8343 vnode_put(vp);
2d21ac55
A
8344
8345 if (restart_flag == 0) {
8346 wakeup_one((caddr_t)vp);
8347 return (error);
8348 }
8349 tsleep(vp, PVFS, "rm AD", 1);
8350
8351 } while (restart_flag != 0);
91447636 8352
1c79356b 8353 return (error);
2d21ac55 8354
1c79356b 8355}
91447636 8356
fe8ab488
A
8357/*
8358 * Remove a directory file.
8359 */
8360/* ARGSUSED */
8361int
8362rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8363{
8364 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8365 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8366}
8367
2d21ac55
A
/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 *
 * The argument is parenthesized so that an expression argument (for
 * example a conditional) is added to as a whole.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of the last byte of the record) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
8379
fe8ab488 8380errno_t
2d21ac55
A
8381vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8382 int *numdirent, vfs_context_t ctxp)
8383{
8384 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8385 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6d2010ae 8386 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8387 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8388 } else {
8389 size_t bufsize;
8390 void * bufptr;
8391 uio_t auio;
15129b1c 8392 struct direntry *entry64;
2d21ac55
A
8393 struct dirent *dep;
8394 int bytesread;
8395 int error;
8396
8397 /*
5ba3f43e
A
8398 * We're here because the underlying file system does not
8399 * support direnties or we mounted denying support so we must
8400 * fall back to dirents and convert them to direntries.
8401 *
8402 * Our kernel buffer needs to be smaller since re-packing will
8403 * expand each dirent. The worse case (when the name length
8404 * is 3 or less) corresponds to a struct direntry size of 32
2d21ac55
A
8405 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8406 * (4-byte aligned). So having a buffer that is 3/8 the size
8407 * will prevent us from reading more than we can pack.
8408 *
8409 * Since this buffer is wired memory, we will limit the
39037602 8410 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8411 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8412 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8413 */
316670eb 8414 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8415 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8416 if (bufptr == NULL) {
8417 return ENOMEM;
8418 }
2d21ac55 8419
b0d623f7 8420 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8421 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8422 auio->uio_offset = uio->uio_offset;
8423
8424 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8425
8426 dep = (struct dirent *)bufptr;
8427 bytesread = bufsize - uio_resid(auio);
8428
15129b1c
A
8429 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8430 M_TEMP, M_WAITOK);
2d21ac55
A
8431 /*
8432 * Convert all the entries and copy them out to user's buffer.
8433 */
8434 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
8435 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8436
5ba3f43e
A
8437 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8438 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8439 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
8440 vp->v_mount->mnt_vfsstat.f_mntonname,
8441 vp->v_name ? vp->v_name : "<unknown>");
8442 error = EIO;
8443 break;
8444 }
8445
15129b1c 8446 bzero(entry64, enbufsize);
2d21ac55 8447 /* Convert a dirent to a dirent64. */
15129b1c
A
8448 entry64->d_ino = dep->d_ino;
8449 entry64->d_seekoff = 0;
8450 entry64->d_reclen = enbufsize;
8451 entry64->d_namlen = dep->d_namlen;
8452 entry64->d_type = dep->d_type;
8453 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8454
8455 /* Move to next entry. */
8456 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8457
8458 /* Copy entry64 to user's buffer. */
15129b1c 8459 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8460 }
8461
8462 /* Update the real offset using the offset we got from VNOP_READDIR. */
8463 if (error == 0) {
8464 uio->uio_offset = auio->uio_offset;
8465 }
8466 uio_free(auio);
8467 FREE(bufptr, M_TEMP);
15129b1c 8468 FREE(entry64, M_TEMP);
2d21ac55
A
8469 return (error);
8470 }
8471}
1c79356b 8472
39236c6e
A
8473#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8474
1c79356b
A
8475/*
8476 * Read a block of directory entries in a file system independent format.
8477 */
2d21ac55
A
8478static int
8479getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8480 off_t *offset, int flags)
1c79356b 8481{
2d21ac55
A
8482 vnode_t vp;
8483 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8484 struct fileproc *fp;
8485 uio_t auio;
2d21ac55
A
8486 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8487 off_t loff;
8488 int error, eofflag, numdirent;
91447636 8489 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8490
2d21ac55
A
8491 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8492 if (error) {
1c79356b 8493 return (error);
2d21ac55 8494 }
91447636
A
8495 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8496 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8497 error = EBADF;
8498 goto out;
8499 }
2d21ac55 8500
39236c6e
A
8501 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8502 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8503
2d21ac55
A
8504#if CONFIG_MACF
8505 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8506 if (error)
8507 goto out;
8508#endif
91447636
A
8509 if ( (error = vnode_getwithref(vp)) ) {
8510 goto out;
8511 }
91447636 8512 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8513
1c79356b 8514unionread:
91447636
A
8515 if (vp->v_type != VDIR) {
8516 (void)vnode_put(vp);
8517 error = EINVAL;
8518 goto out;
8519 }
2d21ac55
A
8520
8521#if CONFIG_MACF
8522 error = mac_vnode_check_readdir(&context, vp);
8523 if (error != 0) {
8524 (void)vnode_put(vp);
8525 goto out;
8526 }
8527#endif /* MAC */
91447636
A
8528
8529 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8530 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8531 uio_addiov(auio, bufp, bufsize);
91447636 8532
2d21ac55
A
8533 if (flags & VNODE_READDIR_EXTENDED) {
8534 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8535 fp->f_fglob->fg_offset = uio_offset(auio);
8536 } else {
8537 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8538 fp->f_fglob->fg_offset = uio_offset(auio);
8539 }
91447636
A
8540 if (error) {
8541 (void)vnode_put(vp);
8542 goto out;
8543 }
1c79356b 8544
2d21ac55
A
8545 if ((user_ssize_t)bufsize == uio_resid(auio)){
8546 if (union_dircheckp) {
8547 error = union_dircheckp(&vp, fp, &context);
8548 if (error == -1)
8549 goto unionread;
813fb2f6
A
8550 if (error) {
8551 (void)vnode_put(vp);
2d21ac55 8552 goto out;
813fb2f6 8553 }
1c79356b
A
8554 }
8555
39236c6e 8556 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8557 struct vnode *tvp = vp;
39236c6e
A
8558 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8559 vnode_ref(vp);
8560 fp->f_fglob->fg_data = (caddr_t) vp;
8561 fp->f_fglob->fg_offset = 0;
8562 vnode_rele(tvp);
8563 vnode_put(tvp);
8564 goto unionread;
8565 }
8566 vp = tvp;
1c79356b
A
8567 }
8568 }
2d21ac55 8569
91447636 8570 vnode_put(vp);
2d21ac55
A
8571 if (offset) {
8572 *offset = loff;
8573 }
39037602 8574
2d21ac55 8575 *bytesread = bufsize - uio_resid(auio);
91447636
A
8576out:
8577 file_drop(fd);
1c79356b
A
8578 return (error);
8579}
8580
2d21ac55
A
8581
8582int
b0d623f7 8583getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8584{
8585 off_t offset;
2d21ac55
A
8586 ssize_t bytesread;
8587 int error;
8588
8589 AUDIT_ARG(fd, uap->fd);
8590 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8591
8592 if (error == 0) {
b0d623f7
A
8593 if (proc_is64bit(p)) {
8594 user64_long_t base = (user64_long_t)offset;
8595 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8596 } else {
8597 user32_long_t base = (user32_long_t)offset;
8598 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8599 }
2d21ac55
A
8600 *retval = bytesread;
8601 }
8602 return (error);
8603}
8604
8605int
8606getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8607{
8608 off_t offset;
8609 ssize_t bytesread;
8610 int error;
8611
8612 AUDIT_ARG(fd, uap->fd);
8613 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8614
8615 if (error == 0) {
8616 *retval = bytesread;
8617 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8618 }
8619 return (error);
8620}
8621
8622
1c79356b
A
8623/*
8624 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8625 * XXX implement xsecurity
1c79356b 8626 */
91447636
A
8627#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8628static int
b0d623f7 8629umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8630{
2d21ac55 8631 struct filedesc *fdp;
1c79356b 8632
91447636 8633 AUDIT_ARG(mask, newmask);
2d21ac55 8634 proc_fdlock(p);
1c79356b
A
8635 fdp = p->p_fd;
8636 *retval = fdp->fd_cmask;
91447636 8637 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8638 proc_fdunlock(p);
1c79356b
A
8639 return (0);
8640}
8641
b0d623f7
A
8642/*
8643 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8644 *
8645 * Parameters: p Process requesting to set the umask
8646 * uap User argument descriptor (see below)
8647 * retval umask of the process (parameter p)
8648 *
8649 * Indirect: uap->newmask umask to set
8650 * uap->xsecurity ACL to set
39037602 8651 *
b0d623f7
A
8652 * Returns: 0 Success
8653 * !0 Not success
8654 *
8655 */
8656int
8657umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8658{
8659 int ciferror;
8660 kauth_filesec_t xsecdst;
8661
8662 xsecdst = KAUTH_FILESEC_NONE;
8663 if (uap->xsecurity != USER_ADDR_NULL) {
8664 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8665 return ciferror;
8666 } else {
8667 xsecdst = KAUTH_FILESEC_NONE;
8668 }
8669
8670 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8671
8672 if (xsecdst != KAUTH_FILESEC_NONE)
8673 kauth_filesec_free(xsecdst);
8674 return ciferror;
8675}
8676
8677int
b0d623f7 8678umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
8679{
8680 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8681}
8682
1c79356b
A
8683/*
8684 * Void all references to file by ripping underlying filesystem
8685 * away from vnode.
8686 */
1c79356b
A
8687/* ARGSUSED */
8688int
b0d623f7 8689revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8690{
2d21ac55 8691 vnode_t vp;
91447636 8692 struct vnode_attr va;
2d21ac55 8693 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8694 int error;
8695 struct nameidata nd;
8696
6d2010ae
A
8697 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8698 uap->path, ctx);
55e303ae
A
8699 error = namei(&nd);
8700 if (error)
1c79356b
A
8701 return (error);
8702 vp = nd.ni_vp;
91447636
A
8703
8704 nameidone(&nd);
8705
b0d623f7
A
8706 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8707 error = ENOTSUP;
8708 goto out;
8709 }
8710
8711 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8712 error = EBUSY;
8713 goto out;
8714 }
8715
2d21ac55
A
8716#if CONFIG_MACF
8717 error = mac_vnode_check_revoke(ctx, vp);
8718 if (error)
8719 goto out;
8720#endif
8721
91447636
A
8722 VATTR_INIT(&va);
8723 VATTR_WANTED(&va, va_uid);
2d21ac55 8724 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 8725 goto out;
2d21ac55
A
8726 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8727 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 8728 goto out;
b0d623f7 8729 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 8730 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 8731out:
91447636 8732 vnode_put(vp);
1c79356b
A
8733 return (error);
8734}
8735
0b4e3aa0 8736
/*
 *  HFS/HFS Plus SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats.
 */
8742
9bccf70c 8743
1c79356b 8744/*
39236c6e
A
8745 * Obtain attribute information on objects in a directory while enumerating
8746 * the directory.
8747 */
1c79356b
A
8748/* ARGSUSED */
8749int
b0d623f7 8750getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 8751{
2d21ac55 8752 vnode_t vp;
91447636
A
8753 struct fileproc *fp;
8754 uio_t auio = NULL;
8755 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
d9a64523
A
8756 uint32_t count = 0, savecount = 0;
8757 uint32_t newstate = 0;
91447636 8758 int error, eofflag;
d9a64523 8759 uint32_t loff = 0;
39037602 8760 struct attrlist attributelist;
2d21ac55 8761 vfs_context_t ctx = vfs_context_current();
91447636
A
8762 int fd = uap->fd;
8763 char uio_buf[ UIO_SIZEOF(1) ];
8764 kauth_action_t action;
8765
8766 AUDIT_ARG(fd, fd);
39037602 8767
91447636 8768 /* Get the attributes into kernel space */
2d21ac55 8769 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 8770 return(error);
2d21ac55
A
8771 }
8772 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8773 return(error);
8774 }
39236c6e 8775 savecount = count;
2d21ac55 8776 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 8777 return (error);
2d21ac55 8778 }
91447636
A
8779 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8780 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8781 error = EBADF;
8782 goto out;
8783 }
2d21ac55
A
8784
8785
8786#if CONFIG_MACF
8787 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8788 fp->f_fglob);
8789 if (error)
8790 goto out;
8791#endif
8792
8793
91447636
A
8794 if ( (error = vnode_getwithref(vp)) )
8795 goto out;
55e303ae 8796
91447636 8797 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 8798
39236c6e 8799unionread:
91447636
A
8800 if (vp->v_type != VDIR) {
8801 (void)vnode_put(vp);
8802 error = EINVAL;
8803 goto out;
8804 }
55e303ae 8805
2d21ac55
A
8806#if CONFIG_MACF
8807 error = mac_vnode_check_readdir(ctx, vp);
8808 if (error != 0) {
8809 (void)vnode_put(vp);
8810 goto out;
8811 }
8812#endif /* MAC */
8813
91447636
A
8814 /* set up the uio structure which will contain the users return buffer */
8815 loff = fp->f_fglob->fg_offset;
39236c6e 8816 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 8817 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 8818
91447636
A
8819 /*
8820 * If the only item requested is file names, we can let that past with
8821 * just LIST_DIRECTORY. If they want any other attributes, that means
8822 * they need SEARCH as well.
8823 */
8824 action = KAUTH_VNODE_LIST_DIRECTORY;
8825 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8826 attributelist.fileattr || attributelist.dirattr)
8827 action |= KAUTH_VNODE_SEARCH;
39037602 8828
2d21ac55 8829 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 8830
b0d623f7
A
8831 /* Believe it or not, uap->options only has 32-bits of valid
8832 * info, so truncate before extending again */
39236c6e
A
8833
8834 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8835 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8836 }
8837
8838 if (error) {
8839 (void) vnode_put(vp);
8840 goto out;
8841 }
8842
8843 /*
8844 * If we've got the last entry of a directory in a union mount
8845 * then reset the eofflag and pretend there's still more to come.
8846 * The next call will again set eofflag and the buffer will be empty,
8847 * so traverse to the underlying directory and do the directory
8848 * read there.
8849 */
8850 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8851 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8852 eofflag = 0;
8853 } else { // Empty buffer
8854 struct vnode *tvp = vp;
8855 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8856 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8857 fp->f_fglob->fg_data = (caddr_t) vp;
8858 fp->f_fglob->fg_offset = 0; // reset index for new dir
8859 count = savecount;
8860 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8861 vnode_put(tvp);
8862 goto unionread;
8863 }
8864 vp = tvp;
8865 }
2d21ac55 8866 }
39236c6e 8867
91447636 8868 (void)vnode_put(vp);
1c79356b 8869
39037602 8870 if (error)
91447636
A
8871 goto out;
8872 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8873
2d21ac55 8874 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8875 goto out;
2d21ac55 8876 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8877 goto out;
2d21ac55 8878 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8879 goto out;
1c79356b
A
8880
8881 *retval = eofflag; /* similar to getdirentries */
91447636 8882 error = 0;
2d21ac55 8883out:
91447636
A
8884 file_drop(fd);
8885 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8886
39236c6e 8887} /* end of getdirentriesattr system call */
1c79356b
A
8888
8889/*
8890* Exchange data between two files
8891*/
8892
1c79356b
A
8893/* ARGSUSED */
8894int
b0d623f7 8895exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8896{
8897
8898 struct nameidata fnd, snd;
2d21ac55
A
8899 vfs_context_t ctx = vfs_context_current();
8900 vnode_t fvp;
8901 vnode_t svp;
8902 int error;
b0d623f7 8903 u_int32_t nameiflags;
91447636
A
8904 char *fpath = NULL;
8905 char *spath = NULL;
b0d623f7
A
8906 int flen=0, slen=0;
8907 int from_truncated=0, to_truncated=0;
8908#if CONFIG_FSE
91447636 8909 fse_info f_finfo, s_finfo;
b0d623f7 8910#endif
39037602 8911
1c79356b
A
8912 nameiflags = 0;
8913 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8914
6d2010ae
A
8915 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8916 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8917
6d2010ae
A
8918 error = namei(&fnd);
8919 if (error)
8920 goto out2;
1c79356b 8921
91447636
A
8922 nameidone(&fnd);
8923 fvp = fnd.ni_vp;
1c79356b 8924
39037602 8925 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
6d2010ae 8926 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8927
6d2010ae
A
8928 error = namei(&snd);
8929 if (error) {
91447636 8930 vnode_put(fvp);
55e303ae 8931 goto out2;
6d2010ae 8932 }
91447636 8933 nameidone(&snd);
1c79356b
A
8934 svp = snd.ni_vp;
8935
91447636
A
8936 /*
8937 * if the files are the same, return an inval error
8938 */
1c79356b 8939 if (svp == fvp) {
91447636
A
8940 error = EINVAL;
8941 goto out;
39037602 8942 }
1c79356b 8943
91447636
A
8944 /*
8945 * if the files are on different volumes, return an error
8946 */
8947 if (svp->v_mount != fvp->v_mount) {
8948 error = EXDEV;
8949 goto out;
8950 }
2d21ac55 8951
39236c6e
A
8952 /* If they're not files, return an error */
8953 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8954 error = EINVAL;
8955 goto out;
8956 }
8957
2d21ac55
A
8958#if CONFIG_MACF
8959 error = mac_vnode_check_exchangedata(ctx,
8960 fvp, svp);
8961 if (error)
8962 goto out;
8963#endif
8964 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8965 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8966 goto out;
1c79356b 8967
2d21ac55
A
8968 if (
8969#if CONFIG_FSE
39037602 8970 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55
A
8971#endif
8972 kauth_authorize_fileop_has_listeners()) {
8973 GET_PATH(fpath);
8974 GET_PATH(spath);
8975 if (fpath == NULL || spath == NULL) {
8976 error = ENOMEM;
8977 goto out;
8978 }
b0d623f7
A
8979
8980 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8981 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 8982
2d21ac55
A
8983#if CONFIG_FSE
8984 get_fse_info(fvp, &f_finfo, ctx);
8985 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8986 if (from_truncated || to_truncated) {
8987 // set it here since only the f_finfo gets reported up to user space
8988 f_finfo.mode |= FSE_TRUNCATED_PATH;
8989 }
2d21ac55 8990#endif
91447636 8991 }
1c79356b 8992 /* Ok, make the call */
2d21ac55 8993 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8994
91447636 8995 if (error == 0) {
2d21ac55 8996 const char *tmpname;
91447636
A
8997
8998 if (fpath != NULL && spath != NULL) {
39037602 8999 /* call out to allow 3rd party notification of exchangedata.
91447636
A
9000 * Ignore result of kauth_authorize_fileop call.
9001 */
39037602 9002 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
9003 (uintptr_t)fpath, (uintptr_t)spath);
9004 }
9005 name_cache_lock();
9006
9007 tmpname = fvp->v_name;
9008 fvp->v_name = svp->v_name;
9009 svp->v_name = tmpname;
39037602 9010
91447636 9011 if (fvp->v_parent != svp->v_parent) {
2d21ac55 9012 vnode_t tmp;
91447636
A
9013
9014 tmp = fvp->v_parent;
9015 fvp->v_parent = svp->v_parent;
9016 svp->v_parent = tmp;
9017 }
9018 name_cache_unlock();
9019
2d21ac55 9020#if CONFIG_FSE
91447636 9021 if (fpath != NULL && spath != NULL) {
2d21ac55 9022 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
9023 FSE_ARG_STRING, flen, fpath,
9024 FSE_ARG_FINFO, &f_finfo,
9025 FSE_ARG_STRING, slen, spath,
9026 FSE_ARG_FINFO, &s_finfo,
9027 FSE_ARG_DONE);
9028 }
2d21ac55 9029#endif
55e303ae
A
9030 }
9031
1c79356b 9032out:
2d21ac55
A
9033 if (fpath != NULL)
9034 RELEASE_PATH(fpath);
9035 if (spath != NULL)
9036 RELEASE_PATH(spath);
91447636
A
9037 vnode_put(svp);
9038 vnode_put(fvp);
1c79356b 9039out2:
1c79356b 9040 return (error);
91447636 9041}
1c79356b 9042
39236c6e
A
9043/*
9044 * Return (in MB) the amount of freespace on the given vnode's volume.
9045 */
9046uint32_t freespace_mb(vnode_t vp);
9047
9048uint32_t
9049freespace_mb(vnode_t vp)
9050{
39037602 9051 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
39236c6e
A
9052 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9053 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
9054}
9055
316670eb 9056#if CONFIG_SEARCHFS
1c79356b 9057
1c79356b
A
9058/* ARGSUSED */
9059
9060int
b0d623f7 9061searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 9062{
39236c6e
A
9063 vnode_t vp, tvp;
9064 int i, error=0;
1c79356b
A
9065 int fserror = 0;
9066 struct nameidata nd;
b0d623f7 9067 struct user64_fssearchblock searchblock;
1c79356b
A
9068 struct searchstate *state;
9069 struct attrlist *returnattrs;
b0d623f7 9070 struct timeval timelimit;
1c79356b 9071 void *searchparams1,*searchparams2;
91447636
A
9072 uio_t auio = NULL;
9073 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 9074 uint32_t nummatches;
1c79356b 9075 int mallocsize;
b0d623f7 9076 uint32_t nameiflags;
2d21ac55 9077 vfs_context_t ctx = vfs_context_current();
91447636 9078 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 9079
39236c6e 9080 /* Start by copying in fsearchblock parameter list */
91447636 9081 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
9082 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9083 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9084 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
9085 }
9086 else {
b0d623f7
A
9087 struct user32_fssearchblock tmp_searchblock;
9088
91447636
A
9089 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9090 // munge into 64-bit version
9091 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9092 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9093 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9094 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 9095 /*
b0d623f7
A
9096 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9097 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9098 */
9099 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9100 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
9101 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9102 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9103 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9104 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9105 searchblock.searchattrs = tmp_searchblock.searchattrs;
9106 }
9107 if (error)
1c79356b
A
9108 return(error);
9109
39037602 9110 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 9111 */
39037602 9112 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
a3d08fcd
A
9113 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
9114 return(EINVAL);
39037602 9115
1c79356b
A
9116 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9117 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
9118 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9119 /* block. */
fe8ab488
A
9120 /* */
9121 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9122 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9123 /* assumes the size is still 556 bytes it will continue to work */
39037602 9124
91447636 9125 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 9126 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
9127
9128 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9129
9130 /* Now set up the various pointers to the correct place in our newly allocated memory */
9131
9132 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9133 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9134 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
9135
9136 /* Now copy in the stuff given our local variables. */
9137
91447636 9138 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
9139 goto freeandexit;
9140
91447636 9141 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
9142 goto freeandexit;
9143
91447636 9144 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b 9145 goto freeandexit;
39037602 9146
91447636 9147 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 9148 goto freeandexit;
1c79356b 9149
39236c6e
A
9150 /*
9151 * When searching a union mount, need to set the
9152 * start flag at the first call on each layer to
9153 * reset state for the new volume.
9154 */
9155 if (uap->options & SRCHFS_START)
9156 state->ss_union_layer = 0;
39037602 9157 else
39236c6e
A
9158 uap->options |= state->ss_union_flags;
9159 state->ss_union_flags = 0;
b0d623f7
A
9160
9161 /*
9162 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9163 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
9164 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9165 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
9166 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9167 */
9168
9169 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9170 attrreference_t* string_ref;
9171 u_int32_t* start_length;
39037602 9172 user64_size_t param_length;
b0d623f7
A
9173
9174 /* validate searchparams1 */
39037602 9175 param_length = searchblock.sizeofsearchparams1;
b0d623f7
A
9176 /* skip the word that specifies length of the buffer */
9177 start_length= (u_int32_t*) searchparams1;
9178 start_length= start_length+1;
9179 string_ref= (attrreference_t*) start_length;
9180
9181 /* ensure no negative offsets or too big offsets */
9182 if (string_ref->attr_dataoffset < 0 ) {
9183 error = EINVAL;
39037602 9184 goto freeandexit;
b0d623f7
A
9185 }
9186 if (string_ref->attr_length > MAXPATHLEN) {
9187 error = EINVAL;
9188 goto freeandexit;
9189 }
39037602 9190
b0d623f7
A
9191 /* Check for pointer overflow in the string ref */
9192 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9193 error = EINVAL;
9194 goto freeandexit;
9195 }
9196
9197 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9198 error = EINVAL;
9199 goto freeandexit;
9200 }
9201 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9202 error = EINVAL;
9203 goto freeandexit;
9204 }
9205 }
9206
9207 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
9208 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9209 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 9210
91447636 9211 nameiflags = 0;
1c79356b 9212 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
9213 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
9214 UIO_USERSPACE, uap->path, ctx);
1c79356b 9215
55e303ae
A
9216 error = namei(&nd);
9217 if (error)
1c79356b 9218 goto freeandexit;
39236c6e 9219 vp = nd.ni_vp;
91447636 9220 nameidone(&nd);
39236c6e
A
9221
9222 /*
9223 * Switch to the root vnode for the volume
9224 */
9225 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 9226 vnode_put(vp);
39236c6e
A
9227 if (error)
9228 goto freeandexit;
39236c6e
A
9229 vp = tvp;
9230
9231 /*
9232 * If it's a union mount, the path lookup takes
9233 * us to the top layer. But we may need to descend
9234 * to a lower layer. For non-union mounts the layer
9235 * is always zero.
9236 */
9237 for (i = 0; i < (int) state->ss_union_layer; i++) {
9238 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9239 break;
9240 tvp = vp;
9241 vp = vp->v_mount->mnt_vnodecovered;
9242 if (vp == NULL) {
fe8ab488 9243 vnode_put(tvp);
39236c6e
A
9244 error = ENOENT;
9245 goto freeandexit;
9246 }
813fb2f6 9247 error = vnode_getwithref(vp);
39236c6e 9248 vnode_put(tvp);
813fb2f6
A
9249 if (error)
9250 goto freeandexit;
39236c6e 9251 }
1c79356b 9252
6d2010ae
A
9253#if CONFIG_MACF
9254 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9255 if (error) {
9256 vnode_put(vp);
9257 goto freeandexit;
9258 }
9259#endif
9260
39037602 9261
1c79356b 9262 /*
39037602 9263 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 9264 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
9265 */
9266 if (searchblock.maxmatches == 0) {
9267 nummatches = 0;
9268 goto saveandexit;
9269 }
9270
9271 /*
39236c6e 9272 * Allright, we have everything we need, so lets make that call.
39037602 9273 *
39236c6e
A
9274 * We keep special track of the return value from the file system:
9275 * EAGAIN is an acceptable error condition that shouldn't keep us
9276 * from copying out any results...
1c79356b
A
9277 */
9278
6d2010ae 9279 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
9280 searchparams1,
9281 searchparams2,
9282 &searchblock.searchattrs,
9283 (u_long)searchblock.maxmatches,
9284 &timelimit,
9285 returnattrs,
9286 &nummatches,
9287 (u_long)uap->scriptcode,
9288 (u_long)uap->options,
9289 auio,
9290 (struct searchstate *) &state->ss_fsstate,
9291 ctx);
39037602 9292
39236c6e
A
9293 /*
9294 * If it's a union mount we need to be called again
9295 * to search the mounted-on filesystem.
9296 */
9297 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9298 state->ss_union_flags = SRCHFS_START;
9299 state->ss_union_layer++; // search next layer down
9300 fserror = EAGAIN;
9301 }
9302
6d2010ae
A
9303saveandexit:
9304
9305 vnode_put(vp);
9306
9307 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9308 search state. Everything was already put into the return buffer by the vop call. */
9309
9310 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9311 goto freeandexit;
9312
39236c6e 9313 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae 9314 goto freeandexit;
39037602 9315
6d2010ae
A
9316 error = fserror;
9317
9318freeandexit:
9319
9320 FREE(searchparams1,M_TEMP);
9321
9322 return(error);
9323
9324
9325} /* end of searchfs system call */
9326
316670eb
A
9327#else /* CONFIG_SEARCHFS */
9328
9329int
9330searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9331{
9332 return (ENOTSUP);
9333}
9334
9335#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9336
9337
9338lck_grp_attr_t * nspace_group_attr;
9339lck_attr_t * nspace_lock_attr;
9340lck_grp_t * nspace_mutex_group;
9341
9342lck_mtx_t nspace_handler_lock;
9343lck_mtx_t nspace_handler_exclusion_lock;
9344
9345time_t snapshot_timestamp=0;
9346int nspace_allow_virtual_devs=0;
9347
9348void nspace_handler_init(void);
9349
/*
 * One slot of the nspace_items table: a pending namespace or snapshot
 * event that one or more threads are waiting on.  A slot whose flags
 * field is 0 is free.
 */
9350typedef struct nspace_item_info {
9351 struct vnode *vp; /* vnode the event is about; its address is also the sleep channel for waiters */
9352 void *arg; /* caller-supplied argument (NULL when NSPACE_REARM_NO_ARG was passed); read back as a uio_t by the handler */
9353 uint64_t op; /* operation value given to resolve_nspace_item_ext(); copied out to the handler */
9354 uint32_t vid; /* vnode_vid(vp) recorded when the item was queued; used with vnode_getwithvid() */
9355 uint32_t flags; /* NSPACE_ITEM_* state and event-type bits; 0 means the slot is unused */
9356 uint32_t token; /* taken from ++nspace_token_id when a handler picks the item up; returned as the error for a cancelled item */
9357 uint32_t refcount; /* number of resolve_nspace_item_ext() callers currently attached to this slot */
9358} nspace_item_info;
9359
9360#define MAX_NSPACE_ITEMS 128
9361nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9362uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9363uint32_t nspace_token_id=0;
9364uint32_t nspace_handler_timeout = 15; // seconds
9365
9366#define NSPACE_ITEM_NEW 0x0001
9367#define NSPACE_ITEM_PROCESSING 0x0002
9368#define NSPACE_ITEM_DEAD 0x0004
9369#define NSPACE_ITEM_CANCELLED 0x0008
9370#define NSPACE_ITEM_DONE 0x0010
9371#define NSPACE_ITEM_RESET_TIMER 0x0020
9372
9373#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9374#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9375
fe8ab488 9376#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9377
9378//#pragma optimization_level 0
9379
/*
 * Kinds of namespace handler.  The values are used directly as indices
 * into the nspace_handlers[] array, which is sized by NSPACE_HANDLER_COUNT.
 */
9380typedef enum {
9381 NSPACE_HANDLER_NSPACE = 0,
9382 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
9383
9384 NSPACE_HANDLER_COUNT, /* number of handler kinds; not a handler itself */
9385} nspace_type_t;
9386
/*
 * Registration record for one handler kind (one entry per nspace_type_t).
 */
9387typedef struct {
9388 uint64_t handler_tid; /* thread_tid() of the thread that registered as handler; 0 when cleared */
9389 struct proc *handler_proc; /* process that registered as handler; NULL means no handler is registered */
9390 int handler_busy; /* non-zero while a thread is inside wait_for_namespace_event() for this kind */
9391} nspace_handler_t;
9392
9393nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9394
39236c6e
A
9395/* namespace fsctl functions */
9396static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9397static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9398static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9399static nspace_type_t nspace_type_for_op(uint64_t op);
9400static int nspace_is_special_process(struct proc *proc);
9401static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9402static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9403static int validate_namespace_args (int is64bit, int size);
9404static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9405
9406
6d2010ae
A
9407static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9408{
9409 switch(nspace_type) {
9410 case NSPACE_HANDLER_NSPACE:
9411 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9412 case NSPACE_HANDLER_SNAPSHOT:
9413 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9414 default:
9415 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9416 return 0;
9417 }
9418}
9419
9420static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9421{
9422 switch(nspace_type) {
9423 case NSPACE_HANDLER_NSPACE:
9424 return NSPACE_ITEM_NSPACE_EVENT;
9425 case NSPACE_HANDLER_SNAPSHOT:
9426 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9427 default:
9428 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9429 return 0;
9430 }
9431}
9432
9433static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9434{
9435 switch(nspace_type) {
9436 case NSPACE_HANDLER_NSPACE:
9437 return FREAD | FWRITE | O_EVTONLY;
9438 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
9439 return FREAD | O_EVTONLY;
9440 default:
9441 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9442 return 0;
9443 }
9444}
9445
9446static inline nspace_type_t nspace_type_for_op(uint64_t op)
9447{
9448 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9449 case NAMESPACE_HANDLER_NSPACE_EVENT:
9450 return NSPACE_HANDLER_NSPACE;
9451 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9452 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
9453 default:
9454 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9455 return NSPACE_HANDLER_NSPACE;
9456 }
9457}
9458
9459static inline int nspace_is_special_process(struct proc *proc)
9460{
9461 int i;
9462 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9463 if (proc == nspace_handlers[i].handler_proc)
9464 return 1;
9465 }
9466 return 0;
9467}
9468
9469void
9470nspace_handler_init(void)
9471{
9472 nspace_lock_attr = lck_attr_alloc_init();
9473 nspace_group_attr = lck_grp_attr_alloc_init();
9474 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9475 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9476 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9477 memset(&nspace_items[0], 0, sizeof(nspace_items));
9478}
9479
9480void
9481nspace_proc_exit(struct proc *p)
9482{
9483 int i, event_mask = 0;
39037602 9484
6d2010ae
A
9485 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9486 if (p == nspace_handlers[i].handler_proc) {
9487 event_mask |= nspace_item_flags_for_type(i);
9488 nspace_handlers[i].handler_tid = 0;
9489 nspace_handlers[i].handler_proc = NULL;
9490 }
9491 }
9492
9493 if (event_mask == 0) {
9494 return;
9495 }
39037602
A
9496
9497 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9498 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9499 // if this process was the snapshot handler, zero snapshot_timeout
9500 snapshot_timestamp = 0;
9501 }
39037602 9502
6d2010ae
A
9503 //
9504 // unblock anyone that's waiting for the handler that died
9505 //
6d2010ae
A
9506 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9507 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9508
9509 if ( nspace_items[i].flags & event_mask ) {
9510
9511 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9512 vnode_lock_spin(nspace_items[i].vp);
9513 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9514 vnode_unlock(nspace_items[i].vp);
9515 }
9516 nspace_items[i].vp = NULL;
9517 nspace_items[i].vid = 0;
9518 nspace_items[i].flags = NSPACE_ITEM_DONE;
9519 nspace_items[i].token = 0;
39037602 9520
6d2010ae
A
9521 wakeup((caddr_t)&(nspace_items[i].vp));
9522 }
9523 }
9524 }
39037602 9525
6d2010ae
A
9526 wakeup((caddr_t)&nspace_item_idx);
9527 lck_mtx_unlock(&nspace_handler_lock);
9528}
9529
9530
/*
 * Convenience wrapper around resolve_nspace_item_ext() for callers that
 * have no extra argument to pass along.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
9536
/*
 * resolve_nspace_item_ext
 *
 * Queue an event for vnode vp in the nspace_items table and block until
 * the item is marked done or cancelled, or until the wait times out or
 * is otherwise ended by msleep.
 *
 * vp:  vnode the event is about.
 * op:  operation value; its event-type bits select the handler kind.
 * arg: stored in the slot for the handler unless it equals
 *      NSPACE_REARM_NO_ARG (then NULL is stored).  For snapshot events a
 *      non-NULL arg also sets VNEEDSSNAPSHOT on vp.
 *
 * Returns 0 without waiting when vp is not a regular file, directory or
 * symlink, when a snapshot event targets a MNTK_VIRTUALDEV mount and
 * nspace_allow_virtual_devs is 0, or when no handler is registered for
 * the event's kind.  Returns EDEADLK when the caller is itself a
 * registered handler process.  After waiting, returns 0 if the item was
 * marked NSPACE_ITEM_DONE, the item's token value if it was marked
 * NSPACE_ITEM_CANCELLED, ETIMEDOUT if the wait timed out with no timer
 * reset pending, and otherwise whatever msleep returned.
 */
39037602 9537int
6d2010ae
A
9538resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9539{
9540 int i, error, keep_waiting;
9541 struct timespec ts;
9542 nspace_type_t nspace_type = nspace_type_for_op(op);
9543
9544 // only allow namespace events on regular files, directories and symlinks.
9545 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9546 return 0;
9547 }
9548
9549 //
9550 // if this is a snapshot event and the vnode is on a
9551 // disk image just pretend nothing happened since any
9552 // change to the disk image will cause the disk image
9553 // itself to get backed up and this avoids multi-way
9554 // deadlocks between the snapshot handler and the ever
9555 // popular diskimages-helper process. the variable
9556 // nspace_allow_virtual_devs allows this behavior to
9557 // be overridden (for use by the Mobile TimeMachine
9558 // testing infrastructure which uses disk images)
9559 //
9560 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9561 && (vp->v_mount != NULL)
9562 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9563 && !nspace_allow_virtual_devs) {
9564
9565 return 0;
9566 }
9567
// with no handler registered for this kind of event there is nobody to wait for
9568 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9569 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9570 return 0;
9571 }
9572
// a handler process must not block on events that a handler has to service
9573 if (nspace_is_special_process(current_proc())) {
9574 return EDEADLK;
9575 }
9576
9577 lck_mtx_lock(&nspace_handler_lock);
9578
9579retry:
// first look for an existing entry for the same (vp, op) pair
9580 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9581 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9582 break;
9583 }
9584 }
9585
9586 if (i >= MAX_NSPACE_ITEMS) {
// no existing entry: look for a free slot (flags == 0)
9587 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9588 if (nspace_items[i].flags == 0) {
9589 break;
9590 }
9591 }
9592 } else {
// existing entry: attach to it as one more waiter
9593 nspace_items[i].refcount++;
9594 }
39037602 9595
6d2010ae
A
9596 if (i >= MAX_NSPACE_ITEMS) {
// table is full: sleep on nspace_token_id, which is woken below whenever a waiter leaves
9597 ts.tv_sec = nspace_handler_timeout;
9598 ts.tv_nsec = 0;
9599
9600 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9601 if (error == 0) {
9602 // an entry got free'd up, go see if we can get a slot
9603 goto retry;
9604 } else {
9605 lck_mtx_unlock(&nspace_handler_lock);
9606 return error;
9607 }
9608 }
9609
9610 //
9611 // if it didn't already exist, add it. if it did exist
9612 // we'll get woken up when someone does a wakeup() on
9613 // the slot in the nspace_items table.
9614 //
9615 if (vp != nspace_items[i].vp) {
9616 nspace_items[i].vp = vp;
39236c6e 9617 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9618 nspace_items[i].op = op;
9619 nspace_items[i].vid = vnode_vid(vp);
9620 nspace_items[i].flags = NSPACE_ITEM_NEW;
9621 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9622 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9623 if (arg) {
9624 vnode_lock_spin(vp);
9625 vp->v_flag |= VNEEDSSNAPSHOT;
9626 vnode_unlock(vp);
9627 }
9628 }
9629
9630 nspace_items[i].token = 0;
9631 nspace_items[i].refcount = 1;
39037602 9632
6d2010ae
A
// wake a handler sleeping on nspace_item_idx in wait_for_namespace_event()
9633 wakeup((caddr_t)&nspace_item_idx);
9634 }
9635
9636 //
9637 // Now go to sleep until the handler does a wakeup on this
9638 // slot in the nspace_items table (or we timeout).
9639 //
9640 keep_waiting = 1;
9641 while(keep_waiting) {
9642 ts.tv_sec = nspace_handler_timeout;
9643 ts.tv_nsec = 0;
9644 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9645
// the item's state takes precedence over msleep's own return value
9646 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9647 error = 0;
9648 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
// for a cancelled item the token field is what gets returned as the error
9649 error = nspace_items[i].token;
9650 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9651 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
// a timer reset was requested: consume the request and wait for another full timeout period
9652 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9653 continue;
9654 } else {
9655 error = ETIMEDOUT;
9656 }
9657 } else if (error == 0) {
9658 // hmmm, why did we get woken up?
9659 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9660 nspace_items[i].token);
39037602 9661 }
6d2010ae
A
9662
// the last waiter to leave releases the slot
9663 if (--nspace_items[i].refcount == 0) {
9664 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9665 nspace_items[i].arg = NULL;
9666 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9667 nspace_items[i].flags = 0; // this clears it for re-use
9668 }
// wake callers sleeping in the table-full path above
9669 wakeup(&nspace_token_id);
// only the RESET_TIMER 'continue' above repeats this loop
9670 keep_waiting = 0;
9671 }
9672
9673 lck_mtx_unlock(&nspace_handler_lock);
9674
9675 return error;
9676}
9677
39037602 9678int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 9679{
39037602 9680 int snapshot_error = 0;
6d2010ae 9681
39037602
A
9682 if (vp == NULL) {
9683 return 0;
9684 }
9685
9686 /* Swap files are special; skip them */
9687 if (vnode_isswap(vp)) {
9688 return 0;
9689 }
9690
9691 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9692 // the change time is within this epoch
9693 int error;
9694
9695 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9696 if (error == EDEADLK) {
9697 snapshot_error = 0;
9698 } else if (error) {
9699 if (error == EAGAIN) {
9700 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9701 } else if (error == EINTR) {
9702 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9703 snapshot_error = EINTR;
9704 }
9705 }
9706 }
9707
9708 return snapshot_error;
9709}
9710
9711int
9712get_nspace_item_status(struct vnode *vp, int32_t *status)
9713{
9714 int i;
9715
9716 lck_mtx_lock(&nspace_handler_lock);
9717 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9718 if (nspace_items[i].vp == vp) {
9719 break;
6d2010ae
A
9720 }
9721 }
9722
9723 if (i >= MAX_NSPACE_ITEMS) {
9724 lck_mtx_unlock(&nspace_handler_lock);
9725 return ENOENT;
9726 }
9727
9728 *status = nspace_items[i].flags;
9729 lck_mtx_unlock(&nspace_handler_lock);
9730 return 0;
9731}
39037602 9732
6d2010ae
A
9733
9734#if 0
/*
 * build_volfs_path (currently compiled out by the surrounding #if 0)
 *
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.
 *
 * path: output buffer.
 * len:  in: size of path; out: snprintf()'s return value plus one.
 *
 * Returns 0 on success.  If vnode_getattr() fails, a fixed placeholder
 * path is written instead and -1 is returned.
 *
 * NOTE(review): snprintf() returns the length the full string would have
 * had, so on truncation the value stored in *len can exceed the size of
 * the buffer - callers should not treat it as the number of bytes written.
 */
9735static int
9736build_volfs_path(struct vnode *vp, char *path, int *len)
9737{
9738 struct vnode_attr va;
9739 int ret;
9740
/* only the filesystem id and file id are needed to build the path */
9741 VATTR_INIT(&va);
9742 VATTR_WANTED(&va, va_fsid);
9743 VATTR_WANTED(&va, va_fileid);
9744
9745 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
9746 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
9747 ret = -1;
9748 } else {
9749 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
9750 ret = 0;
9751 }
9752
9753 return ret;
9754}
9755#endif
9756
9757//
9758// Note: this function does NOT check permissions on all of the
9759// parent directories leading to this vnode. It should only be
9760// called on behalf of a root process. Otherwise a process may
9761// get access to a file because the file itself is readable even
9762// though its parent directories would prevent access.
9763//
9764static int
9765vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9766{
9767 int error, action;
9768
9769 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9770 return error;
9771 }
9772
9773#if CONFIG_MACF
9774 error = mac_vnode_check_open(ctx, vp, fmode);
9775 if (error)
9776 return error;
9777#endif
1c79356b 9778
6d2010ae
A
9779 /* compute action to be authorized */
9780 action = 0;
9781 if (fmode & FREAD) {
9782 action |= KAUTH_VNODE_READ_DATA;
9783 }
9784 if (fmode & (FWRITE | O_TRUNC)) {
9785 /*
9786 * If we are writing, appending, and not truncating,
9787 * indicate that we are appending so that if the
9788 * UF_APPEND or SF_APPEND bits are set, we do not deny
9789 * the open.
9790 */
9791 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9792 action |= KAUTH_VNODE_APPEND_DATA;
9793 } else {
9794 action |= KAUTH_VNODE_WRITE_DATA;
9795 }
9796 }
1c79356b 9797
6d2010ae
A
9798 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9799 return error;
39037602 9800
1c79356b 9801
6d2010ae
A
9802 //
9803 // if the vnode is tagged VOPENEVT and the current process
9804 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9805 // flag to the open mode so that this open won't count against
9806 // the vnode when carbon delete() does a vnode_isinuse() to see
9807 // if a file is currently in use. this allows spotlight
9808 // importers to not interfere with carbon apps that depend on
9809 // the no-delete-if-busy semantics of carbon delete().
9810 //
9811 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9812 fmode |= O_EVTONLY;
9813 }
1c79356b 9814
6d2010ae
A
9815 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9816 return error;
9817 }
9818 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9819 VNOP_CLOSE(vp, fmode, ctx);
9820 return error;
9821 }
1c79356b 9822
39037602 9823 /* Call out to allow 3rd party notification of open.
6d2010ae
A
9824 * Ignore result of kauth_authorize_fileop call.
9825 */
4b17d6b6
A
9826#if CONFIG_MACF
9827 mac_vnode_notify_open(ctx, vp, fmode);
9828#endif
39037602 9829 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
6d2010ae 9830 (uintptr_t)vp, 0);
1c79356b 9831
1c79356b 9832
6d2010ae
A
9833 return 0;
9834}
1c79356b 9835
/*
 * wait_for_namespace_event
 *
 * Body of the "get next event" handler fsctl: block until an item of the
 * given kind is queued in nspace_items, open the item's vnode, install a
 * file descriptor for it in the calling process, and copy the item's
 * details out to the user addresses held in nhd.
 *
 * nhd:         user addresses for the token, flags and fd outputs, plus
 *              the optional infoptr and objid outputs (skipped when 0).
 * nspace_type: handler kind the caller is acting as.
 *
 * Only one thread per kind may be in here at a time; a second caller gets
 * EBUSY.  If no handler is registered for the kind yet, the calling
 * thread and process are registered.  A snapshot handler gets EINVAL
 * while snapshot_timestamp is 0 or ~0.
 *
 * If preparing or copying out an item fails, the descriptor and open are
 * undone as far as they got and the item is marked NSPACE_ITEM_DONE so
 * that its waiters are released.
 *
 * Returns 0 once an item has been handed to the caller, otherwise an
 * error from msleep or from one of the steps above.
 */
6d2010ae 9836static int
39236c6e 9837wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 9838{
39037602
A
9839 int i;
9840 int error = 0;
9841 int unblock = 0;
6d2010ae 9842 task_t curtask;
39037602 9843
6d2010ae
A
/* allow only one thread per handler kind in here at a time */
9844 lck_mtx_lock(&nspace_handler_exclusion_lock);
9845 if (nspace_handlers[nspace_type].handler_busy) {
9846 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9847 return EBUSY;
9848 }
39037602 9849
6d2010ae
A
9850 nspace_handlers[nspace_type].handler_busy = 1;
9851 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
9852
9853 /*
6d2010ae
A
9854 * Any process that gets here will be one of the namespace handlers.
9855 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9856 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 9857 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
9858 * process.
9859 */
9860 curtask = current_task();
39037602
A
9861 bsd_set_dependency_capable (curtask);
9862
6d2010ae
A
/* nspace_handler_lock is taken here and released just before returning */
9863 lck_mtx_lock(&nspace_handler_lock);
/* the first caller for this kind becomes the registered handler */
9864 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9865 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9866 nspace_handlers[nspace_type].handler_proc = current_proc();
9867 }
39037602
A
9868
9869 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9870 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9871 error = EINVAL;
9872 }
9873
/* every path through the body ends in 'continue' or 'break', so this loops only while waiting for an item */
6d2010ae 9874 while (error == 0) {
39037602
A
9875
9876 /* Try to find matching namespace item */
9877 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9878 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
9879 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9880 break;
6d2010ae 9881 }
6d2010ae
A
9882 }
9883 }
39236c6e 9884
39037602
A
9885 if (i >= MAX_NSPACE_ITEMS) {
9886 /* Nothing is there yet. Wait for wake up and retry */
6d2010ae
A
9887 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9888 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9889 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
9890 error = EINVAL;
9891 break;
9892 }
/* a non-zero msleep result ends the loop through the while condition */
39037602 9893 continue;
6d2010ae 9894 }
39037602
A
9895
/* claim the item: NEW -> PROCESSING, and give it a fresh token */
9896 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9897 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9898 nspace_items[i].token = ++nspace_token_id;
9899
9900 assert(nspace_items[i].vp);
9901 struct fileproc *fp;
9902 int32_t indx;
9903 int32_t fmode;
9904 struct proc *p = current_proc();
9905 vfs_context_t ctx = vfs_context_current();
9906 struct vnode_attr va;
/* these record how far setup got, so that cleanup undoes only what was done */
9907 bool vn_get_succsessful = false;
9908 bool vn_open_successful = false;
9909 bool fp_alloc_successful = false;
9910
9911 /*
9912 * Use vnode pointer to acquire a file descriptor for
9913 * hand-off to userland
9914 */
9915 fmode = nspace_open_flags_for_type(nspace_type);
9916 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9917 if (error) goto cleanup;
9918 vn_get_succsessful = true;
9919
9920 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9921 if (error) goto cleanup;
9922 vn_open_successful = true;
9923
9924 error = falloc(p, &fp, &indx, ctx);
9925 if (error) goto cleanup;
9926 fp_alloc_successful = true;
9927
/* attach the opened vnode to the newly allocated file structure */
9928 fp->f_fglob->fg_flag = fmode;
9929 fp->f_fglob->fg_ops = &vnops;
9930 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9931
9932 proc_fdlock(p);
9933 procfdtbl_releasefd(p, indx, NULL);
9934 fp_drop(p, indx, fp, 1);
9935 proc_fdunlock(p);
9936
9937 /*
9938 * All variants of the namespace handler struct support these three fields:
9939 * token, flags, and the FD pointer
9940 */
9941 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9942 if (error) goto cleanup;
9943 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9944 if (error) goto cleanup;
9945 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9946 if (error) goto cleanup;
9947
9948 /*
9949 * Handle optional fields:
9950 * extended version support an info ptr (offset, length), and the
9951 *
9952 * namedata version supports a unique per-link object ID
9953 *
9954 */
9955 if (nhd->infoptr) {
9956 uio_t uio = (uio_t)nspace_items[i].arg;
9957 uint64_t u_offset, u_length;
9958
9959 if (uio) {
9960 u_offset = uio_offset(uio);
9961 u_length = uio_resid(uio);
9962 } else {
9963 u_offset = 0;
9964 u_length = 0;
9965 }
/* the info area is written as two consecutive 64-bit values: offset, then length */
9966 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9967 if (error) goto cleanup;
9968 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9969 if (error) goto cleanup;
9970 }
9971
9972 if (nhd->objid) {
9973 VATTR_INIT(&va);
9974 VATTR_WANTED(&va, va_linkid);
9975 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9976 if (error) goto cleanup;
9977
/* a link id of 0 is reported when the filesystem does not supply va_linkid */
9978 uint64_t linkid = 0;
9979 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9980 linkid = (uint64_t)va.va_linkid;
9981 }
9982 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9983 }
9984cleanup:
9985 if (error) {
9986 if (fp_alloc_successful) fp_free(p, indx, fp);
9987 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
/* the hand-off failed, so the item's waiters are released below */
9988 unblock = 1;
9989 }
9990
9991 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9992
9993 break;
6d2010ae 9994 }
39037602 9995
6d2010ae
A
/* unblock is only set in the cleanup path above, where i indexes the failed item */
9996 if (unblock) {
9997 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9998 vnode_lock_spin(nspace_items[i].vp);
9999 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10000 vnode_unlock(nspace_items[i].vp);
10001 }
10002 nspace_items[i].vp = NULL;
10003 nspace_items[i].vid = 0;
10004 nspace_items[i].flags = NSPACE_ITEM_DONE;
10005 nspace_items[i].token = 0;
39037602 10006
6d2010ae
A
10007 wakeup((caddr_t)&(nspace_items[i].vp));
10008 }
39037602 10009
6d2010ae
A
10010 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
10011 // just go through every snapshot event and unblock it immediately.
10012 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 10013 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
10014 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
10015 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
10016 nspace_items[i].vp = NULL;
10017 nspace_items[i].vid = 0;
10018 nspace_items[i].flags = NSPACE_ITEM_DONE;
10019 nspace_items[i].token = 0;
39037602
A
10020
10021 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
10022 }
10023 }
10024 }
10025 }
10026 }
39037602 10027
6d2010ae 10028 lck_mtx_unlock(&nspace_handler_lock);
39037602 10029
6d2010ae
A
/* let the next caller for this handler kind in */
10030 lck_mtx_lock(&nspace_handler_exclusion_lock);
10031 nspace_handlers[nspace_type].handler_busy = 0;
10032 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 10033
6d2010ae
A
10034 return error;
10035}
1c79356b 10036
39236c6e
A
10037static inline int validate_namespace_args (int is64bit, int size) {
10038
10039 if (is64bit) {
10040 /* Must be one of these */
10041 if (size == sizeof(user64_namespace_handler_info)) {
10042 goto sizeok;
10043 }
10044 if (size == sizeof(user64_namespace_handler_info_ext)) {
10045 goto sizeok;
10046 }
10047 if (size == sizeof(user64_namespace_handler_data)) {
10048 goto sizeok;
10049 }
10050 return EINVAL;
10051 }
10052 else {
10053 /* 32 bit -- must be one of these */
10054 if (size == sizeof(user32_namespace_handler_info)) {
10055 goto sizeok;
10056 }
10057 if (size == sizeof(user32_namespace_handler_info_ext)) {
10058 goto sizeok;
10059 }
10060 if (size == sizeof(user32_namespace_handler_data)) {
10061 goto sizeok;
10062 }
10063 return EINVAL;
10064 }
10065
10066sizeok:
10067
10068 return 0;
10069
10070}
1c79356b 10071
6d2010ae
A
10072static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
10073{
10074 int error = 0;
39236c6e 10075 namespace_handler_data nhd;
39037602 10076
39236c6e
A
10077 bzero (&nhd, sizeof(namespace_handler_data));
10078
6d2010ae
A
10079 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10080 return error;
10081 }
39037602 10082
39236c6e
A
10083 error = validate_namespace_args (is64bit, size);
10084 if (error) {
10085 return error;
6d2010ae 10086 }
39037602 10087
39236c6e
A
10088 /* Copy in the userland pointers into our kernel-only struct */
10089
6d2010ae 10090 if (is64bit) {
39236c6e
A
10091 /* 64 bit userland structures */
10092 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
10093 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
10094 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
10095
10096 /* If the size is greater than the standard info struct, add in extra fields */
10097 if (size > (sizeof(user64_namespace_handler_info))) {
10098 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
10099 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
10100 }
10101 if (size == (sizeof(user64_namespace_handler_data))) {
10102 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
10103 }
10104 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 10105 }
39037602 10106 }
39236c6e
A
10107 else {
10108 /* 32 bit userland structures */
10109 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10110 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10111 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 10112
39236c6e
A
10113 if (size > (sizeof(user32_namespace_handler_info))) {
10114 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10115 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10116 }
10117 if (size == (sizeof(user32_namespace_handler_data))) {
10118 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10119 }
10120 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
10121 }
10122 }
39037602 10123
39236c6e 10124 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 10125}
1c79356b 10126
5ba3f43e
A
10127static unsigned long
10128fsctl_bogus_command_compat(unsigned long cmd)
10129{
10130
10131 switch (cmd) {
10132 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10133 return (FSIOC_SYNC_VOLUME);
10134 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10135 return (FSIOC_ROUTEFS_SETROUTEID);
10136 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10137 return (FSIOC_SET_PACKAGE_EXTS);
10138 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10139 return (FSIOC_NAMESPACE_HANDLER_GET);
10140 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10141 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10142 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10143 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10144 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10145 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10146 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10147 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10148 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10149 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10150 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10151 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10152 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10153 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10154 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10155 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10156 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10157 return (DISK_CONDITIONER_IOC_GET);
10158 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10159 return (DISK_CONDITIONER_IOC_SET);
10160 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10161 return (FSIOC_FIOSEEKHOLE);
10162 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10163 return (FSIOC_FIOSEEKDATA);
10164 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10165 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10166 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10167 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10168 }
10169
10170 return (cmd);
10171}
10172
1c79356b
A
10173/*
10174 * Make a filesystem-specific control call:
10175 */
1c79356b 10176/* ARGSUSED */
b0d623f7
A
10177static int
10178fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 10179{
b0d623f7 10180 int error=0;
91447636 10181 boolean_t is64bit;
2d21ac55 10182 u_int size;
1c79356b 10183#define STK_PARAMS 128
39037602 10184 char stkbuf[STK_PARAMS] = {0};
1c79356b 10185 caddr_t data, memp;
b0d623f7 10186 vnode_t vp = *arg_vp;
1c79356b 10187
5ba3f43e
A
10188 cmd = fsctl_bogus_command_compat(cmd);
10189
1c79356b
A
10190 size = IOCPARM_LEN(cmd);
10191 if (size > IOCPARM_MAX) return (EINVAL);
10192
6d2010ae 10193 is64bit = proc_is64bit(p);
91447636 10194
1c79356b 10195 memp = NULL;
04b8595b 10196
1c79356b
A
10197 if (size > sizeof (stkbuf)) {
10198 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10199 data = memp;
10200 } else {
91447636 10201 data = &stkbuf[0];
1c79356b 10202 };
39037602 10203
1c79356b
A
10204 if (cmd & IOC_IN) {
10205 if (size) {
b0d623f7 10206 error = copyin(udata, data, size);
39037602 10207 if (error) {
fe8ab488 10208 if (memp) {
39037602 10209 kfree (memp, size);
fe8ab488
A
10210 }
10211 return error;
10212 }
1c79356b 10213 } else {
6d2010ae
A
10214 if (is64bit) {
10215 *(user_addr_t *)data = udata;
10216 }
10217 else {
10218 *(uint32_t *)data = (uint32_t)udata;
10219 }
1c79356b
A
10220 };
10221 } else if ((cmd & IOC_OUT) && size) {
10222 /*
10223 * Zero the buffer so the user always
10224 * gets back something deterministic.
10225 */
10226 bzero(data, size);
91447636 10227 } else if (cmd & IOC_VOID) {
b0d623f7 10228 if (is64bit) {
6d2010ae 10229 *(user_addr_t *)data = udata;
b0d623f7
A
10230 }
10231 else {
6d2010ae 10232 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 10233 }
91447636 10234 }
1c79356b 10235
b0d623f7 10236 /* Check to see if it's a generic command */
5ba3f43e 10237 switch (cmd) {
91447636 10238
5ba3f43e 10239 case FSIOC_SYNC_VOLUME: {
fe8ab488
A
10240 mount_t mp = vp->v_mount;
10241 int arg = *(uint32_t*)data;
b0d623f7 10242
fe8ab488
A
10243 /* record vid of vp so we can drop it below. */
10244 uint32_t vvid = vp->v_id;
b0d623f7 10245
fe8ab488
A
10246 /*
10247 * Then grab mount_iterref so that we can release the vnode.
10248 * Without this, a thread may call vnode_iterate_prepare then
10249 * get into a deadlock because we've never released the root vp
10250 */
10251 error = mount_iterref (mp, 0);
10252 if (error) {
10253 break;
10254 }
10255 vnode_put(vp);
10256
10257 /* issue the sync for this volume */
10258 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10259
39037602 10260 /*
fe8ab488
A
10261 * Then release the mount_iterref once we're done syncing; it's not
10262 * needed for the VNOP_IOCTL below
10263 */
10264 mount_iterdrop(mp);
10265
10266 if (arg & FSCTL_SYNC_FULLSYNC) {
10267 /* re-obtain vnode iocount on the root vp, if possible */
10268 error = vnode_getwithvid (vp, vvid);
10269 if (error == 0) {
10270 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10271 vnode_put (vp);
10272 }
b0d623f7 10273 }
fe8ab488
A
10274 /* mark the argument VP as having been released */
10275 *arg_vp = NULL;
b0d623f7 10276 }
fe8ab488 10277 break;
b0d623f7 10278
5ba3f43e 10279 case FSIOC_ROUTEFS_SETROUTEID: {
490019cf
A
10280#if ROUTEFS
10281 char routepath[MAXPATHLEN];
10282 size_t len = 0;
39037602 10283
490019cf
A
10284 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10285 break;
10286 }
10287 bzero(routepath, MAXPATHLEN);
10288 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10289 if (error) {
10290 break;
10291 }
10292 error = routefs_kernel_mount(routepath);
10293 if (error) {
10294 break;
10295 }
10296#endif
10297 }
10298 break;
10299
5ba3f43e 10300 case FSIOC_SET_PACKAGE_EXTS: {
fe8ab488
A
10301 user_addr_t ext_strings;
10302 uint32_t num_entries;
10303 uint32_t max_width;
b0d623f7 10304
39037602
A
10305 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10306 break;
10307
fe8ab488
A
10308 if ( (is64bit && size != sizeof(user64_package_ext_info))
10309 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10310
10311 // either you're 64-bit and passed a 64-bit struct or
10312 // you're 32-bit and passed a 32-bit struct. otherwise
10313 // it's not ok.
10314 error = EINVAL;
10315 break;
10316 }
10317
10318 if (is64bit) {
10319 ext_strings = ((user64_package_ext_info *)data)->strings;
10320 num_entries = ((user64_package_ext_info *)data)->num_entries;
10321 max_width = ((user64_package_ext_info *)data)->max_width;
10322 } else {
10323 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10324 num_entries = ((user32_package_ext_info *)data)->num_entries;
10325 max_width = ((user32_package_ext_info *)data)->max_width;
10326 }
10327 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 10328 }
fe8ab488 10329 break;
2d21ac55 10330
39037602 10331 /* namespace handlers */
5ba3f43e 10332 case FSIOC_NAMESPACE_HANDLER_GET: {
fe8ab488 10333 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 10334 }
fe8ab488 10335 break;
b0d623f7 10336
fe8ab488 10337 /* Snapshot handlers */
5ba3f43e 10338 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
fe8ab488 10339 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39037602 10340 }
fe8ab488 10341 break;
39236c6e 10342
5ba3f43e 10343 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
fe8ab488
A
10344 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10345 }
39037602 10346 break;
39236c6e 10347
5ba3f43e 10348 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
fe8ab488
A
10349 uint32_t token, val;
10350 int i;
39236c6e 10351
fe8ab488
A
10352 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10353 break;
10354 }
39236c6e 10355
fe8ab488
A
10356 if (!nspace_is_special_process(p)) {
10357 error = EINVAL;
10358 break;
10359 }
6d2010ae 10360
fe8ab488
A
10361 token = ((uint32_t *)data)[0];
10362 val = ((uint32_t *)data)[1];
6d2010ae 10363
fe8ab488 10364 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10365
fe8ab488
A
10366 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10367 if (nspace_items[i].token == token) {
10368 break; /* exit for loop, not case stmt */
10369 }
10370 }
6d2010ae 10371
fe8ab488
A
10372 if (i >= MAX_NSPACE_ITEMS) {
10373 error = ENOENT;
10374 } else {
10375 //
10376 // if this bit is set, when resolve_nspace_item() times out
10377 // it will loop and go back to sleep.
10378 //
10379 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10380 }
6d2010ae 10381
fe8ab488
A
10382 lck_mtx_unlock(&nspace_handler_lock);
10383
10384 if (error) {
10385 printf("nspace-handler-update: did not find token %u\n", token);
10386 }
39037602 10387 }
fe8ab488 10388 break;
39037602 10389
5ba3f43e 10390 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
fe8ab488
A
10391 uint32_t token, val;
10392 int i;
10393
10394 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10395 break;
10396 }
6d2010ae 10397
fe8ab488
A
10398 if (!nspace_is_special_process(p)) {
10399 error = EINVAL;
10400 break;
10401 }
6d2010ae 10402
fe8ab488
A
10403 token = ((uint32_t *)data)[0];
10404 val = ((uint32_t *)data)[1];
6d2010ae 10405
fe8ab488 10406 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10407
fe8ab488
A
10408 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10409 if (nspace_items[i].token == token) {
10410 break; /* exit for loop, not case statement */
10411 }
10412 }
6d2010ae 10413
fe8ab488
A
10414 if (i >= MAX_NSPACE_ITEMS) {
10415 printf("nspace-handler-unblock: did not find token %u\n", token);
10416 error = ENOENT;
10417 } else {
10418 if (val == 0 && nspace_items[i].vp) {
10419 vnode_lock_spin(nspace_items[i].vp);
10420 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10421 vnode_unlock(nspace_items[i].vp);
10422 }
6d2010ae 10423
fe8ab488
A
10424 nspace_items[i].vp = NULL;
10425 nspace_items[i].arg = NULL;
10426 nspace_items[i].op = 0;
10427 nspace_items[i].vid = 0;
10428 nspace_items[i].flags = NSPACE_ITEM_DONE;
10429 nspace_items[i].token = 0;
6d2010ae 10430
fe8ab488
A
10431 wakeup((caddr_t)&(nspace_items[i].vp));
10432 }
10433
10434 lck_mtx_unlock(&nspace_handler_lock);
39037602 10435 }
fe8ab488 10436 break;
6d2010ae 10437
5ba3f43e 10438 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
fe8ab488
A
10439 uint32_t token, val;
10440 int i;
6d2010ae 10441
fe8ab488 10442 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10443 break;
10444 }
6d2010ae 10445
fe8ab488
A
10446 if (!nspace_is_special_process(p)) {
10447 error = EINVAL;
10448 break;
6d2010ae
A
10449 }
10450
fe8ab488
A
10451 token = ((uint32_t *)data)[0];
10452 val = ((uint32_t *)data)[1];
6d2010ae 10453
fe8ab488 10454 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10455
fe8ab488
A
10456 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10457 if (nspace_items[i].token == token) {
10458 break; /* exit for loop, not case stmt */
10459 }
10460 }
6d2010ae 10461
fe8ab488
A
10462 if (i >= MAX_NSPACE_ITEMS) {
10463 printf("nspace-handler-cancel: did not find token %u\n", token);
10464 error = ENOENT;
10465 } else {
10466 if (nspace_items[i].vp) {
10467 vnode_lock_spin(nspace_items[i].vp);
10468 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10469 vnode_unlock(nspace_items[i].vp);
10470 }
6d2010ae 10471
39037602
A
10472 nspace_items[i].vp = NULL;
10473 nspace_items[i].arg = NULL;
fe8ab488
A
10474 nspace_items[i].vid = 0;
10475 nspace_items[i].token = val;
10476 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
39037602 10477 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10478
fe8ab488
A
10479 wakeup((caddr_t)&(nspace_items[i].vp));
10480 }
6d2010ae 10481
fe8ab488 10482 lck_mtx_unlock(&nspace_handler_lock);
39037602 10483 }
fe8ab488 10484 break;
6d2010ae 10485
5ba3f43e 10486 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
fe8ab488 10487 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
10488 break;
10489 }
6d2010ae 10490
fe8ab488 10491 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10492
fe8ab488
A
10493 lck_mtx_lock(&nspace_handler_lock);
10494 snapshot_timestamp = ((uint32_t *)data)[0];
10495 wakeup(&nspace_item_idx);
10496 lck_mtx_unlock(&nspace_handler_lock);
10497 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 10498
39037602 10499 }
fe8ab488 10500 break;
6d2010ae 10501
5ba3f43e 10502 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
fe8ab488
A
10503 {
10504 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10505 break;
10506 }
6d2010ae 10507
fe8ab488
A
10508 lck_mtx_lock(&nspace_handler_lock);
10509 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10510 lck_mtx_unlock(&nspace_handler_lock);
10511 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10512 nspace_allow_virtual_devs ? "" : " NOT");
10513 error = 0;
6d2010ae 10514
6d2010ae 10515 }
fe8ab488 10516 break;
6d2010ae 10517
5ba3f43e 10518 case FSIOC_SET_FSTYPENAME_OVERRIDE:
39037602 10519 {
fe8ab488
A
10520 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10521 break;
10522 }
10523 if (vp->v_mount) {
10524 mount_lock(vp->v_mount);
10525 if (data[0] != 0) {
10526 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10527 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10528 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10529 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10530 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10531 }
10532 } else {
10533 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10534 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10535 }
10536 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10537 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10538 }
fe8ab488 10539 mount_unlock(vp->v_mount);
6d2010ae 10540 }
6d2010ae 10541 }
fe8ab488 10542 break;
39037602 10543
5ba3f43e
A
10544 case DISK_CONDITIONER_IOC_GET: {
10545 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10546 }
10547 break;
10548
10549 case DISK_CONDITIONER_IOC_SET: {
10550 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10551 }
10552 break;
10553
fe8ab488 10554 default: {
a39ff7e2
A
10555 /* other, known commands shouldn't be passed down here */
10556 switch (cmd) {
10557 case F_PUNCHHOLE:
10558 case F_TRIM_ACTIVE_FILE:
10559 case F_RDADVISE:
10560 case F_TRANSCODEKEY:
10561 case F_GETPROTECTIONLEVEL:
10562 case F_GETDEFAULTPROTLEVEL:
10563 case F_MAKECOMPRESSED:
10564 case F_SET_GREEDY_MODE:
10565 case F_SETSTATICCONTENT:
10566 case F_SETIOTYPE:
10567 case F_SETBACKINGSTORE:
10568 case F_GETPATH_MTMINFO:
10569 case APFSIOC_REVERT_TO_SNAPSHOT:
10570 case FSIOC_FIOSEEKHOLE:
10571 case FSIOC_FIOSEEKDATA:
10572 case HFS_GET_BOOT_INFO:
10573 case HFS_SET_BOOT_INFO:
10574 case FIOPINSWAP:
10575 case F_CHKCLEAN:
10576 case F_FULLFSYNC:
10577 case F_BARRIERFSYNC:
10578 case F_FREEZE_FS:
10579 case F_THAW_FS:
10580 error = EINVAL;
10581 goto outdrop;
10582 }
fe8ab488 10583 /* Invoke the filesystem-specific code */
5ba3f43e 10584 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
fe8ab488
A
10585 }
10586
10587 } /* end switch stmt */
10588
1c79356b 10589 /*
fe8ab488 10590 * if no errors, copy any data to user. Size was
1c79356b
A
10591 * already set and checked above.
10592 */
39037602 10593 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 10594 error = copyout(data, udata, size);
39037602 10595
a39ff7e2 10596outdrop:
fe8ab488
A
10597 if (memp) {
10598 kfree(memp, size);
10599 }
39037602 10600
1c79356b
A
10601 return error;
10602}
b0d623f7
A
10603
10604/* ARGSUSED */
10605int
10606fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10607{
10608 int error;
39037602 10609 struct nameidata nd;
b0d623f7
A
10610 u_long nameiflags;
10611 vnode_t vp = NULL;
10612 vfs_context_t ctx = vfs_context_current();
10613
10614 AUDIT_ARG(cmd, uap->cmd);
10615 AUDIT_ARG(value32, uap->options);
10616 /* Get the vnode for the file we are getting info on: */
10617 nameiflags = 0;
10618 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
10619 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10620 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
10621 if ((error = namei(&nd))) goto done;
10622 vp = nd.ni_vp;
10623 nameidone(&nd);
10624
10625#if CONFIG_MACF
10626 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10627 if (error) {
10628 goto done;
10629 }
10630#endif
10631
10632 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10633
10634done:
10635 if (vp)
10636 vnode_put(vp);
10637 return error;
10638}
10639/* ARGSUSED */
10640int
10641ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10642{
10643 int error;
10644 vnode_t vp = NULL;
10645 vfs_context_t ctx = vfs_context_current();
10646 int fd = -1;
10647
10648 AUDIT_ARG(fd, uap->fd);
10649 AUDIT_ARG(cmd, uap->cmd);
10650 AUDIT_ARG(value32, uap->options);
39037602 10651
b0d623f7
A
10652 /* Get the vnode for the file we are getting info on: */
10653 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 10654 return error;
b0d623f7
A
10655 fd = uap->fd;
10656 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
10657 file_drop(fd);
10658 return error;
b0d623f7
A
10659 }
10660
10661#if CONFIG_MACF
3e170ce0
A
10662 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10663 file_drop(fd);
10664 vnode_put(vp);
10665 return error;
b0d623f7
A
10666 }
10667#endif
10668
10669 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10670
3e170ce0 10671 file_drop(fd);
b0d623f7 10672
3e170ce0
A
10673 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10674 if (vp) {
b0d623f7 10675 vnode_put(vp);
3e170ce0
A
10676 }
10677
b0d623f7
A
10678 return error;
10679}
1c79356b 10680/* end of fsctl system call */
0b4e3aa0 10681
91447636
A
10682/*
10683 * Retrieve the data of an extended attribute.
10684 */
10685int
2d21ac55 10686getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 10687{
2d21ac55 10688 vnode_t vp;
91447636
A
10689 struct nameidata nd;
10690 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10691 vfs_context_t ctx = vfs_context_current();
91447636
A
10692 uio_t auio = NULL;
10693 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10694 size_t attrsize = 0;
10695 size_t namelen;
b0d623f7 10696 u_int32_t nameiflags;
91447636
A
10697 int error;
10698 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10699
2d21ac55 10700 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10701 return (EINVAL);
55e303ae 10702
91447636 10703 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10704 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10705 if ((error = namei(&nd))) {
10706 return (error);
10707 }
10708 vp = nd.ni_vp;
10709 nameidone(&nd);
55e303ae 10710
d9a64523
A
10711 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10712 if (error != 0) {
91447636
A
10713 goto out;
10714 }
10715 if (xattr_protected(attrname)) {
6d2010ae
A
10716 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10717 error = EPERM;
10718 goto out;
10719 }
91447636 10720 }
b0d623f7
A
10721 /*
10722 * the specific check for 0xffffffff is a hack to preserve
10723 * binaray compatibilty in K64 with applications that discovered
39037602 10724 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
10725 * just the size of the indicated extended attribute being returned.
10726 * this isn't part of the documented behavior, but because of the
10727 * original implemtation's check for "uap->size > 0", this behavior
10728 * was allowed. In K32 that check turned into a signed comparison
10729 * even though uap->size is unsigned... in K64, we blow by that
10730 * check because uap->size is unsigned and doesn't get sign smeared
39037602 10731 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
10732 * check to limit the maximum size of the buffer being passed in...
10733 * unfortunately, the underlying fileystems seem to just malloc
10734 * the requested size even if the actual extended attribute is tiny.
10735 * because that malloc is for kernel wired memory, we have to put a
10736 * sane limit on it.
10737 *
10738 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10739 * U64 running on K64 will yield -1 (64 bits wide)
10740 * U32/U64 running on K32 will yield -1 (32 bits wide)
10741 */
10742 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10743 goto no_uio;
10744
b0d623f7 10745 if (uap->value) {
6d2010ae
A
10746 if (uap->size > (size_t)XATTR_MAXSIZE)
10747 uap->size = XATTR_MAXSIZE;
39037602 10748
91447636
A
10749 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10750 &uio_buf[0], sizeof(uio_buf));
10751 uio_addiov(auio, uap->value, uap->size);
10752 }
b0d623f7 10753no_uio:
2d21ac55 10754 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
10755out:
10756 vnode_put(vp);
55e303ae 10757
91447636
A
10758 if (auio) {
10759 *retval = uap->size - uio_resid(auio);
10760 } else {
10761 *retval = (user_ssize_t)attrsize;
55e303ae
A
10762 }
10763
91447636
A
10764 return (error);
10765}
55e303ae 10766
91447636
A
10767/*
10768 * Retrieve the data of an extended attribute.
10769 */
10770int
2d21ac55 10771fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 10772{
2d21ac55 10773 vnode_t vp;
91447636 10774 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10775 uio_t auio = NULL;
10776 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10777 size_t attrsize = 0;
10778 size_t namelen;
10779 int error;
10780 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10781
2d21ac55 10782 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10783 return (EINVAL);
55e303ae 10784
91447636
A
10785 if ( (error = file_vnode(uap->fd, &vp)) ) {
10786 return (error);
10787 }
10788 if ( (error = vnode_getwithref(vp)) ) {
10789 file_drop(uap->fd);
10790 return(error);
10791 }
d9a64523
A
10792 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10793 if (error != 0) {
91447636
A
10794 goto out;
10795 }
10796 if (xattr_protected(attrname)) {
10797 error = EPERM;
10798 goto out;
10799 }
10800 if (uap->value && uap->size > 0) {
10801 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10802 &uio_buf[0], sizeof(uio_buf));
10803 uio_addiov(auio, uap->value, uap->size);
10804 }
55e303ae 10805
2d21ac55 10806 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
10807out:
10808 (void)vnode_put(vp);
10809 file_drop(uap->fd);
55e303ae 10810
91447636
A
10811 if (auio) {
10812 *retval = uap->size - uio_resid(auio);
10813 } else {
10814 *retval = (user_ssize_t)attrsize;
10815 }
10816 return (error);
10817}
55e303ae 10818
91447636
A
10819/*
10820 * Set the data of an extended attribute.
10821 */
55e303ae 10822int
2d21ac55 10823setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 10824{
2d21ac55 10825 vnode_t vp;
91447636
A
10826 struct nameidata nd;
10827 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10828 vfs_context_t ctx = vfs_context_current();
91447636
A
10829 uio_t auio = NULL;
10830 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10831 size_t namelen;
b0d623f7 10832 u_int32_t nameiflags;
91447636
A
10833 int error;
10834 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10835
2d21ac55 10836 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10837 return (EINVAL);
55e303ae 10838
d9a64523
A
10839 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10840 if (error != 0) {
6d2010ae
A
10841 if (error == EPERM) {
10842 /* if the string won't fit in attrname, copyinstr emits EPERM */
10843 return (ENAMETOOLONG);
10844 }
10845 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10846 return error;
91447636
A
10847 }
10848 if (xattr_protected(attrname))
10849 return(EPERM);
2d21ac55 10850 if (uap->size != 0 && uap->value == 0) {
91447636 10851 return (EINVAL);
55e303ae 10852 }
55e303ae 10853
91447636 10854 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10855 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10856 if ((error = namei(&nd))) {
10857 return (error);
10858 }
10859 vp = nd.ni_vp;
10860 nameidone(&nd);
55e303ae 10861
91447636
A
10862 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10863 &uio_buf[0], sizeof(uio_buf));
10864 uio_addiov(auio, uap->value, uap->size);
55e303ae 10865
2d21ac55
A
10866 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10867#if CONFIG_FSE
10868 if (error == 0) {
10869 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10870 FSE_ARG_VNODE, vp,
10871 FSE_ARG_DONE);
10872 }
10873#endif
91447636
A
10874 vnode_put(vp);
10875 *retval = 0;
10876 return (error);
10877}
55e303ae 10878
91447636
A
10879/*
10880 * Set the data of an extended attribute.
10881 */
10882int
2d21ac55 10883fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 10884{
2d21ac55 10885 vnode_t vp;
91447636 10886 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10887 uio_t auio = NULL;
10888 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10889 size_t namelen;
10890 int error;
10891 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 10892#if CONFIG_FSE
2d21ac55 10893 vfs_context_t ctx = vfs_context_current();
6d2010ae 10894#endif
55e303ae 10895
2d21ac55 10896 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10897 return (EINVAL);
55e303ae 10898
d9a64523
A
10899 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10900 if (error != 0) {
3e170ce0
A
10901 if (error == EPERM) {
10902 /* if the string won't fit in attrname, copyinstr emits EPERM */
10903 return (ENAMETOOLONG);
10904 }
10905 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10906 return error;
55e303ae 10907 }
91447636
A
10908 if (xattr_protected(attrname))
10909 return(EPERM);
2d21ac55 10910 if (uap->size != 0 && uap->value == 0) {
91447636 10911 return (EINVAL);
55e303ae 10912 }
91447636
A
10913 if ( (error = file_vnode(uap->fd, &vp)) ) {
10914 return (error);
55e303ae 10915 }
91447636
A
10916 if ( (error = vnode_getwithref(vp)) ) {
10917 file_drop(uap->fd);
10918 return(error);
10919 }
10920 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10921 &uio_buf[0], sizeof(uio_buf));
10922 uio_addiov(auio, uap->value, uap->size);
91447636 10923
2d21ac55
A
10924 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10925#if CONFIG_FSE
10926 if (error == 0) {
10927 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10928 FSE_ARG_VNODE, vp,
10929 FSE_ARG_DONE);
10930 }
10931#endif
91447636
A
10932 vnode_put(vp);
10933 file_drop(uap->fd);
10934 *retval = 0;
10935 return (error);
10936}
55e303ae 10937
91447636
A
10938/*
10939 * Remove an extended attribute.
b0d623f7 10940 * XXX Code duplication here.
91447636 10941 */
91447636 10942int
2d21ac55 10943removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 10944{
2d21ac55 10945 vnode_t vp;
91447636
A
10946 struct nameidata nd;
10947 char attrname[XATTR_MAXNAMELEN+1];
10948 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 10949 vfs_context_t ctx = vfs_context_current();
91447636 10950 size_t namelen;
b0d623f7 10951 u_int32_t nameiflags;
91447636 10952 int error;
55e303ae 10953
2d21ac55 10954 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10955 return (EINVAL);
55e303ae 10956
91447636
A
10957 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10958 if (error != 0) {
10959 return (error);
10960 }
10961 if (xattr_protected(attrname))
10962 return(EPERM);
10963 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10964 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10965 if ((error = namei(&nd))) {
10966 return (error);
10967 }
10968 vp = nd.ni_vp;
10969 nameidone(&nd);
55e303ae 10970
2d21ac55
A
10971 error = vn_removexattr(vp, attrname, uap->options, ctx);
10972#if CONFIG_FSE
10973 if (error == 0) {
10974 add_fsevent(FSE_XATTR_REMOVED, ctx,
10975 FSE_ARG_VNODE, vp,
10976 FSE_ARG_DONE);
10977 }
10978#endif
91447636
A
10979 vnode_put(vp);
10980 *retval = 0;
10981 return (error);
55e303ae
A
10982}
10983
91447636
A
10984/*
10985 * Remove an extended attribute.
b0d623f7 10986 * XXX Code duplication here.
91447636 10987 */
91447636 10988int
2d21ac55 10989fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10990{
2d21ac55 10991 vnode_t vp;
91447636 10992 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10993 size_t namelen;
10994 int error;
6d2010ae 10995#if CONFIG_FSE
2d21ac55 10996 vfs_context_t ctx = vfs_context_current();
6d2010ae 10997#endif
55e303ae 10998
2d21ac55 10999 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
11000 return (EINVAL);
11001
11002 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11003 if (error != 0) {
11004 return (error);
11005 }
11006 if (xattr_protected(attrname))
11007 return(EPERM);
11008 if ( (error = file_vnode(uap->fd, &vp)) ) {
11009 return (error);
11010 }
11011 if ( (error = vnode_getwithref(vp)) ) {
11012 file_drop(uap->fd);
11013 return(error);
11014 }
4a249263 11015
2d21ac55
A
11016 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
11017#if CONFIG_FSE
11018 if (error == 0) {
11019 add_fsevent(FSE_XATTR_REMOVED, ctx,
11020 FSE_ARG_VNODE, vp,
11021 FSE_ARG_DONE);
11022 }
11023#endif
91447636
A
11024 vnode_put(vp);
11025 file_drop(uap->fd);
11026 *retval = 0;
11027 return (error);
55e303ae
A
11028}
11029
91447636
A
11030/*
11031 * Retrieve the list of extended attribute names.
b0d623f7 11032 * XXX Code duplication here.
91447636 11033 */
91447636 11034int
2d21ac55 11035listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 11036{
2d21ac55 11037 vnode_t vp;
91447636 11038 struct nameidata nd;
2d21ac55 11039 vfs_context_t ctx = vfs_context_current();
91447636
A
11040 uio_t auio = NULL;
11041 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11042 size_t attrsize = 0;
b0d623f7 11043 u_int32_t nameiflags;
91447636
A
11044 int error;
11045 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 11046
2d21ac55 11047 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 11048 return (EINVAL);
55e303ae 11049
fe8ab488 11050 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 11051 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
11052 if ((error = namei(&nd))) {
11053 return (error);
11054 }
11055 vp = nd.ni_vp;
11056 nameidone(&nd);
11057 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
11058 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11059 &uio_buf[0], sizeof(uio_buf));
91447636
A
11060 uio_addiov(auio, uap->namebuf, uap->bufsize);
11061 }
55e303ae 11062
2d21ac55 11063 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 11064
91447636
A
11065 vnode_put(vp);
11066 if (auio) {
11067 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11068 } else {
11069 *retval = (user_ssize_t)attrsize;
11070 }
11071 return (error);
55e303ae
A
11072}
11073
91447636
A
11074/*
11075 * Retrieve the list of extended attribute names.
b0d623f7 11076 * XXX Code duplication here.
91447636 11077 */
55e303ae 11078int
2d21ac55 11079flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 11080{
2d21ac55 11081 vnode_t vp;
91447636
A
11082 uio_t auio = NULL;
11083 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11084 size_t attrsize = 0;
11085 int error;
11086 char uio_buf[ UIO_SIZEOF(1) ];
11087
2d21ac55 11088 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
11089 return (EINVAL);
11090
11091 if ( (error = file_vnode(uap->fd, &vp)) ) {
11092 return (error);
11093 }
11094 if ( (error = vnode_getwithref(vp)) ) {
11095 file_drop(uap->fd);
11096 return(error);
11097 }
11098 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 11099 auio = uio_createwithbuffer(1, 0, spacetype,
91447636
A
11100 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11101 uio_addiov(auio, uap->namebuf, uap->bufsize);
11102 }
91447636 11103
2d21ac55 11104 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 11105
91447636
A
11106 vnode_put(vp);
11107 file_drop(uap->fd);
11108 if (auio) {
11109 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11110 } else {
11111 *retval = (user_ssize_t)attrsize;
11112 }
11113 return (error);
55e303ae 11114}
4a249263 11115
fe8ab488
A
11116static int fsgetpath_internal(
11117 vfs_context_t ctx, int volfs_id, uint64_t objid,
11118 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 11119{
fe8ab488 11120 int error;
b0d623f7 11121 struct mount *mp = NULL;
fe8ab488 11122 vnode_t vp;
b0d623f7 11123 int length;
fe8ab488 11124 int bpflags;
813fb2f6
A
11125 /* maximum number of times to retry build_path */
11126 unsigned int retries = 0x10;
b0d623f7 11127
fe8ab488 11128 if (bufsize > PAGE_SIZE) {
b0d623f7 11129 return (EINVAL);
fe8ab488
A
11130 }
11131
11132 if (buf == NULL) {
b0d623f7
A
11133 return (ENOMEM);
11134 }
fe8ab488 11135
813fb2f6 11136retry:
fe8ab488 11137 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 11138 error = ENOTSUP; /* unexpected failure */
fe8ab488 11139 return ENOTSUP;
b0d623f7 11140 }
fe8ab488 11141
39236c6e 11142unionget:
fe8ab488 11143 if (objid == 2) {
b0d623f7
A
11144 error = VFS_ROOT(mp, &vp, ctx);
11145 } else {
fe8ab488 11146 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 11147 }
39236c6e
A
11148
11149 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11150 /*
11151 * If the fileid isn't found and we're in a union
11152 * mount volume, then see if the fileid is in the
11153 * mounted-on volume.
11154 */
11155 struct mount *tmp = mp;
11156 mp = vnode_mount(tmp->mnt_vnodecovered);
11157 vfs_unbusy(tmp);
11158 if (vfs_busy(mp, LK_NOWAIT) == 0)
11159 goto unionget;
fe8ab488 11160 } else {
39236c6e 11161 vfs_unbusy(mp);
fe8ab488 11162 }
39236c6e 11163
b0d623f7 11164 if (error) {
fe8ab488 11165 return error;
b0d623f7 11166 }
fe8ab488 11167
6d2010ae
A
11168#if CONFIG_MACF
11169 error = mac_vnode_check_fsgetpath(ctx, vp);
11170 if (error) {
11171 vnode_put(vp);
fe8ab488 11172 return error;
6d2010ae
A
11173 }
11174#endif
fe8ab488 11175
b0d623f7
A
11176 /* Obtain the absolute path to this vnode. */
11177 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 11178 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 11179 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 11180 vnode_put(vp);
fe8ab488 11181
b0d623f7 11182 if (error) {
813fb2f6
A
11183 /* there was a race building the path, try a few more times */
11184 if (error == EAGAIN) {
11185 --retries;
11186 if (retries > 0)
11187 goto retry;
11188
11189 error = ENOENT;
11190 }
b0d623f7
A
11191 goto out;
11192 }
fe8ab488
A
11193
11194 AUDIT_ARG(text, buf);
39236c6e
A
11195
11196 if (kdebug_enable) {
11197 long dbg_parms[NUMPARMS];
d9a64523 11198 int dbg_namelen;
39236c6e 11199
d9a64523 11200 dbg_namelen = (int)sizeof(dbg_parms);
39236c6e 11201
fe8ab488
A
11202 if (length < dbg_namelen) {
11203 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
11204 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11205
11206 dbg_namelen = length;
fe8ab488
A
11207 } else {
11208 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11209 }
39236c6e 11210
d9a64523
A
11211 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11212 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
39236c6e 11213 }
fe8ab488
A
11214
11215 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11216
11217out:
11218 return (error);
11219}
11220
11221/*
11222 * Obtain the full pathname of a file system object by id.
fe8ab488 11223 */
fe8ab488
A
11224int
11225fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11226{
11227 vfs_context_t ctx = vfs_context_current();
11228 fsid_t fsid;
11229 char *realpath;
11230 int length;
11231 int error;
11232
11233 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11234 return (error);
11235 }
11236 AUDIT_ARG(value32, fsid.val[0]);
11237 AUDIT_ARG(value64, uap->objid);
11238 /* Restrict output buffer size for now. */
39037602 11239
fe8ab488
A
11240 if (uap->bufsize > PAGE_SIZE) {
11241 return (EINVAL);
39037602 11242 }
d9a64523 11243 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK | M_ZERO);
fe8ab488
A
11244 if (realpath == NULL) {
11245 return (ENOMEM);
11246 }
11247
11248 error = fsgetpath_internal(
39037602 11249 ctx, fsid.val[0], uap->objid,
fe8ab488
A
11250 uap->bufsize, realpath, &length);
11251
11252 if (error) {
11253 goto out;
11254 }
39037602 11255
b0d623f7
A
11256 error = copyout((caddr_t)realpath, uap->buf, length);
11257
11258 *retval = (user_ssize_t)length; /* may be superseded by error */
11259out:
11260 if (realpath) {
11261 FREE(realpath, M_TEMP);
11262 }
11263 return (error);
11264}
11265
91447636
A
11266/*
11267 * Common routine to handle various flavors of statfs data heading out
11268 * to user space.
2d21ac55
A
11269 *
11270 * Returns: 0 Success
11271 * EFAULT
91447636
A
11272 */
11273static int
39037602
A
11274munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11275 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 11276 boolean_t partial_copy)
4a249263 11277{
91447636
A
11278 int error;
11279 int my_size, copy_size;
11280
11281 if (is_64_bit) {
b0d623f7 11282 struct user64_statfs sfs;
91447636
A
11283 my_size = copy_size = sizeof(sfs);
11284 bzero(&sfs, my_size);
11285 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11286 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11287 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
11288 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11289 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11290 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11291 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11292 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11293 sfs.f_files = (user64_long_t)sfsp->f_files;
11294 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
11295 sfs.f_fsid = sfsp->f_fsid;
11296 sfs.f_owner = sfsp->f_owner;
6d2010ae 11297 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11298 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11299 } else {
11300 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11301 }
2d21ac55
A
11302 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11303 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11304
11305 if (partial_copy) {
11306 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11307 }
11308 error = copyout((caddr_t)&sfs, bufp, copy_size);
11309 }
11310 else {
b0d623f7
A
11311 struct user32_statfs sfs;
11312
91447636
A
11313 my_size = copy_size = sizeof(sfs);
11314 bzero(&sfs, my_size);
39037602 11315
91447636
A
11316 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11317 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11318 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 11319
91447636
A
11320 /*
11321 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11322 * have to fudge the numbers here in that case. We inflate the blocksize in order
11323 * to reflect the filesystem size as best we can.
11324 */
39037602
A
11325 if ((sfsp->f_blocks > INT_MAX)
11326 /* Hack for 4061702 . I think the real fix is for Carbon to
91447636 11327 * look for some volume capability and not depend on hidden
39037602 11328 * semantics agreed between a FS and carbon.
91447636
A
11329 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11330 * for Carbon to set bNoVolumeSizes volume attribute.
39037602 11331 * Without this the webdavfs files cannot be copied onto
91447636
A
11332 * disk as they look huge. This change should not affect
11333 * XSAN as they should not setting these to -1..
11334 */
2d21ac55
A
11335 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11336 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11337 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
11338 int shift;
11339
11340 /*
11341 * Work out how far we have to shift the block count down to make it fit.
11342 * Note that it's possible to have to shift so far that the resulting
11343 * blocksize would be unreportably large. At that point, we will clip
11344 * any values that don't fit.
11345 *
11346 * For safety's sake, we also ensure that f_iosize is never reported as
11347 * being smaller than f_bsize.
11348 */
11349 for (shift = 0; shift < 32; shift++) {
b0d623f7 11350 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 11351 break;
b0d623f7 11352 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
11353 break;
11354 }
b0d623f7
A
11355#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11356 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11357 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11358 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 11359#undef __SHIFT_OR_CLIP
b0d623f7 11360 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
11361 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11362 } else {
11363 /* filesystem is small enough to be reported honestly */
b0d623f7
A
11364 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11365 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11366 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11367 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11368 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 11369 }
b0d623f7
A
11370 sfs.f_files = (user32_long_t)sfsp->f_files;
11371 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
11372 sfs.f_fsid = sfsp->f_fsid;
11373 sfs.f_owner = sfsp->f_owner;
6d2010ae 11374 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11375 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11376 } else {
11377 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11378 }
2d21ac55
A
11379 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11380 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11381
11382 if (partial_copy) {
11383 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11384 }
11385 error = copyout((caddr_t)&sfs, bufp, copy_size);
11386 }
39037602 11387
91447636
A
11388 if (sizep != NULL) {
11389 *sizep = my_size;
11390 }
11391 return(error);
11392}
11393
11394/*
11395 * copy stat structure into user_stat structure.
11396 */
b0d623f7 11397void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11398{
b0d623f7
A
11399 bzero(usbp, sizeof(*usbp));
11400
11401 usbp->st_dev = sbp->st_dev;
11402 usbp->st_ino = sbp->st_ino;
11403 usbp->st_mode = sbp->st_mode;
11404 usbp->st_nlink = sbp->st_nlink;
11405 usbp->st_uid = sbp->st_uid;
11406 usbp->st_gid = sbp->st_gid;
11407 usbp->st_rdev = sbp->st_rdev;
11408#ifndef _POSIX_C_SOURCE
11409 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11410 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11411 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11412 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11413 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11414 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11415#else
11416 usbp->st_atime = sbp->st_atime;
11417 usbp->st_atimensec = sbp->st_atimensec;
11418 usbp->st_mtime = sbp->st_mtime;
11419 usbp->st_mtimensec = sbp->st_mtimensec;
11420 usbp->st_ctime = sbp->st_ctime;
11421 usbp->st_ctimensec = sbp->st_ctimensec;
11422#endif
11423 usbp->st_size = sbp->st_size;
11424 usbp->st_blocks = sbp->st_blocks;
11425 usbp->st_blksize = sbp->st_blksize;
11426 usbp->st_flags = sbp->st_flags;
11427 usbp->st_gen = sbp->st_gen;
11428 usbp->st_lspare = sbp->st_lspare;
11429 usbp->st_qspare[0] = sbp->st_qspare[0];
11430 usbp->st_qspare[1] = sbp->st_qspare[1];
11431}
11432
11433void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11434{
11435 bzero(usbp, sizeof(*usbp));
0c530ab8 11436
91447636
A
11437 usbp->st_dev = sbp->st_dev;
11438 usbp->st_ino = sbp->st_ino;
11439 usbp->st_mode = sbp->st_mode;
11440 usbp->st_nlink = sbp->st_nlink;
11441 usbp->st_uid = sbp->st_uid;
11442 usbp->st_gid = sbp->st_gid;
11443 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11444#ifndef _POSIX_C_SOURCE
11445 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11446 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11447 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11448 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11449 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11450 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11451#else
11452 usbp->st_atime = sbp->st_atime;
11453 usbp->st_atimensec = sbp->st_atimensec;
11454 usbp->st_mtime = sbp->st_mtime;
11455 usbp->st_mtimensec = sbp->st_mtimensec;
11456 usbp->st_ctime = sbp->st_ctime;
11457 usbp->st_ctimensec = sbp->st_ctimensec;
11458#endif
11459 usbp->st_size = sbp->st_size;
11460 usbp->st_blocks = sbp->st_blocks;
11461 usbp->st_blksize = sbp->st_blksize;
11462 usbp->st_flags = sbp->st_flags;
11463 usbp->st_gen = sbp->st_gen;
11464 usbp->st_lspare = sbp->st_lspare;
11465 usbp->st_qspare[0] = sbp->st_qspare[0];
11466 usbp->st_qspare[1] = sbp->st_qspare[1];
11467}
11468
11469/*
11470 * copy stat64 structure into user_stat64 structure.
11471 */
b0d623f7
A
11472void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11473{
11474 bzero(usbp, sizeof(*usbp));
11475
11476 usbp->st_dev = sbp->st_dev;
11477 usbp->st_ino = sbp->st_ino;
11478 usbp->st_mode = sbp->st_mode;
11479 usbp->st_nlink = sbp->st_nlink;
11480 usbp->st_uid = sbp->st_uid;
11481 usbp->st_gid = sbp->st_gid;
11482 usbp->st_rdev = sbp->st_rdev;
11483#ifndef _POSIX_C_SOURCE
11484 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11485 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11486 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11487 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11488 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11489 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11490 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11491 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11492#else
11493 usbp->st_atime = sbp->st_atime;
11494 usbp->st_atimensec = sbp->st_atimensec;
11495 usbp->st_mtime = sbp->st_mtime;
11496 usbp->st_mtimensec = sbp->st_mtimensec;
11497 usbp->st_ctime = sbp->st_ctime;
11498 usbp->st_ctimensec = sbp->st_ctimensec;
11499 usbp->st_birthtime = sbp->st_birthtime;
11500 usbp->st_birthtimensec = sbp->st_birthtimensec;
11501#endif
11502 usbp->st_size = sbp->st_size;
11503 usbp->st_blocks = sbp->st_blocks;
11504 usbp->st_blksize = sbp->st_blksize;
11505 usbp->st_flags = sbp->st_flags;
11506 usbp->st_gen = sbp->st_gen;
11507 usbp->st_lspare = sbp->st_lspare;
11508 usbp->st_qspare[0] = sbp->st_qspare[0];
11509 usbp->st_qspare[1] = sbp->st_qspare[1];
11510}
11511
11512void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11513{
b0d623f7 11514 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11515
11516 usbp->st_dev = sbp->st_dev;
11517 usbp->st_ino = sbp->st_ino;
11518 usbp->st_mode = sbp->st_mode;
11519 usbp->st_nlink = sbp->st_nlink;
11520 usbp->st_uid = sbp->st_uid;
11521 usbp->st_gid = sbp->st_gid;
11522 usbp->st_rdev = sbp->st_rdev;
11523#ifndef _POSIX_C_SOURCE
91447636
A
11524 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11525 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11526 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11527 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11528 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11529 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11530 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11531 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11532#else
11533 usbp->st_atime = sbp->st_atime;
11534 usbp->st_atimensec = sbp->st_atimensec;
11535 usbp->st_mtime = sbp->st_mtime;
11536 usbp->st_mtimensec = sbp->st_mtimensec;
11537 usbp->st_ctime = sbp->st_ctime;
11538 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11539 usbp->st_birthtime = sbp->st_birthtime;
11540 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11541#endif
11542 usbp->st_size = sbp->st_size;
11543 usbp->st_blocks = sbp->st_blocks;
11544 usbp->st_blksize = sbp->st_blksize;
11545 usbp->st_flags = sbp->st_flags;
11546 usbp->st_gen = sbp->st_gen;
11547 usbp->st_lspare = sbp->st_lspare;
11548 usbp->st_qspare[0] = sbp->st_qspare[0];
11549 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11550}
39236c6e
A
11551
11552/*
11553 * Purge buffer cache for simulating cold starts
11554 */
11555static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11556{
11557 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11558
11559 return VNODE_RETURNED;
11560}
11561
11562static int vfs_purge_callback(mount_t mp, __unused void * arg)
11563{
11564 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11565
11566 return VFS_RETURNED;
11567}
11568
11569int
11570vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11571{
11572 if (!kauth_cred_issuser(kauth_cred_get()))
11573 return EPERM;
11574
11575 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11576
11577 return 0;
11578}
11579
39037602
A
11580/*
11581 * gets the vnode associated with the (unnamed) snapshot directory
11582 * for a Filesystem. The snapshot directory vnode is returned with
11583 * an iocount on it.
11584 */
11585int
11586vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11587{
813fb2f6 11588 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
39037602
A
11589}
11590
11591/*
11592 * Get the snapshot vnode.
11593 *
11594 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11595 * needs nameidone() on ndp.
11596 *
11597 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11598 *
11599 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11600 * not needed.
11601 */
11602static int
11603vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11604 user_addr_t name, struct nameidata *ndp, int32_t op,
11605#if !CONFIG_TRIGGERS
11606 __unused
11607#endif
11608 enum path_operation pathop,
11609 vfs_context_t ctx)
11610{
11611 int error, i;
11612 caddr_t name_buf;
11613 size_t name_len;
11614 struct vfs_attr vfa;
11615
11616 *sdvpp = NULLVP;
11617 *rvpp = NULLVP;
11618
11619 error = vnode_getfromfd(ctx, dirfd, rvpp);
11620 if (error)
11621 return (error);
11622
11623 if (!vnode_isvroot(*rvpp)) {
11624 error = EINVAL;
11625 goto out;
11626 }
11627
11628 /* Make sure the filesystem supports snapshots */
11629 VFSATTR_INIT(&vfa);
11630 VFSATTR_WANTED(&vfa, f_capabilities);
11631 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11632 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11633 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11634 VOL_CAP_INT_SNAPSHOT)) ||
11635 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11636 VOL_CAP_INT_SNAPSHOT))) {
11637 error = ENOTSUP;
11638 goto out;
11639 }
11640
11641 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11642 if (error)
11643 goto out;
11644
11645 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11646 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11647 if (error)
11648 goto out1;
11649
11650 /*
11651 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11652 * (the length returned by copyinstr includes the terminating NUL)
11653 */
11654 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11655 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11656 error = EINVAL;
11657 goto out1;
11658 }
11659 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11660 if (i < (int)name_len) {
11661 error = EINVAL;
11662 goto out1;
11663 }
11664
11665#if CONFIG_MACF
11666 if (op == CREATE) {
11667 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11668 name_buf);
11669 } else if (op == DELETE) {
11670 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11671 name_buf);
11672 }
11673 if (error)
11674 goto out1;
11675#endif
11676
11677 /* Check if the snapshot already exists ... */
11678 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11679 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11680 ndp->ni_dvp = *sdvpp;
11681
11682 error = namei(ndp);
11683out1:
11684 FREE(name_buf, M_TEMP);
11685out:
11686 if (error) {
11687 if (*sdvpp) {
11688 vnode_put(*sdvpp);
11689 *sdvpp = NULLVP;
11690 }
11691 if (*rvpp) {
11692 vnode_put(*rvpp);
11693 *rvpp = NULLVP;
11694 }
11695 }
11696 return (error);
11697}
11698
11699/*
11700 * create a filesystem snapshot (for supporting filesystems)
11701 *
11702 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11703 * We get to the (unnamed) snapshot directory vnode and create the vnode
11704 * for the snapshot in it.
11705 *
11706 * Restrictions:
11707 *
11708 * a) Passed in name for snapshot cannot have slashes.
11709 * b) name can't be "." or ".."
11710 *
11711 * Since this requires superuser privileges, vnode_authorize calls are not
11712 * made.
11713 */
11714static int
11715snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11716 vfs_context_t ctx)
11717{
11718 vnode_t rvp, snapdvp;
11719 int error;
11720 struct nameidata namend;
11721
11722 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11723 OP_LINK, ctx);
11724 if (error)
11725 return (error);
11726
11727 if (namend.ni_vp) {
11728 vnode_put(namend.ni_vp);
11729 error = EEXIST;
11730 } else {
11731 struct vnode_attr va;
11732 vnode_t vp = NULLVP;
11733
11734 VATTR_INIT(&va);
11735 VATTR_SET(&va, va_type, VREG);
11736 VATTR_SET(&va, va_mode, 0);
11737
11738 error = vn_create(snapdvp, &vp, &namend, &va,
11739 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11740 if (!error && vp)
11741 vnode_put(vp);
39037602
A
11742 }
11743
11744 nameidone(&namend);
11745 vnode_put(snapdvp);
11746 vnode_put(rvp);
11747 return (error);
11748}
11749
11750/*
11751 * Delete a Filesystem snapshot
11752 *
11753 * get the vnode for the unnamed snapshot directory and the snapshot and
11754 * delete the snapshot.
11755 */
11756static int
11757snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11758 vfs_context_t ctx)
11759{
11760 vnode_t rvp, snapdvp;
11761 int error;
11762 struct nameidata namend;
11763
11764 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11765 OP_UNLINK, ctx);
11766 if (error)
11767 goto out;
11768
11769 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11770 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11771
11772 vnode_put(namend.ni_vp);
11773 nameidone(&namend);
11774 vnode_put(snapdvp);
11775 vnode_put(rvp);
11776out:
11777 return (error);
11778}
11779
11780/*
11781 * Revert a filesystem to a snapshot
11782 *
11783 * Marks the filesystem to revert to the given snapshot on next mount.
11784 */
11785static int
11786snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11787 vfs_context_t ctx)
11788{
11789 int error;
11790 vnode_t rvp;
11791 mount_t mp;
11792 struct fs_snapshot_revert_args revert_data;
11793 struct componentname cnp;
11794 caddr_t name_buf;
11795 size_t name_len;
11796
11797 error = vnode_getfromfd(ctx, dirfd, &rvp);
11798 if (error) {
11799 return (error);
11800 }
11801 mp = vnode_mount(rvp);
11802
813fb2f6
A
11803 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11804 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11805 if (error) {
11806 FREE(name_buf, M_TEMP);
11807 vnode_put(rvp);
11808 return (error);
11809 }
11810
11811#if CONFIG_MACF
11812 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11813 if (error) {
11814 FREE(name_buf, M_TEMP);
11815 vnode_put(rvp);
11816 return (error);
11817 }
11818#endif
11819
39037602
A
11820 /*
11821 * Grab mount_iterref so that we can release the vnode,
11822 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11823 */
11824 error = mount_iterref (mp, 0);
11825 vnode_put(rvp);
11826 if (error) {
39037602
A
11827 FREE(name_buf, M_TEMP);
11828 return (error);
11829 }
11830
11831 memset(&cnp, 0, sizeof(cnp));
11832 cnp.cn_pnbuf = (char *)name_buf;
11833 cnp.cn_nameiop = LOOKUP;
11834 cnp.cn_flags = ISLASTCN | HASBUF;
11835 cnp.cn_pnlen = MAXPATHLEN;
11836 cnp.cn_nameptr = cnp.cn_pnbuf;
11837 cnp.cn_namelen = (int)name_len;
11838 revert_data.sr_cnp = &cnp;
11839
11840 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11841 mount_iterdrop(mp);
11842 FREE(name_buf, M_TEMP);
11843
11844 if (error) {
11845 /* If there was any error, try again using VNOP_IOCTL */
11846
11847 vnode_t snapdvp;
11848 struct nameidata namend;
11849
11850 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11851 OP_LOOKUP, ctx);
11852 if (error) {
11853 return (error);
11854 }
11855
11856
5ba3f43e 11857 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
39037602
A
11858 0, ctx);
11859
11860 vnode_put(namend.ni_vp);
11861 nameidone(&namend);
11862 vnode_put(snapdvp);
11863 vnode_put(rvp);
11864 }
11865
11866 return (error);
11867}
11868
11869/*
11870 * rename a Filesystem snapshot
11871 *
11872 * get the vnode for the unnamed snapshot directory and the snapshot and
11873 * rename the snapshot. This is a very specialised (and simple) case of
11874 * rename(2) (which has to deal with a lot more complications). It differs
11875 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11876 */
11877static int
11878snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11879 __unused uint32_t flags, vfs_context_t ctx)
11880{
11881 vnode_t rvp, snapdvp;
11882 int error, i;
11883 caddr_t newname_buf;
11884 size_t name_len;
11885 vnode_t fvp;
11886 struct nameidata *fromnd, *tond;
11887 /* carving out a chunk for structs that are too big to be on stack. */
11888 struct {
11889 struct nameidata from_node;
11890 struct nameidata to_node;
11891 } * __rename_data;
11892
11893 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11894 fromnd = &__rename_data->from_node;
11895 tond = &__rename_data->to_node;
11896
11897 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11898 OP_UNLINK, ctx);
11899 if (error)
11900 goto out;
11901 fvp = fromnd->ni_vp;
11902
11903 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11904 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11905 if (error)
11906 goto out1;
11907
11908 /*
11909 * Some sanity checks- new name can't be empty, "." or ".." or have
11910 * slashes.
11911 * (the length returned by copyinstr includes the terminating NUL)
11912 *
11913 * The FS rename VNOP is suppossed to handle this but we'll pick it
11914 * off here itself.
11915 */
11916 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11917 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11918 error = EINVAL;
11919 goto out1;
11920 }
11921 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11922 if (i < (int)name_len) {
11923 error = EINVAL;
11924 goto out1;
11925 }
11926
11927#if CONFIG_MACF
11928 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11929 newname_buf);
11930 if (error)
11931 goto out1;
11932#endif
11933
11934 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11935 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11936 tond->ni_dvp = snapdvp;
11937
11938 error = namei(tond);
11939 if (error) {
11940 goto out2;
11941 } else if (tond->ni_vp) {
11942 /*
11943 * snapshot rename behaves differently than rename(2) - if the
11944 * new name exists, EEXIST is returned.
11945 */
11946 vnode_put(tond->ni_vp);
11947 error = EEXIST;
11948 goto out2;
11949 }
11950
11951 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11952 &tond->ni_cnd, ctx);
11953
11954out2:
11955 nameidone(tond);
11956out1:
11957 FREE(newname_buf, M_TEMP);
11958 vnode_put(fvp);
11959 vnode_put(snapdvp);
11960 vnode_put(rvp);
11961 nameidone(fromnd);
11962out:
11963 FREE(__rename_data, M_TEMP);
11964 return (error);
11965}
11966
11967/*
11968 * Mount a Filesystem snapshot
11969 *
11970 * get the vnode for the unnamed snapshot directory and the snapshot and
11971 * mount the snapshot.
11972 */
11973static int
11974snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 11975 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602
A
11976{
11977 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11978 int error;
11979 struct nameidata *snapndp, *dirndp;
11980 /* carving out a chunk for structs that are too big to be on stack. */
11981 struct {
11982 struct nameidata snapnd;
11983 struct nameidata dirnd;
11984 } * __snapshot_mount_data;
11985
11986 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11987 M_TEMP, M_WAITOK);
11988 snapndp = &__snapshot_mount_data->snapnd;
11989 dirndp = &__snapshot_mount_data->dirnd;
11990
11991 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11992 OP_LOOKUP, ctx);
11993 if (error)
11994 goto out;
11995
11996 snapvp = snapndp->ni_vp;
11997 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11998 error = EIO;
11999 goto out1;
12000 }
12001
12002 /* Get the vnode to be covered */
12003 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12004 UIO_USERSPACE, directory, ctx);
12005 error = namei(dirndp);
12006 if (error)
12007 goto out1;
12008
12009 vp = dirndp->ni_vp;
12010 pvp = dirndp->ni_dvp;
12011
12012 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12013 error = EINVAL;
12014 } else {
12015 mount_t mp = vnode_mount(rvp);
12016 struct fs_snapshot_mount_args smnt_data;
12017
12018 smnt_data.sm_mp = mp;
12019 smnt_data.sm_cnp = &snapndp->ni_cnd;
12020 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
5ba3f43e 12021 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
39037602 12022 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
39037602
A
12023 }
12024
12025 vnode_put(vp);
12026 vnode_put(pvp);
12027 nameidone(dirndp);
12028out1:
12029 vnode_put(snapvp);
12030 vnode_put(snapdvp);
12031 vnode_put(rvp);
12032 nameidone(snapndp);
12033out:
12034 FREE(__snapshot_mount_data, M_TEMP);
12035 return (error);
12036}
12037
813fb2f6
A
12038/*
12039 * Root from a snapshot of the filesystem
12040 *
12041 * Marks the filesystem to root from the given snapshot on next boot.
12042 */
12043static int
12044snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12045 vfs_context_t ctx)
12046{
12047 int error;
12048 vnode_t rvp;
12049 mount_t mp;
12050 struct fs_snapshot_root_args root_data;
12051 struct componentname cnp;
12052 caddr_t name_buf;
12053 size_t name_len;
12054
12055 error = vnode_getfromfd(ctx, dirfd, &rvp);
12056 if (error) {
12057 return (error);
12058 }
12059 mp = vnode_mount(rvp);
12060
12061 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12062 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12063 if (error) {
12064 FREE(name_buf, M_TEMP);
12065 vnode_put(rvp);
12066 return (error);
12067 }
12068
12069 // XXX MAC checks ?
12070
12071 /*
12072 * Grab mount_iterref so that we can release the vnode,
12073 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12074 */
12075 error = mount_iterref (mp, 0);
12076 vnode_put(rvp);
12077 if (error) {
12078 FREE(name_buf, M_TEMP);
12079 return (error);
12080 }
12081
12082 memset(&cnp, 0, sizeof(cnp));
12083 cnp.cn_pnbuf = (char *)name_buf;
12084 cnp.cn_nameiop = LOOKUP;
12085 cnp.cn_flags = ISLASTCN | HASBUF;
12086 cnp.cn_pnlen = MAXPATHLEN;
12087 cnp.cn_nameptr = cnp.cn_pnbuf;
12088 cnp.cn_namelen = (int)name_len;
12089 root_data.sr_cnp = &cnp;
12090
12091 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12092
12093 mount_iterdrop(mp);
12094 FREE(name_buf, M_TEMP);
12095
12096 return (error);
12097}
12098
39037602
A
12099/*
12100 * FS snapshot operations dispatcher
12101 */
12102int
12103fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12104 __unused int32_t *retval)
12105{
12106 int error;
12107 vfs_context_t ctx = vfs_context_current();
12108
813fb2f6
A
12109 AUDIT_ARG(fd, uap->dirfd);
12110 AUDIT_ARG(value32, uap->op);
12111
39037602
A
12112 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12113 if (error)
12114 return (error);
12115
12116 switch (uap->op) {
12117 case SNAPSHOT_OP_CREATE:
12118 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12119 break;
12120 case SNAPSHOT_OP_DELETE:
12121 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12122 break;
12123 case SNAPSHOT_OP_RENAME:
12124 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12125 uap->flags, ctx);
12126 break;
12127 case SNAPSHOT_OP_MOUNT:
12128 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12129 uap->data, uap->flags, ctx);
12130 break;
12131 case SNAPSHOT_OP_REVERT:
12132 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12133 break;
d9a64523 12134#if CONFIG_MNT_ROOTSNAP
813fb2f6
A
12135 case SNAPSHOT_OP_ROOT:
12136 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12137 break;
d9a64523 12138#endif /* CONFIG_MNT_ROOTSNAP */
39037602
A
12139 default:
12140 error = ENOSYS;
12141 }
12142
12143 return (error);
12144}